# p2p_daemon.py
  1. import asyncio
  2. import os
  3. import secrets
  4. from collections.abc import AsyncIterable as AsyncIterableABC
  5. from contextlib import closing, suppress
  6. from dataclasses import dataclass
  7. from importlib.resources import path
  8. from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple, Type, TypeVar, Union
  9. from google.protobuf.message import Message
  10. from multiaddr import Multiaddr
  11. import hivemind.hivemind_cli as cli
  12. import hivemind.p2p.p2p_daemon_bindings.p2pclient as p2pclient
  13. from hivemind.p2p.p2p_daemon_bindings.control import P2PDaemonError, P2PHandlerError
  14. from hivemind.p2p.p2p_daemon_bindings.datastructures import PeerID, PeerInfo, StreamInfo
  15. from hivemind.proto.p2pd_pb2 import RPCError
  16. from hivemind.utils.asyncio import as_aiter, asingle
  17. from hivemind.utils.logging import get_logger
# Module-level logger shared by everything in this file.
logger = get_logger(__name__)

# File name of the daemon executable; ``P2P.create`` looks it up inside the ``hivemind.hivemind_cli`` package.
P2PD_FILENAME = "p2pd"
  20. @dataclass(frozen=True)
  21. class P2PContext(object):
  22. handle_name: str
  23. local_id: PeerID
  24. remote_id: PeerID = None
class P2P:
    """
    This class is responsible for establishing peer-to-peer connections through NAT and/or firewalls.
    It creates and manages a libp2p daemon (https://libp2p.io) in a background process,
    then terminates it when P2P is shut down. In order to communicate, a P2P instance should
    either use one or more initial_peers that will connect it to the rest of the swarm or
    use the public IPFS network (https://ipfs.io).

    For incoming connections, P2P instances add RPC handlers that may be accessed by other peers:
      - `P2P.add_protobuf_handler` accepts a protobuf message and returns another protobuf
      - `P2P.add_binary_stream_handler` transfers raw data using bi-directional streaming interface

    To access these handlers, a P2P instance can `P2P.call_protobuf_handler`/`P2P.call_binary_stream_handler`,
    using the recipient's unique `P2P.peer_id` and the name of the corresponding handler.
    """

    # Wire format of a raw message: a HEADER_LEN-byte payload length in BYTEORDER, followed by the payload
    HEADER_LEN = 8
    BYTEORDER = "big"
    # One-byte tag written before each protobuf: a regular message or an RPCError
    MESSAGE_MARKER = b"\x00"
    ERROR_MARKER = b"\x01"
    # An empty RPCError; the calling side sends it after its last request to mark the end of the request stream
    END_OF_STREAM = RPCError()

    # Maps the ``dht_mode`` argument of ``create`` to the corresponding daemon command-line flag
    DHT_MODE_MAPPING = {
        "dht": {"dht": 1},
        "dht_server": {"dhtServer": 1},
        "dht_client": {"dhtClient": 1},
    }
    # Maps the ``force_reachability`` argument of ``create`` to the corresponding daemon command-line flag
    FORCE_REACHABILITY_MAPPING = {
        "public": {"forceReachabilityPublic": 1},
        "private": {"forceReachabilityPrivate": 1},
    }
    # Multiaddr prefix shared by the daemon and client unix sockets; a random suffix is appended per instance
    _UNIX_SOCKET_PREFIX = "/unix/tmp/hivemind-"
  53. def __init__(self):
  54. self.peer_id = None
  55. self._client = None
  56. self._child = None
  57. self._alive = False
  58. self._reader_task = None
  59. self._listen_task = None
  60. @classmethod
  61. async def create(
  62. cls,
  63. initial_peers: Optional[Sequence[Union[Multiaddr, str]]] = None,
  64. *,
  65. announce_maddrs: Optional[Sequence[Union[Multiaddr, str]]] = None,
  66. auto_nat: bool = True,
  67. conn_manager: bool = True,
  68. dht_mode: str = "dht_server",
  69. force_reachability: Optional[str] = None,
  70. host_maddrs: Optional[Sequence[Union[Multiaddr, str]]] = ("/ip4/127.0.0.1/tcp/0",),
  71. identity_path: Optional[str] = None,
  72. idle_timeout: float = 30,
  73. nat_port_map: bool = True,
  74. quic: bool = False,
  75. relay_hop_limit: int = 0,
  76. startup_timeout: float = 15,
  77. tls: bool = True,
  78. use_auto_relay: bool = False,
  79. use_ipfs: bool = False,
  80. use_relay: bool = True,
  81. use_relay_hop: bool = False,
  82. use_relay_discovery: bool = False,
  83. ) -> "P2P":
  84. """
  85. Start a new p2pd process and connect to it.
  86. :param initial_peers: List of bootstrap peers
  87. :param auto_nat: Enables the AutoNAT service
  88. :param announce_maddrs: Visible multiaddrs that the peer will announce
  89. for external connections from other p2p instances
  90. :param conn_manager: Enables the Connection Manager
  91. :param dht_mode: DHT mode (dht_client/dht_server/dht)
  92. :param force_reachability: Force reachability mode (public/private)
  93. :param host_maddrs: Multiaddrs to listen for external connections from other p2p instances
  94. :param identity_path: Path to a pre-generated private key file. If defined, makes the peer ID deterministic.
  95. May be generated using ``./p2p-keygen`` from ``go-libp2p-daemon``.
  96. :param idle_timeout: kill daemon if client has been idle for a given number of
  97. seconds before opening persistent streams
  98. :param nat_port_map: Enables NAT port mapping
  99. :param quic: Enables the QUIC transport
  100. :param relay_hop_limit: sets the hop limit for hop relays
  101. :param startup_timeout: raise a P2PDaemonError if the daemon does not start in ``startup_timeout`` seconds
  102. :param tls: Enables TLS1.3 channel security protocol
  103. :param use_auto_relay: enables autorelay
  104. :param use_ipfs: Bootstrap to IPFS (incompatible with initial_peers)
  105. :param use_relay: enables circuit relay
  106. :param use_relay_hop: enables hop for relay
  107. :param use_relay_discovery: enables passive discovery for relay
  108. :return: a wrapper for the p2p daemon
  109. """
  110. assert not (
  111. initial_peers and use_ipfs
  112. ), "User-defined initial_peers and use_ipfs=True are incompatible, please choose one option"
  113. self = cls()
  114. with path(cli, P2PD_FILENAME) as p:
  115. p2pd_path = p
  116. socket_uid = secrets.token_urlsafe(8)
  117. self._daemon_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pd-{socket_uid}.sock")
  118. self._client_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pclient-{socket_uid}.sock")
  119. need_bootstrap = bool(initial_peers) or use_ipfs
  120. process_kwargs = cls.DHT_MODE_MAPPING.get(dht_mode, {"dht": 0})
  121. process_kwargs.update(cls.FORCE_REACHABILITY_MAPPING.get(force_reachability, {}))
  122. for param, value in [
  123. ("bootstrapPeers", initial_peers),
  124. ("hostAddrs", host_maddrs),
  125. ("announceAddrs", announce_maddrs),
  126. ]:
  127. if value:
  128. process_kwargs[param] = self._maddrs_to_str(value)
  129. if identity_path is not None:
  130. process_kwargs["id"] = identity_path
  131. proc_args = self._make_process_args(
  132. str(p2pd_path),
  133. autoRelay=use_auto_relay,
  134. autonat=auto_nat,
  135. b=need_bootstrap,
  136. connManager=conn_manager,
  137. idleTimeout=f"{idle_timeout}s",
  138. listen=self._daemon_listen_maddr,
  139. natPortMap=nat_port_map,
  140. quic=quic,
  141. relay=use_relay,
  142. relayDiscovery=use_relay_discovery,
  143. relayHop=use_relay_hop,
  144. relayHopLimit=relay_hop_limit,
  145. tls=tls,
  146. **process_kwargs,
  147. )
  148. self._child = await asyncio.subprocess.create_subprocess_exec(
  149. *proc_args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT
  150. )
  151. self._alive = True
  152. ready = asyncio.Future()
  153. self._reader_task = asyncio.create_task(self._read_outputs(ready))
  154. try:
  155. await asyncio.wait_for(ready, startup_timeout)
  156. except asyncio.TimeoutError:
  157. await self.shutdown()
  158. raise P2PDaemonError(f"Daemon failed to start in {startup_timeout:.1f} seconds")
  159. self._client = await p2pclient.Client.create(self._daemon_listen_maddr, self._client_listen_maddr)
  160. await self._ping_daemon()
  161. return self
  162. @classmethod
  163. async def replicate(cls, daemon_listen_maddr: Multiaddr) -> "P2P":
  164. """
  165. Connect to existing p2p daemon
  166. :param daemon_listen_maddr: multiaddr of the existing p2p daemon
  167. :return: new wrapper for the existing p2p daemon
  168. """
  169. self = cls()
  170. # There is no child under control
  171. # Use external already running p2pd
  172. self._child = None
  173. self._alive = True
  174. socket_uid = secrets.token_urlsafe(8)
  175. self._daemon_listen_maddr = daemon_listen_maddr
  176. self._client_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pclient-{socket_uid}.sock")
  177. self._client = await p2pclient.Client.create(self._daemon_listen_maddr, self._client_listen_maddr)
  178. await self._ping_daemon()
  179. return self
  180. async def _ping_daemon(self) -> None:
  181. self.peer_id, self._visible_maddrs = await self._client.identify()
  182. logger.debug(f"Launched p2pd with peer id = {self.peer_id}, host multiaddrs = {self._visible_maddrs}")
  183. async def get_visible_maddrs(self, latest: bool = False) -> List[Multiaddr]:
  184. """
  185. Get multiaddrs of the current peer that should be accessible by other peers.
  186. :param latest: ask the P2P daemon to refresh the visible multiaddrs
  187. """
  188. if latest:
  189. _, self._visible_maddrs = await self._client.identify()
  190. if not self._visible_maddrs:
  191. raise ValueError(f"No multiaddrs found for peer {self.peer_id}")
  192. p2p_maddr = Multiaddr(f"/p2p/{self.peer_id.to_base58()}")
  193. return [addr.encapsulate(p2p_maddr) for addr in self._visible_maddrs]
  194. async def list_peers(self) -> List[PeerInfo]:
  195. return list(await self._client.list_peers())
  196. async def wait_for_at_least_n_peers(self, n_peers: int, attempts: int = 3, delay: float = 1) -> None:
  197. for _ in range(attempts):
  198. peers = await self._client.list_peers()
  199. if len(peers) >= n_peers:
  200. return
  201. await asyncio.sleep(delay)
  202. raise RuntimeError("Not enough peers")
    @property
    def daemon_listen_maddr(self) -> Multiaddr:
        """Multiaddr of the daemon's control socket; pass it to ``P2P.replicate`` to attach another wrapper."""
        return self._daemon_listen_maddr
  206. @staticmethod
  207. async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2 ** 16) -> None:
  208. writer.write(len(data).to_bytes(P2P.HEADER_LEN, P2P.BYTEORDER))
  209. data = memoryview(data)
  210. for offset in range(0, len(data), chunk_size):
  211. writer.write(data[offset : offset + chunk_size])
  212. await writer.drain()
  213. @staticmethod
  214. async def receive_raw_data(reader: asyncio.StreamReader) -> bytes:
  215. header = await reader.readexactly(P2P.HEADER_LEN)
  216. content_length = int.from_bytes(header, P2P.BYTEORDER)
  217. data = await reader.readexactly(content_length)
  218. return data
    # Type variables standing for the request and response protobuf types in the annotations below
    TInputProtobuf = TypeVar("TInputProtobuf")
    TOutputProtobuf = TypeVar("TOutputProtobuf")
  221. @staticmethod
  222. async def send_protobuf(protobuf: Union[TOutputProtobuf, RPCError], writer: asyncio.StreamWriter) -> None:
  223. if isinstance(protobuf, RPCError):
  224. writer.write(P2P.ERROR_MARKER)
  225. else:
  226. writer.write(P2P.MESSAGE_MARKER)
  227. await P2P.send_raw_data(protobuf.SerializeToString(), writer)
  228. @staticmethod
  229. async def receive_protobuf(
  230. input_protobuf_type: Type[Message], reader: asyncio.StreamReader
  231. ) -> Tuple[Optional[TInputProtobuf], Optional[RPCError]]:
  232. msg_type = await reader.readexactly(1)
  233. if msg_type == P2P.MESSAGE_MARKER:
  234. protobuf = input_protobuf_type()
  235. protobuf.ParseFromString(await P2P.receive_raw_data(reader))
  236. return protobuf, None
  237. elif msg_type == P2P.ERROR_MARKER:
  238. protobuf = RPCError()
  239. protobuf.ParseFromString(await P2P.receive_raw_data(reader))
  240. return None, protobuf
  241. else:
  242. raise TypeError("Invalid Protobuf message type")
    # Aliases for asynchronous streams of request / response protobufs
    TInputStream = AsyncIterator[TInputProtobuf]
    TOutputStream = AsyncIterator[TOutputProtobuf]
    async def _add_protobuf_stream_handler(
        self,
        name: str,
        handler: Callable[[TInputStream, P2PContext], TOutputStream],
        input_protobuf_type: Type[Message],
        max_prefetch: int = 5,
    ) -> None:
        """
        Register ``handler`` under ``name`` as a streaming protobuf handler.

        For every incoming stream, requests are parsed as ``input_protobuf_type`` and put into a queue,
        ``handler`` consumes them as an async iterator, and every response it yields is sent back to the caller.

        :param name: name of the handler
        :param handler: called with the request stream and a ``P2PContext``, returns the response stream
        :param input_protobuf_type: protobuf type of the requests
        :param max_prefetch: Maximum number of items to prefetch from the request stream.
          ``max_prefetch <= 0`` means unlimited.

        :note: Since the cancel messages are sent via the input stream,
          they will not be received while the prefetch buffer is full.
        """

        async def _handle_stream(
            stream_info: StreamInfo, reader: asyncio.StreamReader, writer: asyncio.StreamWriter
        ) -> None:
            context = P2PContext(
                handle_name=name,
                local_id=self.peer_id,
                remote_id=stream_info.peer_id,
            )
            # Prefetch buffer between the receiving loop below and the handler
            requests = asyncio.Queue(max_prefetch)

            async def _read_stream() -> P2P.TInputStream:
                # Yield queued requests until the ``None`` sentinel (end of the request stream) arrives
                while True:
                    request = await requests.get()
                    if request is None:
                        break
                    yield request

            async def _process_stream() -> None:
                try:
                    async for response in handler(_read_stream(), context):
                        try:
                            await P2P.send_protobuf(response, writer)
                        except Exception:
                            # The connection is unexpectedly closed by the caller or broken.
                            # The loglevel is DEBUG since the actual error will be reported on the caller
                            logger.debug("Exception while sending response:", exc_info=True)
                            break
                except Exception as e:
                    logger.warning("Handler failed with the exception:", exc_info=True)
                    with suppress(Exception):
                        # Sometimes `e` is a connection error, so it is okay if we fail to report `e` to the caller
                        await P2P.send_protobuf(RPCError(message=str(e)), writer)

            with closing(writer):
                # Raise the transport's write-buffer watermarks to high=2**30, low=2**30-2**18 bytes
                writer.transport.set_write_buffer_limits(low=2 ** 30 - 2 ** 18, high=2 ** 30)
                # NOTE(review): emitted at WARNING level for every incoming stream - looks like
                # leftover debugging output; confirm the intended log level
                logger.warning(f"Set watermarks to: {writer.transport.get_write_buffer_limits()}")

                processing_task = asyncio.create_task(_process_stream())
                try:
                    while True:
                        # Wait for whichever comes first: the next request or the end of processing
                        receive_task = asyncio.create_task(P2P.receive_protobuf(input_protobuf_type, reader))
                        await asyncio.wait({processing_task, receive_task}, return_when=asyncio.FIRST_COMPLETED)

                        if processing_task.done():
                            receive_task.cancel()
                            return

                        if receive_task.done():
                            try:
                                # The error half of the pair is ignored: only its presence matters here
                                request, _ = await receive_task
                            except asyncio.IncompleteReadError:  # Connection is closed (the client cancelled or died)
                                return
                            await requests.put(request)  # `request` is None for the end-of-stream message
                except Exception:
                    logger.warning("Exception while receiving requests:", exc_info=True)
                finally:
                    # Whatever ended the loop, the handler must not outlive the stream
                    processing_task.cancel()

        await self.add_binary_stream_handler(name, _handle_stream)
    async def _iterate_protobuf_stream_handler(
        self, peer_id: PeerID, name: str, requests: TInputStream, output_protobuf_type: Type[Message]
    ) -> TOutputStream:
        """
        Open a stream to handler ``name`` at ``peer_id``, send ``requests`` in a background task
        and yield the responses as they arrive.

        :param peer_id: peer that hosts the handler
        :param name: name of the handler
        :param requests: asynchronous stream of request protobufs
        :param output_protobuf_type: protobuf type used to parse the responses
        :raises P2PHandlerError: if the remote side replies with an error message
        """
        _, reader, writer = await self.call_binary_stream_handler(peer_id, name)

        async def _write_to_stream() -> None:
            async for request in requests:
                await P2P.send_protobuf(request, writer)
            # Tell the remote side that no more requests will follow
            await P2P.send_protobuf(P2P.END_OF_STREAM, writer)

        with closing(writer):
            # Raise the transport's write-buffer watermarks to high=2**30, low=2**30-2**18 bytes
            writer.transport.set_write_buffer_limits(low=2 ** 30 - 2 ** 18, high=2 ** 30)
            # NOTE(review): emitted at WARNING level for every outgoing stream - looks like
            # leftover debugging output; confirm the intended log level
            logger.warning(f"Set watermarks to: {writer.transport.get_write_buffer_limits()}")

            writing_task = asyncio.create_task(_write_to_stream())
            try:
                while True:
                    try:
                        response, err = await P2P.receive_protobuf(output_protobuf_type, reader)
                    except asyncio.IncompleteReadError:  # Connection is closed
                        break

                    if err is not None:
                        raise P2PHandlerError(f"Failed to call handler `{name}` at {peer_id}: {err.message}")
                    yield response

                # Propagate an exception raised while sending the requests, if any
                await writing_task
            finally:
                writing_task.cancel()
  334. async def add_protobuf_handler(
  335. self,
  336. name: str,
  337. handler: Callable[
  338. [Union[TInputProtobuf, TInputStream], P2PContext], Union[Awaitable[TOutputProtobuf], TOutputStream]
  339. ],
  340. input_protobuf_type: Type[Message],
  341. *,
  342. stream_input: bool = False,
  343. stream_output: bool = False,
  344. ) -> None:
  345. """
  346. :param stream_input: If True, assume ``handler`` to take ``TInputStream``
  347. (not just ``TInputProtobuf``) as input.
  348. :param stream_output: If True, assume ``handler`` to return ``TOutputStream``
  349. (not ``Awaitable[TOutputProtobuf]``).
  350. """
  351. if not stream_input and not stream_output:
  352. await self._add_protobuf_unary_handler(name, handler, input_protobuf_type)
  353. return
  354. async def _stream_handler(requests: P2P.TInputStream, context: P2PContext) -> P2P.TOutputStream:
  355. input = requests if stream_input else await asingle(requests)
  356. output = handler(input, context)
  357. if isinstance(output, AsyncIterableABC):
  358. async for item in output:
  359. yield item
  360. else:
  361. yield await output
  362. await self._add_protobuf_stream_handler(name, _stream_handler, input_protobuf_type)
  363. async def _add_protobuf_unary_handler(
  364. self,
  365. handle_name: str,
  366. handler: Callable[[TInputProtobuf, P2PContext], Awaitable[TOutputProtobuf]],
  367. input_protobuf_type: Type[Message],
  368. ) -> None:
  369. """
  370. Register a request-response (unary) handler. Unary requests and responses
  371. are sent through persistent multiplexed connections to the daemon for the
  372. sake of reducing the number of open files.
  373. :param handle_name: name of the handler (protocol id)
  374. :param handler: function handling the unary requests
  375. :param input_protobuf_type: protobuf type of the request
  376. """
  377. async def _unary_handler(request: bytes, remote_id: PeerID) -> bytes:
  378. input_serialized = input_protobuf_type.FromString(request)
  379. context = P2PContext(
  380. handle_name=handle_name,
  381. local_id=self.peer_id,
  382. remote_id=remote_id,
  383. )
  384. response = await handler(input_serialized, context)
  385. return response.SerializeToString()
  386. await self._client.add_unary_handler(handle_name, _unary_handler)
  387. async def call_protobuf_handler(
  388. self,
  389. peer_id: PeerID,
  390. name: str,
  391. input: Union[TInputProtobuf, TInputStream],
  392. output_protobuf_type: Type[Message],
  393. ) -> Awaitable[TOutputProtobuf]:
  394. if not isinstance(input, AsyncIterableABC):
  395. return await self._call_unary_protobuf_handler(peer_id, name, input, output_protobuf_type)
  396. responses = self._iterate_protobuf_stream_handler(peer_id, name, input, output_protobuf_type)
  397. return await asingle(responses)
  398. async def _call_unary_protobuf_handler(
  399. self,
  400. peer_id: PeerID,
  401. handle_name: str,
  402. input: TInputProtobuf,
  403. output_protobuf_type: Type[Message],
  404. ) -> Awaitable[TOutputProtobuf]:
  405. serialized_input = input.SerializeToString()
  406. response = await self._client.call_unary_handler(peer_id, handle_name, serialized_input)
  407. return output_protobuf_type.FromString(response)
  408. def iterate_protobuf_handler(
  409. self,
  410. peer_id: PeerID,
  411. name: str,
  412. input: Union[TInputProtobuf, TInputStream],
  413. output_protobuf_type: Type[Message],
  414. ) -> TOutputStream:
  415. requests = input if isinstance(input, AsyncIterableABC) else as_aiter(input)
  416. return self._iterate_protobuf_stream_handler(peer_id, name, requests, output_protobuf_type)
  417. def _start_listening(self) -> None:
  418. async def listen() -> None:
  419. async with self._client.listen():
  420. await asyncio.Future() # Wait until this task will be cancelled in _terminate()
  421. self._listen_task = asyncio.create_task(listen())
  422. async def add_binary_stream_handler(self, name: str, handler: p2pclient.StreamHandler) -> None:
  423. if self._listen_task is None:
  424. self._start_listening()
  425. await self._client.stream_handler(name, handler)
  426. async def call_binary_stream_handler(
  427. self, peer_id: PeerID, handler_name: str
  428. ) -> Tuple[StreamInfo, asyncio.StreamReader, asyncio.StreamWriter]:
  429. return await self._client.stream_open(peer_id, (handler_name,))
    def __del__(self):
        # Last-resort cleanup for instances that were never shut down explicitly
        self._terminate()
    @property
    def is_alive(self) -> bool:
        """True once ``create`` / ``replicate`` has started or attached to a daemon, False after termination."""
        return self._alive
  435. async def shutdown(self) -> None:
  436. self._terminate()
  437. if self._child is not None:
  438. await self._child.wait()
  439. def _terminate(self) -> None:
  440. if self._client is not None:
  441. self._client.close()
  442. if self._listen_task is not None:
  443. self._listen_task.cancel()
  444. if self._reader_task is not None:
  445. self._reader_task.cancel()
  446. self._alive = False
  447. if self._child is not None and self._child.returncode is None:
  448. self._child.terminate()
  449. logger.debug(f"Terminated p2pd with id = {self.peer_id}")
  450. with suppress(FileNotFoundError):
  451. os.remove(self._daemon_listen_maddr["unix"])
  452. with suppress(FileNotFoundError):
  453. os.remove(self._client_listen_maddr["unix"])
  454. @staticmethod
  455. def _make_process_args(*args, **kwargs) -> List[str]:
  456. proc_args = []
  457. proc_args.extend(str(entry) for entry in args)
  458. proc_args.extend(
  459. f"-{key}={P2P._convert_process_arg_type(value)}" if value is not None else f"-{key}"
  460. for key, value in kwargs.items()
  461. )
  462. return proc_args
  463. @staticmethod
  464. def _convert_process_arg_type(val: Any) -> Any:
  465. if isinstance(val, bool):
  466. return int(val)
  467. return val
  468. @staticmethod
  469. def _maddrs_to_str(maddrs: List[Multiaddr]) -> str:
  470. return ",".join(str(addr) for addr in maddrs)
  471. async def _read_outputs(self, ready: asyncio.Future) -> None:
  472. last_line = None
  473. while True:
  474. line = await self._child.stdout.readline()
  475. if not line: # Stream closed
  476. break
  477. last_line = line.rstrip().decode(errors="ignore")
  478. if last_line.startswith("Peer ID:"):
  479. ready.set_result(None)
  480. if not ready.done():
  481. ready.set_exception(P2PDaemonError(f"Daemon failed to start: {last_line}"))