p2p_daemon.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. import asyncio
  2. import os
  3. import secrets
  4. from collections.abc import AsyncIterable as AsyncIterableABC
  5. from contextlib import closing, suppress
  6. from dataclasses import dataclass
  7. from importlib.resources import path
  8. from subprocess import Popen
  9. from typing import Any, AsyncIterator, Awaitable, Callable, List, Optional, Sequence, Tuple, TypeVar, Union
  10. from multiaddr import Multiaddr
  11. import hivemind.hivemind_cli as cli
  12. import hivemind.p2p.p2p_daemon_bindings.p2pclient as p2pclient
  13. from hivemind.p2p.p2p_daemon_bindings.datastructures import PeerID, PeerInfo, StreamInfo
  14. from hivemind.proto.p2pd_pb2 import RPCError
  15. from hivemind.utils.asyncio import aiter
  16. from hivemind.utils.logging import get_logger
  # Module-level logger, named after this module.
  logger = get_logger(__name__)
  # File name of the p2pd daemon executable; P2P.create resolves it inside the hivemind.hivemind_cli package.
  P2PD_FILENAME = "p2pd"
  @dataclass(frozen=True)
  class P2PContext:
      """
      Immutable description of one incoming call, passed to protobuf handlers as their second argument.

      :param handle_name: name under which the invoked handler was registered
      :param local_id: peer id of the P2P instance that serves the call
      :param remote_id: peer id of the caller as reported in the stream info (None if not provided)
      :param remote_maddr: multiaddr of the caller as reported in the stream info (None if not provided)
      """

      handle_name: str
      local_id: PeerID
      # Both fields default to None, so they are annotated as Optional
      remote_id: Optional[PeerID] = None
      remote_maddr: Optional[Multiaddr] = None
  class P2P:
      """
      This class is responsible for establishing peer-to-peer connections through NAT and/or firewalls.
      It creates and manages a libp2p daemon (https://libp2p.io) in a background process,
      then terminates it when P2P is shut down. In order to communicate, a P2P instance should
      either use one or more initial_peers that will connect it to the rest of the swarm or
      use the public IPFS network (https://ipfs.io).

      For incoming connections, P2P instances add RPC handlers that may be accessed by other peers:
        - `P2P.add_protobuf_handler` accepts a protobuf message and returns another protobuf
        - `P2P.add_binary_stream_handler` transfers raw data using bi-directional streaming interface

      To access these handlers, a P2P instance can `P2P.call_protobuf_handler`/`P2P.call_binary_stream_handler`,
      using the recipient's unique `P2P.id` and the name of the corresponding handler.
      """

      # Framing: each payload is preceded by its length, encoded as a HEADER_LEN-byte BYTEORDER integer
      HEADER_LEN = 8
      BYTEORDER = "big"
      # One-byte tag written before each protobuf frame: regular message vs. RPCError
      MESSAGE_MARKER = b"\x00"
      ERROR_MARKER = b"\x01"
      # An empty RPCError that the calling side sends after its last request to mark the end of the stream
      END_OF_STREAM = RPCError()

      # Translation of the ``dht_mode`` argument of P2P.create into p2pd command-line flags
      DHT_MODE_MAPPING = {
          "dht": {"dht": 1},
          "dht_server": {"dhtServer": 1},
          "dht_client": {"dhtClient": 1},
      }
      # Translation of the ``force_reachability`` argument of P2P.create into p2pd command-line flags
      FORCE_REACHABILITY_MAPPING = {
          "public": {"forceReachabilityPublic": 1},
          "private": {"forceReachabilityPrivate": 1},
      }
      # Common prefix of the unix-socket multiaddrs used to talk to the daemon
      _UNIX_SOCKET_PREFIX = "/unix/tmp/hivemind-"

      def __init__(self):
          """Create an unconnected wrapper; use `P2P.create` or `P2P.replicate` to get a working instance."""
          self.id = None
          self._child = None
          self._alive = False
          # Pre-initialised so that _terminate (also reached from __del__) is safe on a half-built instance
          self._daemon_listen_maddr = None
          self._client_listen_maddr = None
          self._client = None
          self._visible_maddrs = None
          self._listen_task = None
          self._server_stopped = asyncio.Event()

      @classmethod
      async def create(
          cls,
          initial_peers: Optional[Sequence[Union[Multiaddr, str]]] = None,
          use_ipfs: bool = False,
          host_maddrs: Optional[Sequence[Union[Multiaddr, str]]] = ("/ip4/127.0.0.1/tcp/0",),
          announce_maddrs: Optional[Sequence[Union[Multiaddr, str]]] = None,
          quic: bool = True,
          tls: bool = True,
          conn_manager: bool = True,
          dht_mode: str = "dht_server",
          force_reachability: Optional[str] = None,
          nat_port_map: bool = True,
          auto_nat: bool = True,
          use_relay: bool = True,
          use_relay_hop: bool = False,
          use_relay_discovery: bool = False,
          use_auto_relay: bool = False,
          relay_hop_limit: int = 0,
          quiet: bool = True,
          ping_n_attempts: int = 5,
          ping_delay: float = 0.4,
      ) -> "P2P":
          """
          Start a new p2pd process and connect to it.

          :param initial_peers: List of bootstrap peers
          :param use_ipfs: Bootstrap to IPFS (incompatible with initial_peers)
          :param host_maddrs: Multiaddrs to listen for external connections from other p2p instances
          :param announce_maddrs: Visible multiaddrs that the peer will announce
            for external connections from other p2p instances
          :param quic: Enables the QUIC transport
          :param tls: Enables TLS1.3 channel security protocol
          :param conn_manager: Enables the Connection Manager
          :param dht_mode: DHT mode (dht_client/dht_server/dht)
          :param force_reachability: Force reachability mode (public/private)
          :param nat_port_map: Enables NAT port mapping
          :param auto_nat: Enables the AutoNAT service
          :param use_relay: enables circuit relay
          :param use_relay_hop: enables hop for relay
          :param use_relay_discovery: enables passive discovery for relay
          :param use_auto_relay: enables autorelay
          :param relay_hop_limit: sets the hop limit for hop relays
          :param quiet: make the daemon process quiet
          :param ping_n_attempts: try to ping the daemon with this number of attempts after starting it
          :param ping_delay: wait for ``ping_delay * (2 ** (k - 1))`` seconds before the k-th attempt to ping the daemon
            (in particular, wait for ``ping_delay`` seconds before the first attempt)
          :return: a wrapper for the p2p daemon
          :raises RuntimeError: if the daemon process exits before it answers a ping
          """
          assert not (
              initial_peers and use_ipfs
          ), "User-defined initial_peers and use_ipfs=True are incompatible, please choose one option"

          self = cls()
          with path(cli, P2PD_FILENAME) as p:
              p2pd_path = p

          # Both sockets share one random suffix so that concurrent instances do not collide
          socket_uid = secrets.token_urlsafe(8)
          self._daemon_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pd-{socket_uid}.sock")
          self._client_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pclient-{socket_uid}.sock")

          need_bootstrap = bool(initial_peers) or use_ipfs

          # Copy the mapping entry: updating it in place would modify the class-level table
          # and leak this call's flags into every later P2P.create call.
          process_kwargs = dict(cls.DHT_MODE_MAPPING.get(dht_mode, {"dht": 0}))
          process_kwargs.update(cls.FORCE_REACHABILITY_MAPPING.get(force_reachability, {}))
          for param, value in [
              ("bootstrapPeers", initial_peers),
              ("hostAddrs", host_maddrs),
              ("announceAddrs", announce_maddrs),
          ]:
              if value:
                  process_kwargs[param] = self._maddrs_to_str(value)

          proc_args = self._make_process_args(
              str(p2pd_path),
              listen=self._daemon_listen_maddr,
              quic=quic,
              tls=tls,
              connManager=conn_manager,
              natPortMap=nat_port_map,
              autonat=auto_nat,
              relay=use_relay,
              relayHop=use_relay_hop,
              relayDiscovery=use_relay_discovery,
              autoRelay=use_auto_relay,
              relayHopLimit=relay_hop_limit,
              b=need_bootstrap,
              q=quiet,
              **process_kwargs,
          )

          self._child = Popen(args=proc_args, encoding="utf8")
          self._alive = True
          self._client = p2pclient.Client(self._daemon_listen_maddr, self._client_listen_maddr)

          await self._ping_daemon_with_retries(ping_n_attempts, ping_delay)

          return self

      async def _ping_daemon_with_retries(self, ping_n_attempts: int, ping_delay: float) -> None:
          """
          Ping the freshly started daemon until it answers, with exponentially growing delays.

          :param ping_n_attempts: maximum number of ping attempts
          :param ping_delay: delay before the first attempt; doubled before each next one
          :raises RuntimeError: if the daemon process has exited
          :raises Exception: the error of the last attempt is re-raised after shutting this instance down
          """
          for try_number in range(ping_n_attempts):
              await asyncio.sleep(ping_delay * (2 ** try_number))

              if self._child.poll() is not None:  # Process died
                  break

              try:
                  await self._ping_daemon()
                  break
              except Exception:
                  # Earlier failures are expected while the daemon is still starting; only the last one is fatal
                  if try_number == ping_n_attempts - 1:
                      logger.exception("Failed to ping p2pd that has just started")
                      await self.shutdown()
                      raise

          if self._child.returncode is not None:
              raise RuntimeError(f"The p2p daemon has died with return code {self._child.returncode}")

      @classmethod
      async def replicate(cls, daemon_listen_maddr: Multiaddr) -> "P2P":
          """
          Connect to existing p2p daemon

          :param daemon_listen_maddr: multiaddr of the existing p2p daemon
          :return: new wrapper for the existing p2p daemon
          """
          self = cls()
          # There is no child under control
          # Use external already running p2pd
          self._child = None
          self._alive = True

          socket_uid = secrets.token_urlsafe(8)
          self._daemon_listen_maddr = daemon_listen_maddr
          self._client_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pclient-{socket_uid}.sock")

          self._client = p2pclient.Client(self._daemon_listen_maddr, self._client_listen_maddr)

          await self._ping_daemon()
          return self

      async def _ping_daemon(self) -> None:
          """Ask the daemon to identify itself and store the returned peer id and multiaddrs."""
          self.id, self._visible_maddrs = await self._client.identify()
          logger.debug(f"Launched p2pd with id = {self.id}, host multiaddrs = {self._visible_maddrs}")

      async def get_visible_maddrs(self, latest: bool = False) -> List[Multiaddr]:
          """
          Get multiaddrs of the current peer that should be accessible by other peers.

          :param latest: ask the P2P daemon to refresh the visible multiaddrs
          :return: the stored multiaddrs, each with ``/p2p/<peer id>`` appended
          :raises ValueError: if no multiaddrs are known for this peer
          """
          if latest:
              _, self._visible_maddrs = await self._client.identify()

          if not self._visible_maddrs:
              raise ValueError(f"No multiaddrs found for peer {self.id}")

          p2p_maddr = Multiaddr(f"/p2p/{self.id.to_base58()}")
          return [addr.encapsulate(p2p_maddr) for addr in self._visible_maddrs]

      async def list_peers(self) -> List[PeerInfo]:
          """Return the peers reported by the daemon's ``list_peers`` call as a list."""
          return list(await self._client.list_peers())

      async def wait_for_at_least_n_peers(self, n_peers: int, attempts: int = 3, delay: float = 1) -> None:
          """
          Poll the daemon until it reports at least ``n_peers`` peers.

          :param n_peers: required number of peers
          :param attempts: how many times to poll
          :param delay: seconds to sleep after each unsuccessful poll
          :raises RuntimeError: if the requirement is not met after all attempts
          """
          for _ in range(attempts):
              peers = await self._client.list_peers()
              if len(peers) >= n_peers:
                  return
              await asyncio.sleep(delay)

          raise RuntimeError("Not enough peers")

      @property
      def daemon_listen_maddr(self) -> Multiaddr:
          """Multiaddr of the socket on which the daemon listens for this client (usable with `P2P.replicate`)."""
          return self._daemon_listen_maddr

      @staticmethod
      async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2 ** 16) -> None:
          """
          Write a length-prefixed payload to ``writer`` and wait until the buffer is drained.

          :param data: payload to send
          :param writer: destination stream
          :param chunk_size: the payload is handed to the writer in slices of at most this many bytes
          """
          writer.write(len(data).to_bytes(P2P.HEADER_LEN, P2P.BYTEORDER))
          # memoryview makes the slices below zero-copy
          data = memoryview(data)
          for offset in range(0, len(data), chunk_size):
              writer.write(data[offset : offset + chunk_size])
          await writer.drain()

      @staticmethod
      async def receive_raw_data(reader: asyncio.StreamReader) -> bytes:
          """
          Read one length-prefixed payload from ``reader``.

          :raises asyncio.IncompleteReadError: if the stream ends before the whole frame is received
          """
          header = await reader.readexactly(P2P.HEADER_LEN)
          content_length = int.from_bytes(header, P2P.BYTEORDER)
          data = await reader.readexactly(content_length)
          return data

      TInputProtobuf = TypeVar("TInputProtobuf")
      TOutputProtobuf = TypeVar("TOutputProtobuf")

      @staticmethod
      async def send_protobuf(protobuf: Union[TOutputProtobuf, RPCError], writer: asyncio.StreamWriter) -> None:
          """Write a one-byte marker (error or message) followed by the serialized protobuf."""
          if isinstance(protobuf, RPCError):
              writer.write(P2P.ERROR_MARKER)
          else:
              writer.write(P2P.MESSAGE_MARKER)

          await P2P.send_raw_data(protobuf.SerializeToString(), writer)

      @staticmethod
      async def receive_protobuf(
          input_protobuf_type: type, reader: asyncio.StreamReader
      ) -> Tuple[Optional[TInputProtobuf], Optional[RPCError]]:
          """
          Read one marker-tagged protobuf frame.

          :param input_protobuf_type: protobuf class used to parse regular messages
          :param reader: source stream
          :return: ``(message, None)`` for a regular message or ``(None, error)`` for an RPCError
          :raises TypeError: if the marker byte is neither MESSAGE_MARKER nor ERROR_MARKER
          :raises asyncio.IncompleteReadError: if the stream ends in the middle of a frame
          """
          msg_type = await reader.readexactly(1)
          if msg_type == P2P.MESSAGE_MARKER:
              protobuf = input_protobuf_type()
              protobuf.ParseFromString(await P2P.receive_raw_data(reader))
              return protobuf, None
          elif msg_type == P2P.ERROR_MARKER:
              protobuf = RPCError()
              protobuf.ParseFromString(await P2P.receive_raw_data(reader))
              return None, protobuf
          else:
              raise TypeError("Invalid Protobuf message type")

      TInputStream = AsyncIterator[TInputProtobuf]
      TOutputStream = AsyncIterator[TOutputProtobuf]

      async def _add_protobuf_stream_handler(
          self,
          name: str,
          handler: Callable[[TInputStream, P2PContext], TOutputStream],
          input_protobuf_type: type,
          max_prefetch: int = 5,
      ) -> None:
          """
          Register ``handler`` under ``name``: it consumes a stream of requests and produces a stream of responses.

          :param name: handler name used by remote peers
          :param handler: callable that takes (request stream, context) and returns an async iterator of responses
          :param input_protobuf_type: protobuf class used to parse incoming requests
          :param max_prefetch: Maximum number of items to prefetch from the request stream.
            ``max_prefetch <= 0`` means unlimited.

          :note: Since the cancel messages are sent via the input stream,
            they will not be received while the prefetch buffer is full.
          """
          if self._listen_task is None:
              self._start_listening()

          async def _handle_stream(
              stream_info: StreamInfo, reader: asyncio.StreamReader, writer: asyncio.StreamWriter
          ) -> None:
              context = P2PContext(
                  handle_name=name,
                  local_id=self.id,
                  remote_id=stream_info.peer_id,
                  remote_maddr=stream_info.addr,
              )
              requests = asyncio.Queue(max_prefetch)

              async def _read_stream() -> P2P.TInputStream:
                  while True:
                      request = await requests.get()
                      if request is None:
                          break
                      yield request

              async def _process_stream() -> None:
                  try:
                      async for response in handler(_read_stream(), context):
                          await P2P.send_protobuf(response, writer)
                  except Exception as e:
                      logger.warning("Exception while processing stream and sending responses:", exc_info=True)
                      # Report the failure to the caller as an RPCError frame
                      await P2P.send_protobuf(RPCError(message=str(e)), writer)

              with closing(writer):
                  processing_task = asyncio.create_task(_process_stream())
                  receive_task = None
                  try:
                      while True:
                          receive_task = asyncio.create_task(P2P.receive_protobuf(input_protobuf_type, reader))
                          await asyncio.wait({processing_task, receive_task}, return_when=asyncio.FIRST_COMPLETED)

                          if processing_task.done():
                              receive_task.cancel()
                              return

                          if receive_task.done():
                              try:
                                  request, _ = await receive_task
                              except asyncio.IncompleteReadError:  # Connection is closed (the client cancelled or died)
                                  return
                              await requests.put(request)  # `request` is None for the end-of-stream message
                  except Exception:
                      logger.warning("Exception while receiving requests:", exc_info=True)
                  finally:
                      processing_task.cancel()
                      # Do not leave a pending read behind if we exit via an error or cancellation
                      # (cancelling an already finished task is a no-op)
                      if receive_task is not None:
                          receive_task.cancel()

          await self._client.stream_handler(name, _handle_stream)

      async def _iterate_protobuf_stream_handler(
          self, peer_id: PeerID, name: str, requests: TInputStream, output_protobuf_type: type
      ) -> TOutputStream:
          """
          Open a stream to handler ``name`` at ``peer_id``, send ``requests`` and yield the responses.

          :raises P2PHandlerError: if the remote side answers with an RPCError
          """
          _, reader, writer = await self._client.stream_open(peer_id, (name,))

          async def _write_to_stream() -> None:
              async for request in requests:
                  await P2P.send_protobuf(request, writer)
              await P2P.send_protobuf(P2P.END_OF_STREAM, writer)

          with closing(writer):
              writing_task = asyncio.create_task(_write_to_stream())
              try:
                  while True:
                      try:
                          response, err = await P2P.receive_protobuf(output_protobuf_type, reader)
                      except asyncio.IncompleteReadError:  # Connection is closed
                          break

                      if err is not None:
                          raise P2PHandlerError(f"Failed to call handler `{name}` at {peer_id}: {err.message}")
                      yield response

                  # Surface any error raised while sending the requests
                  await writing_task
              finally:
                  writing_task.cancel()

      async def add_protobuf_handler(
          self,
          name: str,
          handler: Callable[
              [Union[TInputProtobuf, TInputStream], P2PContext], Union[Awaitable[TOutputProtobuf], TOutputStream]
          ],
          input_protobuf_type: type,
          *,
          stream_input: bool = False,
      ) -> None:
          """
          Register a protobuf handler that returns either an awaitable (one response) or an async iterable.

          :param name: handler name used by remote peers
          :param handler: callable that takes (request or request stream, context)
          :param input_protobuf_type: protobuf class used to parse incoming requests
          :param stream_input: If True, assume ``handler`` to take ``TInputStream``
                               (not just ``TInputProtobuf``) as input.
          """

          async def _stream_handler(requests: P2P.TInputStream, context: P2PContext) -> P2P.TOutputStream:
              if stream_input:
                  handler_input = requests
              else:
                  # A unary handler must receive exactly one request
                  count = 0
                  async for handler_input in requests:
                      count += 1
                  if count != 1:
                      raise ValueError(f"Got {count} requests for handler {name} instead of one")

              output = handler(handler_input, context)

              if isinstance(output, AsyncIterableABC):
                  async for item in output:
                      yield item
              else:
                  yield await output

          await self._add_protobuf_stream_handler(name, _stream_handler, input_protobuf_type)

      async def call_protobuf_handler(
          self,
          peer_id: PeerID,
          name: str,
          input: Union[TInputProtobuf, TInputStream],
          output_protobuf_type: type,
      ) -> TOutputProtobuf:
          """
          Call handler ``name`` at ``peer_id`` and return its single response.

          :param input: one request or an async iterable of requests
          :param output_protobuf_type: protobuf class used to parse the response
          :raises ValueError: if the handler does not return exactly one response
          :raises P2PHandlerError: if the remote side answers with an RPCError
          """
          requests = input if isinstance(input, AsyncIterableABC) else aiter(input)
          responses = self._iterate_protobuf_stream_handler(peer_id, name, requests, output_protobuf_type)

          count = 0
          async for response in responses:
              count += 1
          if count != 1:
              raise ValueError(f"Got {count} responses from handler {name} instead of one")
          return response

      def iterate_protobuf_handler(
          self,
          peer_id: PeerID,
          name: str,
          input: Union[TInputProtobuf, TInputStream],
          output_protobuf_type: type,
      ) -> TOutputStream:
          """
          Call handler ``name`` at ``peer_id`` and return an async iterator over its responses.

          :param input: one request or an async iterable of requests
          :param output_protobuf_type: protobuf class used to parse the responses
          """
          requests = input if isinstance(input, AsyncIterableABC) else aiter(input)
          return self._iterate_protobuf_stream_handler(peer_id, name, requests, output_protobuf_type)

      def _start_listening(self) -> None:
          """Start a background task that keeps ``self._client.listen()`` open until the stop event is set."""

          async def listen() -> None:
              async with self._client.listen():
                  await self._server_stopped.wait()

          self._listen_task = asyncio.create_task(listen())

      async def _stop_listening(self) -> None:
          """Stop the background listening task (if any) and reset the listening state."""
          if self._listen_task is None:
              return

          self._server_stopped.set()
          self._listen_task.cancel()
          try:
              await self._listen_task
          except asyncio.CancelledError:
              pass
          finally:
              # Reset even when the task finished without raising CancelledError,
              # otherwise a later handler registration would never restart listening
              self._listen_task = None
              self._server_stopped.clear()

      async def add_binary_stream_handler(self, name: str, handler: p2pclient.StreamHandler) -> None:
          """Register a raw stream handler under ``name``, starting the listener first if needed."""
          if self._listen_task is None:
              self._start_listening()
          await self._client.stream_handler(name, handler)

      async def call_binary_stream_handler(
          self, peer_id: PeerID, handler_name: str
      ) -> Tuple[StreamInfo, asyncio.StreamReader, asyncio.StreamWriter]:
          """Open a raw stream to ``handler_name`` at ``peer_id`` and return (stream info, reader, writer)."""
          return await self._client.stream_open(peer_id, (handler_name,))

      def __del__(self):
          self._terminate()

      @property
      def is_alive(self) -> bool:
          """True from a successful start/attach until the instance is terminated."""
          return self._alive

      async def shutdown(self) -> None:
          """Stop listening for incoming streams, then terminate the daemon in a worker thread."""
          await self._stop_listening()
          # _terminate blocks on the child process, so it must not run on the event loop thread
          await asyncio.get_event_loop().run_in_executor(None, self._terminate)

      def _terminate(self) -> None:
          """
          Mark this instance as dead, stop the daemon if this instance started it and remove socket files.

          The daemon socket is removed only when the daemon was spawned by this instance; a wrapper
          obtained from `P2P.replicate` leaves the external daemon and its socket untouched.
          Safe to call repeatedly and on a partially initialised instance.
          """
          self._alive = False
          child = self._child
          if child is not None:
              if child.poll() is None:
                  child.terminate()
                  child.wait()
                  logger.debug(f"Terminated p2pd with id = {self.id}")
              self._remove_socket_file(self._daemon_listen_maddr)
          self._remove_socket_file(self._client_listen_maddr)

      @staticmethod
      def _remove_socket_file(maddr: Optional[Multiaddr]) -> None:
          """Delete the unix socket file behind ``maddr``; do nothing if it is unset or already gone."""
          if maddr is None:
              return
          with suppress(FileNotFoundError):
              os.remove(maddr["unix"])

      @staticmethod
      def _make_process_args(*args, **kwargs) -> List[str]:
          """
          Build the daemon command line: positional entries as strings, then one flag per keyword.

          A keyword whose value is None becomes ``-key``; any other value becomes ``-key=value``.
          """
          proc_args = [str(entry) for entry in args]
          proc_args.extend(
              f"-{key}={P2P._convert_process_arg_type(value)}" if value is not None else f"-{key}"
              for key, value in kwargs.items()
          )
          return proc_args

      @staticmethod
      def _convert_process_arg_type(val: Any) -> Any:
          """Convert booleans to 0/1 for the command line; return every other value unchanged."""
          if isinstance(val, bool):
              return int(val)
          return val

      @staticmethod
      def _maddrs_to_str(maddrs: List[Multiaddr]) -> str:
          """Join multiaddrs into one comma-separated string."""
          return ",".join(str(addr) for addr in maddrs)
  class P2PInterruptedError(Exception):
      """
      Error type for an interrupted P2P operation.

      NOTE(review): nothing in this module raises it; presumably it is raised or caught elsewhere - confirm.
      """
  class P2PHandlerError(Exception):
      """Raised on the calling side when a remote protobuf handler answers with an RPCError."""