# p2p_daemon.py
  1. import asyncio
  2. import os
  3. import secrets
  4. from collections.abc import AsyncIterable as AsyncIterableABC
  5. from contextlib import closing, suppress
  6. from dataclasses import dataclass
  7. from importlib.resources import path
  8. from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
  9. from google.protobuf.message import Message
  10. from multiaddr import Multiaddr
  11. import hivemind.hivemind_cli as cli
  12. import hivemind.p2p.p2p_daemon_bindings.p2pclient as p2pclient
  13. from hivemind.p2p.p2p_daemon_bindings.datastructures import PeerID, PeerInfo, StreamInfo
  14. from hivemind.p2p.p2p_daemon_bindings.control import P2PHandlerError
  15. from hivemind.proto.p2pd_pb2 import RPCError
  16. from hivemind.utils.asyncio import aiter, asingle
  17. from hivemind.utils.logging import get_logger
# Module-level logger, named after this module.
logger = get_logger(__name__)
# File name of the p2pd daemon executable; `P2P.create` resolves it inside the `hivemind.hivemind_cli` package.
P2PD_FILENAME = "p2pd"
  20. @dataclass(frozen=True)
  21. class P2PContext(object):
  22. handle_name: str
  23. local_id: PeerID
  24. remote_id: PeerID = None
class P2P:
    """
    This class is responsible for establishing peer-to-peer connections through NAT and/or firewalls.
    It creates and manages a libp2p daemon (https://libp2p.io) in a background process,
    then terminates it when P2P is shut down. In order to communicate, a P2P instance should
    either use one or more initial_peers that will connect it to the rest of the swarm or
    use the public IPFS network (https://ipfs.io).

    For incoming connections, P2P instances add RPC handlers that may be accessed by other peers:
      - `P2P.add_protobuf_handler` accepts a protobuf message and returns another protobuf
      - `P2P.add_binary_stream_handler` transfers raw data using bi-directional streaming interface

    To access these handlers, a P2P instance can `P2P.call_protobuf_handler`/`P2P.call_binary_stream_handler`,
    using the recipient's unique `P2P.peer_id` and the name of the corresponding handler.
    """

    # Framing used by send_raw_data/receive_raw_data: every payload is preceded by its length,
    # encoded as HEADER_LEN bytes in BYTEORDER byte order.
    HEADER_LEN = 8
    BYTEORDER = "big"
    # One-byte tag written by send_protobuf before each message: regular protobuf vs. RPCError.
    MESSAGE_MARKER = b"\x00"
    ERROR_MARKER = b"\x01"
    # An empty RPCError; the calling side sends it after its last request to mark the end of the request stream.
    END_OF_STREAM = RPCError()

    # Translates the `dht_mode` argument of `P2P.create` into p2pd command-line flags.
    DHT_MODE_MAPPING = {
        "dht": {"dht": 1},
        "dht_server": {"dhtServer": 1},
        "dht_client": {"dhtClient": 1},
    }
    # Translates the `force_reachability` argument of `P2P.create` into p2pd command-line flags.
    FORCE_REACHABILITY_MAPPING = {
        "public": {"forceReachabilityPublic": 1},
        "private": {"forceReachabilityPrivate": 1},
    }
    # Common prefix of the unix-socket multiaddrs used between the daemon and this client.
    _UNIX_SOCKET_PREFIX = "/unix/tmp/hivemind-"
  53. def __init__(self):
  54. self.peer_id = None
  55. self._child = None
  56. self._alive = False
  57. self._reader_task = None
  58. self._listen_task = None
  59. @classmethod
  60. async def create(
  61. cls,
  62. initial_peers: Optional[Sequence[Union[Multiaddr, str]]] = None,
  63. use_ipfs: bool = False,
  64. host_maddrs: Optional[Sequence[Union[Multiaddr, str]]] = ("/ip4/127.0.0.1/tcp/0",),
  65. announce_maddrs: Optional[Sequence[Union[Multiaddr, str]]] = None,
  66. quic: bool = True,
  67. tls: bool = True,
  68. conn_manager: bool = True,
  69. dht_mode: str = "dht_server",
  70. force_reachability: Optional[str] = None,
  71. nat_port_map: bool = True,
  72. auto_nat: bool = True,
  73. use_relay: bool = True,
  74. use_relay_hop: bool = False,
  75. use_relay_discovery: bool = False,
  76. use_auto_relay: bool = False,
  77. relay_hop_limit: int = 0,
  78. startup_timeout: float = 15,
  79. ) -> "P2P":
  80. """
  81. Start a new p2pd process and connect to it.
  82. :param initial_peers: List of bootstrap peers
  83. :param use_ipfs: Bootstrap to IPFS (incompatible with initial_peers)
  84. :param host_maddrs: Multiaddrs to listen for external connections from other p2p instances
  85. :param announce_maddrs: Visible multiaddrs that the peer will announce
  86. for external connections from other p2p instances
  87. :param quic: Enables the QUIC transport
  88. :param tls: Enables TLS1.3 channel security protocol
  89. :param conn_manager: Enables the Connection Manager
  90. :param dht_mode: DHT mode (dht_client/dht_server/dht)
  91. :param force_reachability: Force reachability mode (public/private)
  92. :param nat_port_map: Enables NAT port mapping
  93. :param auto_nat: Enables the AutoNAT service
  94. :param use_relay: enables circuit relay
  95. :param use_relay_hop: enables hop for relay
  96. :param use_relay_discovery: enables passive discovery for relay
  97. :param use_auto_relay: enables autorelay
  98. :param relay_hop_limit: sets the hop limit for hop relays
  99. :param startup_timeout: raise a P2PDaemonError if the daemon does not start in ``startup_timeout`` seconds
  100. :return: a wrapper for the p2p daemon
  101. """
  102. assert not (
  103. initial_peers and use_ipfs
  104. ), "User-defined initial_peers and use_ipfs=True are incompatible, please choose one option"
  105. self = cls()
  106. with path(cli, P2PD_FILENAME) as p:
  107. p2pd_path = p
  108. socket_uid = secrets.token_urlsafe(8)
  109. self._daemon_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pd-{socket_uid}.sock")
  110. self._client_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pclient-{socket_uid}.sock")
  111. need_bootstrap = bool(initial_peers) or use_ipfs
  112. process_kwargs = cls.DHT_MODE_MAPPING.get(dht_mode, {"dht": 0})
  113. process_kwargs.update(cls.FORCE_REACHABILITY_MAPPING.get(force_reachability, {}))
  114. for param, value in [
  115. ("bootstrapPeers", initial_peers),
  116. ("hostAddrs", host_maddrs),
  117. ("announceAddrs", announce_maddrs),
  118. ]:
  119. if value:
  120. process_kwargs[param] = self._maddrs_to_str(value)
  121. proc_args = self._make_process_args(
  122. str(p2pd_path),
  123. listen=self._daemon_listen_maddr,
  124. quic=quic,
  125. tls=tls,
  126. connManager=conn_manager,
  127. natPortMap=nat_port_map,
  128. autonat=auto_nat,
  129. relay=use_relay,
  130. relayHop=use_relay_hop,
  131. relayDiscovery=use_relay_discovery,
  132. autoRelay=use_auto_relay,
  133. relayHopLimit=relay_hop_limit,
  134. b=need_bootstrap,
  135. **process_kwargs,
  136. )
  137. self._child = await asyncio.subprocess.create_subprocess_exec(
  138. *proc_args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT
  139. )
  140. self._alive = True
  141. ready = asyncio.Future()
  142. self._reader_task = asyncio.create_task(self._read_outputs(ready))
  143. try:
  144. await asyncio.wait_for(ready, startup_timeout)
  145. except asyncio.TimeoutError:
  146. await self.shutdown()
  147. raise P2PDaemonError(f"Daemon failed to start in {startup_timeout:.1f} seconds")
  148. self._client = p2pclient.Client(self._daemon_listen_maddr, self._client_listen_maddr)
  149. await self._ping_daemon()
  150. return self
  151. @classmethod
  152. async def replicate(cls, daemon_listen_maddr: Multiaddr) -> "P2P":
  153. """
  154. Connect to existing p2p daemon
  155. :param daemon_listen_maddr: multiaddr of the existing p2p daemon
  156. :return: new wrapper for the existing p2p daemon
  157. """
  158. self = cls()
  159. # There is no child under control
  160. # Use external already running p2pd
  161. self._child = None
  162. self._alive = True
  163. socket_uid = secrets.token_urlsafe(8)
  164. self._daemon_listen_maddr = daemon_listen_maddr
  165. self._client_listen_maddr = Multiaddr(cls._UNIX_SOCKET_PREFIX + f"p2pclient-{socket_uid}.sock")
  166. self._client = p2pclient.Client(self._daemon_listen_maddr, self._client_listen_maddr)
  167. await self._ping_daemon()
  168. return self
  169. async def _ping_daemon(self) -> None:
  170. self.peer_id, self._visible_maddrs = await self._client.identify()
  171. logger.debug(f"Launched p2pd with peer id = {self.peer_id}, host multiaddrs = {self._visible_maddrs}")
  172. async def get_visible_maddrs(self, latest: bool = False) -> List[Multiaddr]:
  173. """
  174. Get multiaddrs of the current peer that should be accessible by other peers.
  175. :param latest: ask the P2P daemon to refresh the visible multiaddrs
  176. """
  177. if latest:
  178. _, self._visible_maddrs = await self._client.identify()
  179. if not self._visible_maddrs:
  180. raise ValueError(f"No multiaddrs found for peer {self.peer_id}")
  181. p2p_maddr = Multiaddr(f"/p2p/{self.peer_id.to_base58()}")
  182. return [addr.encapsulate(p2p_maddr) for addr in self._visible_maddrs]
  183. async def list_peers(self) -> List[PeerInfo]:
  184. return list(await self._client.list_peers())
  185. async def wait_for_at_least_n_peers(self, n_peers: int, attempts: int = 3, delay: float = 1) -> None:
  186. for _ in range(attempts):
  187. peers = await self._client.list_peers()
  188. if len(peers) >= n_peers:
  189. return
  190. await asyncio.sleep(delay)
  191. raise RuntimeError("Not enough peers")
  192. @property
  193. def daemon_listen_maddr(self) -> Multiaddr:
  194. return self._daemon_listen_maddr
  195. @staticmethod
  196. async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2 ** 16) -> None:
  197. writer.write(len(data).to_bytes(P2P.HEADER_LEN, P2P.BYTEORDER))
  198. data = memoryview(data)
  199. for offset in range(0, len(data), chunk_size):
  200. writer.write(data[offset : offset + chunk_size])
  201. await writer.drain()
  202. @staticmethod
  203. async def receive_raw_data(reader: asyncio.StreamReader) -> bytes:
  204. header = await reader.readexactly(P2P.HEADER_LEN)
  205. content_length = int.from_bytes(header, P2P.BYTEORDER)
  206. data = await reader.readexactly(content_length)
  207. return data
    # Type variables for the protobuf message types of requests (input) and responses (output);
    # they are only used in the annotations of the protobuf helpers of this class.
    TInputProtobuf = TypeVar("TInputProtobuf")
    TOutputProtobuf = TypeVar("TOutputProtobuf")
  210. @staticmethod
  211. async def send_protobuf(protobuf: Union[TOutputProtobuf, RPCError], writer: asyncio.StreamWriter) -> None:
  212. if isinstance(protobuf, RPCError):
  213. writer.write(P2P.ERROR_MARKER)
  214. else:
  215. writer.write(P2P.MESSAGE_MARKER)
  216. await P2P.send_raw_data(protobuf.SerializeToString(), writer)
  217. @staticmethod
  218. async def receive_protobuf(
  219. input_protobuf_type: Message, reader: asyncio.StreamReader
  220. ) -> Tuple[Optional[TInputProtobuf], Optional[RPCError]]:
  221. msg_type = await reader.readexactly(1)
  222. if msg_type == P2P.MESSAGE_MARKER:
  223. protobuf = input_protobuf_type()
  224. protobuf.ParseFromString(await P2P.receive_raw_data(reader))
  225. return protobuf, None
  226. elif msg_type == P2P.ERROR_MARKER:
  227. protobuf = RPCError()
  228. protobuf.ParseFromString(await P2P.receive_raw_data(reader))
  229. return None, protobuf
  230. else:
  231. raise TypeError("Invalid Protobuf message type")
    # Aliases for asynchronous streams of request / response protobufs (used by the streaming handlers).
    TInputStream = AsyncIterator[TInputProtobuf]
    TOutputStream = AsyncIterator[TOutputProtobuf]
    async def _add_protobuf_stream_handler(
        self,
        name: str,
        handler: Callable[[TInputStream, P2PContext], TOutputStream],
        input_protobuf_type: Message,
        max_prefetch: int = 5,
    ) -> None:
        """
        Register a streaming protobuf handler on top of a binary stream handler.

        For every incoming stream, requests are read in the background and queued, ``handler`` is called
        with an async iterator over the queued requests and a P2PContext, and each response it yields
        is sent back with `P2P.send_protobuf`. An exception raised while processing is logged and
        reported to the caller as an RPCError on a best-effort basis.

        :param name: name of the handler
        :param handler: called as ``handler(requests, context)``; must return an async iterator of responses
        :param input_protobuf_type: protobuf type used to parse incoming requests
        :param max_prefetch: Maximum number of items to prefetch from the request stream.
          ``max_prefetch <= 0`` means unlimited.

        :note: Since the cancel messages are sent via the input stream,
          they will not be received while the prefetch buffer is full.
        """

        async def _handle_stream(
            stream_info: StreamInfo, reader: asyncio.StreamReader, writer: asyncio.StreamWriter
        ) -> None:
            context = P2PContext(
                handle_name=name,
                local_id=self.peer_id,
                remote_id=stream_info.peer_id,
            )
            # Buffer between the receiving loop below (producer) and `handler` (consumer)
            requests = asyncio.Queue(max_prefetch)

            async def _read_stream() -> P2P.TInputStream:
                # Yield queued requests until the end-of-stream sentinel (None) arrives
                while True:
                    request = await requests.get()
                    if request is None:
                        break
                    yield request

            async def _process_stream() -> None:
                try:
                    async for response in handler(_read_stream(), context):
                        await P2P.send_protobuf(response, writer)
                except Exception as e:
                    logger.warning("Exception while processing stream and sending responses:", exc_info=True)
                    # Sometimes `e` is a connection error, so we won't be able to report the error to the caller
                    with suppress(Exception):
                        await P2P.send_protobuf(RPCError(message=str(e)), writer)

            with closing(writer):
                processing_task = asyncio.create_task(_process_stream())
                try:
                    while True:
                        # Wait for whichever happens first: the handler finishes or another request arrives
                        receive_task = asyncio.create_task(P2P.receive_protobuf(input_protobuf_type, reader))
                        await asyncio.wait({processing_task, receive_task}, return_when=asyncio.FIRST_COMPLETED)

                        if processing_task.done():
                            # All responses are sent (or processing failed): stop reading requests
                            receive_task.cancel()
                            return

                        if receive_task.done():
                            try:
                                request, _ = await receive_task
                            except asyncio.IncompleteReadError:  # Connection is closed (the client cancelled or died)
                                return
                            await requests.put(request)  # `request` is None for the end-of-stream message
                except Exception:
                    logger.warning("Exception while receiving requests:", exc_info=True)
                finally:
                    # Whatever the reason for leaving the loop, do not keep the handler running
                    processing_task.cancel()

        await self.add_binary_stream_handler(name, _handle_stream)
    async def _iterate_protobuf_stream_handler(
        self, peer_id: PeerID, name: str, requests: TInputStream, output_protobuf_type: Message
    ) -> TOutputStream:
        """
        Call a streaming protobuf handler of a remote peer and yield its responses.

        Requests are written by a background task (followed by the end-of-stream message) while
        responses are read and yielded as they arrive.

        :param peer_id: peer that serves the handler
        :param name: name of the handler
        :param requests: async iterator of request protobufs
        :param output_protobuf_type: protobuf type used to parse responses
        :raises P2PHandlerError: if the remote side replies with an RPCError
        """
        _, reader, writer = await self.call_binary_stream_handler(peer_id, name)

        async def _write_to_stream() -> None:
            async for request in requests:
                await P2P.send_protobuf(request, writer)
            # Tell the remote handler that there will be no more requests
            await P2P.send_protobuf(P2P.END_OF_STREAM, writer)

        with closing(writer):
            writing_task = asyncio.create_task(_write_to_stream())
            try:
                while True:
                    try:
                        response, err = await P2P.receive_protobuf(output_protobuf_type, reader)
                    except asyncio.IncompleteReadError:  # Connection is closed
                        break

                    if err is not None:
                        raise P2PHandlerError(f"Failed to call handler `{name}` at {peer_id}: {err.message}")
                    yield response

                # Awaiting the writer task here re-raises any exception it ended with
                await writing_task
            finally:
                # Also reached when the consumer stops iterating early or an error is raised above
                writing_task.cancel()
  313. async def add_protobuf_handler(
  314. self,
  315. name: str,
  316. handler: Callable[
  317. [Union[TInputProtobuf, TInputStream], P2PContext], Union[Awaitable[TOutputProtobuf], TOutputStream]
  318. ],
  319. input_protobuf_type: Message,
  320. *,
  321. stream_input: bool = False,
  322. stream_output: bool = False,
  323. ) -> None:
  324. """
  325. :param stream_input: If True, assume ``handler`` to take ``TInputStream``
  326. (not just ``TInputProtobuf``) as input.
  327. :param stream_output: If True, assume ``handler`` to return ``TOutputStream``
  328. """
  329. if not stream_input and not stream_output:
  330. await self._add_protobuf_unary_handler(name, handler, input_protobuf_type)
  331. return
  332. async def _stream_handler(requests: P2P.TInputStream, context: P2PContext) -> P2P.TOutputStream:
  333. input = requests if stream_input else await asingle(requests)
  334. output = handler(input, context)
  335. if isinstance(output, AsyncIterableABC):
  336. async for item in output:
  337. yield item
  338. else:
  339. yield await output
  340. await self._add_protobuf_stream_handler(name, _stream_handler, input_protobuf_type)
  341. # only registers request-response handlers
  342. async def _add_protobuf_unary_handler(
  343. self,
  344. handle_name: str,
  345. handler: Callable[[TInputProtobuf, P2PContext], Awaitable[TOutputProtobuf]],
  346. input_protobuf_type: Message,
  347. ) -> None:
  348. """
  349. Register a request-response (unary) handler. Unary requests and responses
  350. are sent through persistent multiplexed connections to the daemon for the
  351. sake of reducing the number of open files.
  352. :param handle_name: name of the handler (protocol id)
  353. :param handler: function handling the unary requests
  354. :param input_protobuf_type: protobuf type of the request
  355. """
  356. async def _unary_handler(request: bytes, remote_id: PeerID) -> bytes:
  357. input_serialized = input_protobuf_type.FromString(request)
  358. context = P2PContext(
  359. handle_name=handle_name,
  360. local_id=self.peer_id,
  361. remote_id=remote_id,
  362. )
  363. response = await handler(input_serialized, context)
  364. return response.SerializeToString()
  365. await self._client.add_unary_handler(handle_name, _unary_handler)
  366. async def call_protobuf_handler(
  367. self,
  368. peer_id: PeerID,
  369. name: str,
  370. input: Union[TInputProtobuf, TInputStream],
  371. output_protobuf_type: Message,
  372. ) -> Awaitable[TOutputProtobuf]:
  373. if not isinstance(input, AsyncIterableABC):
  374. return await self._call_unary_protobuf_handler(peer_id, name, input, output_protobuf_type)
  375. requests = input if isinstance(input, AsyncIterableABC) else aiter(input)
  376. responses = self._iterate_protobuf_stream_handler(peer_id, name, requests, output_protobuf_type)
  377. return await asingle(responses)
  378. async def _call_unary_protobuf_handler(
  379. self,
  380. peer_id: PeerID,
  381. handle_name: str,
  382. input: TInputProtobuf,
  383. output_protobuf_type: Message,
  384. ) -> Awaitable[TOutputProtobuf]:
  385. serialized_input = input.SerializeToString()
  386. response = await self._client.call_unary_handler(peer_id, handle_name, serialized_input)
  387. return output_protobuf_type().FromString(response)
  388. def iterate_protobuf_handler(
  389. self,
  390. peer_id: PeerID,
  391. name: str,
  392. input: Union[TInputProtobuf, TInputStream],
  393. output_protobuf_type: Message,
  394. ) -> TOutputStream:
  395. requests = input if isinstance(input, AsyncIterableABC) else aiter(input)
  396. return self._iterate_protobuf_stream_handler(peer_id, name, requests, output_protobuf_type)
  397. def _start_listening(self) -> None:
  398. async def listen() -> None:
  399. async with self._client.listen():
  400. await asyncio.Future() # Wait until this task will be cancelled in _terminate()
  401. self._listen_task = asyncio.create_task(listen())
  402. async def add_binary_stream_handler(self, name: str, handler: p2pclient.StreamHandler) -> None:
  403. if self._listen_task is None:
  404. self._start_listening()
  405. await self._client.stream_handler(name, handler)
  406. async def call_binary_stream_handler(
  407. self, peer_id: PeerID, handler_name: str
  408. ) -> Tuple[StreamInfo, asyncio.StreamReader, asyncio.StreamWriter]:
  409. return await self._client.stream_open(peer_id, (handler_name,))
  410. def __del__(self):
  411. self._terminate()
  412. @property
  413. def is_alive(self) -> bool:
  414. return self._alive
  415. async def shutdown(self) -> None:
  416. self._terminate()
  417. if self._child is not None:
  418. await self._child.wait()
  419. def _terminate(self) -> None:
  420. if self._listen_task is not None:
  421. self._listen_task.cancel()
  422. if self._reader_task is not None:
  423. self._reader_task.cancel()
  424. self._alive = False
  425. if self._child is not None and self._child.returncode is None:
  426. self._child.terminate()
  427. logger.debug(f"Terminated p2pd with id = {self.peer_id}")
  428. with suppress(FileNotFoundError):
  429. os.remove(self._daemon_listen_maddr["unix"])
  430. with suppress(FileNotFoundError):
  431. os.remove(self._client_listen_maddr["unix"])
  432. @staticmethod
  433. def _make_process_args(*args, **kwargs) -> List[str]:
  434. proc_args = []
  435. proc_args.extend(str(entry) for entry in args)
  436. proc_args.extend(
  437. f"-{key}={P2P._convert_process_arg_type(value)}" if value is not None else f"-{key}"
  438. for key, value in kwargs.items()
  439. )
  440. return proc_args
  441. @staticmethod
  442. def _convert_process_arg_type(val: Any) -> Any:
  443. if isinstance(val, bool):
  444. return int(val)
  445. return val
  446. @staticmethod
  447. def _maddrs_to_str(maddrs: List[Multiaddr]) -> str:
  448. return ",".join(str(addr) for addr in maddrs)
  449. async def _read_outputs(self, ready: asyncio.Future) -> None:
  450. last_line = None
  451. while True:
  452. line = await self._child.stdout.readline()
  453. if not line: # Stream closed
  454. break
  455. last_line = line.rstrip().decode(errors="ignore")
  456. if last_line.startswith("Peer ID:"):
  457. ready.set_result(None)
  458. if not ready.done():
  459. ready.set_exception(P2PDaemonError(f"Daemon failed to start: {last_line}"))
  460. class P2PDaemonError(RuntimeError):
  461. pass