Machine learning test (#68)

* Added scikit-learn to circleci config

* Implemented expert training test

* Fixed typo in expert training test

* Moved assert from function to main code

* Fixed args

* Fixed dataset size error

* Added number of epochs as argument

* Fixed pytest
Changed optimizer in make_dummy_server (run_server.py)
Vsevolod-pl, 5 years ago
parent
commit d2aab26d58
3 changed files with 42 additions and 2 deletions
  1. .circleci/config.yml (+1 -1)
  2. tests/test_training.py (+40 -0)
  3. tests/test_utils/run_server.py (+1 -1)

+ 1 - 1
.circleci/config.yml

@@ -20,7 +20,7 @@ jobs:
               cd -
             fi
           name: compile-grpc  # remove this command when v1.31 becomes available via pip install -r requirements.txt
-      - run: sudo pip install codecov pytest grpcio-tools tqdm
+      - run: sudo pip install codecov pytest grpcio-tools tqdm scikit-learn
       - python/install-deps
       - python/save-cache
       - run:
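
The scikit-learn dependency added to the CI image above is only needed for the digits dataset used by the new test. A minimal sketch of what that dataset provides (assumes scikit-learn and torch are installed; not part of the commit):

from sklearn.datasets import load_digits
import torch

digits = load_digits()                                # 1797 flattened 8x8 grayscale digit images
X = torch.tensor(digits['data'], dtype=torch.float)   # shape (1797, 64)
y = torch.tensor(digits['target'])                    # integer labels 0..9
print(X.shape, y.shape)

The 64-dimensional inputs are presumably why the test below runs its experts with hidden_dim=64 and ends with nn.Linear(64, 10).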

+ 40 - 0
tests/test_training.py

@@ -0,0 +1,40 @@
+#%env CUDA_VISIBLE_DEVICES=
+import argparse
+from typing import Optional
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from hivemind import RemoteExpert, find_open_port
+from test_utils.run_server import background_server
+
+from sklearn.datasets import load_digits
+
+
+def test_training(port: Optional[int] = None, max_steps: int = 100, threshold: float = 0.9):
+    if port is None:
+        port = find_open_port()
+    dataset = load_digits()
+    X_train, y_train = torch.tensor(dataset['data'], dtype=torch.float), torch.tensor(dataset['target'])
+
+    with background_server(num_experts=2, device='cpu', port=port, hidden_dim=64):
+        expert1 = RemoteExpert('expert.0', host='127.0.0.1', port=port)
+        expert2 = RemoteExpert('expert.1', host='127.0.0.1', port=port)
+        model = nn.Sequential(expert2, nn.Tanh(), expert1, nn.Linear(64, 10))
+
+        opt = torch.optim.SGD(model.parameters(), lr=0.05)
+
+        for step in range(max_steps):
+            opt.zero_grad()
+
+            outputs = model(X_train)
+            loss = F.cross_entropy(outputs, y_train)
+            loss.backward()
+            opt.step()
+
+            accuracy = (outputs.argmax(dim=1) == y_train).numpy().mean()
+            if accuracy >= threshold:
+                break
+
+        assert accuracy >= threshold, f"accuracy too low: {accuracy}"
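
The test module imports argparse but the diff never uses it; a hypothetical standalone entry point (not part of this commit, shown only as a sketch) could expose the test's parameters on the command line:

# Hypothetical __main__ block for tests/test_training.py; option names mirror test_training's signature.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=None)
    parser.add_argument('--max-steps', type=int, default=100)
    parser.add_argument('--threshold', type=float, default=0.9)
    args = parser.parse_args()
    test_training(port=args.port, max_steps=args.max_steps, threshold=args.threshold)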

+ 1 - 1
tests/test_utils/run_server.py

@@ -70,7 +70,7 @@ def make_dummy_server(interface='0.0.0.0', port=None, num_experts=1, expert_cls=
     experts = {}
     for i in range(num_experts):
         expert = name_to_block[expert_cls](hidden_dim)
-        opt = torch.optim.SGD(expert.parameters(), 0.0) if no_optimizer else torch.optim.Adam(expert.parameters())
+        opt = torch.optim.SGD(expert.parameters(), 0.0 if no_optimizer else 0.05)
         expert_uid = f'{expert_prefix}{UID_DELIMETER}{i + expert_offset}'
         experts[expert_uid] = hivemind.ExpertBackend(name=expert_uid, expert=expert, opt=opt,
                                                      args_schema=args_schema,
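
The change above drops the SGD/Adam branch: the server now always builds SGD and emulates "no optimizer" by zeroing the learning rate. A minimal sketch of that behaviour, using a plain nn.Linear in place of an expert block (assumes only torch; not the server code itself):

# lr=0.0 makes SGD a no-op on opt.step(), so no_optimizer no longer needs a separate optimizer class.
import torch
import torch.nn as nn

expert = nn.Linear(64, 64)
for no_optimizer in (True, False):
    opt = torch.optim.SGD(expert.parameters(), 0.0 if no_optimizer else 0.05)
    print(no_optimizer, opt.param_groups[0]['lr'])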