瀏覽代碼

remove models older than 3 days

justheuristic 3 年之前
父節點
當前提交
2dbd7881c6
共有 2 個文件被更改，包括 31 次插入和 7 次刪除
  1. 6 7
      .github/workflows/run-tests.yaml
  2. 25 0
      tests/scripts/remove_old_models.py

+ 6 - 7
.github/workflows/run-tests.yaml

@@ -26,12 +26,10 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-      - name: Delete test models older than 72 hours
+      - name: Delete any test models older than 72 hours
         run: |
-          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
-          python -c "from huggingface_hub import delete_repo; delete_repo(token='$BLOOM_TESTING_WRITE_TOKEN', \
-          name='test-bloomd-560m-$HF_TAG', organization='bloom-testing')" || true
-      - name: Delete previous model, if exists
+          python tests/scripts/remove_old_models.py --author bloom-testing --use_auth_token $BLOOM_TESTING_WRITE_TOKEN
+      - name: Delete previous version of this model, if exists
         run: |
           export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
           python -c "from huggingface_hub import delete_repo; delete_repo(token='$BLOOM_TESTING_WRITE_TOKEN', \
@@ -109,7 +107,8 @@ jobs:
             --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server5.log &
           SERVER5_PID=$!
           
-          #TODO tail server logs
+          tail -n 100 -f server*.log &
+          LOGGER_PID=$!
           sleep 30  # wait for servers to download layers
           
           kill -0 $SERVER1_PID $SERVER2_PID $SERVER3_PID $SERVER4_PID $SERVER5_PID # ensure all servers survived init
@@ -118,5 +117,5 @@ jobs:
           
           kill -0 $SERVER1_PID $SERVER2_PID $SERVER3_PID $SERVER4_PID $SERVER5_PID # ensure all servers survived tests
           
-          kill -s SIGINT $SERVER1_PID $SERVER2_PID $SERVER3_PID $SERVER4_PID $SERVER5_PID
+          kill -s SIGINT $SERVER1_PID $SERVER2_PID $SERVER3_PID $SERVER4_PID $SERVER5_PID $LOGGER_PID
           echo "Done!"

+ 25 - 0
tests/scripts/remove_old_models.py

@@ -0,0 +1,25 @@
+import argparse
+import os
+from datetime import datetime
+from huggingface_hub import delete_repo, list_models
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Remove old testing models from HF hub")
+    parser.add_argument("--author", type=str, default='bloom-testing', help="auth token for from_pretrained")
+    parser.add_argument("--seconds_since_last_updated", type=int, default=7 * 24 * 60 * 60)
+    parser.add_argument("--use_auth_token", type=str, default=None, help="auth token for from_pretrained")
+    parser.add_argument('--dry_run', action='store_true')
+
+    args = parser.parse_args()
+
+    for model in list_models(author=args.author, full=True):
+        last_modified = datetime.strptime(model.lastModified, "%Y-%m-%dT%H:%M:%S.%fZ")
+
+        if model.Id.endswith("-main") or "/test-" not in model.Id:
+            continue  # remove only test models
+
+        if (datetime.now() - last_modified).total_seconds() > args.seconds_since_last_updated:
+            if args.dry_run:
+                print(f"{model.Id} can be deleted")
+            else:
+                delete_repo(token=args.use_auth_token, name=model.Id, organization=args.author)