// runtime.proto

  // Use proto3 syntax for every definition in this file.
  syntax = "proto3";
  // Message carrying a single string identifier.
  message ExpertUID {
    // The identifier itself. NOTE(review): presumably names one expert;
    // the expected format is not defined in this file.
    string uid = 1;
  }
  // Message carrying an opaque, already-serialized payload.
  message ExpertInfo {
    // Raw bytes of the payload. The serialization format is not defined in
    // this file -- TODO confirm against the producer/consumer code.
    bytes serialized_info = 1;
  }
  // Request message: a string identifier plus a list of tensors.
  message ExpertRequest {
    // String identifier. NOTE(review): presumably selects the expert that
    // should handle the request -- confirm with the service implementation.
    string uid = 1;

    // Zero or more Tensor messages sent with the request.
    repeated Tensor tensors = 2;
  }
  // Response message: a list of tensors.
  message ExpertResponse {
    // Zero or more Tensor messages returned to the caller.
    // This field uses number 2, the same number as ExpertRequest.tensors;
    // no field with number 1 is declared in this message. Do not renumber:
    // changing a field number breaks wire compatibility.
    repeated Tensor tensors = 2;
  }
  // Compression choices referenced by Tensor.compression.
  // NOTE(review): the exact algorithm behind each value is not defined in this
  // file; the descriptions implied by the names should be confirmed against
  // the (de)serialization code.
  enum CompressionType {
    // Zero value, therefore the proto3 default when the field is unset.
    NONE = 0;
    MEANSTD_16BIT = 1;
    FLOAT16 = 2;
    QUANTILE_8BIT = 3;
    UNIFORM_8BIT = 4;
  }
  // Serialized tensor: a byte buffer plus the metadata fields declared below.
  message Tensor {
    // Raw bytes of the tensor payload.
    bytes buffer = 1;

    // List of unsigned 32-bit sizes. NOTE(review): presumably the tensor
    // shape, one entry per dimension -- confirm.
    repeated uint32 size = 2;

    // Boolean flag named after autograd's requires_grad. NOTE(review):
    // presumably tells the receiver whether to track gradients -- confirm.
    bool requires_grad = 3;

    // Element type given as a string. The accepted spellings are not defined
    // in this file.
    string dtype = 4;

    // Compression setting for this tensor; defaults to NONE (0) when unset.
    CompressionType compression = 5;

    // Integer chunk count. NOTE(review): presumably the number of parts the
    // tensor is split into for transfer -- confirm with the streaming code.
    int32 chunks = 6;
  }