vor 3 Jahren · 1555d98f66
--- a/README.md
+++ b/README.md
@@ -30,6 +30,13 @@ pip install https://github.com/huggingface/transformers/archive/6589e510fa4e6c44
 
				 # run one bloom block for a few steps
			
 
				 python -m cli.inference_one_block --config cli/config.json  # see other args
			
 
				 
			
 
				+# convert model from HF hub to a distributed format
			
 
				+MY_WRITE_TOKEN=TODO_WRITE_TOKEN_FROM_https://huggingface.co/settings/token
			
 
				+python -m cli.convert_model --model bigscience/bloom-6b3  \
			
 
				+  --output_path ./converted_model --output_repo bigscience/test-bloomd-6b3 \
			
 
				+  --use_auth_token $MY_WRITE_TOKEN  # ^-- TODO: replace the output repo with one you have write access to
			
 
				+
			
 
				+
			
 
				 # minimalistic server with non-trained bloom blocks
			
 
				 python -m cli.run_server --prefix smol --block_config bigscience/bloom-6b3 --num_blocks 2 --identity_path ./server1.id --host_maddrs /ip4/127.0.0.1/tcp/31337
			
 
				 ```
			
--- a/cli/quantize_cpu_naive.py
+++ b/cli/quantize_cpu_naive.py
@@ -57,7 +57,6 @@ if __name__ == "__main__":
 
				         with repo.commit(
			
 
				                 commit_message=args.commit_message, branch=args.block_branch_prefix + str(i), track_large_files=True
			
 
				         ):
			
 
				-            print(block.self_attention.layer_number)
			
 
				             torch.save(block.state_dict(), "./pytorch_model.bin")
			
 
				 
			
 
				     repo.git_checkout(args.base_branch, create_branch_ok=True)