@@ -75,7 +75,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "MODEL_NAME = \"bigscience/bloom-petals\" # select model you like\n",
+    "# Choose a model you'd like to prompt-tune. We recommend starting with\n",
+    "# the smaller 7.1B version of BLOOM (bigscience/bloom-7b1-petals) for faster prototyping.\n",
+    "# Once your code is ready, you can switch to full-scale\n",
+    "# 176B-parameter BLOOM (bigscience/bloom-petals) or BLOOMZ (bigscience/bloomz-petals).\n",
+    "MODEL_NAME = \"bigscience/bloom-7b1-petals\"\n",
+    "\n",
+    "# Choose a prompt-tuning mode ('ptune' or 'deep_ptune').\n",
+    "# The latter fine-tunes separate prefixes for each transformer block,\n",
+    "# so prompt-tuning will take more time but yield better results.\n",
+    "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
+    "TUNING_MODE = 'ptune'\n",
+    "\n",
     "NUM_PREFIX_TOKENS = 16\n",
     "DEVICE = 'cuda'\n",
     "BATCH_SIZE = 8\n",
@@ -83,8 +94,7 @@
     "WEIGHT_DECAY = 0.0\n",
     "NUM_SAMPLES = 1000\n",
     "SEED = 42\n",
-    "MODEL_MAX_LENGTH = 256\n",
-    "TUNING_MODE = 'ptune' # choose between ['ptune', 'deep_ptune'] "
+    "MODEL_MAX_LENGTH = 256"
    ]
   },
   {
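For context, these constants are consumed a few cells later when the distributed model is constructed. A minimal sketch of that step, assuming the petals client exposes `DistributedBloomForCausalLM` with `pre_seq_len` and `tuning_mode` arguments (the exact import path may differ between petals versions):

```python
import torch
from transformers import BloomTokenizerFast

# Assumption: petals provides this class at the top level; in some versions
# it lives under petals.client.remote_model instead.
from petals import DistributedBloomForCausalLM

tokenizer = BloomTokenizerFast.from_pretrained(MODEL_NAME)
tokenizer.padding_side = "right"
tokenizer.model_max_length = MODEL_MAX_LENGTH

# pre_seq_len sets how many trainable prefix tokens are prepended to each
# input; tuning_mode selects 'ptune' (prefixes at the input layer only) or
# 'deep_ptune' (separate prefixes for every transformer block).
model = DistributedBloomForCausalLM.from_pretrained(
    MODEL_NAME,
    pre_seq_len=NUM_PREFIX_TOKENS,
    tuning_mode=TUNING_MODE,
).to(DEVICE)
```

Only the prefix embeddings are trained locally; the frozen transformer blocks are executed by remote Petals servers, which is why switching `MODEL_NAME` between the 7.1B and 176B checkpoints requires no other code changes.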