From 20439e9f8e50ba9e6f59e62ed59430b786d565f0 Mon Sep 17 00:00:00 2001 From: "Dr. Christoph Mittendorf" <34183942+Cassini-chris@users.noreply.github.com> Date: Sun, 10 Dec 2023 14:43:44 +0100 Subject: [PATCH] Update transformer_sizing.ipynb Fixed var typos: emEbedding/position > embedding/position --- transformer_sizing.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/transformer_sizing.ipynb b/transformer_sizing.ipynb index 43262add3b..2dc07867c7 100644 --- a/transformer_sizing.ipynb +++ b/transformer_sizing.ipynb @@ -52,7 +52,7 @@ "text": [ "we see: 124337664, expected: 124337664, match: True\n", "name params ratio (%) \n", - "emebedding/position 786432 0.6325\n", + "embedding/position 786432 0.6325\n", "embedding/token 38597376 31.0424\n", "embedding 39383808 31.6749\n", "attention/ln 768 0.0006\n", @@ -77,9 +77,9 @@ " out = OrderedDict()\n", "\n", " # token and position embeddings\n", - " out['emebedding/position'] = n_embd * block_size\n", + " out['embedding/position'] = n_embd * block_size\n", " out['embedding/token'] = n_embd * vocab_size\n", - " out['embedding'] = out['emebedding/position'] + out['embedding/token']\n", + " out['embedding'] = out['embedding/position'] + out['embedding/token']\n", "\n", " # attention blocks\n", " out['attention/ln'] = n_embd # note, bias=False in our LN\n", @@ -278,7 +278,7 @@ " \"\"\"estimate of the model flops following PaLM paper formula\"\"\"\n", " # non-embedding model parameters. note that we do not subtract the\n", " # embedding/token params because those are tied and get used in the last layer.\n", - " N = params()['total'] - params()['emebedding/position']\n", + " N = params()['total'] - params()['embedding/position']\n", " L, H, Q, T = n_layer, n_head, n_embd//n_head, block_size\n", " mf_per_token = 6*N + 12*L*H*Q*T\n", " mf = mf_per_token * block_size\n",
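
For reference, the short standalone sketch below (not part of the diff itself) re-runs the two computations the hunks touch, using the corrected 'embedding/position' key throughout. The GPT-2 (124M) configuration values n_layer=12, n_head=12, n_embd=768, block_size=1024, vocab_size=50257 are an assumption inferred from the numbers printed in the first hunk; the 124337664 total is taken directly from its "we see: ..." line.

from collections import OrderedDict

# GPT-2 (124M) configuration, assumed from the notebook's printed numbers.
n_layer, n_head, n_embd, block_size, vocab_size = 12, 12, 768, 1024, 50257

out = OrderedDict()
out['embedding/position'] = n_embd * block_size   # 768 * 1024  = 786432   (matches the hunk output)
out['embedding/token']    = n_embd * vocab_size   # 768 * 50257 = 38597376 (matches the hunk output)
out['embedding'] = out['embedding/position'] + out['embedding/token']      # 39383808

# PaLM-style FLOPs estimate with the corrected key name. The total below is the
# figure printed in the first hunk ("we see: 124337664"); in the notebook it
# comes from summing every parameter group in params().
total = 124337664
# Subtract only the position embedding: the token embedding stays in N because
# it is weight-tied and reused in the final layer (per the comment in the hunk).
N = total - out['embedding/position']
L, H, Q, T = n_layer, n_head, n_embd // n_head, block_size
mf_per_token = 6 * N + 12 * L * H * Q * T
mf = mf_per_token * block_size

print(out['embedding'], mf)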