Commit
Merge remote-tracking branch 'upstream/master'
trholding committed Sep 2, 2023
2 parents (9b6beee + b9fb861), commit 7e589f8
Showing 2 changed files with 13 additions and 11 deletions.
run.c: 22 changes (12 additions & 10 deletions)
@@ -219,26 +219,28 @@ void free_run_state(RunState* s) {

 void memory_map_weights(TransformerWeights *w, Config* p, float* ptr, int shared_weights) {
     int head_size = p->dim / p->n_heads;
+    // make sure the multiplications below are done in 64bit to fit the parameter counts of 13B+ models
+    unsigned long long n_layers = p->n_layers;
     w->token_embedding_table = ptr;
     ptr += p->vocab_size * p->dim;
     w->rms_att_weight = ptr;
-    ptr += p->n_layers * p->dim;
+    ptr += n_layers * p->dim;
     w->wq = ptr;
-    ptr += p->n_layers * p->dim * (p->n_heads * head_size);
+    ptr += n_layers * p->dim * (p->n_heads * head_size);
     w->wk = ptr;
-    ptr += p->n_layers * p->dim * (p->n_kv_heads * head_size);
+    ptr += n_layers * p->dim * (p->n_kv_heads * head_size);
     w->wv = ptr;
-    ptr += p->n_layers * p->dim * (p->n_kv_heads * head_size);
+    ptr += n_layers * p->dim * (p->n_kv_heads * head_size);
     w->wo = ptr;
-    ptr += p->n_layers * (p->n_heads * head_size) * p->dim;
+    ptr += n_layers * (p->n_heads * head_size) * p->dim;
     w->rms_ffn_weight = ptr;
-    ptr += p->n_layers * p->dim;
+    ptr += n_layers * p->dim;
     w->w1 = ptr;
-    ptr += p->n_layers * p->dim * p->hidden_dim;
+    ptr += n_layers * p->dim * p->hidden_dim;
     w->w2 = ptr;
-    ptr += p->n_layers * p->hidden_dim * p->dim;
+    ptr += n_layers * p->hidden_dim * p->dim;
     w->w3 = ptr;
-    ptr += p->n_layers * p->dim * p->hidden_dim;
+    ptr += n_layers * p->dim * p->hidden_dim;
     w->rms_final_weight = ptr;
     ptr += p->dim;
     ptr += p->seq_len * head_size / 2; // skip what used to be freq_cis_real (for RoPE)
@@ -379,7 +381,7 @@ float* forward(Transformer* transformer, int token, int pos) {
     memcpy(x, content_row, dim*sizeof(*x));

     // forward all the layers
-    for(int l = 0; l < p->n_layers; l++) {
+    for(unsigned long long l = 0; l < p->n_layers; l++) {

         // attention rmsnorm
         rmsnorm(s->xb, x, w->rms_att_weight + l*dim, dim);
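Why the cast matters: with all operands of type int, C evaluates a product like p->n_layers * p->dim * p->hidden_dim entirely in 32-bit arithmetic, and at 13B+ scale the result overflows INT_MAX before it is ever added to the pointer. Declaring n_layers as unsigned long long promotes every multiplication in the chain to 64 bits; the forward() loop index gets the same treatment so the l*dim offsets are computed in 64 bits as well. Below is a minimal standalone sketch of the failure mode; the 13B-scale values n_layers = 40, dim = 5120, hidden_dim = 13824 are illustrative assumptions, not values read from this commit.

#include <stdio.h>

int main(void) {
    int n_layers = 40, dim = 5120, hidden_dim = 13824; // assumed 13B-scale dimensions

    // All operands are int, so the whole chain is evaluated in 32-bit
    // arithmetic: 40 * 5120 * 13824 = 2,831,155,200, which exceeds
    // INT_MAX (2,147,483,647) and overflows before the assignment widens it.
    long long wrong = n_layers * dim * hidden_dim; // signed overflow, undefined behavior

    // Widening the leftmost operand promotes each multiplication to
    // unsigned long long, the same effect the commit's n_layers local has.
    unsigned long long right = (unsigned long long)n_layers * dim * hidden_dim;

    printf("32-bit product: %lld\n", wrong);  // typically a wrapped, incorrect value
    printf("64-bit product: %llu\n", right);  // 2831155200
    return 0;
}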
tinystories.py: 2 changes (1 addition & 1 deletion)
@@ -88,7 +88,7 @@ def train_vocab(vocab_size):
     shard_filenames = sorted(glob.glob(os.path.join(data_dir, "*.json")))

     print(f"Writing temporary file {tiny_file} with {num_shards} shards...")
-    with open(tiny_file, "w") as of:
+    with open(tiny_file, "w", encoding="utf-8") as of:
         for shard in tqdm(shard_filenames[:num_shards]):
             with open(shard, "r") as f:
                 data = json.load(f)
