Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
trholding committed Aug 20, 2023
2 parents bbb7b0e + 6c5d78f commit f7a7ed9
Show file tree
Hide file tree
Showing 15 changed files with 914 additions and 365 deletions.
83 changes: 76 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@ on:
push:
branches:
- master
paths: ['.github/workflows/**', '**/Makefile', '**/*.c', '**/*.h']
paths: ['.github/workflows/**', '**/Makefile', '**/*.c', '**/*.h', '**/*.py']
pull_request:
types: [opened, synchronize, reopened]
paths: ['**/Makefile', '**/*.c', '**/*.h']
paths: ['**/Makefile', '**/*.c', '**/*.h', '**/*.py']
# for manual triggering
workflow_dispatch:

env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

jobs:
# check basic builds to avoid breaking changes
ubuntu-focal-make:
runs-on: ubuntu-20.04
runs-on: ubuntu-latest

steps:
- name: Clone
Expand All @@ -28,6 +30,16 @@ jobs:
sudo apt-get update
sudo apt-get install build-essential -y
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Pip setup
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Build
id: make_build
run: |
Expand All @@ -38,6 +50,10 @@ jobs:
run: |
make runfast
- name: Test with pytest
run: |
pytest
macOS-latest-make:
runs-on: macos-latest

Expand All @@ -52,6 +68,21 @@ jobs:
run: |
brew update
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Pip setup
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Build clang
id: make_build_clang
run: |
make run CC=clang
- name: Build
id: make_build
run: |
Expand All @@ -62,15 +93,17 @@ jobs:
run: |
make runfast
- name: Build clang
id: make_build_clang
run: |
make run CC=clang
- name: Test with pytest
run: pytest




windows-latest-make:
runs-on: windows-latest

strategy:
fail-fast: false #necessary, otherwise the matrix breaks
matrix:
arch:
- amd64
Expand All @@ -90,11 +123,30 @@ jobs:
with:
arch: ${{ matrix.arch }}

- name: Set up Python 3.10
if: matrix.arch != 'amd64_arm64'
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Pip setup
if: matrix.arch != 'amd64_arm64'
run: |
python -m pip install --upgrade pip
if (Test-Path requirements.txt) {
pip install -r requirements.txt
}
- name: Build ${{ matrix.arch }}
id: build_msvc
run: |
.\build_msvc.bat
#cross-compiled, cannot be run on host
- name: Test with pytest
if: matrix.arch != 'amd64_arm64'
run: pytest

windows-latest-mingw:
runs-on: windows-latest

Expand Down Expand Up @@ -122,3 +174,20 @@ jobs:
id: build_mingw
run: |
make win64
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Pip setup
shell: powershell
run: |
python -m pip install --upgrade pip
if (Test-Path requirements.txt) {
pip install -r requirements.txt
}
- name: Test with pytest
shell: powershell
run: pytest
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ cosmorun:
zip run.com out/model.bin
zip run.com tokenizer.bin

# run all tests
.PHONY: test
test:
pytest

# run only tests for run.c C implementation (is a bit faster if only C code changed)
.PHONY: testc
testc:
pytest -k runc

.PHONY: clean
clean:
rm -f run
113 changes: 113 additions & 0 deletions export_meta_llama_hf_bin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
This script exports the Llama 2 weights in llama2c.bin format.
"""
import os
import sys
import struct
from pathlib import Path
import json

import torch

from model import precompute_freqs_cis


def export(p, state_dict, filepath='model.bin'):
    """Export the model weights in fp32 into a .bin file to be read from C.

    The file is written in this order: a header of seven native C ints,
    the token embedding table, the per-layer weights grouped by kind (every
    layer of one kind is written before the next kind starts), the final
    norm weight, the freqs_cos / freqs_sin tables and finally the
    classifier (lm_head) weights.

    Args:
        p: dict of hyperparameters. 'dim', 'n_layers', 'n_heads' and the
            optional 'n_kv_heads' are read; 'vocab_size' and 'max_seq_len'
            are overwritten in place with 32000 and 2048.
        state_dict: mapping from weight name to tensor. Every entry is
            deleted right after it has been written, so the mapping is
            consumed by this call.
        filepath: path of the binary file to create.

    Raises:
        KeyError: if an expected hyperparameter or weight name is missing.
    """
    # Per-layer weight names, in the exact order they are laid out in the file.
    per_layer_suffixes = (
        # attention weights
        'input_layernorm.weight',
        'self_attn.q_proj.weight',
        'self_attn.k_proj.weight',
        'self_attn.v_proj.weight',
        'self_attn.o_proj.weight',
        # ffn weights
        'post_attention_layernorm.weight',
        'mlp.gate_proj.weight',
        'mlp.down_proj.weight',
        'mlp.up_proj.weight',
    )

    # The context manager guarantees the file is closed even when a weight
    # is missing or a conversion fails half-way through the export.
    with open(filepath, 'wb') as f:

        def serialize(key):
            print(f"writing {key}...")
            t = state_dict[key].contiguous().view(-1).type(torch.float32).numpy()
            f.write(memoryview(t))
            # drop the tensor as soon as it is on disk to keep memory usage down
            del state_dict[key]

        # first write out the header
        hidden_dim = state_dict['model.layers.0.mlp.gate_proj.weight'].shape[0]
        p['vocab_size'] = 32000
        p['max_seq_len'] = 2048

        n_kv_heads = p.get('n_kv_heads') or p['n_heads']
        header = struct.pack(
            'iiiiiii',
            p['dim'], hidden_dim, p['n_layers'], p['n_heads'],
            n_kv_heads, -p['vocab_size'], p['max_seq_len']
        )
        # NOTE ABOVE: -ve vocab_size is indicating that the classifier weights
        # are present in the checkpoint and should be loaded.
        f.write(header)

        # next write out the embedding weights
        print("writing tok_embeddings...")
        serialize('model.embed_tokens.weight')

        # now all the layers, one weight kind at a time
        for suffix in per_layer_suffixes:
            for i in range(p['n_layers']):
                serialize(f'model.layers.{i}.{suffix}')

        # final rmsnorm
        serialize('model.norm.weight')
        # freqs_cos, freqs_sin: computed for twice the sequence length, then
        # truncated back to max_seq_len rows before being written
        freqs_cos, freqs_sin = precompute_freqs_cis(p['dim'] // p['n_heads'], p['max_seq_len'] * 2)
        state_dict['freqs_cos'] = freqs_cos[:p['max_seq_len']]
        state_dict['freqs_sin'] = freqs_sin[:p['max_seq_len']]
        # check if this requires additional conversion
        serialize('freqs_cos')
        serialize('freqs_sin')

        # finally write the output weights
        serialize('lm_head.weight')

    print(f"wrote {filepath}")


def concat_weights(models):
    """Merge several checkpoint shards into a single state dict.

    The weight names are taken from the first shard. A weight is copied from
    the first shard unchanged when there is only one shard or when it is
    1-D. Every other weight is concatenated across shards: along dim 1 for
    the token embedding, attention output projection and MLP down
    projection, along dim 0 otherwise. Concatenated weights are removed from
    the input shards afterwards; weights that were merely copied are left in
    place.

    Args:
        models: non-empty list of dicts mapping weight name to tensor.

    Returns:
        A new dict mapping each weight name to its merged tensor.
    """
    dim1_suffixes = ('.self_attn.o_proj.weight', '.mlp.down_proj.weight')
    merged = {}
    for key in list(models[0]):
        shards = [shard[key] for shard in models]
        first = shards[0]
        if len(shards) > 1 and len(first.shape) != 1:
            along_dim1 = (
                key.startswith('model.embed_tokens.weight')
                or key.endswith(dim1_suffixes)
            )
            merged[key] = torch.cat(shards, dim=1 if along_dim1 else 0)
            # the shard pieces are no longer needed once concatenated
            for shard in models:
                del shard[key]
        else:
            merged[key] = first
    return merged


def load_and_export(model_path, output_path):
    """Load a sharded checkpoint folder and export it as one .bin file.

    Reads `params.json` from `model_path`, loads every `consolidated.*.pth`
    shard found there onto the CPU (in sorted filename order), merges the
    shards with `concat_weights` and writes the result with `export`.

    Args:
        model_path: folder holding `params.json` and the checkpoint shards.
        output_path: path of the .bin file to create.

    Raises:
        FileNotFoundError: if `params.json` is missing or no
            `consolidated.*.pth` shard exists in `model_path`.
    """
    params_path = os.path.join(model_path, 'params.json')
    with open(params_path) as f:
        params = json.load(f)
        print(params)

    model_paths = sorted(Path(model_path).glob('consolidated.*.pth'))
    if not model_paths:
        # Fail early with a clear message instead of an IndexError raised
        # deep inside concat_weights.
        raise FileNotFoundError(
            f"no consolidated.*.pth checkpoint files found in {model_path}"
        )

    # NOTE(review): torch.load unpickles the files, so only run this on
    # checkpoints from a trusted source.
    # NOTE(review): export() looks weights up by 'model.layers.N...' style
    # names; confirm the checkpoints loaded here actually use that naming.
    models = [torch.load(p, map_location='cpu') for p in model_paths]
    state_dict = concat_weights(models)
    # release the per-shard dicts before the export starts
    del models
    export(params, state_dict, output_path)


if __name__ == '__main__':
    # Both positional arguments are required. The previous check only caught
    # the no-argument case, so passing a single argument crashed with an
    # IndexError on sys.argv[2].
    if len(sys.argv) < 3:
        print('[Llama model folder path] [output path]')
        sys.exit(1)

    model_path = sys.argv[1]
    output_path = sys.argv[2]
    load_and_export(model_path, output_path)
8 changes: 5 additions & 3 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@

@dataclass
class ModelArgs:
# default hyperparameters for the Llama 7B model
dim: int = 4096
n_layers: int = 32
n_heads: int = 32
n_kv_heads: Optional[int] = None
vocab_size: int = -1 # defined later by tokenizer
multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2
vocab_size: int = 32000
multiple_of: int = 256 # MLP hidden layer size will be multiple of
norm_eps: float = 1e-5
max_seq_len: int = 2048
dropout: float = 0.0
Expand Down Expand Up @@ -93,6 +94,7 @@ class Attention(nn.Module):
def __init__(self, args: ModelArgs):
super().__init__()
self.n_kv_heads = args.n_heads if args.n_kv_heads is None else args.n_kv_heads
assert args.n_heads % self.n_kv_heads == 0
model_parallel_size = 1
self.n_local_heads = args.n_heads // model_parallel_size
self.n_local_kv_heads = self.n_kv_heads // model_parallel_size
Expand Down Expand Up @@ -317,7 +319,7 @@ def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
# if the sequence context is growing too long we must crop it at block_size
idx_cond = idx if idx.size(1) <= self.params.max_seq_len else idx[:, -self.params.max_seq_len:]
# forward the model to get the logits for the index in the sequence
logits, _ = self(idx_cond)
logits = self(idx_cond)
logits = logits[:, -1, :] # crop to just the final time step
if temperature == 0.0:
# "sample" the single most likely index
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ numpy==1.23.5
pytest==7.4.0
Requests==2.31.0
sentencepiece==0.1.99
tiktoken==0.3.3
torch==2.0.1
tqdm==4.64.1
wandb==0.15.5
Loading

0 comments on commit f7a7ed9

Please sign in to comment.