Skip to content

Commit

Permalink
MKL build fix + Matmul debug log build
Browse files Browse the repository at this point in the history
- run.c: added temp matmul debug for action replay
- Makefile: fixed MKL build, added matmul debug log build
  • Loading branch information
trholding committed Apr 1, 2024
1 parent bf290a4 commit 5d2fa99
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 7 deletions.
18 changes: 11 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ BLIS_PREFIX = /usr/local
BLIS_INC = $(BLIS_PREFIX)/include/blis
BLIS_LIB = $(BLIS_PREFIX)/lib/libblis.a

# MKL
# Install locations for Intel MKL. MKL_INC and MKL_LIB are derived from
# MKL_PREFIX and are passed as -I$(MKL_INC) / -L$(MKL_LIB) by the
# run_cc_mkl and runq_cc_mkl recipes. A command-line assignment overrides
# the default, e.g.: make run_cc_mkl MKL_PREFIX=/path/to/intel
# NOTE(review): the mkl/include and mkl/lib/intel64 sub-paths are assumed by
# these defaults - confirm they match the layout of the installed MKL.
MKL_PREFIX = /opt/intel
MKL_INC = $(MKL_PREFIX)/mkl/include
MKL_LIB = $(MKL_PREFIX)/mkl/lib/intel64

#OpenBLAS
OPENBLAS_PREFIX = /usr/include
Expand All @@ -13,10 +17,6 @@ OPENBLAS_INC = $(OPENBLAS_PREFIX)/openblas
MOD_PATH = out/model.bin
TOK_PATH = tokenizer.bin

# -L${MKLROOT}/lib/intel64 -lmkl_rt -Wl,--no-as-needed -lpthread -lm -ldl
# -m64 -I"${MKLROOT}/include"


# choose your compiler, e.g. gcc/clang
# example override to clang: make run CC=clang

Expand Down Expand Up @@ -64,6 +64,10 @@ run_cc_gnu: ## - Optimized Generic linux distro build
# Quantized generic build: compiles runq.c with -Ofast, native arch/tune and
# -std=gnu11, linking libm. The output binary is named "run" (not "runq"),
# so it replaces any binary produced by the non-quantized targets.
runq_cc_gnu: ## - Same for quantized build
$(CC) -Ofast -march=native -mtune=native -std=gnu11 -o run runq.c -lm

# Matmul debug-log build: compiles run.c with MMDEBUG defined, producing the
# "run" binary. Expect very large logs when the resulting binary is used.
.PHONY: run_cc_mmdebug
run_cc_mmdebug: ## - ***NEW*** Matmul Debug Log build (Warning: Huge Logs)
	$(CC) -Ofast -march=native -mtune=native -D MMDEBUG -o run run.c -lm

##@ Accelerated Builds
# additionally compiles with OpenMP, allowing multithreaded runs
# make sure to also enable multiple threads when running, e.g.:
Expand Down Expand Up @@ -133,12 +137,12 @@ runq_cc_blis: ## - Same for quantized build
##@ ---> x86_64
# amd64 (x86_64) / Intel Mac (WIP) Do not use!
# run_cc_mkl: compiles run.c with -D MKL and -D OPENMP (plus -fopenmp) into
# the "run" binary.
# NOTE(review): the target header and its compile line each appear twice
# below. This looks like the before/after lines of a diff whose +/- markers
# were lost: the first compile line links -lblis, the second adds
# -I$(MKL_INC) -L$(MKL_LIB) and links -lmkl_rt -lpthread instead.
# Presumably only the second header/recipe pair belongs in the real
# Makefile - confirm against the repository before relying on this text.
.PHONY: run_cc_mkl
run_cc_mkl: ## - OpenMP + Intel MKL CBLAS build (x86_64 / intel Mac) (WIP)
$(CC) -D MKL -D OPENMP -Ofast -fopenmp -march=native -mtune=native run.c -lm -lblis -o run
run_cc_mkl: ## - ***NEW*** OpenMP + Intel MKL CBLAS build (x86_64 / intel Mac)
$(CC) -D MKL -D OPENMP -Ofast -fopenmp -march=native -mtune=native -I$(MKL_INC) -L$(MKL_LIB) run.c -lmkl_rt -lpthread -lm -o run

# runq_cc_mkl: quantized counterpart of run_cc_mkl; compiles runq.c with
# -D MKL and -D OPENMP (plus -fopenmp). The output binary is named "run".
# NOTE(review): two compile lines follow the single target header. The first
# links -lblis; the second adds -I$(MKL_INC) -L$(MKL_LIB) and links
# -lmkl_rt -lpthread. Presumably the -lblis line is the pre-change version
# shown by the diff and only the second line is live - confirm against the
# repository.
.PHONY: runq_cc_mkl
runq_cc_mkl: ## - Same for quantized build
$(CC) -D MKL -D OPENMP -Ofast -fopenmp -march=native -mtune=native runq.c -lm -lblis -o run
$(CC) -D MKL -D OPENMP -Ofast -fopenmp -march=native -mtune=native -I$(MKL_INC) -L$(MKL_LIB) runq.c -lmkl_rt -lpthread -lm -o run

##@ ---> ARM64 / aarch64
.PHONY: run_cc_armpl
Expand Down
55 changes: 55 additions & 0 deletions run.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,48 @@ void avx_matmul(float* xout, const float* x, const float* w, int n, int d) {
}
}
#endif

#ifdef MMDEBUG
void debug_matmul(float* xout, float* x, float* w, int n, int d) {
    /*
     * Logging variant of the matrix-vector product
     *     W (d,n) @ x (n,) -> xout (d,)
     *
     * xout : destination, d floats written
     * x    : input vector, n floats read
     * w    : weights, d rows of n floats, indexed as w[row * n + col]
     *
     * Everything is written to stderr: the whole of x on one line, the
     * whole of w on one line (rows concatenated, no separator between
     * rows), then - after the product is computed - the whole of xout
     * on one line. Every value is formatted with "%f ".
     */

    // Dump the input vector.
    fputs("<<<<<<< Input x: >>>>>>> ", stderr);
    for (int col = 0; col < n; col++) {
        fprintf(stderr, "%f ", x[col]);
    }
    fputc('\n', stderr);

    // Dump the weight matrix, row after row, on a single line.
    fputs("<<<<<<< Input w: >>>>>>> ", stderr);
    for (int row = 0; row < d; row++) {
        int base = row * n;
        for (int col = 0; col < n; col++) {
            fprintf(stderr, "%f ", w[base + col]);
        }
    }
    fputc('\n', stderr);

    // The product itself: one dot product per output element. The loop
    // index is declared outside the loop because it is handed to the
    // optional ACCEL macro, which must sit directly in front of the loop.
    int i;
#ifdef ACCEL
    ACCEL(i)
#endif
    for (i = 0; i < d; i++) {
        int base = i * n;
        float acc = 0.0f;
        for (int col = 0; col < n; col++) {
            acc += w[base + col] * x[col];
        }
        xout[i] = acc;
    }

    // Dump the result vector.
    fputs("<<<<<<< Output xout: >>>>>>> ", stderr);
    for (int row = 0; row < d; row++) {
        fprintf(stderr, "%f ", xout[row]);
    }
    fputc('\n', stderr);
}
#endif
// END L2E Addition

void matmul(float* xout, float* x, float* w, int n, int d) {
Expand All @@ -453,6 +495,8 @@ void matmul(float* xout, float* x, float* w, int n, int d) {
cblas_sgemv(CblasRowMajor, CblasNoTrans, d, n, 1.0f, w, n, x, 1, 0.0f, xout, 1);
#elif defined(ACCELAVX)
avx_matmul(xout, x, w, n, d);
#elif defined(MMDEBUG)
debug_matmul(xout, x, w, n, d);
#else
#ifdef ACCEL
ACCEL(i) // OMP/OACC Macro
Expand Down Expand Up @@ -1250,6 +1294,14 @@ int main(int argc, char *argv[]) {
fflush(stdout);
inprompt(prompt); // read prompt
#else
#ifdef MMDEBUG
FILE* dLogFile = freopen("debug_matmul.log", "w", stderr);
if (dLogFile == NULL) {
// Handle error
perror("freopen");
return 1;
}
#endif
// END L2E Addition
// poor man's C argparse so we can override the defaults above from the command line
if (argc >= 2) { checkpoint_path = argv[1]; } else { error_usage(); }
Expand Down Expand Up @@ -1316,6 +1368,9 @@ int main(int argc, char *argv[]) {
} // end of loop
#endif
#endif
#ifdef MMDEBUG
fclose(dLogFile);
#endif
// END L2E Addition
return 0;
}
Expand Down

0 comments on commit 5d2fa99

Please sign in to comment.