From 3fa73bab51b01aad15af7b7ddff8c2ec3b2bbe86 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 28 Jun 2024 09:42:46 +0700 Subject: [PATCH 1/3] chore: remove cortex.llamacpp out of cortex-cpp release --- .../scripts/e2e-test-llama-linux-and-mac.sh | 178 ------------------ .github/scripts/e2e-test-llama-windows.bat | 165 ---------------- .github/workflows/cortex-build.yml | 174 +---------------- .github/workflows/cortex-cpp-quality-gate.yml | 151 +-------------- .github/workflows/nightly-build.yml | 174 +---------------- cortex-cpp/CMakeLists.txt | 19 +- cortex-cpp/Makefile | 37 +--- cortex-cpp/controllers/server.cc | 5 - cortex-cpp/cortex-cpp-deps/CMakeLists.txt | 20 +- cortex-cpp/utils/cpuid/cpu_validation.cc | 60 ------ cortex-cpp/utils/cpuid/cpu_validation.h | 8 - 11 files changed, 28 insertions(+), 963 deletions(-) delete mode 100644 .github/scripts/e2e-test-llama-linux-and-mac.sh delete mode 100644 .github/scripts/e2e-test-llama-windows.bat delete mode 100644 cortex-cpp/utils/cpuid/cpu_validation.cc delete mode 100644 cortex-cpp/utils/cpuid/cpu_validation.h diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh deleted file mode 100644 index 4e4170a6..00000000 --- a/.github/scripts/e2e-test-llama-linux-and-mac.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/bin/bash - -## Example run command -# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/cortex-cpp/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf - -# Check for required arguments -if [[ $# -ne 3 ]]; then - echo "Usage: $0 " - exit 1 -fi - -rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/cortex-cpp.log - -BINARY_PATH=$1 -DOWNLOAD_LLM_URL=$2 -DOWNLOAD_EMBEDDING_URL=$3 - -# Random port to ensure it's not used -min=10000 -max=11000 -range=$((max - min + 1)) -PORT=$((RANDOM % range + min)) - -# Start the binary file -"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/cortex-cpp.log & - -# Get the process id of the binary file -pid=$! - -if ! ps -p $pid >/dev/null; then - echo "cortex-cpp failed to start. Logs:" - cat /tmp/cortex-cpp.log - exit 1 -fi - -# Wait for a few seconds to let the server start -sleep 5 - -# Check if /tmp/testllm exists, if not, download it -if [[ ! -f "/tmp/testllm" ]]; then - curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm -fi - -# Check if /tmp/test-embedding exists, if not, download it -if [[ ! -f "/tmp/test-embedding" ]]; then - curl --connect-timeout 300 $DOWNLOAD_EMBEDDING_URL --output /tmp/test-embedding -fi - -# Run the curl commands -response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \ - --header 'Content-Type: application/json' \ - --data '{ - "llama_model_path": "/tmp/testllm", - "ctx_len": 50, - "ngl": 32, - "embedding": false -}') - -if ! ps -p $pid >/dev/null; then - echo "cortex-cpp failed to load model. Logs:" - cat /tmp/cortex-cpp.log - exit 1 -fi - -response2=$( - curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \ - --header 'Content-Type: application/json' \ - --header 'Accept: text/event-stream' \ - --header 'Access-Control-Allow-Origin: *' \ - --data '{ - "messages": [ - {"content": "Hello there", "role": "assistant"}, - {"content": "Write a long and sad story for me", "role": "user"} - ], - "stream": true, - "model": "gpt-3.5-turbo", - "max_tokens": 50, - "stop": ["hello"], - "frequency_penalty": 0, - "presence_penalty": 0, - "temperature": 0.1 - }' -) - -# unload model -response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request POST -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \ - --header 'Content-Type: application/json' \ - --data '{ - "model": "testllm" -}') - -# load embedding model -response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \ - --header 'Content-Type: application/json' \ - --data '{ - "llama_model_path": "/tmp/test-embedding", - "ctx_len": 50, - "ngl": 32, - "embedding": true, - "model_type": "embedding" -}') - -# request embedding -response5=$( - curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \ - --header 'Content-Type: application/json' \ - --header 'Accept: text/event-stream' \ - --header 'Access-Control-Allow-Origin: *' \ - --data '{ - "input": "Hello", - "model": "test-embedding", - "encoding_format": "float" - }' -) - -error_occurred=0 -if [[ "$response1" -ne 200 ]]; then - echo "The load llm model curl command failed with status code: $response1" - cat /tmp/load-llm-model-res.log - error_occurred=1 -fi - -if [[ "$response2" -ne 200 ]]; then - echo "The completion curl command failed with status code: $response2" - cat /tmp/completion-res.log - error_occurred=1 -fi - -if [[ "$response3" -ne 200 ]]; then - echo "The unload model curl command failed with status code: $response3" - cat /tmp/unload-model-res.log - error_occurred=1 -fi - -if [[ "$response4" -ne 200 ]]; then - echo "The load embedding model curl command failed with status code: $response4" - cat /tmp/load-embedding-model-res.log - error_occurred=1 -fi - -if [[ "$response5" -ne 200 ]]; then - echo "The embedding curl command failed with status code: $response5" - cat /tmp/embedding-res.log - error_occurred=1 -fi - -if [[ "$error_occurred" -eq 1 ]]; then - echo "cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!" - echo "cortex-cpp Error Logs:" - cat /tmp/cortex-cpp.log - kill $pid - exit 1 -fi - -echo "----------------------" -echo "Log load model:" -cat /tmp/load-llm-model-res.log - -echo "----------------------" -echo "Log run test:" -cat /tmp/completion-res.log - -echo "----------------------" -echo "Log run test:" -cat /tmp/unload-model-res.log - -echo "----------------------" -echo "Log run test:" -cat /tmp/load-embedding-model-res.log - -echo "----------------------" -echo "Log run test:" -cat /tmp/embedding-res.log - -echo "cortex-cpp test run successfully!" - -# Kill the server process -kill $pid diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat deleted file mode 100644 index c54c741d..00000000 --- a/.github/scripts/e2e-test-llama-windows.bat +++ /dev/null @@ -1,165 +0,0 @@ -@echo off - -set "TEMP=C:\Users\%UserName%\AppData\Local\Temp" -set "MODEL_LLM_PATH=%TEMP%\testllm" -set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding" - -rem Check for required arguments -if "%~3"=="" ( - echo Usage: %~0 ^ ^ ^ - exit /b 1 -) - -set "BINARY_PATH=%~1" -set "DOWNLOAD_LLM_URL=%~2" -set "DOWNLOAD_EMBEDDING_URL=%~3" - -for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi" - -echo BINARY_NAME=%BINARY_NAME% - -del %TEMP%\response1.log 2>nul -del %TEMP%\response2.log 2>nul -del %TEMP%\response3.log 2>nul -del %TEMP%\response4.log 2>nul -del %TEMP%\response5.log 2>nul -del %TEMP%\cortex-cpp.log 2>nul - -set /a min=9999 -set /a max=11000 -set /a range=max-min+1 -set /a PORT=%min% + %RANDOM% %% %range% - -rem Start the binary file -start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\cortex-cpp.log 2>&1 - -ping -n 6 127.0.0.1 %PORT% > nul - -rem Capture the PID of the started process with "cortex-cpp" in its name -for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do ( - set "pid=%%a" -) - -echo pid=%pid% - -if not defined pid ( - echo cortex-cpp failed to start. Logs: - type %TEMP%\cortex-cpp.log - exit /b 1 -) - -rem Wait for a few seconds to let the server start - -rem Check if %TEMP%\testmodel exists, if not, download it -if not exist "%MODEL_LLM_PATH%" ( - curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%" -) - -if not exist "%MODEL_EMBEDDING_PATH%" ( - curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%" -) - -rem Define JSON strings for curl data -call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%" -call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%" -set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\", \"model_alias\":\"gpt-3.5-turbo\"}" -set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" -set "curl_data3={\"model\":\"gpt-3.5-turbo\"}" -set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}" -set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}" - -rem Print the values of curl_data for debugging -echo curl_data1=%curl_data1% -echo curl_data2=%curl_data2% -echo curl_data3=%curl_data3% -echo curl_data4=%curl_data4% -echo curl_data5=%curl_data5% - -rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 - -curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/chat_completion" ^ ---header "Content-Type: application/json" ^ ---data "%curl_data2%" > %TEMP%\response2.log 2>&1 - -curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 - -curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1 - -curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^ ---header "Content-Type: application/json" ^ ---data "%curl_data5%" > %TEMP%\response5.log 2>&1 - -set "error_occurred=0" - -rem Read the status codes from the log files -for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" -for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" -for /f %%a in (%TEMP%\response3.log) do set "response3=%%a" -for /f %%a in (%TEMP%\response4.log) do set "response4=%%a" -for /f %%a in (%TEMP%\response5.log) do set "response5=%%a" - -if "%response1%" neq "200" ( - echo The first curl command failed with status code: %response1% - type %TEMP%\response1.log - set "error_occurred=1" -) - -if "%response2%" neq "200" ( - echo The second curl command failed with status code: %response2% - type %TEMP%\response2.log - set "error_occurred=1" -) - -if "%response3%" neq "200" ( - echo The third curl command failed with status code: %response3% - type %TEMP%\response3.log - set "error_occurred=1" -) - -if "%response4%" neq "200" ( - echo The fourth curl command failed with status code: %response4% - type %TEMP%\response4.log - set "error_occurred=1" -) - -if "%response5%" neq "200" ( - echo The fifth curl command failed with status code: %response5% - type %TEMP%\response5.log - set "error_occurred=1" -) - -if "%error_occurred%"=="1" ( - echo cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!! - echo cortex-cpp Error Logs: - type %TEMP%\cortex-cpp.log - taskkill /f /pid %pid% - exit /b 1 -) - - -echo ---------------------- -echo Log load llm model: -type %TEMP%\response1.log - -echo ---------------------- -echo Log run test: -type %TEMP%\response2.log - -echo ---------------------- -echo Log unload model: -type %TEMP%\response3.log - -echo ---------------------- -echo Log load embedding model: -type %TEMP%\response3.log - -echo ---------------------- -echo Log run embedding test: -type %TEMP%\response5.log - -echo cortex-cpp test run successfully! - -rem Kill the server process -@REM taskkill /f /pid %pid% -taskkill /f /im cortex-cpp.exe 2>nul || exit /B 0 \ No newline at end of file diff --git a/.github/workflows/cortex-build.yml b/.github/workflows/cortex-build.yml index 9b22dc1b..7c4d2f09 100644 --- a/.github/workflows/cortex-build.yml +++ b/.github/workflows/cortex-build.yml @@ -7,8 +7,6 @@ on: workflow_dispatch: env: - LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf - EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf jobs: create-draft-release: @@ -45,179 +43,28 @@ jobs: matrix: include: - os: "linux" - name: "amd64-avx2" - runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: true - ccache-dir: '' - - - os: "linux" - name: "amd64-avx" - runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx512" + name: "amd64" runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-vulkan" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx2-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx512-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx2-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx512-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON" + cmake-flags: "" build-deps-cmake-flags: "" - run-e2e: false ccache-dir: '' - - os: "mac" name: "amd64" runs-on: "macos-13" cmake-flags: "" build-deps-cmake-flags: "" - run-e2e: true ccache-dir: '' - - os: "mac" name: "arm64" - runs-on: "macos-latest" + runs-on: "mac-silicon" cmake-flags: "-DMAC_ARM64=ON" build-deps-cmake-flags: "" - run-e2e: true ccache-dir: '' - - os: "windows" - name: "amd64-avx2" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx512" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-vulkan" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx2-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx512-cuda-12-0" + name: "amd64" runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx2-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx512-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + cmake-flags: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' steps: @@ -267,11 +114,6 @@ jobs: cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" - - name: Pre-package - run: | - cd cortex-cpp - make pre-package - - name: Code Signing macOS if: runner.os == 'macOS' run: | @@ -297,12 +139,6 @@ jobs: cd cortex-cpp make package - - name: Run e2e testing - if: ${{ matrix.run-e2e }} - run: | - cd cortex-cpp - make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }} - - name: Upload Artifact uses: actions/upload-artifact@v2 with: diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index d3a58016..e9a25027 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -22,159 +22,29 @@ jobs: matrix: include: - os: "linux" - name: "amd64-avx2" - runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: true - ccache-dir: '' - - os: "linux" - name: "amd64-avx" - runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx512" + name: "amd64" runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-vulkan" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx2-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx512-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx2-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - os: "linux" - name: "amd64-avx512-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON" + cmake-flags: "" build-deps-cmake-flags: "" - run-e2e: false ccache-dir: '' - os: "mac" name: "amd64" runs-on: "macos-13" cmake-flags: "" build-deps-cmake-flags: "" - run-e2e: true ccache-dir: '' - os: "mac" name: "arm64" runs-on: "mac-silicon" cmake-flags: "-DMAC_ARM64=ON" build-deps-cmake-flags: "" - run-e2e: true ccache-dir: '' - os: "windows" - name: "amd64-avx2" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-vulkan" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx2-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512-cuda-12-0" + name: "amd64" runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx2-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + cmake-flags: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' steps: - name: Clone id: checkout @@ -203,23 +73,12 @@ jobs: run: | cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" - - - name: Pre-package - run: | - cd cortex-cpp - make pre-package - name: Package run: | cd cortex-cpp make package - - name: Run e2e testing - if: ${{ matrix.run-e2e }} - run: | - cd cortex-cpp - make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }} - - name: Upload Artifact uses: actions/upload-artifact@v2 with: diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 39c6372e..2cc4722f 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -49,179 +49,28 @@ jobs: matrix: include: - os: "linux" - name: "amd64-avx2" - runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: true - ccache-dir: '' - - - os: "linux" - name: "amd64-avx" - runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx512" + name: "amd64" runs-on: "ubuntu-20-04" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-vulkan" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx2-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx512-cuda-11-7" - runs-on: "ubuntu-20-04-cuda-11-7" - cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx2-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF" - build-deps-cmake-flags: "" - run-e2e: false - ccache-dir: '' - - - os: "linux" - name: "amd64-avx512-cuda-12-0" - runs-on: "ubuntu-20-04-cuda-12-0" - cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON" + cmake-flags: "" build-deps-cmake-flags: "" - run-e2e: false ccache-dir: '' - - os: "mac" name: "amd64" runs-on: "macos-13" cmake-flags: "" build-deps-cmake-flags: "" - run-e2e: true ccache-dir: '' - - os: "mac" name: "arm64" - runs-on: "macos-latest" + runs-on: "mac-silicon" cmake-flags: "-DMAC_ARM64=ON" build-deps-cmake-flags: "" - run-e2e: false ccache-dir: '' - - - os: "windows" - name: "amd64-avx2" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: true - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - os: "windows" - name: "amd64-avx512" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-vulkan" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx2-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx-cuda-12-0" - runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx512-cuda-12-0" + name: "amd64" runs-on: "windows-cuda-12-0" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx2-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - - - os: "windows" - name: "amd64-avx512-cuda-11-7" - runs-on: "windows-cuda-11-7" - cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" + cmake-flags: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - run-e2e: false ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' steps: @@ -271,11 +120,6 @@ jobs: cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" - - name: Pre-package - run: | - cd cortex-cpp - make pre-package - - name: Code Signing macOS if: runner.os == 'macOS' run: | @@ -299,13 +143,7 @@ jobs: - name: Package run: | cd cortex-cpp - make package - - - name: Run e2e testing - if: ${{ matrix.run-e2e }} - run: | - cd cortex-cpp - make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }} + make package - name: Upload Artifact uses: actions/upload-artifact@v2 diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index c3f90525..d59fbea5 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -56,27 +56,10 @@ endif() add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") -if(LLAMA_CUDA) - add_compile_definitions(CORTEX_CUDA) -endif() - -if(LLAMA_AVX512) - add_compile_definitions(CORTEX_AVX512) -endif() - -if(LLAMA_AVX2) - add_compile_definitions(CORTEX_AVX2) -endif() - -if(LLAMA_VULKAN) - add_compile_definitions(CORTEX_VULKAN) -endif() - -add_subdirectory(test) +# add_subdirectory(test) add_executable(${PROJECT_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc - ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_validation.cc ) # ############################################################################## diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 3af94f47..fc60916a 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -1,5 +1,5 @@ # Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean -.PHONY: all build package run-e2e-test +.PHONY: all build package BUILD_DEPS_CMAKE_EXTRA_FLAGS ?= "" CMAKE_EXTRA_FLAGS ?= "" @@ -36,24 +36,6 @@ else make -j4; endif -pre-package: -ifeq ($(OS),Windows_NT) - @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" - @powershell -Command "cp build\cortex-cpp.exe .\cortex-cpp\;" - @powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;" - @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" -else ifeq ($(shell uname -s),Linux) - @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ - cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \ - cp build/cortex-cpp cortex-cpp/; -else - @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ - cp build/engines/cortex.llamacpp/libengine.dylib cortex-cpp/engines/cortex.llamacpp/; \ - cp build/cortex-cpp cortex-cpp/; -endif - codesign: ifeq ($(CODE_SIGN),false) @echo "Skipping Code Sign" @@ -80,23 +62,6 @@ else tar -czvf cortex-cpp.tar.gz cortex-cpp; endif -run-e2e-test: -ifeq ($(RUN_TESTS),false) - @echo "Skipping tests" - @exit 0 -endif -ifeq ($(OS),Windows_NT) - @powershell -Command "cd cortex-cpp; ..\..\.github\scripts\e2e-test-llama-windows.bat cortex-cpp.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);" -else ifeq ($(shell uname -s),Linux) - @cd cortex-cpp; \ - chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ - rm -rf uploads/; -else - @cd cortex-cpp; \ - chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ - rm -rf uploads/; -endif - clean: ifeq ($(OS),Windows_NT) @powershell -Command "rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz;" diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 9950eb7f..14507317 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -7,7 +7,6 @@ #include "trantor/utils/Logger.h" #include "utils/cortex_utils.h" #include "utils/cpuid/cpu_info.h" -#include "utils/cpuid/cpu_validation.h" #include "utils/logging_utils.h" using namespace inferences; @@ -275,10 +274,6 @@ void server::LoadModel(const HttpRequestPtr& req, if (engine_type == kLlamaEngine) { cortex::cpuid::CpuInfo cpu_info; LOG_INFO << "CPU instruction set: " << cpu_info.to_string(); - if (auto [res, err] = cortex::cpuid::llamacpp::IsValidInstructions(); - !res) { - LOG_WARN << err; - } } std::string abs_path = diff --git a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt index 0f087728..f74e90be 100644 --- a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt +++ b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt @@ -88,16 +88,16 @@ ExternalProject_Add( ) # Download and install GoogleTest -ExternalProject_Add( - gtest - GIT_REPOSITORY https://github.com/google/googletest - GIT_TAG v1.14.0 - CMAKE_ARGS - -Dgtest_force_shared_crt=ON - -DCMAKE_PREFIX_PATH=${THIRD_PARTY_INSTALL_PATH} - -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH} - -DCMAKE_BUILD_TYPE=RELEASE -) +# ExternalProject_Add( +# gtest +# GIT_REPOSITORY https://github.com/google/googletest +# GIT_TAG v1.14.0 +# CMAKE_ARGS +# -Dgtest_force_shared_crt=ON +# -DCMAKE_PREFIX_PATH=${THIRD_PARTY_INSTALL_PATH} +# -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH} +# -DCMAKE_BUILD_TYPE=RELEASE +# ) if(WIN32) diff --git a/cortex-cpp/utils/cpuid/cpu_validation.cc b/cortex-cpp/utils/cpuid/cpu_validation.cc deleted file mode 100644 index 14968f8d..00000000 --- a/cortex-cpp/utils/cpuid/cpu_validation.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include "cpu_validation.h" -#include "cpu_info.h" - -namespace cortex::cpuid::llamacpp { - -// TODO implement Result for better perf -std::pair IsValidInstructions() { - cpuid::CpuInfo info; -#if defined(_WIN32) -#if defined(CORTEX_AVX512) - auto res = info.has_avx512_f() || info.has_avx512_dq() || - info.has_avx512_ifma() || info.has_avx512_pf() || - info.has_avx512_er() || info.has_avx512_cd() || - info.has_avx512_bw() || info.has_avx512_vl() || - info.has_avx512_vbmi() || info.has_avx512_vbmi2() || - info.has_avx512_vnni() || info.has_avx512_bitalg() || - info.has_avx512_vpopcntdq() || info.has_avx512_4vnniw() || - info.has_avx512_4fmaps() || info.has_avx512_vp2intersect(); - return res ? std::make_pair(true, "") - : std::make_pair(false, "System does not support AVX512"); -#elif defined(CORTEX_AVX2) - return info.has_avx2() - ? std::make_pair(true, "") - : std::make_pair(false, "System does not support AVX2"); -#elif defined(CORTEX_VULKAN) - return std::make_pair(true, ""); -#else - return info.has_avx() ? std::make_pair(true, "") - : std::make_pair(false, "System does not support AVX"); -#endif -#elif defined(__APPLE__) - return std::make_pair(true, ""); -#else -#if defined(CORTEX_CUDA) - return std::make_pair(true, ""); -#elif defined(CORTEX_AVX512) - auto res = info.has_avx512_f() || info.has_avx512_dq() || - info.has_avx512_ifma() || info.has_avx512_pf() || - info.has_avx512_er() || info.has_avx512_cd() || - info.has_avx512_bw() || info.has_avx512_vl() || - info.has_avx512_vbmi() || info.has_avx512_vbmi2() || - info.has_avx512_vnni() || info.has_avx512_bitalg() || - info.has_avx512_vpopcntdq() || info.has_avx512_4vnniw() || - info.has_avx512_4fmaps() || info.has_avx512_vp2intersect(); - return res ? std::make_pair(true, "") - : std::make_pair(false, "System does not support AVX512"); -#elif defined(CORTEX_AVX2) - return info.has_avx2() - ? std::make_pair(true, "") - : std::make_pair(false, "System does not support AVX2"); -#elif defined(CORTEX_VULKAN) - return std::make_pair(true, ""); -#else - return info.has_avx() ? std::make_pair(true, "") - : std::make_pair(false, "System does not support AVX"); -#endif -#endif - return std::make_pair(true, ""); -} -} // namespace cortex::cpuid::llamacpp \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/cpu_validation.h b/cortex-cpp/utils/cpuid/cpu_validation.h deleted file mode 100644 index d50d22b8..00000000 --- a/cortex-cpp/utils/cpuid/cpu_validation.h +++ /dev/null @@ -1,8 +0,0 @@ -// Inspired by https://github.com/steinwurf/cpuid -#pragma once -#include -#include - -namespace cortex::cpuid::llamacpp { -std::pair IsValidInstructions(); -} // namespace cortex::cpuid::llamacpp \ No newline at end of file From 58400865c6279f15c9373a288304a031b882fc03 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 28 Jun 2024 11:25:55 +0700 Subject: [PATCH 2/3] fix: pre-package --- .github/workflows/cortex-build.yml | 5 +++++ .github/workflows/cortex-cpp-quality-gate.yml | 5 +++++ .github/workflows/nightly-build.yml | 5 +++++ cortex-cpp/Makefile | 16 ++++++++++++++++ 4 files changed, 31 insertions(+) diff --git a/.github/workflows/cortex-build.yml b/.github/workflows/cortex-build.yml index 7c4d2f09..7cefed9a 100644 --- a/.github/workflows/cortex-build.yml +++ b/.github/workflows/cortex-build.yml @@ -114,6 +114,11 @@ jobs: cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" + - name: Pre-package + run: | + cd cortex-cpp + make pre-package + - name: Code Signing macOS if: runner.os == 'macOS' run: | diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index e9a25027..33a13d37 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -74,6 +74,11 @@ jobs: cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" + - name: Pre-package + run: | + cd cortex-cpp + make pre-package + - name: Package run: | cd cortex-cpp diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 2cc4722f..f36ffc91 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -120,6 +120,11 @@ jobs: cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" + - name: Pre-package + run: | + cd cortex-cpp + make pre-package + - name: Code Signing macOS if: runner.os == 'macOS' run: | diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index fc60916a..ef703c60 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -36,6 +36,22 @@ else make -j4; endif +pre-package: +ifeq ($(OS),Windows_NT) + @powershell -Command "mkdir -p cortex-cpp;" + @powershell -Command "cp build\cortex-cpp.exe .\cortex-cpp\;" + @powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" +else ifeq ($(shell uname -s),Linux) + @mkdir -p cortex-cpp; \ + cp build/cortex-cpp cortex-cpp/; +else + @mkdir -p cortex-cpp; \ + cp build/cortex-cpp cortex-cpp/; +endif + codesign: ifeq ($(CODE_SIGN),false) @echo "Skipping Code Sign" From 1c97d2fb4131cdb71147607816ee0d1142427950 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 28 Jun 2024 12:47:38 +0700 Subject: [PATCH 3/3] fix: remove engine.cmake --- cortex-cpp/CMakeLists.txt | 1 - .../engines/cortex.llamacpp/engine.cmake | 92 ------------------- 2 files changed, 93 deletions(-) delete mode 100644 cortex-cpp/engines/cortex.llamacpp/engine.cmake diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index d59fbea5..e22d9f62 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -1,7 +1,6 @@ cmake_minimum_required(VERSION 3.5) project(cortex-cpp C CXX) -include(engines/cortex.llamacpp/engine.cmake) include(CheckIncludeFileCXX) check_include_file_cxx(any HAS_ANY) diff --git a/cortex-cpp/engines/cortex.llamacpp/engine.cmake b/cortex-cpp/engines/cortex.llamacpp/engine.cmake deleted file mode 100644 index c4569de1..00000000 --- a/cortex-cpp/engines/cortex.llamacpp/engine.cmake +++ /dev/null @@ -1,92 +0,0 @@ -# cortex.llamacpp release version -set(VERSION 0.1.20) -# vulka is unstable, we need to use a customized version -set(VULKA_VERSION 0.1.12-25.06.24) - -set(ENGINE_VERSION v${VERSION}) -add_compile_definitions(CORTEX_LLAMACPP_VERSION="${VERSION}") - -# MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION}) - -# Download library based on instructions -if(UNIX AND NOT APPLE) - if(CUDA_12_0) - if(LLAMA_AVX512) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx512-cuda-12-0.tar.gz) - elseif(NOT LLAMA_AVX2) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx-cuda-12-0.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx2-cuda-12-0.tar.gz) - endif() - elseif(CUDA_11_7) - if(LLAMA_AVX512) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx512-cuda-11-7.tar.gz) - elseif(NOT LLAMA_AVX2) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx-cuda-11-7.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx2-cuda-11-7.tar.gz) - endif() - elseif(LLAMA_VULKAN) - set(LIBRARY_NAME cortex.llamacpp-${VULKA_VERSION}-linux-amd64-vulkan.tar.gz) - set(ENGINE_VERSION v${VULKA_VERSION}) - elseif(LLAMA_AVX512) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx512.tar.gz) - elseif(NOT LLAMA_AVX2) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx2.tar.gz) - endif() -elseif(UNIX) - if(MAC_ARM64) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-mac-arm64.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-mac-amd64.tar.gz) - endif() -else() - if(CUDA_12_0) - if(LLAMA_AVX512) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx512-cuda-12-0.tar.gz) - elseif(NOT LLAMA_AVX2) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx-cuda-12-0.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx2-cuda-12-0.tar.gz) - endif() - elseif(CUDA_11_7) - if(LLAMA_AVX512) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx512-cuda-11-7.tar.gz) - elseif(NOT LLAMA_AVX2) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx-cuda-11-7.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx2-cuda-11-7.tar.gz) - endif() - elseif(LLAMA_VULKAN) - set(LIBRARY_NAME cortex.llamacpp-${VULKA_VERSION}-windows-amd64-vulkan.tar.gz) - set(ENGINE_VERSION v${VULKA_VERSION}) - elseif(LLAMA_AVX512) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx512.tar.gz) - elseif(NOT LLAMA_AVX2) - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx.tar.gz) - else() - set(LIBRARY_NAME cortex.llamacpp-${VERSION}-windows-amd64-avx2.tar.gz) - endif() -endif() - - -set(LIBLLAMA_ENGINE_URL https://github.com/janhq/cortex.llamacpp/releases/download/${ENGINE_VERSION}/${LIBRARY_NAME}) -# MESSAGE("LIBLLAMA_ENGINE_URL="${LIBLLAMA_ENGINE_URL}) -# MESSAGE("LIBARRY_NAME=" ${LIBRARY_NAME}) - -set(LIBLLAMA_ENGINE_PATH ${CMAKE_BINARY_DIR}/engines/${LIBRARY_NAME}) - -# MESSAGE("CMAKE_BINARY_DIR = " ${CMAKE_BINARY_DIR}) - -file(DOWNLOAD ${LIBLLAMA_ENGINE_URL} ${LIBLLAMA_ENGINE_PATH} STATUS LIBLLAMA_ENGINE_DOWNLOAD_STATUS) -list(GET LIBLLAMA_ENGINE_DOWNLOAD_STATUS 0 LIBLLAMA_ENGINE_DOWNLOAD_STATUS_NO) -# MESSAGE("file = " ${CMAKE_BINARY_DIR}/engines/${LIBRARY_NAME}) - -if(LIBLLAMA_ENGINE_DOWNLOAD_STATUS_NO) - message(STATUS "Pre-built library not downloaded. (${LIBLLAMA_ENGINE_DOWNLOAD_STATUS})") -else() - message(STATUS "Linking downloaded pre-built library.") - file(ARCHIVE_EXTRACT INPUT ${CMAKE_BINARY_DIR}/engines/${LIBRARY_NAME} DESTINATION ${CMAKE_BINARY_DIR}/engines/) -endif() \ No newline at end of file