From bacc20203efb1839aa313858a04d75255bb4b7f4 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed, 9 Aug 2023 20:37:17 -0500 Subject: [PATCH] Merge remote-tracking branch 'upstream/concedo' --- CMakeLists.txt | 13 +++++++++---- Makefile | 5 +++++ klite.embd | 15 +++++++++------ koboldcpp.py | 35 ++++++++++++++++++----------------- llama.cpp | 19 ++++++++++++------- model_adapter.cpp | 44 ++++++++++++++++++++++++++------------------ 6 files changed, 79 insertions(+), 52 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c0272165d8eb..e8042a71913ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,9 +3,9 @@ # IT WILL NOT BE UPDATED OR MAINTAINED !!! message(STATUS "============== ============== ==============") -message(STATUS "WARNING! Do NOT use this file. It is UNSUPPORTED for normal users. Use MAKE instead.") -message(STATUS "It is ONLY for CUBLAS build testing on windows visual studio. IT WILL NOT BE UPDATED OR MAINTAINED !!!") -message(STATUS "IF YOU ARE SEEING THIS, you MUST ONLY be building AN EXPERIMENAL WINDOWS CUBLAS BUILD! NOTHING ELSE WILL BE SUPPORTED !!!") +message(STATUS "WARNING! Recommend NOT to use this file. It is UNSUPPORTED for normal users. Use MAKE instead.") +message(STATUS "It is ONLY for CUBLAS builds on windows visual studio. IT WILL OVERWRITE YOUR EXISTING MAKEFILE !!!") +message(STATUS "IF YOU ARE SEEING THIS, you MUST ONLY be building CUBLAS BUILDS! NOTHING ELSE WILL BE SUPPORTED !!!") message(STATUS "============== ============== ==============") cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason @@ -110,7 +110,12 @@ if (LLAMA_CUBLAS) if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics else() - set(CMAKE_CUDA_ARCHITECTURES "37;52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}") + if(CUDAToolkit_VERSION VERSION_GREATER 12) + set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + else() + set(CMAKE_CUDA_ARCHITECTURES "37;52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + endif() endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") diff --git a/Makefile b/Makefile index 7325343f843e2..736d7dfac3d34 100644 --- a/Makefile +++ b/Makefile @@ -174,6 +174,11 @@ ifdef LLAMA_CUDA_KQUANTS_ITER else NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 endif +ifdef LLAMA_CUDA_MMQ_Y + NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y) +else + NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64 +endif # LLAMA_CUDA_MMQ_Y #ifdef LLAMA_CUDA_CUBLAS # NVCCFLAGS += -DGGML_CUDA_CUBLAS #endif # LLAMA_CUDA_CUBLAS diff --git a/klite.embd b/klite.embd index e3933b6ae2e8a..153b4af7de47b 100644 --- a/klite.embd +++ b/klite.embd @@ -1,6 +1,6 @@