In [ ]:
# Base ctransformers with no GPU acceleration
!pip install ctransformers>=0.2.24
# Or with CUDA GPU acceleration
!pip install ctransformers[cuda]>=0.2.24
In [ ]:
from ctransformers import AutoModelForCausalLM

# Number of transformer layers to offload to the GPU.
# Set to 0 if no GPU acceleration is available on your system.
GPU_LAYER_COUNT = 50

# Download the quantized Llama-2 chat model from the Hugging Face Hub
# and load it for CPU/GPU inference.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7b-Chat-GGUF",
    model_file="llama-2-7b-chat.q4_K_M.gguf",
    model_type="llama",
    gpu_layers=GPU_LAYER_COUNT,
)

# Run a single text completion and show the result.
print(llm("AI is going to"))
Content
Comments
You must login before you can post a comment.