In [ ]:
# Base ctransformers with no GPU acceleration
!pip install ctransformers>=0.2.24
# Or with CUDA GPU acceleration
!pip install ctransformers[cuda]>=0.2.24
In [ ]:
from ctransformers import AutoModelForCausalLM

# Number of transformer layers to offload to the GPU.
# Set to 0 if no GPU acceleration is available on your system.
GPU_LAYER_COUNT = 50

# Download the quantized Llama-2 chat model from the Hugging Face Hub
# and load it for CPU/GPU inference.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7b-Chat-GGUF",
    model_file="llama-2-7b-chat.q4_K_M.gguf",
    model_type="llama",
    gpu_layers=GPU_LAYER_COUNT,
)

# Run a single text completion and show the result.
print(llm("AI is going to"))
Content
Comments
You must login before you can post a comment.