When I run inference with the model on the CPU, the output is garbled.
Here is my code:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
tokenizer = AutoTokenizer.from_pretrained("/media/nvme/johnson/model-space/Giraffe-v1-Tokenizer", use_fast=False)
model = AutoModelForCausalLM.from_pretrained("/media/nvme/johnson/model-space/Giraffe-v1-delta-13b-scaled-16")
# model = AutoModelForCausalLM.from_pretrained("/media/nvme/johnson/model-space/13B-Alpaca-Base")
device = "cpu"
model.to(device)
generation_config = GenerationConfig(
    temperature=0.2,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    min_new_tokens=32,
    max_new_tokens=256,
)
prompts = [
    "Develop a C++ program that reads a text file line by line and counts the number of occurrences of a specific word in the file."
]
outputs = ""
for idx, prompt in enumerate(prompts):
    batch = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to(device)
    generated_ids = model.generate(**batch, generation_config=generation_config)
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True).lstrip()
    outputs += generated_text + "\n\n"
    print(f"=== EXAMPLE {idx} ===")
    print()
    print(generated_text)
    print()
    print("======================")
    print()
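In case it helps with diagnosing this, here is a small debugging snippet that can be run after the loop above. It is a minimal sketch that reuses the model, tokenizer, batch, and generation_config objects already defined, and the slicing assumes batch size 1. It prints the raw generated token IDs of the continuation alongside their decoded text, which should show whether the model is emitting degenerate IDs (e.g. one token repeated) or whether the decoding step is at fault:

import torch  # only needed for inference_mode; generate() already disables gradients internally

with torch.inference_mode():
    debug_ids = model.generate(**batch, generation_config=generation_config)

# Keep only the newly generated tokens (everything after the prompt).
prompt_len = batch["input_ids"].shape[1]
new_tokens = debug_ids[0, prompt_len:]
print("raw ids:", new_tokens.tolist())
print("decoded:", tokenizer.decode(new_tokens, skip_special_tokens=True))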
Here is my output, which is obviously garbled.