import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm - tuned - alpha - 7b")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm - tuned - alpha - 7b")
model.half().cuda()
class StopOnTokens(StoppingCriteria):
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
stop_ids = set([50278, 50279, 50277, 1, 0])
return input_ids[0][-1] in stop_ids
system_prompt = """ <|System|> # StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open - source AI language model developed by Stability AI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""
prompt = f"{system_prompt}<|User|>what's your mood today? <|Assistant|> "
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
tokens = model.generate(
**inputs,
max_new_tokens = 64,
temperature = 0.7,
do_sample = True,
stopping_criteria = StoppingCriteriaList([StopOnTokens()])
)
print(tokenizer.decode(tokens[0], skip_special_tokens = True))