Making LLMs smarter with structured generation and Outlines
Structured generation is a method of improving the performance of your LLM without additional training or increased inference time. It constrains the output of your LLM to a specified schema, making it more predictable and machine readable. It may also improve accuracy (though this is contested); today we'll show how using the Outlines library for structured generation produces some improvements on a small, anecdotal example.
import transformers
import outlines
import torch
import string
import pydantic
import enum
import json
import warnings
warnings.filterwarnings("ignore")
We'll use the Qwen2.5-0.5B-Instruct model to perform our experiments. The model is relatively capable for its size and is well suited for testing different libraries.
First, we'll load it using the HuggingFace transformers library and use it to initialise a text generation pipeline.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
device = torch.device("cuda")
pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)
Device set to use cuda
Next, we'll use the pipeline to generate a response to the prompt we'll use to test the different structured generation methods: getting the answer to 40 + 2.
prompt = "What is 40 + 2 ? Give me the answer only. "
messages = [
    {
        "role": "user",
        "content": prompt,
    },
]
results = pipe(
    messages,
    do_sample=False,
    max_new_tokens=25,
)
output = results[0]["generated_text"][-1]["content"]
print(output)
60
Last I checked, 40 + 2 = 42. I guess deep learning is hitting a wall after all. Now let's see if structured generation can save us.
Outlines has a wrapper around models provided by the transformers library.
model = outlines.models.transformers(
    model_name,
    device=device,
)
Generating output using Outlines requires two steps:
- creating a generator which specifies how the text will be generated
- using that generator with the prompt and some parameters to get an output
The simplest generator is generate.text, which simply generates unstructured text. We use samplers.greedy to do greedy sampling (effectively a temperature of zero), which gives deterministic outputs.
generator = outlines.generate.text(
    model,
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
    max_tokens=25,
)
print(output)
22 The answer is 22. To arrive at this answer, simply add the two numbers together: 4
You'll see that the output is still wrong. But why didn't this generate the same output as the transformers text generation pipeline? The pipeline applies the model's chat template to the messages, whereas generate.text feeds the raw prompt string to the model directly.
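If we wanted the raw-prompt generation to see the same input as the pipeline did, one option (a minimal sketch, not from the original post) is to apply the model's chat template ourselves with the tokenizer we already loaded and pass the formatted string to the generator:
# Sketch (not from the post): format the prompt with the chat template,
# which is what the text-generation pipeline does internally.
chat_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
output = generator(
    chat_prompt,
    max_tokens=25,
)
print(output)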
Our first taste of structured generation is generate.choice. We pass a list of strings to the generator, and it can only generate one of them. Here, we allow it to generate any number from 0 to 99, each followed by a period.
generator = outlines.generate.choice(
    model,
    choices=[f"{i}." for i in range(100)],
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
)
print(output)
22.
Unfortunately, it still gets the answer wrong. What if it needs a little more help? We'll do the same thing again but put the equation into the choices.
generator = outlines.generate.choice(
    model,
    [f"40 + 2 = {i}." for i in range(100)],
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
)
print(output)
40 + 2 = 42.
And there we go, that's one way to make a model with 500 million parameters do simple addition.
Let's try a few other generate methods. The first is generate.format, which forces the output to be a valid Python type, here an integer.
generator = outlines.generate.format(
    model,
    int,
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
    max_tokens=25,
)
print(output)
2200000000000000000000000
It's definitely an integer, but not the one we wanted. Personally, I've never found generate.format to be that useful, or been able to get great results out of it. (You may have noticed I had to add max_tokens back; if I didn't, it would generate integers until it hit the full output context length.)
Next up is probably the second most useful method, generate.regex, which allows us to pass a regex pattern that all outputs will conform to (generate.choice uses generate.regex under the hood). Here, we can ensure it only generates outputs with one or more digits, followed by a period.
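To make the "under the hood" claim a little more concrete, here's a rough sketch (my own illustration, not Outlines' actual internals) of how a list of choices can be expressed as a single regex alternation:
import re

# Illustrative only: escape each choice and join them into one alternation.
# The real generate.choice implementation may build its pattern differently.
choices = [f"{i}." for i in range(100)]
choice_pattern = "|".join(re.escape(choice) for choice in choices)
# choice_pattern looks like r"0\.|1\.|2\.|..." and could be passed to
# outlines.generate.regex to mimic generate.choice.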
generator = outlines.generate.regex(
    model,
    r"\d+\.",
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
)
print(output)
22.
Unsurprisingly, this generated the same thing as our first test with generate.choice. What if we repeat our second experiment, this time using generate.regex?
generator = outlines.generate.regex(
    model,
    r"40 \+ 2 = \d+\.",
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
    max_tokens=25,
)
print(output)
40 + 2 = 42.
The definition of insanity is doing the same thing over and over again and expecting a different result.
One trick we can use is to state that the answer is obvious: online (i.e. in the training data), people rarely say something is obvious unless they are giving the actual correct answer. Thus, the LLM is more likely to generate the correct answer after outputting that the answer is obvious.
generator = outlines.generate.regex(
    model,
    r"Obviously, the answer is: \d+\.",
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
)
print(output)
Obviously, the answer is: 42.
And just to confirm it's not a fluke, we double our sample size to two:
output = generator(
    "What is 402 + 420?",
    max_tokens=25,
)
print(output)
Obviously, the answer is: 822.
We've now improved the performance of our LLM and gotten it to generate text in a way we can easily parse.
But what if we could output more complex structured outputs that are even easier to parse? What if we could define the structure in Python, instead of regex? What if we could define a hierarchy of structure? This is where the real strength of Outlines lies.
Below, we use the generate.json method, which takes a JSON schema that the output must conform to. JSON schema syntax is a bit convoluted, so luckily for us we can instead pass a pydantic.BaseModel subclass, which we show how to do below:
class Operator(str, enum.Enum):
    ADDITION = "ADDITION"
    SUBTRACTION = "SUBTRACTION"
    MULTIPLICATION = "MULTIPLICATION"
    DIVISION = "DIVISION"

class Schema(pydantic.BaseModel):
    left_operand: int
    operator: Operator
    right_operand: int
    answer: int

generator = outlines.generate.json(
    model,
    Schema,
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
)
repr(output)
"Schema(left_operand=40, operator=<Operator.ADDITION: 'ADDITION'>, right_operand=2, answer=42)"
Now, not only do we get the correct answer, we get it as a Schema object...
type(output)
__main__.Schema
...we can get the answer directly from the object...
output.operator == Operator.ADDITION == "ADDITION", output.answer
(True, 42)
...and also convert it to a dictionary...
output.dict()
{'left_operand': 40, 'operator': <Operator.ADDITION: 'ADDITION'>, 'right_operand': 2, 'answer': 42}
...or into a JSON string.
output.json()
'{"left_operand":40,"operator":"ADDITION","right_operand":2,"answer":42}'
Another quick example for sentiment classification:
class Sentiment(str, enum.Enum):
    POSITIVE = "POSITIVE"
    NEGATIVE = "NEGATIVE"

class Classification(pydantic.BaseModel):
    sentiment: Sentiment
    sentiment_score: int

prompt = "The movie was great. I loved it!"

generator = outlines.generate.json(
    model,
    Classification,
    sampler=outlines.samplers.greedy(),
)
output = generator(
    prompt,
)
repr(output)
"Classification(sentiment=<Sentiment.POSITIVE: 'POSITIVE'>, sentiment_score=4)"
Using structured generation is a win-win. Your outputs are trivial to parse instead of having to handle whatever prefixes your LLM conjures up, and you get improved performance as a bonus.
Outlines makes it easy to apply structured generation to almost any LLM supported by the transformers library.