from transformers import pipeline
import torch

torch.manual_seed(0)
generator = pipeline('text-generation', model = 'openai-community/gpt2')
prompt = "Hello, I'm a language model"

generator(prompt, max_length = 30)

text2text_generator = pipeline("text2text-generation", model = 'google/flan-t5-base')
prompt = "Translate from English to French: I'm very happy to see you"

text2text_generator(prompt)

pip install -q transformers accelerate

from transformers import pipeline, AutoTokenizer
import torch

torch.manual_seed(0)
model = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

torch.manual_seed(0)
prompt = """Classify the text into neutral, negative or positive. 
Text: This movie is definitely one of my favorite movies of its kind. The interaction between respectable and morally strong characters is an ode to chivalry and the honor code amongst thieves and policemen.
Sentiment:
"""

sequences = pipe(
    prompt,
    max_new_tokens=10,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

torch.manual_seed(1)
prompt = """Return a list of named entities in the text.
Text: The Golden State Warriors are an American professional basketball team based in San Francisco.
Named entities:
"""

sequences = pipe(
    prompt,
    max_new_tokens=15,
    return_full_text = False,    
)

for seq in sequences:
    print(f"{seq['generated_text']}")

torch.manual_seed(2)
prompt = """Translate the English text to Italian.
Text: Sometimes, I've believed as many as six impossible things before breakfast.
Translation:
"""

sequences = pipe(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    return_full_text = False,
)

for seq in sequences:
    print(f"{seq['generated_text']}")

torch.manual_seed(3)
prompt = """Permaculture is a design process mimicking the diversity, functionality and resilience of natural ecosystems. The principles and practices are drawn from traditional ecological knowledge of indigenous cultures combined with modern scientific understanding and technological innovations. Permaculture design provides a framework helping individuals and communities develop innovative, creative and effective strategies for meeting basic needs while preparing for and mitigating the projected impacts of climate change.
Write a summary of the above text.
Summary:
"""

sequences = pipe(
    prompt,
    max_new_tokens=30,
    do_sample=True,
    top_k=10,
    return_full_text = False,
)

for seq in sequences:
    print(f"{seq['generated_text']}")

torch.manual_seed(4)
prompt = """Answer the question using the context below.
Context: Gazpacho is a cold soup and drink made of raw, blended vegetables. Most gazpacho includes stale bread, tomato, cucumbers, onion, bell peppers, garlic, olive oil, wine vinegar, water, and salt. Northern recipes often include cumin and/or pimentón (smoked sweet paprika). Traditionally, gazpacho was made by pounding the vegetables in a mortar with a pestle; this more laborious method is still sometimes used as it helps keep the gazpacho cool and avoids the foam and silky consistency of smoothie versions made in blenders or food processors.
Question: What modern tool is used to make gazpacho?
Answer:
"""

sequences = pipe(
    prompt,
    max_new_tokens=10,
    do_sample=True,
    top_k=10,
    return_full_text = False,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

torch.manual_seed(5)
prompt = """There are 5 groups of students in the class. Each group has 4 students. How many students are there in the class?"""

sequences = pipe(
    prompt,
    max_new_tokens=30,
    do_sample=True,
    top_k=10,
    return_full_text = False,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

torch.manual_seed(6)
prompt = """I baked 15 muffins. I ate 2 muffins and gave 5 muffins to a neighbor. My partner then bought 6 more muffins and ate 2. How many muffins do we now have?"""

sequences = pipe(
    prompt,
    max_new_tokens=10,
    do_sample=True,
    top_k=10,
    return_full_text = False,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

torch.manual_seed(0)
prompt = """Text: The first human went into space and orbited the Earth on April 12, 1961.
Date: 04/12/1961
Text: The first-ever televised presidential debate in the United States took place on September 28, 1960, between presidential candidates John F. Kennedy and Richard Nixon. 
Date:"""

sequences = pipe(
    prompt,
    max_new_tokens=8,
    do_sample=True,
    top_k=10,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

LLM 提示指南¶

激励的基础¶

模型的类型¶

使用文本生成 pipeline 运行仅解码器模型的推理：¶

基本模型 vs 指令/聊天模型¶

NLP任务¶

文本分类¶

命名实体识别¶

翻译¶

文本摘要¶

问题回答¶

推理¶

LLM提示的最佳实践¶

高级提示技术¶

少样本提示（Few-shot prompting）¶

少样本提示的示例¶

少样本提示的局限性¶

思维链（Chain-of-thought）¶

提示优化与微调对比¶