import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import requests
from io import BytesIO
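
# Fetch the demo image over HTTP and decode it in memory with Pillow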
| url = "https://d2h50zujfkj84t.cloudfront.net/product_images/Screenshot_2024-09-03_135657.png" | |
| response = requests.get(url) | |
| image = Image.open(BytesIO(response.content)) | |
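
# Load the checkpoint in half precision; trust_remote_code=True pulls in the
# custom vision wrapper code that ships with this repository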
model = AutoModelForCausalLM.from_pretrained(
    "qresearch/llama-3.1-8B-vision-378",
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to("cuda")  # float16 inference needs a GPU; switch to torch.float32 if running on CPU
tokenizer = AutoTokenizer.from_pretrained("qresearch/llama-3.1-8B-vision-378", use_fast=True)
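
# answer_question is provided by the checkpoint's remote code; sampling with a
# low temperature keeps the generated caption short and focused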
print(
    model.answer_question(
        image, "Briefly describe the image", tokenizer, max_new_tokens=128, do_sample=True, temperature=0.3
    )
)