Configuration Parsing Warning: Config file config.json cannot be fetched (too big)

Configuration Parsing Warning: Config file tokenizer_config.json cannot be fetched (too big)

YAML Metadata Warning: empty or missing yaml metadata in repo card

Check out the documentation for more information.

Sample code to run the model:

import os
import argparse
import logging
import time
from tqdm import tqdm
import pandas as pd

import torch
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer


# Emit INFO-level log records from this script and the libraries it uses.
logging.basicConfig(level=logging.INFO)

# Language code -> English language name, used to phrase the translation
# prompt ("Translate the following text to <name>: ...").
# Covers English plus the Indic languages the model supports.
_CODE2LANG = {
    "as": "Assamese",
    "bn": "Bengali",
    "en": "English",
    "gu": "Gujarati",
    "hi": "Hindi",
    "kn": "Kannada",
    "ml": "Malayalam",
    "mr": "Marathi",
    "ne": "Nepali",
    "or": "Odia",
    "pa": "Punjabi",
    "sa": "Sanskrit",
    "ta": "Tamil",
    "te": "Telugu",
    "ur": "Urdu"
}

def main(src=None, tgt_lang="hi", model="ai4bharat/IndicTrans3-beta", download_dir=None):
    """Translate a batch of sentences to an Indic language with vLLM.

    Args:
        src: List of source sentences to translate. Defaults to a small
            built-in English demo batch.
        tgt_lang: Language code of the target language; must be a key of
            ``_CODE2LANG``.
        model: Hugging Face model id (or local path) to load with vLLM.
        download_dir: Optional directory for cached model weights. ``None``
            lets vLLM use its default Hugging Face cache (the original
            sample hard-coded a machine-specific path here).

    Returns:
        A list of ``{'input': sentence, 'output': translation}`` dicts,
        one per source sentence, in input order.

    Raises:
        ValueError: If ``tgt_lang`` is not a supported language code.
    """
    # Fail fast with a clear message instead of a KeyError mid-loop.
    if tgt_lang not in _CODE2LANG:
        raise ValueError(
            f"Unsupported target language code {tgt_lang!r}; "
            f"expected one of {sorted(_CODE2LANG)}"
        )

    if src is None:
        src = [
            "When I was young, I used to go to the park every day.",
            "We watched a new movie last week, which was very inspiring.",
            "If you had met me at that time, we would have gone out to eat.",
            "My friend has invited me to his birthday party, and I will give him a gift."
        ]

    # IndicTrans3-beta reuses the Llama-3.2 chat template, so the base
    # model's tokenizer is used purely for prompt formatting.
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-3.2-3B-Instruct", padding_side="left"
    )

    # One single-turn chat per sentence, rendered to a plain-text prompt
    # with the generation header appended.
    prompt_dicts = [
        [{"role": "user", "content": f"Translate the following text to {_CODE2LANG[tgt_lang]}: {s}"}]
        for s in src
    ]
    prompts = [
        tokenizer.apply_chat_template(p, tokenize=False, add_generation_prompt=True)
        for p in prompt_dicts
    ]

    print(f"Loading model from: {model}")
    llm = LLM(
        model=model,
        trust_remote_code=True,
        tensor_parallel_size=1,
        download_dir=download_dir,
    )

    sampling_params = SamplingParams(
        temperature=1.0,  # set an appropriate temperature value
        max_tokens=4096,
        repetition_penalty=1.0,
    )

    # vLLM preserves input order, so zipping outputs back to src is safe.
    outputs = llm.generate(prompts, sampling_params)
    results = [
        {'input': input_, 'output': output.outputs[0].text}
        for input_, output in zip(src, outputs)
    ]

    print(results)
    return results

if __name__ == "__main__":
    main()
Downloads last month
13
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support