# healdette / generate_binders 2.py
# Raiff1982's picture
# Upload 55 files
# 6d3b444 verified
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import random
# Hugging Face Hub checkpoint used for both the tokenizer and the causal LM
# (ProtGPT2; swap the identifier to try an equivalent model).
_PROTGPT2_CHECKPOINT = "nferruz/ProtGPT2"

# Both objects are created once, at import time, and are read as module
# globals by generate_binders().
tokenizer = AutoTokenizer.from_pretrained(_PROTGPT2_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_PROTGPT2_CHECKPOINT)
# One-letter codes of the 20 standard amino acids. Used both to build the
# seed prompt and to filter the decoded model output.
_AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"
_AMINO_ACID_SET = frozenset(_AMINO_ACIDS)

# Number of leading embedding components folded into the seed prompt.
_SEED_LENGTH = 10

# A cleaned candidate must be strictly longer than this to be kept.
_MIN_BINDER_LENGTH = 30


def _seed_from_embedding(embedding_vector):
    """Map the leading embedding components to a short amino-acid prompt."""
    # Each component is scaled by 10, truncated to an int and reduced modulo
    # the alphabet size. Indexing the residue alphabet (instead of taking
    # chr(65 + k), i.e. the letters A..T) guarantees that the prompt never
    # contains B, J or O, which are not standard residues, and that V, W and
    # Y are reachable.
    return "".join(
        _AMINO_ACIDS[abs(int(x * 10)) % len(_AMINO_ACIDS)]
        for x in embedding_vector[:_SEED_LENGTH]
    )


def generate_binders(fusion_context, strategy='low_shot', num_candidates=10):
    """Sample candidate binder sequences from the module-level language model.

    The first components of ``fusion_context["embedding_vector"]`` are turned
    into a short amino-acid prompt, the model samples ``num_candidates``
    continuations of it, and every decoded continuation is reduced to
    standard one-letter residues. Candidates that are too short after
    cleaning are discarded, so the result may hold fewer than
    ``num_candidates`` entries (possibly none).

    Args:
        fusion_context: Mapping providing ``"embedding_vector"`` (a sliceable
            sequence of numbers) plus ``"perspective_tags"``,
            ``"sentiment_trace"`` and ``"symbolic_logic_score"``, which are
            copied unchanged into each returned record.
        strategy: Accepted for interface compatibility; not used by the
            current implementation.
        num_candidates: Number of sequences requested from the model.

    Returns:
        ``{"generated_binders": [...]}`` where each item is a dict with the
        keys ``"sequence"``, ``"perspective_source"``, ``"sentiment_trace"``
        and ``"symbolic_logic_score"``.

    Raises:
        ValueError: If the embedding vector is empty, since no prompt can be
            derived from it.
        KeyError: If ``"embedding_vector"`` is missing, or if one of the
            metadata keys is missing when a candidate is kept.
    """
    seed = _seed_from_embedding(fusion_context['embedding_vector'])
    if not seed:
        # An empty prompt encodes to a zero-length tensor; fail early with a
        # clear message rather than deep inside the generation call.
        raise ValueError(
            "fusion_context['embedding_vector'] is empty; "
            "cannot derive a seed sequence"
        )

    input_ids = tokenizer.encode(seed, return_tensors="pt")

    # Stochastic sampling so that the returned candidates differ from each
    # other. max_length is measured in tokens and includes the prompt.
    outputs = model.generate(
        input_ids,
        do_sample=True,
        top_k=950,
        top_p=0.96,
        temperature=1.0,
        max_length=200,
        num_return_sequences=num_candidates,
    )

    binders = []
    for output in outputs:
        decoded = tokenizer.decode(output, skip_special_tokens=True)
        # Drop everything that is not a standard residue (whitespace, line
        # breaks, any other characters the tokenizer may emit).
        sequence = "".join(aa for aa in decoded if aa in _AMINO_ACID_SET)
        if len(sequence) <= _MIN_BINDER_LENGTH:
            continue
        binders.append({
            "sequence": sequence,
            "perspective_source": fusion_context["perspective_tags"],
            "sentiment_trace": fusion_context["sentiment_trace"],
            "symbolic_logic_score": fusion_context["symbolic_logic_score"],
        })

    return {"generated_binders": binders}