|
|
import torch |
|
|
from typing import Dict, Any, List |
|
|
from transformers import AutoTokenizer, AutoModelForMaskedLM |
|
|
|
|
|
class EndpointHandler():
    """Inference endpoint that scores a sentence for two soft skills —
    leadership and collaboration — using two fine-tuned PET-style
    masked-language-model classifiers.

    Each trait is predicted by wrapping the sentence in a cloze prompt
    ("... Answer: <mask>") and checking whether the model fills the mask
    with a token beginning with "yes".
    """

    def __init__(self, path=""):
        """Load both fine-tuned RoBERTa MLM checkpoints and their tokenizers.

        Args:
            path: Root directory containing the fine-tuned checkpoint folders.
        """
        # Single device used for all inference. Models are moved here once,
        # instead of calling .cuda() on every request as before.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # --- Leadership classifier configuration ---
        self.leadership_model_name = "roberta-large-mnli"
        self.leadership_model_path = path + "/pet-leadership-model-roberta-large-mnli_bs4_gas4_lr1e-05_ep5" + "/checkpoint-1855"
        self.leadership_pattern = "Sentence: {} Question: Does this show leadership? Answer: <mask>"

        # --- Collaboration classifier configuration ---
        self.collab_model_name = "roberta-large-mnli"
        self.collab_model_path = path + "/pet-collaboration-model-roberta-large-mnli_bs4_gas4_lr1e-05_ep5" + "/checkpoint-1560"
        self.collab_pattern = "Sentence: {} Question: Does this show teamwork? Answer: <mask>"

        # Tokenizers come from the base model; weights from the fine-tuned
        # checkpoints on disk.
        self.leadership_tokenizer = AutoTokenizer.from_pretrained(self.leadership_model_name)
        self.collab_tokenizer = AutoTokenizer.from_pretrained(self.collab_model_name)

        # Move to the target device once and force eval mode (disables dropout).
        self.model_lead = AutoModelForMaskedLM.from_pretrained(self.leadership_model_path).to(self.device).eval()
        self.model_collab = AutoModelForMaskedLM.from_pretrained(self.collab_model_path).to(self.device).eval()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        data args:
            inputs (:obj:`str`): the sentence to score.
        Return:
            A :obj:`dict` (will be serialized and returned) with binary
            predictions and the raw predicted mask tokens for both traits.
        """
        sentence = data["inputs"]
        pl, pt, pc, pct = self.extract_skill_quality(sentence)
        return {"leadership": pl, "leadership_token": pt, "collaboration": pc, "collaboration_token": pct}

    def predict_trait(self, model, sentence, pattern, task_name):
        """Fill the cloze prompt's <mask> with *model* and map the token to 0/1.

        Args:
            model: A masked-LM (moved to ``self.device`` if not already there).
            sentence: Raw input sentence.
            pattern: Cloze template with one ``{}`` slot and one ``<mask>``.
            task_name: ``"leadership"`` selects the leadership tokenizer;
                anything else selects the collaboration tokenizer.

        Returns:
            Tuple ``(pred_label, pred_token)`` where ``pred_label`` is 1 iff
            the decoded mask token starts with "yes" (case-insensitive).
        """
        prompt = pattern.format(sentence)

        tokenizer = self.leadership_tokenizer if task_name == "leadership" else self.collab_tokenizer

        enc = tokenizer(prompt, return_tensors="pt")

        # Defensive: models are already placed in __init__; this is a no-op
        # for them but keeps the method safe for externally supplied models.
        model = model.to(self.device)
        enc = {k: v.to(self.device) for k, v in enc.items()}

        # Inference only: no_grad avoids building the autograd graph, which
        # the original code accumulated on every request.
        with torch.no_grad():
            outputs = model(**enc)
        logits = outputs.logits

        # Position of the mask token; the patterns contain exactly one <mask>.
        mask_index = (enc["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
        mask_logits = logits[0, mask_index, :]

        pred_token_id = mask_logits.argmax(dim=-1).item()
        pred_token = tokenizer.decode([pred_token_id]).strip()

        # Verbalizer: any token starting with "yes" counts as the positive class.
        pred_label = 1 if pred_token.lower().startswith("yes") else 0
        return pred_label, pred_token

    def extract_skill_quality(self, sentence):
        """Run both trait classifiers on ``sentence``.

        Returns:
            ``(lead_label, lead_token, collab_label, collab_token)``.
        """
        pl, pt = self.predict_trait(self.model_lead, sentence, self.leadership_pattern, "leadership")
        pc, pct = self.predict_trait(self.model_collab, sentence, self.collab_pattern, "collaboration")
        return pl, pt, pc, pct
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: run the handler on a few sample resume-style sentences.
    # NOTE: the original script did `from handler import EndpointHandler`,
    # re-importing (and shadowing) the class defined in this same file —
    # that breaks with ModuleNotFoundError unless the file happens to be
    # named handler.py. Use the local class directly instead.
    handler = EndpointHandler(path=".")

    test_sentences = [
        "I am leading a team of engineers.",
        "I am not leading my team.",
        "Exemplified the second-to-none customer service delivery in all interactions with customers and potential clients",
        "Collaborated with cross-functional teams to deliver the product on time.",
        "Finished my work on time.",
        "Mentored interns and coordinated weekly sync-ups."
    ]

    for sentence in test_sentences:
        print(f"Sentence: \"{sentence}\"")
        result = handler({"inputs": sentence})

        lead_token = result["leadership_token"]
        lead_pred = "Yes" if result["leadership"] == 1 else "No"
        print(f" Leadership Prediction: {lead_pred} (Predicted token: '{lead_token}')")

        collab_token = result["collaboration_token"]
        collab_pred = "Yes" if result["collaboration"] == 1 else "No"
        print(f" Collaboration Prediction: {collab_pred} (Predicted token: '{collab_token}')")
        print()
|