| | |
| |
|
| | from transformers import AutoModel, AutoTokenizer |
| | import torch |
| | import torch.nn as nn |
| | import json |
| |
|
| | |
# --- Restore the fine-tuned encoder, classifier head, and tokenizer ---

# Encoder weights were saved with save_pretrained(); AutoModel rebuilds them.
encoder = AutoModel.from_pretrained("./outputs/final_baseline_best")

# Classifier-head hyperparameters saved alongside the encoder.
with open("./outputs/final_baseline_best/classifier_config.json", "r", encoding="utf-8") as f:
    c_config = json.load(f)

num_labels = c_config.get('num_labels', 1)      # 1 => binary task scored via sigmoid
hidden_size = c_config.get('hidden_size', 768)  # must match the encoder's hidden size

# Rebuild the classifier head with the exact architecture used in training,
# then restore its trained weights.
classifier = nn.Sequential(
    nn.Linear(hidden_size, 256),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(256, num_labels),
)
# BUG FIX: map_location="cpu" so a checkpoint saved on GPU still loads on a
# CPU-only machine (the original call fails there with a CUDA deserialization error).
classifier.load_state_dict(
    torch.load("./outputs/final_baseline_best/classifier.pt", map_location="cpu")
)

# BUG FIX: switch both modules to eval mode. Without this the Dropout layer
# stays active at inference time and predict() returns nondeterministic scores.
encoder.eval()
classifier.eval()

tokenizer = AutoTokenizer.from_pretrained("./outputs/final_baseline_best")
| |
|
| | |
def predict(text):
    """Score *text* with the fine-tuned encoder + classifier head.

    Tokenizes the input (truncated to 128 tokens), runs it through the
    encoder under ``torch.no_grad()``, classifies the first-token
    ([CLS]) embedding, and returns the sigmoid probability as a float.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        hidden = encoder(**encoded).last_hidden_state
        pooled = hidden[:, 0, :]  # [CLS] token embedding
        score = torch.sigmoid(classifier(pooled))
    return score.item()
| |
|
| | |
# Quick demo on a sample Bengali sentence.
text = "আপনার বাংলা টেক্সট এখানে"
probability = predict(text)
print(f"Hate Speech Probability: {probability:.4f}")
label = 'Hate Speech' if probability > 0.5 else 'Non-Hate Speech'
print(f"Prediction: {label}")
| |
|