Commit
·
ca7d649
1
Parent(s):
eda7f18
Update README.md
Browse files
README.md
CHANGED
|
@@ -21,11 +21,11 @@ language:
|
|
| 21 |
widget:
|
| 22 |
- text: >-
|
| 23 |
The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.
|
| 24 |
-
|
| 25 |
inference:
|
| 26 |
parameters:
|
| 27 |
-
src_lang:
|
| 28 |
-
tgt_lang:
|
| 29 |
tags:
|
| 30 |
- seq2seq
|
| 31 |
- relation-extraction
|
|
@@ -61,7 +61,7 @@ from transformers import pipeline
|
|
| 61 |
|
| 62 |
triplet_extractor = pipeline('translation_xx_to_yy', model='Babelscape/mrebel-base', tokenizer='Babelscape/mrebel-base')
|
| 63 |
# We need to use the tokenizer manually since we need special tokens.
|
| 64 |
-
extracted_text = triplet_extractor.tokenizer.batch_decode([triplet_extractor("The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.",
|
| 65 |
print(extracted_text[0])
|
| 66 |
# Function to parse the generated text and extract the triplets
|
| 67 |
def extract_triplets_typed(text):
|
|
@@ -145,11 +145,8 @@ def extract_triplets_typed(text):
|
|
| 145 |
return triplets
|
| 146 |
|
| 147 |
# Load model and tokenizer
|
| 148 |
-
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-base", src_lang="
|
| 149 |
-
# Here we set English ("
|
| 150 |
-
# tokenizer._src_lang = "ca_XX"
|
| 151 |
-
# tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids("ca_XX")
|
| 152 |
-
# tokenizer.set_src_lang_special_tokens("ca_XX")
|
| 153 |
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-base")
|
| 154 |
gen_kwargs = {
|
| 155 |
"max_length": 256,
|
|
@@ -169,7 +166,6 @@ model_inputs = tokenizer(text, max_length=256, padding=True, truncation=True, re
|
|
| 169 |
generated_tokens = model.generate(
|
| 170 |
model_inputs["input_ids"].to(model.device),
|
| 171 |
attention_mask=model_inputs["attention_mask"].to(model.device),
|
| 172 |
-
decoder_start_token_id = tokenizer.convert_tokens_to_ids("tp_XX"),
|
| 173 |
**gen_kwargs,
|
| 174 |
)
|
| 175 |
|
|
|
|
| 21 |
widget:
|
| 22 |
- text: >-
|
| 23 |
The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.
|
| 24 |
+
example_title: English
|
| 25 |
inference:
|
| 26 |
parameters:
|
| 27 |
+
src_lang: en
|
| 28 |
+
tgt_lang: en
|
| 29 |
tags:
|
| 30 |
- seq2seq
|
| 31 |
- relation-extraction
|
|
|
|
| 61 |
|
| 62 |
triplet_extractor = pipeline('translation_xx_to_yy', model='Babelscape/mrebel-base', tokenizer='Babelscape/mrebel-base')
|
| 63 |
# We need to use the tokenizer manually since we need special tokens.
|
| 64 |
+
extracted_text = triplet_extractor.tokenizer.batch_decode([triplet_extractor("The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.", src_lang="en", return_tensors=True, return_text=False)[0]["translation_token_ids"]]) # change __en__ for the language of the source.
|
| 65 |
print(extracted_text[0])
|
| 66 |
# Function to parse the generated text and extract the triplets
|
| 67 |
def extract_triplets_typed(text):
|
|
|
|
| 145 |
return triplets
|
| 146 |
|
| 147 |
# Load model and tokenizer
|
| 148 |
+
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-base", src_lang="en", tgt_lang="en")
|
| 149 |
+
# Here we set English ("en") as the source language. To change the source language, swap the first token of the input for the one of your desired language, or change "en" to another supported language.
|
|
|
|
|
|
|
|
|
|
| 150 |
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-base")
|
| 151 |
gen_kwargs = {
|
| 152 |
"max_length": 256,
|
|
|
|
| 166 |
generated_tokens = model.generate(
|
| 167 |
model_inputs["input_ids"].to(model.device),
|
| 168 |
attention_mask=model_inputs["attention_mask"].to(model.device),
|
|
|
|
| 169 |
**gen_kwargs,
|
| 170 |
)
|
| 171 |
|