Spaces:
Running
Running
Upload postprocess.py
Browse files
src/postproc/postprocess.py
CHANGED
|
@@ -82,7 +82,7 @@ def parseOptions(arguments):
|
|
| 82 |
### Function - read usual abbreviations
|
| 83 |
#################################################
|
| 84 |
def getUsualAbbr():
|
| 85 |
-
infile = open("
|
| 86 |
abbr = []
|
| 87 |
for line in infile:
|
| 88 |
if (line[0] == "#"):
|
|
@@ -350,7 +350,7 @@ def posprocFix():
|
|
| 350 |
else: # ambiguous in the lex - do nothing
|
| 351 |
pos = tk[3]
|
| 352 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 353 |
-
feat = opFEATS[0] if (len(opFEATS) == 1) else tk[5]
|
| 354 |
# fix Pron and Det tags - PRON, DET
|
| 355 |
elif (tk[3] in lexPronDetTags):
|
| 356 |
options = lex.pget(tk[1].lower(), tk[3])
|
|
@@ -385,7 +385,7 @@ def posprocFix():
|
|
| 385 |
else: # ambiguous in the lex - do nothing
|
| 386 |
pos = tk[3]
|
| 387 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 388 |
-
feat = opFEATS[0] if (len(opFEATS) == 1) else tk[5]
|
| 389 |
# fix Open tags - ADJ, INTJ, NOUN, NUM
|
| 390 |
elif (tk[3] in lexOpenTags):
|
| 391 |
options = lex.pget(tk[1].lower(), tk[3])
|
|
@@ -423,7 +423,7 @@ def posprocFix():
|
|
| 423 |
else: # ambiguous in the lex - do nothing
|
| 424 |
pos = tk[3]
|
| 425 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 426 |
-
feat = opFEATS[0] if (len(opFEATS) == 1) else tk[5]
|
| 427 |
# fix Verb tags - AUX, VERB
|
| 428 |
elif (tk[3] in lexVerbTags):
|
| 429 |
options = lex.pget(tk[1].lower(), tk[3])
|
|
@@ -468,7 +468,7 @@ def posprocFix():
|
|
| 468 |
else: # ambiguous in the lex - do nothing
|
| 469 |
pos = tk[3]
|
| 470 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 471 |
-
feat = opFEATS[0] if (len(opFEATS) == 1) else tk[5]
|
| 472 |
# do reports and change
|
| 473 |
if (pos != tk[3]):
|
| 474 |
print(b[0], tk[0], tk[1], tk[3], "UPOS", tk[3], pos, sep="\t", file=repfile)
|
|
|
|
| 82 |
### Function - read usual abbreviations
|
| 83 |
#################################################
|
| 84 |
def getUsualAbbr():
|
| 85 |
+
infile = open("usAbbr.tsv", "r")
|
| 86 |
abbr = []
|
| 87 |
for line in infile:
|
| 88 |
if (line[0] == "#"):
|
|
|
|
| 350 |
else: # ambiguous in the lex - do nothing
|
| 351 |
pos = tk[3]
|
| 352 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 353 |
+
feat = featsFull(opFEATS[0], abbr, extpos=extpos) if (len(opFEATS) == 1) else featsFull(tk[5], abbr, extpos=extpos)
|
| 354 |
# fix Pron and Det tags - PRON, DET
|
| 355 |
elif (tk[3] in lexPronDetTags):
|
| 356 |
options = lex.pget(tk[1].lower(), tk[3])
|
|
|
|
| 385 |
else: # ambiguous in the lex - do nothing
|
| 386 |
pos = tk[3]
|
| 387 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 388 |
+
feat = featsFull(opFEATS[0], abbr, extpos=extpos, prontype=prontype) if (len(opFEATS) == 1) else featsFull(tk[5], abbr, extpos=extpos, prontype=prontype)
|
| 389 |
# fix Open tags - ADJ, INTJ, NOUN, NUM
|
| 390 |
elif (tk[3] in lexOpenTags):
|
| 391 |
options = lex.pget(tk[1].lower(), tk[3])
|
|
|
|
| 423 |
else: # ambiguous in the lex - do nothing
|
| 424 |
pos = tk[3]
|
| 425 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 426 |
+
feat = featsFull(opFEATS[0], abbr, extpos=extpos, verbform=None, numtype=None) if (len(opFEATS) == 1) else featsFull(tk[5], abbr, extpos=extpos, verbform=None, numtype=None)
|
| 427 |
# fix Verb tags - AUX, VERB
|
| 428 |
elif (tk[3] in lexVerbTags):
|
| 429 |
options = lex.pget(tk[1].lower(), tk[3])
|
|
|
|
| 468 |
else: # ambiguous in the lex - do nothing
|
| 469 |
pos = tk[3]
|
| 470 |
lem = opLEMMA[0] if (len(opLEMMA) == 1) else tk[2].lower()
|
| 471 |
+
feat = featsFull(opFEATS[0], abbr, extpos=extpos, verbform=None, voicepass=voicepass) if (len(opFEATS) == 1) else featsFull(tk[5], abbr, extpos=extpos, verbform=None, voicepass=voicepass)
|
| 472 |
# do reports and change
|
| 473 |
if (pos != tk[3]):
|
| 474 |
print(b[0], tk[0], tk[1], tk[3], "UPOS", tk[3], pos, sep="\t", file=repfile)
|