/usr/lib/python2.7/dist-packages/frog/pairs.py is in frog 0.12.17-7.1build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
"""
Generate instances for the pairwise dependency prediction task.
usage: %prog [options] [file...]
-mDIST, --max-dist=DIST: maximum distance between head and dependent
-x, --exclude-non-scoring: do not generate instance for non-scoring
tokens
-t, --test: test data mode: the input does not contain HEAD and DEPREL
columns
-s, --separate-sentences: separate instances from different sentences by
an empty line
Features:
--bigram: add a head-dependent bigram feature
--feats-bigram: add a head-dependent bigram of the FEATS column
---
:Refinements:
-m: type='int', dest='maxDist'
-x: action='store_true', default=False, dest='skipNonScoring'
-t: action='store_true', default=False
-s: action='store_true', default=False, dest='separateSentences'
--bigram: action='store_true', default=False
--feats-bigram: action='store_true', default=False, dest='featsBigram'
"""
import fileinput
from itertools import izip
from operator import itemgetter
from sentences import sentenceIterator, makeWindow
import common
from common import * # COLUMN INDEXES ONLY
# Placeholder emitted for every head-side feature of an instance whose
# candidate head is the artificial root.
_ROOT = "ROOT"


def _headValue(head, column):
    """Return head[column], or "ROOT" when head is None (artificial root)."""
    if head is None:
        return _ROOT
    return head[column]


def _instanceFeatures(sentence, dependent, head, options):
    """Build the feature list for one candidate (dependent, head) pair.

    sentence  -- the sentence the tokens belong to, as passed to makeWindow
    dependent -- token row of the candidate dependent
    head      -- token row of the candidate head, or None for the
                 artificial root; every head-side feature is then "ROOT"
    options   -- parsed options; only bigram and featsBigram are read here

    Returns a list of strings in output column order.
    """
    # ID column values are 1-based; makeWindow is given a 0-based position.
    dependentId = int(dependent[ID]) - 1
    isRoot = head is None
    if not isRoot:
        headId = int(head[ID]) - 1

    features = []
    # Word-form windows first, then POS-tag windows; within each, the
    # dependent's window precedes the head's.
    for column in (FORM, POSTAG):
        getColumn = itemgetter(column)
        features.extend(makeWindow(sentence, dependentId, 1, 1, getColumn))
        if isRoot:
            # Three placeholders stand in for the head window (presumably
            # the width of a 1+1 window -- confirm against makeWindow).
            features.extend([_ROOT, _ROOT, _ROOT])
        else:
            features.extend(makeWindow(sentence, headId, 1, 1, getColumn))

    # POS-tag bigram of dependent and head
    features.append("%s^%s" % (dependent[POSTAG], _headValue(head, POSTAG)))

    # relative position, distance
    if isRoot:
        features.extend([_ROOT, _ROOT])
    else:
        features.append("RIGHT" if dependentId < headId else "LEFT")
        features.append(str(abs(dependentId - headId)))

    # Optional bigrams; the head/dependent columns are only read when the
    # corresponding option is switched on.
    if options.bigram:
        features.append("^".join([_headValue(head, FORM), dependent[FORM]]))
    if options.featsBigram:
        features.append("^".join([_headValue(head, FEATS), dependent[FEATS]]))
    return features


def _relationLabel(dependent, head, options):
    """Return the class label for one candidate (dependent, head) pair.

    In test mode the label is always "?" and the HEAD/DEPREL columns are
    not read. Otherwise it is the dependent's DEPREL when its HEAD column
    names this candidate ("0" for the artificial root, else the head's ID
    column), and "__" when it does not.
    """
    if options.test:
        return "?"
    expectedHead = "0" if head is None else head[ID]
    if dependent[HEAD] == expectedHead:
        return dependent[DEPREL]
    return "__"


def _printInstance(sentence, dependent, head, options):
    """Print one instance line: space-separated features, then the label."""
    features = _instanceFeatures(sentence, dependent, head, options)
    rel = _relationLabel(dependent, head, options)
    # Single-argument parenthesized form: as a Python 2 print statement this
    # writes the same text as `print " ".join(features), rel`.
    print("%s %s" % (" ".join(features), rel))


def main(options, args):
    """Print pairwise dependency instances for the sentences in the input.

    options -- parsed command-line options; test, bigram, featsBigram and
               separateSentences are read here, and the whole object is
               passed on to common.pairIterator
    args    -- input file names, handed to fileinput.input

    For each sentence, one instance per token is printed with the
    artificial root as candidate head, followed by one instance for every
    (dependent, head) pair yielded by common.pairIterator. When
    options.separateSentences is set, an empty line follows each sentence.
    """
    for sentence in sentenceIterator(fileinput.input(args)):
        # every token paired with the artificial root
        for dependent in sentence:
            _printInstance(sentence, dependent, None, options)

        # every candidate token pair selected by pairIterator
        for dependent, head in common.pairIterator(sentence, options):
            _printInstance(sentence, dependent, head, options)

        if options.separateSentences:
            print("")
if __name__ == "__main__":
    # Script entry point: cmdline is imported only when the module is run
    # directly. Whatever cmdline.parse() returns is unpacked into main's
    # positional parameters (presumably an (options, args) pair).
    import cmdline
    parsedArguments = cmdline.parse()
    main(*parsedArguments)
|