def test_GetExtension(self):
    """Check getExtension() against three sample file names."""
    # (raw file name, extension the description is expected to report)
    cases = (
        ("1234.train", "train"),
        ("0000.test", "test"),
        ("0003.dev", "dev"),
    )
    for rawFileName, expectedExtension in cases:
        description = FileDescription("mypath", rawFileName)
        self.assertEqual(expectedExtension, description.getExtension())
def testGenerateAnnotatedSentence2(self):
    """
    Load trees 0000.dev .. 0009.dev from ../trees2/ and compare the English
    annotated sentence generated from each tree with its expected text.
    """
    # (tree file name, expected string form of the generated sentence)
    expectedSentences = (
        ("0000.dev",
         "{english=The}{posTag=DT} {english=complicated}{posTag=VBN} {english=language}{posTag=NN} {english=in}{posTag=IN} {english=the}{posTag=DT} {english=huge}{posTag=JJ} {english=new}{posTag=JJ} {english=law}{posTag=NN} {english=has}{posTag=VBZ} {english=muddied}{posTag=VBN} {english=the}{posTag=DT} {english=fight}{posTag=NN} {english=.}{posTag=.}"),
        ("0001.dev",
         "{english=The}{posTag=DT} {english=Ways}{posTag=NNP} {english=and}{posTag=CC} {english=Means}{posTag=NNP} {english=Committee}{posTag=NNP} {english=will}{posTag=MD} {english=hold}{posTag=VB} {english=a}{posTag=DT} {english=hearing}{posTag=NN} {english=on}{posTag=IN} {english=the}{posTag=DT} {english=bill}{posTag=NN} {english=next}{posTag=IN} {english=Tuesday}{posTag=NNP} {english=.}{posTag=.}"),
        ("0002.dev",
         "{english=We}{posTag=PRP} {english='re}{posTag=VBP} {english=about}{posTag=IN} {english=to}{posTag=TO} {english=see}{posTag=VB} {english=if}{posTag=IN} {english=advertising}{posTag=NN} {english=works}{posTag=VBZ} {english=.}{posTag=.}"),
        ("0003.dev",
         "{english=This}{posTag=DT} {english=time}{posTag=NN} {english=around}{posTag=RP} {english=,}{posTag=,} {english=they}{posTag=PRP} {english='re}{posTag=VBP} {english=moving}{posTag=VBG} {english=even}{posTag=RB} {english=faster}{posTag=RBR} {english=.}{posTag=.}"),
        ("0004.dev",
         "{english=Shearson}{posTag=NNP} {english=Lehman}{posTag=NNP} {english=Hutton}{posTag=NNP} {english=Inc.}{posTag=NNP} {english=by}{posTag=IN} {english=yesterday}{posTag=NN} {english=afternoon}{posTag=NN} {english=had}{posTag=VBD} {english=already}{posTag=RB} {english=written}{posTag=VBN} {english=new}{posTag=JJ} {english=TV}{posTag=NN} {english=ads}{posTag=NNS} {english=.}{posTag=.}"),
        ("0005.dev",
         "{english=This}{posTag=DT} {english=time}{posTag=NN} {english=,}{posTag=,} {english=the}{posTag=DT} {english=firms}{posTag=NNS} {english=were}{posTag=VBD} {english=ready}{posTag=JJ} {english=.}{posTag=.}"),
        ("0006.dev",
         "{english=``}{posTag=``} {english=To}{posTag=TO} {english=maintain}{posTag=VB} {english=that}{posTag=DT} {english=dialogue}{posTag=NN} {english=is}{posTag=VBZ} {english=absolutely}{posTag=RB} {english=crucial}{posTag=JJ} {english=.}{posTag=.}"),
        ("0007.dev",
         "{english=It}{posTag=PRP} {english=would}{posTag=MD} {english=have}{posTag=VB} {english=been}{posTag=VBN} {english=too}{posTag=RB} {english=late}{posTag=JJ} {english=to}{posTag=TO} {english=think}{posTag=VB} {english=about}{posTag=IN} {english=on}{posTag=IN} {english=Friday}{posTag=NNP} {english=.}{posTag=.}"),
        ("0008.dev",
         "{english=We}{posTag=PRP} {english=had}{posTag=VBD} {english=to}{posTag=TO} {english=think}{posTag=VB} {english=about}{posTag=IN} {english=it}{posTag=PRP} {english=ahead}{posTag=RB} {english=of}{posTag=IN} {english=time}{posTag=NN} {english=.}{posTag=.} {english=''}{posTag=''}"),
        ("0009.dev",
         "{english=It}{posTag=PRP} {english=goes}{posTag=VBZ} {english=on}{posTag=RB} {english=to}{posTag=TO} {english=plug}{posTag=VB} {english=a}{posTag=DT} {english=few}{posTag=JJ} {english=diversified}{posTag=JJ} {english=Fidelity}{posTag=NNP} {english=funds}{posTag=NNS} {english=by}{posTag=IN} {english=name}{posTag=NN} {english=.}{posTag=.}"),
    )
    for treeFile, expectedText in expectedSentences:
        # The tree under test is kept on the instance, one file at a time.
        self.tree = ParseTreeDrawable(FileDescription("../trees2/", treeFile))
        generated = self.tree.generateAnnotatedSentence("english")
        self.assertEqual(expectedText, str(generated))
def __init__(self, fileDescription, path: str = None):
    """
    Read a parse tree from disk.

    :param fileDescription: a FileDescription locating the tree file, or
        (only when path is None) a plain string holding the file name to read.
        With a plain string no file description is stored on the instance.
    :param path: when given, a new FileDescription is built from this path
        plus the extension and index of fileDescription.
    """
    if path is not None:
        self.__fileDescription = FileDescription(
            path,
            fileDescription.getExtension(),
            fileDescription.getIndex())
        # The file name is resolved against the path of the description that
        # was passed in, exactly as before.
        sourcePath = fileDescription.getPath()
        self.readFromFile(self.__fileDescription.getFileName(sourcePath))
        return

    if isinstance(fileDescription, FileDescription):
        self.__fileDescription = fileDescription
        sourcePath = fileDescription.getPath()
        self.readFromFile(self.__fileDescription.getFileName(sourcePath))
    elif isinstance(fileDescription, str):
        self.readFromFile(fileDescription)
def test_GetFileName(self):
    """
    Check getFileName() with and without an explicit path, and
    getFileNameWithIndex() with and without an explicit extension.
    """
    description = FileDescription("mypath", "0003.train")
    self.assertEqual("mypath/0003.train", description.getFileName())
    self.assertEqual("newpath/0003.train", description.getFileName("newpath"))

    # (index, expected name) when the description's own extension is used
    trainCases = (
        (0, "newpath/0000.train"),
        (20, "newpath/0020.train"),
        (103, "newpath/0103.train"),
    )
    for index, expectedName in trainCases:
        self.assertEqual(
            expectedName,
            description.getFileNameWithIndex("newpath", index))

    # (index, expected name) when the extension is overridden with "dev"
    devCases = (
        (0, "newpath/0000.dev"),
        (20, "newpath/0020.dev"),
        (103, "newpath/0103.dev"),
    )
    for index, expectedName in devCases:
        self.assertEqual(
            expectedName,
            description.getFileNameWithIndex("newpath", index, "dev"))
def setUp(self) -> None:
    """Load the ten trees 0000.dev .. 0009.dev from ../trees/ into self.tree0 .. self.tree9."""
    treeFiles = (
        "0000.dev", "0001.dev", "0002.dev", "0003.dev", "0004.dev",
        "0005.dev", "0006.dev", "0007.dev", "0008.dev", "0009.dev",
    )
    for position, treeFile in enumerate(treeFiles):
        loadedTree = ParseTreeDrawable(FileDescription("../trees/", treeFile))
        # Attribute names are tree0, tree1, ... in file order.
        setattr(self, "tree" + str(position), loadedTree)
def __init__(self, folder: str = None, pattern: str = None):
    """
    Collect the drawable parse trees found under a folder.

    Walks folder recursively. A file is loaded when its base name starts
    with one or more digits followed by a dot and, if pattern is given,
    pattern occurs in its joined path. Trees whose root is None after
    loading are skipped; the others get their name and file description
    set and are appended to self.parseTrees.

    :param folder: directory to walk; when None the tree list stays empty.
    :param pattern: optional substring the full file path must contain.
    """
    self.parseTrees = []
    # The guard used to test the builtin `str`, which is never None, so the
    # default folder=None fell through to os.walk(None) and raised TypeError.
    if folder is None:
        return
    # Compiled once; match() anchors at the start of the base name.
    indexedName = re.compile(r"\d+\.")
    for root, _dirs, files in os.walk(folder):
        for file in files:
            fileName = os.path.join(root, file)
            if pattern is not None and pattern not in fileName:
                continue
            if indexedName.match(file) is None:
                continue
            parseTree = ParseTreeDrawable(fileName)
            if parseTree.getRoot() is not None:
                parseTree.setName(fileName)
                parseTree.setFileDescription(FileDescription(root, file))
                self.parseTrees.append(parseTree)
def test_GetIndex(self):
    """Check getIndex() against five sample file names."""
    # (raw file name, integer index the description is expected to report)
    cases = (
        ("1234.train", 1234),
        ("0000.test", 0),
        ("0003.dev", 3),
        ("0020.train", 20),
        ("0304.dev", 304),
    )
    for rawFileName, expectedIndex in cases:
        description = FileDescription("mypath", rawFileName)
        self.assertEqual(expectedIndex, description.getIndex())
class ParseTreeDrawable(ParseTree):
    """
    Parse tree whose root is a ParseNodeDrawable, read from and written to
    the file named by a FileDescription.
    """

    # Location (path, extension, index) of the file backing this tree.
    __fileDescription: FileDescription
    # Free-form name, set only through setName().
    __name: str

    def __init__(self, fileDescription, path: str = None):
        """
        Read a parse tree from disk.

        :param fileDescription: a FileDescription locating the tree file, or
            (only when path is None) a plain string holding the file name to
            read. With a plain string no file description is stored; call
            setFileDescription() before using methods that need one.
        :param path: when given, a new FileDescription is built from this
            path plus the extension and index of fileDescription.
        """
        if path is None:
            if isinstance(fileDescription, FileDescription):
                self.__fileDescription = fileDescription
                self.readFromFile(
                    self.__fileDescription.getFileName(
                        fileDescription.getPath()))
            elif isinstance(fileDescription, str):
                self.readFromFile(fileDescription)
        else:
            self.__fileDescription = FileDescription(
                path,
                fileDescription.getExtension(),
                fileDescription.getIndex())
            # The file name is resolved against the path of the description
            # that was passed in.
            self.readFromFile(
                self.__fileDescription.getFileName(fileDescription.getPath()))

    def setFileDescription(self, fileDescription: FileDescription):
        """Replace the stored file description."""
        self.__fileDescription = fileDescription

    def getFileDescription(self) -> FileDescription:
        """Return the stored file description."""
        return self.__fileDescription

    def reload(self):
        """Re-read the tree from the file named by the stored description."""
        self.readFromFile(
            self.__fileDescription.getFileName(
                self.__fileDescription.getPath()))

    def readFromFile(self, fileName: str):
        """
        Read the first line of fileName and build the root node from it.

        The text between the first "(" and the last ")" of that line is
        handed to ParseNodeDrawable. If the line lacks either parenthesis,
        root is set to None.

        :param fileName: file to read (opened as UTF-8).
        """
        # Context manager closes the handle even if reading raises.
        with open(fileName, encoding="utf8") as inputFile:
            line = inputFile.readline().strip()
        if "(" in line and ")" in line:
            line = line[line.index("(") + 1:line.rindex(")")].strip()
            self.root = ParseNodeDrawable(None, line, False, 0)
        else:
            self.root = None

    def setName(self, name: str):
        """Store a name for this tree."""
        self.__name = name

    def getName(self) -> str:
        """Return the name stored by setName()."""
        return self.__name

    def nextTree(self, count: int):
        """
        Advance the stored description's index by count and reload, but only
        when the description reports that such a file exists.
        """
        if self.__fileDescription.nextFileExists(count):
            self.__fileDescription.addToIndex(count)
            self.reload()

    def previousTree(self, count: int):
        """
        Move the stored description's index back by count and reload, but
        only when the description reports that such a file exists.
        """
        if self.__fileDescription.previousFileExists(count):
            self.__fileDescription.addToIndex(-count)
            self.reload()

    def save(self):
        """Write the tree as "( <tree> )" to the file named by the stored description."""
        # Context manager closes the handle even if __str__ or write raises.
        with open(self.__fileDescription.getFileName(),
                  mode='w', encoding="utf8") as outputFile:
            outputFile.write("( " + self.__str__() + " )\n")

    def saveWithPath(self, newPath: str):
        """
        Write the tree as "( <tree> )" to the description's file name
        resolved against newPath.
        """
        with open(self.__fileDescription.getFileName(newPath),
                  mode='w', encoding="utf8") as outputFile:
            outputFile.write("( " + self.__str__() + " )\n")

    def maxDepth(self) -> int:
        """
        Return the root's maxDepth(); returns None when the root is not a
        ParseNodeDrawable (including when it is None).
        """
        if isinstance(self.root, ParseNodeDrawable):
            return self.root.maxDepth()
        return None

    def moveLeft(self, node: ParseNode):
        """Ask the root to move node left; does nothing when node is the root."""
        if self.root != node:
            self.root.moveLeft(node)

    def moveRight(self, node: ParseNode):
        """Ask the root to move node right; does nothing when node is the root."""
        if self.root != node:
            self.root.moveRight(node)

    def layerExists(self, viewLayerType: ViewLayerType) -> bool:
        """
        Return the root's layerExists(viewLayerType), or False when the root
        is not a ParseNodeDrawable.
        """
        # isinstance() is already False for None, so no separate None check.
        if isinstance(self.root, ParseNodeDrawable):
            return self.root.layerExists(viewLayerType)
        return False

    def layerAll(self, viewLayerType: ViewLayerType) -> bool:
        """
        Return the root's layerAll(viewLayerType), or False when the root is
        not a ParseNodeDrawable.
        """
        if isinstance(self.root, ParseNodeDrawable):
            return self.root.layerAll(viewLayerType)
        return False

    def clearLayer(self, viewLayerType: ViewLayerType):
        """Forward clearLayer(viewLayerType) to the root when it is a ParseNodeDrawable."""
        if isinstance(self.root, ParseNodeDrawable):
            self.root.clearLayer(viewLayerType)

    def generateAnnotatedSentence(self, language: str = None) -> AnnotatedSentence:
        """
        Build an AnnotatedSentence from the leaves of the tree.

        With language None, leaves are collected with IsTurkishLeafNode and
        every word of each leaf's layer info is added. Otherwise leaves are
        collected with IsEnglishLeafNode and each contributes one word of the
        form "{<language>=<leaf data>}{posTag=<parent data>}".

        :param language: layer name used in the generated words, or None.
        :return: the assembled sentence.
        """
        sentence = AnnotatedSentence()
        if language is None:
            nodeDrawableCollector = NodeDrawableCollector(
                self.root, IsTurkishLeafNode())
            leafList = nodeDrawableCollector.collect()
            for parseNode in leafList:
                if isinstance(parseNode, ParseNodeDrawable):
                    layers = parseNode.getLayerInfo()
                    for i in range(layers.getNumberOfWords()):
                        sentence.addWord(layers.toAnnotatedWord(i))
        else:
            nodeDrawableCollector = NodeDrawableCollector(
                self.root, IsEnglishLeafNode())
            leafList = nodeDrawableCollector.collect()
            for parseNode in leafList:
                if isinstance(parseNode, ParseNodeDrawable):
                    newWord = AnnotatedWord(
                        "{" + language + "=" + parseNode.getData().getName()
                        + "}{posTag="
                        + parseNode.getParent().getData().getName() + "}")
                    sentence.addWord(newWord)
        return sentence

    def extractNodesWithVerbs(self, wordNet: WordNet) -> list:
        """Return the nodes collected from the root with IsVerbNode(wordNet)."""
        nodeDrawableCollector = NodeDrawableCollector(
            self.root, IsVerbNode(wordNet))
        return nodeDrawableCollector.collect()

    def extractNodesWithPredicateVerbs(self, wordNet: WordNet) -> list:
        """Return the nodes collected from the root with IsPredicateVerbNode(wordNet)."""
        nodeDrawableCollector = NodeDrawableCollector(
            self.root, IsPredicateVerbNode(wordNet))
        return nodeDrawableCollector.collect()