Exemplo n.º 1
0
def main():
    """Print verb constructions whose "を" argument ends in a サ変接続 morpheme.

    Reads sentence data from standard input through ``knock41``.  For each
    chunk that contains a verb, every source chunk is asked for
    ``getJoshiId()`` (presumably the index of its particle morpheme, with
    ``-1`` meaning "none" -- confirm against knock41).  A source whose
    particle base form is "を" is remembered as the object chunk; the other
    particles are collected together with the punctuation-stripped surface of
    their chunk.

    A line is printed only when at least one other particle was collected,
    an "を" chunk was found, and the morpheme immediately before "を" has
    ``pos1() == "サ変接続"``.  Output format (tab separated)::

        <を-chunk surface><verb base>  <particles>  <particle chunks>
    """
    punctuation = string.punctuation + "「」、"

    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)
        for c in chunkList:
            if not c.hasVerb():
                continue

            baseVerb = c.getMorph(c.firstVerbId())._base
            joshi = {}
            idWoChunk = -1
            idWoMorph = -1
            for srcId in c.sources():
                srcChunk = chunkList[int(srcId)]
                id_joshi = srcChunk.getJoshiId()
                if id_joshi == -1:
                    continue

                baseJoshi = srcChunk.getMorph(id_joshi)._base
                if baseJoshi == "を":
                    # If several sources carry "を", the last one wins.
                    idWoChunk = int(srcId)
                    idWoMorph = id_joshi
                else:
                    # A particle seen twice keeps only its last chunk.
                    joshi[baseJoshi] = srcChunk.origin().strip(punctuation)

            if not joshi or idWoChunk < 0:
                continue
            # "を" at index 0 has no preceding morpheme; without this guard
            # ``idWoMorph - 1`` would be -1 and index the chunk's last
            # morpheme by accident.
            if idWoMorph < 1:
                continue

            morph = chunkList[idWoChunk]._morphs[idWoMorph - 1]
            if morph.pos1() != "サ変接続":
                continue

            # Ordering is by the first character of each particle only, as in
            # the original output format.
            joshi_sorted = sorted(joshi, key=lambda x: x[0])
            jsent_sorted = [joshi[key] for key in joshi_sorted]
            print(
                "%s%s\t%s\t%s" %
                (chunkList[idWoChunk].origin().strip(punctuation),
                 baseVerb, " ".join(joshi_sorted),
                 " ".join(jsent_sorted)))
Exemplo n.º 2
0
def main():
    """Print each qualifying chunk next to the verb chunk it points at.

    Reads sentence data from standard input through ``knock41``.  A chunk is
    reported when ``hasNone()`` is true for it (presumably "has noun" --
    confirm against knock41), its ``dest()`` is a valid index into the
    sentence's chunk list, and the destination chunk ``hasVerb()``.  Each
    report is one line: ``<chunk surface>\\t<destination surface>``.
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)
        for c in chunkList:
            if not c.hasNone():
                continue
            # Skip chunks with no destination or an out-of-range one.
            if not (-1 < c.dest() < len(chunkList)):
                continue
            target = chunkList[c.dest()]
            if target.hasVerb():
                print("%s\t%s" % (c.origin(), target.origin()))
Exemplo n.º 3
0
def main():
    """Print every chunk together with the chunk it depends on.

    Reads sentence data from standard input through ``knock41`` and, for each
    chunk, writes ``<chunk surface>\\t<destination surface>``.  Both surfaces
    are whitespace-stripped.  A chunk whose ``_dst`` is ``-1`` has no
    destination and is paired with the literal ``NULL``.
    """
    for sentData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentData)
        for c in chunkList:
            source = c.origin().strip()
            if c._dst != -1:
                target = chunkList[c._dst].origin().strip()
            else:
                target = "NULL"
            print("%s\t%s" % (source, target))
Exemplo n.º 4
0
def main():
    """Print every chunk together with the chunk it depends on.

    Sentence data is read from standard input through ``knock41``.  One line
    is written per chunk in the form
    ``<chunk surface>\\t<destination surface>``, with both surfaces stripped
    of surrounding whitespace.  The literal ``NULL`` stands in for the
    destination when the chunk's ``_dst`` is ``-1``.
    """
    for sentData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentData)
        for c in chunkList:
            source = c.origin().strip()
            # -1 marks a chunk without a destination.
            has_target = c._dst != -1
            target = chunkList[c._dst].origin().strip() if has_target else "NULL"
            print("%s\t%s" % (source, target))
Exemplo n.º 5
0
def main():
    """Print the path to the root for every qualifying chunk.

    Reads sentence data from standard input through ``knock41``.  For each
    chunk where ``hasNone()`` is true (presumably "has noun" -- confirm
    against knock41), ``getPathToRoot(chunkList, chunk.id())`` supplies a
    sequence of chunk indices; the surfaces of those chunks are printed on
    one line joined by ``->``.
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)
        for chunk in chunkList:
            if not chunk.hasNone():
                continue
            pathId = getPathToRoot(chunkList, chunk.id())
            print("->".join(chunkList[i].origin() for i in pathId))
Exemplo n.º 6
0
def main():
    """Print the path to the root for every qualifying chunk.

    Sentence data comes from standard input through ``knock41``.  Every chunk
    for which ``hasNone()`` is true (presumably "has noun" -- confirm against
    knock41) is passed, by its ``id()``, to ``getPathToRoot``.  The returned
    chunk indices are mapped to chunk surfaces and printed as a single
    ``->``-separated line.
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)
        for chunk in chunkList:
            if chunk.hasNone():
                pathId = getPathToRoot(chunkList, chunk.id())
                pathSurface = [chunkList[i].origin() for i in pathId]
                print("->".join(pathSurface))
Exemplo n.º 7
0
def main():
    """Render the dependency graph of one sentence read from standard input.

    ``sys.argv[1]`` is the 1-based number of the sentence to draw.  For every
    chunk of that sentence that has a destination (``dest() != -1``), a node
    is added for the chunk and for its destination, plus an edge between
    them.  The graph is then rendered under the name ``knock44`` (with
    ``format='png'``, presumably producing ``knock44.png`` -- confirm against
    the ``Digraph`` implementation in use).

    Nothing is rendered when the input has fewer sentences than requested.

    Raises:
        IndexError: if no sentence number was given on the command line.
        ValueError: if the sentence number is not an integer.
    """
    # Parse the argument once instead of on every sentence.
    targetSentence = int(sys.argv[1])
    G = Digraph(format='png')

    sentences = knock41.sentenceDataIterator(sys.stdin)
    for sentNo, sentenceData in enumerate(sentences, start=1):
        if sentNo != targetSentence:
            continue

        chunkList = knock41.createChunkListFromData(sentenceData)
        for c in chunkList:
            if c.dest() == -1:
                continue
            G.node(str(c.id()), c.origin())
            G.node(str(c.dest()), chunkList[c.dest()].origin())
            G.edge(str(c.id()), str(c.dest()))

        # Render under the base name "knock44"; cleanup=True is passed through
        # to Digraph.render as in the original call.
        G.render('knock44', cleanup=True)
        return
Exemplo n.º 8
0
def main():
    """Print the dependency path between every pair of qualifying chunks.

    Reads sentence data from standard input through ``knock41``.  Every pair
    of chunk indices ``i < j`` for which ``hasNone()`` is true on both chunks
    (presumably "has noun" -- confirm against knock41) is examined using
    ``getPathToRoot`` (defined elsewhere; used here as a list of chunk
    indices starting at the given chunk).

    * If ``j`` lies on the path of ``i``, one line is printed::

          <i with "X"> -> <chunks strictly between> -> <j with "Y">

    * Otherwise the first index ``k`` on the path of ``j`` (after ``j``
      itself) that also lies on the path of ``i`` is located and one line is
      printed::

          <path of i up to, excluding, k>|<path of j up to, excluding, k>|<k>

      Nothing is printed for the pair when no such ``k`` exists.

    ``origin("X")`` / ``origin("Y")`` are the chunk surfaces as produced by
    knock41 when given that argument.
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)

        for j in range(1, len(chunkList)):
            if not chunkList[j].hasNone():
                continue
            # The path of j does not depend on i, so compute it once per j.
            pathJ = getPathToRoot(chunkList, j)

            for i in range(j):
                if not chunkList[i].hasNone():
                    continue
                pathI = getPathToRoot(chunkList, i)

                if j in pathI:
                    # Chunks strictly between i and j on the path of i.
                    between = pathI[1:pathI.index(j)]
                    pathSurface = [chunkList[i].origin("X")]
                    pathSurface.extend(
                        chunkList[idx].origin() for idx in between)
                    pathSurface.append(chunkList[j].origin("Y"))
                    print("->".join(pathSurface))
                    continue

                for k in pathJ[1:]:
                    if k not in pathI:
                        continue
                    # k is the first chunk shared by both paths.
                    pathSurfaceI = [chunkList[i].origin("X")]
                    pathSurfaceI.extend(
                        chunkList[idx].origin()
                        for idx in pathI[1:pathI.index(k)])

                    pathSurfaceJ = [chunkList[j].origin("Y")]
                    pathSurfaceJ.extend(
                        chunkList[idx].origin()
                        for idx in pathJ[1:pathJ.index(k)])

                    print("%s|%s|%s" %
                          ("->".join(pathSurfaceI), "->".join(pathSurfaceJ),
                           chunkList[k].origin()))
                    break
Exemplo n.º 9
0
def main():
    """Print each verb with the sorted particles of its source chunks.

    Reads sentence data from standard input through ``knock41``.  For every
    chunk that ``hasVerb()``, the base form of its first verb is taken, and
    each source chunk contributes the base form of the morpheme selected by
    ``getJoshiId()`` (skipped when that id is ``-1``).  When at least one
    such base form was found, a line ``<verb base>\\t<forms>`` is printed with
    the forms sorted and space-separated.
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)
        for c in chunkList:
            if not c.hasVerb():
                continue

            baseVerb = c.getMorph(c.firstVerbId())._base
            particles = []
            for srcId in c.sources():
                srcChunk = chunkList[int(srcId)]
                particleId = srcChunk.getJoshiId()
                if particleId != -1:
                    particles.append(srcChunk.getMorph(particleId)._base)

            if particles:
                particles.sort()
                print("%s\t%s" % (baseVerb, " ".join(particles)))
Exemplo n.º 10
0
def main():
    """Print each verb with its source particles and their chunks.

    Reads sentence data from standard input through ``knock41``.  For every
    chunk that ``hasVerb()``, the base form of its first verb is taken.  Each
    source chunk whose ``getJoshiId()`` is not ``-1`` contributes the base
    form of that morpheme and the chunk's own surface.  When anything was
    collected, one tab-separated line is printed::

        <verb base>  <base forms>  <chunk surfaces>

    Both lists are space-separated and kept in source order (not sorted).
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)
        for c in chunkList:
            if not c.hasVerb():
                continue

            baseVerb = c.getMorph(c.firstVerbId())._base
            # (particle base form, chunk surface) pairs, in source order.
            found = []
            for srcId in c.sources():
                srcChunk = chunkList[int(srcId)]
                particleId = srcChunk.getJoshiId()
                if particleId == -1:
                    continue
                found.append(
                    (srcChunk.getMorph(particleId)._base, srcChunk.origin()))

            if not found:
                continue
            particleText = " ".join(base for base, _ in found)
            chunkText = " ".join(surface for _, surface in found)
            print("%s\t%s\t%s" % (baseVerb, particleText, chunkText))
Exemplo n.º 11
0
def main():
    """Print the dependency path between every pair of qualifying chunks.

    Sentence data is read from standard input through ``knock41``.  All index
    pairs ``i < j`` whose chunks both satisfy ``hasNone()`` (presumably "has
    noun" -- confirm against knock41) are compared via ``getPathToRoot``
    (defined elsewhere; used here as a list of chunk indices starting at the
    given chunk).

    * When ``j`` is on the path of ``i``, the printed line is the surface of
      ``i`` (via ``origin("X")``), the chunks strictly between, and the
      surface of ``j`` (via ``origin("Y")``), joined by ``->``.
    * Otherwise the first index ``k`` after ``j`` on the path of ``j`` that
      also appears on the path of ``i`` is used, and the printed line is
      ``<path of i before k>|<path of j before k>|<surface of k>``.  A pair
      without such a ``k`` prints nothing.
    """
    for sentenceData in knock41.sentenceDataIterator(sys.stdin):
        chunkList = knock41.createChunkListFromData(sentenceData)

        for j in range(1, len(chunkList)):
            if not chunkList[j].hasNone():
                continue
            # Loop-invariant for the inner loop: depends on j only.
            pathJ = getPathToRoot(chunkList, j)

            for i in range(j):
                if not chunkList[i].hasNone():
                    continue
                pathI = getPathToRoot(chunkList, i)

                if j in pathI:
                    inner = [chunkList[idx].origin()
                             for idx in pathI[1:pathI.index(j)]]
                    pathSurface = (
                        [chunkList[i].origin("X")]
                        + inner
                        + [chunkList[j].origin("Y")]
                    )
                    print("->".join(pathSurface))
                    continue

                for k in pathJ[1:]:
                    if k not in pathI:
                        continue
                    # First chunk shared by both paths: report and stop.
                    pathSurfaceI = [chunkList[i].origin("X")] + [
                        chunkList[idx].origin()
                        for idx in pathI[1:pathI.index(k)]
                    ]
                    pathSurfaceJ = [chunkList[j].origin("Y")] + [
                        chunkList[idx].origin()
                        for idx in pathJ[1:pathJ.index(k)]
                    ]
                    print("%s|%s|%s" % (
                        "->".join(pathSurfaceI),
                        "->".join(pathSurfaceJ),
                        chunkList[k].origin(),
                    ))
                    break