def main():
    """Run the CYK algorithm and write each result row to ``out-status.txt``.

    Each value of the result mapping is written on its own line, with the
    row's items separated by single spaces (no trailing space).
    """
    resultados = CYK().algoritmo_cyk()
    with open('out-status.txt', 'w') as writer:
        for resultado in resultados.values():
            # ' '.join replaces the original index loop that tracked the
            # last element just to avoid a trailing separator.
            writer.write(' '.join(str(item) for item in resultado))
            writer.write('\n')
def test10(self):
    """Report whether the word 'ab' is in the language of grammar 3."""
    print("===== Test 10 =====")
    self._use_grammar3()
    word = 'ab'
    print('Input string: ' + word)
    accepted = CYK.is_in_cfl(word, self._grammar)
    print('Result = ' + str(accepted))
def test1a(self):
    """Report whether the word 'ab' is in the language of grammar 1."""
    print("===== Test 1a =====")
    self._use_grammar1()
    word = 'ab'
    print('Input string: ' + word)
    accepted = CYK.is_in_cfl(word, self._grammar)
    print('Result = ' + str(accepted))
# NOTE(review): this chunk is truncated — the line below is the tail of an
# argparse add_argument(...) call whose opening is outside this view.
            help='Input sentence (text to parse)')
args = p.parse_args()

# Load the raw treebank corpus, one bracketed tree per list entry.
corpus = []
with open("data/sequoia-corpus+fct.mrg_strict", "r") as file_corpus:
    for line in file_corpus:
        corpus.append(line)

# Keep the first 80% of the corpus as training data for the PCFG.
frac_train = 0.8
N = len(corpus)
nb_train = int(round(N * frac_train))
corpus_train = corpus[:nb_train]

# Building Parser
print("Building PCFG and Parser")
my_CYK_parser = CYK(corpus_train)
print("Done")
print("Start Parsing")
if args.test_sentence:
    sent = args.test_sentence
    print("#################")
    print("Sentence: ")
    print(sent + "\n")
    print("Parsing")
    my_parsing = my_CYK_parser.parse(sent)
    if my_parsing is None:
        print("Found no viable parsing.")
    else:
        # NOTE(review): the success branch continues past the end of this chunk.
# Corpus split: 10% test; length_train / length_dev / corpus_length are
# computed outside this chunk — TODO confirm against the full file.
length_test = int(0.1 * corpus_length)
end_dev = length_train + length_dev
corpus_train = sentences[:length_train]
corpus_dev = sentences[length_train:end_dev]
corpus_test = sentences[end_dev:]

# Get unparsed sentence from eval_corpus.txt (one sentence per line).
with open('data/eval_corpus.txt', 'r') as f:
    file = f.read()
test_sentences = file.split('\n')

# Build the parser with corpus_train
print('Building the parser...')
cyk_parser = CYK(corpus_train)
print('Done')

# Parsing of Evaluation sentences; only sentences that received a viable
# parse are kept in test_sentences_bis and written to the output file.
print('Parsing...')
test_sentences_bis = []
with open('data/evaluation_data.parser.txt', 'w') as f:
    for sentence in test_sentences:
        parsed_sentence = cyk_parser.parse(sentence)
        if parsed_sentence is not None:
            test_sentences_bis.append(sentence)
            f.write('%s\n' % parsed_sentence)
print('Done')
from cyk import CYK

# Demo: a small grammar whose start symbol derives words over {a, b},
# checked and tabulated for one sample word.
start_symbol = "S"
productions = {
    start_symbol: ["VaE", "VbF"],
    "G": ["GG", "a", "b", "VaVb"],
    "E": ["GVa", "a"],
    "F": ["GVb", "b"],
    "Va": ["a"],
    "Vb": ["b"],
}

parser = CYK(productions, start_symbol)
sample = "abacba"
print(parser.checkWord(sample))
parser.outputTable(sample)
# NOTE(review): the five statements below presumably sit inside a loop over
# the gold test parses that begins outside this chunk — confirm against the
# full file before relying on this flattened layout.
T = tagged_sent_to_tree(human_parsing, remove_after_hyphen=True)
real_parsing = tree_to_sentence(T)
real_parsings_test.append(real_parsing)
sent = tagged_to_sentence(real_parsing)
sentences_test.append(sent)

# Dump the plain test sentences, one per line.
with open('results/sentences_test.txt', 'w') as f:
    for item in sentences_test:
        f.write("%s\n" % item)

# Build CYK parser from the training split and time the construction.
print("Build CYK parser")
tic = time.time()
my_CYK_parser = CYK(dataset["train"])
tac = time.time()
print("Done in " + str(round(tac - tic, 2)) + "sec\n")

# Use pyevalb: iterate the test sentences paired with their gold parses.
assert (len(sentences_test) == nb_test)
assert (len(real_parsings_test) == nb_test)
for idx_sentence in range(nb_test):
    print("##############################")
    real_parsing = real_parsings_test[idx_sentence]
    sent = sentences_test[idx_sentence]
    print("Sentence #" + str(idx_sentence))
print(len(treeList)) # print (MATRIX.shape) print(treeList[0]) # run the exps cycling through different parameters for dimension in [8192]: # print ('dimension: ', dimension) for LAMBDA in [0.6]: for filter in [1.5]: # print ('filter: ', filter) # creating parser instance if MODE == "binary": cykInstance = CYK(dimension, LAMBDA, Grammar, filter=filter) else: cykInstance = CYKPlus(dimension, LAMBDA, Grammar, filter=filter) # creating or loading the appropriate matrix of distributed trees # matrix = pickle.load("matrixFile", "rb") for distortRate in [0]: results = runExp(treeList, cykInstance, k_best=2, k_max=2, matrix=MATRIX,
# # loading grammar (binarized) # Grammar = pickle.load(open("binaryGrammar.txt", "rb")) Grammar = pickle.load(open("binaryGrammar23.txt", "rb")) # # # defining parser and dtk parameters # dimension = 1024 # filter = 1.5 # LAMBDA = 0.6 # path = '/Users/lorenzo/Documents/Universita/PHD/Lavori/Codice/pyCYK/' for dimension in [1024, 2048, 4096, 8192, 16384]: for LAMBDA in [0.6]: for filter in [1.5, 2, 2.5]: parser = CYK(dimension, LAMBDA, Grammar, filter=filter) # filename # f_bin = 'binaryoutput_{0}_{1}'.format(dimension, filter) # f_debin = 'debinarized_{0}_{1}'.format(dimension, filter) # # parsing and saving parsingAndSaving(treeListBinarized, "binaryreconstructed.txt", parser) ## loading java from python -> procudes debinarized.txt os.system(javaString) print('\n parametri: dimensione={0}, filter={1}'.format( dimension, filter))
# Build the out-of-vocabulary handler once, before the parsing loop.
oovwords = OOV(embedfilename, vocabulary)

# parse new sentences using CYK based on learned PCFG: one sentence per
# stdin line; blank lines are skipped, unparsable lines print '(None)'.
for line in sys.stdin:
    if line == '\n':
        continue
    parsedtree = CYK(line.split(), lexicon, grammar, vocabulary, symbols, oovwords)
    # Fix: identity comparison with None (`is None`), not equality.
    if parsedtree is None:
        print('(None)')
        continue
    # Undo the Chomsky-normal-form binarization before printing.
    parsedtree.un_chomsky_normal_form()
    print('( ' + parsedtree._pformat_flat(nodesep='', parens='()', quotes=False) + ')')