# NOTE(review): this is a script excerpt whose line breaks and indentation were
# lost; as written, everything after the first '#' on the line is a comment to
# the interpreter. The statement boundaries described here are inferred from
# the keywords and should be confirmed against the original file.
#
# Visible logic:
#   * resets the float accumulators j, correct, half_correct, all_correct and
#     cutoff (the counter `c` is incremented below but is not initialised in
#     this excerpt);
#   * loops over (s_full, s_disamb) pairs from `sents`, printing progress every
#     100 items;
#   * derives s_name by removing LFG_DIR + 'parses/' + PARSES_VERSION + '/'
#     from s_full, then builds s_prob under LFG_DIR + 'stochastic/' from
#     parse_dir, wf, s_name without its last three characters, and PROB_SUFFIX;
#   * loads s_prob with read_prolog_file(s_prob, True); an IOError or an empty
#     data.text appears to skip the pair via `continue`;
#   * for each entry of data.opts, starts weight at 0 and subtracts
#     data.choices_weights[o] for every o present in data.choices_weights.
# The excerpt ends inside that loop; how weight and weighted_constrs are used
# afterwards is not visible here.
# NOTE(review): the progress message prints len(sentences) although the loop
# iterates `sents` - confirm the two collections have the same length.
j = 0.0 correct = 0.0 half_correct = 0.0 all_correct = 0.0 cutoff = 0.0 for s_full, s_disamb in sents: c += 1 if c % 100 == 0: print c, '/', len(sentences) s_name = s_full.replace( LFG_DIR + 'parses/' + PARSES_VERSION + '/', '') s_prob = LFG_DIR + 'stochastic/' + parse_dir + wf + '/' + s_name[: -3] + PROB_SUFFIX data = None try: data = read_prolog_file(s_prob, True) except IOError: #print 'IOError' continue if not data.text: #print 'Oh noes!' continue text = data.text solutions = data.num_solutions #print '*', len(data.opts), data.num_solutions weighted_constrs = {} for opts in data.opts: weight = 0 for o in opts: if o in data.choices_weights: weight -= data.choices_weights[o]
# NOTE(review): this is a script excerpt whose line breaks and indentation were
# lost; as written, everything after the first '#' on the line is a comment to
# the interpreter. The statement boundaries described here are inferred from
# the keywords and should be confirmed against the original file.
#
# Visible logic:
#   * creates tex_dir with a shell `mkdir` and copies avm.sty into it with a
#     shell `cp` from a hard-coded absolute path;
#   * loops over `sentences`, incrementing sent_count; names found in
#     skip_sets[sents_dir] appear to be skipped, the rest are announced with a
#     progress line;
#   * builds `path` from a hard-coded parses directory, sents_dir, sent_name
#     and file_suff; the inline Polish note "dla disamb trzeba dis" reads
#     roughly "for disamb, 'dis' is needed";
#   * probes the file with open(path, 'r') - the returned handle is neither
#     kept nor closed - and on any exception prints "CAN'T OPEN:" and moves on;
#   * parses the file with read_prolog_file(path, max_solutions=20000),
#     skipping a None result, and stores num_solutions in
#     sentences_solutions[sent_name];
#   * skips sentences with fewer than 1 solution, increments `processed`, then
#     skips sentences with more than 20000 solutions;
#   * resets num_consistent and count and starts building skladnica_path.
# The excerpt stops in the middle of the skladnica_path expression (trailing
# line continuation), so the rest of the loop body is not visible here.
# NOTE(review): both os.system calls concatenate tex_dir into a shell command;
# os.makedirs / shutil.copy would avoid the shell - confirm before changing.
os.system('mkdir ' + tex_dir) os.system('cp /home/kasia/Dokumenty/Robota/LFG/parses/avm.sty ' + tex_dir) processed = 0 for sent_name in sentences: sent_count += 1 if sent_name in skip_sets[sents_dir]: #print 'SKIP:', sent_name continue print '-----', sent_count, '/', num_sents, sent_name path = '/home/kasia/Dokumenty/Robota/LFG/parses/%s/' % sents_dir + sent_name + file_suff #dla disamb trzeba dis try: open(path, 'r') except: print 'CAN\'T OPEN:', path continue prolog_data = read_prolog_file(path, max_solutions=20000) if prolog_data is None: continue num_solutions, text = prolog_data.num_solutions, prolog_data.text #num_solutions, text, constraint_list, phis, other_list, choices_list, nodes, root_id, opts, all_lines = read_prolog_file(path) sentences_solutions[sent_name] = num_solutions if (num_solutions < 1): continue processed += 1 if (num_solutions > 20000): continue num_consistent = 0.0 count = 0 #==== skladnica_path = '/home/kasia/Dokumenty/Robota/Świgrowe/zrobione%s/' % skladnica_v + \ sent_name.replace('Skladnica--FULL', 'NKJP_1M') \
# Debug helper: collect the sentence names listed in `sents_file`, then, for a
# fixed slice of them, read the solution count from the forest's "statistics"
# line and dump the parsed forest when the count falls in the wanted range.
#
# The excerpt arrived with its line breaks and indentation stripped; the
# structure below is reconstructed from the statement keywords.
# NOTE(review): the final dump of prolog_data.nodes is placed after the node
# loop (printed once per forest) - confirm against the original layout.
MAX_SOLUTIONS = 8

sentences = []
with open(sents_file, 'r') as f:
    # Iterate the file lazily instead of materialising it with readlines().
    for l in f:
        if l.startswith('#'):
            # Lines starting with '#' are comments in the sentence list.
            continue
        name_match = re.match(r'.*(Skladnica.*-s).*', l)
        if name_match is None:
            # re.match returns None when the line holds no sentence name
            # (e.g. a blank line); calling .group(1) on it used to raise
            # AttributeError.
            continue
        s_name = name_match.group(1)
        sentences.append(s_name)

i = 0
# Hard-coded window of sentences to inspect.
for s_name in sentences[3917:3930]:
    i += 1
    if i % 200 == 0:
        # Single-argument print(...) behaves the same as the print statement
        # under Python 2 and also parses under Python 3.
        print(i)
    full_forest = sents_dir + s_name + '.pl'
    solutions = 0
    with open(full_forest, 'r') as forest:
        # Scan for the statistics line and stop reading as soon as it is found.
        l = forest.readline()
        while l:
            if l.startswith('\t\'statistics'):
                # The count sits between column 15 and the text ' solutions'.
                solutions = int(l[15:l.find(' solutions')])
                break
            l = forest.readline()
    if 7 < solutions <= MAX_SOLUTIONS:
        print(full_forest)
        prolog_data = read_prolog_file(full_forest, quiet=True,
                                       max_solutions=MAX_SOLUTIONS)
        print(prolog_data.text)
        for n_id, node in prolog_data.nodes.items():
            print(node.short())
        print(prolog_data.nodes)
# NOTE(review): this is a script excerpt whose line breaks and indentation were
# lost; as written, everything after the first '#' on the line is a comment to
# the interpreter. The statement boundaries described here are inferred from
# the keywords and should be confirmed against the original file.
#
# Visible logic:
#   * announces the check, then loops over the file names in `rfs`,
#     incrementing `i` and printing "i / n" progress every 250 items;
#   * takes s_name as the part of the file name up to and including the "-s"
#     of the first "-s-" occurrence (find() index + 2);
#   * names whose s_name is not a key of solutions_sum appear to be skipped;
#     the others are appended to ranked_forests[s_name];
#   * the inline Polish note "TYLKO NA RAZIE TU PRZERYWAMY, ZEBY BYLO SZYBCIEJ"
#     reads roughly "only breaking off here for now, to make it faster" and
#     accompanies a commented-out `continue`;
#   * parses ranked_dir + ranked_forest with read_prolog_file(quiet=True,
#     max_solutions=1000000), skipping a None result;
#   * for each option set o in prolog_data.opts, calls build_fs on the
#     constraints c from prolog_data.constraint_list whose c.cond intersects o;
#   * on any exception, the first failure for an s_name prints 'EXCEPTION' and
#     o, and creates an empty set in exc_forests[s_name].
# The excerpt ends inside the exception handler; the rest of the handler and
# of the loop is not visible here.
# NOTE(review): if "-s-" is absent, find() returns -1 and s_name becomes the
# first character of the file name - confirm all names in `rfs` contain it.
print 'Checking solutions sums' for ranked_forest in rfs: i += 1 if (i % 250 == 0): print i, '/', n j = ranked_forest.find('-s-') s_name = ranked_forest[:(j + 2)] if not s_name in solutions_sum: #print ranked_forest continue ranked_forests[s_name].append(ranked_forest) #TYLKO NA RAZIE TU PRZERYWAMY, ZEBY BYLO SZYBCIEJ #continue prolog_data = read_prolog_file(ranked_dir + ranked_forest, quiet=True, max_solutions=1000000) if prolog_data is None: continue #print ranked_forest, len(prolog_data.opts) for o in prolog_data.opts: try: fs, eqs_dict = build_fs([ c for c, l_no in prolog_data.constraint_list if c.cond.intersection(o) ]) except: if not s_name in exc_forests: print 'EXCEPTION' print o exc_forests[s_name] = set()
# NOTE(review): this is a script excerpt whose line breaks and indentation were
# lost; as written, everything after the first '#' on the line is a comment to
# the interpreter. The statement boundaries described here are inferred from
# the keywords and should be confirmed against the original file.
#
# Visible logic:
#   * starts with an empty exc_forests dict, announces the check, then loops
#     over the file names in `rfs`, incrementing `i` and printing "i / n"
#     progress every 250 items;
#   * takes s_name as the part of the file name up to and including the "-s"
#     of the first "-s-" occurrence (find() index + 2);
#   * names whose s_name is not a key of solutions_sum appear to be skipped;
#     the others are appended to ranked_forests[s_name];
#   * the inline Polish note "TYLKO NA RAZIE TU PRZERYWAMY, ZEBY BYLO SZYBCIEJ"
#     reads roughly "only breaking off here for now, to make it faster" and
#     accompanies a commented-out `continue`;
#   * parses ranked_dir + ranked_forest with read_prolog_file(quiet=True,
#     max_solutions=1000000), skipping a None result;
#   * for each option set o in prolog_data.opts, calls build_fs on the
#     constraints c from prolog_data.constraint_list whose c.cond intersects o;
#   * on any exception, the first failure for an s_name prints 'EXCEPTION' and
#     o and creates exc_forests[s_name]; the handler also contains a bare
#     `raise`, a traceback stored in exceptions[s_name], and an add of
#     ranked_forest to exc_forests[s_name];
#   * adds len(prolog_data.opts) to solutions_sum[s_name].
# NOTE(review): the nesting of `raise` cannot be recovered from this text. If
# the two statements that follow it sit at the same level, they are
# unreachable; confirm which of them are meant to run.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit -
# confirm that is intended.
exc_forests = dict() print 'Checking solutions sums' for ranked_forest in rfs: i += 1 if (i % 250 == 0): print i, '/', n j = ranked_forest.find('-s-') s_name = ranked_forest[:(j + 2)] if not s_name in solutions_sum: #print ranked_forest continue ranked_forests[s_name].append(ranked_forest) #TYLKO NA RAZIE TU PRZERYWAMY, ZEBY BYLO SZYBCIEJ #continue prolog_data = read_prolog_file(ranked_dir + ranked_forest, quiet=True, max_solutions=1000000) if prolog_data is None: continue #print ranked_forest, len(prolog_data.opts) for o in prolog_data.opts: try: fs, eqs_dict = build_fs([c for c, l_no in prolog_data.constraint_list if c.cond.intersection(o)]) except: if not s_name in exc_forests: print 'EXCEPTION' print o exc_forests[s_name] = set() raise exceptions[s_name] = traceback.format_exc() exc_forests[s_name].add(ranked_forest) solutions_sum[s_name] += len(prolog_data.opts)
# NOTE(review): this is a script excerpt whose line breaks and indentation were
# lost; because the line itself starts with '#', the interpreter currently sees
# the whole line as a comment. The statement boundaries described here are
# inferred from the keywords and should be confirmed against the original file.
#
# Visible logic:
#   * a disabled initialisation of avg_first_rank, followed by resets of the
#     counter c and the float accumulators j, correct, half_correct,
#     all_correct and cutoff;
#   * loops over (s_full, s_disamb) pairs from `sents`, incrementing c and
#     printing progress every 100 items;
#   * derives s_name by removing LFG_DIR + 'parses/' + PARSES_VERSION + '/'
#     from s_full, then builds s_prob under LFG_DIR + 'stochastic/' from
#     parse_dir, wf, s_name without its last three characters, and PROB_SUFFIX;
#   * loads s_prob with read_prolog_file(s_prob, True); an IOError or an empty
#     data.text appears to skip the pair via `continue`;
#   * for each entry of data.opts, starts weight at 0 and subtracts
#     data.choices_weights[o] for every o present in data.choices_weights.
# The excerpt ends inside that loop; how weight and weighted_constrs are used
# afterwards is not visible here.
# NOTE(review): the progress message prints len(sentences) although the loop
# iterates `sents` - confirm the two collections have the same length.
#avg_first_rank = 0.0 c = 0 j = 0.0 correct = 0.0 half_correct = 0.0 all_correct = 0.0 cutoff = 0.0 for s_full, s_disamb in sents: c += 1 if c % 100 == 0: print c, '/', len(sentences) s_name = s_full.replace(LFG_DIR + 'parses/' + PARSES_VERSION + '/', '') s_prob = LFG_DIR + 'stochastic/' + parse_dir + wf + '/' + s_name[:-3] + PROB_SUFFIX data = None try: data = read_prolog_file(s_prob, True) except IOError: #print 'IOError' continue if not data.text: #print 'Oh noes!' continue text = data.text solutions = data.num_solutions #print '*', len(data.opts), data.num_solutions weighted_constrs = {} for opts in data.opts: weight = 0 for o in opts: if o in data.choices_weights: weight -= data.choices_weights[o]