def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')
    flags.mark_flag_as_required('prediction_file')
    sources, predictions, _ = score_lib.read_data(FLAGS.prediction_file,
                                                  FLAGS.case_insensitive)
    # Evaluate against the 8 TurkCorpus references.
    ref_filepaths = [
        get_data_filepath('turkcorpus', 'valid', 'simple.turk', i)
        for i in range(8)
    ]
    target_lists = [read_lines(ref_filepath) for ref_filepath in ref_filepaths]
    logging.info(f'Read file: {FLAGS.prediction_file}')
    turk_scores = get_all_scores(orig_sents=sources, sys_sents=predictions,
                                 refs_sents=target_lists)
    logging.info("[turk] {}".format(turk_scores))
    # Evaluate against the 10 ASSET references.
    ref_filepaths = [
        get_data_filepath('asset', 'valid', 'simp', i) for i in range(10)
    ]
    target_lists = [read_lines(ref_filepath) for ref_filepath in ref_filepaths]
    asset_scores = get_all_scores(orig_sents=sources, sys_sents=predictions,
                                  refs_sents=target_lists)
    logging.info("[asset] {}".format(asset_scores))
def test_model(config_file, model, device):
    """
    :param config_file: (string) path to a '.yaml' configuration file.
    :param model: (MalConv).
    :param device: the device that was used for the model.
    """
    # load configurations
    try:
        conf = yaml.safe_load(open(config_file, 'r'))
    except Exception:
        print('Error with test configuration yaml')
        sys.exit()

    classes = utils.read_lines(conf[LABELS])
    i2l = {i: l for i, l in enumerate(classes)}
    l2i = {l: i for i, l in i2l.items()}

    # create loader
    files = utils.read_lines(conf[FILES_LS_PATH])  # list files to predict on
    test_loader = DataLoader(ExeDatasetNoLabels(files, l2i, conf[NUM_BYTES]),
                             batch_size=1, shuffle=False,
                             num_workers=conf[WORKERS])

    # predict
    with open(conf[TARGET_FILE], 'w') as f:
        for x in test_loader:
            # todo check this
            if device is not None:
                x = x.to(device)
            pred = model(x)
            pred_label = torch.max(pred, 1)[1].item()
            f.write('{}\n'.format(pred_label))
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented"))
    filenames = [n for n in filenames if n.endswith(".txt")]
    filenames.sort()

    utils.mkdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed"))

    for filename in pyprind.prog_bar(filenames):
        path_seg = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "segmented", filename)
        path_raw = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "raw", filename)
        path_dst = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed",
                                filename.replace(".txt", ".edus"))

        # Input
        edus = utils.read_lines(path_seg, process=lambda line: line)
        edus = remove_empty_lines(filename, edus)
        raw_lines = utils.read_lines(path_raw, process=lambda line: line)
        raw_lines = remove_empty_lines(filename, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)

        # Processing
        edus = convert_edus(edus, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)

        # Output
        utils.write_lines(path_dst, edus)
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    n_skipped = 0
    for file_i, filename in enumerate(filenames):
        path_s = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".sentence.boundaries"))
        path_p = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".paragraph.boundaries"))
        if not os.path.exists(path_s):
            print("Skipped %s because %s doesn't exist." % (filename, path_s))
            n_skipped += 1
            continue
        if not os.path.exists(path_p):
            print("Skipped %s because %s doesn't exist." % (filename, path_p))
            n_skipped += 1
            continue

        sbnds = utils.read_lines(
            path_s, process=lambda l: tuple([int(x) for x in l.split()]))
        pbnds = utils.read_lines(
            path_p, process=lambda l: tuple([int(x) for x in l.split()]))

        sbnds_proj, n_edus = project_pbnds_to_sbnds(sbnds=sbnds, pbnds=pbnds)
        if sbnds != sbnds_proj:
            print("Projected paragraph boundaries into the sentence boundaries (+%d): %s" % \
                  (len(sbnds_proj) - len(sbnds), path_s))
        test_boundaries(sbnds_proj, n_edus)

        pbnds = replace_subtrees_with_ids(sbnds=sbnds_proj, pbnds=pbnds)

        write_boundaries(
            sbnds,
            os.path.join(
                config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed",
                filename.replace(".edus", ".sentence.noproj.boundaries")))
        write_boundaries(
            sbnds_proj,
            os.path.join(
                config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed",
                filename.replace(".edus", ".sentence.proj.boundaries")))
        write_boundaries(
            pbnds,
            os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                         "preprocessed",
                         filename.replace(".edus", ".paragraph.boundaries")))

    print("Skipped %d files." % n_skipped)
def main(args):
    path = args.path

    filenames = os.listdir(path)
    filenames = [n for n in filenames if n.endswith(".edus.arcs")]
    filenames.sort()

    for filename in pyprind.prog_bar(filenames):
        # EDUs (sub-arcs, tokens, POS tags)
        edus_arcs = utils.read_lines(os.path.join(path, filename),
                                     process=lambda line: line.split())
        edus_tokens = utils.read_lines(
            os.path.join(path, filename.replace(".edus.arcs", ".edus.tokens")),
            process=lambda line: line.split())
        edus_postags = utils.read_lines(
            os.path.join(path, filename.replace(".edus.arcs", ".edus.postags")),
            process=lambda line: line.split())

        heads = []
        for tokens, postags, arcs in zip(edus_tokens, edus_postags, edus_arcs):
            arcs = treetk.hyphens2arcs(arcs)

            # Check: Arcs should be arranged in ascending order wrt dependent
            prev_d = -1
            for h, d, l in arcs:
                assert d > prev_d
                prev_d = d

            head_idx = None

            # If its head is the root, it is the head of the EDU
            for idx, (h, d, l) in enumerate(arcs):
                if h == 0:
                    assert l == "ROOT"  # TODO
                    head_idx = idx
                    break

            # If its head is outside the span, it is the head of the EDU
            if head_idx is None:
                span_min = arcs[0][1]
                span_max = arcs[-1][1]
                for idx, (h, d, l) in enumerate(arcs):
                    if h < span_min or h > span_max:
                        head_idx = idx
                        break

            # One of the two rules above must have found a head
            assert head_idx is not None

            # Head token, POS tag, and dependency relation
            head_token = tokens[head_idx]
            head_postag = postags[head_idx]
            head_deprel = arcs[head_idx][2]
            heads.append((head_token, head_postag, head_deprel))

        # Write
        with open(os.path.join(path,
                               filename.replace(".edus.arcs", ".edus.heads")),
                  "w") as f:
            for token, postag, deprel in heads:
                f.write("%s %s %s\n" % (token, postag, deprel))
def process(path, check_token, check_char, check_boundary):
    filenames = os.listdir(path)
    filenames = [n for n in filenames if n.endswith(".edus.tokens")]
    filenames.sort()

    confliction = False
    for filename in filenames:
        # Gold EDUs
        lines_e = utils.read_lines(os.path.join(path, filename))
        text_e = " ".join(lines_e)  # str
        edus = [l.split() for l in lines_e]  # List[List[str]]

        # Paragraphs
        lines_d = utils.read_lines(
            os.path.join(path, filename.replace(".edus.tokens", ".doc.tokens")))
        text_d = " ".join(lines_d)  # str
        paras = []  # List[List[str]]
        para = [lines_d[0].split()]
        for i in range(1, len(lines_d)):
            line = lines_d[i].split()
            if len(line) == 0 and len(para) == 0:
                continue
            elif len(line) == 0 and len(para) != 0:
                paras.append(para)
                para = []
            else:
                para.append(line)
        if len(para) != 0:
            paras.append(para)

        # Test
        if check_token and not test_tokenlevel_confliction(text_e, text_d):
            print("Found token-level confliction: %s"
                  % os.path.join(path, filename))
            confliction = True
        if check_char and not test_charlevel_confliction(text_e, text_d):
            print("Found char-level confliction: %s"
                  % os.path.join(path, filename))
            confliction = True
        if check_boundary and not test_boundary_confliction(edus, paras):
            print("Found paragraph-boundary confliction: %s"
                  % os.path.join(path, filename))
            confliction = True

    if not confliction:
        print("Found NO confliction: OK")
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".paragraph.boundaries")]
    filenames = [n.replace(".paragraph.boundaries", ".edus") for n in filenames]
    filenames.sort()

    with open(
            os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                         "tmp.preprocessing", "filelist.corenlp2.txt"),
            "w") as ff:
        for filename in filenames:
            # Path
            path_edus = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                     "tmp.preprocessing",
                                     filename + ".tokenized")
            path_sbnds = os.path.join(
                config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed",
                filename.replace(".edus", ".sentence.noproj.boundaries"))
            path_sents = os.path.join(config.getpath("data"),
                                      "ptbwsj_wo_rstdt", "tmp.preprocessing",
                                      filename.replace(".edus", ".sentences"))

            # Read
            edus = utils.read_lines(
                path_edus,
                process=lambda line: line.split())  # list of list of str
            sbnds = utils.read_lines(
                path_sbnds,
                process=lambda line: tuple(int(x) for x in line.split()))  # list of (int, int)

            # Create sentences based on the sentence boundaries
            sentences = []
            for begin_i, end_i in sbnds:
                sentence = edus[begin_i:end_i + 1]  # list of list of str
                sentence = utils.flatten_lists(sentence)  # list of str
                sentences.append(sentence)

            # Write
            with open(path_sents, "w") as fs:
                for sentence in sentences:
                    fs.write("%s\n" % " ".join(sentence))
            ff.write("%s\n" % path_sents)
def __build_feats(datasets_info):
    """ Builds the feature and label vectors from the specified datasets

    Arguments:
    ----------
    datasets_info:
        type: list
        info: list of dictionaries containing:
            - dataset_name (string)
            - dataset_label (string)

    Returns:
    ----------
    samples:
        type: list
        info: contains all the sentences
    labels:
        type: list
        info: contains all the sentences labels
    """

    samples = []
    labels = []

    for info in datasets_info:
        name = info['dataset_name']
        label = info['dataset_label']

        sentences = read_lines(file_name=name, file_type='dataset')
        samples.extend(sentences)
        labels.extend([label] * len(sentences))

    return samples, labels
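# A minimal usage sketch for __build_feats; the dataset names and labels
# below are illustrative placeholders, not taken from the project:
datasets_info = [
    {'dataset_name': 'positive.txt', 'dataset_label': 'pos'},
    {'dataset_name': 'negative.txt', 'dataset_label': 'neg'},
]
samples, labels = __build_feats(datasets_info)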
def clean(dir_):
    """Filters the dir_ directory (splits into dir_cleaned and dir_junk)."""
    dir_cleaned = join(DATA_DIR, dir_ + '_cleaned_final')
    _make_labeled_dir_structure(dir_cleaned)
    dir_junk = join(DATA_DIR, dir_ + '_junk_final')
    _make_labeled_dir_structure(dir_junk)

    black_list = read_lines(IMAGES_BLACKLIST_FILE,
                            line_func=lambda l: l.rstrip())

    for class_dir in CLASSES:
        class_dir_abs = join(DATA_DIR, dir_, class_dir)
        for file_name in listdir(class_dir_abs):
            if not file_name.endswith('.jpg'):
                continue
            src_path = join(class_dir_abs, file_name)
            if _is_clean_image(black_list, src_path):
                dest_path = join(dir_cleaned, class_dir, file_name)
            else:
                dest_path = join(dir_junk, class_dir, file_name)
            copyfile(src_path, dest_path)
def main():
    m = {'#': 1, '.': 0}
    l = lmap(lambda x: [m[i] for i in x[:-1]], read_lines())

    # Collect asteroid coordinates as (x, y)
    ast = []
    for i in range(len(l)):
        for j in range(len(l[0])):
            if l[i][j]:
                ast.append((j, i))

    # Part 1: the best station sees one asteroid per distinct direction key
    s = []
    for i in ast:
        angles = set()
        for j in ast:
            if i != j:
                k = compute_k(i, j)
                angles.add(k)
        s.append(len(angles))
    i = np.argmax(s)
    print(s[i])
    best = ast[i]

    # Part 2: group asteroids by direction from the best station, then find
    # the 200th asteroid to be vaporized
    dd = collections.defaultdict(list)
    for i in ast:
        if i != best:
            k = compute_k(i, best)
            dd[k].append(i)
    d = dict(dd)
    s = sorted([j for i in d.values() for j in compute_angles_group(i, best)])
    print(s[199][1])
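# Hypothetical sketch of the compute_k helper used above (its implementation
# is not shown in the source): it reduces the offset between two asteroids to
# lowest terms, so all asteroids on the same ray share one direction key.
from math import gcd

def compute_k(a, b):
    dx, dy = b[0] - a[0], b[1] - a[1]
    g = gcd(abs(dx), abs(dy))  # g >= 1 since callers guarantee a != b
    return (dx // g, dy // g)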
def csv_to_npz(csv_file_name):
    print("reading file '{}'".format(csv_file_name), flush=True)

    categories = read_lines("/storage/kaggle/quickdraw/categories.txt")

    df = pd.read_csv(
        csv_file_name,
        index_col="key_id",
        converters={
            "word": lambda word: categories.index(word),
            "drawing": lambda drawing: np.array(eval(drawing))
        })
    df = df.rename(columns={"word": "category"})

    key_id = np.array(df.index.values, dtype=np.int64)
    # Use plain object/bool dtypes: the np.object/np.bool aliases were
    # removed in recent NumPy releases.
    drawing = np.array(df.drawing.values, dtype=object)
    category = np.array(df.category.values, dtype=np.int16)
    recognized = np.array(df.recognized.values, dtype=bool)
    countrycode = np.array(df.countrycode.values, dtype=object)

    npz_file_name = csv_file_name[:-4] + ".npz"
    print("writing file '{}'".format(npz_file_name), flush=True)
    np.savez_compressed(
        npz_file_name,
        key_id=key_id,
        drawing=drawing,
        category=category,
        recognized=recognized,
        countrycode=countrycode)

    return None
def main():
    seats = [Seat(line) for line in utils.read_lines()]
    uids = {seat.uid for seat in seats}
    print("Part1: {}".format(max(uids)))

    # Our seat is the missing uid whose two neighbours both exist
    max_uid = Seat('BBBBBBBRRR').uid
    for i in range(8, max_uid):
        if i not in uids and (i - 1) in uids and (i + 1) in uids:
            print('Part2: {}'.format(i))
            break
def part_two():
    def is_valid(line):
        pos_1, pos_2, char, password = parse_policy(line)
        return (password[int(pos_1) - 1] == char) != (password[int(pos_2) - 1] == char)

    return sum(is_valid(line) for line in read_lines(day=2))
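# Hypothetical sketch of the parse_policy helper assumed above, for input
# lines shaped like "1-3 a: abcde"; the real parser may differ:
def parse_policy(line):
    policy, password = line.split(': ')
    positions, char = policy.split(' ')
    pos_1, pos_2 = positions.split('-')
    return pos_1, pos_2, char, password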
def eval_on(config_file, model, device):
    """
    :param config_file: (string) path to a '.yaml' configuration file.
    :param model: (MalConv).
    :param device: the device that was used for the model.
    """
    # load configurations
    try:
        conf = yaml.safe_load(open(config_file, 'r'))
    except Exception:
        print('Error with dev configuration yaml')
        sys.exit()

    classes = utils.read_lines(conf[LABELS])
    i2l = {i: l for i, l in enumerate(classes)}
    l2i = {l: i for i, l in i2l.items()}

    # build a shuffled dev set of (path, label) pairs
    path2label = utils.create_path2label_dict(conf[MAIN_DIR], conf[L2DIR])
    size_dev = len(path2label)
    keys = list(path2label.keys())
    np.random.shuffle(keys)
    dev_set = [(key, path2label[key]) for key in keys]
    fps_dev, y_dev = utils.split_to_files_and_labels(dev_set)
    validloader = DataLoader(ExeDataset(fps_dev, y_dev, l2i, conf[NUM_BYTES]),
                             batch_size=conf[BATCH], shuffle=False,
                             num_workers=conf[WORKERS])

    acc, t = validate_dev_set(validloader, model, device, size_dev,
                              conf[CONF_MAT])
    print('time-dev: {:.2f} dev-acc: {:.4f}'.format(t, acc))
def main(args):
    config = utils.Config()

    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    with open(
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         "concat.edus.heads.deprel"), "w") as f:
        for filename in filenames:
            deprels = utils.read_lines(
                os.path.join(config.getpath("data"), "rstdt", "renamed",
                             filename + ".heads"),
                process=lambda line: line.split()[-1])
            for deprel in deprels:
                f.write("%s\n" % deprel)

    if args.with_root:
        special_words = ["<root>"]
    else:
        special_words = []

    textpreprocessor.create_vocabulary.run(
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.heads.deprel"),
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "deprels.vocab.txt"),
        prune_at=10000000,
        min_count=-1,
        special_words=special_words,
        with_unk=True)
def extract_abstract(path):
    lines = utils.read_lines(path, process=lambda line: line)

    # Collect the lines between the "Abstract" header and the introduction
    abst_lines = []
    in_abstract = False
    for line in lines:
        if line.lower().startswith("abstract"):
            in_abstract = True
            # Remove the beginning token (="Abstract", "ABSTRACT", etc.)
            tokens = line.split()
            tokens = tokens[1:]
            if len(tokens) > 0:
                line = " ".join(tokens)
                abst_lines.append(line)
        elif "introduction" in line.lower() or line.startswith("1"):
            in_abstract = False
            break
        elif in_abstract:
            abst_lines.append(line)

    # Merge lines that were split by end-of-line hyphenation
    while True:
        length = len(abst_lines)
        for i in range(len(abst_lines)):
            if abst_lines[i].endswith("-") and len(abst_lines[i]) != 1 \
                    and abst_lines[i][-2] != "-":
                if i + 1 < len(abst_lines):
                    line = abst_lines[i]
                    line = line[:-1]
                    abst_lines[i + 1] = line + abst_lines[i + 1]
                    abst_lines.pop(i)
                    break
        if length == len(abst_lines):
            break

    abst_text = " ".join(abst_lines)
    return abst_text
def main(args):
    path = args.path

    filenames = os.listdir(path)
    filenames = [n for n in filenames if n.endswith(".edus.arcs")]
    filenames.sort()

    for filename in pyprind.prog_bar(filenames):
        edus_arcs = utils.read_lines(os.path.join(path, filename),
                                     process=lambda line: line.split())
        edus_deprels = []
        for arcs in edus_arcs:
            arcs = treetk.hyphens2arcs(arcs)
            deprels = [l for h, d, l in arcs]
            edus_deprels.append(deprels)

        # Write
        with open(os.path.join(path,
                               filename.replace(".edus.arcs",
                                                ".edus.deprels")),
                  "w") as f:
            for deprels in edus_deprels:
                deprels = " ".join(deprels)
                f.write("%s\n" % deprels)
def test_parse_instructions():
    program = utils.read_lines("data/08_tests.data")
    compiler = Compiler(program)

    instruction = "acc -99"
    expected = {"type": "acc", "argument": -99}
    result = compiler.parse_instruction(instruction)

    assert expected == result
def decode(self, output=None, remove_unk=False, raw_output=False,
           max_test_size=None, **kwargs):
    utils.log('starting decoding')

    # empty `test` means that we read from standard input, which is not
    # possible with multiple encoders
    # assert len(self.src_ext) == 1 or self.filenames.test
    # check that there is the right number of files for decoding
    # assert not self.filenames.test or len(self.filenames.test) == len(self.src_ext)

    output_file = None
    try:
        output_file = sys.stdout if output is None else open(output, 'w')
        paths = self.filenames.test or [None]
        lines = utils.read_lines(paths, binary=self.binary)
        if max_test_size:
            lines = itertools.islice(lines, max_test_size)

        if not self.filenames.test:  # interactive mode
            batch_size = 1
        else:
            batch_size = self.batch_size
            lines = list(lines)

        hypothesis_iter = self.decode_batch(lines, batch_size,
                                            remove_unk=remove_unk)
        for hypothesis, raw in hypothesis_iter:
            if raw_output:
                hypothesis = raw
            output_file.write(hypothesis + '\n')
            output_file.flush()
    finally:
        # only close the file we opened ourselves, never sys.stdout
        if output is not None and output_file is not None:
            output_file.close()
def __init__(self, lang):
    """ Creates a text tokenizer object

    Arguments:
    ----------
    lang:
        type: string
        info: language to perform the tokenizer process
    """
    if lang not in languages:
        exit('Invalid language')

    self.lemmatizer = SnowballStemmer(lang)
    self.tokenizer = TweetTokenizer(
        preserve_case=False,
        reduce_len=True,
        strip_handles=True
    )
    self.stopwords = set(read_lines(
        file_name=lang + '.txt',
        file_type='stopwords'
    ))
def load_train_info():
    train_info = read_lines(TRAIN_INFO_FILE)[1:]  # skip the header line
    parsed_train_info = {}
    for l in train_info:
        split = l.split(',')
        parsed_train_info[split[0]] = split[1]
    return parsed_train_info
def prepare_shards():
    num_shards = 50

    if os.path.isdir("/storage/kaggle/quickdraw/train_simplified_shards"):
        shutil.rmtree("/storage/kaggle/quickdraw/train_simplified_shards")
    os.makedirs("/storage/kaggle/quickdraw/train_simplified_shards")

    categories = read_lines("/storage/kaggle/quickdraw/categories.txt")
    for category in categories:
        csv_file_name = "/storage/kaggle/quickdraw/train_simplified/{}.csv".format(category)
        print("processing file '{}'".format(csv_file_name), flush=True)
        df = pd.read_csv(csv_file_name, index_col="key_id")

        # Split each category file into num_shards random shards
        shard_size = math.ceil(len(df) / num_shards)
        indexes = df.index.values
        np.random.shuffle(indexes)

        for s in range(num_shards):
            start = s * shard_size
            end = min(start + shard_size, len(df))
            shard_df = df[df.index.isin(indexes[start:end])]

            shard_file_name = "/storage/kaggle/quickdraw/train_simplified_shards/shard-{}.csv".format(s)
            write_csv_header = not os.path.isfile(shard_file_name)
            with open(shard_file_name, "a") as shard_file:
                shard_df.to_csv(shard_file, header=write_csv_header)
def main():
    path_len = 100
    nb_feature = 16
    weibo_file = os.path.join(project_folder, 'dataset', 'weibo', 'weibo.txt')
    lines = utils.read_lines(weibo_file)
    x = []
    y = []
    i = 1
    for line in lines:
        print(i)
        i += 1
        line = line.replace('\t', ' ')
        sp = line.split(' ')
        eid = sp[0].split(':')[1]
        label = sp[1].split(':')[1]
        y.append(int(label))
        f = []
        json_file = os.path.join(project_folder, 'dataset', 'weibo', 'Weibo',
                                 eid + '.json')
        text_content = utils.read(json_file)
        json_content = json.loads(text_content)
        for post in json_content[0:path_len]:
            f.append(get_feature(post))
        # pad short sequences with zero feature vectors up to path_len
        if len(f) < path_len:
            for _ in range(path_len - len(f)):
                f.append([0 for _ in range(nb_feature)])
        x.append(f)
    y = numpy.array(y)
    x = numpy.array(x)
    print(x.shape, y.shape)
    numpy.save(os.path.join(project_folder, 'feature', 'weibo', 'x.npy'), x)
    numpy.save(os.path.join(project_folder, 'feature', 'weibo', 'y.npy'), y)
def get_active(dimensions):
    cubes = set()
    for y, row in enumerate(read_lines(day=17)):
        for x, cube in enumerate(row):
            if cube == "#":
                cubes.add(tuple([y, x] + [0 for _ in range(dimensions - 2)]))
    return cubes
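# Usage sketch: part 1 of the puzzle works in 3 dimensions, part 2 in 4;
# every coordinate beyond (y, x) starts at 0.
active_3d = get_active(3)
active_4d = get_active(4)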
def parse_raw_file(file):
    '''
    Parses a raw file into Q/A pairs.

    Raw file format:
        Q: hello
        Q: hi
        A: hello
        A: hi

        Q: ...
        A: ...

    Yields: [q, a]
    '''
    qa = {'q': set(), 'a': set()}
    for line in read_lines(file, yield_null=True):
        if not line:
            # blank line ends a block: emit every question/answer combination
            if len(qa['q']) > 0 and len(qa['a']) > 0:
                for q in qa['q']:
                    for a in qa['a']:
                        yield [q, a]
                qa = {'q': set(), 'a': set()}
        elif line[0] == 'Q':
            if line[3:]:
                qa['q'].add(line[3:])
        elif line[0] == 'A':
            if line[3:]:
                qa['a'].add(line[3:])
    # flush the last block if the file does not end with a blank line
    if len(qa['q']) > 0 and len(qa['a']) > 0:
        for q in qa['q']:
            for a in qa['a']:
                yield [q, a]
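# Usage sketch; 'corpus.txt' is an illustrative filename, assumed to follow
# the Q:/A: format described in the docstring:
for q, a in parse_raw_file('corpus.txt'):
    print('{} -> {}'.format(q, a))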
def answer_analogy_questions(analogies_path, embeddings, words2ids, top_k):
    all_questions_init = read_lines(analogies_path)
    # lowercase everything
    all_questions_low = [[j.lower() for j in i] for i in all_questions_init]
    # get rid of oov
    all_questions = [
        q for q in all_questions_low
        if q[0] == ":" or (q[0] in words2ids and q[1] in words2ids
                           and q[2] in words2ids and q[3] in words2ids)
    ]

    results = []
    group = []
    print('group_name', '1nn%', '10nn%')
    # answer questions, combining them in groups
    for line in all_questions:
        if line[0] == ':':
            if group:
                # if group is not empty, evaluate and print results
                results[-1].extend(
                    answer_questions_in_group(group, embeddings, words2ids,
                                              top_k))
                print(results[-1][0], '%.2f' % results[-1][1],
                      '%.2f' % results[-1][2])
                group = []
            group_name = line[1]
            results.append([group_name])
        else:
            group.append(line)

    # handle last group's results
    results[-1].extend(
        answer_questions_in_group(group, embeddings, words2ids, top_k))
    print(results[-1][0], '%.2f' % results[-1][1], '%.2f' % results[-1][2])

    # print overall results
    n_syntactic = sum(1 for r in results if r[0].startswith('gram'))
    summarize_analogies_results(results, n_syntactic)
def create_averaged_submission(weighted_submissions):
    names, probs, weights = [], [], []
    for file_name, weight in weighted_submissions.items():
        file_path = join(SUBMISSIONS_DIR, file_name)
        lines = read_lines(file_path)[1:]  # skip the CSV header
        weights.append(weight)
        single_probs = []
        # assumes every submission lists the same ids in the same order
        names = []
        for line in lines:
            split = line.rstrip().split(',')
            names.append(split[0])
            single_probs.append(np.array([float(x) for x in split[1:]]))
        probs.append(np.array(single_probs))

    # weighted average of the probabilities over all submissions
    probs = np.array(probs)
    weights = np.array(weights)
    averaged = probs * weights[:, np.newaxis, np.newaxis]
    averaged = np.sum(averaged, axis=0) / np.sum(weights)

    submissions_file = join(SUBMISSIONS_DIR, 'averaged_all.csv')
    create_submission_file(names, averaged, submissions_file)
def ranking_precision_in_pruney(pruney, problem_dir, problem_source):
    precision = 0.0
    for name in pruney:
        problem_file = os.path.join(problem_dir, name)
        problem_lines = read_lines(problem_file)
        if problem_source == "Vampire":
            selected_names = [
                line.split(",")[0].replace("tff(", "")
                for line in problem_lines if "tff" in line and "axiom" in line
            ]
        elif problem_source == "E":
            selected_names = [
                line.split(", ")[0].replace("fof(", "")
                for line in problem_lines
                if "fof" in line and "file" in line and "axiom" in line
            ]
        elif problem_source == "Q_selection":
            selected_names = [
                line.split(",")[0].replace("fof(", "")
                for line in problem_lines if "fof" in line and "axiom" in line
            ]
        print(selected_names)
        proofs = pruney[name]
        temp = [
            len(proof) for proof in proofs
            if proof.issubset(set(selected_names))
        ]
        if temp:
            precision += (max(temp) + 1) / (len(selected_names) + 1)
    precision = precision / len(pruney)
    return precision
def part_two():
    precedences = {
        Token.PLUS: 1,
        Token.ASTERISK: 0,
        Token.LPAREN: 2,
    }
    return sum(run(program, precedences) for program in read_lines(day=18))
def ranking_precision(problem_dir, output_dir, ATP, problem_source):
    filenames = os.listdir(output_dir)
    precision = 0.0
    counter = 0
    for name in filenames:
        output_file = os.path.join(output_dir, name)
        lines = read_lines(output_file)
        if ATP == "E" and "# Proof found!" in lines and \
                "# SZS status Theorem" in lines:
            counter += 1
            useful_names = extract_useful_premises_from_E(lines)
            problem_file = os.path.join(problem_dir, name)
            problem_lines = read_lines(problem_file)
            if problem_source == "Vampire":
                problem_len = compute_selected_problem_from_Vampire(
                    problem_lines)
            elif problem_source == "E":
                problem_len = compute_selected_problem_from_E(problem_lines)
            elif problem_source == "Q_selection":
                problem_len = len(problem_lines)
            precision += len(useful_names) / problem_len
        if ATP == "Vampire" and "% Refutation found. Thanks to Tanya!" \
                in lines:
            counter += 1
            useful_names = extract_useful_premises_from_Vampire(lines)
            problem_file = os.path.join(problem_dir, name)
            problem_lines = read_lines(problem_file)
            if problem_source == "Vampire":
                problem_len = compute_selected_problem_from_Vampire(
                    problem_lines)
            elif problem_source == "E":
                problem_len = compute_selected_problem_from_E(problem_lines)
            elif problem_source == "Q_selection":
                problem_len = len(problem_lines)
            precision += len(useful_names) / problem_len
    precision = precision / counter
    return precision, counter
def load_cache(fn):
    lines = read_lines(fn)
    pairs = [line.strip().split() for line in lines]
    pairs = filter(lambda pair: len(pair) == 2, pairs)
    cache = dict(pairs)
    # Update cache with actual None values instead of the string 'None'
    for k, v in cache.items():
        if v == 'None':
            cache[k] = None
    return cache
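# Usage sketch; 'cache.txt' is an illustrative filename holding one
# whitespace-separated key/value pair per line:
cache = load_cache('cache.txt')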