def across_all_dir(path, files, n_chromosomes=24):
    """Collect per-chromosome scores from several JSON result files.

    Each file is loaded with ``load_json`` and must contain a
    ``'chromosomesToResults'`` mapping whose values each carry a
    ``'score'`` entry.

    Args:
        path: Directory that contains the result files.
        files: Sequence of file names, joined onto ``path``.
        n_chromosomes: Number of columns in the result matrix. Defaults to
            24, the value that was previously hard-coded.

    Returns:
        A NumPy array of shape ``(len(files), n_chromosomes)``. Row ``i``
        holds the scores of ``files[i]`` in the order in which the
        chromosomes appear in that file's ``'chromosomesToResults'``
        mapping; columns without a corresponding entry stay at zero.

    Raises:
        IndexError: If a file lists more than ``n_chromosomes`` entries.
    """
    scores = np.zeros((len(files), n_chromosomes))
    for row, file_name in enumerate(files):
        results = load_json(join(path, file_name))['chromosomesToResults']
        # Iterate the values directly; the chromosome name itself is not
        # needed, only its position in the mapping.
        for col, chromosome_result in enumerate(results.values()):
            scores[row, col] = chromosome_result['score']
    return scores
def __init__(self):
    """Load up front the resources needed to compute sentence vectors.

    Three lookup tables are read from the paths given in ``config``:
    word -> id (JSON), id -> embedding (pickle) and id -> weight (pickle).
    """
    # Tokenisation is delegated to the clean_seg callable.
    self.tokenizer = clean_seg

    # word2id mapping, stored as JSON.
    self.word_id = load_json(config.word_id_path)

    # id2embed and id2weight tables, stored as pickles.
    self.id_emb = load_pickle(config.id_emb_path)
    self.id_weight = load_pickle(config.id_weight_path)
def abduce_batch(self, json_file_path, ahs, moneys, attrs, months, max_change_num):
    """Run ``self.abduce`` over a batch of samples and collect the results.

    The JSON file at ``json_file_path`` is loaded and turned into a lookup
    table via ``self.build_dict``. The four input sequences are walked in
    lockstep; for each sample the context stored under its ``ah`` key
    (``None`` when the key is absent) is passed to ``self.abduce``.

    Args:
        json_file_path: Path of the JSON file providing per-sample context.
        ahs: Keys used to look up each sample's context.
            NOTE(review): presumably case identifiers -- confirm.
        moneys: Per-sample money values forwarded to ``self.abduce``.
        attrs: Per-sample attributes forwarded to ``self.abduce``.
        months: Per-sample month values forwarded to ``self.abduce``.
        max_change_num: Forwarded unchanged to ``self.abduce``.

    Returns:
        Three lists ``(abduced_attrs, abduced_months, judgement_jsons)``.
        Samples for which ``self.abduce`` returns ``None`` as the abduced
        attribute are dropped, so the lists may be shorter than the inputs
        and are not index-aligned with them.
    """
    context_by_ah = self.build_dict(load_json(json_file_path))

    kept_attrs, kept_months, kept_jsons = [], [], []
    for key, money, attr, month in zip(ahs, moneys, attrs, months):
        new_attr, new_month, verdict_json = self.abduce(
            money, attr, month, context_by_ah.get(key), max_change_num
        )
        # Only keep samples for which abduction produced an attribute.
        if new_attr is not None:
            kept_attrs.append(new_attr)
            kept_months.append(new_month)
            kept_jsons.append(verdict_json)

    return kept_attrs, kept_months, kept_jsons
recorder.write_pair("init_baseline", baseline) recorder.write_pair("init_rate", rate) tmp_json_path = "tmp/abl_predict_%d.json" % (0) filenames_new, ahs_new, money_new, labels_new, attrs_new = \ get_bert_generate_label(perception, test_filename, test_money_filename, tmp_json_path, tags_list) best_mae, best_mse, _, _ = sentence.test(money_new, attrs_new, labels_new, filenames_new, ahs_new) ret_raw, ret_str = get_score("data/" + test_filename, tmp_json_path, "data/tags_for_test.txt") recorder.write_pair("init_MAE", best_mae) recorder.write_pair("init_MSE", best_mse) recorder.write_pair("init_f1_score", ret_str) pretrain_bert_train_data = load_json(perception.data_dir + "/" + pretrain_filename) model_idx = 0 for t in range(abl_times): print("abduction times %d" % t, "model idx %d" % (model_idx)) tmp_json_path = "tmp/abl_train_%d.json" % (t) filenames_new, ahs_new, money_new, labels_new, attrs_new = \ get_bert_generate_label(perception, abl_train_filename, abl_train_money_filename, tmp_json_path, tags_list) t_b = time.time() abductor.set_predict_model(sentence) attrs_abduced, labels_abduced, judgement_jsons = abductor.abduce_batch( tmp_json_path, ahs_new, money_new, attrs_new, labels_new, abl_max_change_num) t_e = time.time()