def test(self, dev_instances, dev_file):
    """Extract features for dev_instances, predict labels, and write both
    the output file and the submission file."""
    ''' 1. Extract Features '''
    self.make_feature_file(dev_instances, dev_file, dev=True)
    self.output_file = self.get_output_file(dev_file)
    print(self.output_file)

    ''' 2. Predict Answers '''
    predict_label = self.classifier.test_model(self.dev_feature_file,
                                               self.model_file,
                                               self.output_file)

    f_out = utils.create_write_file(self.output_file)
    for label, dev_instance in zip(predict_label, dev_instances):
        print('{:d}\t#\t{}'.format(label, dev_instance.get_instance_string()),
              file=f_out)
    f_out.close()

    submit_file = self.output_file.replace('.txt', '.submit')
    f_out = utils.create_write_file(submit_file)
    print('#id correctLabelW0orW1', file=f_out)
    for label, dev_instance in zip(predict_label, dev_instances):
        print('{}\t{}'.format(dev_instance.get_id(), label), file=f_out)
    f_out.close()
    return predict_label
def train(self, train_instances, train_file, out_list=None):
    """out_list selects a subset of train_instances from the full training
    set; it only takes effect after all features have been made."""
    ''' 1. Extract Features '''
    self.make_feature_file(train_instances, train_file)

    if out_list:
        dev = utils.create_read_file(self.train_feature_file).readlines()
        dev = [dev[idx].strip() for idx in range(len(dev)) if idx not in out_list]
        f_dev = utils.create_write_file(self.train_feature_file)
        print('\n'.join(dev), file=f_dev)
        f_dev.close()
        print('finish filter, train examples %d' % len(dev))

    ''' 2. Train Classifier '''
    self.classifier.train_model(self.train_feature_file, self.model_file)

    ''' 3. Predict Answers '''
    self.output_file = self.get_output_file(train_file)
    predict_label = self.classifier.test_model(self.train_feature_file,
                                               self.model_file,
                                               self.output_file)

    f_out = utils.create_write_file(self.output_file)
    for label, train_instance in zip(predict_label, train_instances):
        print('%.2f\t#\t%s' % (label, train_instance.get_instance_string()),
              file=f_out)
    f_out.close()
    return self.classifier
def extract_information(self, train_instances):
    if self.is_training:
        sents = []
        for train_instance in train_instances:
            warrant0, warrant1, reason, claim, title, info = \
                train_instance.get_six(type='word')
            sents.append(warrant0)
            sents.append(warrant1)
            sents.append(reason)
            sents.append(claim)
        idf_dict = utils.idf_calculator(sents)
        # idf_dict = sorted(idf_dict.items(), key=lambda x: x[1], reverse=True)
        with utils.create_write_file(config.RESOURCE_DIR + '/idf_dict.txt') as fw:
            for key in idf_dict:
                print('{}\t{}'.format(key, idf_dict[key]), file=fw)
        print(len(idf_dict))
    else:
        with utils.create_read_file(config.RESOURCE_DIR + '/idf_dict.txt') as fr:
            idf_dict = {}
            for line in fr:
                line = line.strip().split('\t')
                idf_dict[line[0]] = float(line[1])
    self.unigram_dict = idf_dict
def write_feature_to_file(feature_file, features, infos):
    """Write feature strings to file."""
    if type(features[0]) is list:
        dim = len(features[0])
    else:
        dim = infos[0][0]

    f_feature = utils.create_write_file(feature_file)

    ''' write feature information to file '''
    print(len(features), dim, file=f_feature)

    ''' write feature strings to file '''
    for feature, info in zip(features, infos):
        if type(feature) is list:
            feature_string = Feature._feat_list_to_string(feature)
        elif type(feature) is str:
            feature_string = feature
        else:
            raise NotImplementedError
        info_string = Feature._info_list_to_string(info)
        print(feature_string + '\t#\t' + info_string, file=f_feature)
    f_feature.close()
def __create_dict(*args, **kwargs):
    print("====> create dict for function [{}]".format(func.__name__))
    ret = func(*args, **kwargs)

    ''' remove items whose frequency is less than the threshold '''
    if 'threshold' in kwargs:
        threshold = kwargs['threshold']
        # iterate over a copy of the keys: popping while iterating over
        # dict.keys() raises RuntimeError in Python 3
        for key in list(ret.keys()):
            if ret[key] < threshold:
                ret.pop(key)

    ''' write dict to file '''
    file_name = 'dict_{}.txt'.format(func.__name__)
    f_dict = utils.create_write_file(file_name)
    if type(ret) == list:
        # ensure items are unique, then sort
        ret = sorted(set(ret))
        for item in ret:
            print(str(item), file=f_dict)
    elif type(ret) == dict:
        # write the dict in key order
        for item in sorted(ret.keys()):
            print('%s\t%s' % (item, ret[item]), file=f_dict)
    else:
        raise NotImplementedError
    f_dict.close()
    print("====> write file {}, {:d} instances".format(file_name, len(ret)))
    return ret
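# A minimal sketch (assumption) of the enclosing decorator implied by
# __create_dict above: ``func`` is a free variable of the inner function, so
# a factory like this must bind it. The name ``create_dict`` and the usage
# below are hypothetical, not taken from the original source.
import functools

def create_dict(func):
    @functools.wraps(func)
    def __create_dict(*args, **kwargs):
        ret = func(*args, **kwargs)
        # ... threshold filtering and dict_<name>.txt writing as above ...
        return ret
    return __create_dict

# Hypothetical usage: the wrapped builder returns a frequency dict, which
# the decorator prunes by ``threshold`` and dumps to dict_word.txt.
# @create_dict
# def word(train_instances, threshold=2):
#     return {'the': 10, 'rare': 1}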
def record(record_file, dev_pearsonr, test_pearsonr, model):
    # text-mode append: csv.writer writes str in Python 3
    with utils.create_write_file(record_file, 'a') as f:
        writer = csv.writer(f, delimiter=',')
        features = [feature.feature_name for feature in model.feature_list]
        writer.writerow([
            model.model_name, dev_pearsonr, test_pearsonr,
            model.classifier.strategy.trainer, features
        ])
def load_parse_data(train_file, parser=None, flag=False):
    """
    Load data after parsing (POS, NER, etc.).
    Value:
        [ SentPair:class, ... ]
    Parameter:
        flag: False (default), load from file (resources....)
              True, parse and write to file, then load from file
    """
    ''' Pre-Define Write File '''
    # parse_train_file = config.PARSE_DIR + '/' + \
    #     utils.FileManager.get_file(train_file)
    parse_train_file = train_file.replace('./data', './generate/parse')

    if flag or not os.path.isfile(parse_train_file):
        print(train_file)
        if parser is None:
            raise RuntimeError(
                "parser should be init by ``nlp = stst.StanfordNLP('http://localhost:9000')``"
            )

        ''' Parse Data '''
        data = load_STS(train_file)
        print('*' * 50)
        print("Parse Data, train_file=%s, n_train=%d\n" % (train_file, len(data)))
        parse_data = []
        process_bar = pyprind.ProgPercent(len(data))
        for (sa, sb, score) in data:
            process_bar.update()
            parse_sa = parser.parse(sa)
            parse_sb = parser.parse(sb)
            parse_data.append((parse_sa, parse_sb, score))

        ''' Write Data to File '''
        with utils.create_write_file(parse_train_file) as f_parse:
            for parse_instance in parse_data:
                line = json.dumps(parse_instance)
                print(line, file=f_parse)

    ''' Load Data from File '''
    print('*' * 50)
    parse_data = []
    with utils.create_read_file(parse_train_file) as f:
        for line in f:
            parse_json = json.loads(line)
            sentpair_instance = SentPair(parse_json)
            parse_data.append(sentpair_instance)
    print("Load Data, train_file=%s, n_train=%d\n" % (train_file, len(parse_data)))
    return parse_data
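# Hedged usage sketch for load_parse_data (not from the original source).
# The CoreNLP endpoint is the one named in the RuntimeError above; the data
# path './data/sts-train.txt' is hypothetical.
#
#   import stst
#   nlp = stst.StanfordNLP('http://localhost:9000')
#   # first run: parse and cache under ./generate/parse/, then load
#   train_parse = load_parse_data('./data/sts-train.txt', parser=nlp, flag=True)
#   # later runs read the cached parse file directly, no parser needed
#   train_parse = load_parse_data('./data/sts-train.txt')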
def extract_instances(self, train_instances):
    asiya = AsiyaDriver()
    n_lines = 250
    features = []
    infos = []
    idx_list = range(0, len(train_instances), n_lines)
    for idx in idx_list:
        st, ed = idx, idx + n_lines
        if ed > len(train_instances):
            ed = len(train_instances)
        print("\rAsiya MT Feature index = %d, st = %d, ed = %d" % (idx, st, ed),
              end=' ')
        while True:
            ''' sa -> sb '''
            f_sa = utils.create_write_file(config.EX_DICT_DIR + '/sa.txt')
            f_sb = utils.create_write_file(config.EX_DICT_DIR + '/sb.txt')
            for id in range(st, ed):
                lemma_sa, lemma_sb = train_instances[id].get_word(type='lemma')
                lemma_sa = ' '.join(lemma_sa)
                lemma_sb = ' '.join(lemma_sb)
                print(lemma_sa, file=f_sa)
                print(lemma_sb, file=f_sb)
            f_sa.close()
            f_sb.close()
            page = asiya.run_file()
            if page != ' ':
                features_sa = asiya.extract_table(page)
                break
            else:
                asiya.reload()
        features += features_sa
        # one info entry per newly extracted feature, not per accumulated one
        infos += [['0']] * len(features_sa)
    print(features[:10])
    return features, infos
def test_model(self, test_file_path, model_path, result_file_path):
    print("==> Load the data ...")
    test_X, test_y = self.load_file(test_file_path)

    print("==> Load the model ...")
    clf = pickle.load(open(model_path, 'rb'))

    print("==> Test the model ...")
    y_pred = clf.predict(test_X.toarray())

    print("==> Save the result ...")
    with utils.create_write_file(result_file_path) as f:
        for y in y_pred:
            print(y, file=f)
    return y_pred
def test_model(self, test_file_path, model_path, result_file_path):
    print("==> Load the model ...")
    # bst = pickle.load(open(model_path, 'rb'))
    bst = xgb.Booster(self.param)
    bst.load_model(model_path)

    print("==> Test the model ...")
    dtest = xgb.DMatrix(test_file_path)
    y_pred = bst.predict(dtest)

    print("==> Save the result ...")
    with utils.create_write_file(result_file_path) as f:
        for y in y_pred:
            print(y, file=f)
    return y_pred
def load_parse_data(train_file, nlp=None, flag=False):
    """
    Load data after parsing (POS, NER, etc.).
    Value:
        [ SentPair:class, ... ]
    Parameter:
        flag: False (default), load from file (resources....)
              True, parse and write to file, then load from file
    """
    ''' Pre-Define Write File '''
    # parse_train_file = config.PARSE_DIR + '/' + \
    #     utils.FileManager.get_file(train_file)
    parse_train_file = train_file.replace('./data', './generate/parse')

    if flag or not os.path.isfile(parse_train_file):
        print(train_file)

        ''' Parse Data '''
        data = load_data(train_file)
        print('*' * 50)
        print("Parse Data, train_file=%s, n_train=%d\n" % (train_file, len(data)))
        parse_data = []
        process_bar = pyprind.ProgPercent(len(data))
        for (sent, label) in data:
            process_bar.update()
            sent = preprocess(sent)
            parse_data.append((sent, label))

        ''' Write Data to File '''
        with utils.create_write_file(parse_train_file) as f_parse:
            for parse_instance in parse_data:
                line = json.dumps(parse_instance, ensure_ascii=False)
                print(line, file=f_parse)

    ''' Load Data from File '''
    print('*' * 50)
    parse_data = []
    with utils.create_read_file(parse_train_file) as f:
        for line in f:
            sent, label = json.loads(line)
            sentpair_instance = Sent(sent, label)
            parse_data.append(sentpair_instance)
    print("Load Data, train_file=%s, n_train=%d\n" % (train_file, len(parse_data)))
    return parse_data
def write_feature_to_file(feature_file, features, infos):
    """Write feature strings to file."""
    dim = len(features[0])
    f_feature = utils.create_write_file(feature_file)

    ''' write feature information to file '''
    print(len(features), dim, file=f_feature)

    ''' write feature strings to file '''
    for feature, info in zip(features, infos):
        feature_string = Feature._feat_list_to_string(feature)
        info_string = Feature._info_list_to_string(info)
        print(feature_string + '\t#\t' + info_string, file=f_feature)
    f_feature.close()
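# Illustration (hypothetical values) of the file layout produced by
# write_feature_to_file: a "count dim" header, then one
# "features<TAB>#<TAB>info" row per instance. The libsvm-style "index:value"
# form matches what the merge loop in make_feature_file below expects.
#
#   2 3
#   1:0.5 2:0.3 3:0.1<TAB>#<TAB>info_a
#   1:0.7 2:0.0 3:0.9<TAB>#<TAB>info_b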
def test_model(self, test_file_path, model_path, result_file_path):
    print("==> Load the model ...")
    # bst = pickle.load(open(model_path, 'rb'))
    bst = xgb.Booster(self.param)
    bst.load_model(model_path)

    print("==> Test the model ...")
    dtest = xgb.DMatrix(test_file_path)
    y_probs = bst.predict(dtest).reshape(-1, self.num_class)
    with open(result_file_path + '.pkl', 'wb') as f:
        pickle.dump(y_probs, f)
    y_pred = np.argmax(y_probs, axis=1)

    print("==> Save the result ...")
    with utils.create_write_file(result_file_path) as f:
        for y in y_pred:
            print(y, file=f)
    return y_pred
def load(train_file, train_gs=None, dev_flag=False):
    # train_file = config.TRAIN_FILE
    # train_gs = config.TRAIN_GS_FILE
    train_parse_data = data_utils.load_parse_data(train_file, train_gs,
                                                  flag=dev_flag)
    datas = []
    for train_instance in train_parse_data:
        data = make(train_instance)
        datas.append(data)

    file_name = train_file.split('/')[-1]
    path_dir = '../iclr2016-test/data/eval/'
    f = utils.create_write_file(path_dir + file_name)
    for sa, sb, sc in datas:
        f.write('%s\t%s\t%.4f\n' % (' '.join(sa), ' '.join(sb), sc * 5))
    f.close()
    return datas
def test_model(self, test_file_path, model_path, result_file_path):
    print("==> Load the data ...")
    X_test, Y_test = self.load_file(test_file_path)
    print(test_file_path, shape(X_test))

    print("==> Load the model ...")
    clf = pickle.load(open(model_path, 'rb'))
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    min_max_scaler = pickle.load(open(scaler_path, 'rb'))

    print("==> Test the model ...")
    X_test_minmax = min_max_scaler.transform(X_test)
    y_pred = clf.predict(X_test_minmax.toarray())

    print("==> Save the result ...")
    with utils.create_write_file(result_file_path) as f:
        for y in y_pred:
            print(y, file=f)
    return y_pred
def test(self, dev_instances, dev_file):
    """Extract features for dev_instances and predict scores."""
    ''' 1. Extract Features '''
    self.make_feature_file(dev_instances, dev_file, dev=True)
    self.output_file = self.get_output_file(dev_file)
    print(self.output_file)

    ''' 2. Predict Answers '''
    predict_label = self.classifier.test_model(self.dev_feature_file,
                                               self.model_file,
                                               self.output_file)

    f_out = utils.create_write_file(self.output_file)
    for label, dev_instance in zip(predict_label, dev_instances):
        print('%.2f\t#\t%s' % (label, dev_instance.get_instance_string()),
              file=f_out)
    f_out.close()
    return predict_label
def test_model(self, test_file_path, model_path, result_file_path):
    print("==> Load the data ...")
    X_test, Y_test = self.load_file(test_file_path)
    print(test_file_path, shape(X_test))
    X_test = X_test.toarray()
    for x in X_test[:10]:
        print(x)

    print("==> Test the model ...")
    # averaging baseline: predict the mean of each feature vector
    y_pred = []
    for x in X_test:
        y_pred.append(sum(x) / len(x))

    print("==> Save the result ...")
    with utils.create_write_file(result_file_path) as f:
        for y in y_pred:
            print(y, file=f)
    return y_pred
def test_model(self, test_file_path, model_path, result_file_path):
    print("==> Load the data ...")
    X_test, Y_test = self.load_file(test_file_path)
    print(test_file_path, shape(X_test))
    X_test = X_test.toarray()
    X_test = np.array(X_test, dtype=np.int32)

    print("==> Test the model ...")
    # majority-vote baseline: predict the most frequent value in each row
    y_pred = []
    for x in X_test:
        counter = Counter(x)
        topk = counter.most_common(1)  # [(value, freq)]
        y_pred.append(topk[0][0])

    print("==> Save the result ...")
    with utils.create_write_file(result_file_path) as f:
        for y in y_pred:
            print(y, file=f)
    return y_pred
def extract_information(self, train_instances):
    if self.is_training:
        sents = []
        for train_instance in train_instances:
            sent = train_instance.get_sent(self.type)
            sents.append(sent)
        idf_dict = utils.idf_calculator(sents)
        with utils.create_write_file(
                config.DICTIONARY_DIR + '/{}_idf_dict.txt'.format(self.type)) as fw:
            idf_dict_tuple = sorted(idf_dict.items(), key=lambda x: x[1],
                                    reverse=True)
            for key, value in idf_dict_tuple:
                print('{}\t{}'.format(key, value), file=fw)
    else:
        with utils.create_read_file(
                config.DICTIONARY_DIR + '/{}_idf_dict.txt'.format(self.type)) as fr:
            idf_dict = {}
            for line in fr:
                line = line.strip().split('\t')
                idf_dict[line[0]] = float(line[1])
    self.unigram_dict = idf_dict
    word_keys = sorted(idf_dict.keys(), reverse=True)
    self.word2index = {word: i for i, word in enumerate(word_keys)}
def extract_information(self, train_instances):
    if self.is_training:
        sents = []
        for train_instance in train_instances:
            sent = train_instance.get_word()
            sents.append(sent)
        idf_dict = utils.idf_calculator(sents)
        # idf_dict = sorted(idf_dict.items(), key=lambda x: x[1], reverse=True)
        with utils.create_write_file(config.DICTIONARY_DIR + '/idf_dict.txt') as fw:
            for key in idf_dict:
                print('{}\t{}'.format(key, idf_dict[key]), file=fw)
        print(len(idf_dict))
    else:
        with utils.create_read_file(config.DICTIONARY_DIR + '/idf_dict.txt') as fr:
            idf_dict = {}
            for line in fr:
                line = line.strip().split('\t')
                idf_dict[line[0]] = float(line[1])
    self.unigram_dict = idf_dict
def __create_dict(*args, **kwargs):
    print("==" * 40)
    print("Create dict for %s ... " % (func.__name__.replace("create_", "")))
    print("==" * 40)
    ret = func(*args, **kwargs)

    ''' remove items whose frequency is less than the threshold '''
    if 'threshold' in kwargs:
        threshold = kwargs['threshold']
        # iterate over a copy of the keys: popping while iterating over
        # dict.keys() raises RuntimeError in Python 3
        for key in list(ret.keys()):
            if ret[key] < threshold:
                ret.pop(key)

    ''' write dict to file '''
    file_name = 'dict_' + func.__name__.replace("create_", "") + '.txt'
    f_dict = utils.create_write_file(config.DICT_DIR + '/' + file_name)
    if type(ret) == list:
        # ensure items are unique, then sort
        ret = sorted(set(ret))
        for item in ret:
            print(str(item), file=f_dict)
    elif type(ret) == dict:
        # write the dict in key order
        for item in sorted(ret.keys()):
            print('%s\t%s' % (item, ret[item]), file=f_dict)
    else:
        raise NotImplementedError
    f_dict.close()
    print("Write file: %s, %d instances" % (file_name, len(ret)))
    return ret
def extract_information(self, train_instances):
    if self.is_training:
        sents, labels = [], []
        for train_instance in train_instances:
            sent = train_instance.get_word()
            label = train_instance.get_label()
            sents.append(sent)
            labels.append(label)
        rf_dict = utils.rf_calculator(sents, labels, max_cnt=1000)
        with utils.create_write_file(config.DICTIONARY_DIR + '/rf_dict.txt',
                                     'w') as fw:
            json.dump(rf_dict, fw, ensure_ascii=False)

    with utils.create_read_file(config.DICTIONARY_DIR + '/rf_dict.txt',
                                'rb') as fr:
        rf_dict = json.load(fr)

    with utils.create_read_file(config.DICTIONARY_DIR + '/vocab.txt') as fr:
        vocab_dict = {}
        for line in fr:
            line = line.strip().split('\t')
            vocab_dict[line[0]] = int(line[1])

    self.rf_dict = rf_dict
    self.vocab_dict = vocab_dict
def make_feature_file(self, train_instances, train_file, dev=False):
    """
    :param train_instances:
    :param train_file:
    :param dev:
    :return:

    TODO. similar to feature, write to file
    """
    print("-" * 120)
    print("\n".join([f.feature_name for f in self.feature_list]))
    print("-" * 120)

    ''' Extract Features '''
    feature_strings = []
    feature_dimensions = []
    sum_feature_dimension = 0
    for feature_class in self.feature_list:
        if isinstance(feature_class, Feature):
            feature_string, feature_dimension, n_instance = \
                feature_class.extract_dataset_instances(train_instances, train_file)
            feature_strings.append(feature_string)
            feature_dimensions.append(feature_dimension)
            sum_feature_dimension += feature_dimension
            print('[Extract Features]', 'Feature', feature_class.feature_name,
                  feature_dimension, sum_feature_dimension)
        elif isinstance(feature_class, Model):
            if dev:
                feature_class.test(train_instances, train_file)
                feature_string = feature_class.load_model_score(train_file)
            else:
                ''' separate training to speed things up '''
                # feature_class.train(train_instances, train_file)
                feature_string = feature_class.load_model_score(train_file)
            feature_strings.append(feature_string)
            feature_dimensions.append(1)
            sum_feature_dimension += 1
            print('[Extract Features]', 'Model', feature_class.feature_name,
                  1, sum_feature_dimension)

    ''' Merge Features '''
    merged_feature_string_list = []
    for instance_feature_strings in zip(*feature_strings):
        merged_feature_string = ""
        dimension = 0
        for feature_dimension, feature_string in zip(feature_dimensions,
                                                     instance_feature_strings):
            if dimension == 0:
                # the first feature block keeps its indices unchanged
                merged_feature_string = feature_string
            else:
                if feature_string != "":
                    # shift the current feature's indices by the accumulated
                    # dimension of the blocks before it
                    temp = ""
                    for item in feature_string.split(" "):
                        if len(item.split(":")) == 1:
                            print(item)
                        index, value = item.split(":")
                        temp += " %d:%s" % (int(index) + dimension, value)
                    merged_feature_string += temp
            dimension += feature_dimension
        merged_feature_string_list.append(merged_feature_string)

    merged_feature_dimension = sum(feature_dimensions)

    ''' Write to feature file '''
    if dev:
        f_feature = utils.create_write_file(self.dev_feature_file)
    else:
        f_feature = utils.create_write_file(self.train_feature_file)
    for idx, feature_string in enumerate(merged_feature_string_list):
        train_instance = train_instances[idx]
        print(str(train_instance.get_score()), feature_string, file=f_feature)
    f_feature.close()

    return (merged_feature_string_list, merged_feature_dimension,
            len(merged_feature_string_list))
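# A minimal, self-contained sketch (hypothetical helper, not part of the
# original class) of the index-shifting rule used in the merge loop above:
# each feature block's libsvm indices are offset by the total dimension of
# the blocks before it, so every block occupies its own index range.
def _shift_indices(feature_string, offset):
    """_shift_indices('1:0.7 2:0.3', 3) -> '4:0.7 5:0.3'"""
    return ' '.join('%d:%s' % (int(index) + offset, value)
                    for index, value in
                    (item.split(':') for item in feature_string.split()))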
def load_parse_data(file_path, init=False):
    """
    Load data after parsing (POS, NER, etc.).

    Args:
        file_path:
        init: False, load from file; else parse with CoreNLP first.

    Returns:
        parse_data: list of ParseExample:class
    """
    ''' Pre-Define Write File '''
    parse_train_file = file_path.replace('data/', 'generate/parse/')
    parse_word_file = file_path.replace('data/', 'generate/word/')
    parse_lemma_file = file_path.replace('data/', 'generate/lemma/')
    parse_stopwords_lemma_file = file_path.replace('data/',
                                                   'generate/stopwords/lemma/')

    if init or not os.path.exists(parse_train_file):
        ''' Define CoreNLP '''
        nlp = corenlp_utils.StanfordNLP(server_url='http://localhost:9000')

        ''' Read data '''
        print("Read Data from file: %s" % file_path)
        examples = load_data(file_path)

        ''' Parse data '''
        print('*' * 50)
        print("Parse Data to file: %s, n_line: %d\n" % (parse_train_file,
                                                        len(examples)))
        parse_data = []
        process_bar = pyprind.ProgPercent(len(examples))
        for example in examples:
            process_bar.update()
            id = example['id']
            label = example['label']
            parse_lst = [id, label]
            try:
                # warrant0 / warrant1 / reason / claim / debate / negclaim
                example_lst = [
                    example['warrant0'], example['warrant1'],
                    example['reason'], example['claim'],
                    example['debate'], example['negclaim'],
                    example['title'], example['info']
                ]
                for sent in example_lst:
                    parse_sent = nlp.parse(sent)
                    parse_lst.append(sent)
                    parse_lst.append(parse_sent)
            except Exception:
                # example is a dict, so report its id via the key
                print(example['id'])
                traceback.print_exc()
                parse_lst = "id label warrant0 warrant1 reason claim title info".split()
            parse_data.append(parse_lst)

        ''' Write Data to File '''
        f_parse = utils.create_write_file(parse_train_file)
        # id warrant0 warrant1 label reason claim title info
        f_word = utils.create_write_file(parse_word_file)
        f_lemma = utils.create_write_file(parse_lemma_file)
        f_stopwords_lemma = utils.create_write_file(parse_stopwords_lemma_file)

        for parse_example in parse_data:
            parse_sent = json.dumps(parse_example)       # list -> str
            parse_example = ParseExample(parse_example)  # list -> class
            id = parse_example.get_id()
            label = parse_example.get_label()
            for word_type, fw in zip(['word', 'lemma'], [f_word, f_lemma]):
                warrant0, warrant1, reason, claim, debate, negclaim = \
                    parse_example.get_six(return_str=True, type=word_type)
                sent = '%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s' % (
                    id, warrant0, warrant1, label, reason, claim, debate, negclaim)
                print(sent, file=fw)
            warrant0, warrant1, reason, claim, debate, negclaim = \
                parse_example.get_six(return_str=True, type='lemma', stopwords=True)
            sent = '%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s' % (
                id, warrant0, warrant1, label, reason, claim, debate, negclaim)
            print(sent, file=f_stopwords_lemma)
            print(parse_sent, file=f_parse)

        f_parse.close()
        f_word.close()
        f_lemma.close()
        f_stopwords_lemma.close()

    ''' Load Data from File '''
    print('*' * 50)
    parse_data = []
    with codecs.open(parse_train_file, 'r', encoding='utf8') as f:
        for line in f:
            # obtain the json object
            parse_sent = json.loads(line)
            # obtain the class
            parse_example = ParseExample(parse_sent)
            parse_data.append(parse_example)
    print("Load Data, file_path=%s n_line=%d\n" % (file_path, len(parse_data)))
    return parse_data
def cross_validation(self, data_instances, data_file, k_fold=5, shuffle=False):
    self.make_feature_file(data_instances, data_file)

    n_data = len(data_instances)
    n_batch = n_data // k_fold
    data_instances = list(zip(range(n_data), data_instances))
    # materialize the index map so it can be shuffled in Python 3
    id_map = list(range(n_data))
    if shuffle is True:
        random.shuffle(id_map)

    preds = [None] * n_data
    for fold in range(k_fold):
        st = fold * n_batch
        ed = (fold + 1) * n_batch
        # let the last fold absorb the remainder so every instance
        # receives a prediction
        if fold == k_fold - 1 or ed > n_data:
            ed = n_data

        data = utils.create_read_file(self.dev_feature_file).readlines()

        # make train data
        train = [
            data[id_map[idx]].strip() for idx in range(len(data))
            if idx not in range(st, ed)
        ]
        dev_feature_file_train = self.dev_feature_file.replace('txt', 'train')
        f_train = utils.create_write_file(dev_feature_file_train)
        print('\n'.join(train), file=f_train)
        f_train.close()

        # make dev data
        dev = [data[id_map[idx]].strip() for idx in range(st, ed)]
        dev_feature_file_dev = self.dev_feature_file.replace('txt', 'dev')
        f_dev = utils.create_write_file(dev_feature_file_dev)
        print('\n'.join(dev), file=f_dev)
        f_dev.close()

        ''' Train Classifier '''
        self.classifier.train_model(dev_feature_file_train,
                                    self.model_file)  # Attention! self.dev_feature_file

        ''' Predict Labels '''
        self.output_file = self.get_output_file(data_file)
        predict_label = self.classifier.test_model(dev_feature_file_dev,
                                                   self.model_file,
                                                   self.output_file)
        for idx in range(st, ed):
            idy = idx - st
            preds[id_map[idx]] = predict_label[idy]

    ''' Write to File '''
    self.output_file = self.get_output_file(data_file)
    f_out = utils.create_write_file(self.output_file)
    for label, train_instance in zip(preds, data_instances):
        print('%.2f\t#\t%s' % (label, train_instance[1].get_instance_string()),
              file=f_out)
    f_out.close()
def extract_instances(self, train_instances):
    asiya = AsiyaDriver()
    n_lines = 250
    features = []
    infos = []
    idx_list = range(0, len(train_instances), n_lines)
    for idx in idx_list:
        st, ed = idx, idx + n_lines
        if ed > len(train_instances):
            ed = len(train_instances)
        print("\rAsiya MT Feature index = %d, st = %d, ed = %d" % (idx, st, ed),
              end=' ')

        while True:
            ''' sa -> sb '''
            f_sa = utils.create_write_file(config.TMP_DIR + '/sa.txt')
            f_sb = utils.create_write_file(config.TMP_DIR + '/sb.txt')
            for id in range(st, ed):
                lemma_sa, lemma_sb = train_instances[id].get_word(type='lemma')
                lemma_sa = ' '.join(lemma_sa)
                lemma_sb = ' '.join(lemma_sb)
                print(lemma_sa, file=f_sa)
                print(lemma_sb, file=f_sb)
            f_sa.close()
            f_sb.close()
            page = asiya.run_file()
            if page != ' ':
                features_sa = asiya.extract_table(page)
                break
            else:
                asiya.reload()

        while True:
            ''' sb -> sa '''
            f_sa = utils.create_write_file(config.TMP_DIR + '/sb.txt')
            f_sb = utils.create_write_file(config.TMP_DIR + '/sa.txt')
            for id in range(st, ed):
                lemma_sa, lemma_sb = train_instances[id].get_word(type='lemma')
                lemma_sa = ' '.join(lemma_sa)
                lemma_sb = ' '.join(lemma_sb)
                print(lemma_sa, file=f_sa)
                print(lemma_sb, file=f_sb)
            f_sa.close()
            f_sb.close()
            page = asiya.run_file()
            if page != ' ':
                features_sb = asiya.extract_table(page)
                break
            else:
                asiya.reload()

        ''' Merge feature '''
        for a, b in zip(features_sa, features_sb):
            features.append(a + b)
            infos.append([])

    print(features[:10])
    return features, infos