def main():
    """Run a single-epoch entity-linking CNN experiment on hard-coded files.

    Loads word vectors and entity vectors from fixed paths, derives the
    input height from the module-level ``sentence_len`` and
    ``sentence_pad_len``, and hands everything to ``train_cnn_for_el``.

    NOTE(review): another ``def main()`` appears later in this source; if
    both live in the same module, the later one rebinds the name.
    """
    word_vec_path = '/media/dhl/Data/el/word2vec/wiki_vectors.jbin'
    # An alternative entity file ('.../wid_entity_rep_wiki50.bin', loaded with
    # True as the second argument) was previously used here.
    entity_rep_path = '/media/dhl/Data/el/vec_rep/wid_entity_rep_wiki50_cat.bin'
    train_path = '/media/dhl/Data/el/vec_rep/wiki_train_word_vec_indices_wiki50.td'
    # The validation set is the TAC 2014 training data; a wiki validation
    # file was used at some earlier point.
    val_path = '/media/dhl/Data/el/vec_rep/tac_2014_training.bin'
    test_path = '/media/dhl/Data/el/vec_rep/wiki_test_word_vec_indices_wiki50.td'

    # The first returned value of the word-vector loader is not needed.
    loaded_word_data = data_load.load_word_vectors(word_vec_path)
    word_vecs = loaded_word_data[1]
    word_vec_len = len(word_vecs[0])

    wid_idx_dict, entity_vecs = data_load.load_entities(entity_rep_path, False)

    num_val_candidates = 30
    num_test_candidates = 30
    skipwidth_loading = 0

    # Input height: the sentence plus padding on both sides.
    img_h = sentence_len + 2 * sentence_pad_len

    train_cnn_for_el(
        train_path,
        val_path,
        num_val_candidates,
        test_path,
        num_test_candidates,
        img_h,
        word_vec_len,
        word_vecs,
        wid_idx_dict,
        entity_vecs,
        skip_width_loading=skipwidth_loading,
        n_epochs=1,
    )
def main():
    """Train the entity-linking CNN using settings read from a params file.

    The params file path is taken from ``sys.argv[1]`` and parsed with
    ``load_params``. The following keys are read from the result:
    ``entity_side_cnn`` (the string ``'1'`` enables the entity-side CNN),
    ``word_vec_file``, one of ``entity_rep_indices_file`` /
    ``entity_rep_vec_file`` (depending on ``entity_side_cnn``),
    ``train_data_file``, ``val_data_file``, ``test_data_file``,
    ``training_part_size`` and ``context_sentence_len``.

    Side effects:
        When the entity-side CNN is enabled, the module-level
        ``entity_rep_len`` is set from the loaded entity data.

    Raises:
        SystemExit: with status 1 when no params file was given on the
            command line.
    """
    global entity_rep_len

    if len(sys.argv) < 2:
        print('need params file')
        # Stop here: continuing would only fail with an IndexError on
        # sys.argv[1] right below.
        sys.exit(1)

    params = load_params(sys.argv[1])

    entity_side_cnn = params['entity_side_cnn'] == '1'
    word_vec_file_name = params['word_vec_file']
    # Only the key matching the selected entity representation is read, so
    # the other one may be absent from the params file.
    if entity_side_cnn:
        entity_rep_file_name = params['entity_rep_indices_file']
    else:
        entity_rep_file_name = params['entity_rep_vec_file']
    train_data_file_name = params['train_data_file']
    val_data_file_name = params['val_data_file']
    test_data_file_name = params['test_data_file']
    training_part_size = int(params['training_part_size'])
    sentence_len = int(params['context_sentence_len'])

    # The first returned value of the word-vector loader is not needed.
    _, word_vecs = data_load.load_word_vectors(word_vec_file_name)
    word_vec_len = len(word_vecs[0])

    if entity_side_cnn:
        print('entity use cnn')
        wid_idx_dict, entity_vecs, entity_rep_len = (
            data_load.load_index_vec_of_entities_fixed_len(
                entity_rep_file_name))
    else:
        wid_idx_dict, entity_vecs = data_load.load_entities(
            entity_rep_file_name, False)

    num_val_candidates = 30
    num_test_candidates = 30
    skipwidth_loading = 0

    train_cnn_for_el(
        train_data_file_name,
        val_data_file_name,
        num_val_candidates,
        test_data_file_name,
        num_test_candidates,
        sentence_len,
        word_vec_len,
        word_vecs,
        wid_idx_dict,
        entity_vecs,
        entity_side_cnn=entity_side_cnn,
        gold_as_first_candidate=False,
        skip_width_loading=skipwidth_loading,
        n_epochs=1,
        training_part_size=training_part_size)