def main():
    """Build the RNN input dict from url2vec/url2info and dump it as JSON.

    Requires --u2i_path and --output (plus --d2v_embed and --u2v_path,
    which are read unchecked); returns silently when required options
    are missing.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if (options.u2i_path is None) or (options.output is None):
        return

    output_file_path = options.output
    embedding_dimension = int(options.d2v_embed)
    # url2vec file name is suffixed with the embedding dimension
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    url2info_path = options.u2i_path

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    print('Loading url2info : start')
    dict_url2info = load_json(url2info_path)
    print('Loading url2info : end')

    dict_rnn_input = generate_rnn_input(dict_url2info, dict_url2vec)

    with open(output_file_path, 'w') as f_out:
        json.dump(dict_rnn_input, f_out)
def main():
    """Print session/event/article counts for a torch RNN input dict.

    Requires --input (the torch input directory); returns silently
    otherwise.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if options.input is None:
        return

    torch_input_path = options.input
    dict_rnn_input_path = '{}/torch_rnn_input.dict'.format(torch_input_path)

    print('Loading torch input : start')
    dict_rnn_input = load_json(dict_rnn_input_path)
    print('Loading torch input : end')

    sequence_count = 0
    event_count = 0
    for dataset_name in ['train', 'valid', 'test']:
        sequences = dict_rnn_input['dataset'][dataset_name]
        sequence_count += len(sequences)
        # sequence[2] appears to hold the per-session event list
        # -- TODO confirm the tuple layout against the generator
        event_count += sum(len(sequence[2]) for sequence in sequences)
    article_count = len(dict_rnn_input['idx2url'])

    print('number of session : {}'.format(sequence_count))
    print('number of event : {}'.format(event_count))
    print('number of article : {}'.format(article_count))
def __init__(self, rec_input_json_path, dict_url2vec, options, \
        dict_url2info=None, dict_glove=None):
    """Initialize the recommender via its mixins.

    :param rec_input_json_path: path to the rec-input JSON loaded via load_json
    :param dict_url2vec: url -> embedding mapping, passed to both mixins
    :param options: parsed CLI options forwarded to load_rec_input
    :param dict_url2info: optional url -> info mapping (defaults to empty)
    :param dict_glove: optional glove mapping (accepted for interface
        compatibility; not used in this constructor)
    """
    # Bug fix: the original used mutable default arguments ({}), which are
    # shared across calls; fall back to fresh dicts per invocation instead.
    if dict_url2info is None:
        dict_url2info = {}
    if dict_glove is None:
        dict_glove = {}

    # initialize mixins
    self.load_rec_input(dict_url2vec=dict_url2vec,
                        dict_rec_input=load_json(rec_input_json_path),
                        options=options)
    self.load_category(dict_url2vec=dict_url2vec,
                       dict_url2info=dict_url2info)

    # datasets will be updated lazily
    self._dataset = {}
def main():
    """Train an HRAM recommender; in search mode, record the best metrics.

    Requires --input, --d2v_embed, --u2v_path and --ws_path; returns
    silently when any is missing, or when the param-search result for
    this option combination already exists.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if (options.input is None) or (options.d2v_embed is None) or \
            (options.u2v_path is None) or (options.ws_path is None):
        return

    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)
    # url2vec file name is suffixed with the embedding dimension
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    ws_path = options.ws_path
    search_mode = options.search_mode

    # per-hyperparameter model directory, named after the option string
    model_ws_path = '{}/model/{}'.format(ws_path, option2str(options))
    # os.makedirs replaces shelling out to `mkdir -p` via os.system
    os.makedirs(ws_path, exist_ok=True)
    os.makedirs(model_ws_path, exist_ok=True)

    # Save best result with param name
    param_search_path = ws_path + '/param_search'
    os.makedirs(param_search_path, exist_ok=True)
    param_search_file_path = '{}/{}'.format(param_search_path,
                                            option2str(options))
    # in search mode, skip option combinations that already have a result
    if search_mode and os.path.exists(param_search_file_path):
        print('Param search mode already exist : {}'.format(
            param_search_file_path))
        return

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    predictor = AdressaRec(HRAMModel, model_ws_path, torch_input_path,
                           dict_url2vec, options, hram_mode=True)
    best_hit_5, best_auc_20, best_mrr_20 = predictor.do_train()

    # persist the best metrics for later hyperparameter comparison
    if search_mode:
        with open(param_search_file_path, 'w') as f_out:
            f_out.write(str(best_hit_5) + '\n')
            f_out.write(str(best_auc_20) + '\n')
            f_out.write(str(best_mrr_20) + '\n')
def main():
    """Train an article-representation model and dump its y2v dict as JSON.

    Requires --d2v_embed, --u2v_path, --input, --output and --ws_path;
    returns silently when any is missing. The workspace directory is
    wiped and recreated on every run.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if (options.d2v_embed is None) or (options.u2v_path is None) \
            or (options.input is None) or (options.output is None) \
            or (options.ws_path is None):
        return

    output_file_path = options.output
    embedding_dimension = int(options.d2v_embed)
    # url2vec file name is suffixed with the embedding dimension
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    ws_path = options.ws_path
    rnn_input_path = options.input

    # always start from a clean workspace; the original re-checked
    # existence before mkdir, which is redundant after the rm -rf
    if os.path.exists(ws_path):
        os.system('rm -rf {}'.format(ws_path))
    os.makedirs(ws_path, exist_ok=True)

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    print('Loading a2v rnn input : start')
    dict_rnn_input = load_json(rnn_input_path)
    print('Loading a2v rnn input : end')

    ar = ArticleRepresentation(dict_url2vec, dict_rnn_input,
                               embedding_dimension, ws_path)
    ar.do_train()
    dict_y2v = ar.generate_y2v()

    with open(output_file_path, 'w') as f_out:
        json.dump(dict_y2v, f_out)
def main():
    """Load the torch RNN input dict and run selected_times over it.

    Requires --input, --d2v_embed, --u2v_path and --ws_path; returns
    silently when any is missing.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if (options.input is None) or (options.d2v_embed is None) or \
            (options.u2v_path is None) or (options.ws_path is None):
        return

    torch_input_path = options.input
    ws_path = options.ws_path
    # NOTE(review): the original also computed embedding_dimension and
    # url2vec_path here but never used them; the unused locals were removed
    # (the options are still validated above).

    # os.makedirs replaces shelling out to `mkdir -p` via os.system
    os.makedirs(ws_path, exist_ok=True)

    dict_rnn_input_path = '{}/torch_rnn_input.dict'.format(torch_input_path)
    dict_rnn_input = load_json(dict_rnn_input_path)

    selected_times(dict_rnn_input)
def main():
    """Train a SimpleAVGModel recommender through SelectRec for one epoch.

    Requires --input, --d2v_embed, --u2v_path, --ws_path and
    --word_embed_path; returns silently when any is missing.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if (options.input is None) or (options.d2v_embed is None) or \
            (options.u2v_path is None) or (options.ws_path is None) or \
            (options.word_embed_path is None):
        return

    path_rec_input = '{}/torch_rnn_input.dict'.format(options.input)
    embedding_dimension = int(options.d2v_embed)
    # url2vec file name is suffixed with the embedding dimension
    path_url2vec = '{}_{}'.format(options.u2v_path, embedding_dimension)

    sr = SelectRec(path_rec_input, path_url2vec, SimpleAVGModel, options)
    sr.do_train(total_epoch=1)
    # NOTE(review): the original body continued past an unconditional
    # `return` here with a legacy AdressaRec/SingleLSTMModel/glove training
    # path; that code was unreachable and has been removed.
def main():
    """Run the popularity/history test of a SingleLSTMModel recommender.

    Requires --input, --d2v_embed, --u2v_path and --ws_path; returns
    silently when any is missing. The workspace is wiped and recreated.
    """
    options, args = parser.parse_args()
    # PEP 8: compare against None with `is`, not `==`
    if (options.input is None) or (options.d2v_embed is None) or \
            (options.u2v_path is None) or (options.ws_path is None):
        return

    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)  # parsed for validation only
    # NOTE(review): unlike the sibling entry points, u2v_path is used as-is
    # here, without the '_{dim}' suffix -- confirm this is intentional.
    url2vec_path = options.u2v_path
    ws_path = options.ws_path

    # start from a clean workspace
    os.system('rm -rf {}'.format(ws_path))
    os.makedirs(ws_path, exist_ok=True)

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    predictor = AdressaRec(SingleLSTMModel, ws_path, torch_input_path,
                           dict_url2vec, options)

    time_start = time.time()
    hit_5, mrr_20 = predictor.pop_history_test(metric_count=20,
                                               candidate_count=20)
    # Bug fix: the original format string had a single placeholder for two
    # arguments, so mrr_20 was silently dropped from the output.
    print('history test :: hit_5 : {}, mrr_20 : {}'.format(hit_5, mrr_20))
    print('time tooks : {}'.format(time.time() - time_start))
    # NOTE(review): the original body continued past an unconditional
    # `return` here with a predictor.pop() candidate-count sweep
    # (20/40/60/80/100); that code was unreachable and has been removed.
def main():
    # Entry point: evaluate (or train) a MultiCellModel recommender.
    # NOTE(review): structure reconstructed from a single collapsed source
    # line; nesting inferred from the control flow -- confirm against the
    # original file layout.
    options, args = parser.parse_args()
    # All four CLI options are required; bail out silently otherwise.
    if (options.input == None) or (options.d2v_embed == None) or \
            (options.u2v_path == None) or (options.ws_path == None):
        return
    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)
    # url2vec file name is suffixed with the embedding dimension.
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    ws_path = options.ws_path
    search_mode = options.search_mode
    # Per-hyperparameter model directory derived from the option string.
    model_ws_path = '{}/model/{}'.format(ws_path, option2str(options))
    if not os.path.exists(ws_path):
        os.system('mkdir -p {}'.format(ws_path))
    # os.system('rm -rf {}'.format(model_ws_path))
    os.system('mkdir -p {}'.format(model_ws_path))
    # Save best result with param name
    param_search_path = ws_path + '/param_search'
    if not os.path.exists(param_search_path):
        os.system('mkdir -p {}'.format(param_search_path))
    param_search_file_path = '{}/{}'.format(param_search_path, option2str(options))
    # In search mode, skip option combinations whose result file exists.
    if search_mode and os.path.exists(param_search_file_path):
        print('Param search mode already exist : {}'.format(
            param_search_file_path))
        return
    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')
    # NOTE(review): hard-coded toggle -- with test_mode=True the training
    # path after the early `return` below never runs.
    test_mode = True
    if test_mode:
        print('test mode')
    predictor = AdressaRec(MultiCellModel, model_ws_path, torch_input_path,
        dict_url2vec, options)
    if test_mode:
        # Evaluation-only path: load a previously trained model and report
        # hit@5 / MRR@20 on 20 candidates, then stop.
        predictor.load_model()
        time_start = time.time()
        hit_5, _, mrr_20 = predictor.test_mrr_trendy(metric_count=20,
            candidate_count=20, length_mode=True)
        print('hit_5', hit_5, 'mrr_20', mrr_20)
        print('time tooks : {}'.format(time.time() - time_start))
        return
    # Training path (only reachable when test_mode is False).
    best_hit_5, best_auc_20, best_mrr_20 = predictor.do_train()
    # Persist the best metrics under the param-search file name.
    if search_mode:
        with open(param_search_file_path, 'w') as f_out:
            f_out.write(str(best_hit_5) + '\n')
            f_out.write(str(best_auc_20) + '\n')
            f_out.write(str(best_mrr_20) + '\n')
def main():
    # Entry point: evaluate (or train) a NeRTModel recommender.
    # NOTE(review): structure reconstructed from collapsed source lines;
    # nesting inferred from the control flow -- confirm against the
    # original file layout.
    options, args = parser.parse_args()
    if (options.input == None) or (options.d2v_embed == None) or \
            (options.u2v_path == None) or (options.ws_path == None):
        return
    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)
    # url2vec file name is suffixed with the embedding dimension.
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    # NOTE(review): u2i_path is read here but is NOT validated by the guard
    # above -- a missing --u2i_path makes url2info_path None and load_json
    # will fail later; confirm whether it should be added to the check.
    url2info_path = options.u2i_path
    ws_path = options.ws_path
    search_mode = options.search_mode
    # Per-hyperparameter model directory derived from the option string.
    model_ws_path = '{}/model/{}'.format(ws_path, option2str(options))
    if not os.path.exists(ws_path):
        os.system('mkdir -p {}'.format(ws_path))
    # os.system('rm -rf {}'.format(model_ws_path))
    os.system('mkdir -p {}'.format(model_ws_path))
    # Save best result with param name
    param_search_path = ws_path + '/param_search'
    if not os.path.exists(param_search_path):
        os.system('mkdir -p {}'.format(param_search_path))
    param_search_file_path = '{}/{}'.format(param_search_path, option2str(options))
    # In search mode, skip option combinations whose result file exists.
    if search_mode and os.path.exists(param_search_file_path):
        print('Param search mode already exist : {}'.format(param_search_file_path))
        return
    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')
    print('Loading url2info : start')
    dict_url2info = load_json(url2info_path)
    print('Loading url2info : end')
    # Hard-coded toggle: attention-analysis evaluation vs. history test.
    attn_analysis = False
    if attn_analysis:
        print('test mode')
    predictor = AdressaRec(NeRTModel, model_ws_path, torch_input_path,
        dict_url2vec, options, dict_url2info=dict_url2info)
    if attn_analysis:
        predictor.load_model()
        time_start = time.time()
        hit_5, _, mrr_20 = predictor.test_mrr_trendy(metric_count=20,
            candidate_count=20, attn_mode=True, length_mode=False)
        print(hit_5, mrr_20)
        return
    hit_5, _, mrr_20 = predictor.test_mrr_trendy_history_test(metric_count=20,
        candidate_count=20)
    print('hitory_test :: hit_5 : {}, mrr_20 : {}'.format(hit_5, mrr_20))
    # NOTE(review): time_start is only assigned inside the attn_analysis
    # branch above, so when attn_analysis is False this line raises a
    # NameError -- verify against the original layout.
    print('time tooks : {}'.format(time.time() - time_start))
    return
    # NOTE(review): everything below the unconditional `return` above is
    # unreachable dead code (candidate-count sweep and training path).
    for candi_count in [40, 60, 80, 100]:
        time_start = time.time()
        hit_5, _, mrr_20 = predictor.test_mrr_trendy(metric_count=20,
            candidate_count=candi_count)
        print('candi {} :: hit_5 : {}, mrr_20 : {}'.format(candi_count, hit_5, mrr_20))
        print('time tooks : {}'.format(time.time() - time_start))
    return
    best_hit_5, best_auc_20, best_mrr_20 = predictor.do_train()
    if search_mode:
        with open(param_search_file_path, 'w') as f_out:
            f_out.write(str(best_hit_5) + '\n')
            f_out.write(str(best_auc_20) + '\n')
            f_out.write(str(best_mrr_20) + '\n')