def main():
    """Build the RNN input dictionary and write it to disk as JSON.

    Parses the command-line options, loads the url2vec and url2info JSON
    files, hands both to ``generate_rnn_input`` and dumps the returned
    object to ``options.output``.

    Returns early, doing nothing, when ``u2i_path`` or ``output`` was not
    supplied on the command line.
    """
    # The body is indented with spaces throughout: mixing a space-indented
    # docstring with tab-indented statements is rejected by the interpreter.
    options, _ = parser.parse_args()

    if options.u2i_path is None or options.output is None:
        return

    output_file_path = options.output
    embedding_dimension = int(options.d2v_embed)
    # The url2vec file name is the base path suffixed with the dimension.
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    url2info_path = options.u2i_path

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    print('Loading url2info : start')
    dict_url2info = load_json(url2info_path)
    print('Loading url2info : end')

    dict_rnn_input = generate_rnn_input(dict_url2info, dict_url2vec)

    with open(output_file_path, 'w') as f_out:
        json.dump(dict_rnn_input, f_out)
Пример #2
0
def main():
    """Print session, event and article counts of a torch RNN input file.

    Loads ``<options.input>/torch_rnn_input.dict`` with ``load_json`` and
    reports three totals over the ``train``, ``valid`` and ``test`` splits.

    Returns early, doing nothing, when ``input`` was not supplied.
    """
    options, _ = parser.parse_args()

    if options.input is None:
        return

    dict_rnn_input_path = '{}/torch_rnn_input.dict'.format(options.input)

    print('Loading torch input : start')
    dict_rnn_input = load_json(dict_rnn_input_path)
    print('Loading torch input : end')

    datasets = dict_rnn_input['dataset']
    sequence_count = 0
    event_count = 0
    for dataset_name in ('train', 'valid', 'test'):
        sequences = datasets[dataset_name]
        sequence_count += len(sequences)
        # Element 2 of every sequence is what gets counted as its events.
        # NOTE(review): the layout of a sequence record is not visible
        # here - confirm that index 2 is the event list.
        event_count += sum(len(sequence[2]) for sequence in sequences)

    article_count = len(dict_rnn_input['idx2url'])

    print('number of session : {}'.format(sequence_count))
    print('number of event : {}'.format(event_count))
    print('number of article : {}'.format(article_count))
Пример #3
0
    def __init__(self, rec_input_json_path, dict_url2vec, options,
                 dict_url2info=None, dict_glove=None):
        """Load the recommendation input and the category data.

        Args:
            rec_input_json_path: path handed to ``load_json``; the loaded
                object is passed to ``load_rec_input`` as ``dict_rec_input``.
            dict_url2vec: forwarded to both ``load_rec_input`` and
                ``load_category``.
            options: forwarded to ``load_rec_input``.
            dict_url2info: forwarded to ``load_category``. ``None`` (the
                default) is replaced by a fresh empty dict.
            dict_glove: accepted for signature compatibility; this
                initializer does not read it.
        """
        # ``None`` is the sentinel instead of a literal ``{}`` default: a
        # dict default is created once and shared by every call, so a
        # mutation made through one instance would leak into the next.
        if dict_url2info is None:
            dict_url2info = {}

        # load_rec_input / load_category are not defined in this method;
        # the original comment describes them as coming from mixins.
        self.load_rec_input(dict_url2vec=dict_url2vec,
                            dict_rec_input=load_json(rec_input_json_path),
                            options=options)

        self.load_category(dict_url2vec=dict_url2vec,
                           dict_url2info=dict_url2info)

        # datasets will be updated lazily
        self._dataset = {}
Пример #4
0
def main():
    """Train an ``AdressaRec`` predictor built on ``HRAMModel``.

    Creates the workspace, model and ``param_search`` directories, loads
    url2vec, runs ``predictor.do_train()`` and, when ``search_mode`` is set,
    writes the three returned metrics (one per line) to a file named after
    the option string.

    Returns early, doing nothing, when ``input``, ``d2v_embed``,
    ``u2v_path`` or ``ws_path`` is missing. Also returns early in search
    mode when the result file for this option string already exists.

    Raises:
        OSError: if one of the directories cannot be created (for example
            because a regular file already occupies the path).
    """
    options, _ = parser.parse_args()

    required = (options.input, options.d2v_embed,
                options.u2v_path, options.ws_path)
    if any(value is None for value in required):
        return

    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    ws_path = options.ws_path
    search_mode = options.search_mode
    option_tag = option2str(options)
    model_ws_path = '{}/model/{}'.format(ws_path, option_tag)

    # os.makedirs replaces the shell ``mkdir -p`` call: the path is never
    # interpreted by a shell, and a failure raises instead of being ignored.
    # Existing directory contents are left in place.
    os.makedirs(ws_path, exist_ok=True)
    os.makedirs(model_ws_path, exist_ok=True)

    # Save best result with param name
    param_search_path = ws_path + '/param_search'
    os.makedirs(param_search_path, exist_ok=True)
    param_search_file_path = '{}/{}'.format(param_search_path, option_tag)

    if search_mode and os.path.exists(param_search_file_path):
        print('Param search mode already exist : {}'.format(
            param_search_file_path))
        return

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    predictor = AdressaRec(HRAMModel,
                           model_ws_path,
                           torch_input_path,
                           dict_url2vec,
                           options,
                           hram_mode=True)

    best_hit_5, best_auc_20, best_mrr_20 = predictor.do_train()

    if search_mode:
        with open(param_search_file_path, 'w') as f_out:
            for metric in (best_hit_5, best_auc_20, best_mrr_20):
                f_out.write(str(metric) + '\n')
Пример #5
0
def main():
    """Train an ``ArticleRepresentation`` and dump its y2v dictionary.

    Recreates the workspace directory from scratch, loads url2vec and the
    RNN input JSON, runs ``do_train()`` followed by ``generate_y2v()`` and
    writes the result to ``options.output`` as JSON.

    Returns early, doing nothing, when ``d2v_embed``, ``u2v_path``,
    ``input``, ``output`` or ``ws_path`` is missing.

    Raises:
        OSError: if the workspace cannot be removed or recreated.
    """
    # Imported locally so the block does not depend on a module-level import.
    import shutil

    # The body is indented with spaces throughout: mixing a space-indented
    # docstring with tab-indented statements is rejected by the interpreter.
    options, _ = parser.parse_args()

    required = (options.d2v_embed, options.u2v_path, options.input,
                options.output, options.ws_path)
    if any(value is None for value in required):
        return

    output_file_path = options.output
    embedding_dimension = int(options.d2v_embed)
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    ws_path = options.ws_path
    rnn_input_path = options.input

    # Wipe any previous workspace without going through a shell: an
    # unquoted ``rm -rf <path>`` splits on whitespace and can delete
    # unrelated files when the path contains spaces or metacharacters.
    if os.path.isdir(ws_path) and not os.path.islink(ws_path):
        shutil.rmtree(ws_path)
    elif os.path.lexists(ws_path):
        # A plain file or a symlink: remove just that entry.
        os.remove(ws_path)
    os.makedirs(ws_path, exist_ok=True)

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    print('Loading a2v rnn input : start')
    dict_rnn_input = load_json(rnn_input_path)
    print('Loading a2v rnn input : end')

    ar = ArticleRepresentation(dict_url2vec, dict_rnn_input,
                               embedding_dimension, ws_path)
    ar.do_train()
    dict_y2v = ar.generate_y2v()

    with open(output_file_path, 'w') as f_out:
        json.dump(dict_y2v, f_out)
def main():
    """Load the torch RNN input and pass it to ``selected_times``.

    Ensures the workspace directory exists, loads
    ``<options.input>/torch_rnn_input.dict`` with ``load_json`` and calls
    ``selected_times`` on the loaded object.

    Returns early, doing nothing, when ``input``, ``d2v_embed``,
    ``u2v_path`` or ``ws_path`` is missing. ``d2v_embed`` and ``u2v_path``
    are only checked for presence; nothing here reads their values.

    Raises:
        OSError: if the workspace directory cannot be created.
    """
    options, _ = parser.parse_args()

    required = (options.input, options.d2v_embed,
                options.u2v_path, options.ws_path)
    if any(value is None for value in required):
        return

    # os.makedirs replaces the shell ``mkdir -p`` call: the path is never
    # interpreted by a shell, and a failure raises instead of being ignored.
    os.makedirs(options.ws_path, exist_ok=True)

    dict_rnn_input_path = '{}/torch_rnn_input.dict'.format(options.input)
    dict_rnn_input = load_json(dict_rnn_input_path)

    selected_times(dict_rnn_input)
Пример #7
0
def main():
    """Run one training epoch of ``SelectRec`` with ``SimpleAVGModel``.

    Builds the recommendation-input path
    (``<options.input>/torch_rnn_input.dict``) and the url2vec path
    (``<u2v_path>_<d2v_embed>``), constructs ``SelectRec`` with them and
    calls ``do_train(total_epoch=1)``.

    Returns early, doing nothing, when ``input``, ``d2v_embed``,
    ``u2v_path``, ``ws_path`` or ``word_embed_path`` is missing.

    The original body continued after an unconditional ``return`` with an
    ``AdressaRec``/``SingleLSTMModel`` training path (workspace creation,
    url2vec and glove loading, a 200-epoch ``do_train``). That code could
    never run and has been removed.
    """
    options, _ = parser.parse_args()

    required = (options.input, options.d2v_embed, options.u2v_path,
                options.ws_path, options.word_embed_path)
    if any(value is None for value in required):
        return

    path_rec_input = '{}/torch_rnn_input.dict'.format(options.input)
    embedding_dimension = int(options.d2v_embed)
    path_url2vec = '{}_{}'.format(options.u2v_path, embedding_dimension)

    sr = SelectRec(path_rec_input, path_url2vec, SimpleAVGModel, options)
    sr.do_train(total_epoch=1)
Пример #8
0
def main():
    """Run the ``pop_history_test`` evaluation and print its metrics.

    Recreates the workspace directory from scratch, loads url2vec from
    ``options.u2v_path`` (used as-is, with no dimension suffix), builds an
    ``AdressaRec`` predictor on ``SingleLSTMModel`` and prints the two
    values returned by ``pop_history_test`` plus the elapsed time.

    Returns early, doing nothing, when ``input``, ``d2v_embed``,
    ``u2v_path`` or ``ws_path`` is missing.

    The original body continued after an unconditional ``return`` with a
    ``predictor.pop`` sweep over candidate counts 20, 40, 60, 80 and 100.
    That code could never run and has been removed.

    Raises:
        OSError: if the workspace cannot be removed or recreated.
    """
    # Imported locally so the block does not depend on a module-level import.
    import shutil

    options, _ = parser.parse_args()

    required = (options.input, options.d2v_embed,
                options.u2v_path, options.ws_path)
    if any(value is None for value in required):
        return

    torch_input_path = options.input
    url2vec_path = options.u2v_path
    ws_path = options.ws_path

    # Wipe any previous workspace without going through a shell: an
    # unquoted ``rm -rf <path>`` splits on whitespace and can delete
    # unrelated files when the path contains spaces or metacharacters.
    if os.path.isdir(ws_path) and not os.path.islink(ws_path):
        shutil.rmtree(ws_path)
    elif os.path.lexists(ws_path):
        # A plain file or a symlink: remove just that entry.
        os.remove(ws_path)
    os.makedirs(ws_path, exist_ok=True)

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    predictor = AdressaRec(SingleLSTMModel, ws_path, torch_input_path,
                           dict_url2vec, options)

    time_start = time.time()
    hit_5, mrr_20 = predictor.pop_history_test(metric_count=20,
                                               candidate_count=20)
    # The original format string had a single placeholder for two values,
    # so hit_5 was printed under the mrr_20 label and mrr_20 was dropped.
    print('history test :: hit_5 : {}, mrr_20 : {}'.format(hit_5, mrr_20))
    print('time tooks : {}'.format(time.time() - time_start))
def main():
    """Evaluate (or train) an ``AdressaRec`` predictor on ``MultiCellModel``.

    Creates the workspace, model and ``param_search`` directories and loads
    url2vec. With the hard-coded ``test_mode`` flag set to ``True`` it then
    loads a saved model, runs ``test_mrr_trendy`` and prints the metrics
    and elapsed time. With the flag set to ``False`` it runs ``do_train()``
    and, in search mode, writes the three returned metrics (one per line)
    to a file named after the option string.

    Returns early, doing nothing, when ``input``, ``d2v_embed``,
    ``u2v_path`` or ``ws_path`` is missing. Also returns early in search
    mode when the result file for this option string already exists.

    Raises:
        OSError: if one of the directories cannot be created.
    """
    options, _ = parser.parse_args()

    required = (options.input, options.d2v_embed,
                options.u2v_path, options.ws_path)
    if any(value is None for value in required):
        return

    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    ws_path = options.ws_path
    search_mode = options.search_mode
    option_tag = option2str(options)
    model_ws_path = '{}/model/{}'.format(ws_path, option_tag)

    # os.makedirs replaces the shell ``mkdir -p`` call: the path is never
    # interpreted by a shell, and a failure raises instead of being ignored.
    # Existing directory contents are left in place.
    os.makedirs(ws_path, exist_ok=True)
    os.makedirs(model_ws_path, exist_ok=True)

    # Save best result with param name
    param_search_path = ws_path + '/param_search'
    os.makedirs(param_search_path, exist_ok=True)
    param_search_file_path = '{}/{}'.format(param_search_path, option_tag)

    if search_mode and os.path.exists(param_search_file_path):
        print('Param search mode already exist : {}'.format(
            param_search_file_path))
        return

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    # Manual switch: True evaluates a saved model, False trains a new one.
    test_mode = True
    if test_mode:
        print('test mode')

    predictor = AdressaRec(MultiCellModel, model_ws_path, torch_input_path,
                           dict_url2vec, options)

    if test_mode:
        predictor.load_model()
        time_start = time.time()
        hit_5, _, mrr_20 = predictor.test_mrr_trendy(metric_count=20,
                                                     candidate_count=20,
                                                     length_mode=True)
        print('hit_5', hit_5, 'mrr_20', mrr_20)
        print('time tooks : {}'.format(time.time() - time_start))
        return

    best_hit_5, best_auc_20, best_mrr_20 = predictor.do_train()

    if search_mode:
        with open(param_search_file_path, 'w') as f_out:
            for metric in (best_hit_5, best_auc_20, best_mrr_20):
                f_out.write(str(metric) + '\n')
Пример #10
0
def main():
    """Train (or analyse) an ``AdressaRec`` predictor on ``NeRTModel``.

    Creates the workspace, model and ``param_search`` directories, then
    loads url2vec and url2info. With the hard-coded ``attn_analysis`` flag
    set to ``False`` it runs ``do_train()`` and, in search mode, writes the
    three returned metrics (one per line) to a file named after the option
    string. With the flag set to ``True`` it loads a saved model, runs
    ``test_mrr_trendy`` with ``attn_mode=True`` and prints the two metrics.

    Returns early, doing nothing, when ``input``, ``d2v_embed``,
    ``u2v_path`` or ``ws_path`` is missing. Also returns early in search
    mode when the result file for this option string already exists.
    ``u2i_path`` is not part of that guard and is passed to ``load_json``
    as given.

    The original analysis branch continued after an unconditional
    ``return`` with a ``test_mrr_trendy_history_test`` call and a sweep
    over candidate counts 40, 60, 80 and 100. That code could never run
    and has been removed, together with the timer only it read.

    Raises:
        OSError: if one of the directories cannot be created.
    """
    options, _ = parser.parse_args()

    required = (options.input, options.d2v_embed,
                options.u2v_path, options.ws_path)
    if any(value is None for value in required):
        return

    torch_input_path = options.input
    embedding_dimension = int(options.d2v_embed)
    url2vec_path = '{}_{}'.format(options.u2v_path, embedding_dimension)
    url2info_path = options.u2i_path
    ws_path = options.ws_path
    search_mode = options.search_mode
    option_tag = option2str(options)
    model_ws_path = '{}/model/{}'.format(ws_path, option_tag)

    # os.makedirs replaces the shell ``mkdir -p`` call: the path is never
    # interpreted by a shell, and a failure raises instead of being ignored.
    # Existing directory contents are left in place.
    os.makedirs(ws_path, exist_ok=True)
    os.makedirs(model_ws_path, exist_ok=True)

    # Save best result with param name
    param_search_path = ws_path + '/param_search'
    os.makedirs(param_search_path, exist_ok=True)
    param_search_file_path = '{}/{}'.format(param_search_path, option_tag)

    if search_mode and os.path.exists(param_search_file_path):
        print('Param search mode already exist : {}'.format(param_search_file_path))
        return

    print('Loading url2vec : start')
    dict_url2vec = load_json(url2vec_path)
    print('Loading url2vec : end')

    print('Loading url2info : start')
    dict_url2info = load_json(url2info_path)
    print('Loading url2info : end')

    # Manual switch: True analyses a saved model, False trains a new one.
    attn_analysis = False
    if attn_analysis:
        print('test mode')

    predictor = AdressaRec(NeRTModel, model_ws_path, torch_input_path,
                           dict_url2vec, options, dict_url2info=dict_url2info)

    if attn_analysis:
        predictor.load_model()
        hit_5, _, mrr_20 = predictor.test_mrr_trendy(metric_count=20,
                                                     candidate_count=20,
                                                     attn_mode=True,
                                                     length_mode=False)
        print(hit_5, mrr_20)
        return

    best_hit_5, best_auc_20, best_mrr_20 = predictor.do_train()

    if search_mode:
        with open(param_search_file_path, 'w') as f_out:
            for metric in (best_hit_5, best_auc_20, best_mrr_20):
                f_out.write(str(metric) + '\n')