Example #1
def train_eval_model(graph_hyper_params):
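    # Loads the data, builds a TF-1.x graph of per-feature placeholders, trains
    # with periodic dev evaluation (AUC / normalized Gini), and checkpoints the
    # best-performing model.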
    # global pos_train_data, neg_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict
    all_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict, re_uid_map, re_aid_map = get_prod_dataset(graph_hyper_params['formal'])
    print graph_hyper_params


    # re-split into train / dev
    o_dev_size = graph_hyper_params['o_dev_size']
    atd = pd.concat([all_train_data, dev_data])
    pos_atd, neg_atd = atd[atd['label'] == 1], atd[atd['label'] == 0]
    dev_data = pd.concat([pos_atd[:o_dev_size], neg_atd[:o_dev_size]])
    pos_train_data, neg_train_data = pos_atd[o_dev_size:], neg_atd[o_dev_size:]
    print 'dev_size', len(dev_data)
    print 'pos-neg-all', len(pos_train_data), len(neg_train_data), len(all_train_data)
    del all_train_data
    gc.collect()
    # **********************************

    print 'map row start'
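    # Map every uid/aid to its row index so batch rows can be fetched with .iloc.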
    uid_map_row, aid_map_row = dict(zip(relevant_user_data['uid'].values, np.arange(len(relevant_user_data)))), dict(zip(ad_data['aid'].values, np.arange(len(ad_data))))
    print 'map row end'

    # handle the continuous feature creativeSize
    if graph_hyper_params['creativeSize_pro'] == 'min_max':
        print 'min-max norm creativeSize', ad_data['creativeSize'].max(), ad_data['creativeSize'].min()
        norm_cs = (ad_data['creativeSize'] * 1.0 - ad_data['creativeSize'].min()) / (ad_data['creativeSize'].max() - ad_data['creativeSize'].min())
        ad_data = ad_data.drop(['creativeSize'], axis=1)
        ad_data['creativeSize'] = norm_cs
        creativesize_p = tf.placeholder(tf.float32, [None, 1], name="creativeSize")
    elif graph_hyper_params['creativeSize_pro'] == 'li_san':
        print 'discretize creativeSize'
        sh = ShrinkSep()
        ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh)
        feature_conf_dict['creativeSize'] = len(sh.d) + 1
        creativesize_p = tf.placeholder(tf.int32, [None, 1], name="creativeSize")
    else:
        print 'no process creativeSize'

    print feature_conf_dict
    # ****************************************************************** place holder start
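    # Single-valued categorical features are fed as [None, 1] int ids; multi-valued
    # features (appIdAction, interest*, kw*, topic*) use a [None, max_len] index
    # placeholder plus a parallel [None, 1, max_len] float value placeholder.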
    uid_p = tf.placeholder(tf.int32, [None, 1], name="uid")
    lbs_p = tf.placeholder(tf.int32, [None, 1], name="LBS")
    age_p = tf.placeholder(tf.int32, [None, 1], name="age")

    carrier_p = tf.placeholder(tf.int32, [None, 1], name="carrier")
    consumptionability_p = tf.placeholder(tf.int32, [None, 1], name="consumptionAbility")
    education_p = tf.placeholder(tf.int32, [None, 1], name="education")
    gender_p = tf.placeholder(tf.int32, [None, 1], name="gender")
    house_p = tf.placeholder(tf.int32, [None, 1], name="house")
    os_p = tf.placeholder(tf.int32, [None, 1], name="os")
    ct_p = tf.placeholder(tf.int32, [None, 1], name="ct")
    marriagestatus_p = tf.placeholder(tf.int32, [None, 1], name="marriageStatus")

    appidaction_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['appIdAction'][1]], name="appidaction_index")
    appidaction_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['appIdAction'][1]], name="appidaction_val")
    appIdInstall_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_index")
    appIdInstall_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_val")

    interest1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest1'][1]], name="interest1_index")
    interest1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest1'][1]], name="interest1_val")
    interest2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest2'][1]], name="interest2_index")
    interest2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest2'][1]], name="interest2_val")
    interest3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest3'][1]], name="interest3_index")
    interest3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest3'][1]], name="interest3_val")
    interest4_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest4'][1]], name="interest4_index")
    interest4_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest4'][1]], name="interest4_val")
    interest5_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest5'][1]], name="interest5_index")
    interest5_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest5'][1]], name="interest5_val")

    kw1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw1'][1]], name="kw1_index")
    kw1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw1'][1]], name="kw1_val")
    kw2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw2'][1]], name="kw2_index")
    kw2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw2'][1]], name="kw2_val")
    kw3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw3'][1]], name="kw3_index")
    kw3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw3'][1]], name="kw3_val")

    topic1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic1'][1]], name="topic1_index")
    topic1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic1'][1]], name="topic1_val")
    topic2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic2'][1]], name="topic2_index")
    topic2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic2'][1]], name="topic2_val")
    topic3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic3'][1]], name="topic3_index")
    topic3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic3'][1]], name="topic3_val")

    aid_p = tf.placeholder(tf.int32, [None, 1], name="aid")
    advertiserid_p = tf.placeholder(tf.int32, [None, 1], name="advertiserId")
    campaignid_p = tf.placeholder(tf.int32, [None, 1], name="campaignId")
    creativeid_p = tf.placeholder(tf.int32, [None, 1], name="creativeId")
    adcategoryid_p = tf.placeholder(tf.int32, [None, 1], name="adCategoryId")
    productid_p = tf.placeholder(tf.int32, [None, 1], name="productId")
    producttype_p = tf.placeholder(tf.int32, [None, 1], name="productType")

    true_label = tf.placeholder(tf.float32, [None, 1], name="true_label")

    train_p = tf.placeholder(tf.bool, name="train_p")
    dropout_p = tf.placeholder(tf.float32, shape=[None], name="dropout_p")
    # ****************************************************************** place holder end

    pred_val, model_loss, network_params = inference(uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p,
                                                     gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p,
                                                     appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p,
                                                     interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p,
                                                     kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p,
                                                     topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict,
                                                     graph_hyper_params, train_p, dropout_p)

    # pred_val_for_pre, _, __ = inference(uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p,
    #                                                  gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p,
    #                                                  appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p,
    #                                                  interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p,
    #                                                  kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p,
    #                                                  topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params, istrain=False)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_step = None
    if graph_hyper_params['opt'] == 'adam':
        train_step = tf.train.AdamOptimizer(graph_hyper_params['learn_rate']).minimize(model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adgrad':
        train_step = tf.train.AdagradOptimizer(graph_hyper_params['learn_rate']).minimize(model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adadelta':
        train_step = tf.train.AdadeltaOptimizer(graph_hyper_params['learn_rate']).minimize(model_loss, global_step=global_step)
    else:
        print 'No optimizer !'

    time_now = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    checkpoint_dir = os.path.abspath("./checkpoints/dmf_tencent/" + time_now)
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    # config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    # sess = tf.Session(config=config)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    def get_fed_dict(b_data, split_vector_data):
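        # Fetch the user and ad feature rows for this batch via the uid/aid row
        # maps and build the placeholder -> numpy feed dict; split_vector_data
        # turns the multi-valued string columns into (index, value) arrays.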
        if graph_hyper_params['formal']:
            aid_list = b_data['aid'].values
            uid_list = b_data['uid'].values
        else:
            if len(b_data) == 4:
                aid_list, uid_list = [11, 11, 11, 11], [11, 190, 191, 11]
            elif len(b_data) == 3:
                aid_list, uid_list = [11, 11, 11], [11, 190, 191]
            else:
                aid_list, uid_list = [11], [11]

        # print 11
        # d1 = datetime.now()
        b_u_d, b_a_d = [], []
        for b_uid in uid_list:
            b_u_d.append(relevant_user_data.iloc[uid_map_row[b_uid]])
        for b_aid in aid_list:
            b_a_d.append(ad_data.iloc[aid_map_row[b_aid]])
        b_u_d = pd.concat(b_u_d, axis=1).transpose()
        b_a_d = pd.concat(b_a_d, axis=1).transpose()
        # d3 = datetime.now()

        # print 12
        # pd.concat([data.iloc[1].to_frame(), data.iloc[2].to_frame()], axis=1).transpose()
        fed_dict = {}
        fed_dict[uid_p] = np.expand_dims(b_u_d['uid'], axis=1)
        fed_dict[lbs_p] = np.expand_dims(b_u_d['LBS'], axis=1)
        fed_dict[age_p] = np.expand_dims(b_u_d['age'], axis=1)
        fed_dict[carrier_p] = np.expand_dims(b_u_d['carrier'], axis=1)
        fed_dict[consumptionability_p] = np.expand_dims(b_u_d['consumptionAbility'], axis=1)
        fed_dict[education_p] = np.expand_dims(b_u_d['education'], axis=1)
        fed_dict[gender_p] = np.expand_dims(b_u_d['gender'], axis=1)
        fed_dict[house_p] = np.expand_dims(b_u_d['house'], axis=1)
        fed_dict[os_p] = np.expand_dims(b_u_d['os'], axis=1)
        fed_dict[ct_p] = np.expand_dims(b_u_d['ct'], axis=1)
        fed_dict[marriagestatus_p] = np.expand_dims(b_u_d['marriageStatus'], axis=1)
        # print 121
        appidaction_li = split_vector_data(b_u_d['appIdAction'])
        # print 1212
        fed_dict[appidaction_index_p], fed_dict[appidaction_val_p] = appidaction_li[0], appidaction_li[1]
        appIdInstall_li = split_vector_data(b_u_d['appIdInstall'])
        fed_dict[appIdInstall_index_p], fed_dict[appIdInstall_val_p] = appIdInstall_li[0], appIdInstall_li[1]
        # print 122
        interest1_li = split_vector_data(b_u_d['interest1'])
        fed_dict[interest1_index_p], fed_dict[interest1_val_p]  = interest1_li[0], interest1_li[1]
        interest2_li = split_vector_data(b_u_d['interest2'])
        fed_dict[interest2_index_p], fed_dict[interest2_val_p] = interest2_li[0], interest2_li[1]
        interest3_li = split_vector_data(b_u_d['interest3'])
        fed_dict[interest3_index_p], fed_dict[interest3_val_p] = interest3_li[0], interest3_li[1]
        interest4_li = split_vector_data(b_u_d['interest4'])
        fed_dict[interest4_index_p], fed_dict[interest4_val_p] = interest4_li[0], interest4_li[1]
        interest5_li = split_vector_data(b_u_d['interest5'])
        fed_dict[interest5_index_p], fed_dict[interest5_val_p] = interest5_li[0], interest5_li[1]
        # print 123
        kw1_li = split_vector_data(b_u_d['kw1'])
        fed_dict[kw1_index_p], fed_dict[kw1_val_p] = kw1_li[0], kw1_li[1]
        kw2_li = split_vector_data(b_u_d['kw2'])
        fed_dict[kw2_index_p], fed_dict[kw2_val_p] = kw2_li[0], kw2_li[1]
        kw3_li = split_vector_data(b_u_d['kw3'])
        fed_dict[kw3_index_p], fed_dict[kw3_val_p] = kw3_li[0], kw3_li[1]
        # print 124
        topic1_li = split_vector_data(b_u_d['topic1'])
        fed_dict[topic1_index_p], fed_dict[topic1_val_p] = topic1_li[0], topic1_li[1]
        topic2_li = split_vector_data(b_u_d['topic2'])
        fed_dict[topic2_index_p], fed_dict[topic2_val_p] = topic2_li[0], topic2_li[1]
        topic3_li = split_vector_data(b_u_d['topic3'])
        fed_dict[topic3_index_p], fed_dict[topic3_val_p] = topic3_li[0], topic3_li[1]
        # print 125
        # # ad
        fed_dict[aid_p] = np.expand_dims(b_a_d['aid'], axis=1)
        fed_dict[advertiserid_p] = np.expand_dims(b_a_d['advertiserId'], axis=1)
        fed_dict[campaignid_p] = np.expand_dims(b_a_d['campaignId'], axis=1)
        fed_dict[creativeid_p] = np.expand_dims(b_a_d['creativeId'], axis=1)
        fed_dict[adcategoryid_p] = np.expand_dims(b_a_d['adCategoryId'], axis=1)
        fed_dict[productid_p] = np.expand_dims(b_a_d['productId'], axis=1)
        fed_dict[producttype_p] = np.expand_dims(b_a_d['productType'], axis=1)

        # print 13
        # fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1)
        if graph_hyper_params['creativeSize_pro'] == 'min_max':
            fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1).astype(np.float32)
        elif graph_hyper_params['creativeSize_pro'] == 'li_san':
            fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1)
        else:
            print 'wrong feed'
        # label
        # print 14
        fed_dict[true_label] = np.expand_dims(b_data['label'].values, axis=1).astype(np.float32)
        # print 15
        # d4 = datetime.now()
        # print d2-d1, d3-d2, d4-d3
        # print fed_dict[true_label]
        # print len(fed_dict[true_label]), len(fed_dict[aid_p]), len(fed_dict[uid_p]),
        return fed_dict

    def eval_on_dev(split_vector_data):
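        # Predict over the dev set in batches and return (AUC, normalized Gini).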
        e_b_s = len(dev_data) / graph_hyper_params['batch_size']
        auc_true, auc_pre = [], []
        # auc = []
        for index in tqdm(range(e_b_s)):
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(dev_data) else len(dev_data)
            b_dev_data = dev_data[start:end]
            fed_dict = get_fed_dict(b_dev_data, split_vector_data)
            fed_dict[train_p] = False
            fed_dict[dropout_p] = np.array([1.0])
            pred_value = sess.run([pred_val], feed_dict=fed_dict)

            pre_real_val = np.array(pred_value).reshape((-1))
            auc_true = auc_true + list(b_dev_data['label'].values)
            auc_pre = auc_pre + pre_real_val.tolist()
            # auc.append()
        # auc_pre = np.array(auc_pre)
        # auc_pre = np.exp(auc_pre) / np.exp(auc_pre).sum()
        # print auc_true
        # print auc_pre
        fpr, tpr, thresholds = metrics.roc_curve(auc_true, auc_pre, pos_label=1)
        # >> > metrics.auc(fpr, tpr)
        return metrics.auc(fpr, tpr), gini_norm(auc_true, auc_pre)

    # def predict_csv(split_vector_data):
    #     e_b_s = len(predict_data) / graph_hyper_params['batch_size'] if len(predict_data) % graph_hyper_params['batch_size']==0 else len(predict_data) / graph_hyper_params['batch_size'] + 1
    #     pred = []
    #     for index in tqdm(range(e_b_s)):
    #         start = index * graph_hyper_params['batch_size']
    #         end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(predict_data) else len(predict_data)+1
    #         b_predict_data = predict_data[start:end]
    #         # print len(b_predict_data), start, end
    #         fed_dict = get_fed_dict(b_predict_data, split_vector_data)
    #         pred_value = sess.run([pred_val], feed_dict=fed_dict)
    #         pre_real_val = np.array(pred_value).reshape((-1))
    #         pred = pred + pre_real_val.tolist()
    #     # print len(pred), len(predict_data)
    #     predict_data['pred_label'] = pred
    #     csv_data = predict_data[['ori_aid', 'ori_uid', 'pred_label']]
    #     csv_data.columns = ['aid', 'uid', 'score']
    #     csv_path = checkpoint_dir+'/submission.csv'
    #     csv_data.to_csv(csv_path, index=False)
    #     return csv_path


    def save_predict_material(user_data, ad_data):
        user_data_file = os.path.join(checkpoint_dir, 'user_data_file.csv')
        ad_data_file = os.path.join(checkpoint_dir, 'ad_data_file.csv')
        graph_hyper_params_file = os.path.join(checkpoint_dir, 'graph_hyper_params_file.pic')

        user_data.to_csv(user_data_file, index=False)
        ad_data.to_csv(ad_data_file, index=False)
        pickle.dump(graph_hyper_params, open(graph_hyper_params_file, 'w'))
        pass

    def construct_train_data(start_neg, pos_train_data, neg_train_data, graph_hyper_params):
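        # Keep all positives and pair them with the next window of roughly
        # neg_size * len(pos) negatives, wrapping around when the negatives run out.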
        # global pos_train_data, neg_train_data, start_neg
        pos_len, neg_len = len(pos_train_data), len(neg_train_data)
        # print start_neg, pos_len, neg_len
        if start_neg + pos_len < neg_len:
            this_neg_train_data = neg_train_data[start_neg : start_neg + graph_hyper_params['neg_size']*pos_len]
            start_neg += pos_len*graph_hyper_params['neg_size']
        else:
            this_neg_train_data = pd.concat([neg_train_data[start_neg : neg_len], neg_train_data[0 : graph_hyper_params['neg_size']*pos_len - (neg_len-start_neg)]])
            start_neg = graph_hyper_params['neg_size']*pos_len - (neg_len-start_neg)
        train_data = pd.concat([pos_train_data, this_neg_train_data])
        return shuffle(train_data), start_neg

    best_auc = 0.0
    start_neg = 0
    split_vector_data = SplitClass()
    save_data_for_predict = False
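    # Training loop: rebuild the train set every epoch, evaluate periodically,
    # checkpoint on dev-AUC improvements, and stop early after repeated drops.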
    for epoch in range(graph_hyper_params['epoch']):
        train_data, start_neg = construct_train_data(start_neg, pos_train_data, neg_train_data, graph_hyper_params)
        if start_neg < graph_hyper_params['neg_size'] * len(pos_train_data):
            neg_train_data = shuffle(neg_train_data)

        e_b_s = len(train_data) / graph_hyper_params['batch_size']
        one_epoch_loss, one_epoch_batchnum = 0.0, 0.0
        early_stop_hit = 0
        split_vector_data.clean()
        for index in tqdm(range(e_b_s)):
            # print 0
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(train_data) else len(train_data)
            b_data = train_data[start:end]

            # print 1
            # d1 = datetime.now()
            fed_dict = get_fed_dict(b_data, split_vector_data)
            fed_dict[train_p] = True
            fed_dict[dropout_p] = np.array([graph_hyper_params['dropout_keep']])
            # d2 = datetime.now()
            # print 2
            _, loss_val = sess.run([train_step, model_loss], feed_dict=fed_dict)
            # print 3
            # d3 = datetime.now()
            # print d2-d1, d3-d2
            one_epoch_loss += loss_val
            one_epoch_batchnum += 1.0

            if graph_hyper_params['debug']:
                print datetime.now(), index, loss_val

            if index != 0 and index % ((e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                auc, gn = eval_on_dev(split_vector_data)
                best_auc = max(auc, best_auc)
                format_str = '%s epoch=%.2f avg_loss=%.4f auc=%.4f best_auc=%.4f gn=%.4f'
                print (format_str % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), (epoch + 1.0 * (index+1) / e_b_s), one_epoch_loss / one_epoch_batchnum, auc, best_auc, gn))
                one_epoch_loss = one_epoch_batchnum = 0.0

                # global split_cache, split_cache_rem_size
                # if len(split_cache) > 10000000:
                #     keys = split_cache.keys()
                #     for key in keys:
                #         if split_cache_rem_size[key] < 2:
                #             del split_cache_rem_size[key], split_cache[key]


                if (auc >= best_auc and (epoch + 1.0 * (index+1) / e_b_s) >= 0.6 and auc > 0.72) or (auc >= best_auc and auc>0.75):
                    current_step = tf.train.global_step(sess, global_step)
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("saved model to: %s" % path)

                    if not save_data_for_predict:
                        udp = pd.concat([relevant_user_data, no_relevant_user_data])
                        save_predict_material(udp, ad_data)
                        save_data_for_predict = True
                    early_stop_hit = 0
                elif auc < best_auc and abs(auc-best_auc) > 0.02:
                    early_stop_hit += 1
                    if early_stop_hit >= 3:
                        print 'early_stop_best:', best_auc
                        import sys
                        sys.exit(0)

                    # csv_path = predict_csv(split_vector_data)
                    # print 'save csv to: ', csv_path
    pass
Example #2
def train_eval_model(graph_hyper_params):
    # global pos_train_data, neg_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict
    all_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict, re_uid_map, re_aid_map = get_prod_dataset(
        graph_hyper_params['formal'])
    print graph_hyper_params

    var_name_val = {}
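    # Warm start: when checkpoint dirs for a wide model (mp_w) and a deep model
    # (mp_d) are given, restore each in its own graph and stash its trainable
    # variables by name so they can be copied into the combined graph below.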
    if graph_hyper_params['mp_w'] is not None and graph_hyper_params[
            'mp_d'] is not None:
        print 'reload model start !'
        print '\t reload wide'
        graph_wide = tf.Graph()
        with graph_wide.as_default():
            checkpoint_file = tf.train.latest_checkpoint(
                graph_hyper_params['mp_w'])
            wide_saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            sess_wide = tf.Session()
            wide_saver.restore(sess_wide, checkpoint_file)
            for na in graph_wide.get_collection('trainable_variables'):
                if na.name in var_name_val:
                    print "wrong 1!"
                var_name_val[na.name] = np.array(sess_wide.run(na)).astype(
                    np.float32)
            sess_wide.close()
        # print '1', var_name_val.keys()
        print '\t reload deep'
        graph_deep = tf.Graph()
        with graph_deep.as_default():
            checkpoint_file = tf.train.latest_checkpoint(
                graph_hyper_params['mp_d'])
            deep_saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            sess_deep = tf.Session(graph=graph_deep)
            deep_saver.restore(sess_deep, checkpoint_file)
            for na in graph_deep.get_collection('trainable_variables'):
                if na.name in var_name_val:
                    print "wrong 2!"
                var_name_val[na.name] = np.array(sess_deep.run(na)).astype(
                    np.float32)
            sess_deep.close()
        print 'reload model done !'

    graph = tf.Graph()
    with graph.as_default():
        # re-split into train / dev
        o_dev_size = graph_hyper_params['o_dev_size']
        atd = pd.concat([all_train_data, dev_data])
        pos_atd, neg_atd = atd[atd['label'] == 1], atd[atd['label'] == 0]
        dev_data = pd.concat([pos_atd[:o_dev_size], neg_atd[:o_dev_size]])
        pos_train_data, neg_train_data = pos_atd[o_dev_size:], neg_atd[
            o_dev_size:]
        print 'dev_size', len(dev_data)
        print 'pos-neg-all', len(pos_train_data), len(neg_train_data), len(
            all_train_data)
        del all_train_data
        gc.collect()
        # **********************************

        print 'map row start'
        uid_map_row, aid_map_row = dict(
            zip(relevant_user_data['uid'].values,
                np.arange(len(relevant_user_data)))), dict(
                    zip(ad_data['aid'].values, np.arange(len(ad_data))))
        print 'map row end'

        # handle the continuous feature creativeSize
        if graph_hyper_params['creativeSize_pro'] == 'min_max':
            print 'min-max norm creativeSize', ad_data['creativeSize'].max(
            ), ad_data['creativeSize'].min()
            norm_cs = (
                ad_data['creativeSize'] * 1.0 - ad_data['creativeSize'].min()
            ) / (ad_data['creativeSize'].max() - ad_data['creativeSize'].min())
            ad_data = ad_data.drop(['creativeSize'], axis=1)
            ad_data['creativeSize'] = norm_cs
            creativesize_p = tf.placeholder(tf.float32, [None, 1],
                                            name="creativeSize")
        elif graph_hyper_params['creativeSize_pro'] == 'li_san':
            print 'discretize creativeSize'
            sh = ShrinkSep()
            ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh)
            feature_conf_dict['creativeSize'] = len(sh.d) + 1
            creativesize_p = tf.placeholder(tf.int32, [None, 1],
                                            name="creativeSize")
        else:
            print 'no process creativeSize'

        print 'for cross feature'
        sh2 = ShrinkSep()
        ad_data['creativeSize_cross'] = ad_data['creativeSize'].apply(sh2)
        feature_conf_dict['creativeSize_cross'] = len(sh2.d) + 1

        print feature_conf_dict
        # ****************************************************************** place holder start
        uid_p = tf.placeholder(tf.int32, [None, 1], name="uid")
        lbs_p = tf.placeholder(tf.int32, [None, 1], name="LBS")
        age_p = tf.placeholder(tf.int32, [None, 1], name="age")

        carrier_p = tf.placeholder(tf.int32, [None, 1], name="carrier")
        consumptionability_p = tf.placeholder(tf.int32, [None, 1],
                                              name="consumptionAbility")
        education_p = tf.placeholder(tf.int32, [None, 1], name="education")
        gender_p = tf.placeholder(tf.int32, [None, 1], name="gender")
        house_p = tf.placeholder(tf.int32, [None, 1], name="house")
        os_p = tf.placeholder(tf.int32, [None, 1], name="os")
        ct_p = tf.placeholder(tf.int32, [None, 1], name="ct")
        marriagestatus_p = tf.placeholder(tf.int32, [None, 1],
                                          name="marriageStatus")

        appidaction_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['appIdAction'][1]],
            name="appidaction_index")
        appidaction_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['appIdAction'][1]],
            name="appidaction_val")
        appIdInstall_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['appIdInstall'][1]],
            name="appIdInstall_index")
        appIdInstall_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['appIdInstall'][1]],
            name="appIdInstall_val")

        interest1_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['interest1'][0]],
            name="interest1_index")
        interest1_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['interest1'][0]],
            name="interest1_val")
        interest2_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['interest2'][0]],
            name="interest2_index")
        interest2_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['interest2'][0]],
            name="interest2_val")
        interest3_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['interest3'][0]],
            name="interest3_index")
        interest3_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['interest3'][0]],
            name="interest3_val")
        interest4_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['interest4'][0]],
            name="interest4_index")
        interest4_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['interest4'][0]],
            name="interest4_val")
        interest5_index_p = tf.placeholder(
            tf.int32, [None, feature_conf_dict['interest5'][0]],
            name="interest5_index")
        interest5_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['interest5'][0]],
            name="interest5_val")

        kw1_index_p = tf.placeholder(tf.int32,
                                     [None, feature_conf_dict['kw1'][1]],
                                     name="kw1_index")
        kw1_val_p = tf.placeholder(tf.float32,
                                   [None, 1, feature_conf_dict['kw1'][1]],
                                   name="kw1_val")
        kw2_index_p = tf.placeholder(tf.int32,
                                     [None, feature_conf_dict['kw2'][1]],
                                     name="kw2_index")
        kw2_val_p = tf.placeholder(tf.float32,
                                   [None, 1, feature_conf_dict['kw2'][1]],
                                   name="kw2_val")
        kw3_index_p = tf.placeholder(tf.int32,
                                     [None, feature_conf_dict['kw3'][1]],
                                     name="kw3_index")
        kw3_val_p = tf.placeholder(tf.float32,
                                   [None, 1, feature_conf_dict['kw3'][1]],
                                   name="kw3_val")

        topic1_index_p = tf.placeholder(tf.int32,
                                        [None, feature_conf_dict['topic1'][1]],
                                        name="topic1_index")
        topic1_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['topic1'][1]],
            name="topic1_val")
        topic2_index_p = tf.placeholder(tf.int32,
                                        [None, feature_conf_dict['topic2'][1]],
                                        name="topic2_index")
        topic2_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['topic2'][1]],
            name="topic2_val")
        topic3_index_p = tf.placeholder(tf.int32,
                                        [None, feature_conf_dict['topic3'][1]],
                                        name="topic3_index")
        topic3_val_p = tf.placeholder(
            tf.float32, [None, 1, feature_conf_dict['topic3'][1]],
            name="topic3_val")

        aid_p = tf.placeholder(tf.int32, [None, 1], name="aid")
        advertiserid_p = tf.placeholder(tf.int32, [None, 1],
                                        name="advertiserId")
        campaignid_p = tf.placeholder(tf.int32, [None, 1], name="campaignId")
        creativeid_p = tf.placeholder(tf.int32, [None, 1], name="creativeId")
        adcategoryid_p = tf.placeholder(tf.int32, [None, 1],
                                        name="adCategoryId")
        productid_p = tf.placeholder(tf.int32, [None, 1], name="productId")
        producttype_p = tf.placeholder(tf.int32, [None, 1], name="productType")

        # for cross part
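        # Compute per-feature offsets into one concatenated id space for user and
        # ad features; user_all_len * ad_all_len is the id range reserved for the
        # cross-feature embedding.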
        user_input_len, user_all_len, user_feature_start = 0, 0, {}
        ad_input_len, ad_all_len, ad_feature_start = 0, 0, {}
        for fea in user_features:
            if fea == 'uid':
                continue
            user_feature_start[fea] = user_all_len
            if type(feature_conf_dict[fea]) is int:
                user_input_len += 1
                user_all_len += feature_conf_dict[fea]
            elif 'interest' in fea:
                user_input_len += feature_conf_dict[fea][0]
                user_all_len += feature_conf_dict[fea][0]
            else:
                user_input_len += feature_conf_dict[fea][1]
                user_all_len += feature_conf_dict[fea][0]
        for fea in ad_features_for_cross:
            if fea == 'aid':
                continue
            ad_feature_start[fea] = ad_all_len
            if type(feature_conf_dict[fea]) is int:
                ad_input_len += 1
                ad_all_len += feature_conf_dict[fea]
            else:
                ad_input_len += feature_conf_dict[fea][1]
                ad_all_len += feature_conf_dict[fea][0]
        # cross_ind_p = tf.placeholder(tf.int32, [None, user_input_len*ad_input_len], name="productType")
        # cross_val_p = tf.placeholder(tf.float32, [None, 1, user_input_len*ad_input_len], name="productType")
        feature_conf_dict['cross_len_for_emb'] = user_all_len * ad_all_len
        print '-------cross-info-start-------'
        print 'user_input_len_all_len', user_input_len, user_all_len
        print 'ad_input_len_all_len', ad_input_len, ad_all_len
        print '-------cross-info-end---------'

        true_label = tf.placeholder(tf.float32, [None, 1], name="true_label")

        train_p = tf.placeholder(tf.bool, name="train_p")
        dropout_p = tf.placeholder(tf.float32, shape=[None], name="dropout_p")
        # ****************************************************************** place holder end

        pred_val, model_loss, network_params = inference(
            uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p,
            gender_p, house_p, os_p, ct_p, marriagestatus_p,
            appidaction_index_p, appidaction_val_p, appIdInstall_index_p,
            appIdInstall_val_p, interest1_index_p, interest1_val_p,
            interest2_index_p, interest2_val_p, interest3_index_p,
            interest3_val_p, interest4_index_p, interest4_val_p,
            interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p,
            kw2_index_p, kw2_val_p, kw3_index_p, kw3_val_p, topic1_index_p,
            topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p,
            topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p,
            adcategoryid_p, productid_p, producttype_p, creativesize_p,
            true_label, feature_conf_dict, graph_hyper_params, train_p,
            dropout_p, user_feature_start, ad_feature_start, user_input_len,
            user_all_len, ad_all_len)

        # pred_val_for_pre, _, __ = inference(uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p,
        #                                                  gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p,
        #                                                  appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p,
        #                                                  interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p,
        #                                                  kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p,
        #                                                  topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params, istrain=False)

        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_step = None
        learning_rate = tf.Variable(float(graph_hyper_params['learn_rate']),
                                    trainable=False,
                                    dtype=tf.float32)
        learning_rate_decay_op = learning_rate.assign(learning_rate * 0.5)
        if graph_hyper_params['opt'] == 'adam':
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(
                model_loss, global_step=global_step)
        elif graph_hyper_params['opt'] == 'adgrad':
            train_step = tf.train.AdagradOptimizer(learning_rate).minimize(
                model_loss, global_step=global_step)
        elif graph_hyper_params['opt'] == 'adadelta':
            train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize(
                model_loss, global_step=global_step)
        elif graph_hyper_params['opt'] == 'ftrl':
            train_step = tf.train.FtrlOptimizer(learning_rate).minimize(
                model_loss, global_step=global_step)
        elif graph_hyper_params['opt'] == 'sgd':
            train_step = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(model_loss, global_step=global_step)
        else:
            print 'No optimizer !'

        time_now = 'mtyp' + str(graph_hyper_params['mtyp']) + datetime.now(
        ).strftime("-%Y-%m-%d-%H-%M-%S")
        checkpoint_dir = os.path.abspath("./checkpoints/dmf_tencent/" +
                                         time_now)
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # config = tf.ConfigProto()
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5
        # sess = tf.Session(config=config)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

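        # When mtyp == 4, copy the reloaded wide/deep weights into the trainable
        # variables of this combined graph that share the same name.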
        if graph_hyper_params['mtyp'] == 4:
            for na in graph.get_collection('trainable_variables'):
                if na.name in var_name_val:
                    print 'assign: ', na.name
                    sess.run(
                        tf.assign(graph.get_tensor_by_name(na.name),
                                  var_name_val[na.name]))
                    del var_name_val[na.name]
                else:
                    print 'not in: ', na.name

        def get_fed_dict(b_data, split_vector_data, feature_conf_dict):
            if graph_hyper_params['formal']:
                aid_list = b_data['aid'].values
                uid_list = b_data['uid'].values
            else:
                if len(b_data) == 4:
                    aid_list, uid_list = [11, 11, 11, 11], [11, 190, 191, 11]
                elif len(b_data) == 3:
                    aid_list, uid_list = [11, 11, 11], [11, 190, 191]
                else:
                    aid_list, uid_list = [11], [11]

            # print 11
            # d1 = datetime.now()
            b_u_d, b_a_d = [], []
            for b_uid in uid_list:
                b_u_d.append(relevant_user_data.iloc[uid_map_row[b_uid]])
            for b_aid in aid_list:
                b_a_d.append(ad_data.iloc[aid_map_row[b_aid]])
            b_u_d = pd.concat(b_u_d, axis=1).transpose()
            b_a_d = pd.concat(b_a_d, axis=1).transpose()
            # d3 = datetime.now()

            # print 12
            # pd.concat([data.iloc[1].to_frame(), data.iloc[2].to_frame()], axis=1).transpose()
            fed_dict = {}
            fed_dict[uid_p] = np.expand_dims(b_u_d['uid'], axis=1)
            fed_dict[lbs_p] = np.expand_dims(b_u_d['LBS'], axis=1)
            fed_dict[age_p] = np.expand_dims(b_u_d['age'], axis=1)
            fed_dict[carrier_p] = np.expand_dims(b_u_d['carrier'], axis=1)
            fed_dict[consumptionability_p] = np.expand_dims(
                b_u_d['consumptionAbility'], axis=1)
            fed_dict[education_p] = np.expand_dims(b_u_d['education'], axis=1)
            fed_dict[gender_p] = np.expand_dims(b_u_d['gender'], axis=1)
            fed_dict[house_p] = np.expand_dims(b_u_d['house'], axis=1)
            fed_dict[os_p] = np.expand_dims(b_u_d['os'], axis=1)
            fed_dict[ct_p] = np.expand_dims(b_u_d['ct'], axis=1)
            fed_dict[marriagestatus_p] = np.expand_dims(
                b_u_d['marriageStatus'], axis=1)
            # print 121
            appidaction_li = split_vector_data(b_u_d['appIdAction'])
            # print 1212
            fed_dict[appidaction_index_p], fed_dict[
                appidaction_val_p] = appidaction_li[0], appidaction_li[1]
            appIdInstall_li = split_vector_data(b_u_d['appIdInstall'])
            fed_dict[appIdInstall_index_p], fed_dict[
                appIdInstall_val_p] = appIdInstall_li[0], appIdInstall_li[1]
            # print 122
            interest1_li = split_vector_data(b_u_d['interest1'],
                                             interest='interest1',
                                             feature_config=feature_conf_dict)
            fed_dict[interest1_index_p], fed_dict[
                interest1_val_p] = interest1_li[0], interest1_li[1]
            interest2_li = split_vector_data(b_u_d['interest2'],
                                             interest='interest2',
                                             feature_config=feature_conf_dict)
            fed_dict[interest2_index_p], fed_dict[
                interest2_val_p] = interest2_li[0], interest2_li[1]
            interest3_li = split_vector_data(b_u_d['interest3'],
                                             interest='interest3',
                                             feature_config=feature_conf_dict)
            fed_dict[interest3_index_p], fed_dict[
                interest3_val_p] = interest3_li[0], interest3_li[1]
            interest4_li = split_vector_data(b_u_d['interest4'],
                                             interest='interest4',
                                             feature_config=feature_conf_dict)
            fed_dict[interest4_index_p], fed_dict[
                interest4_val_p] = interest4_li[0], interest4_li[1]
            interest5_li = split_vector_data(b_u_d['interest5'],
                                             interest='interest5',
                                             feature_config=feature_conf_dict)
            fed_dict[interest5_index_p], fed_dict[
                interest5_val_p] = interest5_li[0], interest5_li[1]
            # print 123
            kw1_li = split_vector_data(b_u_d['kw1'])
            fed_dict[kw1_index_p], fed_dict[kw1_val_p] = kw1_li[0], kw1_li[1]
            kw2_li = split_vector_data(b_u_d['kw2'])
            fed_dict[kw2_index_p], fed_dict[kw2_val_p] = kw2_li[0], kw2_li[1]
            kw3_li = split_vector_data(b_u_d['kw3'])
            fed_dict[kw3_index_p], fed_dict[kw3_val_p] = kw3_li[0], kw3_li[1]
            # print 124
            topic1_li = split_vector_data(b_u_d['topic1'])
            fed_dict[topic1_index_p], fed_dict[topic1_val_p] = topic1_li[
                0], topic1_li[1]
            topic2_li = split_vector_data(b_u_d['topic2'])
            fed_dict[topic2_index_p], fed_dict[topic2_val_p] = topic2_li[
                0], topic2_li[1]
            topic3_li = split_vector_data(b_u_d['topic3'])
            fed_dict[topic3_index_p], fed_dict[topic3_val_p] = topic3_li[
                0], topic3_li[1]
            # print 125
            # cross user
            # user_vec_fed_index = np.hstack([
            #     fed_dict[lbs_p] + user_feature_start['LBS'],
            #     fed_dict[age_p] + user_feature_start['age'],
            #     fed_dict[carrier_p] + user_feature_start['carrier'],
            #     fed_dict[consumptionability_p] + user_feature_start['consumptionAbility'],
            #     fed_dict[education_p] + user_feature_start['education'],
            #     fed_dict[gender_p] + user_feature_start['gender'],
            #     fed_dict[house_p] + user_feature_start['house'],
            #     fed_dict[os_p] + user_feature_start['os'],
            #     fed_dict[ct_p] + user_feature_start['ct'],
            #     fed_dict[marriagestatus_p] + user_feature_start['marriageStatus'],
            #     fed_dict[appidaction_index_p] + user_feature_start['appIdAction'],
            #     fed_dict[appIdInstall_index_p] + user_feature_start['appIdInstall'],
            #     fed_dict[interest1_index_p] + user_feature_start['interest1'],
            #     fed_dict[interest2_index_p] + user_feature_start['interest2'],
            #     fed_dict[interest3_index_p] + user_feature_start['interest3'],
            #     fed_dict[interest4_index_p] + user_feature_start['interest4'],
            #     fed_dict[interest5_index_p] + user_feature_start['interest5'],
            #     fed_dict[kw1_index_p] + user_feature_start['kw1'],
            #     fed_dict[kw2_index_p] + user_feature_start['kw2'],
            #     fed_dict[kw3_index_p] + user_feature_start['kw3'],
            #     fed_dict[topic1_index_p] + user_feature_start['topic1'],
            #     fed_dict[topic2_index_p] + user_feature_start['topic2'],
            #     fed_dict[topic3_index_p] + user_feature_start['topic3'],
            # ])
            # user_vec_fed_val = np.hstack([
            #
            # ])

            # # ad
            fed_dict[aid_p] = np.expand_dims(b_a_d['aid'], axis=1)
            fed_dict[advertiserid_p] = np.expand_dims(b_a_d['advertiserId'],
                                                      axis=1)
            fed_dict[campaignid_p] = np.expand_dims(b_a_d['campaignId'],
                                                    axis=1)
            fed_dict[creativeid_p] = np.expand_dims(b_a_d['creativeId'],
                                                    axis=1)
            fed_dict[adcategoryid_p] = np.expand_dims(b_a_d['adCategoryId'],
                                                      axis=1)
            fed_dict[productid_p] = np.expand_dims(b_a_d['productId'], axis=1)
            fed_dict[producttype_p] = np.expand_dims(b_a_d['productType'],
                                                     axis=1)

            # print 13
            # fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1)
            if graph_hyper_params['creativeSize_pro'] == 'min_max':
                fed_dict[creativesize_p] = np.expand_dims(
                    b_a_d['creativeSize'], axis=1).astype(np.float32)
            elif graph_hyper_params['creativeSize_pro'] == 'li_san':
                fed_dict[creativesize_p] = np.expand_dims(
                    b_a_d['creativeSize'], axis=1)
            else:
                print 'wrong feed'

            # cross ad
            # advec_fed = np.hstack([ fed_dict[advertiserid_p] + ad_feature_start['advertiserId'],
            #                         fed_dict[campaignid_p] + ad_feature_start['campaignId'],
            #                         fed_dict[creativeid_p] + ad_feature_start['creativeId'],
            #                         fed_dict[adcategoryid_p] + ad_feature_start['adCategoryId'],
            #                         fed_dict[productid_p] + ad_feature_start['productId'],
            #                         fed_dict[producttype_p] + ad_feature_start['productType'],
            #                         fed_dict[creativesize_p] + ad_feature_start['creativeSize_cross']])

            # label
            # print 14
            fed_dict[true_label] = np.expand_dims(b_data['label'].values,
                                                  axis=1).astype(np.float32)
            # print 15
            # d4 = datetime.now()
            # print d2-d1, d3-d2, d4-d3
            # print fed_dict[true_label]
            # print len(fed_dict[true_label]), len(fed_dict[aid_p]), len(fed_dict[uid_p]),
            return fed_dict

        def eval_on_dev(split_vector_data):
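            # Batched dev evaluation; additionally fetches intermediate tensors
            # from network_params and dumps them whenever a prediction comes
            # back NaN.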
            e_b_s = len(dev_data) / graph_hyper_params['batch_size']
            auc_true, auc_pre = [], []
            # auc = []
            for index in tqdm(range(e_b_s)):
                start = index * graph_hyper_params['batch_size']
                end = (index + 1) * graph_hyper_params['batch_size'] if (
                    index + 1) * graph_hyper_params['batch_size'] < len(
                        dev_data) else len(dev_data)
                b_dev_data = dev_data[start:end]
                fed_dict = get_fed_dict(b_dev_data, split_vector_data,
                                        feature_conf_dict)
                fed_dict[train_p] = False
                fed_dict[dropout_p] = np.array([1.0])
                pred_value, pre_pred_value, final_vec, uu, vv = sess.run(
                    [
                        pred_val, network_params[0], network_params[1],
                        network_params[2], network_params[3]
                    ],
                    feed_dict=fed_dict)

                pre_real_val = np.array(pred_value).reshape((-1))
                auc_true = auc_true + list(b_dev_data['label'].values)
                auc_pre = auc_pre + pre_real_val.tolist()

                if True in np.isnan(pre_real_val):
                    print 'contain nan: ', np.array(pre_pred_value).reshape(
                        (-1))
                    print np.array(final_vec).reshape((-1))
                    print np.array(uu).reshape((-1))
                    print np.array(vv).reshape((-1))

                # auc.append()
            # auc_pre = np.array(auc_pre)
            # auc_pre = np.exp(auc_pre) / np.exp(auc_pre).sum()
            # print auc_true
            # print auc_pre
            fpr, tpr, thresholds = metrics.roc_curve(auc_true,
                                                     auc_pre,
                                                     pos_label=1)
            auc_v, gni = metrics.auc(fpr, tpr), gini_norm(auc_true, auc_pre)

            auc_pre_2 = np.array(auc_pre)
            auc_pre_2.sort()
            print('dev_pre_top2=%.4f %.4f min2=%.4f %.4f' %
                  (auc_pre_2.tolist()[-1], auc_pre_2.tolist()[-2],
                   auc_pre_2.tolist()[0], auc_pre_2.tolist()[1]))
            return auc_v, gni

        def save_predict_material(user_data, ad_data):
            user_data_file = os.path.join(checkpoint_dir, 'user_data_file.csv')
            ad_data_file = os.path.join(checkpoint_dir, 'ad_data_file.csv')
            graph_hyper_params_file = os.path.join(
                checkpoint_dir, 'graph_hyper_params_file.pic')
            feature_conf_dict_file = os.path.join(checkpoint_dir,
                                                  'feature_conf_dict.pic')

            user_data.to_csv(user_data_file, index=False)
            ad_data.to_csv(ad_data_file, index=False)
            pickle.dump(graph_hyper_params, open(graph_hyper_params_file, 'w'))
            pickle.dump(feature_conf_dict, open(feature_conf_dict_file, 'w'))
            pass

        def construct_train_data(start_neg, pos_train_data, neg_train_data,
                                 graph_hyper_params):
            # global pos_train_data, neg_train_data, start_neg
            pos_len, neg_len = len(pos_train_data), len(neg_train_data)
            # print start_neg, pos_len, neg_len
            if start_neg + pos_len < neg_len:
                this_neg_train_data = neg_train_data[
                    start_neg:start_neg +
                    graph_hyper_params['neg_size'] * pos_len]
                start_neg += pos_len * graph_hyper_params['neg_size']
            else:
                this_neg_train_data = pd.concat([
                    neg_train_data[start_neg:neg_len],
                    neg_train_data[0:graph_hyper_params['neg_size'] * pos_len -
                                   (neg_len - start_neg)]
                ])
                start_neg = graph_hyper_params['neg_size'] * pos_len - (
                    neg_len - start_neg)
            train_data = pd.concat([pos_train_data, this_neg_train_data])
            return shuffle(train_data), start_neg

        best_auc = 0.0
        start_neg = 0
        split_vector_data = SplitClass()
        save_data_for_predict = False
        cut_lr = True
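        # cut_lr: on the first early-stop trigger halve the learning rate once
        # (learning_rate_decay_op) instead of exiting; exit on the next trigger.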
        for epoch in range(graph_hyper_params['epoch']):
            train_data, start_neg = construct_train_data(
                start_neg, pos_train_data, neg_train_data, graph_hyper_params)
            if start_neg < graph_hyper_params['neg_size'] * len(
                    pos_train_data):
                neg_train_data = shuffle(neg_train_data)

            e_b_s = len(train_data) / graph_hyper_params['batch_size']
            one_epoch_loss, one_epoch_batchnum = 0.0, 0.0
            early_stop_hit = 0
            split_vector_data.clean()
            for index in tqdm(range(e_b_s)):
                # print 0
                start = index * graph_hyper_params['batch_size']
                end = (index + 1) * graph_hyper_params['batch_size'] if (
                    index + 1) * graph_hyper_params['batch_size'] < len(
                        train_data) else len(train_data)
                b_data = train_data[start:end]

                # print 1
                # d1 = datetime.now()
                fed_dict = get_fed_dict(b_data, split_vector_data,
                                        feature_conf_dict)
                fed_dict[train_p] = True
                fed_dict[dropout_p] = np.array(
                    [graph_hyper_params['dropout_keep']])
                # d2 = datetime.now()
                # print 2
                _, loss_val, pre_tr_val = sess.run(
                    [train_step, model_loss, network_params[0]],
                    feed_dict=fed_dict)
                # print 3
                # d3 = datetime.now()
                # print d2-d1, d3-d2
                one_epoch_loss += loss_val
                one_epoch_batchnum += 1.

                if graph_hyper_params['debug']:
                    print datetime.now(), index, loss_val
                pre_tr_val = np.array(pre_tr_val).reshape((-1))
                if graph_hyper_params['debug'] or True in np.isnan(pre_tr_val):
                    print pre_tr_val

                if (graph_hyper_params['mtyp'] == 4 or index != 0
                    ) and index % (
                        (e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                    auc, gn = eval_on_dev(split_vector_data)
                    best_auc = max(auc, best_auc)
                    format_str = '%s epoch=%.2f avg_loss=%.4f auc=%.4f best_auc=%.4f gn=%.4f'
                    print(format_str %
                          (datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                           (epoch + 1.0 *
                            (index + 1) / e_b_s), one_epoch_loss /
                           one_epoch_batchnum, auc, best_auc, gn))
                    one_epoch_loss = one_epoch_batchnum = 0.0

                    if (auc >= best_auc and
                        (epoch + 1.0 *
                         (index + 1) / e_b_s) >= 0.6) or (auc >= best_auc
                                                          and auc > 0.74):
                        current_step = tf.train.global_step(sess, global_step)
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("saved model to: %s" % path)

                        if not save_data_for_predict:
                            udp = pd.concat(
                                [relevant_user_data, no_relevant_user_data])
                            save_predict_material(udp, ad_data)
                            save_data_for_predict = True
                        early_stop_hit = 0
                    elif auc < best_auc and abs(auc - best_auc) > 0.02:
                        early_stop_hit += 1
                        if early_stop_hit >= 3:
                            if cut_lr:
                                print 'cut_lr_ori:', sess.run(learning_rate)
                                sess.run(learning_rate_decay_op)
                                print 'cut_lr_now:', sess.run(learning_rate)
                                cut_lr = False
                                early_stop_hit = -5
                            else:
                                print 'early_stop_best:', best_auc
                                import sys
                                sys.exit(0)

                        # csv_path = predict_csv(split_vector_data)
                        # print 'save csv to: ', csv_path
    pass
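
The evaluation branch above implements a two-stage schedule: after three evaluations that fall more than 0.02 below the best AUC it halves the learning rate once, and only a second such streak terminates training. A minimal standalone sketch of that bookkeeping, with a toy eval_stream standing in for the periodic dev-set evaluations (illustrative numbers, not the author's):

# Standalone sketch of the LR-halving / early-stop schedule above; `eval_stream`
# is a toy stand-in for the periodic dev-set AUC evaluations.
eval_stream = [0.70, 0.72] + [0.60] * 12
best_auc, early_stop_hit, cut_lr, lr = 0.0, 0, True, 0.001
for auc in eval_stream:
    best_auc = max(auc, best_auc)
    if auc >= best_auc:
        early_stop_hit = 0           # new best: reset the counter (a checkpoint is saved here)
    elif best_auc - auc > 0.02:
        early_stop_hit += 1
        if early_stop_hit >= 3:
            if cut_lr:
                lr *= 0.5            # first streak of misses: halve the learning rate once
                cut_lr = False
                early_stop_hit = -5  # give the smaller rate extra evaluations
            else:
                print 'early stop, best_auc =', best_auc
                break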
Example #3
0
def train_eval_model(graph_hyper_params):
    def construct_train_data(pos_train_data, neg_train_data,
                             graph_hyper_params):
        # global pos_train_data, neg_train_data, start_neg
        pos_len, neg_len = len(pos_train_data), len(neg_train_data)
        # print start_neg, pos_len, neg_len
        if graph_hyper_params['neg_start'] * pos_len + graph_hyper_params[
                'neg_size'] * pos_len < neg_len:
            this_neg_train_data = neg_train_data[graph_hyper_params['neg_start'] * pos_len: \
                                                 graph_hyper_params['neg_start'] * pos_len + graph_hyper_params[
                                                     'neg_size'] * pos_len]
        else:
            print 'final ! final ! final ! final !'
            this_neg_train_data = pd.concat([
                neg_train_data[graph_hyper_params['neg_start'] * pos_len:],
                neg_train_data[:pos_len - max(
                    0, neg_len - graph_hyper_params['neg_start'] * pos_len)]
            ])
        train_data = pd.concat([pos_train_data, this_neg_train_data])
        return shuffle(train_data)
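    # Worked example of the window selection in construct_train_data (toy
    # numbers): with pos_len = 100, neg_start = 2 and neg_size = 3 the slice
    # taken is neg_train_data[200:500], i.e. a fresh block of neg_size * pos_len
    # negatives per run; once that window runs past neg_len, the else branch
    # wraps around to the head of neg_train_data.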

    print graph_hyper_params

    print 'read data start !'
    pos_train_data, neg_train_data, predict_data1, predict_data2, user_data, ad_data, feature_conf_dict, uid_map, aid_map = get_prod_dataset(
        graph_hyper_params['formal'])
    print 'read data done !'

    # re-split train / dev
    # o_dev_size = graph_hyper_params['o_dev_size']
    # dev_data = pd.concat([pos_train_data[:o_dev_size], neg_train_data[:o_dev_size]])
    # pos_train_data, neg_train_data = pos_train_data[o_dev_size:], neg_train_data[o_dev_size:]
    # print 'dev_size:', len(dev_data)
    # print 'pos-neg-len:', len(pos_train_data), len(neg_train_data)

    train_data = construct_train_data(pos_train_data, neg_train_data,
                                      graph_hyper_params)
    # if graph_hyper_params['only_train']:
    #     if graph_hyper_params['formal']:
    #         formal_set = set(list(train_data['uid']) + list(dev_data['uid']))
    #     else:
    #         formal_set = set(list(train_data['uid']) + list(dev_data['uid']) + [1, 2, 3, 4])
    #     user_data = user_data[user_data['uid'].isin(formal_set)]

    print 'map row start'
    user_data_train = user_data[user_data['uid'].isin(train_data['uid'])]
    user_data_predict1 = user_data[user_data['uid'].isin(predict_data1['uid'])]
    user_data_predict2 = user_data[user_data['uid'].isin(predict_data2['uid'])]
    del user_data
    gc.collect()
    uid_map_row_train, aid_map_row = dict(
        zip(user_data_train['uid'].values,
            np.arange(len(user_data_train)))), dict(
                zip(ad_data['aid'].values, np.arange(len(ad_data))))
    uid_map_row_predict_1 = dict(
        zip(user_data_predict1['uid'].values,
            np.arange(len(user_data_predict1))))
    uid_map_row_predict_2 = dict(
        zip(user_data_predict2['uid'].values,
            np.arange(len(user_data_predict2))))
    print 'map row end'
    print feature_conf_dict

    # graph = tf.Graph()
    # with graph.as_default():
    # handling of the single continuous feature creativeSize
    if graph_hyper_params['creativeSize_pro'] == 'min_max':
        print 'min-max norm creativeSize', ad_data['creativeSize'].max(
        ), ad_data['creativeSize'].min()
        norm_cs = (
            ad_data['creativeSize'] * 1.0 - ad_data['creativeSize'].min()) / (
                ad_data['creativeSize'].max() - ad_data['creativeSize'].min())
        ad_data = ad_data.drop(['creativeSize'], axis=1)
        ad_data['creativeSize'] = norm_cs
        creativesize_p = tf.placeholder(tf.float32, [None, 1],
                                        name="creativeSize")
    elif graph_hyper_params['creativeSize_pro'] == 'li_san':
        print 'discretize creativeSize'
        sh = ShrinkSep()
        ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh)
        feature_conf_dict['creativeSize'] = len(sh.d) + 1
        creativesize_p = tf.placeholder(tf.int32, [None, 1],
                                        name="creativeSize")
    else:
        print 'no process creativeSize'
    # ****************************************************************** place holder start
    uid_p = tf.placeholder(tf.int32, [None, 1], name="uid")
    lbs_p = tf.placeholder(tf.int32, [None, 1], name="LBS")
    age_p = tf.placeholder(tf.int32, [None, 1], name="age")

    carrier_p = tf.placeholder(tf.int32, [None, 1], name="carrier")
    consumptionability_p = tf.placeholder(tf.int32, [None, 1],
                                          name="consumptionAbility")
    education_p = tf.placeholder(tf.int32, [None, 1], name="education")
    gender_p = tf.placeholder(tf.int32, [None, 1], name="gender")
    house_p = tf.placeholder(tf.int32, [None, 1], name="house")
    os_p = tf.placeholder(tf.int32, [None, 1], name="os")
    ct_p = tf.placeholder(tf.int32, [None, 1], name="ct")
    # marriagestatus_p = tf.placeholder(tf.int32, [None, 1], name="marriageStatus")

    appidaction_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['appIdAction'][1]],
        name="appidaction_index")
    appidaction_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['appIdAction'][1]],
        name="appidaction_val")
    appIdInstall_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['appIdInstall'][1]],
        name="appIdInstall_index")
    appIdInstall_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['appIdInstall'][1]],
        name="appIdInstall_val")

    marriagestatus_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['marriageStatus'][0]],
        name="marriageStatus_index")
    marriagestatus_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['marriageStatus'][0]],
        name="marriageStatus_val")
    interest1_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['interest1'][0]],
        name="interest1_index")
    interest1_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['interest1'][0]],
        name="interest1_val")
    interest2_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['interest2'][0]],
        name="interest2_index")
    interest2_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['interest2'][0]],
        name="interest2_val")
    interest3_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['interest3'][0]],
        name="interest3_index")
    interest3_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['interest3'][0]],
        name="interest3_val")
    interest4_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['interest4'][0]],
        name="interest4_index")
    interest4_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['interest4'][0]],
        name="interest4_val")
    interest5_index_p = tf.placeholder(
        tf.int32, [None, feature_conf_dict['interest5'][0]],
        name="interest5_index")
    interest5_val_p = tf.placeholder(
        tf.float32, [None, 1, feature_conf_dict['interest5'][0]],
        name="interest5_val")

    # kmeans type
    # clu_200_p = tf.placeholder(tf.int32, [None, 1], name="clu_200_p")
    # clu_400_p = tf.placeholder(tf.int32, [None, 1], name="clu_400_p")

    kw1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw1'][1]],
                                 name="kw1_index")
    kw1_val_p = tf.placeholder(tf.float32,
                               [None, 1, feature_conf_dict['kw1'][1]],
                               name="kw1_val")
    kw2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw2'][1]],
                                 name="kw2_index")
    kw2_val_p = tf.placeholder(tf.float32,
                               [None, 1, feature_conf_dict['kw2'][1]],
                               name="kw2_val")
    kw3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw3'][1]],
                                 name="kw3_index")
    kw3_val_p = tf.placeholder(tf.float32,
                               [None, 1, feature_conf_dict['kw3'][1]],
                               name="kw3_val")

    topic1_index_p = tf.placeholder(tf.int32,
                                    [None, feature_conf_dict['topic1'][1]],
                                    name="topic1_index")
    topic1_val_p = tf.placeholder(tf.float32,
                                  [None, 1, feature_conf_dict['topic1'][1]],
                                  name="topic1_val")
    topic2_index_p = tf.placeholder(tf.int32,
                                    [None, feature_conf_dict['topic2'][1]],
                                    name="topic2_index")
    topic2_val_p = tf.placeholder(tf.float32,
                                  [None, 1, feature_conf_dict['topic2'][1]],
                                  name="topic2_val")
    topic3_index_p = tf.placeholder(tf.int32,
                                    [None, feature_conf_dict['topic3'][1]],
                                    name="topic3_index")
    topic3_val_p = tf.placeholder(tf.float32,
                                  [None, 1, feature_conf_dict['topic3'][1]],
                                  name="topic3_val")

    aid_p = tf.placeholder(tf.int32, [None, 1], name="aid")
    advertiserid_p = tf.placeholder(tf.int32, [None, 1], name="advertiserId")
    campaignid_p = tf.placeholder(tf.int32, [None, 1], name="campaignId")
    creativeid_p = tf.placeholder(tf.int32, [None, 1], name="creativeId")
    adcategoryid_p = tf.placeholder(tf.int32, [None, 1], name="adCategoryId")
    productid_p = tf.placeholder(tf.int32, [None, 1], name="productId")
    producttype_p = tf.placeholder(tf.int32, [None, 1], name="productType")

    true_label = tf.placeholder(tf.float32, [None, 1], name="true_label")
    # ****************************************************************** place holder end

    pred_val, model_loss, network_params = inference(
        uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p,
        gender_p, house_p, os_p, ct_p, marriagestatus_index_p,
        marriagestatus_val_p, appidaction_index_p, appidaction_val_p,
        appIdInstall_index_p, appIdInstall_val_p, interest1_index_p,
        interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p,
        interest3_val_p, interest4_index_p, interest4_val_p, interest5_index_p,
        interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p,
        kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p,
        topic2_val_p, topic3_index_p, topic3_val_p, aid_p, advertiserid_p,
        campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p,
        creativesize_p, true_label, feature_conf_dict, graph_hyper_params)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_step = None
    learning_rate = tf.Variable(float(graph_hyper_params['learn_rate']),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.5)
    if graph_hyper_params['opt'] == 'adam':
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adgrad':
        train_step = tf.train.AdagradOptimizer(learning_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adadelta':
        train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'ftrl':
        train_step = tf.train.FtrlOptimizer(learning_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'sgd':
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            model_loss, global_step=global_step)
    else:
        print 'No optimizer !'

    time_now = 'model_' + str(graph_hyper_params['model']) + datetime.now(
    ).strftime("_%Y_%m_%d_%H_%M_%S")
    checkpoint_dir = os.path.abspath("./checkpoints/dmf_tencent/" + time_now)
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    def get_fed_dict(b_data,
                     split_vector_data,
                     feature_conf_dict,
                     user_data_in,
                     user_data_in_map_row,
                     predict=False):
        if graph_hyper_params['formal']:
            aid_list = b_data['aid'].values
            uid_list = b_data['uid'].values
        else:
            if len(b_data) == 4:
                aid_list, uid_list = [1, 2, 3, 4], [1, 2, 3, 4]
            elif len(b_data) == 3:
                aid_list, uid_list = [1, 2, 3], [1, 2, 3]
            elif len(b_data) == 2:
                aid_list, uid_list = [1, 2], [1, 2]
            else:
                aid_list, uid_list = [1], [1]

        # print 11
        # d1 = datetime.now()
        b_u_d, b_a_d = [], []
        for b_uid in uid_list:
            b_u_d.append(user_data_in.iloc[user_data_in_map_row[b_uid]])
            # if predict == 0:
            #     b_u_d.append(user_data_train.iloc[uid_map_row_train[b_uid]])
            # elif predict == 1:
            #     b_u_d.append(user_data_predict1.iloc[uid_map_row_predict_1[b_uid]])
            # elif predict == 2:
            #     b_u_d.append(user_data_predict2.iloc[uid_map_row_predict_2[b_uid]])
            # else:
            #     print 'fed wrong!'
        for b_aid in aid_list:
            b_a_d.append(ad_data.iloc[aid_map_row[b_aid]])
        b_u_d = pd.concat(b_u_d, axis=1).transpose()
        b_a_d = pd.concat(b_a_d, axis=1).transpose()
        # d3 = datetime.now()

        # print 12
        # pd.concat([data.iloc[1].to_frame(), data.iloc[2].to_frame()], axis=1).transpose()
        fed_dict = {}
        fed_dict[uid_p] = np.expand_dims(b_u_d['uid'], axis=1)
        fed_dict[lbs_p] = np.expand_dims(b_u_d['LBS'], axis=1)
        fed_dict[age_p] = np.expand_dims(b_u_d['age'], axis=1)
        fed_dict[carrier_p] = np.expand_dims(b_u_d['carrier'], axis=1)
        fed_dict[consumptionability_p] = np.expand_dims(
            b_u_d['consumptionAbility'], axis=1)
        fed_dict[education_p] = np.expand_dims(b_u_d['education'], axis=1)
        fed_dict[gender_p] = np.expand_dims(b_u_d['gender'], axis=1)
        fed_dict[house_p] = np.expand_dims(b_u_d['house'], axis=1)
        fed_dict[os_p] = np.expand_dims(b_u_d['os'], axis=1)
        fed_dict[ct_p] = np.expand_dims(b_u_d['ct'], axis=1)
        # fed_dict[marriagestatus_p] = np.expand_dims(b_u_d['marriageStatus'], axis=1)
        # print 121
        appidaction_li = split_vector_data(b_u_d['appIdAction'])
        # print 1212
        fed_dict[appidaction_index_p], fed_dict[
            appidaction_val_p] = appidaction_li[0], appidaction_li[1]
        appIdInstall_li = split_vector_data(b_u_d['appIdInstall'])
        fed_dict[appIdInstall_index_p], fed_dict[
            appIdInstall_val_p] = appIdInstall_li[0], appIdInstall_li[1]
        # print 122
        marriagestatus_li = split_vector_data(b_u_d['marriageStatus'],
                                              interest='marriageStatus',
                                              feature_config=feature_conf_dict)
        fed_dict[marriagestatus_index_p], fed_dict[
            marriagestatus_val_p] = marriagestatus_li[0], marriagestatus_li[1]
        interest1_li = split_vector_data(b_u_d['interest1'],
                                         interest='interest1',
                                         feature_config=feature_conf_dict)
        fed_dict[interest1_index_p], fed_dict[interest1_val_p] = interest1_li[
            0], interest1_li[1]
        interest2_li = split_vector_data(b_u_d['interest2'],
                                         interest='interest2',
                                         feature_config=feature_conf_dict)
        fed_dict[interest2_index_p], fed_dict[interest2_val_p] = interest2_li[
            0], interest2_li[1]
        interest3_li = split_vector_data(b_u_d['interest3'],
                                         interest='interest3',
                                         feature_config=feature_conf_dict)
        fed_dict[interest3_index_p], fed_dict[interest3_val_p] = interest3_li[
            0], interest3_li[1]
        interest4_li = split_vector_data(b_u_d['interest4'],
                                         interest='interest4',
                                         feature_config=feature_conf_dict)
        fed_dict[interest4_index_p], fed_dict[interest4_val_p] = interest4_li[
            0], interest4_li[1]
        interest5_li = split_vector_data(b_u_d['interest5'],
                                         interest='interest5',
                                         feature_config=feature_conf_dict)
        fed_dict[interest5_index_p], fed_dict[interest5_val_p] = interest5_li[
            0], interest5_li[1]
        # print 123
        kw1_li = split_vector_data(b_u_d['kw1'])
        fed_dict[kw1_index_p], fed_dict[kw1_val_p] = kw1_li[0], kw1_li[1]
        kw2_li = split_vector_data(b_u_d['kw2'])
        fed_dict[kw2_index_p], fed_dict[kw2_val_p] = kw2_li[0], kw2_li[1]
        kw3_li = split_vector_data(b_u_d['kw3'])
        fed_dict[kw3_index_p], fed_dict[kw3_val_p] = kw3_li[0], kw3_li[1]
        # print 124
        topic1_li = split_vector_data(b_u_d['topic1'])
        fed_dict[topic1_index_p], fed_dict[topic1_val_p] = topic1_li[
            0], topic1_li[1]
        topic2_li = split_vector_data(b_u_d['topic2'])
        fed_dict[topic2_index_p], fed_dict[topic2_val_p] = topic2_li[
            0], topic2_li[1]
        topic3_li = split_vector_data(b_u_d['topic3'])
        fed_dict[topic3_index_p], fed_dict[topic3_val_p] = topic3_li[
            0], topic3_li[1]
        # print 125

        # # ad
        fed_dict[aid_p] = np.expand_dims(b_a_d['aid'], axis=1)
        fed_dict[advertiserid_p] = np.expand_dims(b_a_d['advertiserId'],
                                                  axis=1)
        fed_dict[campaignid_p] = np.expand_dims(b_a_d['campaignId'], axis=1)
        fed_dict[creativeid_p] = np.expand_dims(b_a_d['creativeId'], axis=1)
        fed_dict[adcategoryid_p] = np.expand_dims(b_a_d['adCategoryId'],
                                                  axis=1)
        fed_dict[productid_p] = np.expand_dims(b_a_d['productId'], axis=1)
        fed_dict[producttype_p] = np.expand_dims(b_a_d['productType'], axis=1)

        # print 13
        # fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1)
        if graph_hyper_params['creativeSize_pro'] == 'min_max':
            fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'],
                                                      axis=1).astype(
                                                          np.float32)
        elif graph_hyper_params['creativeSize_pro'] == 'li_san':
            fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'],
                                                      axis=1)
        else:
            print 'wrong feed'

        # label
        # print 14
        if not predict:
            fed_dict[true_label] = np.expand_dims(b_data['label'].values,
                                                  axis=1).astype(np.float32)
        # print 15
        # d4 = datetime.now()
        # print d2-d1, d3-d2, d4-d3
        # print fed_dict[true_label]
        # print len(fed_dict[true_label]), len(fed_dict[aid_p]), len(fed_dict[uid_p]),
        return fed_dict
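
    # Note on get_fed_dict: each single-valued user/ad column feeds a [None, 1]
    # placeholder, so a length-B pandas Series is lifted to shape (B, 1) with
    # np.expand_dims(series, axis=1); the multi-valued features go through
    # split_vector_data instead, which returns an (index, value) pair sized to
    # the widths recorded in feature_conf_dict.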

    # def eval_on_dev(split_vector_data):
    #     e_b_s = len(dev_data) / graph_hyper_params['batch_size']
    #     auc_true, auc_pre = [], []
    #     # auc = []
    #     for index in tqdm(range(e_b_s)):
    #         start = index * graph_hyper_params['batch_size']
    #         end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(dev_data) else len(dev_data)
    #         b_dev_data = dev_data[start:end]
    #         fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict)
    #         pred_value, pre_pred_value, final_vec, uu, vv = sess.run([pred_val, network_params[0], network_params[1], network_params[2], network_params[3]], feed_dict=fed_dict)
    #
    #         pre_real_val = np.array(pred_value).reshape((-1))
    #         auc_true = auc_true + list(b_dev_data['label'].values)
    #         auc_pre = auc_pre + pre_real_val.tolist()
    #
    #         if True in np.isnan(pre_real_val):
    #             print 'contain nan: ', np.array(pre_pred_value).reshape((-1))
    #             print np.array(final_vec).reshape((-1))
    #             print np.array(uu).reshape((-1))
    #             print np.array(vv).reshape((-1))
    #
    #         # auc.append()
    #     # auc_pre = np.array(auc_pre)
    #     # auc_pre = np.exp(auc_pre) / np.exp(auc_pre).sum()
    #     # print auc_true
    #     # print auc_pre
    #     fpr, tpr, thresholds = metrics.roc_curve(auc_true, auc_pre, pos_label=1)
    #     auc_v, gni = metrics.auc(fpr, tpr), gini_norm(auc_true, auc_pre)
    #
    #     auc_pre_2 = np.array(auc_pre)
    #     auc_pre_2.sort()
    #     print('dev_pre_top2=%.4f %.4f min2=%.4f %.4f' %
    #           (auc_pre_2.tolist()[-1], auc_pre_2.tolist()[-2], auc_pre_2.tolist()[0], auc_pre_2.tolist()[1]))
    #     return auc_v, gni

    best_auc = 0.0
    split_vector_data = SplitClass()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(graph_hyper_params['epoch']):  # train for only 1 epoch
        e_b_s = len(train_data) / graph_hyper_params['batch_size']
        one_epoch_loss, one_epoch_batchnum = 0.0, 0.0
        for index in tqdm(range(e_b_s)):
            # print 0
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (
                index + 1) * graph_hyper_params['batch_size'] < len(
                    train_data) else len(train_data)
            b_data = train_data[start:end]

            # print 1
            # d1 = datetime.now()
            fed_dict = get_fed_dict(b_data, split_vector_data,
                                    feature_conf_dict, user_data_train,
                                    uid_map_row_train)
            # d2 = datetime.now()
            # print 2
            _, loss_val, pre_tr_val = sess.run(
                [train_step, model_loss, network_params[0]],
                feed_dict=fed_dict)
            # print 3
            # d3 = datetime.now()
            # print d2-d1, d3-d2
            one_epoch_loss += loss_val
            one_epoch_batchnum += 1.

            if graph_hyper_params['debug']:
                print datetime.now(), index, loss_val
            pre_tr_val = np.array(pre_tr_val).reshape((-1))
            if graph_hyper_params['debug'] or True in np.isnan(pre_tr_val):
                print pre_tr_val

            if index != 0 and index % (
                (e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                split_vector_data.clean()
            #     auc, gn = eval_on_dev(split_vector_data)
            #     best_auc = max(auc, best_auc)
            #     format_str = '%s epoch=%.2f avg_loss=%.4f auc=%.4f best_auc=%.4f gn=%.4f'
            #     print (format_str % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), (epoch + 1.0 * (index+1) / e_b_s), one_epoch_loss / one_epoch_batchnum, auc, best_auc, gn))
            #     one_epoch_loss = one_epoch_batchnum = 0.0

    # pass
    del split_vector_data, user_data_train, train_data
    gc.collect()
    split_vector_data = 1
    if graph_hyper_params['test1']:
        predict_data = predict_data1.sort_values(by='uid')
        if graph_hyper_params['formal']:
            graph_hyper_params['batch_size'] = 512
        e_b_s = len(predict_data) / graph_hyper_params['batch_size'] if len(
            predict_data) % graph_hyper_params['batch_size'] == 0 else len(
                predict_data) / graph_hyper_params['batch_size'] + 1
        split_vector_data = SplitClass()
        pred = []
        for index in tqdm(range(e_b_s)):
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (
                index + 1) * graph_hyper_params['batch_size'] <= len(
                    predict_data) else len(predict_data) + 1
            b_predict_data = predict_data[start:end]
            # print len(b_predict_data), start, end
            # fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict)
            fed_dict = get_fed_dict(b_predict_data,
                                    split_vector_data,
                                    feature_conf_dict,
                                    user_data_predict1,
                                    uid_map_row_predict_1,
                                    predict=True)
            pred_value = sess.run([pred_val], feed_dict=fed_dict)
            # print pred_value
            pre_real_val = np.array(pred_value).reshape((-1))
            pred = pred + pre_real_val.tolist()

            if graph_hyper_params['formal'] and index != 0 and index % (
                (e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                split_vector_data.clean()

        print len(predict_data), len(pred)
        predict_data['pred_label'] = pred
        csv_data = predict_data[['ori_aid', 'ori_uid', 'pred_label']]
        csv_data.columns = ['aid', 'uid', 'score']
        csv_path = os.path.join(
            checkpoint_dir, 'test1_' + 'n' +
            str(graph_hyper_params['neg_start']) + '_submission.csv')
        csv_data.to_csv(csv_path, index=False)
        print 'submission_path:', csv_path

    del split_vector_data, user_data_predict1, predict_data1
    gc.collect()
    if graph_hyper_params['test2']:
        predict_data = predict_data2.sort_values(by='uid')
        if graph_hyper_params['formal']:
            graph_hyper_params['batch_size'] = 512
        e_b_s = len(predict_data) / graph_hyper_params['batch_size'] if len(
            predict_data) % graph_hyper_params['batch_size'] == 0 else len(
                predict_data) / graph_hyper_params['batch_size'] + 1
        split_vector_data = SplitClass()
        # split_vector_data.clean()
        pred = []
        for index in tqdm(range(e_b_s)):
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (
                index + 1) * graph_hyper_params['batch_size'] <= len(
                    predict_data) else len(predict_data) + 1
            b_predict_data = predict_data[start:end]
            # print len(b_predict_data), start, end
            # fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict)
            fed_dict = get_fed_dict(b_predict_data,
                                    split_vector_data,
                                    feature_conf_dict,
                                    user_data_predict2,
                                    uid_map_row_predict_2,
                                    predict=True)
            pred_value = sess.run([pred_val], feed_dict=fed_dict)
            # print pred_value
            pre_real_val = np.array(pred_value).reshape((-1))
            pred = pred + pre_real_val.tolist()

            if graph_hyper_params['formal'] and index != 0 and index % (
                (e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                split_vector_data.clean()

        print len(predict_data), len(pred)
        predict_data['pred_label'] = pred
        csv_data = predict_data[['ori_aid', 'ori_uid', 'pred_label']]
        csv_data.columns = ['aid', 'uid', 'score']
        csv_path = os.path.join(
            checkpoint_dir, 'test2_' + 'n' +
            str(graph_hyper_params['neg_start']) + '_submission.csv')
        csv_data.to_csv(csv_path, index=False)
        print 'submission_path:', csv_path
    pass
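
Everything in train_eval_model is switched through graph_hyper_params. A hypothetical minimal dict covering the keys the function reads above (values are illustrative placeholders, not the author's settings):

# Hypothetical minimal configuration; only keys actually read above are listed,
# and every value is an illustrative placeholder.
graph_hyper_params = {
    'formal': True,                  # full dataset instead of the 4-row toy data
    'model': 1,                      # model selector carried into the checkpoint name
    'neg_start': 0, 'neg_size': 1,   # negative-sampling window, see construct_train_data
    'creativeSize_pro': 'min_max',   # or 'li_san' to discretise creativeSize
    'opt': 'adam', 'learn_rate': 1e-3,
    'batch_size': 256, 'epoch': 1,
    'show_peroid': 10,               # key spelled exactly as in the code above
    'debug': False,
    'test1': True, 'test2': False,   # which of the two prediction sets to score
}
# train_eval_model(graph_hyper_params)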
Example #4
0
def super_resolve_MCdropout(
        dt_lowres,
        method='mlp_h=3',
        n_h1=500,
        n_h2=200,
        n_h3=100,
        n=2,
        m=2,
        us=2,
        dropout_rate=0.25,
        no_samples=10,
        network_dir='/Users/ryutarotanno/DeepLearning/nsampler/models/linear'):
    """Perform a patch-based super-resolution on a given low-res image.
    Args:
        dt_lowres (numpy array): a low-res diffusion tensor image volume
        n (int): the width of an input patch is 2*n + 1
        m (int): the width of an output patch is m
        us (int): the upsampling factor
    Returns:
        the estimated high-res volume
    """

    # Specify the network:
    print('... defining the network model %s .' % method)
    n_in, n_out = 6 * (2 * n +
                       1)**3, 6 * m**3  # dimensions of input and output
    x_scaled = tf.placeholder(tf.float32, shape=[None, n_in])
    y_scaled = tf.placeholder(tf.float32, shape=[None, n_out])
    keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout
    y_pred_scaled, L2_sqr, L1 = models.inference(method,
                                                 x_scaled,
                                                 keep_prob,
                                                 n_in,
                                                 n_out,
                                                 n_h1=n_h1,
                                                 n_h2=n_h2,
                                                 n_h3=n_h3)

    # load the transforms used for normalisation of the training data:
    transform_file = os.path.join(network_dir, 'transforms.pkl')
    transform = cPickle.load(open(transform_file, 'rb'))
    train_set_x_mean = transform['input_mean'].reshape(
        (1, n_in))  # row vector representing the mean
    train_set_x_std = transform['input_std'].reshape((1, n_in))
    train_set_y_mean = transform['output_mean'].reshape((1, n_out))
    train_set_y_std = transform['output_std'].reshape((1, n_out))
    del transform

    # load the weights with the best performance:
    settings_file = os.path.join(network_dir, 'settings.pkl')
    details = cPickle.load(open(settings_file, 'rb'))
    best_step = details['best step']

    # Restore all the variables and perform reconstruction:
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore variables from disk.
        saver.restore(sess, os.path.join(network_dir,
                                         "model-" + str(best_step)))
        print("Model restored.")

        # reconstruct
        dt_lowres = dt_lowres[
            0::us, 0::us, 0::
            us, :]  # take every us th entry to reduce it to the original resolution.
        (xsize, ysize, zsize, comp) = dt_lowres.shape
        dt_hires = np.zeros(
            (xsize * us, ysize * us, zsize * us,
             comp))  # the base array for the output high-res volume.
        dt_hires[:, :, :, 0] = -1  # initialise all the voxels as 'background'

        dt_std = np.zeros((xsize * us, ysize * us, zsize * us,
                           comp))  # the base array for the output uncertainty.
        dt_std[:, :, :, 0] = -1  # initialise all the voxels as 'background'.

        for k in np.arange(n + 1, zsize - n + 1):
            print('Slice %i of %i.' % (k, zsize))
            for j in np.arange(n + 1, ysize - n + 1):
                for i in np.arange(n + 1, xsize - n + 1):
                    ipatch = dt_lowres[(i - n - 1):(i + n),
                                       (j - n - 1):(j + n),
                                       (k - n - 1):(k + n),
                                       2:comp]  # input patch

                    # Process only if the whole patch is foreground
                    if np.min(dt_lowres[(i - n - 1):(i + n),
                                        (j - n - 1):(j + n),
                                        (k - n - 1):(k + n), 0]) >= 0:

                        opatch_MCsamples = np.zeros((no_samples, 6 * m**3))

                        for sample_idx in np.arange(no_samples):
                            # Vectorise input patch (following 'Fortran' reshape ordering) and normalise:
                            ipatch_row = ipatch.reshape((1, ipatch.size),
                                                        order='F')
                            ipatch_row_scaled = (ipatch_row - train_set_x_mean
                                                 ) / train_set_x_std

                            # Predict the corresponding high-res output patch in the normalised space:
                            opatch_row_scaled = y_pred_scaled.eval(
                                feed_dict={
                                    x_scaled: ipatch_row_scaled,
                                    keep_prob: (1.0 - dropout_rate)
                                })

                            # Send back into the original space and reshape into a cubic patch:
                            opatch_row = train_set_y_std * opatch_row_scaled + train_set_y_mean

                            # Store each predicted row-vector high-res patch:
                            opatch_MCsamples[sample_idx, :] = opatch_row

                        opatch_row_mean = opatch_MCsamples.mean(axis=0)
                        opatch_row_std = opatch_MCsamples.std(axis=0)

                        opatch_mean = opatch_row_mean.reshape(
                            (m, m, m, comp - 2), order='F')
                        opatch_std = opatch_row_std.reshape(
                            (m, m, m, comp - 2), order='F')

                        # Select the correct location of the output patch in the brain and store:
                        x_temp_1, x_temp_2 = (us * (i - 1) + 1 -
                                              (m - us) / 2) - 1, (us * i +
                                                                  (m - us) / 2)
                        y_temp_1, y_temp_2 = (us * (j - 1) + 1 -
                                              (m - us) / 2) - 1, (us * j +
                                                                  (m - us) / 2)
                        z_temp_1, z_temp_2 = (us * (k - 1) + 1 -
                                              (m - us) / 2) - 1, (us * k +
                                                                  (m - us) / 2)

                        dt_hires[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] \
                            = dt_hires[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] + opatch_mean

                        dt_std[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] \
                            = dt_std[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] + opatch_std

                        # Label only reconstructed voxels as foreground.
                        dt_hires[x_temp_1:x_temp_2, y_temp_1:y_temp_2,
                                 z_temp_1:z_temp_2, 0] = 0
                        dt_std[x_temp_1:x_temp_2, y_temp_1:y_temp_2,
                               z_temp_1:z_temp_2, 0] = 0
    return dt_hires, dt_std
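
A hedged usage sketch for the function above: the loader and the models directory are placeholders, the keyword values simply echo the defaults, and the call returns both the mean high-res reconstruction and a per-voxel MC-dropout standard deviation.

# Usage sketch (assumes dt_lowres is already available as a 4-D numpy array;
# the .npy file and the models directory are hypothetical placeholders).
import numpy as np

dt_lowres = np.load('dt_lowres.npy')
dt_hires, dt_std = super_resolve_MCdropout(dt_lowres,
                                           method='mlp_h=3',
                                           n=2, m=2, us=2,
                                           dropout_rate=0.25,
                                           no_samples=10,
                                           network_dir='./models/linear')
# dt_std is the uncertainty map produced by the spread of the dropout samples.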
Example #5
0
def sr_train(method='linear',
             n_h1=500,
             n_h2=200,
             n_h3=100,
             data_dir='/Users/ryutarotanno/DeepLearning/Test_1/data/',
             cohort='Diverse',
             no_subjects=8,
             sample_rate=32,
             us=2,
             n=2,
             m=2,
             optimisation_method='adam',
             dropout_rate=0.0,
             learning_rate=1e-4,
             L1_reg=0.00,
             L2_reg=1e-5,
             n_epochs=1000,
             batch_size=25,
             save_dir='/Users/ryutarotanno/DeepLearning/nsampler/models'):

    # -------------------------- Load the training data---------------------------:
    # get the full path to the training set:
    dataset = data_dir + 'PatchLibs%sDS%02i_%ix%i_%ix%i_TS%i_SRi%03i_0001.mat' \
                         % (cohort, us, 2 * n + 1, 2 * n + 1, m, m, no_subjects, sample_rate)
    data_dir, data_file = os.path.split(dataset)

    # load
    print('... loading the training dataset %s' % data_file)
    patchlib = sr_utility.load_patchlib(patchlib=dataset)
    train_set_x, valid_set_x, train_set_y, valid_set_y = patchlib  # load the original patch libs

    # normalise the data and keep the transforms:
    (train_set_x_scaled, train_set_x_mean, train_set_x_std, train_set_y_scaled, train_set_y_mean, train_set_y_std)\
        = sr_utility.standardise_data(train_set_x, train_set_y, option='default')  # normalise the data

    # normalise the validation sets into the same space as training sets:
    valid_set_x_scaled = (valid_set_x - train_set_x_mean) / train_set_x_std
    valid_set_y_scaled = (valid_set_y - train_set_y_mean) / train_set_y_std
    del train_set_x, valid_set_x, train_set_y, valid_set_y, patchlib  # clear original data as you don't need them.

    # --------------------------- Define the model--------------------------:
    # clear the graph
    tf.reset_default_graph()

    # define input and output:
    n_in, n_out = 6 * (2 * n +
                       1)**3, 6 * m**3  # dimensions of input and output
    x_scaled = tf.placeholder(tf.float32,
                              shape=[None,
                                     n_in])  # normalised input low-res patch
    y_scaled = tf.placeholder(tf.float32,
                              shape=[None, n_out
                                     ])  # normalised output high-res patch
    keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout
    global_step = tf.Variable(0, name="global_step", trainable=False)

    y_pred_scaled, L2_sqr, L1 = models.inference(method,
                                                 x_scaled,
                                                 keep_prob,
                                                 n_in,
                                                 n_out,
                                                 n_h1=n_h1,
                                                 n_h2=n_h2,
                                                 n_h3=n_h3)
    cost = models.cost(y_scaled, y_pred_scaled, L2_sqr, L1, L2_reg, L1_reg)
    train_step = models.training(cost,
                                 learning_rate,
                                 global_step=global_step,
                                 option=optimisation_method)
    mse = tf.reduce_mean(
        tf.square(train_set_y_std * (y_scaled - y_pred_scaled)))

    # -------------------------- Start training -----------------------------:
    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Set the directory for saving checkpoints:
    nn_file = sr_utility.name_network(method=method,
                                      n_h1=n_h1,
                                      n_h2=n_h2,
                                      n_h3=n_h3,
                                      cohort=cohort,
                                      no_subjects=no_subjects,
                                      sample_rate=sample_rate,
                                      us=us,
                                      n=n,
                                      m=m,
                                      optimisation_method=optimisation_method,
                                      dropout_rate=dropout_rate)

    checkpoint_dir = os.path.join(save_dir, nn_file)

    if not os.path.exists(
            checkpoint_dir):  # create a subdirectory to save the model.
        os.makedirs(checkpoint_dir)

    # Save the transforms used for data normalisation:
    print('... saving the transforms used for data normalisation at test time')
    transform = {
        'input_mean': train_set_x_mean,
        'input_std': train_set_x_std,
        'output_mean': train_set_y_mean,
        'output_std': train_set_y_std
    }
    f = file(os.path.join(checkpoint_dir, 'transforms.pkl'), 'wb')
    cPickle.dump(transform, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Create a session for running Ops on the Graph.
    print('... training')

    with tf.Session() as sess:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x_scaled.shape[0] // batch_size
        n_valid_batches = valid_set_x_scaled.shape[0] // batch_size

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is considered significant
        validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network on the validation set;
        # in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0

        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        iter_valid = 0
        total_validation_loss_epoch = 0
        total_training_loss_epoch = 0

        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            start_time_epoch = timeit.default_timer()

            for minibatch_index in range(n_train_batches):

                # Select batches:
                x_batch_train = train_set_x_scaled[
                    minibatch_index * batch_size:(minibatch_index + 1) *
                    batch_size, :]
                y_batch_train = train_set_y_scaled[
                    minibatch_index * batch_size:(minibatch_index + 1) *
                    batch_size, :]
                x_batch_valid = valid_set_x_scaled[
                    minibatch_index * batch_size:(minibatch_index + 1) *
                    batch_size, :]
                y_batch_valid = valid_set_y_scaled[
                    minibatch_index * batch_size:(minibatch_index + 1) *
                    batch_size, :]

                # track the number of steps
                current_step = tf.train.global_step(sess, global_step)

                # perform gradient descent:
                train_step.run(
                    feed_dict={
                        x_scaled: x_batch_train,
                        y_scaled: y_batch_train,
                        keep_prob: (1.0 - dropout_rate)
                    })

                # Accumulate validation/training errors for each epoch:
                total_validation_loss_epoch += mse.eval(
                    feed_dict={
                        x_scaled: x_batch_valid,
                        y_scaled: y_batch_valid,
                        keep_prob: (1.0 - dropout_rate)
                    })

                total_training_loss_epoch += mse.eval(
                    feed_dict={
                        x_scaled: x_batch_train,
                        y_scaled: y_batch_train,
                        keep_prob: (1.0 - dropout_rate)
                    })

                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index
                iter_valid += 1

                if (iter + 1) % validation_frequency == 0:
                    # Print out the errors for each epoch:

                    this_validation_loss = total_validation_loss_epoch / iter_valid
                    this_training_loss = total_training_loss_epoch / iter_valid
                    end_time_epoch = timeit.default_timer()

                    print('\nEpoch %i, minibatch %i/%i:\n'
                          '     training error (rmse) %f times 1E-5\n'
                          '     validation error (rmse) %f times 1E-5\n'
                          '     took %f secs' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           np.sqrt(this_training_loss * 10**10),
                           np.sqrt(this_validation_loss * 10**10),
                           end_time_epoch - start_time_epoch))
                    print('     number of minibatches = %i and patience = %i' %
                          (iter + 1, patience))
                    print('     validation frequency = %i, iter_valid = %i' %
                          (validation_frequency, iter_valid))
                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:

                            patience = max(patience, iter * patience_increase)
                            print(
                                '     reduces the previous error by more than %f %%'
                                % ((1 - improvement_threshold) * 100.))

                        best_validation_loss = this_validation_loss
                        best_training_loss = this_training_loss
                        best_iter = iter
                        best_step = current_step + 1

                    # Save the model:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    save_path = saver.save(sess,
                                           checkpoint_prefix,
                                           global_step=global_step)
                    print("Model saved in file: %s" % save_path)

                    # Save the model details:
                    print('... saving the model details')
                    model_details = {
                        'method': method,
                        'cohort': cohort,
                        'no of subjects': no_subjects,
                        'sample rate': sample_rate,
                        'upsampling factor': us,
                        'n': n,
                        'm': m,
                        'optimisation': optimisation_method,
                        'dropout rate': dropout_rate,
                        'learning rate': learning_rate,
                        'L1 coefficient': L1_reg,
                        'L2 coefficient': L2_reg,
                        'max no of epochs': n_epochs,
                        'batch size': batch_size,
                        'training length': end_time_epoch - start_time,
                        'best validation rmse': np.sqrt(best_validation_loss),
                        'best training rmse': np.sqrt(best_training_loss),
                        'best step': best_step
                    }
                    cPickle.dump(model_details,
                                 file(
                                     os.path.join(checkpoint_dir,
                                                  'settings.pkl'), 'wb'),
                                 protocol=cPickle.HIGHEST_PROTOCOL)

                    # Terminate training when the validation loss starts increasing.
                    if this_validation_loss > best_validation_loss:
                        patience = 0
                        print(
                            'Validation error increases - terminate training ...'
                        )
                        break

                    # Start counting again:
                    total_validation_loss_epoch = 0
                    total_training_loss_epoch = 0
                    iter_valid = 0
                    start_time_epoch = timeit.default_timer()

                if patience <= iter:
                    done_looping = True
                    break

        # Display the best results:
        print(('\nOptimization complete. Best validation score of %f  '
               'obtained at iteration %i') %
              (np.sqrt(best_validation_loss * 10**10), best_step))

        end_time = timeit.default_timer()
        time_train = end_time - start_time
        print('Training done!!! It took %f secs.' % time_train)

    # clear the graph
    tf.reset_default_graph()
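
The stopping rule in sr_train is the classic patience schedule: a validation improvement of more than 0.5% (improvement_threshold = 0.995) stretches the patience budget to patience_increase times the current iteration, and training stops once the iteration counter overtakes the budget. A toy, standalone sketch of that bookkeeping (numbers are illustrative):

# Toy sketch of the patience-based early stopping used in sr_train above.
patience, patience_increase, improvement_threshold = 4, 2, 0.995
best = float('inf')
losses = [1.0, 0.8, 0.7, 0.69, 0.689, 0.689, 0.689, 0.689]  # hypothetical per-check losses
for it, loss in enumerate(losses):
    if loss < best:
        if loss < best * improvement_threshold:             # significant improvement
            patience = max(patience, it * patience_increase)
        best = loss
    if patience <= it:
        print 'early stop at check', it, 'best =', best
        break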
Example #6
0
valData, numValExamples, valIterator = preprocessing.inputValFlows(  # opening line reconstructed from the commented-out counterpart below
    batch_size, preprocessing.valSetPath, preprocessing.valLabelPath)
# trainData, numTrainExamples, trainIterator = preprocessing.inputValFlows(batch_size, preprocessing.trainSetPath, preprocessing.trainLabelPath)
perGPUValData = [list([]) for i in range(numGpus)]
for tD in valData[:-1]:
    split = tf.split(tD, numGpus, axis=0)
    for gpu in range(numGpus):
        perGPUValData[gpu].append(split[gpu])

netOut = []
for gpu in range(numGpus):
    with tf.name_scope('tower_%d' % (gpu)) as scope:
        with tf.device('/gpu:%d' % gpu):
            print(perGPUValData[gpu][0].get_shape())
            print(len(perGPUValData[gpu]))
            valCode = models.inference(perGPUValData[gpu],
                                       first=(gpu == 0),
                                       useType="test",
                                       modelType=modelType)
            print(valCode.get_shape())
            gpuValPredictions = models.predictForces(valCode,
                                                     5 * batch_size // numGpus,
                                                     log,
                                                     useType="test",
                                                     first=(gpu == 0))
            netOut.append(gpuValPredictions)

# netOut = []
# for gpu in range(numGpus):
# 	with tf.name_scope('tower_%d' % (gpu)) as scope:
# 		with tf.device('/gpu:%d' % gpu):
# 			print(perGPUValData[gpu][0].get_shape())
# 			print(len(perGPUValData[gpu]))
Example #7
0
        perGPUValData[gpu].append(split[gpu])

perGPUTrainData = [list([]) for i in range(numGpus)]
for tD in trainData[:-1]:
    split = tf.split(tD, numGpus, axis=0)
    for gpu in range(numGpus):
        perGPUTrainData[gpu].append(split[gpu])

netOut = []
for gpu in range(numGpus):
    with tf.name_scope('tower_%d' % (gpu)) as scope:
        with tf.device('/gpu:%d' % gpu):
            print(perGPUValData[gpu][0].get_shape())
            print(len(perGPUValData[gpu]))
            valCode = models.inference(perGPUValData[gpu],
                                       first=(gpu == 0),
                                       useType="test",
                                       modelType=modelType)
            print(valCode.get_shape())
            gpuValPredictions = models.predictForces(valCode,
                                                     5 * batch_size // numGpus,
                                                     log,
                                                     useType="test",
                                                     first=(gpu == 0))
            netOut.append(gpuValPredictions)

trainNetOut = []
for gpu in range(numGpus):
    with tf.name_scope('tower_%d' % (gpu)) as scope:
        with tf.device('/gpu:%d' % gpu):
            print(perGPUTrainData[gpu][0].get_shape())
            print(len(perGPUTrainData[gpu]))
Example #8
0
def sr_train(method='mlp_h=1_kingma',
             n_h1=500,
             n_h2=200,
             data_dir='./data/',
             cohort='Diverse',
             no_subjects=8,
             sample_rate=32,
             us=2,
             n=2,
             m=2,
             optimisation_method='adam',
             dropout_rate=0.5,
             learning_rate=1e-4,
             L1_reg=0.00,
             L2_reg=1e-5,
             n_epochs=1000,
             batch_size=25,
             save_dir='./models'):

    ##########################
    # Load the training data:
    ##########################
    # get the full path to the training set:
    dataset = data_dir + 'PatchLibs%sDS%02i_%ix%i_%ix%i_TS%i_SRi%03i_0001.mat' \
                         % (cohort, us, 2 * n + 1, 2 * n + 1, m, m, no_subjects, sample_rate)
    data_dir, data_file = os.path.split(dataset)

    # load
    print('... loading the training dataset %s' % data_file)
    patchlib = sr_utility.load_patchlib(patchlib=dataset)
    train_set_x, valid_set_x, train_set_y, valid_set_y = patchlib  # load the original patch libs

    # normalise the data and keep the transforms:
    (train_set_x_scaled, train_set_x_mean, train_set_x_std, train_set_y_scaled, train_set_y_mean, train_set_y_std)\
        = sr_utility.standardise_data(train_set_x, train_set_y, option='default')  # normalise the data

    # normalise the validation sets into the same space as training sets:
    valid_set_x_scaled = (valid_set_x - train_set_x_mean) / train_set_x_std
    valid_set_y_scaled = (valid_set_y - train_set_y_mean) / train_set_y_std
    del train_set_x, valid_set_x, train_set_y, valid_set_y  # clear original data as you don't need them.

    ####################
    # Define the model:
    ####################
    print('... defining the model')

    # clear the graph
    tf.reset_default_graph()

    # define input and output:
    n_in, n_out = 6 * (2 * n +
                       1)**3, 6 * m**3  # dimensions of input and output
    x_scaled = tf.placeholder(tf.float32,
                              shape=[None,
                                     n_in])  # normalised input low-res patch
    y_scaled = tf.placeholder(tf.float32,
                              shape=[None, n_out
                                     ])  # normalised output high-res patch
    keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout

    y_pred_scaled, L2_sqr, L1, reg = models.inference(method, x_scaled,
                                                      keep_prob, n_in, n_out,
                                                      n_h1, n_h2)
    cost = models.cost(y_scaled, y_pred_scaled, L2_sqr, L1, L2_reg, L1_reg)
    train_step = models.training(cost,
                                 learning_rate,
                                 option=optimisation_method)
    mse = tf.reduce_mean(
        tf.square(train_set_y_std * (y_scaled - y_pred_scaled)))
    cost += tf.add_n(reg) / 3.

    #######################
    # Start training:
    #######################
    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    print('... training')

    with tf.Session() as sess:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x_scaled.shape[0] // batch_size
        n_valid_batches = valid_set_x_scaled.shape[0] // batch_size

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is considered significant
        validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.

        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            for minibatch_index in range(n_train_batches):

                # perform gradient descent:
                train_step.run(
                    feed_dict={
                        x_scaled:
                        train_set_x_scaled[minibatch_index *
                                           batch_size:(minibatch_index + 1) *
                                           batch_size, :],
                        y_scaled:
                        train_set_y_scaled[minibatch_index *
                                           batch_size:(minibatch_index + 1) *
                                           batch_size, :],
                        keep_prob: (1.0 - dropout_rate)
                    })
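                # keep_prob is the probability of *keeping* a unit, hence
                # 1.0 - dropout_rate while training and 1.0 when evaluating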

                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    # compute the mean squared error on the validation set
                    # (dropout is switched off at evaluation time: keep_prob = 1.0)
                    validation_losses = [
                        mse.eval(
                            feed_dict={
                                x_scaled:
                                valid_set_x_scaled[index *
                                                   batch_size:(index + 1) *
                                                   batch_size, :],
                                y_scaled:
                                valid_set_y_scaled[index *
                                                   batch_size:(index + 1) *
                                                   batch_size, :],
                                keep_prob: 1.0
                            }) for index in range(n_valid_batches)
                    ]

                    this_validation_loss = np.mean(validation_losses)

                    # estimate the training error on the first n_valid_batches
                    # training minibatches only, to keep this check cheap
                    training_losses = [
                        mse.eval(
                            feed_dict={
                                x_scaled:
                                train_set_x_scaled[index *
                                                   batch_size:(index + 1) *
                                                   batch_size, :],
                                y_scaled:
                                train_set_y_scaled[index *
                                                   batch_size:(index + 1) *
                                                   batch_size, :],
                                keep_prob: 1.0
                            }) for index in range(n_valid_batches)
                    ]

                    this_training_loss = np.mean(training_losses)

                    print('\nEpoch %i, minibatch %i/%i:\n'
                          '     training error (rmse) %f x 1E-5\n'
                          '     validation error (rmse) %f x 1E-5' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           np.sqrt(this_training_loss * 10**10),
                           np.sqrt(this_validation_loss * 10**10)))
                    print('     iteration = %i, patience = %i' %
                          (iter, patience))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:

                            patience = max(patience, iter * patience_increase)
                            print(
                                '     improved on the previous best error by more than %f %%'
                                % ((1 - improvement_threshold) * 100.))

                        best_validation_loss = this_validation_loss
                        best_iter = iter

                if patience <= iter:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('\nOptimization complete. Best validation error (rmse) of '
               '%f x 1E-5, obtained at iteration %i') %
              (np.sqrt(best_validation_loss * 10**10), best_iter + 1))

        print('Training done. It took %f secs.' % (end_time - start_time))

        # Save the model:
        nn_file = sr_utility.name_network(
            method=method,
            n_h1=n_h1,
            n_h2=n_h2,
            cohort=cohort,
            no_subjects=no_subjects,
            sample_rate=sample_rate,
            us=us,
            n=n,
            m=m,
            optimisation_method=optimisation_method,
            dropout_rate=dropout_rate)

        save_subdir = os.path.join(save_dir, nn_file)

        # create a subdirectory to save the model
        if not os.path.exists(save_subdir):
            os.makedirs(save_subdir)

        save_path = saver.save(sess, os.path.join(save_subdir, "model.ckpt"))
        print("Model saved in file: %s" % save_path)

        # Save the model details:
        print('... saving the model details')
        model_details = {
            'method': method,
            'cohort': cohort,
            'no of subjects': no_subjects,
            'sample rate': sample_rate,
            'upsampling factor': us,
            'n': n,
            'm': m,
            'optimisation': optimisation_method,
            'dropout rate': dropout_rate,
            'learning rate': learning_rate,
            'L1 coefficient': L1_reg,
            'L2 coefficient': L2_reg,
            'max no of epochs': n_epochs,
            'batch size': batch_size
        }
        with open(os.path.join(save_subdir, 'settings.pkl'), 'wb') as f:
            cPickle.dump(model_details, f, protocol=cPickle.HIGHEST_PROTOCOL)

        print('... saving the transforms used for data normalisation at test time')
        transform = {
            'input_mean': train_set_x_mean,
            'input_std': train_set_x_std,
            'output_mean': train_set_y_mean,
            'output_std': train_set_y_std
        }
        with open(os.path.join(save_subdir, 'transforms.pkl'), 'wb') as f:
            cPickle.dump(transform, f, protocol=cPickle.HIGHEST_PROTOCOL)
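        # Test-time usage sketch (hypothetical snippet, not part of this script):
        #     transform = cPickle.load(open(os.path.join(save_subdir, 'transforms.pkl'), 'rb'))
        #     x_scaled = (x - transform['input_mean']) / transform['input_std']
        #     y = y_pred_scaled * transform['output_std'] + transform['output_mean']
        # i.e. inputs are standardised with the saved training statistics and
        # predictions are mapped back to the original units.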

    # clear the graph
    tf.reset_default_graph()