def test(self):
    """Run the clustering comparison experiment over the configured datasets.

    For each dataset this builds an ``MLN`` and a ``DBManager``, runs the BMF,
    word2vec, random and k-means clustering steps in that order, prints the
    domain-object maps reported by ``db.get_dom_objs_map`` after each step,
    calls ``tuffy`` and finally hands the per-method meta maps to
    ``performance.compare_marginal`` / ``performance.compare_map``.

    Returns nothing; results are only printed or produced by the helpers.
    """
    #self.merge()
    #self.compress()
    #return
    embedding_size = 100
    for CLUSTER_MIN_SIZE in range(4,19,2):
        for dsname in ['webkb','er']:
            mln = MLN(dsname)
            db = DBManager(dsname,mln)
            print('merge db dom sizes:')
            dom_obj_map = db.get_dom_objs_map(mln,db.merge_db_file)
            cf = common_f()
            #cf.delete_files(mln.pickle_location)
            #cf.remove_irrelevant_atoms()
            # embedding size grows by 100 per pass and wraps around below 1000
            embedding_size += 100
            embedding_size = embedding_size%1000
            db.set_atoms()
            bmf = bmf_cluster(dsname)
            bmf.cluster(db,1,mln.pdm,dom_obj_map)
            print('original db dom sizes(after compression):')
            orig_dom_objs_map = db.get_dom_objs_map(mln,mln.orig_db_file)
            # NOTE: this overrides the outer loop variable, so every pass
            # uses a minimum cluster size of 10 regardless of range(4,19,2).
            CLUSTER_MIN_SIZE = 10
            w2v = word2vec(dsname,db,CLUSTER_MIN_SIZE,embedding_size)
            print('w2v cluster dom sizes:')
            w2v_dom_objs_map = db.get_dom_objs_map(mln,w2v.w2v__cluster_db_file)
            # cr is computed from the original and the w2v-clustered domain maps
            cr = cf.calculate_cr(orig_dom_objs_map,w2v_dom_objs_map)
            print('cr : ' + str(cr))
            rc = random_cluster(dsname)
            rc.generate_random_db(db,w2v.pred_atoms_reduced_numbers,mln,w2v_dom_objs_map)
            print('random cluster dom sizes')
            db.get_dom_objs_map(mln,mln.random__cluster_db_file)
            kmc = kmeans_cluster(dsname)
            kmc.cluster(db,str(cr),mln.pdm,w2v_dom_objs_map,mln.dom_pred_map)
            print('kmeans cluster dom sizes:')
            kmeans_dom_objs_map = db.get_dom_objs_map(mln,kmc.kmeans__cluster_db_file)
            mln.create_magician_mln()
            #magician(dsname,mln)
            # NOTE(review): indentation/newlines were lost in this copy; the
            # tuffy call is assumed to be live code (only magician disabled) - confirm.
            tuffy(dsname)
            # collect the per-method meta maps under fixed keys for the comparison
            orig_meta_map = {}
            orig_meta_map['bmf'] = bmf.bmf_orig_meta_map
            orig_meta_map['w2v'] = w2v.w2v_orig_meta_map
            orig_meta_map['random'] = rc.rand_orig_meta_map
            orig_meta_map['kmeans'] = kmc.kmeans_orig_meta_map
            print('Dataset : ' + dsname + '; CR : ' + str(cr))
            p = performance(dsname,embedding_size)
            p.compare_marginal(mln,orig_meta_map,cr)
            p.compare_map(mln,orig_meta_map,cr)
            # NOTE(review): the loop this break belongs to cannot be recovered
            # from the flattened source; it is assumed to end the dataset loop
            # after the first dataset - confirm against the original file.
            break
def exp_hidden_state(hidden_state_candidates):
    """Evaluate each hidden-state size and write a TSV report.

    For every candidate, ``predict`` is called with batch size 512, 70 epochs
    and a timestep of 50, the result is scored with ``performance`` and one
    row (size, RMSE, TPA, TPPA) is added to ``./exp/hiddenstate.txt``.

    :param hidden_state_candidates: iterable of hidden-state sizes to try.
    """
    report = [
        "batch_size = 512,nb_epoch = 70,timestep=50 \n",
        "hidden state size\tRMSE\tTPA\tTPPA\n",
    ]
    for size in hidden_state_candidates:
        # the timestep is fixed at 50 for this experiment
        y_test, predictions = predict(512, 70, 50, size)
        # the two confusion matrices are not part of this report
        rmse, tp_acc, tpp_acc, _cm1, _cm2 = performance(y_test, predictions)
        row = (size, rmse, tp_acc, tpp_acc)
        report.append("\t".join(str(value) for value in row) + "\n")

    with open("./exp/hiddenstate.txt", "w") as out:
        out.write("".join(report))
def exp_timesteps(hidden_state_candidates, timestep_candidates):
    """Evaluate every (hidden-state size, timestep) pair and write a report.

    ``predict`` is called with batch size 512 and 70 epochs for each pair and
    scored with ``performance``. The report has one section per hidden-state
    size, with one row (timestep, RMSE, TPA, TPPA) per timestep, and is written
    to ``./exp/timestep.txt``.

    :param hidden_state_candidates: iterable of hidden-state sizes.
    :param timestep_candidates: iterable of timestep values, re-iterated for
        every hidden-state size.
    """
    report = ["batch_size = 512,nb_epoch = 70 \n"]
    for size in hidden_state_candidates:
        report.append("hidden state size: " + str(size) + "------------------\n")
        report.append("timestep size\tRMSE\tTPA\tTPPA\n")
        for steps in timestep_candidates:
            y_test, predictions = predict(512, 70, steps, size)
            # the two confusion matrices are not part of this report
            rmse, tp_acc, tpp_acc, _cm1, _cm2 = performance(y_test, predictions)
            row = (steps, rmse, tp_acc, tpp_acc)
            report.append("\t".join(str(value) for value in row) + "\n")

    with open("./exp/timestep.txt", "w") as out:
        out.write("".join(report))
def exp_epoch(layers_candidates, timesteps_candidates, epoch_candidates):
    """Evaluate every (layers, timesteps, epoch count) combination.

    ``predict`` is called with batch size 512 and a hidden-state size of 50
    for each combination and scored with ``performance``. The report has one
    section per (layers, timesteps) pair, with one row (epochs, RMSE, TPA,
    TPPA) per epoch count, and is written to ``./exp/epoch.txt``.

    :param layers_candidates: iterable of layer configurations.
    :param timesteps_candidates: iterable of timestep values.
    :param epoch_candidates: iterable of epoch counts.
    """
    report = ["batch_size = 512 \n"]
    for layers in layers_candidates:
        for timesteps in timesteps_candidates:
            report.append(
                "layers: " + str(layers) + "; timesteps: " + str(timesteps)
                + "-------------------\n"
            )
            report.append("number of epoch\tRMSE\tTPA\tTPPA\n")
            for epoch in epoch_candidates:
                y_test, predictions = predict(
                    batch_size=512,
                    nb_epoch=epoch,
                    timestep=timesteps,
                    hidden_state=50,
                    layers=layers,
                )
                # the two confusion matrices are not part of this report
                rmse, tp_acc, tpp_acc, _cm1, _cm2 = performance(y_test, predictions)
                row = (epoch, rmse, tp_acc, tpp_acc)
                report.append("\t".join(str(value) for value in row) + "\n")

    with open("./exp/epoch.txt", "w") as out:
        out.write("".join(report))
def exp_batchsize(layers_candidates, batchsize_candidates):
    """Evaluate every (layers, batch size) combination and write a report.

    ``predict`` is called with 70 epochs, a timestep of 30 and a hidden-state
    size of 50 for each combination and scored with ``performance``. The
    report has one section per layer configuration, with one row (batch size,
    RMSE, TPA, TPPA) per batch size, and is written to ``./exp/batchsize.txt``.

    :param layers_candidates: iterable of layer configurations.
    :param batchsize_candidates: iterable of batch sizes.
    """
    report = ["nb_epoch = 70, timestep=30 \n"]
    for layers in layers_candidates:
        report.append("layers: " + str(layers) + "---------------------\n")
        report.append("batch_size\tRMSE\tTPA\tTPPA\n")
        for size in batchsize_candidates:
            y_test, predictions = predict(
                batch_size=size,
                nb_epoch=70,
                timestep=30,
                hidden_state=50,
                layers=layers,
            )
            # the two confusion matrices are not part of this report
            rmse, tp_acc, tpp_acc, _cm1, _cm2 = performance(y_test, predictions)
            row = (size, rmse, tp_acc, tpp_acc)
            report.append("\t".join(str(value) for value in row) + "\n")

    with open("./exp/batchsize.txt", "w") as out:
        out.write("".join(report))
def evaluate(date=yesterday, company=None):
    """Compare predicted and actual figures for every matching company.

    For each ``Company`` matched by the ``company`` filter, ``predict`` and
    ``performance`` are called for ``date`` and the relative error (in
    percent) of ``avg_change`` and ``avg_perc`` is computed.

    :param date: date forwarded to ``predict`` and ``performance``. The
        default is the module-level ``yesterday`` value captured when the
        function is defined.
    :param company: dict of keyword filters forwarded to
        ``Company.objects.filter``; ``None`` (the default) applies no filter.

    NOTE(review): in the visible code ``c_list`` is never filled and nothing
    is returned, so the computed errors are discarded - confirm whether the
    remainder of this function is missing from this copy.
    """
    def _relative_error(actual, predicted, key):
        # Percentage deviation of the prediction from the actual value.
        # Any failure (missing key, missing result, zero actual value, ...)
        # is deliberately reported as None, as in the original code.
        try:
            return 100*abs((actual[key] - predicted[key])/actual[key])
        except Exception:
            return None

    # A fresh dict per call avoids sharing a mutable default argument.
    filters = {} if company is None else company
    companies = Company.objects.filter(**filters)
    c_list = []
    for _company in companies:
        c_dict = {"ticker": _company.ticker}
        pred = predict(date=date, company=c_dict)
        perf = performance(date=date, company=c_dict)
        accu = _relative_error(perf, pred, 'avg_change')
        p_accu = _relative_error(perf, pred, 'avg_perc')
def main():
    """Train a TextCNN, predict with a saved checkpoint and print the scores.

    Uses the module-level configuration (``vocab_size``, ``embedding_size``,
    ``num_class``, ``device``, ``sen_length``, ``sen_list``, ``word2idx``,
    ``label``) and prints micro F1, macro F1 and average accuracy as returned
    by ``performance``.
    """
    net = TextCNN(vocab_size, embedding_size, num_class).to(device)
    loss_fn = nn.CrossEntropyLoss().to(device)
    adam = optim.Adam(net.parameters(), lr=0.0001)
    train(net, loss_fn, adam)

    # only the inputs are needed for prediction; the targets are not used here
    input_batch, _target_batch = make_data(sen_length, sen_list, word2idx, label)
    checkpoint = "mdl/10_model.mdl"
    raw_predictions = predict(net, checkpoint, input_batch)
    # first column of the prediction tensor, converted to a plain Python list
    pre_list = raw_predictions[:, 0].cpu().numpy().tolist()
    label_list = [int(item) for item in label]

    micro_F1, macro_F1, ave_acc = performance(label_list, pre_list)
    print("micro_F1:", micro_F1)
    print("macro_F1:", macro_F1)
    print(f"ave_acc:{ave_acc}")
def exp_layer_depth(layers):
    """Run one prediction for the given layer setup, then report and plot it.

    ``predict`` is called with ``save = True``; the arrays under ``./data/``
    are then loaded, scored with ``performance``, printed and plotted.

    :param layers: layer configuration forwarded to ``predict``.

    The ``print`` calls use the single-argument parenthesised form, which
    produces the same output on Python 2 and Python 3 (the original print
    statements are a syntax error on Python 3).
    """
    prediction_len = 20
    # presumably save=True makes predict() write the .npy files read below -
    # TODO confirm against predict()
    predict(batch_size = 512, nb_epoch = 70,timestep = 50,hidden_state = 50, layers = layers,
            save = True, predict_multiple = True, prediction_len = prediction_len,
            predict_full = True)

    # Load the saved targets/predictions and score them
    orig = np.load("./data/y_test.npy")
    predictions = np.load("./data/predictions.npy")
    rmse,tp_acc,tpp_acc,cm1,cm2 = performance(orig, predictions)
    print("Tendency Prediction Confusion Matrix (0:down; 1:up or equal): ")
    print(cm1)
    print("Turning Point Prediction Confusion Matrix (0:not a turning point; 1:summit; 2:vale): ")
    print(cm2)
    print("rmse: " + str(rmse) + "TPA: " + str(tp_acc) + "TPPA: " + str(tpp_acc))

    # Plot the predictions
    predictions_multi = np.load("./data/predictions_multi.npy")
    # still loaded although its plot is disabled below, so a missing file
    # keeps failing exactly as before
    predictions_full = np.load("./data/predictions_full.npy")
    fig = plt.figure(facecolor='white')
    plot_results(predictions, orig, fig, sublocation = 121, show = False)
    # plot_results(predictions_full, orig, fig, sublocation = 222, show = False)
    plot_results_multiple(predictions_multi, orig, prediction_len = prediction_len, fig = fig,
                          sublocation = 122, show = True)
def MLPtrain_workflow(train, val, save_path, nb_members, nb_hid, nb_epochs):
    """Train an MLP ensemble for SWE and evaluate it on the validation set.

    Steps:
      1. (Re)create ``save_path`` with the sub-folders ``results`` and
         ``saved_MLPs``. An existing ``save_path`` is deleted first.
      2. Train the ensemble with ``MLP`` on the rows of ``train`` that have no
         NaN in the selected variables.
      3. For every trained member and every validation row, draw 20 perturbed
         SD values, predict SWE, and reduce the resulting ensemble to 20
         quantile members.
      4. Pickle the reduced ensemble to ``results/ensembleVal_SD_pt`` and call
         ``performance`` on it.

    :param train: DataFrame with the training data (must contain the columns
        listed in ``variables`` below).
    :param val: DataFrame with the validation data (same columns).
    :param save_path: output folder; wiped and recreated.
    :param nb_members: number of MLP networks in the ensemble. It is now used
        for the evaluation as well; previously it was overwritten with a
        hard-coded 20, which broke any other ensemble size.
    :param nb_hid: forwarded to ``MLP``.
    :param nb_epochs: forwarded to ``MLP``.
    """
    # number of perturbed SD samples drawn per validation row and per network
    nb_noise = 20
    # variables used for MLP optimisation ('SWE' is the target)
    variables = ['SD', 'SWE', 'Day of year', 'days without snow', 'number frost-defrost',
                 'accum pos degrees', 'average age SC', 'number layer', 'accum solid precip',
                 'accum solid precip in last 10 days', 'total precip last 10 days',
                 'average temp last 6 days']

    # generate folder for saved MLP networks and results
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)
    save_path_results = '{0}/results'.format(save_path)
    os.makedirs(save_path_results)
    save_path_MLPs = '{0}/saved_MLPs'.format(save_path)
    os.makedirs(save_path_MLPs)

    # -------------------------------------------------------------------------
    # 1. TRAINING OF MLPS
    # delete all rows with nan values
    MLP_train = train[variables].dropna()
    # select explanatory and target variables
    y_train = MLP_train['SWE'].values.astype('float32')
    x_train = MLP_train.drop('SWE', axis=1).values.astype('float32')

    # determination of MLP setup
    activ_fc = 'tanh'
    init_w = 2
    init_b = 2
    optAlg = 'Adadelta'
    batch_size = 100
    shuf_data = 1

    # optimise MLP ensemble by function; trained MLP networks and scaler for
    # standardisation are saved to save_path_MLPs
    MLP(x_train, y_train, nb_epochs, nb_members, nb_hid, save_path_MLPs, activ_fc, init_w,
        init_b, optAlg, batch_size, shuf_data)
    del train

    # -------------------------------------------------------------------------
    # 2. EVALUATION ON VALIDATION DATA SET
    # Perturb SD on validation data set and estimate SWE; evaluate estimated
    # SWE against observation of SWE

    # load scaler for standardisation (the file is closed deterministically)
    with open(save_path_MLPs + '/scaler', "rb") as scaler_file:
        [scalerIn, scalerOut] = pickle.load(scaler_file)

    # delete all rows with nan values
    MLP_val = val[variables].dropna()
    # select explanatory and target variables
    y_val = MLP_val['SWE'].values.astype('float32')
    x_val = MLP_val.drop('SWE', axis=1).values.astype('float32')
    # find index for SD for perturbation
    idx_SD = MLP_val.drop('SWE', axis=1).columns.get_loc('SD')

    # full ensemble: nb_noise noisy predictions per network
    # (20 networks x 20 samples = 400 members in the original setup)
    ensemble_full = np.empty((len(y_val), nb_members * nb_noise))
    for mb in range(nb_members):
        # create network graph
        tf.reset_default_graph()
        imported_graph = tf.train.import_meta_graph(
            save_path_MLPs + "/mb_{0}.ckpt.meta".format(mb))
        with tf.Session() as sess:
            # restore parameter
            imported_graph.restore(sess, save_path_MLPs + "/mb_{0}.ckpt".format(mb))
            # get prediction with noisy inputs as an ensemble
            for k in range(x_val.shape[0]):
                line_input = x_val[k, :]
                input_net = np.tile(line_input, (nb_noise, 1))
                SD = line_input[idx_SD]
                # absolute noise of +/-1 below SD = 20, relative +/-5 % otherwise
                if SD < 20:
                    SD_low = SD - 1
                    SD_high = SD + 1
                else:
                    SD_low = SD * 0.95
                    SD_high = SD * 1.05
                SD_noise_1rec = np.random.uniform(low=SD_low, high=SD_high, size=nb_noise)
                input_net[:, idx_SD] = SD_noise_1rec
                input_net_std = scalerIn.transform(input_net)
                predict_std = sess.run("op_to_restore:0",
                                       feed_dict={"input:0": input_net_std}).flatten()
                ensemble_full[k, mb*nb_noise:(mb+1)*nb_noise] = \
                    scalerOut.inverse_transform(predict_std).flatten()

    # determine 20 quantiles, to get 20 members
    ensemble20 = np.quantile(ensemble_full, np.arange(0.025, 1, 1/20), axis=1).transpose()

    # save ensemble
    with open(save_path_results + '/ensembleVal_SD_pt', "wb") as ensemble_file:
        pickle.dump(ensemble20, ensemble_file)

    # evaluate on validation data set
    # apply performance function, saves graphics and results (as csv) in folder assigned above
    performance(ensemble20, y_val, save_path_results)
    print(datetime.now())
    print('Evaluation on validation data set done')
def handle_nav(self):
    """Compute and plot performance for ``self.nav`` into a combined PDF.

    Sets ``self.pdfs`` (a ``PdfPages`` at ``<full_dir minus its last
    character>/allfigs.pdf``) and ``self.performance``, then runs
    ``get_performance`` and ``plot_performance`` on it.
    """
    # the last character of both path attributes is dropped before use
    folder = self.folder_name[:-1]
    pdf_path = self.full_dir[:-1] + '/allfigs.pdf'

    # create the object used for drawing the PDF
    self.pdfs = PdfPages(pdf_path)
    self.performance = performance(self.nav)
    self.performance.get_performance(foldername=folder)
    self.performance.plot_performance(foldername=folder, pdfs=self.pdfs)
def __init__(self, aircraft):
    """Create the analysis objects for ``aircraft``.

    :param aircraft: object handed unchanged to every analysis constructor.
    """
    self.aerodynamics = aerodynamics.aerodynamics(aircraft)
    self.thrust = thrust.thrustAnalysis(aircraft)
    # the mass analysis is currently disabled:
    # self.mass = mass.generalAviationMass(aircraft)
    self.performance = performance.performance(aircraft)
from target import target            # target generation module
from sol import sol                  # solution module
from verify import verify            # verify module
from performance import performance  # performance module

if __name__ == '__main__':
    import sys
    import os

    # sys.argv[0] is the script itself, sys.argv[1] selects the function to
    # call and the following entries are that function's arguments.
    # command name -> (function, number of command-line arguments it receives)
    commands = {
        'target': (target, 2),
        'sol': (sol, 3),
        'verify': (verify, 3),
        'performance': (performance, 1),
    }
    usage = "usage: %s {target|sol|verify|performance} <arguments...>" % sys.argv[0]

    # A missing or unknown command used to end in a bare IndexError or in a
    # silent no-op; report it instead (sys.exit with a message prints it to
    # stderr and exits with status 1).
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        sys.exit(usage)

    handler, nb_args = commands[sys.argv[1]]
    # surplus arguments are ignored, as before
    call_args = sys.argv[2:2 + nb_args]
    if len(call_args) < nb_args:
        sys.exit("%s: '%s' needs %d argument(s), got %d"
                 % (sys.argv[0], sys.argv[1], nb_args, len(call_args)))
    handler(*call_args)
datas = pd.read_excel('datas_final.xlsx', index_col=0, parse_dates=True).dropna() return datas if __name__ == "__main__": datas = main().data_get() print("------------------Equal weighted------------------") weights_EW, result_EW = benchmark.benchmark(datas, period=para.period, rollingtime=para.rollingtime, method='EW') pd.DataFrame( performance.performance(result_EW)).to_csv(para.performance_output + 'result_EW_performance.csv') pd.DataFrame(performance.performance_anl(result_EW)).to_csv( para.performance_output + 'result_EW_performance_anl.csv') weights_EW.to_excel(para.weights_output + 'weights_EW.xlsx') result_EW.to_excel(para.results_output + 'result_EW.xlsx') print("------------------Variance equal weighted------------------") weights_EV, result_EV = benchmark.benchmark(datas, period=para.period, rollingtime=para.rollingtime, method='EV') pd.DataFrame( performance.performance(result_EV)).to_csv(para.performance_output + 'result_EV_performance.csv') pd.DataFrame(performance.performance_anl(result_EV)).to_csv(
def initialize_performance(self):
    """Create ``self.bkt_performance`` for the backtest window.

    The holding days are the index of the holding matrix, sliced from
    ``self.bkt_start`` to ``self.bkt_end``.
    """
    matrix_index = self.bkt_position.holding_matrix.index
    # a Series whose values equal its own index, restricted to the backtest window
    all_days = pd.Series(matrix_index, index=matrix_index)
    holding_days = all_days[self.bkt_start:self.bkt_end]

    self.bkt_performance = performance(
        self.account_value,
        benchmark=self.benchmark_value,
        info_series=self.info_series,
        risk_free_rate=self.bkt_data.const_data['risk_free_rate'],
        holding_days=holding_days,
        cash_ratio=self.real_pct_position.cash,
    )
def train_test(
        data, instance_testing_size, forecast_horizon, feature_or_covariate_set,
        history_length, model='knn', base_models=None, model_type='regression',
        model_parameters=None, feature_scaler='logarithmic',
        target_scaler='logarithmic', labels=None, performance_measures=['MAPE'],
        performance_mode='normal', performance_report=True, save_predictions=True,
        verbose=0):
    """Train a model on the training part of ``data`` and evaluate it on the test part.

    Parameters:
        data: Pandas DataFrame
            a preprocessed DataFrame to be used for training the model and
            making predictions on the test part
        instance_testing_size: int or float
            the size of testing instances
        forecast_horizon: int
            forecast horizon, used for the gap in the data splitting process.
            By the gap, we mean the number of temporal units which are excluded
            from data to simulate the situation of real prediction in which we
            do not have access to the information of forecast horizon-1 units
            before the time point of the target variable.
        feature_or_covariate_set: list<string>
            a list of covariates or features the feature selection is based on.
            If historical data is provided, the input is considered a feature
            list, otherwise a covariate list.
        history_length: int
            history length of the input "data"; only used for the report
        model: string or callable or dict
            string: one of the pre-defined model names
            function: a user-defined function
            dict: pre-defined model names and corresponding hyper parameters
            pre-defined model names: 'knn', 'nn', 'gbm', 'glm'
            NOTE(review): the model name is read from ``model.__name__`` for
            every non-string model, which a plain dict does not have - confirm
            how dict models are expected to be passed.
        base_models:
            forwarded unchanged to ``inner_train_evaluate``
        model_type: string
            'regression' or 'classification'
        model_parameters: dict or None
        feature_scaler: string or None
        target_scaler: string or None
            reset to None for classification
        labels: list or None
            for classification, None means the sorted unique 'Target' values
        performance_measures: list<string>
            performance measures to calculate on the predictions of the test
            dataset (the default list is never modified by this function)
        performance_mode: string
            reset to 'normal' for classification
        performance_report: bool
            if True, a table with the model and its errors (one column per
            performance measure) is saved under 'performance/test process'
        save_predictions: bool
            if True, the test predictions are saved as '.csv' under
            'prediction/test process'
        verbose: int
            the level of produced detailed logging information
            available options:
            0: no logging
            1: only important information logging
            2: all details logging

    Returns:
        the trained model as returned by ``inner_train_evaluate``

    Raises:
        TypeError: an argument has an unexpected type, or a user-defined model
            is named like one of the pre-defined models
        Exception: ``performance_measures`` is not valid for classification
    """
    warnings.filterwarnings("once")

    ################################ checking for TypeError and other possible mistakes in the inputs
    if not isinstance(data, pd.DataFrame):
        raise TypeError("Expected a pandas DataFrame for data.")
    if not isinstance(instance_testing_size, (int, float)):
        raise TypeError("Expected an integer or a float number for instance_testing_size.")
    if not isinstance(forecast_horizon, int):
        raise TypeError("Expected an integer for forecast_horizon.")
    if not isinstance(feature_or_covariate_set, list):
        raise TypeError("Expected a list of strings for feature_or_covariate_set.")
    if not isinstance(history_length, int):
        raise TypeError("Expected an integer for history_length.")
    if not (isinstance(model, (str, dict)) or callable(model)):
        raise TypeError("Expected a string or function or a dictionary of model parameters for model.")
    if not isinstance(model_type, str):
        raise TypeError("Expected a string for model_type.")
    if not (model_parameters is None or isinstance(model_parameters, dict)):
        raise TypeError("Expected a dictionary or None value for model_parameters.")
    if not (feature_scaler is None or isinstance(feature_scaler, str)):
        raise TypeError("Expected a string or None value for feature_scaler.")
    if not (target_scaler is None or isinstance(target_scaler, str)):
        raise TypeError("Expected a string or None value for target_scaler.")
    if not (labels is None or isinstance(labels, list)):
        raise TypeError("Expected a list or None value for labels.")
    if not isinstance(performance_measures, list):
        raise TypeError("Expected a list for performance_measures.")
    if not isinstance(performance_mode, str):
        raise TypeError("Expected a string for performance_mode.")
    if not isinstance(performance_report, bool):
        raise TypeError("Expected a bool variable for performance_report.")
    if not isinstance(save_predictions, bool):
        raise TypeError("Expected a bool variable for save_predictions.")
    if not isinstance(verbose, int):
        raise TypeError("Expected an integer (0 or 1 or 2) for verbose.")
    ################################

    # classification checking
    if model_type == 'classification':
        if not set(performance_measures) <= set(configurations.CLASSIFICATION_PERFORMANCE_MEASURES):
            raise Exception("Error: The input 'performance_measures' is not valid according to 'model_type=classification'.")
        if performance_mode != 'normal':
            performance_mode = 'normal'
            print("Warning: The input 'performance_mode' is set to 'normal' according to model_type=classification'.")
        if target_scaler is not None:
            target_scaler = None
            print("Warning: The input 'target_scaler' is set to None according to model_type=classification'.")

    # get some information of the data
    target_mode, target_granularity, granularity, data = get_target_quantities(data=data.copy())

    # get the target temporal id from temporal id
    # if target temporal id is already in the data, call is from inside the predict function
    # otherwise backup file must be removed
    if 'target temporal id' in data.columns:
        data = data.rename(columns={'target temporal id': 'temporal id'})
    else:
        data, _ = get_target_temporal_ids(temporal_data=data.copy(),
                                          forecast_horizon=forecast_horizon,
                                          granularity=granularity)
        if os.path.isfile('test_process_backup.csv'):
            os.remove('test_process_backup.csv')

    # check rows related to future prediction are removed and if not then remove them
    temp_data = data.sort_values(by=['temporal id', 'spatial id']).copy()
    number_of_spatial_units = len(temp_data['spatial id'].unique())
    number_of_future_rows = granularity*forecast_horizon*number_of_spatial_units
    if all(temp_data.tail(number_of_future_rows)['Target'].isna()):
        data = temp_data.iloc[:-number_of_future_rows]

    # check if model is a string or function
    if isinstance(model, str):
        model_name = model
    else:
        model_name = model.__name__
        if model_name in ['nn', 'knn', 'glm', 'gbm']:
            raise TypeError("Name of the user defined model matches the name of one of our predefined models.")

    # find labels for classification problem
    if labels is None:
        if model_type == 'regression':
            # just an empty list
            labels = []
        elif model_type == 'classification':
            # unique values in 'Target' column of data
            labels = data.Target.unique()
            labels.sort()

    # select features
    processed_data = select_features(
        data=data.copy(),
        ordered_covariates_or_features=feature_or_covariate_set
    )

    # splitting data in the way is set for train_test
    training_data, _, testing_data, gap_data = split_data(
        data=processed_data.copy(),
        splitting_type='instance',
        instance_testing_size=instance_testing_size,
        instance_validation_size=None,
        instance_random_partitioning=False,
        fold_total_number=0,
        fold_number=0,
        forecast_horizon=forecast_horizon,
        granularity=granularity,
        verbose=verbose
    )

    # separate some data which are needed later
    target_columns = ['spatial id', 'temporal id', 'Target', 'Normal target']
    base_data = training_data['Target'].values.tolist()
    training_target = training_data[target_columns]
    test_target = testing_data[target_columns]

    # scaling data
    training_data, testing_data = data_scaling(
        train_data=training_data.copy(),
        test_data=testing_data.copy(),
        feature_scaler=feature_scaler,
        target_scaler=target_scaler
    )

    # training model with processed data
    training_predictions, testing_predictions, trained_model, number_of_parameters = inner_train_evaluate(
        training_data=training_data.copy(),
        validation_data=testing_data.copy(),
        model=model,
        model_type=model_type,
        model_parameters=model_parameters,
        labels=labels,
        base_models=base_models,
        verbose=verbose
    )

    # target descale
    training_predictions = target_descale(
        scaled_data=list(training_predictions),
        base_data=base_data,
        scaler=target_scaler
    )
    testing_predictions = target_descale(
        scaled_data=list(testing_predictions),
        base_data=base_data,
        scaler=target_scaler
    )

    # checking for some files to exist which will be used in the next phases
    test_process_backup_file_name = 'test_process_backup.csv'
    if not pathlib.Path(test_process_backup_file_name).is_file():
        if model_type == 'regression':
            df = pd.DataFrame(columns=target_columns + ['prediction'])
        elif model_type == 'classification':
            df = pd.DataFrame(columns=target_columns +
                              ['prediction class '+str(class_num)
                               for class_num in range(np.array(testing_predictions).shape[1])])
        df.to_csv(test_process_backup_file_name, index=False)

    # getting back previous points (useful for one-by-one method, also works for one-as-whole method)
    previous_test_points = pd.read_csv(test_process_backup_file_name)

    # append current point to previous points
    # (pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0)
    test_target = pd.concat([test_target, previous_test_points[target_columns]],
                            ignore_index=True)
    if model_type == 'regression':
        previous_testing_predictions = previous_test_points['prediction'].tolist()
        testing_predictions = list(testing_predictions) + previous_testing_predictions
    elif model_type == 'classification':
        previous_testing_predictions = previous_test_points.filter(regex='^prediction class ', axis=1)
        testing_predictions = np.concatenate((np.array(testing_predictions),
                                              np.array(previous_testing_predictions)))
        testing_predictions_df = pd.DataFrame(testing_predictions)
        testing_predictions_df.columns = ['prediction class '+str(class_num)
                                          for class_num in testing_predictions_df.columns]

    # saving test_target+testing_predictions into a backup file to be used in the next point
    df_for_backup = test_target.copy()
    if model_type == 'regression':
        df_for_backup.insert(loc=len(df_for_backup.columns), column='prediction',
                             value=testing_predictions)
    elif model_type == 'classification':
        df_for_backup = pd.concat([df_for_backup, testing_predictions_df], axis=1)
    df_for_backup.to_csv(test_process_backup_file_name, index=False)

    # get normal data; the gap rows are added to the training part, with their
    # 'Target' values standing in for the predictions
    training_target, test_target, training_prediction, test_prediction = get_normal_target(
        training_target=pd.concat([training_target, gap_data[target_columns]],
                                  ignore_index=True),
        test_target=test_target.copy(),
        training_prediction=list(training_predictions) + gap_data['Target'].tolist(),
        test_prediction=testing_predictions,
        target_mode=target_mode,
        target_granularity=target_granularity
    )

    # make copy of some data to be stored later
    test_target_normal, test_prediction_normal = test_target.copy(), test_prediction.copy()

    # including performance_mode
    training_target, test_target, training_prediction, test_prediction = apply_performance_mode(
        training_target=training_target.copy(),
        test_target=test_target.copy(),
        training_prediction=list(training_prediction),
        test_prediction=test_prediction,
        performance_mode=performance_mode
    )

    # computing trivial values for the test set (just when want to calculate MASE)
    if 'MASE' in performance_measures:
        _, _, _, testing_true_values, testing_predicted_values, testing_trivial_values = get_trivial_values(
            train_true_values_df=training_target.copy(),
            validation_true_values_df=test_target.copy(),
            train_prediction=list(training_prediction),
            validation_prediction=test_prediction,
            forecast_horizon=forecast_horizon,
            granularity=granularity
        )
        # computing performance on test dataset
        test_prediction_errors = performance(
            true_values=testing_true_values,
            predicted_values=testing_predicted_values,
            performance_measures=performance_measures,
            trivial_values=testing_trivial_values,
            model_type=model_type,
            num_params=number_of_parameters,
            labels=labels)
    else:
        # computing performance on test dataset
        test_prediction_errors = performance(
            true_values=test_target['Normal target'],
            predicted_values=test_prediction,
            performance_measures=performance_measures,
            trivial_values=[],
            model_type=model_type,
            num_params=number_of_parameters,
            labels=labels)

    # make sure the directories used for logging exist
    pathlib.Path('prediction/test process').mkdir(parents=True, exist_ok=True)
    pathlib.Path('performance/test process').mkdir(parents=True, exist_ok=True)

    # saving predictions based on model_type
    pred_file_name = 'prediction/test process/test prediction forecast horizon = %s.csv' % (forecast_horizon)
    testing_predictions = np.array(testing_predictions)
    if save_predictions:
        if model_type == 'regression':
            df = pd.DataFrame()
            df['real'] = test_target_normal['Normal target'].values.tolist()
            df['prediction'] = list(test_prediction_normal)
            df.insert(0, 'temporal id', test_target_normal['temporal id'].values.tolist(), True)
            df.insert(0, 'spatial id', test_target_normal['spatial id'].values.tolist(), True)
            df.insert(0, 'model name', model_name, True)
            df.to_csv(pred_file_name, index=False)
        elif model_type == 'classification':
            df = pd.DataFrame()
            df['real'] = test_target_normal['Normal target'].values.tolist()
            # one column per class label
            for i, label in enumerate(labels):
                df['class ' + str(label)] = testing_predictions[:, i]
            df.insert(0, 'temporal id', test_target_normal['temporal id'].values.tolist(), True)
            df.insert(0, 'spatial id', test_target_normal['spatial id'].values.tolist(), True)
            df.insert(0, 'model name', model_name, True)
            df.to_csv(pred_file_name, index=False)

    # saving performance (same approach for both regression and classification)
    performance_file_name = 'performance/test process/test performance report forecast horizon = %s.csv' % (forecast_horizon)

    # selecting temporal and futuristic features or covariates from the feature_or_covariate_set list
    check_list = [item for item in feature_or_covariate_set if item.count(' ') != 0]

    # type_flag for detecting feature type (False) or covariate type (True)
    # check if all elements in check_list meet the condition for being covariate type
    type_flag = all(re.search(' t$', element) or re.search(' t[+]$', element)
                    for element in check_list)

    # a list to be saved in performance report file
    if type_flag:
        # covariate type: drop the last two characters of every item that contains a space
        processed_feature_or_covariate_set = [
            item[:-2] if item.count(' ') != 0 else item
            for item in feature_or_covariate_set
        ]
    else:
        processed_feature_or_covariate_set = feature_or_covariate_set.copy()

    if performance_report:
        df_data = {
            'model name': [model_name],
            'history length': [history_length],
            'feature or covariate set': ', '.join(processed_feature_or_covariate_set)
        }
        df = pd.DataFrame(df_data, columns=list(df_data.keys()))
        # errors are matched to the performance measures by position
        for i, measure in enumerate(performance_measures):
            df[measure] = [float(test_prediction_errors[i])]
        df.to_csv(performance_file_name, index=False)

    return trained_model
def main():
    """Pretrain the top-pair and anaphoricity scorers, tracking the best dev F1.

    Loads a saved network from ``best_network_file``, builds a fresh
    ``network.Network`` on the GPU and hands both to ``net_copy``.  It then
    runs 100 epochs; each epoch:

    1. trains on ``train_docs`` with the sum of the pair loss and the
       anaphoricity loss (both summed binary cross-entropy divided by a
       scale factor read from ``train_docs``);
    2. saves an epoch checkpoint under ``model_save_dir``;
    3. scores ``dev_docs`` with dropout 0.0 and picks the best F1 over a fixed
       list of thresholds, saving ``network_model_pretrain.top.best`` when the
       pair F1 strictly beats the best seen so far;
    4. every 10th epoch reloads that best checkpoint and runs
       ``performance.performance`` on dev and test.

    Reads the module-level ``args`` and the ``*_dimention`` / ``embedding_size``
    globals; returns nothing.
    """
    DIR = args.DIR
    embedding_file = args.embedding_dir
    best_network_file = "./model/pretrain/network_model_pretrain.best"
    # NOTE(review): the message names ./model/model.pkl, but the file actually
    # loaded on the next line is best_network_file.
    print >> sys.stderr, "Read model from ./model/model.pkl"
    best_network_model = torch.load(best_network_file)
    embedding_matrix = numpy.load(embedding_file)
    # Bare string expression: evaluated and discarded, it acts only as a label.
    "Building torch model"
    network_model = network.Network(pair_feature_dimention,
                                    mention_feature_dimention,
                                    word_embedding_dimention, span_dimention,
                                    1000, embedding_size, embedding_dimention,
                                    embedding_matrix).cuda()
    # NOTE(review): nothing is saved here; the torch.save call is commented out.
    print >> sys.stderr, "save model ..."
    #torch.save(network_model,network_file)
    # Presumably copies the loaded weights into the new network - confirm
    # against net_copy's definition.
    net_copy(network_model, best_network_model)
    # Optional suffix selecting the "_reduced" data splits.
    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"
    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)
    # Hyper-parameters.  times, best_thres and last_cost are assigned but not
    # read anywhere in this function.
    l2_lambda = 1e-6
    lr = 0.0002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5
    model_save_dir = "./model/pretrain/"
    last_cost = 0.0
    # Best pair-level dev result seen across all epochs.
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }
    for echo in range(100):
        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo
        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0
        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        # A new optimizer object is constructed at the start of every epoch.
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)
        # Per-epoch accumulators; only pair_cost_this_turn is updated below.
        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0
        pair_nums = 0
        ana_nums = 0
        pos_num = 0
        neg_num = 0
        inside_time = 0.0
        # ---- training pass ----
        for data in train_docs.train_generater(shuffle=shuffle, top=True):
            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,top_x = data
            # Wrap each numpy array as a CUDA Variable: LongTensor for
            # index-like fields, FloatTensor for the rest.
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))
            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))
            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(
                torch.from_numpy(top_x["top_gold"]).type(
                    torch.cuda.FloatTensor))
            anaphoricity_gold = anaphoricity_target.tolist()
            # The gold list is wrapped in an extra list, adding a leading
            # dimension; the dev pass indexes [0] on the matching output.
            ana_lable = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))
            optimizer.zero_grad()
            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, dropout_rate)
            # Summed (not averaged) BCE, divided by a dataset scale factor.
            loss = F.binary_cross_entropy(
                output, top_gold,
                size_average=False) / train_docs.scale_factor_top
            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(
                ana_output, ana_lable, size_average=False
            ) / train_docs.anaphoricity_scale_factor_top
            # Both objectives are optimised jointly ...
            loss_all = loss + ana_loss
            loss_all.backward()
            # ... but only the pair loss is accumulated for reporting.
            pair_cost_this_turn += loss.data[0]
            optimizer.step()
        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain", echo, "Pair total cost:", pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr
        print >> sys.stderr, "save model ..."
        # Per-epoch checkpoint (the whole module object is saved).
        torch.save(network_model,
                   model_save_dir + "network_model_pretrain.%d.top" % echo)
        #if cost_this_turn > last_cost:
        #    lr = lr*0.7
        # ---- dev pass: collect gold labels and predictions ----
        gold = []
        predict = []
        ana_gold = []
        ana_predict = []
        for data in dev_docs.train_generater(shuffle=False, top=True):
            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target, top_x = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))
            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))
            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))
            gold += top_x["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()
            # Evaluation forward passes use a dropout rate of 0.0.
            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, 0.0)
            predict += output.data.cpu().numpy().tolist()
            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()
        # ---- pair-level threshold sweep ----
        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        thresh_list = [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            # ">=" means that on ties the later (higher) threshold wins.
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()
        # Strict ">": the best checkpoint is only written on a real improvement.
        if best_results["f1"] > all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.top.best")
        # ---- anaphoricity threshold sweep (reported only, never saved) ----
        ana_gold = numpy.array(ana_gold, dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()
        # Every 10th epoch: evaluate the best checkpoint on dev and test.
        # NOTE(review): this file is only written inside this function when a
        # pair F1 above 0.0 was reached - confirm it exists on the first reload.
        if (echo + 1) % 10 == 0:
            best_network_model = torch.load(model_save_dir +
                                            "network_model_pretrain.top.best")
            print "DEV:"
            performance.performance(dev_docs, best_network_model)
            print "TEST:"
            performance.performance(test_docs, best_network_model)
def main():
    """Fine-tune the network with a REINFORCE-style loss on a B-cubed reward.

    Loads ``network_file`` when it exists; otherwise builds a new
    ``network.Network`` and saves it there.  The dev-set B-cubed F from
    ``performance.performance`` seeds a running list ``f_b`` used as the reward
    baseline.  For 20 epochs the function walks
    ``train_docs.rl_case_generater``: it samples one action per span from the
    softmax over scores, and when ``rl["end"]`` marks the end of a document it
    scores the sampled clustering and replays the buffered batches, taking one
    optimizer step per batch with the loss ``-reward * log_softmax[action]``.
    Dev performance is evaluated after every epoch.

    Reads the module-level ``args`` and the ``*_dimention`` / ``embedding_size``
    globals; returns nothing.
    """
    DIR = args.DIR
    embedding_file = args.embedding_dir
    #network_file = "./model/model.pkl"
    #network_file = "./model/pretrain/network_model_pretrain.20"
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    if os.path.isfile(network_file):
        # NOTE(review): the message names ./model/model.pkl, but the file
        # loaded is network_file.
        print >> sys.stderr, "Read model from ./model/model.pkl"
        network_model = torch.load(network_file)
    else:
        embedding_matrix = numpy.load(embedding_file)
        #print len(embedding_matrix)
        # Bare string expression: evaluated and discarded.
        "Building torch model"
        network_model = network.Network(pair_feature_dimention,
                                        mention_feature_dimention,
                                        word_embedding_dimention,
                                        span_dimention, 1000, embedding_size,
                                        embedding_dimention,
                                        embedding_matrix).cuda()
        print >> sys.stderr, "save model ..."
        torch.save(network_model, network_file)
    # Optional suffix selecting the "_reduced" data splits.
    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"
    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    #train_docs = DataReader.DataGnerater("dev"+reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    #test_docs = DataReader.DataGnerater("test"+reduced)
    # Hyper-parameters.  shuffle, times, best_thres, reinforce and
    # model_save_dir are assigned but not read by any live code below.
    l2_lambda = 1e-6
    lr = 0.00002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5
    reinforce = True
    model_save_dir = "./model/pretrain/"
    # Seed the reward baseline with the dev-set B-cubed F before training.
    metrics = performance.performance(dev_docs, network_model)
    p, r, f = metrics["b3"]
    f_b = [f]
    #for echo in range(30,200):
    for echo in range(20):
        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo
        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0
        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        cost = 0.0
        # A new optimizer object is constructed at the start of every epoch.
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)
        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0
        pair_nums = 0
        ana_nums = 0
        pos_num = 0
        neg_num = 0
        inside_time = 0.0
        score_softmax = nn.Softmax()
        # Per-document sampling state.  cluster_info starts with a single -1
        # entry; each sampled action appends the cluster id it chose.
        cluster_info = []
        new_cluster_num = 0
        cluster_info.append(-1)
        action_list = []
        new_cluster_info = []
        # Batches of the current document, buffered until rl["end"] is seen.
        tmp_data = []
        #for data in train_docs.rl_case_generater():
        for data in train_docs.rl_case_generater(shuffle=True):
            inside_time += 1
            this_doc = train_docs
            tmp_data.append(data)
            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data
            # Map every item of every gold chain to the chain containing it.
            gold_chain = this_doc.gold_chain[rl["did"]]
            gold_dict = {}
            for chain in gold_chain:
                for item in chain:
                    gold_dict[item] = chain
            # Wrap each numpy array as a CUDA Variable.
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))
            output, pair_score = network_model.forward_all_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                dropout_rate)
            ana_output, ana_score = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)
            reindex = autograd.Variable(
                torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
            # Concatenate pair and anaphoricity scores, transpose, then reorder
            # with rl["reindex"] so the [s:e] slices below address one span.
            scores_reindex = torch.transpose(
                torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
            #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]
            # ---- sample one action per span ----
            for s, e in zip(rl["starts"], rl["ends"]):
                #action_prob: scores_reindex[s:e][1]
                score = score_softmax(
                    torch.transpose(scores_reindex[s:e], 0,
                                    1)).data.cpu().numpy()[0]
                this_action = utils.sample_action(score)
                #this_action = ac_list.index(max(score.tolist()))
                action_list.append(this_action)
                # Choosing the last slot opens a new cluster; any other slot
                # reuses the cluster id stored at that position.
                if this_action == len(score) - 1:
                    should_cluster = new_cluster_num
                    new_cluster_num += 1
                    new_cluster_info.append(1)
                else:
                    should_cluster = cluster_info[this_action]
                    new_cluster_info.append(0)
                cluster_info.append(should_cluster)
            # ---- end of document: compute the reward and update ----
            if rl["end"] == True:
                ev_document = utils.get_evaluation_document(
                    cluster_info, this_doc.gold_chain[rl["did"]],
                    candi_ids_return, new_cluster_num)
                p, r, f = evaluation.evaluate_documents([ev_document],
                                                        evaluation.b_cubed)
                # NOTE(review): trick_reward is computed, but only the
                # commented-out reward variants read it.
                trick_reward = utils.get_reward_trick(cluster_info, gold_dict,
                                                      new_cluster_info,
                                                      action_list,
                                                      candi_ids_return)
                #reward = f + trick_reward
                # Reward is this document's F minus the running mean of recent
                # F values, scaled by 10; the window is capped at 128 entries.
                average_f = float(sum(f_b)) / len(f_b)
                reward = (f - average_f) * 10
                f_b.append(f)
                if len(f_b) > 128:
                    f_b = f_b[1:]
                # index walks action_list across all buffered batches.
                index = 0
                for data in tmp_data:
                    mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
                    target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data
                    mention_index = autograd.Variable(
                        torch.from_numpy(mention_word_index).type(
                            torch.cuda.LongTensor))
                    mention_span = autograd.Variable(
                        torch.from_numpy(mention_span).type(
                            torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(
                        torch.from_numpy(candi_word_index).type(
                            torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(
                        torch.from_numpy(candi_span).type(
                            torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(
                        torch.from_numpy(feature_pair).type(
                            torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(
                        torch.from_numpy(pair_anaphors).type(
                            torch.cuda.LongTensor))
                    antecedents = autograd.Variable(
                        torch.from_numpy(pair_antecedents).type(
                            torch.cuda.LongTensor))
                    anaphoricity_index = autograd.Variable(
                        torch.from_numpy(anaphoricity_word_indexs).type(
                            torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(
                        torch.from_numpy(anaphoricity_spans).type(
                            torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(
                        torch.from_numpy(anaphoricity_features).type(
                            torch.cuda.FloatTensor))
                    # rl_costs is built and transposed but not used in the loss.
                    rl_costs = autograd.Variable(
                        torch.from_numpy(rl["costs"]).type(
                            torch.cuda.FloatTensor))
                    rl_costs = torch.transpose(rl_costs, 0, 1)
                    # Re-run the forward pass so gradients can flow.
                    output, pair_score = network_model.forward_all_pair(
                        word_embedding_dimention, mention_index, mention_span,
                        candi_index, candi_spans, pair_feature, anaphors,
                        antecedents, dropout_rate)
                    ana_output, ana_score = network_model.forward_anaphoricity(
                        word_embedding_dimention, anaphoricity_index,
                        anaphoricity_span, anaphoricity_feature, dropout_rate)
                    reindex = autograd.Variable(
                        torch.from_numpy(rl["reindex"]).type(
                            torch.cuda.LongTensor))
                    optimizer.zero_grad()
                    loss = None
                    scores_reindex = torch.transpose(
                        torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
                    #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]
                    for s, e in zip(rl["starts"], rl["ends"]):
                        #action_prob: scores_reindex[s:e][1]
                        this_action = action_list[index]
                        #current_reward = reward + trick_reward[index]
                        current_reward = reward
                        #this_loss = -reward*(torch.transpose(F.log_softmax(torch.transpose(scores_reindex[s:e],0,1)),0,1)[this_action])
                        # Policy-gradient term: -reward * log pi(action).
                        this_loss = -current_reward * (torch.transpose(
                            F.log_softmax(
                                torch.transpose(scores_reindex[s:e], 0, 1)), 0,
                            1)[this_action])
                        if loss is None:
                            loss = this_loss
                        else:
                            loss += this_loss
                        index += 1
                    #loss /= len(rl["starts"])
                    # Average over the spans in this batch.
                    loss /= len(rl["starts"])
                    #loss = loss/train_docs.scale_factor
                    ## policy graident
                    cost += loss.data[0]
                    loss.backward()
                    optimizer.step()
                # Reset the per-document state for the next document.
                new_cluster_num = 0
                cluster_info = []
                cluster_info.append(-1)
                tmp_data = []
                action_list = []
                new_cluster_info = []
            #if inside_time%50 == 0:
            #    performance.performance(dev_docs,network_model)
            #    print
            #    sys.stdout.flush()
        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "cost:", cost
        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)
        # Evaluate on dev after every epoch; no checkpoint is written here.
        performance.performance(dev_docs, network_model)
        sys.stdout.flush()
def open_performance(self):
    """Hide the master window and show the performance view in a new window.

    A ``Toplevel`` child of ``self.master`` is created, the master is
    withdrawn, and ``performance`` is invoked with the new window, the master
    and ``self.previous``.  Closing the new window is routed to
    ``self.previous.destroy``.
    """
    master = self.master
    previous = self.previous

    perf_window = Toplevel(master)
    master.withdraw()
    performance(perf_window, master, previous)

    # Route the window-manager close request to the previous window's destroy.
    perf_window.wm_protocol("WM_DELETE_WINDOW", previous.destroy)
def initialize_performance(self):
    """Build ``self.bkt_performance`` for the backtest window.

    The index of the holding matrix is turned into a Series indexed by itself,
    sliced from ``self.bkt_start`` to ``self.bkt_end``, and passed as
    ``holding_days`` to ``performance`` together with the account value,
    benchmark, info series and risk-free rate.
    """
    matrix_index = self.bkt_position.holding_matrix.index
    # A Series whose values and index are both the holding-matrix index, so it
    # can be label-sliced by the backtest boundaries.
    all_days = pd.Series(matrix_index, index=matrix_index)
    days_in_window = all_days[self.bkt_start:self.bkt_end]

    self.bkt_performance = performance(
        self.account_value,
        benchmark=self.benchmark_value,
        info_series=self.info_series,
        risk_free_rate=self.risk_free_rate,
        holding_days=days_in_window,
    )
def main():
    """Pretrain the top-pair and anaphoricity scorers with a StepLR schedule.

    Loads a saved network from ``best_network_file``, builds a fresh
    ``network.Network`` from the ``nnargs`` settings and hands both to
    ``net_copy``.  A single RMSprop optimizer with a ``StepLR`` scheduler
    (step_size=75, gamma=0.5) is used for ``nnargs["epoch"]`` epochs.  Each
    epoch trains on ``train_docs``, scores ``dev_docs`` with dropout 0.0,
    sweeps a fixed list of thresholds, and saves
    ``network_model_pretrain.best.top`` under ``./model/bp/`` whenever the pair
    F1 is at least as good as the best so far.  Every 10th epoch that
    checkpoint is reloaded and passed to ``performance.performance`` for dev
    and test.

    Reads the module-level ``args`` and ``nnargs``; returns nothing.
    """
    DIR = args.DIR
    embedding_file = args.embedding_dir
    best_network_file = "./model/network_model_pretrain.best"
    print >> sys.stderr,"Read model from",best_network_file
    best_network_model = torch.load(best_network_file)
    embedding_matrix = numpy.load(embedding_file)
    # Bare string expression: evaluated and discarded.
    "Building torch model"
    network_model = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda()
    # NOTE(review): nothing is saved at this point despite the message.
    print >> sys.stderr,"save model ..."
    # Presumably copies the loaded weights into the new network - confirm
    # against net_copy's definition.
    net_copy(network_model,best_network_model)
    # Optional suffix selecting the "_reduced" data splits.
    reduced=""
    if args.reduced == 1:
        reduced="_reduced"
    print >> sys.stderr,"prepare data for train ..."
    train_docs = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test"+reduced)
    # l2_lambda and last_cost are assigned but not read below.
    l2_lambda = 1e-6
    lr = nnargs["lr"]
    dropout_rate = nnargs["dropout_rate"]
    epoch = nnargs["epoch"]
    model_save_dir = "./model/bp/"
    last_cost = 0.0
    # Best pair-level dev result seen across all epochs.
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
        }
    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)
    for echo in range(epoch):
        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo
        # The scheduler is stepped at the start of the epoch, before any
        # optimizer.step() of that epoch.
        scheduler.step()
        # Only pair_cost_this_turn is updated below.
        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0
        pair_nums = 0
        ana_nums = 0
        # ---- training pass ----
        for data in train_docs.train_generater(shuffle=True):
            # Wrap each numpy array as a CUDA Variable: LongTensor for
            # index-like fields, FloatTensor for the rest.
            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))
            # The anaphoricity inputs reuse the same "mention_word_index" and
            # "mention_span" arrays as the mention inputs above.
            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))
            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            # The gold list is wrapped in an extra list, adding a leading
            # dimension; the dev pass indexes [0] on the matching output.
            ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))
            optimizer.zero_grad()
            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,dropout_rate)
            # Summed (not averaged) BCE, divided by a dataset scale factor.
            loss = F.binary_cross_entropy(output,top_gold,size_average=False)/train_docs.scale_factor_top
            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor_top
            # Both objectives are optimised jointly ...
            loss_all = loss + ana_loss
            loss_all.backward()
            # ... but only the pair loss is accumulated for reporting.
            pair_cost_this_turn += loss.data[0]
            optimizer.step()
        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain",echo,"Pair total cost:",pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time)
        # NOTE(review): lr is the local set once from nnargs["lr"]; this
        # function never reassigns it, so the value printed here stays constant
        # across epochs.
        print >> sys.stderr, "Learning Rate",lr
        # ---- dev pass: collect gold labels and predictions ----
        gold = []
        predict = []
        ana_gold = []
        ana_predict = []
        for data in dev_docs.train_generater(shuffle=False):
            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))
            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            # top_gold and ana_lable are built here but not read in this loop;
            # the gold lists below come straight from the numpy arrays.
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))
            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))
            gold += data["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()
            # Evaluation forward passes use a dropout rate of 0.0.
            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,0.0)
            predict += output.data.cpu().numpy().tolist()
            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()
        # ---- pair-level threshold sweep ----
        gold = numpy.array(gold,dtype=numpy.int32)
        predict = numpy.array(predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        thresh_list = [0.3,0.35,0.4,0.45,0.5,0.55,0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            # ">=" means that on ties the later (higher) threshold wins.
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()
        # ">=": a tie with the best-so-far also overwrites the checkpoint.
        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model, model_save_dir+"network_model_pretrain.best.top")
        # ---- anaphoricity threshold sweep (reported only, never saved) ----
        ana_gold = numpy.array(ana_gold,dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()
        # Every 10th epoch: evaluate the best checkpoint on dev and test.
        if (echo+1)%10 == 0:
            best_network_model = torch.load(model_save_dir+"network_model_pretrain.best.top")
            print "DEV:"
            performance.performance(dev_docs,best_network_model)
            print "TEST:"
            performance.performance(test_docs,best_network_model)
def performance(self):
    """Create a ``performance`` object and store it on the instance.

    The name ``performance`` called here is resolved from the enclosing
    module scope, not from the instance.  The result is assigned to
    ``self.performance``, so after the first call the instance attribute
    shadows this method on that instance.
    """
    created = performance()
    self.performance = created
def main(): DIR = args.DIR embedding_file = args.embedding_dir best_network_file = "./model/network_model_pretrain.best.top" print >> sys.stderr, "Read model from ", best_network_file best_network_model = torch.load(best_network_file) embedding_matrix = numpy.load(embedding_file) "Building torch model" worker = network.Network( nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"], nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000, nnargs["embedding_size"], nnargs["embedding_dimention"], embedding_matrix).cuda() net_copy(worker, best_network_model) best_network_file = "./model/network_model_pretrain.best.top" print >> sys.stderr, "Read model from ", best_network_file best_network_model = torch.load(best_network_file) manager = network.Network( nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"], nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000, nnargs["embedding_size"], nnargs["embedding_dimention"], embedding_matrix).cuda() net_copy(manager, best_network_model) reduced = "" if args.reduced == 1: reduced = "_reduced" print >> sys.stderr, "prepare data for train ..." #train_docs_iter = DataReader.DataGnerater("train"+reduced) train_docs_iter = DataReader.DataGnerater("dev" + reduced) print >> sys.stderr, "prepare data for dev and test ..." dev_docs_iter = DataReader.DataGnerater("dev" + reduced) test_docs_iter = DataReader.DataGnerater("test" + reduced) print "Performance after pretraining..." 
print "DEV" metric = performance.performance(dev_docs_iter, worker, manager) print "Average:", metric["average"] print "TEST" metric = performance.performance(test_docs_iter, worker, manager) print "Average:", metric["average"] print "***" print sys.stdout.flush() lr = nnargs["lr"] top_k = nnargs["top_k"] model_save_dir = "./model/reinforce/" utils.mkdir(model_save_dir) score_softmax = nn.Softmax() optimizer_manager = optim.RMSprop(manager.parameters(), lr=lr, eps=1e-6) optimizer_worker = optim.RMSprop(worker.parameters(), lr=lr, eps=1e-6) MAX_AVE = 2048 for echo in range(nnargs["epoch"]): start_time = timeit.default_timer() print "Pretrain Epoch:", echo reward_log = Logger(Tensorboard + args.tb + "/acl2018/%d/reward/" % echo, flush_secs=3) entropy_log_manager = Logger(Tensorboard + args.tb + "/acl2018/%d/entropy/worker" % echo, flush_secs=3) entropy_log_worker = Logger(Tensorboard + args.tb + "/acl2018/%d/entropy/manager" % echo, flush_secs=3) #train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl') train_docs = utils.load_pickle(args.DOCUMENT + 'dev_docs.pkl') docs_by_id = {doc.did: doc for doc in train_docs} ave_reward = [] ave_manager_entropy = [] ave_worker_entropy = [] print >> sys.stderr, "Link docs ..." 
tmp_data = [] cluster_info = {0: [0]} cluster_list = [0] current_new_cluster = 1 predict_action_embedding = [] choose_action = [] mid = 1 step = 0 statistic = { "worker_hits": 0, "manager_hits": 0, "total": 0, "manager_predict_last": 0, "worker_predict_last": 0 } for data in train_docs_iter.rl_case_generater(shuffle=True): rl = data["rl"] scores_manager, representations_manager = get_score_representations( manager, data) for s, e in zip(rl["starts"], rl["ends"]): action_embeddings = representations_manager[s:e] probs = F.softmax(torch.transpose(scores_manager[s:e], 0, 1)) m = Categorical(probs) this_action = m.sample() index = this_action.data.cpu().numpy()[0] if index == (e - s - 1): should_cluster = current_new_cluster cluster_info[should_cluster] = [] current_new_cluster += 1 else: should_cluster = cluster_list[index] choose_action.append(index) cluster_info[should_cluster].append(mid) cluster_list.append(should_cluster) mid += 1 cluster_indexs = torch.cuda.LongTensor( cluster_info[should_cluster]) action_embedding_predict = torch.mean( action_embeddings[cluster_indexs], 0, keepdim=True) predict_action_embedding.append(action_embedding_predict) tmp_data.append(data) if rl["end"] == True: inside_index = 0 manager_path = [] worker_path = [] doc = docs_by_id[rl["did"]] for data in tmp_data: rl = data["rl"] pair_target = data["pair_target"] anaphoricity_target = 1 - data["anaphoricity_target"] target = numpy.concatenate( (pair_target, anaphoricity_target))[rl["reindex"]] scores_worker, representations_worker = get_score_representations( worker, data) for s, e in zip(rl["starts"], rl["ends"]): action_embeddings = representations_worker[s:e] score = score_softmax( torch.transpose(scores_worker[s:e], 0, 1)).data.cpu().numpy()[0] action_embedding_choose = predict_action_embedding[ inside_index] similarities = torch.sum( torch.abs(action_embeddings - action_embedding_choose), 1) similarities = similarities.data.cpu().numpy() action_probabilities = [] action_list = [] 
action_candidates = heapq.nlargest( top_k, -similarities) for action in action_candidates: action_index = numpy.argwhere( similarities == -action)[0][0] action_probabilities.append(score[action_index]) action_list.append(action_index) manager_action = choose_action[inside_index] if not manager_action in action_list: action_list.append(manager_action) action_probabilities.append(score[manager_action]) this_target = target[s:e] manager_action = choose_action[inside_index] sample_action = utils.sample_action( numpy.array(action_probabilities)) worker_action = action_list[sample_action] if this_target[worker_action] == 1: statistic["worker_hits"] += 1 if this_target[manager_action] == 1: statistic["manager_hits"] += 1 if worker_action == (e - s - 1): statistic["worker_predict_last"] += 1 if manager_action == (e - s - 1): statistic["manager_predict_last"] += 1 statistic["total"] += 1 inside_index += 1 #link = manager_action link = worker_action m1, m2 = rl['ids'][s + link] doc.link(m1, m2) manager_path.append(manager_action) worker_path.append(worker_action) reward = doc.get_f1() for data in tmp_data: for s, e in zip(rl["starts"], rl["ends"]): ids = rl['ids'][s:e] ana = ids[0, 1] old_ant = doc.ana_to_ant[ana] doc.unlink(ana) costs = rl['costs'][s:e] for ant_ind in range(e - s): costs[ant_ind] = doc.link(ids[ant_ind, 0], ana, hypothetical=True, beta=1) doc.link(old_ant, ana) #costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor)) inside_index = 0 worker_entropy = 0.0 for data in tmp_data: new_step = step # worker scores_worker, representations_worker = get_score_representations( worker, data, dropout=nnargs["dropout_rate"]) optimizer_worker.zero_grad worker_loss = None for s, e in zip(rl["starts"], rl["ends"]): costs = rl['costs'][s:e] costs = autograd.Variable( torch.from_numpy(costs).type( torch.cuda.FloatTensor)) action = worker_path[inside_index] score = F.softmax( torch.transpose(scores_worker[s:e], 0, 1)) if not score.size()[1] == 
costs.size()[0]: continue score = torch.squeeze(score) baseline = torch.sum(costs * score) this_cost = torch.log( score[action]) * -1.0 * (reward - baseline) if worker_loss is None: worker_loss = this_cost else: worker_loss += this_cost worker_entropy += torch.sum( score * torch.log(score + 1e-7) ).data.cpu().numpy()[ 0] #+ 0.001*torch.sum(score*torch.log(score+1e-7)) inside_index += 1 worker_loss.backward() torch.nn.utils.clip_grad_norm(worker.parameters(), nnargs["clip"]) optimizer_worker.step() ave_worker_entropy.append(worker_entropy) if len(ave_worker_entropy) >= MAX_AVE: ave_worker_entropy = ave_worker_entropy[1:] entropy_log_worker.log_value( 'entropy', float(sum(ave_worker_entropy)) / float(len(ave_worker_entropy)), new_step) new_step += 1 inside_index = 0 manager_entropy = 0.0 for data in tmp_data: new_step = step rl = data["rl"] ave_reward.append(reward) if len(ave_reward) >= MAX_AVE: ave_reward = ave_reward[1:] reward_log.log_value( 'reward', float(sum(ave_reward)) / float(len(ave_reward)), new_step) scores_manager, representations_manager = get_score_representations( manager, data, dropout=nnargs["dropout_rate"]) optimizer_manager.zero_grad manager_loss = None for s, e in zip(rl["starts"], rl["ends"]): score = F.softmax( torch.transpose(scores_manager[s:e], 0, 1)) costs = rl['costs'][s:e] costs = autograd.Variable( torch.from_numpy(costs).type( torch.cuda.FloatTensor)) if not score.size()[1] == costs.size()[0]: continue action = manager_path[inside_index] score = torch.squeeze(score) baseline = torch.sum(costs * score) this_cost = torch.log(score[action]) * -1.0 * ( reward - baseline ) # + 0.001*torch.sum(score*torch.log(score+1e-7)) #this_cost = torch.sum(score*costs) + 0.001*torch.sum(score*torch.log(score+1e-7)) if manager_loss is None: manager_loss = this_cost else: manager_loss += this_cost manager_entropy += torch.sum( score * torch.log(score + 1e-7)).data.cpu().numpy()[0] inside_index += 1 manager_loss.backward() 
torch.nn.utils.clip_grad_norm(manager.parameters(), nnargs["clip"]) optimizer_manager.step() ave_manager_entropy.append(manager_entropy) if len(ave_manager_entropy) >= MAX_AVE: ave_manager_entropy = ave_manager_entropy[1:] entropy_log_manager.log_value( 'entropy', float(sum(ave_manager_entropy)) / float(len(ave_manager_entropy)), new_step) new_step += 1 step = new_step tmp_data = [] cluster_info = {0: [0]} cluster_list = [0] current_new_cluster = 1 mid = 1 predict_action_embedding = [] choose_action = [] end_time = timeit.default_timer() print >> sys.stderr, "TRAINING Use %.3f seconds" % (end_time - start_time) print >> sys.stderr, "save model ..." #print "Top k",top_k print "Worker Hits", statistic[ "worker_hits"], "Manager Hits", statistic[ "manager_hits"], "Total", statistic["total"] print "Worker predict last", statistic[ "worker_predict_last"], "Manager predict last", statistic[ "manager_predict_last"] #torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo) #torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo) print "DEV" metric = performance.performance(dev_docs_iter, worker, manager) print "Average:", metric["average"] print "DEV manager" metric = performance_manager.performance(dev_docs_iter, worker, manager) print "Average:", metric["average"] print "TEST" metric = performance.performance(test_docs_iter, worker, manager) print "Average:", metric["average"] print sys.stdout.flush()
attention=AttentionLayer() test_model=load_model(out_name+'_best_model',custom_objects={'AttentionLayer':attention}) loss,acc=test_model.evaluate(test_seq,test_label) print('loss:',loss) print('acc:',acc) out=test_model.predict(test_seq) pred_proba=out[:,1] pred=np.argmax(out,axis=1) pred=np.argmax(pred_proba) pred=np.array([1 if x>0.5 else 0 for x in pred_proba]) acc=metrics.accuracy_score(test_label_,pred) print(acc) roc_auc=metrics.roc_auc_score(test_label_,pred_proba) MCC=metrics.matthews_corrcoef(test_label_, pred) precision, recall, SN, SP, GM, TP, TN, FP, FN = performance(test_label_, pred) performance_result={ 'acc':[acc], 'roc_auc':[roc_auc], 'precision':[precision], 'recall':[recall], 'SN':[SN], 'SP':[SP], 'GM':[GM], 'MCC':[MCC], 'TP':[TP], 'FP':[FP], 'TN':[TN], 'FN':[FN] } result={
def test(self):
    """Run the experiment driver for each cluster size and dataset.

    For every ``CLUSTER_MIN_SIZE`` in ``range(4, 19, 2)`` and every dataset
    name in the hard-coded list, this builds an ``MLN`` and a ``DBManager``,
    loads domains/atoms from the merged DB file, and repeatedly feeds the
    predicate atoms through ``self.embed``.

    NOTE(review): the block layout below was reconstructed from a
    whitespace-collapsed source. The extent of the ``while True:`` body and
    of the ``if dsname == 'er':`` body is an assumption -- confirm against
    the original file before relying on it.
    """
    #self.merge()
    #self.compress()
    #return
    embedding_size = 100
    for CLUSTER_MIN_SIZE in range(4, 19, 2):
        for dsname in ['webkb']:
            mln = MLN(dsname)
            db = DBManager(dsname, mln)
            print('merge db dom sizes:')
            db.set_doms_atoms(mln, db.merge_db_file)
            cf = common_f()
            #cf.delete_files(mln.pickle_location)
            # Never true with the current dataset list (only 'webkb').
            if dsname == 'er':
                cf.remove_irrelevant_atoms()
            # Overrides the value of 100 assigned before the loops.
            embedding_size = 300
            print('generating sentences')
            # `start` is only read by the commented-out timing code below.
            start = time.time()
            cnn_atoms, ntn_atoms = db.pred_atoms, db.pred_atoms
            # Each pass feeds the previous result of self.embed back in.
            # NOTE(review): as reconstructed, this loop has no break, so the
            # rest of the method only runs if self.embed raises -- confirm.
            while True:
                #cnn_atoms = self.embed(cnn_atoms,mln.pdm,mln.dom_sizes_map,True)
                ntn_atoms = self.embed(ntn_atoms, mln.pdm, mln.dom_sizes_map, False)
            sg = None
            if dsname == 'review':
                return
                #end = time.time()
                #print('Time : ',end-start)
            else:
                sg = sentence_generator(mln.pdm, db.pred_atoms, db.TEST_SIZE, db)
            #print('calling w2v')
            #wv = word2vec_cnn()
            #print('making images')
            #wv.make_images(sg.sentences,mln.pdm,db.pred_atoms,mln.dom_sizes_map,dsname,sg.train_atoms,sg.test_atoms,db.TEST_SIZE)
            cor = corrupt(dsname, db.pred_atoms, mln.pdm, db.dom_objs_map, sg.sentences)
            # Bare return: everything below in this method is unreachable.
            return
            # NOTE(review): `dom_obj_map` is never assigned in this method, so
            # the next call would raise NameError if this code were reached.
            bmf = bmf_cluster(dsname)
            bmf.cluster(db, 1, mln.pdm, dom_obj_map)
            print('original db dom sizes(after compression):')
            orig_dom_objs_map = db.get_dom_objs_map(mln, mln.orig_db_file)
            # Rebinds the outer loop variable; the swept value is discarded.
            CLUSTER_MIN_SIZE = 10
            w2v = word2vec(dsname, db, CLUSTER_MIN_SIZE, embedding_size)
            print('w2v cluster dom sizes:')
            w2v_dom_objs_map = db.get_dom_objs_map(
                mln, w2v.w2v__cluster_db_file)
            cr = cf.calculate_cr(orig_dom_objs_map, w2v_dom_objs_map)
            print('cr : ' + str(cr))
            rc = random_cluster(dsname)
            rc.generate_random_db(db, w2v.pred_atoms_reduced_numbers, mln,
                                  w2v_dom_objs_map)
            print('random cluster dom sizes')
            db.get_dom_objs_map(mln, mln.random__cluster_db_file)
            kmc = kmeans_cluster(dsname)
            kmc.cluster(db, str(cr), mln.pdm, w2v_dom_objs_map,
                        mln.dom_pred_map)
            print('kmeans cluster dom sizes:')
            kmeans_dom_objs_map = db.get_dom_objs_map(
                mln, kmc.kmeans__cluster_db_file)
            mln.create_magician_mln()
            magician(dsname, mln)
            #tuffy(dsname)
            # Collect the per-method meta maps under one dict for `performance`.
            orig_meta_map = {}
            orig_meta_map['bmf'] = bmf.bmf_orig_meta_map
            orig_meta_map['w2v'] = w2v.w2v_orig_meta_map
            orig_meta_map['random'] = rc.rand_orig_meta_map
            orig_meta_map['kmeans'] = kmc.kmeans_orig_meta_map
            print('Dataset : ' + dsname + '; CR : ' + str(cr))
            p = performance(dsname, embedding_size)
            p.compare_marginal(mln, orig_meta_map, cr)
def main(): DIR = args.DIR embedding_file = args.embedding_dir best_network_file = "./model/network_model_pretrain.best.top.pair" print >> sys.stderr,"Read model from ",best_network_file best_network_model = torch.load(best_network_file) embedding_matrix = numpy.load(embedding_file) "Building torch model" network_model = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda() net_copy(network_model,best_network_model) best_network_file = "./model/network_model_pretrain.best.top.ana" print >> sys.stderr,"Read model from ",best_network_file best_network_model = torch.load(best_network_file) ana_network = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda() net_copy(ana_network,best_network_model) reduced="" if args.reduced == 1: reduced="_reduced" print >> sys.stderr,"prepare data for train ..." train_docs_iter = DataReader.DataGnerater("train"+reduced) print >> sys.stderr,"prepare data for dev and test ..." dev_docs_iter = DataReader.DataGnerater("dev"+reduced) test_docs_iter = DataReader.DataGnerater("test"+reduced) print "Performance after pretraining..." 
print "DEV" metric = performance.performance(dev_docs_iter,network_model,ana_network) print "Average:",metric["average"] print "TEST" metric = performance.performance(test_docs_iter,network_model,ana_network) print "Average:",metric["average"] print "***" print sys.stdout.flush() l2_lambda = 1e-6 #lr = 0.00001 #lr = 0.000005 lr = 0.000002 #lr = 0.0000009 dropout_rate = 0.5 shuffle = True times = 0 reinforce = True model_save_dir = "./model/reinforce/" utils.mkdir(model_save_dir) score_softmax = nn.Softmax() optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps = 1e-6) ana_optimizer = optim.RMSprop(ana_network.parameters(), lr=lr, eps = 1e-6) scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5) ana_scheduler = lr_scheduler.StepLR(ana_optimizer, step_size=15, gamma=0.5) for echo in range(30): start_time = timeit.default_timer() print "Pretrain Epoch:",echo scheduler.step() ana_scheduler.step() train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl') docs_by_id = {doc.did: doc for doc in train_docs} print >> sys.stderr,"Link docs ..." 
tmp_data = [] path = [] for data in train_docs_iter.rl_case_generater(shuffle=True): mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\ target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor)) mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor)) output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,0.0) ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0) ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents, 0.0) reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor)) scores_reindex = 
torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex] ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex] doc = docs_by_id[rl['did']] for s,e in zip(rl["starts"],rl["ends"]): score = score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)).data.cpu().numpy()[0] pair_score = score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)).data.cpu().numpy()[0] ana_action = utils.sample_action(score) if ana_action == (e-s-1): action = ana_action else: pair_action = utils.sample_action(pair_score*score[:-1]) action = pair_action path.append(action) link = action m1, m2 = rl['ids'][s + link] doc.link(m1, m2) tmp_data.append((mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return)) if rl["end"] == True: doc = docs_by_id[rl['did']] reward = doc.get_f1() inside_index = 0 for mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return in tmp_data: for (start, end) in zip(rl['starts'], rl['ends']): ids = rl['ids'][start:end] ana = ids[0, 1] old_ant = doc.ana_to_ant[ana] doc.unlink(ana) costs = rl['costs'][start:end] for ant_ind in range(end - start): costs[ant_ind] = doc.link(ids[ant_ind, 0], ana, hypothetical=True, beta=1) doc.link(old_ant, ana) cost = 0.0 mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor)) mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = 
autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor)) ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate) ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate) reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor)) ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex] ana_optimizer.zero_grad() ana_loss = None i = inside_index for s,e in zip(rl["starts"],rl["ends"]): costs = rl["costs"][s:e] costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor)) score = torch.squeeze(score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1))) baseline = torch.sum(score*costs) action = path[i] this_cost = torch.log(score[action])*-1.0*(reward-baseline) if ana_loss is None: ana_loss = this_cost else: ana_loss += this_cost i += 1 ana_loss.backward() torch.nn.utils.clip_grad_norm(ana_network.parameters(), 5.0) ana_optimizer.step() mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor)) mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = 
autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor)) output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate) ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate) reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor)) scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex] pair_loss = None optimizer.zero_grad() i = inside_index index = 0 for s,e in zip(rl["starts"],rl["ends"]): action = path[i] if (not (action == (e-s-1))) and (anaphoricity_target[index] == 1): costs = rl["costs"][s:e-1] costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor)) score = torch.squeeze(score_softmax(torch.transpose(scores_reindex[s:e-1],0,1))) baseline = torch.sum(score*costs) this_cost = torch.log(score[action])*-1.0*(reward-baseline) if pair_loss is None: pair_loss = this_cost else: pair_loss += this_cost i += 1 index += 1 if pair_loss is not None: pair_loss.backward() torch.nn.utils.clip_grad_norm(network_model.parameters(), 5.0) optimizer.step() 
inside_index = i tmp_data = [] path = [] end_time = timeit.default_timer() print >> sys.stderr, "TRAINING Use %.3f seconds"%(end_time-start_time) print >> sys.stderr, "cost:",cost print >> sys.stderr,"save model ..." torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo) torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo) print "DEV" metric = performance.performance(dev_docs_iter,network_model,ana_network) print "Average:",metric["average"] print "DEV Ana: ",metric["ana"] print "TEST" metric = performance.performance(test_docs_iter,network_model,ana_network) print "Average:",metric["average"] print "TEST Ana: ",metric["ana"] print sys.stdout.flush()