def __init__(self, shape=None, full_conn=True, biases=True,
             random_weights=True, normalize=True, reduce_empty_dims=True):
    """Record the network settings and, if a shape is given, build the net.

    shape: shape of a NN given as a tuple; when falsy no model is created
    full_conn: selects ``tmlgraph`` (true) or ``mlgraph`` (false) for
        building the connection list
    biases: forwarded as the second argument of the graph builder
    random_weights: when true, ``randomweights()`` is called on the new
        model
    normalize: when true, a ``StandardScaler`` is stored as ``self.norm``
    reduce_empty_dims: stored on the instance; not used in this method
    """
    self.shape = shape
    self.full_conn = full_conn
    self.biases = biases
    self.random_weights = random_weights
    self.normalize = normalize
    self.reduce_empty_dims = reduce_empty_dims

    if self.normalize:
        self.norm = StandardScaler()

    # Without a shape there is nothing to build yet.
    if not shape:
        return

    build_graph = tmlgraph if self.full_conn else mlgraph
    self.model = ffnet(build_graph(self.shape, self.biases))
    if random_weights:
        self.model.randomweights()
def fit(self, descs, target_values, train_alg='tnc', **kwargs):
    """Build a fresh network from the stored settings and train it.

    descs: training inputs, passed straight to the training method
    target_values: training targets, passed straight to the training method
    train_alg: suffix of the model's ``train_*`` method to call
    **kwargs: forwarded unchanged to the training method

    Reads ``self.full_conn``, ``self.shape``, ``self.biases``,
    ``self.random_weights``, ``self.random_state`` and ``self.n_jobs``.
    Returns ``self``.
    """
    # setup neural network
    graph_builder = tmlgraph if self.full_conn else mlgraph
    self.model = ffnet(graph_builder(self.shape, self.biases))

    if self.random_weights:
        # Seed only when a random state was supplied.
        if self.random_state is not None:
            random_seed(self.random_state)
        self.model.randomweights()

    # train
    trainer = getattr(self.model, 'train_' + train_alg)
    workers = 'ncpu' if self.n_jobs < 1 else self.n_jobs
    trainer(descs, target_values, nproc=workers, **kwargs)
    return self
def fit(self, descs, target_values, train_alg='tnc', **kwargs):
    """Create a new network from the instance settings, then train it.

    descs: training inputs, handed to the training method as-is
    target_values: training targets, handed to the training method as-is
    train_alg: name suffix selecting which ``train_*`` method of the model
        is invoked
    **kwargs: extra keyword arguments for the training method

    Uses ``self.full_conn``, ``self.shape``, ``self.biases``,
    ``self.random_weights``, ``self.random_state`` and ``self.n_jobs``.
    Returns ``self``.
    """
    # setup neural network
    make_conec = tmlgraph if self.full_conn else mlgraph
    self.model = ffnet(make_conec(self.shape, self.biases))

    if self.random_weights:
        # A seed is applied only when one has been configured.
        if self.random_state is not None:
            random_seed(self.random_state)
        self.model.randomweights()

    # train
    train_method = getattr(self.model, 'train_' + train_alg)
    n_proc = 'ncpu' if self.n_jobs < 1 else self.n_jobs
    train_method(descs, target_values, nproc=n_proc, **kwargs)
    return self
def creatingNeuralNetwork(num_hidden, data_s, data_l):
    """Train a network on TF-IDF n-gram features of the given sentences.

    num_hidden: number of nodes in the single hidden layer
    data_s: sequence of sentences; used both to fit the vectorizer and as
        training input
    data_l: training targets passed to ``net.train_tnc``

    Returns ``(net, number_of_input_features, tfidf_vectorizer)``.
    """
    # Create the bag of 1- to 6-gram vocabulary.
    # The pattern is spelled with escaped backslashes instead of a ``ur''``
    # literal: same value, but it also parses outside Python 2.
    tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 6),
                                       token_pattern=u'\\b\\w+\\b',
                                       min_df=0.05)
    tfidf_vectorizer.fit(data_s)
    number_of_input_features = len(tfidf_vectorizer.get_feature_names())

    # Create the feature dataset in one batch transform; each row equals
    # what transforming that sentence on its own would give.
    indata = tfidf_vectorizer.transform(data_s).toarray()

    # Create the neural network: inputs -> hidden layer -> single output.
    conec = tmlgraph((number_of_input_features, num_hidden, 1))
    net = ffnet(conec)

    # please print nothing
    net.train_tnc(indata, data_l, maxfun=5000, messages=0)
    # Call kept from the original; its results are not used here.
    net.test(indata, data_l, iprint=0)

    return net, number_of_input_features, tfidf_vectorizer
def __init__(self, shape=None, full_conn=True, biases=True,
             random_weights=True, normalize=True, reduce_empty_dims=True):
    """Store the configuration and build the network when a shape is known.

    shape: shape of a NN given as a tuple; a falsy value skips model
        creation
    full_conn: true -> connections from ``tmlgraph``, false -> ``mlgraph``
    biases: second argument handed to the graph builder
    random_weights: when true, the new model gets ``randomweights()``
    normalize: when true, ``self.norm`` is set to a ``StandardScaler``
    reduce_empty_dims: kept on the instance; not used in this method
    """
    self.shape = shape
    self.full_conn = full_conn
    self.biases = biases
    self.random_weights = random_weights
    self.normalize = normalize
    self.reduce_empty_dims = reduce_empty_dims

    if self.normalize:
        self.norm = StandardScaler()

    # No shape, no model.
    if not shape:
        return

    conec_factory = tmlgraph if self.full_conn else mlgraph
    self.model = ffnet(conec_factory(self.shape, self.biases))
    if random_weights:
        self.model.randomweights()
print feature_names number_of_input_features = len(feature_names) # Create the feature dataset indata = np.zeros((len(dataset_data),number_of_input_features)) for j,sentence in enumerate(dataset_data): indata[j,:] = tfidf_vectorizer.transform([sentence]).toarray()[0] print 'Creating the neural network object' t2 = time.time() #create the neural network number_of_hidden_nodes = number_of_input_features conec = tmlgraph((number_of_input_features,number_of_hidden_nodes, 1)) net = ffnet(conec) t3 = time.time() nn_create_time = t3-t2 print 'Time it took to create the neural network: ' + str(nn_create_time) + ' seconds.' print 'Starting the training of the neural network. This will take a while...' t4 = time.time() net.train_tnc(indata, dataset_labels, maxfun = 5000, messages=1) t5 = time.time() nn_train_time = t5-t4 output, regression = net.test(indata, dataset_labels, iprint = 2) print 'Time it took to train the neural network: ' + str(nn_train_time) + ' seconds.' #build the evaluation dataset - similar as above evaluation_filenames_positive = glob.glob("testing/pos/*.txt")
def train_ANN(inputs_array, target_array, iterations, node_architecture,
              **configs_dict):
    """Train a fully connected network and predict over the full input series.

    The first and last entries of ``node_architecture`` are checked against
    the second dimension of the input and target arrays (a 1-D array counts
    as one column). Rows with a NaN in either array are dropped for training
    and testing only; prediction runs on the unmodified ``inputs_array``.

    Args:
        inputs_array: array of input variables; its first dimension must
            equal that of ``target_array``.
        target_array: array of target variables.
        iterations: passed to ``net.train_tnc`` as ``maxfun``.
        node_architecture: sequence of layer sizes handed to ``tmlgraph``.
        **configs_dict: optional settings:
            ``save_network`` - if true, save the trained network;
            ``network_filepath`` - required when saving; if it ends in a
            directory separator, ``this_net.ann`` is used as file name;
            ``test`` - if true, also return regression statistics.

    Returns:
        ``predict_array``, or ``(predict_array, stats_dict)`` when testing
        was requested.

    Raises:
        Exception: on mismatched array dimensions, an architecture that
            does not match the arrays, or invalid save settings.
    """
    # Same first dimension?
    if not inputs_array.shape[0] == target_array.shape[0]:
        raise Exception('Input and target arrays must have same first '
                        'dimension!')

    # Specified number of input nodes matches second dim of input array?
    n_input_nodes = node_architecture[0]
    if len(inputs_array.shape) == 1:
        sec_dim_inputs = 1
    else:
        sec_dim_inputs = inputs_array.shape[1]
    if not n_input_nodes == sec_dim_inputs:
        raise Exception('Specified input node architecture (n = %s) '
                        'incompatible with passed input arrays... Returning!'
                        % str(n_input_nodes))

    # Specified number of target nodes matches second dim of target array?
    n_target_nodes = node_architecture[-1]
    if len(target_array.shape) == 1:
        sec_dim_target = 1
    else:
        sec_dim_target = target_array.shape[1]
    if not n_target_nodes == sec_dim_target:
        raise Exception('Specified target node architecture (n = %s) '
                        'incompatible with passed input arrays... Returning!'
                        % str(n_target_nodes))

    # Missing data in inputs array? (Warning only)
    missing_inputs_flag = bool(np.isnan(inputs_array).any())
    if missing_inputs_flag:
        warnings.warn('Specified ANN training input variables contain missing '
                      'data. NaNs will be inserted into prediction series!')

    # Missing data in target array? (Warning only)
    missing_target_flag = bool(np.isnan(target_array).any())
    if missing_target_flag:
        warnings.warn('Specified ANN training target variables contain missing '
                      'data. These will be removed for training!')

    # Check if saving trained network
    save_flag = bool(configs_dict.get('save_network'))
    if save_flag:
        if 'network_filepath' not in configs_dict:
            raise Exception('You must specify a file path if you wish to '
                            'save a new network!')
        path_head, path_tail = os.path.split(configs_dict['network_filepath'])
        if not os.path.isdir(path_head):
            raise Exception('The specified file path is not valid!')
        if path_tail == '':
            print('Filename not supplied - using this_net.ann!')
            configs_dict['network_filepath'] = os.path.join(path_head,
                                                            'this_net.ann')

    # Check if doing testing
    test_flag = bool(configs_dict.get('test'))

    # Create a second series with nans dropped
    if missing_inputs_flag or missing_target_flag:
        n_rows = inputs_array.shape[0]
        new_array = np.empty([n_rows, sec_dim_inputs + sec_dim_target])
        # Reshape so that 1-D arrays fit the 2-D column slices.
        new_array[:, :sec_dim_target] = target_array.reshape(n_rows,
                                                             sec_dim_target)
        new_array[:, sec_dim_target:] = inputs_array.reshape(n_rows,
                                                             sec_dim_inputs)
        new_array = new_array[~np.isnan(new_array).any(axis=1)]
        clean_target_array = new_array[:, :sec_dim_target]
        clean_inputs_array = new_array[:, sec_dim_target:]
    else:
        # Nothing to drop: train on the arrays as passed. Without this
        # branch the names below were unbound for NaN-free data.
        clean_target_array = target_array
        clean_inputs_array = inputs_array

    # Generate network and train
    conec = tmlgraph(node_architecture)
    net = ffnet(conec)
    net.train_tnc(clean_inputs_array, clean_target_array,
                  maxfun=iterations, messages=1)

    # Save network if requested
    if save_flag:
        ffnet_class.savenet(net, configs_dict['network_filepath'])

    # Generate full series from inputs
    predict_array = net.call(inputs_array)

    # Do testing if requested
    if test_flag:
        vars_list = ['slope', 'intercept', 'r-value', 'p-value',
                     'slope stderr', 'estim. stderr']
        valid_predict_array, stats_list = net.test(clean_inputs_array,
                                                   clean_target_array)
        stats_dict = {var: stats_list[0][i] for i, var in enumerate(vars_list)}
        return predict_array, stats_dict

    return predict_array
def train_ANN(inputs_array, target_array, iterations, node_architecture,
              **configs_dict):
    """Train a fully connected network and predict over the full input series.

    The first and last entries of ``node_architecture`` are checked against
    the second dimension of the input and target arrays (a 1-D array counts
    as one column). Rows with a NaN in either array are dropped for training
    and testing only; prediction runs on the unmodified ``inputs_array``.

    Args:
        inputs_array: array of input variables; its first dimension must
            equal that of ``target_array``.
        target_array: array of target variables.
        iterations: passed to ``net.train_tnc`` as ``maxfun``.
        node_architecture: sequence of layer sizes handed to ``tmlgraph``.
        **configs_dict: optional settings:
            ``save_network`` - if true, save the trained network;
            ``network_filepath`` - required when saving; if it ends in a
            directory separator, ``this_net.ann`` is used as file name;
            ``test`` - if true, also return regression statistics.

    Returns:
        ``predict_array``, or ``(predict_array, stats_dict)`` when testing
        was requested.

    Raises:
        Exception: on mismatched array dimensions, an architecture that
            does not match the arrays, or invalid save settings.
    """
    # Same first dimension?
    if not inputs_array.shape[0] == target_array.shape[0]:
        raise Exception('Input and target arrays must have same first '
                        'dimension!')

    # Specified number of input nodes matches second dim of input array?
    n_input_nodes = node_architecture[0]
    if len(inputs_array.shape) == 1:
        sec_dim_inputs = 1
    else:
        sec_dim_inputs = inputs_array.shape[1]
    if not n_input_nodes == sec_dim_inputs:
        raise Exception('Specified input node architecture (n = %s) '
                        'incompatible with passed input arrays... Returning!'
                        % str(n_input_nodes))

    # Specified number of target nodes matches second dim of target array?
    n_target_nodes = node_architecture[-1]
    if len(target_array.shape) == 1:
        sec_dim_target = 1
    else:
        sec_dim_target = target_array.shape[1]
    if not n_target_nodes == sec_dim_target:
        raise Exception('Specified target node architecture (n = %s) '
                        'incompatible with passed input arrays... Returning!'
                        % str(n_target_nodes))

    # Missing data in inputs array? (Warning only)
    missing_inputs_flag = bool(np.isnan(inputs_array).any())
    if missing_inputs_flag:
        warnings.warn('Specified ANN training input variables contain missing '
                      'data. NaNs will be inserted into prediction series!')

    # Missing data in target array? (Warning only)
    missing_target_flag = bool(np.isnan(target_array).any())
    if missing_target_flag:
        warnings.warn('Specified ANN training target variables contain missing '
                      'data. These will be removed for training!')

    # Check if saving trained network
    save_flag = bool(configs_dict.get('save_network'))
    if save_flag:
        if 'network_filepath' not in configs_dict:
            raise Exception('You must specify a file path if you wish to '
                            'save a new network!')
        path_head, path_tail = os.path.split(configs_dict['network_filepath'])
        if not os.path.isdir(path_head):
            raise Exception('The specified file path is not valid!')
        if path_tail == '':
            print('Filename not supplied - using this_net.ann!')
            configs_dict['network_filepath'] = os.path.join(path_head,
                                                            'this_net.ann')

    # Check if doing testing
    test_flag = bool(configs_dict.get('test'))

    # Create a second series with nans dropped
    if missing_inputs_flag or missing_target_flag:
        n_rows = inputs_array.shape[0]
        new_array = np.empty([n_rows, sec_dim_inputs + sec_dim_target])
        # Reshape so that 1-D arrays fit the 2-D column slices.
        new_array[:, :sec_dim_target] = target_array.reshape(n_rows,
                                                             sec_dim_target)
        new_array[:, sec_dim_target:] = inputs_array.reshape(n_rows,
                                                             sec_dim_inputs)
        new_array = new_array[~np.isnan(new_array).any(axis=1)]
        clean_target_array = new_array[:, :sec_dim_target]
        clean_inputs_array = new_array[:, sec_dim_target:]
    else:
        # Nothing to drop: train on the arrays as passed. Without this
        # branch the names below were unbound for NaN-free data.
        clean_target_array = target_array
        clean_inputs_array = inputs_array

    # Generate network and train
    conec = tmlgraph(node_architecture)
    net = ffnet(conec)
    net.train_tnc(clean_inputs_array, clean_target_array,
                  maxfun=iterations, messages=1)

    # Save network if requested
    if save_flag:
        ffnet_class.savenet(net, configs_dict['network_filepath'])

    # Generate full series from inputs
    predict_array = net.call(inputs_array)

    # Do testing if requested
    if test_flag:
        vars_list = ['slope', 'intercept', 'r-value', 'p-value',
                     'slope stderr', 'estim. stderr']
        valid_predict_array, stats_list = net.test(clean_inputs_array,
                                                   clean_target_array)
        stats_dict = {var: stats_list[0][i] for i, var in enumerate(vars_list)}
        return predict_array, stats_dict

    return predict_array
if args.index: src = np.concatenate([xLUT['index'] for xLUT in LUT], axis=0) src = src.astype(float) else: src = np.concatenate([xLUT['src'] for xLUT in LUT], axis=0) trg = np.concatenate([xLUT[LUT_var] for xLUT in LUT], axis=0) # initialize ANN if type(args_ANN_setup) == str: net = ff.loadnet(args_ANN_setup) setup = "UNKNOWN" else: num_inp_nodes, num_out_nodes = [src.shape[1]], [trg.shape[1]] setup = tuple(num_inp_nodes + args_ANN_setup + num_out_nodes) if args.full: conec = ff.tmlgraph(setup) else: conec = ff.mlgraph(setup) net = ff.ffnet(conec) # build up a shuffled test and train array ind = int(src.shape[0] * (1.0 - args.test_perc)) s_src, s_trg = Shuffle_2D_X(src, trg) src_train, trg_train = s_src[:ind, :], s_trg[:ind, :] src_test, trg_test = s_src[ind:, :], s_trg[ind:, :] # output netcdf_output = Get_Output_Name(args.LUT[0], args.coeff_type) Print_Header(LUT_name=args.LUT, coeff_type=args.coeff_type, ANN_setup=setup,
def ANN_gapfill_func(myBaseforResults, New_combined, Site_ID, list_in,
                     list_out, iterations, index_str, is_this_all,
                     ANN_label_all, ANN_label, frequency, Use_Fc_Storage):
    """Train an ANN on the complete rows of New_combined and add its output.

    myBaseforResults: base results directory; an ``ANN`` sub-directory is
        created in it when missing and handed to the plotting helpers
    New_combined: data frame holding the ``list_in`` and ``list_out``
        columns plus ``Ta_Con`` and ``Ah_Con``; modified and returned
    Site_ID, index_str, frequency: forwarded to the plotting helpers
    list_in: names of the input columns
    list_out: names of the target columns
    iterations: passed to ``net.train_tnc`` as ``maxfun``
    is_this_all: when ``== True`` results go to ``<item>_NN_all`` columns,
        otherwise to ``<item>_NN``
    ANN_label_all, ANN_label: incoming values are ignored; both names are
        reassigned per output inside the function
    Use_Fc_Storage: accepted but not used in this function

    Returns New_combined with ``VPD_Con`` and the ANN columns added.
    """
    ###########################################################################
    ## START MAIN CODE
    ###########################################################################
    if 'Fc' in list_out:
        units = "umol.m-2.s-1"
    # The original ``('Fe' or 'Fh' or 'Fg') in list_out`` only ever tested
    # for 'Fe'; check each name.
    elif any(flux in list_out for flux in ('Fe', 'Fh', 'Fg')):
        units = "W.m-2"
    else:
        units = " "

    ###### User-set IO file locations ######
    print("Starting ANN gap filling")

    # Check for place to put results - does it exist? If not create
    if not os.path.isdir(myBaseforResults):
        os.mkdir(myBaseforResults)
    # Then subdirectories
    if not os.path.isdir(myBaseforResults + "/ANN"):
        os.mkdir(myBaseforResults + "/ANN")
    mypathforResults = myBaseforResults + "/ANN"

    # We need to update VPD for input here so also need e and es
    # Calculate vapour pressure from absolute humidity and temperature
    #  Ah - absolute humidity, g/m3
    #  Ta - air temperature, C
    New_combined['VPD_Con'] = (
        metfuncs.es(New_combined['Ta_Con'])
        - metfuncs.vapourpressure(New_combined['Ah_Con'],
                                  New_combined['Ta_Con']))

    number_of_inputs = len(list_in)
    number_of_outputs = len(list_out)
    alllist = list_in + list_out

    # Drop nans and missing values so that good data only is used in the
    # training
    xnow = New_combined[alllist].dropna(how='any')
    xarray = np.array(xnow.dropna().reset_index(drop=True))

    # Define inputs and targets for NN from DF: input columns come first,
    # target columns last.
    inputs = xarray[:, :number_of_inputs]
    lastcolums = -1 * number_of_outputs
    targets = xarray[:, lastcolums:]

    # Generate multilayer network with a full connectivity list and create
    # the network
    conec = tmlgraph((number_of_inputs, 24, 16, number_of_outputs))
    net = ffnet(conec)

    print("TRAINING NETWORK...")
    net.train_tnc(inputs, targets, maxfun=iterations, messages=1)

    # Test network
    print("TESTING NETWORK...")
    output, regression = net.test(inputs, targets, iprint=0)
    # NOTE(review): index 2 looks like the r-value rather than R-squared
    # - confirm against the ffnet ``test`` documentation.
    print("R-squared:           %s  " % str(regression[0][2]))
    output, regress = net.test(inputs, targets)

    # Create arrays for results, then loop through the training rows to
    # predict the ANN value
    predicted = np.empty((len(xarray), number_of_outputs))
    observed = np.empty((len(xarray), number_of_outputs))
    for index, rowdata in enumerate(xarray):
        predicted[index] = net([rowdata[0:number_of_inputs]])
        # Integer slice bound: the original used the float -1.0*n here.
        observed[index] = rowdata[lastcolums:]

    ############################################
    # Generate output and return new variables
    ############################################
    # The prediction over the full frame does not depend on the output
    # being stored, so compute it once.
    all_predictions = net.call(New_combined[list_in])
    # Create a new variable called '_NN'
    for index, item in enumerate(list_out):
        ANN_label = str(item + "_NN")
        ANN_label_all = str(item + "_NN_all")
        if is_this_all == True:
            New_combined[ANN_label_all] = all_predictions[:, index]
        else:
            New_combined[ANN_label] = all_predictions[:, index]

    for index, item in enumerate(list_out):
        #####################################################
        #  Plots
        #####################################################
        # Plot time series of all 30 minute data
        mintimeseries_plot(mypathforResults, predicted, observed, regress,
                           item, Site_ID, units, targets, output, list_out,
                           index_str)
        # Plot regression of Tower versus ANN
        regressionANN2(mypathforResults, predicted, observed, regress, item,
                       Site_ID, units, list_out, index_str)
        # Plot diurnals for every second month 6 graphs - only when enough
        # months so all or annual
        if frequency == "all" or frequency == "annual" or is_this_all == True:
            Doplots_diurnal_monthly(mypathforResults, New_combined, item,
                                    Site_ID, units, list_out, index_str,
                                    is_this_all)
        # Plot diurnals for every second month 6 graphs
        Doplots_diurnal(mypathforResults, New_combined, item, Site_ID, units,
                        list_out, index_str, frequency)
        # Plot timeseries of monthly over all periods
        Doplots_monthly(mypathforResults, New_combined, item, Site_ID, units,
                        list_out, index_str, frequency)

    return New_combined
return network, output, regression, (summ / len(test_output)) def calculate_network(input_, target_, test_input, test_output, net1): network, output, regression, error = single_network(input_, target_, test_input, test_output, net1) for i in range(4): network1, output1, regression1, error1 = single_network(input_, target_, test_input, test_output, net1) if (error1 < error): network = network output = output1 regression = regression1 error = error1 return network, output, regression, error CONEC = tmlgraph((54, 10, 24)) # model of connections NET = ffnet(CONEC) # NX.draw_graphviz(net.graph, prog='dot') show the network that's nice! # pl.show() # # calculate special days summ_error = 0.0 for i in range(4): net = ffnet(CONEC) # year have 52 weeks so, we have 52 special days in year network, output, regression, error = calculate_network( LEARNING_SPECIALIST[i][:-52], LEARNING_SPECIALIST_OUTPUT[i][:-52], LEARNING_SPECIALIST[i][-52:], LEARNING_SPECIALIST_OUTPUT[i][-52:], net)
def Fre_ANN_gapfill_func(myBaseforResults, New_combined, Site_ID, list_in,
                         list_out, iterations, latitude, longitude, index_str,
                         ANN_label, frequency, evening, min_threshold,
                         max_threshold, Ustar_filter_type):
    """Train an ANN on filtered non-daytime data and add its prediction.

    Each record is tagged in a ``day_night`` column (1 = sunrise to sunset,
    2 = the three hours after sunset, 3 = everything else). The training set
    is one of the non-daytime classes, further restricted to rows with
    ``ustar > ustar_used`` and ``min_threshold < Fc < max_threshold``, with
    incomplete rows dropped.

    myBaseforResults: base results directory; ``ANN/Fre`` is created in it
        when missing and handed to the plotting helpers
    New_combined: data frame with a datetime index holding the ``list_in``
        and ``list_out`` columns plus ``ustar``, ``ustar_used`` and ``Fc``
    Site_ID, index_str: forwarded to the plotting helpers
    list_in: names of the input columns
    list_out: names of the target columns
    iterations: iteration limit for training
    latitude, longitude: used for the time zone lookup and solar calculations
    ANN_label: name of the column receiving the ANN prediction
    frequency: "all" or "annual" enables the monthly diurnal plot
    evening: when ``== True`` class 3 is selected, otherwise class 2
    min_threshold, max_threshold: exclusive bounds applied to ``Fc``
    Ustar_filter_type: accepted but not used in this function

    Returns New_combined with ``day_night`` and ``ANN_label`` columns added.
    Also rebinds the module-level name ``timezone``.
    """
    if 'Fc' in list_out:
        units = "umol.m-2.s-1"
    # The original ``('Fe' or 'Fh' or 'Fg') in list_out`` only ever tested
    # for 'Fe'; check each name.
    elif any(flux in list_out for flux in ('Fe', 'Fh', 'Fg')):
        units = "W.m-2"
    else:
        units = " "

    ###### User-set IO file locations ######
    print("Starting ANN gap filling")

    # Check for place to put results - does it exist? If not create
    if not os.path.isdir(myBaseforResults):
        os.mkdir(myBaseforResults)
    # Then subdirectories
    if not os.path.isdir(myBaseforResults + "/ANN"):
        os.mkdir(myBaseforResults + "/ANN")
    if not os.path.isdir(myBaseforResults + "/ANN/Fre"):
        os.mkdir(myBaseforResults + "/ANN/Fre")
    mypathforResults = myBaseforResults + "/ANN/Fre"

    number_of_inputs = len(list_in)
    number_of_outputs = len(list_out)
    alllist = list_in + list_out

    # Here now for Re we can further restrict the training data to be
    # nocturnal and ustar filtered.
    # So first create a series with day_night based on solar geometry.

    # Get timezone info, call routines from external code
    global timezone
    currentdate = "2013-06-01"
    timezone, InDstNow = TimeZone.get_timezone_info(latitude, longitude,
                                                    currentdate)
    # Same text as the original two-argument print statement.
    print("AskGEO TimZone offset (hrs):  %s" % (timezone,))

    # Start with blank series
    New_combined['day_night'] = np.nan

    def day_night(x):
        """Fill ``day_night`` for one calendar day of records."""
        # Get date from data group
        currentdate = x.index[0]
        currentyear = x.index[0].year
        currentmonth = x.index[0].month
        currentday = x.index[0].day
        basedate = dt.datetime(1900, 1, 1, 0, 0, 0, 0)
        delta = (currentdate - basedate).days

        # Calculate approximate solar noon, call routines from external code
        solar_sunrise, solar_noon_for_date, solar_sunset = \
            Solar_Calcs.solar_calculations(delta, latitude, longitude,
                                           timezone)
        # Returned as a fraction. Convert to decimal hours
        solar_sunrise = solar_sunrise * 24
        solar_sunset = solar_sunset * 24

        daystart_hour = int(solar_sunrise)
        daystart_minute = int((solar_sunrise - daystart_hour) * 60)
        daystart_dt = dt.datetime(currentyear, currentmonth, currentday,
                                  daystart_hour, daystart_minute, 0)
        dayend_hour = int(solar_sunset)
        dayend_minute = int((solar_sunset - dayend_hour) * 60)
        dayend_dt = dt.datetime(currentyear, currentmonth, currentday,
                                dayend_hour, dayend_minute, 0)

        # .loc replaces the original chained ``x['day_night'][...] = v``
        # assignments, which are not guaranteed to write into ``x``.
        x.loc[daystart_dt:dayend_dt, 'day_night'] = 1

        # Define evening as 3 hours after sunset. Needed for van Gorsel
        # approach
        eveningend_dt = dayend_dt + dt.timedelta(hours=3)
        x.loc[dayend_dt:eveningend_dt, 'day_night'] = 2

        # Else fill remainder with night
        x.loc[x['day_night'].isnull(), 'day_night'] = 3
        return x

    # For each day of each year run the function to calculate day or night
    New_combined = New_combined.groupby(
        [lambda x: x.year, lambda x: x.month, lambda x: x.day]
    ).apply(day_night)

    ###############################
    # Define the data to select
    ###############################
    # NOTE(review): day_night() codes the 3 h after sunset as 2 and the
    # remainder as 3, so ``evening == True`` selects the remainder class.
    # The original comment said 'night' (2) / 'evening' (3). Confirm which
    # selection is intended; behaviour is left unchanged here.
    if evening == True:
        xnow = New_combined[(New_combined['day_night'] == 3)]
    else:
        xnow = New_combined[(New_combined['day_night'] == 2)]

    # Use the actual ustar_used column which is defined from the type of
    # ustar threshold approach chosen in the config
    xnow = xnow[xnow['ustar'] > xnow['ustar_used']]
    # Remove -ve and +ve spikes which are uptake and inconsistent with Fre.
    # One combined mask instead of two chained boolean selections.
    xnow = xnow[(xnow['Fc'] > min_threshold) & (xnow['Fc'] < max_threshold)]
    xnow = xnow[alllist]

    # Drop nans and missing values so that good data only is used in the
    # training
    xnow = xnow.dropna(how='any')
    xarray = np.array(xnow.dropna().reset_index(drop=True))

    # Define inputs and targets for NN from DF: input columns come first,
    # target columns last.
    inputs = xarray[:, :number_of_inputs]
    lastcolums = -1 * number_of_outputs
    targets = xarray[:, lastcolums:]

    # Generate multilayer network with a full connectivity list and create
    # the network
    conec = tmlgraph((number_of_inputs, 6, 4, number_of_outputs))
    net = ffnet(conec)

    print("TRAINING NETWORK...")
    try:
        net.train_rprop(inputs, targets, maxiter=iterations)
    except Exception:
        # Fall back to TNC training when rprop fails. Narrowed from a bare
        # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        net.train_tnc(inputs, targets, maxfun=iterations, messages=1)

    # Test network
    print("TESTING NETWORK...")
    output, regression = net.test(inputs, targets, iprint=0)
    # NOTE(review): index 2 looks like the r-value rather than R-squared
    # - confirm against the ffnet ``test`` documentation.
    print("R-squared:           %s  " % str(regression[0][2]))
    output, regress = net.test(inputs, targets)

    # Create arrays for results, then loop through the training rows to
    # predict the ANN value
    predicted = np.empty((len(xarray), number_of_outputs))
    observed = np.empty((len(xarray), number_of_outputs))
    for index, rowdata in enumerate(xarray):
        predicted[index] = net([rowdata[0:number_of_inputs]])
        # The original indexed with the float -1.0*n (rejected by current
        # NumPy) and took a single element; take the target columns.
        observed[index] = rowdata[lastcolums:]

    ############################################
    # Generate output and return new variables
    ############################################
    # The prediction over the full frame does not depend on the output
    # being stored, so compute it once.
    all_predictions = net.call(New_combined[list_in])
    # NOTE(review): the source indentation was lost; the plotting calls are
    # placed inside this loop (one set of plots per output), mirroring
    # ANN_gapfill_func. With a single output the placement makes no
    # difference - confirm for multi-output use.
    for index, item in enumerate(list_out):
        New_combined[ANN_label] = all_predictions[:, index]

        #####################################################
        #  Plots
        #####################################################
        # Plot time series of all 30 minute data
        mintimeseries_plot(mypathforResults, predicted, observed, regress,
                           item, Site_ID, units, targets, output, ANN_label,
                           index_str)
        # Plot regression of Tower versus ANN
        regressionANN2(mypathforResults, predicted, observed, regress, item,
                       Site_ID, units, ANN_label, index_str)
        # Plot diurnals for every second month 6 graphs
        Doplots_diurnal(mypathforResults, New_combined, item, Site_ID, units,
                        ANN_label, index_str)
        # Plot diurnals for every second month 6 graphs
        if frequency == "all" or frequency == "annual":
            Doplots_diurnal_monthly(mypathforResults, New_combined, item,
                                    Site_ID, units, ANN_label, index_str)
        # Plot timeseries of monthly over all periods
        Doplots_monthly(mypathforResults, New_combined, item, Site_ID, units,
                        ANN_label, index_str)

    ###################################################
    #  File stuff
    ###################################################
    return (New_combined)