def __init__(self, data, remove_stop_word=False, lemmatize=True):
    self.sentences = []
    self.graph = {}
    self.PRI = {}
    processor = ProcessData(data, "Stanford", False, lemmatize, remove_stop_word, False)
    self.sentences = processor.clean_sentences()
def trainPredModel(data):
    '''Split the dataset into training and evaluation sets (20% evaluation split).
    Fit the model to the training data, then generate a document with the evaluation
    results as well as a summary of errors.
    '''
    process = ProcessData(data)
    padded_sequence, tokenizer, labels = process.createTrainData()
    ft = createFTEmbedding()
    ft.processDict(tokenizer)
    # 20% evaluation split, as described in the docstring
    x_train, x_val, y_train, y_val = train_test_split(padded_sequence, labels, test_size=0.2)
    model = compileModel(padded_sequence, labels, tokenizer, ft.embedding_matrix)
    model.fit(x_train, y_train, batch_size=20, epochs=10, validation_data=(x_val, y_val))
    y_pred = model.predict(x_val)
    translateEvalData(y_pred, y_val, x_val, tokenizer)
    return model, x_train, x_val, y_train, y_val, y_pred
import json
import socket
from collections import deque


class Receiver:
    def __init__(self):
        # Symbolic name meaning all available interfaces
        self.host = ''
        # IP address for the ambulance
        self.ip = 'localhost'
        self.port = 10000
        # Create a UDP socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        # Bind socket to local host and port
        self.sock.bind((self.host, self.port))
        # Queue with the two last data objects received
        self.position_history = deque()
        self.current_data = None
        self.format_dict = dict.fromkeys(['timestamp', 'longitude', 'latitude'])
        self.process_data = ProcessData()

    def add_to_queue(self, dict_insert):
        '''Add received element to the receiver queue.

        Keyword arguments:
        dict_insert -- dict containing GPS data and timestamp
        '''
        self.position_history.append(dict_insert)

    def notify_process_data(self):
        '''Notify process_data when data is received from the sender.'''
        if len(self.position_history) > 1:
            self.process_data.notify(self.get_data())

    def get_data(self):
        '''Return the two last datasets from position_history.'''
        if len(self.position_history) > 1:
            new_amb = self.position_history.popleft()
            old_amb = self.position_history.popleft()
            self.position_history.appendleft(old_amb)
            return [new_amb, old_amb]

    def receive(self):
        '''Receive data and convert bytes to string.'''
        try:
            # Listen until terminated by user
            while True:
                rawdata, addr = self.sock.recvfrom(1024)
                data = rawdata.decode(encoding='UTF-8')
                # Convert data to dictionary
                acceptable_string = data.replace("'", "\"")
                data = json.loads(acceptable_string)
                self.add_to_queue(data)
                self.notify_process_data()
                if not data:
                    break
        except KeyboardInterrupt:
            print("Closing socket")
            self.sock.close()
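A minimal companion sender sketch for local testing, assuming the Receiver above is listening on localhost:10000 and that the payload carries the timestamp/longitude/latitude keys hinted at by format_dict; the values and the one-second send interval are illustrative assumptions, not part of the original code.

# Hypothetical sender for exercising Receiver locally; payload values are made up.
import json
import socket
import time

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
address = ('localhost', 10000)  # assumed: matches the port Receiver binds to

for i in range(3):
    # Keys mirror Receiver.format_dict; the receiver json.loads the decoded string
    payload = {'timestamp': time.time(), 'longitude': 10.39 + i * 0.001, 'latitude': 63.43}
    sock.sendto(json.dumps(payload).encode('UTF-8'), address)
    time.sleep(1)

sock.close()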
def main():
    data_processor = ProcessData()
    forecast_util = Forecast()
    series, train, test = data_processor.read_data('shampoo-sales.csv')
    forecasts = forecast_util.make_forecasts(train, test, 1, 3)
    forecast_util.evaluate_forecasts(test, forecasts, 1, 3)
    forecast_util.plot_forecasts(series, forecasts, 12)
def main():
    print('processing data...')
    pd = ProcessData()
    data, half_data = pd()
    print('done processing!')
    trainer = Train()
    kfold_trainer = TrainKfold()
    gaussian_trainer = TrainGaussian()

    # [0.8795, 0.9035, 0.9285, 0.9435, 0.9505, 0.96, 0.963, 0.958, 0.954, 0.9525]
    trainer(data, 'lr', 0.0001, 4, "logistic regression")
    # [0.957, 0.959, 0.96, 0.9595, 0.9575, 0.958, 0.9595, 0.953, 0.9515, 0.947]
    trainer(half_data, 'svm', 0.04, 1.6, "support vector machine")

    # lr: 1.0248700000000002 - training: 0.9738333333333333 testing: 0.9585
    kfold_trainer(data, 'lr', 0.7, 1.1, k=5)
    # svm: 0.06 - training: 0.9683333333333334 testing: 0.958
    kfold_trainer(half_data, 'svm', 0.04, 1.5, k=5)

    # [0.0026414886541018556, 0.0030797496623704845, 0.0035907244833864723, 0.004186477385849299, 0.004881074274375396,
    #  0.0056909147897226675, 0.006635119509224956, 0.007735981389354633, 0.009019492109107729, 0.010515955741336555,
    #  0.012260704240994024, 0.014294931643181576, 0.016666666666666666, 0.019431906686330536, 0.022655939847975447,
    #  0.02641488654101857]
    gaussian_trainer(half_data, 0.04, 1.5)
def main(): """ Kieran Ringel For each data set three lines are run in main. The first creates an instance of Org with the arguments being the data file name, an array of rows with header information to be removed, and the column location of the class so that all the classes can be put in the same column. The second line takes the instance of Org and calls the open method, returning the pandas dataframe of the file. The third line creates an instance of ProcessData, the arguments are the dataframe created in Org.open(), classification or regression, the type (none, edited, condensed), and an array of the columns with discrete values.""" #print('glass') #glass = Org('Data/glass.data', [-1], -1) #df = glass.open() #ProcessData(df, 'classification', 'condensed', [-1]) #print('image') #img = Org('Data/segmentation.data', [0, 1, 2, 3, 4], 0) #df = img.open() #ProcessData(df, 'classification', 'condensed', [-1]) #print('vote') #vote = Org('Data/house-votes-84.data', [-1], 0) #df = vote.open() #ProcessData(df, 'classification', 'edited', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) #print('abalone') #abalone = Org('Data/abalone.data', [-1], -1) #df = abalone.open() #ProcessData(df, 'regression', 'edited', [0]) print('machine') machine = Org('Data/machine.data', [-1], -1) df = machine.open() ProcessData(df, 'regression', 'condensed', [0, 1])
def create_relations_image(self):
    # Create the random data
    data_dict_obj = DataDict(self.data_dicts, self.max_rep)
    # Write the data to a csv file
    data_dict_obj.write_dup_data(self.csv_file_name)
    # Process the data and get counts of emails
    process_data_obj = ProcessData(self.csv_file_name)
    processed_counts = process_data_obj.process_data()
    # Make a graph of the processed data
    graph_file_obj = MakeGraphFile(self.dot_file_name, self.png_file_name)
    graph_file_obj.write_dot_file(processed_counts)
    # Write the graph to a file
    graph_file_obj.write_png_file()
def __init__(self, data):
    self.data = []
    for sentence in data:
        tokens = sent_tokenize(sentence)
        for s in tokens:
            # Deduplicate individual sentences (append s, not the whole token list)
            if s not in self.data:
                self.data.append(s)
    self.sentence_weights = {}
    data_processor = ProcessData(data)
    self.sentences = data_processor.remove_tags(data_processor.clean_sentences())
    self.probabilities = self._get_probabilities(self.sentences)
def __init__(self):
    # Symbolic name meaning all available interfaces
    self.host = ''
    # IP address for the ambulance
    self.ip = 'localhost'
    self.port = 10000
    # Create a UDP socket
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # Bind socket to local host and port
    self.sock.bind((self.host, self.port))
    # Queue with the two last data objects received
    self.position_history = deque()
    self.current_data = None
    self.format_dict = dict.fromkeys(['timestamp', 'longitude', 'latitude'])
    self.process_data = ProcessData()
def handle_socket(data, client_address, local_storage):
    # print('The client at {} says {!r}'.format(client_address, data))
    # Load received data into JSON
    data_receive = json.loads(data.decode())
    # Split JSON data into specific parts
    process_data = ProcessData(data_receive)
    # Verify if the client_code is valid
    client_code = process_data.client_code
    # Verify client code
    if client_code == "T":
        print("MESSAGE RECEIVED: T To: {} From: {} Text: {}".format(
            process_data.dest_id, process_data.client_id, process_data.send_text))
        if process_data.dest_id not in local_storage:
            # Create a queue to store data with the same key
            local_storage[process_data.dest_id] = []
        local_storage[process_data.dest_id].append(process_data)
        result = json.dumps({"message": "messages stored"})
    elif client_code == "C":
        if process_data.client_id in local_storage.keys():
            # Pop out the relevant object
            obj = local_storage[process_data.client_id].pop(0)
            if obj.send_text is not None:
                # Convert text and id into json
                result = json.dumps({
                    "message": obj.send_text,
                    "sender": obj.client_id
                })
                print("MESSAGE TO SEND: T To: {} From: {} Text: {}".format(
                    obj.dest_id, obj.client_id, obj.send_text))
            else:
                result = json.dumps({
                    "message": "No Text",
                    "sender": obj.client_id
                })
                print("MESSAGE TO SEND: T To: {} Text: No Text".format(obj.client_id))
        else:
            result = json.dumps({"message": "No Text"})
            print("MESSAGE TO SEND: T To: {} Text: No Text".format(process_data.client_id))
    else:
        result = json.dumps({"message": "Invalid <code> format. "})
    with socket_lock:
        UDPServerSocket.sendto(result.encode(), client_address)
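For reference, a minimal sketch of client payloads this handler could receive, inferred from the attributes it reads off ProcessData (client_code, client_id, dest_id, send_text). The JSON field names, server address, and IDs are assumptions for illustration only.

# Hypothetical client for handle_socket; field names and server address are assumed.
import json
import socket

server_address = ('localhost', 10000)  # assumed; not specified in this snippet
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

# "T": store a text message for dest_id
send_msg = {"client_code": "T", "client_id": "A1", "dest_id": "B2", "send_text": "hello"}
sock.sendto(json.dumps(send_msg).encode(), server_address)
print(sock.recvfrom(1024)[0].decode())  # expected: {"message": "messages stored"}

# "C": collect the oldest message stored under this client's id
check_msg = {"client_code": "C", "client_id": "B2"}
sock.sendto(json.dumps(check_msg).encode(), server_address)
print(sock.recvfrom(1024)[0].decode())  # expected: {"message": "hello", "sender": "A1"}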
def post(self):
    logging.info("---------------------------")
    logging.info("Class ProcessDataSummary: post")
    # Validate request
    redirect_url = authorise_user(self.request.uri)
    if redirect_url:
        logging.info("Redirecting")
        self.redirect(redirect_url)
    # Process data.
    data = self.request.body
    logging.info(">>>>> {}".format(data))
    file_name = FileUtils().create_file(data=data)
    ProcessData().process_data(data=data)
    logging.info("---------------------------")
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps({"file_name": file_name}))
from unittest import TestCase


class TestProcessData(TestCase):
    def setUp(self):
        self.process_data = ProcessData('./test/input/', './test/output/temp_folder/')

    def test___get_csv_names(self):
        names = [
            './test/input/subfolder1/test1.json',
            './test/input/subfolder1/test2.json',
            './test/input/subfolder2/test3.json',
        ]
        test_names = [name for name in self.process_data.get_csv_names()]
        print(test_names)
        self.assertEqual(len(test_names), 3)
        self.assertTrue(all(name in names for name in test_names))

    def tearDown(self):
        pass
from process_data import ProcessData
from model import Model

if __name__ == '__main__':
    # one module for data preprocessing
    # another module for model construction
    # another for evaluation (k-fold)
    train_data = ProcessData()
    model = Model()
    model.construct_model(train_data.data_frame)

    # test
    test_data = ProcessData(file_name="test.tsv")
    model.evaluate_model(test_data.data_frame)
def setUp(self):
    self.process_data = ProcessData('./test/input/', './test/output/temp_folder/')
PKU_Datafile = '../data/PKU_MOOC/question_sessions.csv'
PKU_train = '../data/PKU_MOOC/training.csv'
PKU_test = '../data/PKU_MOOC/testing.csv'
PKU_evaluate_result = '../data/PKU_MOOC/PKU_results.csv'
PKU_Category_MappingFile = '../data/PKU_MOOC/question_category.csv'

fname_TrainData = ASSISTment_train
fname_TestData = ASSISTment_test
fname_MapData = ASSISTment_Category_MappingFile
fname_Result = ASSISTment_evaluate_result

DATA_READY = True
SILENT_WARNINGS = True  # silence TF warnings (https://github.com/tensorflow/tensorflow/issues/8037)

if not DATA_READY:
    PreProcess = ProcessData(data_folder=DATA_FOLDER)
    PreProcess.ASSISTment_load_save(ASSISTment_Datafile)

if SILENT_WARNINGS:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def main(args):
    batch_size = args.batch_size
    n_epoch = args.n_epoch
    n_step = args.n_step
    keep_prob = args.keep_prob
    n_hidden_units = args.n_hidden_units
    embedding_size = args.embedding_size
    initial_learning_rate = args.initial_learning_rate
    final_learning_rate = args.final_learning_rate
    assert args.embedding in ['random',
def build_model(self, epoch_config):
    """Builds tensorflow neural network layers.

    Initializes layer weights and biases from a random normal distribution.
    Connects layers by matrix multiplication and applies the activation function
    (for non-linearities) on every layer except the last.

    Args:
        FLAGS: Macro dictionary containing user params and some defaults
        epoch_config: epoch configuration
            nodes_per_layer: List containing the number of nodes in each layer;
                its length determines the number of layers.
                Example: nodes_per_layer = [1, 5, 4, 1]
                    First (input) layer has 1 node and takes an input vector of (N,)
                    Second hidden layer has 5 nodes (with tanh activation)
                    Third hidden layer has 4 nodes (with tanh activation)
                    Fourth (output) layer has 1 node and outputs a vector of (N,)
            optimizer: optimizer to use for training. Default is Adam
            learning_rate: learning rate used by optimizer. Default is 0.05
            activation: non-linear activation function. Default is relu

    Returns:
        The configured optimizer (self.opt). The computed cost op is saved to self.cost.

    Raises:
        None
    """
    # Instantiate DNN Regressor Data Class
    PD = ProcessData(self.logging)

    if epoch_config['load_data'] == True:
        self.X_train, self.X_test, self.Y_train, self.Y_test = PD.loadData()
    else:
        # Initialize variables to prepare synthetic data
        N = epoch_config['data_instances']
        M = epoch_config['data_features']
        self.X_train, self.X_test, self.Y_train, self.Y_test = PD.generateData(N, M)

    self.logging.info("Node per layer:%s", epoch_config['nodes_per_layer'])
    self.logging.info("Train Optimizer:%s", epoch_config['train_optimizer'])
    self.logging.info("Learning rate:%f", epoch_config['learning_rate'])
    self.logging.info("Activation:%s", epoch_config['activation'])

    # Local variables
    nodes_per_layer = epoch_config['nodes_per_layer']

    # TensorFlow Variables and Placeholders
    # Global iteration steps
    global_step = tf.Variable(0, name="global_step", trainable=False)

    # Placeholder for input features and target output
    self.input_features = tf.placeholder(tf.float64)
    self.target_output = tf.placeholder(tf.float64)

    # Each layer is a matrix multiplication followed by a set of nonlinear operators.
    # The size of each matrix is [size of output layer] x [size of input layer]
    layer_matrices = [None] * len(nodes_per_layer)
    layer_biases = [None] * len(nodes_per_layer)

    # Compute weight matrices and biases for the layered neural network
    for layer in range(len(nodes_per_layer) - 1):
        input_size = nodes_per_layer[layer]
        output_size = nodes_per_layer[layer + 1]
        layer_matrices[layer] = tf.Variable(
            tf.random_normal([output_size, input_size], dtype=tf.float64))
        layer_biases[layer] = tf.Variable(
            tf.random_normal([output_size, 1], dtype=tf.float64))
        self.logging.info(
            "[%d] layer_matrices for layer %d of size %d x %d",
            epoch_config['opt_epoch_iter'], layer, output_size, input_size)

    # Now we need to compute the output. We'll do that by connecting the matrix
    # multiplications through non-linearities, except at the last layer, where we
    # will just use matrix multiplication.
    intermediate_outputs = [None] * (len(nodes_per_layer) - 1)
    for layer in range(len(nodes_per_layer) - 1):
        if layer == 0:
            matmul = tf.add(
                tf.matmul(layer_matrices[layer], self.input_features),
                layer_biases[layer])
        else:
            matmul = tf.add(
                tf.matmul(layer_matrices[layer], intermediate_outputs[layer - 1]),
                layer_biases[layer])

        if layer < len(nodes_per_layer) - 2:
            self.logging.info("Using Activation: %s", epoch_config['activation'])
            if epoch_config['activation'] == "tanh":
                intermediate_outputs[layer] = tf.nn.tanh(matmul)
            else:
                # Default "relu"
                intermediate_outputs[layer] = tf.nn.relu(matmul)
        else:
            intermediate_outputs[layer] = matmul

    # And now the output -- we'll simply use matrix multiplication
    self.output = intermediate_outputs[-1]

    # Compute error between target vs estimated output
    error = self.output - self.target_output
    self.cost = tf.matmul(error, tf.transpose(error))

    # Optimize for loss or cost
    self.logging.info("Using Train Optimizer: %s", epoch_config['train_optimizer'])
    if epoch_config['train_optimizer'] == "sgd":
        self.opt = tf.train.GradientDescentOptimizer(epoch_config['learning_rate'])
    elif epoch_config['train_optimizer'] == "Adagrad":
        self.opt = tf.train.AdagradOptimizer(epoch_config['learning_rate'])
    else:
        # Default is Adam
        self.opt = tf.train.AdamOptimizer(epoch_config['learning_rate'])

    # Between-graph replication. If enabled, training happens *synchronously*
    if epoch_config['sync_replicas'] == True:
        worker_spec = epoch_config['worker_hosts'].split(",")
        # Get the number of workers.
        num_workers = len(worker_spec)
        self.opt = tf.train.SyncReplicasOptimizer(
            self.opt,
            replicas_to_aggregate=num_workers,
            total_num_replicas=num_workers,
            name="nn_sync_replicas")
        self.logging.info("Sync Replica Optimizer Enabled...")

    self.train_step = self.opt.minimize(self.cost, global_step=global_step)
    return self.opt
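A sketch of an epoch_config covering the keys build_model reads above; the values are illustrative assumptions only, not project defaults, and the instance name is hypothetical.

# Illustrative config only; values are assumptions, not the project's defaults.
epoch_config = {
    'load_data': False,          # False -> synthetic data via ProcessData.generateData(N, M)
    'data_instances': 1000,      # N
    'data_features': 1,          # M
    'nodes_per_layer': [1, 5, 4, 1],
    'train_optimizer': 'Adam',
    'learning_rate': 0.05,
    'activation': 'relu',
    'opt_epoch_iter': 1,
    'sync_replicas': False,
    'worker_hosts': '',
}
# regressor.build_model(epoch_config)  # assuming `regressor` is an instance of the class above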
def train_predict(self, data, time_budget, n_class, schema):
    s1 = time.time()
    seed = SEED
    fix_seed(seed)
    LOGGER.info(f'time_budget:{time_budget}')
    LOGGER.info(f'n_class:{n_class}')
    LOGGER.info(f'node:{data["fea_table"].shape[0]}')
    LOGGER.info(f'edge:{data["edge_file"].shape[0]}')

    # Pre-process data
    process_data = ProcessData(data)
    table = process_data.pre_process(time_budget, n_class, schema)

    # Feature dimension reduction
    feat = Feat()
    process_data.drop_unique_columns(table)
    drop_sum_columns = process_data.drop_excessive_columns(table)
    feat.fit_transform(table, drop_sum_columns)
    LOGGER.info(
        f'train:test={(table.df["is_test"]!=1).sum()}:{(table.df["is_test"]==1).sum()}'
    )

    # This does not seem to be used anywhere
    table.large_features = False
    if table.ori_columns.shape[0] > 500:
        table.large_features = True

    model_type_list = ['sage', 'gat', 'tagc', 'gcn']
    repeat = 3
    model_name_list = [
        f'{model_type_list[i]}{i+len(model_type_list)*j}'
        for j in range(repeat) for i in range(len(model_type_list))
    ]
    model_type_list = model_type_list * repeat

    LOGGER.info('use node embedding')
    categories = [
        'node_index', 'degree_bins', 'bin_2-neighbor_mean_degree_bins'
    ]
    for model in set(model_type_list):
        LOGGER.info(
            f"""{model} feature num:{eval(f'table.{model}_columns.shape[0]')}""")
        exec(
            f'table.{model}_data = process_data.process_gnn_data(table,table.{model}_columns,categories)'
        )

    allmodel = AllModel()
    table.lr_epoch = 16
    table.lr_list = [0.05, 0.03, 0.01, 0.0075, 0.005, 0.003, 0.001, 0.0005]

    train_valid_idx_list, valid_idx_list = split_train_and_valid(
        table, train_rate=0.8, seed=SEED, mode=split_mode)
    train_idx, test_idx = split_train_and_test(table)
    test_idx = test_idx.sort_values()

    run_model = []
    run_type = []
    run_time = {}
    for i in range(len(model_type_list)):
        seed = SEED * (i + 1)
        fix_seed(seed)
        model_type = model_type_list[i]
        model_name = model_name_list[i]
        if model_type not in run_time:
            init_time, one_epoch_time, early_stopping_rounds = allmodel.get_run_time(
                table, model_type, model_name, train_idx, test_idx, seed=seed)
            run_lr_time = len(table.lr_list) * (
                init_time + table.lr_epoch * one_epoch_time)
            run_time500 = init_time * 2 + one_epoch_time * (
                500 + early_stopping_rounds) * 2 + run_lr_time
            run_time300 = init_time * 2 + one_epoch_time * (
                300 + early_stopping_rounds) * 2 + run_lr_time
            run_time150 = init_time * 2 + one_epoch_time * (
                150 + early_stopping_rounds) * 2 + run_lr_time
            run_time[model_type] = (run_time500 - run_lr_time,
                                    run_time300 - run_lr_time,
                                    run_time150 - run_lr_time,
                                    early_stopping_rounds, init_time,
                                    one_epoch_time, run_lr_time)
        else:
            run_time500, run_time300, run_time150, early_stopping_rounds, init_time, one_epoch_time, run_lr_time = run_time[
                model_type]

        s2 = time.time()
        LOGGER.info(
            f"time_budget:{time_budget}s,used time:{s2-s1:.2f}s,{model_name} model will use {run_time500:.2f}s|{run_time300:.2f}s|{run_time150:.2f}s"
        )
        if s2 - s1 + run_time500 + 5 < time_budget:
            LOGGER.info('train 500 epoch')
            allmodel.V37_fit_transform(table, model_type, model_name,
                                       train_valid_idx_list, valid_idx_list,
                                       train_idx, test_idx, mode=split_mode,
                                       num_boost_round=500, seed=seed)
            run_model.append(model_name)
            run_type.append(model_type)
        elif s2 - s1 + run_time300 + 5 < time_budget:
            LOGGER.info('train 300 epoch')
            allmodel.V37_fit_transform(table, model_type, model_name,
                                       train_valid_idx_list, valid_idx_list,
                                       train_idx, test_idx, mode=split_mode,
                                       num_boost_round=300, seed=seed)
            run_model.append(model_name)
            run_type.append(model_type)
        elif s2 - s1 + run_time150 + 5 < time_budget:
            LOGGER.info('train 150 epoch')
            allmodel.V37_fit_transform(table, model_type, model_name,
                                       train_valid_idx_list, valid_idx_list,
                                       train_idx, test_idx, mode=split_mode,
                                       num_boost_round=150, seed=seed)
            run_model.append(model_name)
            run_type.append(model_type)
        elif len(allmodel.valid_models[0]) == 0:
            this_epoch = int(
                ((time_budget - (s2 - s1 + 5) - run_lr_time) / 2 - init_time) /
                one_epoch_time - early_stopping_rounds)
            LOGGER.info(f'short time train {this_epoch} epoch')
            allmodel.V37_fit_transform(table, model_type, model_name,
                                       train_valid_idx_list, valid_idx_list,
                                       train_idx, test_idx, mode=split_mode,
                                       num_boost_round=this_epoch, seed=seed)
            run_model.append(model_name)
            run_type.append(model_type)
        elif time_budget - (s2 - s1) < 5:
            LOGGER.info('never train; break')
            break
        else:
            LOGGER.info('no train this model; continue')
            continue

    if offline:
        if table.especial:
            df = table.df[['node_index', 'is_test']]
            df = df.merge(data['test_label'], how='left', on='node_index')
            test_label = df.loc[(df['is_test'] == 1) &
                                (table.directed_mask.tolist()),
                                'label'].astype('int').values
        else:
            test_label = data['test_label']['label'].values
    else:
        test_label = None

    preds1, valid_acc1 = get_preds(0, run_model, run_type, allmodel,
                                   model_name_list, table, test_label,
                                   valid_idx_list)
    preds2, valid_acc2 = get_preds(1, run_model, run_type, allmodel,
                                   model_name_list, table, test_label,
                                   valid_idx_list)
    preds = (preds1 + preds2) / 2
    preds = preds.argmax(axis=1).flatten()

    if table.especial:
        LOGGER.info(f'preds\n{preds}')
        df = table.df[['label', 'is_test']]
        df['preds'] = int(
            df.loc[[not i for i in table.directed_mask.tolist()],
                   'label'].value_counts().index[0])
        df.loc[(df['is_test'] == 1) & (table.directed_mask.tolist()),
               'preds'] = preds
        preds = df.loc[df['is_test'] == 1, 'preds'].values

    LOGGER.info(
        f"train label\n{data['train_label']['label'].value_counts()/data['train_label'].shape[0]}"
    )
    df_preds = pd.Series(preds, name='preds')
    LOGGER.info(f"preds label\n{df_preds.value_counts()/df_preds.shape[0]}")

    if offline:
        preds1 = preds1.argmax(axis=1).flatten()
        preds2 = preds2.argmax(axis=1).flatten()
        if table.especial:
            LOGGER.info(f'preds1\n{preds1}')
            df = table.df[['label', 'is_test']]
            df['preds'] = int(
                df.loc[[not i for i in table.directed_mask.tolist()],
                       'label'].value_counts().index[0])
            df.loc[(df['is_test'] == 1) & (table.directed_mask.tolist()),
                   'preds'] = preds1
            preds1 = df.loc[df['is_test'] == 1, 'preds'].values

            LOGGER.info(f'preds2\n{preds2}')
            df = table.df[['label', 'is_test']]
            df['preds'] = int(
                df.loc[[not i for i in table.directed_mask.tolist()],
                       'label'].value_counts().index[0])
            df.loc[(df['is_test'] == 1) & (table.directed_mask.tolist()),
                   'preds'] = preds2
            preds2 = df.loc[df['is_test'] == 1, 'preds'].values

        df_test = table.df[['degree', 'label', 'is_test']]
        df_test = df_test.loc[df_test['is_test'] == 1]
        df_test['preds'] = preds
        df_test['label'] = data['test_label']['label'].values
        df_test['acc'] = df_test['preds'] == df_test['label']
        pd.set_option('display.max_rows', 1000)
        print(df_test.groupby('degree')['acc'].mean())
        return preds, valid_acc1, valid_acc2, preds1, preds2
    else:
        return preds
with tf.name_scope('Evaluation'):
    # MAPE: the +1 on y keeps the denominator away from zero (otherwise inf)
    self.MAPE = tf.reduce_mean(
        tf.divide(tf.abs(tf.subtract(self.pred, self.y)),
                  tf.add(self.y, 1.0)))
    # RMSE
    self.RMSE = tf.sqrt(
        tf.reduce_mean(tf.square(tf.subtract(self.pred, self.y))))

self.merged = tf.summary.merge_all()


if __name__ == '__main__':
    pdata = pdata()
    training_X, training_Y, testing_X, testing_Y = pdata.get_taxi_data_24()
    training_X_batch = np.shape(np.reshape(training_X, (-1, 24, 3600)))[0]  # (B, T, N) 255*24*3600
    g = Graph()
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    with tf.Session(graph=g.graph, config=config) as sess:
        # tensorboard writer
        writer = tf.summary.FileWriter('logs/', sess.graph)
        tStart = time.time()
        sess.run(tf.global_variables_initializer())
        step = 0
        while step < training_iters:
import configparser

import pdfkit

from process_data import ProcessData, create_dir

# Read the configuration file
cf = configparser.ConfigParser()
cf.read('conf.ini')
account_id = cf.get("juejin", "id")
client_id = cf.get("juejin", "client_id")
uid = cf.get("juejin", "uid")
src = cf.get("juejin", "src")
token = cf.get("juejin", "token")
get_url = cf.get("juejin", "getUrl")
section_url = cf.get("juejin", "getSectionUrl")
book_name = cf.get("juejin", "bookName")

create_dir()
process = ProcessData(account_id=account_id, uid=uid, client_id=client_id,
                      token=token, book_name=book_name, get_url=get_url,
                      section_url=section_url, src=src, dir_path="source")
section_ids = process.get_section_id()
process.get_content_from_section(section_ids)

# For building a mobi file
# create_dir("dist")
# Originally planned to build the mobi with https://github.com/jachinlin/kindle_maker,
# but that library currently raises errors
# make_ebook("source", "dist")

# Build the e-book with pdfkit
pdfkit.from_file(book_name.strip('"') + '.html', book_name.strip('"') + '.pdf')