def data_Generator(path=None, saveModelpath=None):
    parser = argparse.ArgumentParser(description='Call for help Classification Training')
    parser.add_argument('--model_type', type=str, default='RNN_model',
                        help='model to run lstm')
    parser.add_argument('--src_root', type=str, default=path)
    parser.add_argument('--dst_root', type=str, default=saveModelpath,
                        help='Model path')
    # Adjust the mini-batch size to split the data into small groups for each training epoch;
    # smaller batches mean less data per step and faster iterations. A value of 1 is recommended for accuracy.
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch_size')
    parser.add_argument('--delta_time', '-dt', type=float, default=2.0,
                        help='time in seconds to sample audio')
    parser.add_argument('--sample_rate', '-sr', type=int, default=16000,
                        help='sample rate')
    args, _ = parser.parse_known_args()
    DataGenerator.train(args)
def variance_likelihood(algo, param="T"):
    sigma = 0.16
    beta = 0.64
    phi = 1.
    step = 2
    if param == "N":
        T = 100
        x, y = DataGenerator.SVGenerator(phi, sigma, beta, T)
        list_likelihood = []
        for n in range(10, 500, step):
            norm_w, particles, likelihood = algo(beta, sigma, y, n)
            list_likelihood.append(likelihood)
            print(n)
        plt.plot(list_likelihood)
        plt.xlabel('N/' + str(step))
    if param == "T":
        N = 100
        list_likelihood = []
        for t in range(10, 200, step):
            print(t)
            x, y = DataGenerator.SVGenerator(phi, sigma, beta, t)
            norm_w, particles, likelihood = algo(beta, sigma, y, N)
            list_likelihood.append(likelihood)
        plt.plot(list_likelihood)
        plt.xlabel('T/' + str(step))
    plt.ylabel('log likelihood')
    plt.show()
def process():
    parametros = []
    contenido = request.form
    diccionarioContenido = contenido.copy()
    for i in range(len(diccionarioContenido)):
        parametros.append(diccionarioContenido[str(i)])
    tipoBusqueda = parametros.pop()
    tipoFiltro = parametros.pop()
    if (tipoBusqueda == 'AND'):
        # compendium by conjunction (AND)
        dataFrameFinal, estadoConsulta = dg.generarCompendioAND(dg.separateParameters(parametros), dates, tipoFiltro)
    else:
        # compendium by disjunction (OR)
        dataFrameFinal, estadoConsulta = dg.generarCompendioOR(parametros, dates, tipoFiltro)
    if (estadoConsulta):
        dataFrameFrecuencyWords = fp.alistarTexto(dataFrameFinal)
        dataFrameFrecuencyHashtag = fp.tratatHashtag(dataFrameFinal)
        arregloUnoX, arregloUnoY = at.wordsMoreRepetitive(dataFrameFrecuencyWords)
        arregloDosX, arregloDosY = at.hashtagMoreRepetitive(dataFrameFrecuencyHashtag)
        arregloTresX, arregloTresY = at.hashTagPopular(dataFrameFinal)
        arregloCuatroX, arregloCuatroY = at.scatterRTvsHashtag(dataFrameFinal)
    # Depending on the query status, the response is sent back to JS as a JSON payload
    if (estadoConsulta):
        parametros = []
        return jsonify({'resultX1': arregloUnoX, 'resultY1': arregloUnoY,
                        'resultX2': arregloDosX, 'resultY2': arregloDosY,
                        'resultX3': arregloTresX, 'resultY3': arregloTresY,
                        'resultX4': arregloCuatroX, 'resultY4': arregloCuatroY})
    else:
        parametros = []
        return jsonify({'error': 'Los términos no arrojaron resultados!'})
def reg_test_model(self, model_file, reg_test_file, reg_train_file=None, reg_vali_file=None):
    ## load model
    print('loading model!')
    self.model_reg_task = load_model(model_file, custom_objects={
        'mean_squared_error_l2': self.mean_squared_error_l2})
    tmp = model_file.split('/')[-1]
    if tmp.find('.h5') != -1:
        model_name = re.findall(r"(.+?).h5", tmp)[0]
    else:
        model_name = re.findall(r"(.+?).hdf5", tmp)[0]
    ## 1. read data
    print('starting read data!')
    reg_test_prot, reg_test_comp, _, reg_test_value = dg.read_pro_com_file_regression(
        reg_test_file)  # multi_process_read_pro_com_file_regression(reg_test_file)
    print('test data size:', len(reg_test_prot))
    reg_test_predict_value = self.model_reg_task.predict(
        [reg_test_prot, reg_test_comp])
    if model_name[-3:] == 'reg':  # reg model
        reg_test_predict_value_df = pd.DataFrame(reg_test_predict_value, columns=['value'])
    else:  # total model
        reg_test_predict_value_df = pd.DataFrame(reg_test_predict_value[0], columns=['label'])
        reg_test_predict_value_df['value'] = reg_test_predict_value[1]
    reg_test_df = self.save_predict_result(reg_test_predict_value_df, reg_test_value,
                                           model_name, 'reg', 'test')
    self.computer_parameter_draw_scatter_plot(reg_test_df, model_name)
    if reg_train_file != None:
        reg_train_prot, reg_train_comp, _, reg_train_value = dg.multi_process_read_pro_com_file_regression(
            reg_train_file)
        reg_train_predict_value = self.model_reg_task.predict(
            [reg_train_prot, reg_train_comp])
        reg_train_predict_value = pd.DataFrame(reg_train_predict_value)
        reg_train_df = self.save_predict_result(reg_train_predict_value, reg_train_value,
                                                model_name, 'reg', 'train')
        self.computer_parameter_draw_scatter_plot(reg_train_df, model_name)
    if reg_vali_file != None:
        reg_vali_prot, reg_vali_comp, _, reg_vali_value = dg.multi_process_read_pro_com_file_regression(
            reg_vali_file)
        # predict value
        reg_vali_predict_value = self.model_reg_task.predict(
            [reg_vali_prot, reg_vali_comp])
        reg_vali_predict_value = pd.DataFrame(reg_vali_predict_value)
        reg_vali_df = self.save_predict_result(reg_vali_predict_value, reg_vali_value,
                                               model_name, 'reg', 'validation')
        self.computer_parameter_draw_scatter_plot(reg_vali_df, model_name)
def save_as_hdf5(X, Y, mask, path_save, bs=1024, shuffle=False, compression='gzip'):
    """ Save training and validation X and Y in hdf5.

    Arguments:
        - X: np.matrix
            Returned from CV().
        - Y: np.matrix
            Returned from CV().
        - mask: np.array
            Returned from CV().
        - path_save: str
            path/to/save/data.hdf5.
    """
    gen_tr = DataGenerator(X[mask], Y[mask], bs, shuffle)
    gen_va = DataGenerator(X[~mask], Y[~mask], bs, shuffle)

    def save(ds_X, ds_Y, gen):
        prog = Progbar(len(gen))
        cur_idx = 0
        for idx, (x, y) in enumerate(gen):
            rows = x.shape[0]
            assert(rows == y.shape[0])
            ds_X[cur_idx:(cur_idx + rows), :] = x
            ds_Y[cur_idx:(cur_idx + rows), :] = y
            cur_idx += rows
            prog.update(idx)
        print()

    with h5py.File(path_save, 'w') as f:
        ds = {}
        ds['X_tr'] = f.create_dataset('X_tr', (X[mask].shape[0], glb.SIZE_OBS_VEC),
                                      dtype='i8', chunks=True, compression=compression)
        ds['X_va'] = f.create_dataset('X_va', (X[~mask].shape[0], glb.SIZE_OBS_VEC),
                                      dtype='i8', chunks=True, compression=compression)
        ds['Y_tr'] = f.create_dataset('Y_tr', (X[mask].shape[0], Y.shape[1]),
                                      dtype='i8', chunks=True, compression=compression)
        ds['Y_va'] = f.create_dataset('Y_va', (X[~mask].shape[0], Y.shape[1]),
                                      dtype='i8', chunks=True, compression=compression)
        print('Converting training sets')
        save(ds['X_tr'], ds['Y_tr'], gen_tr)
        f.flush()
        print('Converting validation sets')
        save(ds['X_va'], ds['Y_va'], gen_va)
def __init__(self, source_dir, data_checker_path):
    self.clear_temp_folder()
    self.source_dir = source_dir
    self.data_checker_path = data_checker_path
    self.files = []
    self.csheets = []
    self.code_generator = CodeGenerator(FLAGS.namespace)
    self.data_generator = DataGenerator(self.code_generator, FLAGS.out_data_file)
    self.data_checkers = []
def insert_int_before_each_element():
    temporary_list = DataGenerator.generate_testing_list(True)
    result_list = []
    print(temporary_list)
    for element in temporary_list:
        result_list.append(DataGenerator.generate_random_int(8))
        result_list.append(element)
    if len(temporary_list) * 2 != len(result_list):
        print("error")
    print(result_list)
def train_model(self, train_file, validation_file, model_name):
    save_dir = os.path.join(os.getcwd(), 'models', model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    self.model = get_model.get_model_regression(
        save_dir, self.alpha,
        pro_branch_switch1='inception_block',
        pro_branch_switch2='inception_block',
        pro_branch_switch3='inception_block_b',
        pro_add_attention=False,
        comp_branch_switch1='inception_block',
        comp_branch_switch2='inception_block',
        comp_branch_switch3='inception_block_b',
        comp_add_attention=False)
    self.validation_x_prot, self.validation_x_comp, self.validation_y = dg.read_reg(
        validation_file)
    optimizer = keras.optimizers.Adam(lr=self.lr)
    self.model.compile(loss=self.mean_squared_error_l2,
                       optimizer=optimizer,
                       metrics=['mse', 'mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=self.patience)
    bestfile = save_dir + "/%s_best_model.hdf5" % model_name
    checkpoint = ModelCheckpoint(bestfile, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    # AUC
    RocAuc = AUC.RocAucEvaluation(
        validation_data=([self.validation_x_prot, self.validation_x_comp],
                         self.validation_y),
        interval=1)
    history = self.model.fit_generator(
        dg.read_reg_generator(train_file, self.batch_size),
        steps_per_epoch=57,
        epochs=self.epochs,
        validation_data=([self.validation_x_prot, self.validation_x_comp],
                         self.validation_y),
        callbacks=[early_stopping, checkpoint])
    print('load the best model %s to test' % bestfile)
    self.model = load_model(bestfile, custom_objects={
        'mean_squared_error_l2': self.mean_squared_error_l2})
    score = self.model.evaluate(
        [self.validation_x_prot, self.validation_x_comp], self.validation_y)
    print("validation results are ", score)
def dataSetConstruct(self):
    np.random.seed(0)
    file_path = DATA_DIRECTORY_PATH[2:] + '/' + DATA_FILE_NAME
    if not os.path.exists(file_path):
        DataGenerator.data_generate()
    reader = Reader(line_format='user item rating', sep='\t')
    self.data = Dataset.load_from_file(file_path, reader=reader)
    print('Constructing trainset and testset')
    self.trainset = self.data.build_full_trainset()
    self.testset = self.trainset.build_anti_testset()
def train_model(self, train_file, validation_file, model_name):
    save_dir = os.path.join(os.getcwd(), 'models', model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    self.model = get_model.get_model_classification(
        save_dir, self.alpha,
        pro_branch_switch1='inception_block',
        pro_branch_switch2='inception_block',
        pro_branch_switch3='inception_block_b',
        pro_add_attention=False,
        comp_branch_switch1='inception_block',
        comp_branch_switch2='inception_block',
        comp_branch_switch3='inception_block_b',
        comp_add_attention=False)
    self.validation_x_prot, self.validation_x_comp, self.validation_y = dg.read_class(
        validation_file)
    optimizer = keras.optimizers.Adam(lr=self.lr)
    self.model.compile(optimizer=optimizer,
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=self.patience)
    bestfile = save_dir + "/%s_best_model.hdf5" % model_name
    checkpoint = ModelCheckpoint(bestfile, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    # AUC
    RocAuc = AUC.RocAucEvaluation(
        validation_data=([self.validation_x_prot, self.validation_x_comp],
                         self.validation_y),
        interval=1)
    history = self.model.fit_generator(
        dg.read_class_generator(train_file, self.batch_size),
        steps_per_epoch=739,
        epochs=self.epochs,
        validation_data=([self.validation_x_prot, self.validation_x_comp],
                         self.validation_y),
        callbacks=[RocAuc, early_stopping, checkpoint])
    print('load the best model %s to test' % bestfile)
    self.model = load_model(bestfile)
    results = self.model.evaluate(
        [self.validation_x_prot, self.validation_x_comp], self.validation_y)
    print('validation accuracy:', results)
    self.draw_loss_change(history, model_name, save_dir)
    self.draw_ROC_curve('validation', model_name, save_dir)
    self.save_predict_result('validation', model_name, save_dir)
def __init__(self, dataset_path, img_format, mask_format, verbose=True,
             input_shape=(256, 256, 1), target_shape=(256, 256, 1),
             batchsize=16, batchsize_valid=16):
    self.dataset_path = dataset_path
    self.img_format = img_format
    self.mask_format = mask_format
    self.verbose = verbose
    self.input_shape = input_shape
    self.target_shape = target_shape
    self.batchsize = batchsize
    self.batchsize_valid = batchsize_valid

    # get directory name
    # dir_name = os.path.dirname(self.dataset_path)
    dir_name = self.dataset_path
    train_path = os.path.join(dir_name, 'data/train')
    train_path_input = os.path.join(train_path, 'input/')
    train_path_target = os.path.join(train_path, 'target/')
    train_imgs = get_files(train_path_input, img_format)
    train_masks = get_files(train_path_target, mask_format)
    train_imgs.sort()
    train_masks.sort()
    if len(train_imgs) == 0:
        sys.exit('empty train list: {}'.format(train_path))
    self.train_len = len(train_imgs)
    labels_train = dict()
    for i in range(len(train_imgs)):
        labels_train[train_imgs[i]] = train_masks[i]

    valid_path = os.path.join(dir_name, 'data/valid')
    valid_path_input = os.path.join(valid_path, 'input/')
    valid_path_target = os.path.join(valid_path, 'target/')
    valid_imgs = get_files(valid_path_input, img_format)
    valid_masks = get_files(valid_path_target, mask_format)
    valid_imgs.sort()
    valid_masks.sort()
    labels_valid = dict()
    for i in range(len(valid_imgs)):
        labels_valid[valid_imgs[i]] = valid_masks[i]
    self.valid_len = len(valid_imgs)

    self.trainGenerator = DataGenerator(slices_fn=train_imgs, segments_fn=labels_train,
                                        batch_size=self.batchsize,
                                        input_shape=self.input_shape,
                                        target_shape=self.target_shape)
    self.validGenerator = DataGenerator(slices_fn=valid_imgs, segments_fn=labels_valid,
                                        batch_size=self.batchsize_valid,
                                        input_shape=self.input_shape,
                                        target_shape=self.target_shape)
def load_data_mnist(self, batch_size):
    self.x_train = np.asarray(
        np.load(os.path.join(self.base_path, "train/X.npy")))
    self.y_train = np.asarray(
        np.load(os.path.join(self.base_path, "train/y.npy")))
    self.x_test = np.asarray(
        np.load(os.path.join(self.base_path, "test/X.npy")))
    self.y_test = np.asarray(
        np.load(os.path.join(self.base_path, "test/y.npy")))

    # map label to 0-9
    max_label = np.max(self.y_train)
    if max_label > 9:
        self.y_train = self.y_train - (max_label - 9)
        self.y_test = self.y_test - (max_label - 9)
    print("# of training exp:%d, testing exp:%d" %
          (len(self.x_train), len(self.x_test)))

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    self.training_set = DG.MnistDataGenerator(self.x_train, self.y_train)
    DataParams = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 0}
    self.train_generator = DataLoader(self.training_set, **DataParams)
    self.test_set = DG.MnistDataGenerator(self.x_test, self.y_test)
    DataParams = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 0}
    self.test_generator = DataLoader(self.test_set, **DataParams)
    return self.train_generator
def liner_find_n_cnt():
    # Set up the source data for the linear distribution
    mu = [[0, 2, 3], [4, 6, 0]]
    sigma = [[2, 1, 2], [1, 2, 1]]
    N = 1000
    X, Y, class0, class1 = dg.norm_dataset(mu, sigma, N)
    trainCount = round(0.7 * N * 2)
    Xtrain = X[0:trainCount]
    Xtest = X[trainCount:N * 2]
    Ytrain = reshape(Y[0:trainCount])
    Ytest = reshape(Y[trainCount:N * 2])
    # set the maximum number of neurons
    N_max = 20
    # train the networks and collect the accuracy
    data = np.empty((N_max, 3))
    for i in range(N_max):
        # initialise the neural network
        model = nn.NeuralNetwork(Xtrain, Ytrain, i)
        # training iteration
        model.full_train(20, 0.005)
        # get the accuracy
        LOSS, ACC = model.test(Xtest, Ytest)
        data[i][0] = i
        data[i][1] = LOSS
        data[i][2] = ACC
    graphs(data, 'neural')
def run():
    data_results = dg.prepareData()
    X = data_results["X"]
    print(f"data_results[X].shape: {X.shape}")
    cluster_to_label_map = ut.createClusterLabelMap(
        data_results["TRAIN_DATA_y"], data_results["CLUSTER_NAMES"])
    model = None
    if co.IS_KMEANS_MODEL:
        model = km.computeClustersUsingKmeans(X, data_results["TRUE_K"])
    else:
        model = am.computeClustersUsingAutoencoder(X, data_results["TRUE_K"])
    print(co.LINE_BREAK)
    ut.reportTopNClutersByWords(model, data_results["SVD"],
                                data_results["VECTORIZER"], cluster_to_label_map)
    print(co.LINE_BREAK)
    data_results = predictKemeansClusters(model, data_results)
    kmeans_accuracy = data_results["ACCURACY"]
    kmeans_homogeneity = data_results["HOMOGENEITY"]
    print(f"K-means prediction accuracy is: {kmeans_accuracy:0.2f}")
    print(f"K-means prediction homogeneity score is: {kmeans_homogeneity:0.2f}")
    print(co.LINE_BREAK)
    results_df = ut.reportTopNClutersByDocs(model, data_results["TRAIN_DATA_y"],
                                            data_results["DOC_FILENAME"])
    fig = vc.visualizeClusteringResults(data_results, results_df, cluster_to_label_map)
    return fig
def main():
    warnings.simplefilter(action='ignore', category=FutureWarning)
    start_date = '2012-01-02'
    end_date = '2020-04-06'
    test_x, test_y = DataGenerator.make_features(start_date, end_date, is_training=False)

    ###################################################################################################################
    # inspect test data
    assert test_y.tolist() == get_test_dollar_price(
        start_date, end_date).tolist(), 'your test data is wrong!'
    ###################################################################################################################

    # TODO: fix pickle file name
    filename = 'team11_model.pkl'
    loaded_model = pickle.load(open(filename, 'rb'))
    print('load complete')
    print(loaded_model.get_params())
    predict = loaded_model.predict([test_x])

    # inverse scaled
    predict = return_y_inv_scaled(start_date, end_date, predict)
    print('mae: ', mean_absolute_error(np.reshape(predict, -1), test_y))
def test(start_date, end_date, mode, input_days, span, n_components):
    test_x, past_price, past_feature, target_price, scaler = DataGenerator.make_features(
        start_date, end_date, mode=mode, input_days=input_days, span=span, is_training=False)

    ###################################################################################################################
    # inspect data
    assert past_price.tolist() == get_past_price(start_date, end_date).tolist(), 'your past price data is wrong!'
    assert target_price.tolist() == get_target_price(start_date, end_date).tolist(), 'your target price data is wrong!'
    ###################################################################################################################

    # TODO: fix pickle file name
    filename = 'team11_model.pkl'
    model = pickle.load(open(filename, 'rb'))
    hidden_states = model.predict(test_x)
    expected_diff_price = np.dot(model.transmat_, model.means_)
    diff = list(zip(*expected_diff_price))[0]
    diff = tuple([i * math.sqrt(scaler.var_[0]) + scaler.mean_[0] for i in diff])

    predicted_price = list()
    for idx in range(10):  # predict gold price for 10 days
        state = hidden_states[idx]
        current_price = past_feature[idx]
        next_day_price = current_price * (1 + diff[state])  # predicted gold price of next day
        predicted_price.append(next_day_price)
    predict = np.array(predicted_price)
    return mean_absolute_error(target_price, predict)
def find_best_n_estimators():
    # Set up the source data for the linear distribution
    mu = [[0, 2, 3], [3, 5, 1]]
    sigma = [[2, 1, 2], [1, 2, 1]]
    N = 1000
    X, Y, class0, class1 = dg.norm_dataset(mu, sigma, N)
    trainCount = round(0.7 * N * 2)
    Xtrain = X[0:trainCount]
    Xtest = X[trainCount:N * 2]
    Ytrain = Y[0:trainCount]
    Ytest = Y[trainCount:N * 2]
    # Vary the number of trees in a loop and track the best accuracy
    maxACC = 0
    resN = 0
    for n in range(1, 300, 10):
        # Run classification with RandomForestClassifier
        model = RandomForestClassifier(criterion="entropy", random_state=0,
                                       n_estimators=n).fit(Xtrain, Ytrain)
        ACC = model.score(Xtest, Ytest)
        if (ACC > maxACC):
            maxACC = ACC
            resN = n
    # Print the result
    print("N: " + str(resN) + " ACC: " + str(maxACC))
def main():
    test_day = ['2020-01-19', '2020-02-01', '2020-02-02', '2020-02-08']
    # test_day = ['2020-02-09', '2020-02-15', '2020-02-16', '2020-02-22']
    training_x, training_y = DataGenerator.get_data(test_day, is_training=True)

    # ================================ train SVM model=========================================
    # TODO: set parameters
    print('start training model')
    model = svm.SVC(random_state=0, C=10, kernel='rbf', gamma=0.1, coef0=0.0,
                    shrinking=True, probability=False, cache_size=600, tol=0.001,
                    class_weight='balanced', verbose=False, max_iter=-1,
                    decision_function_shape='ovr')
    model.fit(training_x, training_y)
    print('completed training model')

    # TODO: fix pickle file name
    filename = 'team10_model.pkl'
    pickle.dump(model, open(filename, 'wb'))
    print('save complete')
def main():
    x0 = normal(0, sigma**2)
    x, y = DataGenerator.SVGenerator2(phi, sigma, beta, T)

    def point_plot():
        X_point = []
        for n in range(10, 50, 1):
            norm_w, particles, point_x, var_weights = SIS(y, T, n, x[0], phi, sigma, beta)
            X_point.append(1/2 * (point_x - x[-1])**2)
        plt.plot(X_point)
        plt.show()

    def var_plot():
        X_point = []
        for n in range(10, 50, 1):
            norm_w, particles, point_x, var_weights = log_SIS(y, T, n, x[0], phi, sigma, beta)
            X_point.append(var_weights)
        plt.plot(X_point)
        plt.show()

    def weights_plot():
        norm_w, particles, point_x, var_weights = log_SIS(y, T, N, x[0], phi, sigma, beta)
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
        ax1.hist(norm_w[:, 1])
        ax2.hist(norm_w[:, 10])
        ax3.hist(norm_w[:, 50])
        ax1.set_title('normalized weights iter 1')
        ax2.set_title('normalized weights iter 10')
        ax3.set_title('normalized weights iter 50')
        plt.show()

    weights_plot()
def next(self):
    if self.cur_batch < self.num_batches:
        self.cur_batch += 1
        datas, labels = dg.get_data_label_pair_threads(
            self._provide_data, self.datas_batch,
            self._provide_label, self.labels_batch,
            self.datalist, self.rndidx_list, self.cur_batch)
        return mx.io.DataBatch(datas, labels)
    else:
        raise StopIteration
def next(self):
    if self.cur_batch < self.num_batches:
        datas, labels = dg.get_data_label_proxy2(
            self._provide_data, self._provide_label,
            self.datalist, self.rndidx_list, self.cur_batch)
        self.cur_batch += 1
        return mx.io.DataBatch(datas, labels)
    else:
        raise StopIteration
def next(self):
    if self.cur_idx < self.batchnum:
        # print self._provide_data
        carinfo = dg.get_feature_label_test(self._provide_data[0][1],
                                            self.datalist, self.cur_idx)
        self.cur_idx += 1
        return carinfo
    else:
        raise StopIteration
def next(self):
    if self.cur_idx < self.datalen:
        # print self._provide_data
        carinfo = dg.get_data_label_test(self._provide_data[0][1][1:],
                                         self.datalist, self.cur_idx, self.normalize)
        self.cur_idx += 1
        return carinfo
    else:
        raise StopIteration
def __init__(self, data_name, data_shape, datafn, normalize=True):
    super(CarReID_Test_Iter, self).__init__()
    self._provide_data = zip(data_name, data_shape)
    self.cur_idx = 0
    self.datalist = dg.get_datalist(datafn)
    self.datalen = len(self.datalist)
    self.normalize = normalize
def next(self):
    if self.cur_batch < self.num_batches:
        datas, labels, paths = dg.get_test_data_label_pair_threads(
            self._provide_data, self.datas_batch,
            self._provide_label, self.labels_batch,
            self.datalist, self.cur_batch)
        self.cur_batch += 1
        self.paths = paths
        return mx.io.DataBatch(datas, labels)
    else:
        raise StopIteration
def return_list_of_elements_with_space():
    testing_list = DataGenerator.generate_testing_list(True, True, True, True)
    result_list = []
    print(testing_list)
    for element in testing_list:
        look_for_empty_character_in_string(element, result_list)
        look_for_empty_character_in_collection(element, result_list)
    print(result_list)
def read_data(self, data_type, file, reinforced=False):
    print("starting read %s data:" % data_type)
    x_prot, x_comp, y = dg.multi_process_read_pro_com_file_regression(file, reinforced=reinforced)
    print("%s data, %s, has been read successfully!" % (data_type, file))
    print('x_prot.shape', x_prot.shape)
    print('x_comp.shape', x_comp.shape)
    print('y.shape', y.shape)
    return x_prot, x_comp, y
def main():
    test_day = ['2020-01-19', '2020-02-01', '2020-02-02', '2020-02-08']
    # test_day = ['2020-02-09', '2020-02-15', '2020-02-16', '2020-02-22']
    training_x, training_y = DataGenerator.get_data(test_day, is_training=True)
    test_x, test_y = DataGenerator.get_data(test_day, is_training=False)

    # Up-sample the minority class
    training_df = pd.concat([training_x, training_y], axis=1)
    minor_df = training_df[training_df.win == 1]
    major_df = training_df[training_df.win == 0]
    minor_df_upsample = resample(minor_df, replace=True, n_samples=len(major_df), random_state=1)
    new_training_df = pd.concat([major_df, minor_df_upsample], axis=0)
    training_y = new_training_df['win']
    training_x = new_training_df.drop(['win'], axis=1)

    scaler = MinMaxScaler()
    training_x = scaler.fit_transform(training_x, training_y)
    # scale the test set with the scaler fitted on the training data (fit only on training data)
    test_x = scaler.transform(test_x)

    # ================================ train SVM model=========================================
    # TODO: set parameters
    print('start training model')
    # model = svm.SVC(C=1, kernel='linear', random_state=0, class_weight={0: 5, 1: 5})
    kernel_list = ['rbf', 'linear']
    C_list = [1, 100, 1000]
    for kernel in kernel_list:
        for C in C_list:
            model = svm.SVC(C=C, kernel=kernel, random_state=123)
            model.fit(training_x, training_y)
            pred_y = model.predict(test_x)
            print("kernel, C: {},{}".format(kernel, C))
            print()
            print('precision: {}'.format(precision_score(test_y, pred_y)))
            print('accuracy: {}'.format(accuracy_score(test_y, pred_y)))
            print('recall: {}'.format(recall_score(test_y, pred_y)))
            print('f1-score: {}'.format(f1_score(test_y, pred_y)))
def __init__(self, data_name, data_shape, datafn):
    super(CarReID_Feat_Iter, self).__init__()
    self._provide_data = zip(data_name, data_shape)
    self.cur_idx = 0
    self.datalist = dg.get_datalist(datafn)
    self.datalen = len(self.datalist)
    self.batchsize = data_shape[0][0]
    self.batchnum = self.datalen / self.batchsize
def next(self):
    if self.cur_batch < self.num_batches:
        datas, labels, carids, infos = dg.get_data_label_proxy_batch_plate_mxnet_threads(
            self._provide_data, self.datas_batch,
            self._provide_label, self.labels_batch,
            self.proxy_datalist, self.cur_batch, self.caridnum)
        self.batch_carids = carids
        self.batch_infos = infos
        self.cur_batch += 1
        return mx.io.DataBatch(datas, labels)
    else:
        raise StopIteration
def main():
    clf = svm.NuSVC()
    flag = True
    X = []
    y = []
    while flag:
        print "1. Translate"
        print "2. Learn"
        print "3. Quit"
        choice = raw_input("Choose an option: ")
        print
        if choice == '1':
            choice = raw_input("Letter?")
            if choice == 'A':
                print clf.predict(DataGenerator.genA())
            if choice == 'B':
                print clf.predict(DataGenerator.genB())
            if choice == 'C':
                print clf.predict(DataGenerator.genC())
            if choice == 'D':
                print clf.predict(DataGenerator.genD())
            if choice == 'E':
                print clf.predict(DataGenerator.genE())
        elif choice == '2':
            X, y = DataGenerator.genLearning()
            clf.fit(X, y)
        elif choice == '3':
            flag = False
    except NameError:
        print 'a port is necessary'
        print usage
        sys.exit(0)
    # if no length is given, use a random one (length=0)
    try:
        length
    except NameError:
        length = 0
        print 'using random length'
    try:
        tcp.connect((host, int(port)))
        print "Connected"
    except socket.error:
        print "Couldn't connect to Server:" + host + ":" + port
        sys.exit(2)
    while(True):
        try:
            random = DataGenerator.randString(int(length))
            dataSent = tcp.send(random)
            print "sent"
            time.sleep(5)
        except socket.error:
            print "Connection lost..."
            break

if __name__ == "__main__":
    main(sys.argv[1:])
# definition of the wrong model of measured data
wrongSignalShape = lambda x, a, b, c: a * numpy.sin(b * numpy.array(x) + c)

# create plotting object
plotter = Plotter("noiseEffectStudy")

# defining list of (absolute) noise levels -> noise RMSes
noiseList = [0.01, 0.2, 0.4, 0.6, 0.8]

# running "experiments" with different noise levels
for noiseLevel in noiseList:
    # creating a random (gaussian) noise generator
    noiseGen = NoiseGenerator(noiseLevel)
    # creating a "data factory" following the signalShape with some noise given by noiseGen
    dataGen = DataGenerator(signalShape, 10, (-2, 2), 100, noiseGen)
    # generating data
    data = dataGen.generateData()
    # fitting an appropriate data model and evaluating output
    fitOutput_A = Fitter.fit(data, polynomial3)
    statAnalysisOutput_A = StatAnalyser.evaluate(fitOutput_A)
    # fitting wrong data model and evaluating output
    fitOutput_B = Fitter.fit(data, wrongSignalShape)
    statAnalysisOutput_B = StatAnalyser.evaluate(fitOutput_B)
    # adding both fitting outcomes to printing list
    plotter.addDataToPlot(data, fitOutput_A, statAnalysisOutput_A, noiseLevel)
    plotter.addDataToPlot(data, fitOutput_B, statAnalysisOutput_B, noiseLevel)
def main(argv):
    input_graph_file = None
    graph_save_file = None
    input_params_file = None
    airport_q = 10
    only_generate = False
    verbosity = 5
    data = None
    graph = None

    # Parameters
    params = {
        'graph': graph,
        'start_idx': 1,
        'end_idx': 4,
        'max_flights': 150,
        'cost_weight': 2,
        'time_weight': 1,
        'pop_size': 20,
        'generations': 10,
        'mutation_rate': 0.2,
        'tournament_size': 2,
        'elitism': False,
        'dest_min': 2,
        'dest_max': 5
    }

    # Parse command line options
    try:
        opts, args = getopt.getopt(argv, "hi:s:p:a:gv:",
                                   ["ifile=", "save=", "params=", "airports=", "verbose=",
                                    "start_idx=", "end_idx=", "max_flights=", "cost_weight=",
                                    "time_weight=", "pop_size=", "generations=", "mutation_rate=",
                                    "tournament_size=", "elitism=", "dest_min=", "dest_max="])
    except getopt.GetoptError:
        print_help()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print_help()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            input_graph_file = arg
            print('Will read data from: {}'.format(input_graph_file))
        elif opt in ("-s", "--save"):
            graph_save_file = arg
            print('Will save graph to: {}'.format(graph_save_file))
        elif opt in ("-p", "--params"):
            input_params_file = arg
            print('Will read params from: {}'.format(input_params_file))
        elif opt in ("-a", "--airports"):
            airport_q = int(arg)
            print('Number of airports = {}'.format(airport_q))
        elif opt in ("-g", "--generate"):
            only_generate = True
            print('Will just generate data')
        elif opt in ("-v", "--verbose"):
            verbosity = int(arg)
            print('Verbosity level = {} ({})'.format(verbosity, get_verbosity_info(verbosity)))
        elif opt in list("--" + x for x in params.keys()):
            if opt[2:] == 'mutation_rate':
                params[opt[2:]] = float(arg)
            elif opt[2:] == 'elitism':
                params[opt[2:]] = arg.lower() in ['true', 't', 'tak', 'yes', 'y', '1']
            else:
                params[opt[2:]] = int(arg)
    print('Params are: \n{} \n'.format('\n'.join(
        ' {:{}}: {}'.format(k, len(max(params.keys())) + 1, v)
        for k, v in sorted(params.items()))))

    # DataGenerator
    data = DataGenerator()
    DataGenerator.DESTINATIONS_MIN = params['dest_min']
    DataGenerator.DESTINATIONS_MAX = params['dest_max']
    if input_graph_file is not None:
        data.load_saved_graph(input_graph_file)
    else:
        data.load_new_data(airport_q)
        data.create_graph()
    if graph_save_file is not None:
        data.save_graph(graph_save_file)
    testsuite_airports = data.get_airports()
    testsuite_graph = data.get_graph()
    graph = GraphManager(params['max_flights'])
    graph.set_graph(testsuite_graph, testsuite_airports)
    if verbosity > 1:
        graph.print_airports()
        graph.print_flights()
        graph.print_graph()
    if not only_generate:
        print(' === GeneticAlgorithm tests ===')
        params['graph'] = graph
        GA.run_with_params(params)