def getEntrys():
    # Read user inputs from the Tkinter entry widgets
    file_dir = file_dir_entry.get()
    conditions_path = conditions_path_entry.get()
    output_path = output_path_entry.get()
    num_rows_skip = num_rows_skip_entry.get()
    unique_keys = str(unique_keys_entry.get())
    product_filters = json.loads(str(product_filters_entry.get()))
    drop_dups = bool(drop_dups_var.get())
    duplicate_subset = str(duplicate_subset_entry.get()).split(',')

    # Abort if any required entry is empty
    if (len(file_dir) == 0 or len(conditions_path) == 0 or len(output_path) == 0
            or len(unique_keys) == 0 or len(num_rows_skip) == 0):
        tkinter.Label(
            window,
            text="Certain entries must be entered to successfully run automation..."
        ).pack()
        return
    else:
        num_rows_skip = int(num_rows_skip)
        unique_keys = unique_keys.split(',')

        # Save settings if checkbox is checked
        if save_settings_var.get():
            tkinter.Label(window, text="Saving settings...").pack()
            to_save = dict(
                file_dir=file_dir,
                conditions_path=conditions_path,
                output_path=output_path,
            )
            with open('user_interface_settings.json', 'w') as fp:
                json.dump(to_save, fp)

        # Run automation
        tkinter.Label(window, text="Starting automation...").pack()
        try:
            # Clean up old files
            repo_cleanup.main()
            tkinter.Label(window, text="Old automation files have been removed.").pack()

            # Process and deliver data
            data_processing(file_dir=file_dir,
                            num_rows_skip=num_rows_skip,
                            unique_keys=unique_keys,
                            conditions_path=conditions_path,
                            output_path=output_path,
                            product_filters=product_filters,
                            drop_dups=drop_dups,
                            duplicate_subset=duplicate_subset)
            tkinter.Label(window, text="Data has been processed and delivered.").pack()
            tkinter.Label(window, text="Automation ran successfully.").pack()
        except Exception as err:
            traceback.print_exc()
            logger.exception(str(err))
            tkinter.Label(
                window,
                text="Failure occurred during automation. Check automation.log file for details."
            ).pack()
def get_corpus(path):
    # Preprocess the corpus and build word / position / tag sequences
    corpus_post_processing = data_processing.data_processing(path)
    sequences = data_processing.init_sequence(corpus_post_processing)
    word_seqs = sequences[0]
    position_seqs = sequences[1]
    tag_seqs = sequences[2]
    model_input = data_processing.get_model_input(word_seqs)

    # Hold out 30% of the data for testing
    x_train, x_test, y_train, y_test = train_test_split(model_input,
                                                        tag_seqs,
                                                        test_size=0.3)
    return x_train, x_test, y_train, y_test
def post(self, url, data):
    db = data_processing()
    data1 = db.encrypt(data)  # encrypt the payload
    header = {"Content-Type": "application/json"}
    # Avoid shadowing the built-in names `re` and `dict`
    resp = requests.post(url, data=data1, headers=header)
    response = resp.text
    resp_dict = json.loads(response)  # parse the JSON string into a dict
    result = db.decrypt(resp_dict)  # decrypt the response
    print(result)
def input_data_for_model(input_shape):
    # Load the raw data
    input_data = load_data()
    # Run preprocessing
    data_processing()

    # Load the dictionaries built during preprocessing
    with open(CONSTANTS[1], 'rb') as f:
        word_dictionary = pickle.load(f)
    with open(CONSTANTS[2], 'rb') as f:
        inverse_word_dictionary = pickle.load(f)
    with open(CONSTANTS[3], 'rb') as f:
        label_dictionary = pickle.load(f)
    with open(CONSTANTS[4], 'rb') as f:
        output_dictionary = pickle.load(f)
    vocab_size = len(word_dictionary.keys())
    label_size = len(label_dictionary.keys())

    # Group (word, pos, label) triples by sentence
    aggregate_function = lambda input: [
        (word, pos, label) for word, pos, label in zip(
            input['word'].values.tolist(),
            input['pos'].values.tolist(),
            input['tag'].values.tolist())
    ]
    grouped_input_data = input_data.groupby('sent_no').apply(aggregate_function)
    sentences = [sentence for sentence in grouped_input_data]

    # Index words and labels, then pad every sequence to a fixed length
    x = [[word_dictionary[word[0]] for word in sent] for sent in sentences]
    x = sequence.pad_sequences(maxlen=input_shape, sequences=x, padding='post', value=0)
    y = [[label_dictionary[word[2]] for word in sent] for sent in sentences]
    y = sequence.pad_sequences(maxlen=input_shape, sequences=y, padding='post', value=0)
    y = [to_categorical(label, num_classes=label_size + 1) for label in y]

    return x, y, output_dictionary, vocab_size, label_size, inverse_word_dictionary
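# --- Hedged illustration (toy values; not part of the original project) ---
# Shows what the padding / one-hot step in input_data_for_model produces:
# shorter sequences are post-padded with 0, and each label id becomes a
# one-hot vector of length num_classes.
from keras.preprocessing import sequence
from keras.utils import to_categorical

toy_x = sequence.pad_sequences(maxlen=5, sequences=[[3, 7, 2]], padding='post', value=0)
print(toy_x)   # [[3 7 2 0 0]]
toy_y = to_categorical([1, 0], num_classes=3)
print(toy_y)   # [[0. 1. 0.], [1. 0. 0.]]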
def __init__(self, path_data, path_nwp, nominal_p, horizon, start_date, end_date):
    # Retrieve training data
    data_model = data_processing(path_nwp, path_data)
    self.x_train, self.y_train = data_model.train_data(
        nominal_p, horizon, start_date, end_date)

    # User parameters
    self.nominal_p = nominal_p
    self.horizon = horizon
    return
def run_custom_processing():
    # Skip costly computations if the shapefile already exists
    print("Processing started:")
    try:
        pro = data_processing()
        pro.setup_processing(projectPath)
        pro.calc_distance_differences()
        pro.calculate_height_speed_differences(projectPath)
        pro.prepare_predictions()
        pro.predict()
        pro.export_layer(projectPath)
        print("Processing finished.\n")
    except Exception as err:
        # Re-raise with context instead of a bare except that hides the cause
        raise RuntimeError("Error encountered while processing data.") from err
def validation(train, valid, mode='validation', param=0):
    import data_processing as dp
    dphelper = dp.data_processing()

    # Split both sets into dense and sparse subsets
    dense_train, sparse_train = dphelper.split(train)
    dense_valid, sparse_valid = dphelper.split(valid)

    # Dense subset: SGD with bias terms
    import sgd_bias as sgd
    train_rss_dense, valid_rss_dense = sgd.sgd_bias(dense_train, dense_valid, 'validation')

    # Sparse subset: baseline model
    import baseline as bs
    train_rss_sparse, valid_rss_sparse = bs.baseline(sparse_train, sparse_valid, 'validation')

    return train_rss_dense + train_rss_sparse, valid_rss_dense + valid_rss_sparse
def run_greenhouse():
    # Import variables from config.ini
    config = configparser.ConfigParser()
    path = "C:/Users/dabha/Documents/GitHub/Greenhouse/Brain/"
    file = "config.ini"
    config.read(path + file)

    # Read variables, define GPIO pin usage and the GPIO numbering convention
    humidity_average = float(config['devpi01_greenhouse']['humidity_average'])
    temperature_average = float(config['devpi01_greenhouse']['temperature_average'])
    humidity_limit = float(config['devpi01_greenhouse']['humidity_limit'])
    temperature_limit = float(config['devpi01_greenhouse']['temperature_limit'])
    heater_pin = int(config['devpi01_greenhouse']['heater_pin'])
    fan_pin = int(config['devpi01_greenhouse']['fan_pin'])
    LED_pin = int(config['devpi01_greenhouse']['LED_pin'])
    sensor_pin = int(config['devpi01_greenhouse']['sensor_pin'])
    sensor_name = str(config['devpi01_greenhouse']['sensor_name'])
    LED_scheme = str(config['devpi01_greenhouse']['LED_scheme'])
    acclimatise_from = datetime.date(
        int(config['devpi01_greenhouse']['acclimatise_from_year']),
        int(config['devpi01_greenhouse']['acclimatise_from_month']),
        int(config['devpi01_greenhouse']['acclimatise_from_day']))
    component_activation = config.getboolean('devpi01_greenhouse', 'component_activation')

    # Set up pins for usage (BCM numbering)
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(heater_pin, GPIO.OUT)
    GPIO.setup(fan_pin, GPIO.OUT)
    GPIO.setup(LED_pin, GPIO.OUT)

    # Get averaged, processed sensor data
    humidity_average, temperature_average = data_processing(sensor_pin, sensor_name)

    # Store variables as tuples
    averages = temperature_average, humidity_average
    limits = temperature_limit, humidity_limit
    pins = heater_pin, fan_pin

    # Activate heater, fan and LED components if necessary
    if component_activation:
        temperature_and_humidity_control(averages, limits, pins)
        LED_control(LED_pin, LED_scheme, acclimatise_from)
    return None
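# --- Hedged sketch (not part of the original project) ---
# RPi.GPIO keeps channels configured after an exception, so a common pattern
# is to wrap the run in try/finally and always call GPIO.cleanup().
import RPi.GPIO as GPIO

if __name__ == "__main__":
    try:
        run_greenhouse()
    finally:
        GPIO.cleanup()  # release every configured channel, even on failure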
def prediction(train_valid, test, pred_filename):
    import data_processing as dp
    dphelper = dp.data_processing()

    # Split into dense and sparse subsets
    dense_train, sparse_train = dphelper.split(train_valid)
    dense_test, sparse_test = dphelper.split(test)

    # Dense subset: SGD with bias terms; sparse subset: baseline model
    import sgd_bias as sgd
    y_hat_dense, train_rmse_dense = sgd.sgd_bias(dense_train, dense_test, 'prediction')
    import baseline as bs
    y_hat_sparse, train_rmse_sparse = bs.baseline(sparse_train, sparse_test, 'prediction')

    print('dense subset train rmse: %.16f' % train_rmse_dense)
    print('sparse subset train rmse: %.16f' % train_rmse_sparse)

    # Merge the predictions back into the test set and write them out
    test = dphelper.merge(test, y_hat_dense, y_hat_sparse)
    util.write_predictions(test, pred_filename)
def update(self, country, symbol, style, on):
    print(country, symbol, style)
    data_request = [country, symbol, style]
    print("Request: ", data_request)

    # Only re-fetch when country/symbol changed or nothing has been requested yet
    if data_request[:2] != self.previous_request[:2] or self.previous_request == []:
        print("Requesting data...")
        self.df_daily, self.df_yearly, self.df_est, self.currency = req_handle(
            *data_request[:2])
        self.previous_request = data_request
        print("Data received.")

    print("Processing data...")
    processing_request = [
        self.df_daily, self.df_yearly, self.df_est, *data_request[1:],
        self.currency, on
    ]
    trace_base, trace_ratio, pe, pe_norm, grw, grw_exp = data_processing(
        *processing_request)
    print("Data processed.")
    return trace_base, trace_ratio, pe, pe_norm, grw, grw_exp
def testing():
    company = input('Enter company name: ')
    data = dp.data_processing(dp.data_download(company))

    # Keep the most recent 90 sessions
    closing = dp.closing_prices(data)[:90]
    maxim = dp.max_prices(data)[:90]
    minim = dp.min_prices(data)[:90]

    datalist = []
    for i in range(90):
        datalist.append(data[i][4])

    # smaa = sma(50, datalist)
    # emalist = ema(50, datalist)
    # demaa = dema(50, datalist)
    # framaa = frama(50, datalist, w=-4.5)
    # rsii = rsi(14, datalist)
    # macdd = macd(12, 26, datalist)
    # print(len(dema(50, datalist)))
    # print(len(emalist))
    # print(len(smaa))
    # print(len(framaa))
    # print(len(rsii))
    # print(len(macdd))

    stoch = stochastic_oscillator(5, datalist)
    boll = bollinger_bands(15, datalist)
    cci = commodity_channel_index(closing, maxim, minim)
    print(len(cci))
    print(cci)

    plt.style.use('ggplot')
    plt.plot(boll[1])
    plt.plot(boll[0])
    plt.plot(cci)
    plt.plot(datalist)
    # plt.plot(framaa)
    # plt.plot(smaa, color='red')
    # plt.plot(emalist, color='orange')
    # plt.plot(demaa, color='black')
    # plt.plot(macdd)
    # plt.plot(stoch)
    plt.show()
def post():
    print("Loaded")
    # print(json.loads(request.get_data()))
    data = json.loads(request.get_data())
    data = json.dumps(data, ensure_ascii=False).encode('utf8')

    # The JSON body of the POST request is handled as binary data,
    # so decode the bytes into a string before editing it.
    data = data.decode('utf8')

    # The incoming data spells booleans as true/false, but Python booleans
    # are capitalized, so rewrite them as True/False before literal_eval.
    data = data.replace("true", "True")
    data = data.replace("false", "False")

    # Convert the modified string back into a Python dictionary
    # (JSON objects map to dicts in Python).
    data = literal_eval(data)

    # First-pass processing: collect only the comments
    # from the YouTube API data received as input.
    data2 = data_processing(data)
    # The processed data is a string, so convert it back to a dictionary.
    data2 = literal_eval(data2)

    # Second-pass processing: find the highlights among the comments and return them.
    data3 = comment_highlight(data2)

    # Fix an issue where two consecutive backslashes end up written as four.
    data3 = data3.replace("\\\\", "\\")
    return data3
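# --- Hedged sketch (illustrative only; the byte string below is made up) ---
# json.loads already maps JSON true/false/null to Python True/False/None and
# accepts bytes directly (Python 3.6+), so the parsed object can often be used
# without the string-replace / literal_eval round trip shown above.
import json

raw_body = b'{"videoId": "abc123", "isShort": true, "comments": []}'
parsed = json.loads(raw_body)
print(parsed["isShort"] is True)  # True: booleans arrive as real Python bools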
mpl.rcParams['font.size'] = 22
mpl.rcParams['legend.fontsize'] = 10
mpl.rcParams['lines.linewidth'] = 2
mpl.rcParams['xtick.major.size'] = 5
mpl.rcParams['ytick.major.size'] = 5
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['mathtext.fontset'] = "stixsans"

############################################################# self-made modules
import measurement
import data_processing
import QueEditor as AddQue
import SecondExcite as SE_control
import FileUI
import SourEdit
import MeasEdit

DTP = data_processing.data_processing()

#####################################################################
path = sys.path[0]
vtiTempControlGUI = path + r'\lineplot.ui'
Ui_MainWindow, QtBaseClass = uic.loadUiType(vtiTempControlGUI)


class main(QtWidgets.QMainWindow, Ui_MainWindow, QtWidgets.QFileDialog,
           QtWidgets.QMessageBox, QtWidgets.QInputDialog):
    def __init__(self):
        super(main, self).__init__()
        self.setupUi(self)
        self.MSMT = measurement.measurement()
        self.thread = QThread()
        ###################################################
        self.loop1 = ''
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Sep  2 15:35:59 2018

@author: isayapin
"""

# XGBoost

# Importing the libraries
import numpy as np
import pandas as pd

from data_processing import data_processing

X_train, X_test, y_train, y_test, sc_y = data_processing(2000000)

from xgboost import XGBRegressor

model = XGBRegressor()
model.fit(X_train, y_train, verbose=False)

y_pred = model.predict(X_test)
y_pred = sc_y.inverse_transform(y_pred)

from sklearn.metrics import mean_squared_error

print(np.sqrt(mean_squared_error(y_pred, y_test)))

"""
RMSE = 2.7817755
def main():
    # attr_type_flag, full_labels, data_value_list, data_label_list = data_processing.data_processing(DATASET_NAME, 0)
    # Djk_label_list, Djv_label_list = D_matrix_generate.D_matrix_generate(full_labels, data_label_list)
    # model, max_and_min = Bayes.Naive_Bayes(attr_type_flag, data_value_list, full_labels, Djk_label_list, Djv_label_list)
    # attr_type_flag, full_labels, data_value_list, data_label_list = data_processing.data_processing(DATASET_NAME, 0)
    # accuracy = Bayes.classify(model, max_and_min, attr_type_flag, data_value_list, full_labels, data_label_list)
    # return

    attr_type_flag, full_labels, data_value_list, data_label_list = data_processing.data_processing(
        DATASET_NAME, 0)

    # Record the max and min of every continuous attribute (type flag "1")
    max_and_min = {}
    for i in range(len(attr_type_flag)):
        if attr_type_flag[str(i)] == "1":
            for data in data_value_list:
                if data.__contains__(str(i)):
                    value = float(data[str(i)])
                    max = value
                    min = value
                    break
            for data in data_value_list:
                if data.__contains__(str(i)):
                    value = float(data[str(i)])
                    if value > max:
                        max = value
                    if value < min:
                        min = value
            max_and_min["%s_max" % str(i)] = str(max)
            max_and_min["%s_min" % str(i)] = str(min)
    print(max_and_min)
    minus = float(max_and_min["102_max"]) - float(max_and_min["102_min"])
    print(minus)

    datasets_value, datasets_label = cross_validation.construct_cv_folds(
        N_FOLDS, data_value_list, data_label_list)
    # print(datasets_label)

    Accuracy_scores = []
    HamLosses = []
    Precisions = []
    Recalls = []
    F1s = []
    AVGPRECs = []
    RANKLOSSes = []
    avg_Accuracy_score = 0
    avg_HamLoss = 0
    avg_Precision = 0
    avg_Recall = 0
    avg_F1 = 0
    avg_AVGPREC = 0
    avg_RANKLOSS = 0
    std_Accuracy_score = 0
    std_HamLoss = 0
    std_Precision = 0
    std_Recall = 0
    std_F1 = 0
    std_AVGPREC = 0
    std_RANKLOSS = 0

    # N-fold cross-validation: train on N-1 folds, test on the held-out fold
    for i in range(N_FOLDS):
        training_set_value = []
        training_set_label = []
        for j in range(N_FOLDS):
            if i != j:
                training_set_value = training_set_value + datasets_value[j]
                training_set_label = training_set_label + datasets_label[j]
        testing_set_value = datasets_value[i]
        testing_set_label = datasets_label[i]

        Djk_label_list, Djv_label_list = D_matrix_generate.D_matrix_generate(
            full_labels, training_set_label)
        model, max_and_min = Bayes.Naive_Bayes(attr_type_flag,
                                               training_set_value, full_labels,
                                               Djk_label_list, Djv_label_list,
                                               max_and_min)
        Accuracy_score, HamLoss, Precision, Recall, F1, AVGPREC, RANKLOSS = Bayes.classify(
            model, max_and_min, attr_type_flag, testing_set_value, full_labels,
            testing_set_label)

        Accuracy_scores.append(Accuracy_score)
        HamLosses.append(HamLoss)
        Precisions.append(Precision)
        Recalls.append(Recall)
        F1s.append(F1)
        AVGPRECs.append(AVGPREC)
        RANKLOSSes.append(RANKLOSS)

        avg_Accuracy_score += Accuracy_score
        avg_HamLoss += HamLoss
        avg_Precision += Precision
        avg_Recall += Recall
        avg_F1 += F1
        avg_AVGPREC += AVGPREC
        avg_RANKLOSS += RANKLOSS

    # Average each metric over the folds
    avg_Accuracy_score /= N_FOLDS
    avg_HamLoss /= N_FOLDS
    avg_Precision /= N_FOLDS
    avg_Recall /= N_FOLDS
    avg_F1 /= N_FOLDS
    avg_AVGPREC /= N_FOLDS
    avg_RANKLOSS /= N_FOLDS

    # Population standard deviation of each metric across the folds
    for i in range(N_FOLDS):
        std_Accuracy_score += (Accuracy_scores[i] - avg_Accuracy_score)**2
        std_HamLoss += (HamLosses[i] - avg_HamLoss)**2
        std_Precision += (Precisions[i] - avg_Precision)**2
        std_Recall += (Recalls[i] - avg_Recall)**2
        std_F1 += (F1s[i] - avg_F1)**2
        std_AVGPREC += (AVGPRECs[i] - avg_AVGPREC)**2
        std_RANKLOSS += (RANKLOSSes[i] - avg_RANKLOSS)**2
    std_Accuracy_score = math.sqrt(std_Accuracy_score / N_FOLDS)
    std_HamLoss = math.sqrt(std_HamLoss / N_FOLDS)
    std_Precision = math.sqrt(std_Precision / N_FOLDS)
    std_Recall = math.sqrt(std_Recall / N_FOLDS)
    std_F1 = math.sqrt(std_F1 / N_FOLDS)
    std_AVGPREC = math.sqrt(std_AVGPREC / N_FOLDS)
    std_RANKLOSS = math.sqrt(std_RANKLOSS / N_FOLDS)

    print(
        "AVG:\nAccuracy_score=%.4f %.4f\nHamLoss=%.4f %.4f\nPrecision=%.4f %.4f\nRecall=%.4f %.4f\nF1=%.4f %.4f\nAVGPREC=%.4f %.4f\nRANKLOSS=%.4f %.4f"
        % (avg_Accuracy_score, std_Accuracy_score, avg_HamLoss, std_HamLoss,
           avg_Precision, std_Precision, avg_Recall, std_Recall, avg_F1,
           std_F1, avg_AVGPREC, std_AVGPREC, avg_RANKLOSS, std_RANKLOSS))
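# --- Hedged sketch (illustrative only; not part of the original script) ---
# The per-fold mean and population standard deviation computed by the loops
# above can be expressed compactly with numpy; shown here with toy values.
import numpy as np

fold_scores = np.array([0.81, 0.79, 0.84, 0.80, 0.82])  # hypothetical per-fold metric
avg_score = fold_scores.mean()
std_score = fold_scores.std()  # ddof=0, matching the sqrt(sum / N_FOLDS) form used above
print("%.4f %.4f" % (avg_score, std_score))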
def main():
    ave_acc = []
    ave_f1 = []
    ave_hemiloss = []
    ave_precision = []
    ave_recall = []

    attr_type_flag, full_labels, data_value_list, data_label_list = data_processing.data_processing(
        DATASET_NAME, 0)

    # Fill in missing attribute and label entries with "0"
    for i in range(len(attr_type_flag)):
        for j in range(len(data_value_list)):
            if data_value_list[j].__contains__(str(i)) == False:
                data_value_list[j][str(i)] = "0"
    for i in range(len(full_labels)):
        for j in range(len(data_label_list)):
            if data_label_list[j].__contains__(str(i)) == False:
                data_label_list[j][str(i)] = "0"

    datasets_value, datasets_label = cross_validation.construct_cv_folds(
        N_FOLDS, data_value_list, data_label_list)

    for i in range(N_FOLDS):
        training_set_value = []
        training_set_label = []
        for j in range(N_FOLDS):
            if i != j:
                training_set_value = training_set_value + datasets_value[j]
                training_set_label = training_set_label + datasets_label[j]
        testing_set_value = datasets_value[i]
        testing_set_label = datasets_label[i]

        # -------------------- train model -------------------
        # 1. Transform the multilabel targets into a label powerset
        lp_label, random_k_labels_str, M, numberD = label_powerset(
            training_set_label, k, 0, 1)
        # 2. Train the model
        conditional_prob_dict, threshold_dict = train_model(
            attr_type_flag, data_value_list, training_set_value, numberD,
            lp_label, m_estimate)
        print("-------- Finish Training --------")

        # -------------------- test model -------------------
        # 1. Convert continuous values into discrete ones
        continuous_to_discrete(testing_set_value, attr_type_flag, threshold_dict)
        # 2. Test the model -- predict
        prediction = test_model(testing_set_value, testing_set_label,
                                conditional_prob_dict)
        # 3. Compare prediction and target
        lp_label_test, _, _, num_test = label_powerset(testing_set_label, k,
                                                       random_k_labels_str, 2)
        # 4. Evaluation
        # -- 4.1 Convert to binary
        binary_prediction = convert_to_binary(prediction, k)
        # -- 4.2 Confusion matrix
        label_confusion_parameter = confusion_matrix(binary_prediction,
                                                     testing_set_label)
        # -- 4.3 Evaluation metrics
        f1, precision, recall = cal_f1(label_confusion_parameter)
        acc = accuracy(prediction, lp_label_test)
        acc_2 = accuracy_matrix(label_confusion_parameter)
        hemiloss = sum(acc_2.values()) / k
        # 5. Accumulate the metrics for this fold
        ave_acc.append(acc)
        ave_f1.append(f1)
        ave_hemiloss.append(hemiloss)
        ave_precision.append(precision)
        ave_recall.append(recall)
        print("-------- Finish Testing --------")

    print("Dataset is : ", DATASET_NAME)
    print("k = ", k)
    print("accuracy---------", ave_acc)
    print("average accuracy-", sum(ave_acc) / N_FOLDS)
    print("hemiloss---------", [1 - x for x in ave_f1])
    print("average hemiloss-", 1 - sum(ave_f1) / N_FOLDS)
    print("precision--------", ave_precision)
    print("average precision", sum(ave_precision) / N_FOLDS)
    print("recall-----------", ave_recall)
    print("average recall---", sum(ave_recall) / N_FOLDS)
    print("f1---------------", ave_f1)
    print("average f1-------", sum(ave_f1) / N_FOLDS)
from keras.preprocessing.sequence import pad_sequences
import numpy as np
from gensim.models import KeyedVectors
from keras.layers import Dense, Input, Lambda, merge, dot, Subtract
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import GRU
from keras.layers.core import Dropout, Activation, Reshape
from keras.models import Model
import approximateMatch
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras.layers.merge import Concatenate
import keras.backend as K
import os, re

from data_processing import data_processing  # local preprocessing module used below

data_path = "H:/pubmed_adr/data/ADE-Corpus-V2/DRUG-AE.rel"
final_data, idx2word, idx2label, maxlen, vocsize, nclasses, tok_senc_adr, train_lex, test_lex, train_y, test_y = data_processing(
    data_path)

# Split tokenised sentences into train and test portions (first 4372 are training)
test_toks = []
test_tok_senc_adr = tok_senc_adr[4372:]
for i in test_tok_senc_adr:
    test_toks.append(i[0])

train_toks = []
train_tok_senc_adr = tok_senc_adr[:4372]
for i in train_tok_senc_adr:
    train_toks.append(i[0])

# Char embedding
char_per_word = []
char_word = []
char_senc = []
def model():
    def calc_precision(column):
        return (data_true.apply(
            lambda row: len(set(row['true_test']).intersection(row[column])) /
            min(len(row['true_test']) + 0.001, top_k),
            axis=1)).mean()

    def calc_recall(column):
        return (data_true.apply(
            lambda row: len(set(row['true_test']).intersection(row[column])) /
            min(len(row[column]) + 0.001, top_k),
            axis=1)).mean()

    def calc_fscore_precision(column):
        # F-beta score with beta = 0.5 (weights precision higher than recall)
        beta = 0.5
        precision = calc_precision(column)
        recall = calc_recall(column)
        fscore_precision = ((1 + beta**2) * precision * recall) / (beta**2 * precision + recall)
        return fscore_precision

    # Top-k recommendations
    top_k = 10

    # Connect to the PostgreSQL database
    db_connect = psycopg2.connect(host='127.0.0.1',
                                  port='5432',
                                  database='reccomendation_system',
                                  user='******',
                                  password='******')
    select_clients_data = 'SELECT * FROM analytics_client'
    select_categories_data = 'SELECT * FROM analytics_category'
    select_transactions_data = 'SELECT * FROM analytics_transaction'
    clients_data = sqlio.read_sql_query(select_clients_data, db_connect)
    categories_data = sqlio.read_sql_query(select_categories_data, db_connect)
    transaction_data = sqlio.read_sql_query(select_transactions_data, db_connect)

    # Data preprocessing module
    data_matrix, data_true = data_processing(clients_data, categories_data,
                                             transaction_data)

    # Train the model and predict the recommendations
    predictions, data_true = latent_factor_model_with_svd(
        data_matrix, data_true, top_k)

    # Evaluate the model with the F-score quality metric
    fscore = calc_fscore_precision("prediction_svd")

    # Map each recommended category to the clients it was recommended to
    dict_category_clients = {}
    data_true.reset_index(inplace=True)
    for row in range(data_true.shape[0]):
        categories = data_true.loc[row, 'prediction_svd']
        client = data_true.loc[row, 'client_id']
        for category in categories:
            if category not in dict_category_clients.keys():
                dict_category_clients[category] = []
                dict_category_clients[category].append(client)
            else:
                dict_category_clients[category].append(client)

    return dict_category_clients, fscore
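# --- Hedged illustration (toy numbers; not from the original pipeline) ---
# calc_fscore_precision above computes the F-beta score with beta = 0.5:
#   F_beta = (1 + beta^2) * P * R / (beta^2 * P + R)
# For example, with precision 0.40 and recall 0.20:
beta = 0.5
precision, recall = 0.40, 0.20
f_beta = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
print(round(f_beta, 4))  # 0.3333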
    y_pred = bst_model.predict(X_test)
    print("Classification Report: ", file=text_file)
    print(classification_report(y_test, y_pred), file=text_file)
    print("Testing Accuracy : " + str(accuracy_score(y_test, y_pred)), file=text_file)
    cnf_matrix = confusion_matrix(y_test, y_pred)
    return cnf_matrix


if __name__ == "__main__":
    df = pd.read_csv("dataSet_business.csv")
    text_file = open("./result/model_result.txt", "a+")
    text_file.write("\n")
    text_file.write("This is the KNN model")
    text_file.write("\n")

    X_train, X_test, y_train, y_test = data_processing()
    cnf_matrix = train(X_train, X_test, y_train, y_test, text_file)
    print("Confusion matrix", file=text_file)
    print(cnf_matrix, file=text_file)
    text_file.close()

    class_names = ["bad", "good"]
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          title='Confusion matrix__KNN Classifier')
std = np.array([
    362.02239963, 26.59734713, 92.56434046, 25.6864239, 51.21534631,
    71.97984122, 21.45158069, 18.20996105, 51.09030231, 17.70573799,
    36.89085642, 74.19714713, 34.06744095
])

# def load_data():
#     print("Preparing feature data...")
#     df = data_processing()
#     print("Data preparation complete!")
#     time.sleep(5)
#     print("Starting traffic flow forecasting...")
#     return df

print("Preparing feature data...")
df = data_processing()
print("Data preparation complete!")
time.sleep(2)
print("Starting traffic flow forecasting...")


def Lstm_forecast(rnn_unit, lr, md):
    tf.reset_default_graph()

    def forecast_Model(lk):
        # df = load_data()
        col = df.columns
        df1 = df[df['lk'] == lk]
        data = df1[col[3:16]].astype('int').values
        print(lk + " data is ready!")
        tf.reset_default_graph()
from data_processing import data_processing
import sys

# run:
#   python datasets_generate.py 0
# debug:
#   python datasets_generate.py 1
inputs = sys.argv[1]
if inputs == "1":
    debug = 1
else:
    debug = 0

# label_correct
DATA_PATH = "../cell_data/label_correct"
train_ratio = 6
val_ratio = 1
test_ratio = 3
ratio_list = [train_ratio, val_ratio, test_ratio]
data_processing(DATA_PATH, ratio_list, debug, label_correct=True)

# label_no_correct
DATA_PATH = "../cell_data/label_no_correct"
train_ratio = 6
val_ratio = 1
test_ratio = 3
ratio_list = [train_ratio, val_ratio, test_ratio]
data_processing(DATA_PATH, ratio_list, debug, label_correct=False)
def seed_all(seed):
    # Seed every RNG used in the pipeline for reproducibility
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def get_dataset(tokenizer, type_path, args):
    return TranslatorDataset(tokenizer=tokenizer,
                             data_dir=args.data_dir,
                             type_path=type_path,
                             max_len=args.max_seq_length)


seed_all(34)
data_processing()

with open('config.json', 'r') as f:
    args_dict = json.load(f)
args = argparse.Namespace(**args_dict)

model = T5Lightning(args)
print("T5Model created\n===========\n")

tokenizer = T5Tokenizer.from_pretrained('t5-base')
dataset = TranslatorDataset(tokenizer, 'data', 'train', 512)
print(len(dataset))

checkpoint_callback = pl.callbacks.ModelCheckpoint(filepath=args.output_dir,
                                                   prefix="checkpoint",
                                                   monitor="val_loss",
                                                   mode="min",
                                                   save_top_k=5)
from data_processing import data_processing
from comment_highlight import comment_highlight
import json

# Code for testing locally.
# Enter the name of the .json file to test;
# the file must exist as a .json file inside the data folder.
file_name = input()

with open('./data/' + file_name + '.json') as json_file:
    json_data = json.load(json_file)
    new_json_data = data_processing(json_data, file_name)
    print(new_json_data)

with open('./data/' + file_name + '_output_dummy.json') as json_file:
    json_data = json.load(json_file)
    comment_highlight(json_data, file_name)
tf_config.gpu_options.allow_growth = True
tf.Session(config=tf_config)

# LabelEncoder
le = LabelEncoder()
le.fit(label)

# KFold for self-projection
random_state = args.state
n_splits = args.n_splits
kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
k_kfold = 1

for train_index, test_index in kf.split(X=data, y=label):
    train_data = data[train_index, :]
    train_label = label[train_index]
    test_data = data[test_index, :]
    test_label = label[test_index]
    train_data, train_label, test_data, test_label = data_processing(
        train_data, train_label, test_data, test_label, peak_rate)

    # label to target
    train_target = le.transform(train_label)
    test_target = le.transform(test_label)

    # create data_train for training
    dtype = np.float32
    n_classes = max(train_target) + 1
    Data = {i: np.array(train_data[train_target == i, :]) for i in range(n_classes)}
    sample_shape = {i: sum(train_target == i) for i in range(n_classes)}  # sample_shape for each class
    data_train = Data[0]
    for i in range(n_classes - 1):
        data_train = np.vstack((data_train, Data[i + 1]))

    # get Variables of EpiAnno
    qmu, qsigma, qz, qw, qnoise = EpiAnno.Q(latent_dim, data_train.shape[1],
                                            n_classes, sample_shape)
    qmu_dict = {v.distribution.name.split("_")[0].split("/")[0][1:]: v for v in qmu}
# pfam_id = sys.argv[1]
# print(pfam_id)
# ipdb = sys.argv[2]
ipdb = int(ipdb)
# print(ipdb)
ext_name = '%s/%02d' % (pfam_id, ipdb)

# --------------------------------------------
# read data
pdb = np.load('%s/%s/pdb_refs.npy' % (data_path, pfam_id))
npdb = pdb.shape[0]

# data processing
s0, cols_removed = data_processing(data_path, pfam_id, ipdb,
                                   gap_seqs=0.2, gap_cols=0.2,
                                   prob_low=0.004, conserved_cols=0.8)
# np.savetxt('cols_removed.dat', cols_removed, fmt='%i')


# --------------------------------------------------------------------------
def contact_map(pdb, ipdb, cols_removed):
    pdb_id = pdb[ipdb, 5]
    pdb_chain = pdb[ipdb, 6]
    pdb_start, pdb_end = int(pdb[ipdb, 7]), int(pdb[ipdb, 8])
    # print('pdb id, chain, start, end, length:', pdb_id, pdb_chain, pdb_start, pdb_end, pdb_end - pdb_start + 1)

    # print('download pdb file')
    pdb_file = pdb_list.retrieve_pdb_file(pdb_id, file_format='pdb')
    # pdb_file = pdb_list.retrieve_pdb_file(pdb_id)
    chain = pdb_parser.get_structure(pdb_id, pdb_file)[0][pdb_chain]