def _process_item(self, download_url, result_url, **kw):
    token = kw.get('security_token', '')
    tmp = TemporaryDirectory(prefix='msds-', dir=WORKDIR)
    outdir = os.path.join(tmp.name, 'out')
    json_file = os.path.join(outdir, 'all.json')
    result_file = os.path.join(outdir, 'single_chem.json')

    # download the safety data sheet PDF
    r = requests.get(download_url)
    if r.status_code != 200:
        return
    with open(os.path.join(tmp.name, 'sdb.pdf'), 'wb') as fp:
        fp.write(r.content)

    # parse the PDF into JSON
    sdbparser.batch_call(outdir, [tmp.name], True, UBA_FILE)
    if not os.path.isfile(json_file):
        return
    with open(json_file, encoding='utf-8') as fp:
        data = json.load(fp)
    try:
        prepare.prepare_data(data[0], outdir)
    except Exception:
        pass

    # post the prepared result back, tagged with the security token
    if os.path.isfile(result_file):
        with open(result_file, encoding='utf-8') as fp:
            result = json.load(fp)
        result['security_token'] = token
        requests.post(result_url, json=result)
def main():
    """Get the shortest path for the given parameters."""
    parser = argparse.ArgumentParser(description='Shortest Route')
    parser.add_argument('-g', '--graph', help='input graph file',
                        type=str, dest='graph', required=True)
    parser.add_argument('-s', '--start', help='start node',
                        type=str, dest='start', required=True)
    parser.add_argument('-f', '--finish', help='finish node',
                        type=str, dest='finish', required=True)
    args = parser.parse_args()

    node_dictionary = prepare_data(args.graph)
    route = prepare_routes(node_dictionary)
    route.find_shortest_route(args.start, args.finish)
    print(route.get_route_distance(route.shortest_route))
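A hypothetical invocation of the CLI above; the script name shortest_route.py and the file graph.txt are illustrative, not taken from the source.

# Hypothetical usage (script and file names are assumptions):
#   python shortest_route.py --graph graph.txt --start A --finish B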
def predict(predict_conf):
    # load data
    _, data = load_pkl_data(predict_conf.path_data)

    # load model meta data
    meta = load_pkl_data(predict_conf.path_meta)
    meta_image_shape = meta['ModelConf'].img_shape
    meta_re_sample_type = meta['ModelConf'].img_re_sample
    meta_text_len = meta['ModelConf'].text_length
    meta_label_num = len(meta['label2id'])
    meta_id2label = {v: k for k, v in meta['label2id'].items()}

    # load model
    model = keras.models.load_model(
        predict_conf.path_model,
        custom_objects={"CoAttentionParallel": CoAttentionParallel})

    # prepare data
    _, _, data_test = prepare_data(data, meta_image_shape, meta_re_sample_type,
                                   meta_text_len, meta_label_num, 0, 0)

    # predict with trained model
    x_test, y_test = data_test
    y_predict = model.predict(x_test)
    y_true = y_test.tolist()

    # save predictions
    save_pkl_data(predict_conf.path_predictions, [y_predict, y_test])

    # print metric results
    scores = evaluate(y_true, y_predict, predict_conf.threshold)
    label_names = [meta_id2label[i] for i in range(len(meta_id2label))]
    display_scores(scores, label_names)
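The helpers load_pkl_data and save_pkl_data used above (and in the training code further down) are not defined in these snippets; the sketch below is only a guess, assuming they are thin wrappers around the standard pickle module.

# Hypothetical sketch only: assumes load_pkl_data/save_pkl_data are plain
# pickle wrappers; the project's real helpers may differ.
import pickle


def load_pkl_data(path):
    # unpickle and return the single object stored at `path`
    with open(path, 'rb') as fp:
        return pickle.load(fp)


def save_pkl_data(path, data):
    # pickle `data` to `path`, overwriting any existing file
    with open(path, 'wb') as fp:
        pickle.dump(data, fp)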
def split_data_with_conf(data, label_size, train_conf, model_conf):
    train_ratio, valid_ratio, test_ratio = normalize_data_ratio(
        train_conf.train_ratio, train_conf.valid_ratio, train_conf.test_ratio)
    data_train, data_valid, data_test = prepare.prepare_data(
        data, model_conf.img_shape, model_conf.img_re_sample,
        model_conf.text_length, label_size, train_ratio, valid_ratio)
    return data_train, data_valid, data_test
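normalize_data_ratio is called here and again in the training code further down but is not shown; a minimal sketch, assuming it only rescales the three ratios so they sum to 1.

# Hypothetical sketch only: assumes normalize_data_ratio rescales the three
# ratios to sum to 1; the project's actual implementation is not shown here.
def normalize_data_ratio(train_ratio, valid_ratio, test_ratio):
    total = train_ratio + valid_ratio + test_ratio
    if total <= 0:
        raise ValueError('the three ratios must sum to a positive value')
    return train_ratio / total, valid_ratio / total, test_ratio / total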
def calculate_idf_score(df):
    df = pd.read_json("data.json")
    df = prepare.prep_readme_data(df)
    df = prepare.prepare_data(df)
    languages = df.is_top_language.unique()
    idf_scores = pd.DataFrame()
    for language in languages:
        words = clean(' '.join(
            df[df.is_top_language == language].clean_lemmatized))
        idf_df = return_words_with_idf(words)
        idf_df["language"] = language
        idf_scores = pd.concat([idf_scores, idf_df])
    return idf_scores
def return_words_with_idf(words):
    df = pd.read_json("data.json")
    df = prepare.prep_readme_data(df)
    df = prepare.prepare_data(df)

    def idf(word):
        return df.shape[0] / (1 + (df.clean_lemmatized.str.contains(word)).sum())

    # put the unique words into a data frame
    idf_df = (pd.DataFrame(dict(word=words))
              # calculate the idf for each word
              .assign(idf=lambda df: df.word.apply(idf))
              # sort the data for presentation purposes
              .set_index('word')
              .sort_values(by='idf', ascending=False)
              .head(5))
    return idf_df
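As a worked example of the score computed by idf above: with 100 rows and a word contained in 4 of them, the value is 100 / (1 + 4) = 20, so rarer words sort to the top of idf_df.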
def main():
    subgraphs, ids_by_length, ids_by_number_of_matched_files, lengths, jsons = prepare_data(
        DATE)
    subgraphs = sort_subgraphs(subgraphs, lengths, ids_by_length)
    #file_length('116746_gd1990-03-14s1t02.flac', lengths)
    #json.dump(jsons, open('jsons.json', 'w'))
    #json.dump(lengths, open('lengths.json', 'w'))
    #json.dump(subgraphs, open('subgraphs.json', 'w'))
    #sys.exit()
    all_partitions = []
    partition_jkeys = []
    for sub in subgraphs:
        chains = []  # json keys of chained alignments
        sub_partitions = []
        for s in list(sub.values())[0]:
            if len(s) > 1:
                jkey = track_tuple_to_json_id((s[0], s[1]))
                chains.append(s + list(sub.keys()))
            else:
                jkey = track_tuple_to_json_id((s[0], list(sub.keys())[0]))
            dtw = jsons[jkey]['dtw']
            # swap columns to match the order of file names/lengths
            dtw = [[x[1], x[0]] for x in dtw]
            tuning_diff = jsons[jkey]['tuning_diff']
            partitions = get_partition_bounds(dtw, jkey)
            partitions = fill_gaps(jkey, partitions, lengths,
                                   jsons[jkey]['tuning_diff'])
            all_partitions.append(partitions)
            partition_jkeys.append(jkey)
            target_folder = os.path.join('plots', DATE)
            if not os.path.exists(target_folder):
                os.mkdir(target_folder)
            fname = f'{target_folder}/{jkey}'
            #print(fname)
            #json.dump(sorted(partitions, key=lambda x: x[0][0]), open(fname+'.json', 'w'))
            #sys.exit()
            #plotFigure(partitions, jkeys[0], lengths, fname, dtw, jsons)
            #break
        for c in chains:
            all_partitions, partition_jkeys = process_chain(
                c, all_partitions, partition_jkeys, jsons, lengths)
        #break
        #json.dump(all_partitions, open('all_partition.json', 'w'))
        #break
    all_partitions, partition_jkeys = cleanResult(subgraphs, all_partitions,
                                                  partition_jkeys)
    result = {}
    for key, value in zip(partition_jkeys, all_partitions):
        result[key] = value
    result['unmatched'] = jsons['unmatched']
    json.dump(result, open('all_partition.json', 'w'))
    #pprint(partition_jkeys)
    #plot_timelines(timelines, names, outfile)
parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml") # parser.add_argument("--ignore_display", dest="display", action="store_false", default=True) parser.add_argument("--display", action="store_true", default=False) parser.add_argument("--frame_interval", type=int, default=1) parser.add_argument("--display_width", type=int, default=800) parser.add_argument("--display_height", type=int, default=600) parser.add_argument("--save_path", type=str, default="./output/") parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True) parser.add_argument("--camera", action="store", dest="cam", type=int, default="-1") return parser.parse_args() if __name__ == "__main__": print('start mot') start_time = time.time() args = parse_args() prepare_data(args.data_path) tracks = os.listdir(r'./dataset/test-c') for track in tracks: track_data_folder = os.path.join(r'./dataset/test-c', track, 'img1') im = cv2.imread(os.path.join(track_data_folder, '00000.jpg')) i_h, i_w,_ = im.shape os.system('cgexec -g memory:myGroup python vis_zhongxing.py --data_path '+track_data_folder+'--track_name'+track) post_process(args.result_path, './output/'+track+'.txt')
def pred_death_P():
    predict = pre.predict_death_P()
    res = predict.astype(str).to_json(orient='records')
    return res


@app.route("/pred_death_A")  # prediction of the number of deaths - Prophet model
def pred_death_A():
    res = pre.predict_death_A()
    res = res.to_json(orient='records')
    return res


if __name__ == "__main__":
    pre.prepare_data()
    cursor = hive.connect(host='localhost').cursor()
    os.system("docker cp res.csv server_hive-server_1:/opt/hive/bin/res.csv")
    # drop the old table
    cursor.execute("""DROP TABLE IF EXISTS covid""")
    # create the table if needed and skip the first line of the CSV file
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS covid(country STRING, prov STRING, confirm INT,
        recov INT, death INT, jour STRING)
        ROW FORMAT DELIMITED FIELDS TERMINATED BY ';'
        tblproperties('skip.header.line.count'='1')""")
    cursor.execute(
        "LOAD DATA LOCAL INPATH '/opt/hive/bin/res.csv' OVERWRITE INTO TABLE covid"
    )
    app.run()
def split_scale_data():
    df = prepare_data()
    train, validate, test = split_data(df)
    # select the features to be scaled
    X = train.select_dtypes(include=['float']).columns
    return scale_data(train, validate, test, X)
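split_data and scale_data come from elsewhere in the project and are not shown; below is a minimal sketch of what scale_data might look like, assuming a MinMaxScaler fitted on the training split only (the scaler choice and the exact signature are assumptions).

# Hypothetical sketch only: assumes scale_data fits a MinMaxScaler on the
# training split and applies it to all three splits for the columns in X.
from sklearn.preprocessing import MinMaxScaler


def scale_data(train, validate, test, X):
    scaler = MinMaxScaler()
    train, validate, test = train.copy(), validate.copy(), test.copy()
    train[X] = scaler.fit_transform(train[X])
    validate[X] = scaler.transform(validate[X])
    test[X] = scaler.transform(test[X])
    return train, validate, test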
def train(model_conf, train_conf):
    # set up random seed
    random.seed(train_conf.random_seed)

    # check the output path
    if not os.path.exists(train_conf.path_output):
        os.makedirs(train_conf.path_output)

    # load and statistics
    (vocab2id, label2id), data = load_pkl_data(train_conf.path_data)
    id2vocab = {v: k for k, v in vocab2id.items()}
    id2label = {v: k for k, v in label2id.items()}
    token_size, label_size = len(id2vocab), len(id2label)
    label_names = [id2label[i] for i in range(len(id2label))]
    print('label size:', label_size, 'token size:', token_size)
    print('label names:', label_names)

    # split data
    train_ratio, valid_ratio, test_ratio = normalize_data_ratio(
        train_conf.train_ratio, train_conf.valid_ratio, train_conf.test_ratio)
    data_train, data_valid, data_test = prepare.prepare_data(
        data, model_conf.img_shape, model_conf.img_re_sample,
        model_conf.text_length, label_size, train_ratio, valid_ratio)
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = \
        data_train, data_valid, data_test
    print('train: {0}; valid: {1}; test: {2}'.format(
        len(y_train), len(y_valid), len(y_test)))

    # train and test
    scores = []
    predict_threshold = 0.5
    for i in range(train_conf.repeat_times):
        print('{sp}\ntime {i}\n{sp}'.format(sp='=' * 20, i=i))

        # prefix to save the training process
        path_prefix = os.path.join(
            train_conf.path_output,
            'model_{}_{}'.format(train_conf.code_name, i))

        # create and train the model
        model = create_model_with_conf(token_size, label_size, model_conf)
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])

        # init callbacks
        path_cp = path_prefix + '.cp'
        es = EarlyStopping(monitor=train_conf.monitor_type,
                           patience=train_conf.early_stop_patients)
        cp = ModelCheckpoint(filepath=path_cp,
                             monitor=train_conf.monitor_type,
                             save_best_only=True)

        # fit the model
        history = model.fit(x_train, y_train,
                            batch_size=train_conf.batch_size,
                            epochs=train_conf.epochs,
                            verbose=train_conf.verbose,
                            validation_data=(x_valid, y_valid),
                            callbacks=[cp, es])

        # save training history
        save_on_condition(train_conf.is_log_history,
                          path_prefix + '.his', history.history)

        # save the trained model
        model.save(path_prefix + '.h5')

        # save the training meta data, e.g., TrainConf, vocab2id, label2id
        save_pkl_data(path_prefix + '.meta', {
            'ModelConf': model_conf,
            'vocab2id': vocab2id,
            'label2id': label2id
        })

        # test if test_ratio > 0
        if test_ratio > 0:
            # predict with trained model
            model.load_weights(path_cp)
            y_predict = model.predict(x_test)
            y_true = y_test.tolist()

            # save prediction
            if train_conf.is_log_prediction:
                path_predict = path_prefix + '.predictions'
                save_pkl_data(path_predict, [y_predict, y_test])

            # evaluate
            scores_current = metrics.evaluate(y_true, y_predict,
                                              predict_threshold)
            metrics.display_scores(scores_current, label_names)
            scores.append(scores_current)

        # prepare for the next loop
        if train_conf.is_data_refresh:
            data_train, data_valid, data_test = prepare.prepare_data(
                data, model_conf.img_shape, model_conf.img_re_sample,
                model_conf.text_length, label_size, train_ratio, valid_ratio)
            (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = \
                data_train, data_valid, data_test

    if test_ratio > 0 and len(scores) > 0:
        # average score
        avg_scores = metrics.compute_mean_var(scores)
        metrics.display_average_scores(avg_scores, label_names,
                                       train_conf.repeat_times)

        # store average score
        if train_conf.is_log_avg_score:
            path_avg = os.path.join(
                train_conf.path_output,
                'result_{}.avg.txt'.format(train_conf.code_name))
            with codecs.open(path_avg, mode='w', encoding='UTF-8') as fo:
                metrics.display_average_scores(avg_scores, label_names,
                                               train_conf.repeat_times,
                                               is_k_print=True, fo=fo)
@Email : [email protected]
@File  : main.py
'''
import cv2

from SVM import SVM
from prepare import prepare_data

cell_class = {11: 'EOSINOPHIL', 22: 'LYMPHOCYTE', 33: 'MONOCYTE', 44: 'NEUTROPHIL'}
types = ['hog', 'gray', 'rgb', 'hsv']
feature_type = types[3]

img_path = './data/test_data/LYMPHOCYTE/_0_1050.jpeg'  # adjust the test image
img = cv2.imread(img_path)
cv2.putText(img, 'LYMPHOCYTE', (23, 45), cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 0, 255), 1)
cv2.imshow('Result', img)
cv2.waitKey()

svm = SVM()
data = prepare_data(feature_type)
print('data:', data)
svm.train(data)

img = cv2.imread(img_path)
ID_num = svm.predict(img, feature_type)
import tensorflow as tf
import random

from prepare import prepare_data, get_batch_data

class_num = 2
learning_rate = 0.0005
training_epochs = 20
batch_size = 20
width = 128
height = 128

data = prepare_data(class_num)

keep_prob = tf.placeholder(tf.float32)
input_image = tf.placeholder(tf.float32, [None, width, height, 3])
label = tf.placeholder(tf.float32, [None, class_num])

filters = {
    'cf1': tf.Variable(tf.random_normal([3, 3, 3, 32], stddev=0.01)),
    'cf2': tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01)),
    'cf3': tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01)),
    'cf4': tf.Variable(tf.random_normal([3, 3, 128, 256], stddev=0.01))
}

cl1 = tf.nn.conv2d(input_image, filters['cf1'], strides=[1, 1, 1, 1], padding='SAME')
cl1 = tf.nn.max_pool(cl1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# cl1: [-1, width / 2, height / 2, 32]

cl2 = tf.nn.conv2d(cl1, filters['cf2'], strides=[1, 1, 1, 1], padding='SAME')
cl2 = tf.nn.relu(cl2)
cl2 = tf.nn.max_pool(cl2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
import pandas as pd
import matplotlib.pyplot as plt
import acquire
import prepare
import math
from sklearn import metrics

df = acquire.acquire_data()
df = prepare.prepare_data(df)


def split_store_data(df, train_prop=.7):
    train_size = int(len(df) * train_prop)
    train, test = df[0:train_size], df[train_size:len(df)]
    return train, test


train, test = split_store_data(df)
target_vars = ['steps']
yhat = pd.DataFrame(test[target_vars])


def evaluate(target_var, train=train, test=test, output=True):
    mse = metrics.mean_squared_error(test[target_var], yhat[target_var])
    rmse = math.sqrt(mse)
    if output:
        print('MSE: {}'.format(mse))
        print('RMSE: {}'.format(rmse))
    else:
        return mse, rmse
            print(said)
        except Exception as e:
            print("Exception: " + str(e))

    return said


tags = []  # contains all the different tags
all_questions_list = []  # contains the questions with their words tokenized
questions_tags = []  # contains the tag corresponding to each question in the list above
all_question_words = []  # contains all the words in all the questions of the dataset

pr = prepare_data(data)
all_question_words, tags, all_questions_list, questions_tags = pr.prepare(
    data, "intents", "all_questions", "tag")

all_questions_train = []
tags_output = []
all_questions_train, tags_output = pr.get_training_set()
all_questions_train = np.array(all_questions_train)
tags_output = np.array(tags_output)

tf.reset_default_graph()

model = create_model(all_questions_train, tags_output, tags, all_question_words)
model.fit_model(all_questions_train, tags_output)
def prepare(args, config):
    logger = logging.getLogger('BugLoc')
    logger.info('Preparing data ...')
    generate_ast(args, config)
    prepare_data(args, config)
    logger.info('Done preparing data...')