def main():
    """Train and evaluate the ELMo classifier on the four-class data set.

    Runs the pipeline in order: environment setup, CSV loading, truncation
    of every text to at most ``max_len`` whitespace-separated tokens, model
    fitting with validation, and a printed evaluation on the test split.
    """
    # Paths
    train_csv = "./data/multi_train.csv"
    test_csv = "./data/multi_test.csv"
    checkpoint_dir = "./model_save"

    # Hyper-parameters
    n_epochs = 1
    batch_size = 128
    max_len = 50
    hidden_units = 64
    target_names = ['0', '1', '2', '3']
    n_classes = len(target_names)

    def as_clipped_column(texts):
        # Keep only the first max_len tokens of each text, then add a
        # trailing axis to the resulting object array.
        clipped = [' '.join(text.split()[:max_len]) for text in texts]
        return np.array(clipped, dtype=object)[:, np.newaxis]

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_csv, test_csv, n_classes)

    print("2. pre processing")
    # Convert all three splits to plain lists first, then clip them in the
    # same train / val / test order.
    raw_splits = (train_x.tolist(), val_x.tolist(), test_x.tolist())
    train_x, val_x, test_x = [as_clipped_column(split) for split in raw_splits]

    print("3. build model")
    model = ELMo(hidden_units=hidden_units,
                 data_type="multi",
                 category_size=n_classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    callbacks = create_callbacks(checkpoint_dir)
    model.fit(x=train_x,
              y=train_y,
              epochs=n_epochs,
              batch_size=batch_size,
              validation_data=(val_x, val_y),
              callbacks=callbacks)

    print("4. evaluation")
    evaluator = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluator.eval_classification(
        data_type="multi")

    print("## Target Names : ", target_names)
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
def main():
    """Train and evaluate the TextCNN classifier on the four-class data set.

    The embedding matrix is built by ``text_to_vector`` from the tokenizer's
    word index and the embedding file at ``./glove.6B.50d.txt``; the model is
    created with ``flag="pre_training"``.
    """
    # Paths
    train_csv = "./data/multi_train.csv"
    test_csv = "./data/multi_test.csv"
    checkpoint_dir = "./model_save"
    embedding_file = "./glove.6B.50d.txt"

    # Hyper-parameters
    n_epochs = 1
    batch_size = 256
    embedding_dim = 50
    target_names = ['0', '1', '2', '3']

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_csv, test_csv, len(target_names))

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. text to vector")
    embedding_matrix = text_to_vector(tokenizer.word_index,
                                      embedding_file,
                                      word_dimension=embedding_dim)

    print("4. build model")
    cnn_options = dict(
        sequence_len=train_x.shape[1],
        embedding_matrix=embedding_matrix,
        embedding_dim=embedding_dim,
        filter_sizes=[3, 4, 5],
        flag="pre_training",
        data_type="multi",
        category_num=len(target_names),
    )
    model = TextCNN(**cnn_options)
    model.compile(optimizer="adam",
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    callbacks = create_callbacks(checkpoint_dir)
    model.fit(x=train_x,
              y=train_y,
              epochs=n_epochs,
              batch_size=batch_size,
              validation_data=(val_x, val_y),
              callbacks=callbacks)

    print("5. evaluation")
    evaluator = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluator.eval_classification(
        data_type="multi")

    print("## Target Names : ", target_names)
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
def main():
    """Train and evaluate the TextCNN classifier on the binary data set.

    The model is created with ``flag="self_training"``; in that mode the
    ``embedding_matrix`` argument receives the tokenizer's word-index size
    plus one instead of a matrix.
    """
    # Paths
    train_csv = "../data/binary_train.csv"
    test_csv = "../data/binary_test.csv"
    checkpoint_dir = "./model_save"

    # Hyper-parameters
    n_epochs = 2
    batch_size = 256

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_csv, test_csv)

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. build model")
    cnn_options = dict(
        sequence_len=train_x.shape[1],
        embedding_matrix=len(tokenizer.word_index) + 1,
        embedding_dim=300,
        filter_sizes=[3, 4, 5],
        flag="self_training",
        data_type="binary",
    )
    model = TextCNN(**cnn_options)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    callbacks = create_callbacks(checkpoint_dir)
    model.fit(x=train_x,
              y=train_y,
              epochs=n_epochs,
              batch_size=batch_size,
              validation_data=(val_x, val_y),
              callbacks=callbacks)

    print("4. evaluation")
    evaluator = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluator.eval_classification(
        data_type="binary")

    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
#!/usr/bin/env python # -*- coding: utf-8 -*- # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # Author: "Chris Ward" <*****@*****.**> from datetime import datetime import os from utils import set_env from metrique.utils import debug_setup logger = debug_setup('metrique', level=10, log2stdout=True, log2file=False) env = set_env() exists = os.path.exists testroot = os.path.dirname(os.path.abspath(__file__)) cubes = os.path.join(testroot, 'cubes') fixtures = os.path.join(testroot, 'fixtures') cache_dir = env['METRIQUE_CACHE'] etc_dir = os.environ.get('METRIQUE_ETC') default_config = os.path.join(etc_dir, 'metrique.json') def db_tester(proxy): from metrique.utils import ts2dt from metrique import metrique_object as O
import ogusa
import run_ogusa
from taxcalc import Policy, Calculator

# Keys listed as expected; where they are checked is outside this excerpt.
EXPECTED_KEYS = (
    'policy',
    'consumption',
    'behavior',
    'growdiff_baseline',
    'growdiff_response',
    'gdp_elasticity',
)

TEST_FAIL = False

from utils import set_env
# Copy every key of the mapping returned by set_env() into module globals.
# REDISGREEN_URL and MOCK_CELERY used below are not defined anywhere else in
# this excerpt, so they presumably come from this call -- confirm.
globals().update(set_env())

celery_app = Celery('tasks2', broker=REDISGREEN_URL, backend=REDISGREEN_URL)
if MOCK_CELERY:
    CELERY_ALWAYS_EAGER = True
    BROKER_BACKEND = 'memory'
    CELERY_EAGER_PROPAGATES_EXCEPTIONS = True

    def task(func):
        # Stand-in for the celery task decorator: returns an object whose
        # ``delay`` attribute is the undecorated function itself.
        celery_app_like = Namespace(delay=func)
        return celery_app_like

    # Replace the real Celery app with an object exposing only ``task``.
    # NOTE(review): the nesting of ``task`` and this assignment under the
    # ``if`` is reconstructed from a collapsed line -- confirm.
    celery_app = Namespace(task=task)

#Create a Public Use File object
rn_seed = 80
if not MOCK_CELERY:
    # NOTE(review): the body of this branch lies beyond the visible excerpt.
#!/usr/bin/env python # -*- coding: utf-8 -*- # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # Author: "Chris Ward" <*****@*****.**> import os from utils import set_env from metrique.utils import debug_setup logger = debug_setup('metrique', level=10, log2stdout=True, log2file=False) env = set_env() exists = os.path.exists testroot = os.path.dirname(os.path.abspath(__file__)) cubes = os.path.join(testroot, 'cubes') fixtures = os.path.join(testroot, 'fixtures') cache_dir = env['METRIQUE_CACHE'] def test_parse_fields(): from metrique.parse import parse_fields OK_list = ['a', 'b', 'c'] OK_dct = {'a': 1, 'b': 1, 'c': 1} all_ = '~' fields_str = 'c , b , a ' fields_list = ['a ', 'c ', ' b'] fields_dct = {' a': 1, 'b ': 1, 'c ': 1} fields_dct_BAD = {' a': None, 'b ': 1, 'c ': 1}
def main():
    """Train and evaluate the BERT classifier on the four-class data set.

    Texts are truncated to at most ``max_len`` whitespace-separated tokens,
    converted to examples and then to (input ids, input masks, segment ids,
    labels) features before fitting. The test split is evaluated and the
    report, confusion matrix and accuracy are printed.
    """
    # Paths
    train_csv = "../data/multi_train.csv"
    test_csv = "../data/multi_test.csv"
    checkpoint_dir = "./model_save"

    # Hyper-parameters
    n_epochs = 2
    batch_size = 256
    max_len = 50
    hidden_units = 64
    target_names = ['0', '1', '2', '3']
    n_classes = len(target_names)

    def as_clipped_column(texts):
        # Keep only the first max_len tokens of each text, then add a
        # trailing axis to the resulting object array.
        clipped = [' '.join(text.split()[:max_len]) for text in texts]
        return np.array(clipped, dtype=object)[:, np.newaxis]

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_csv, test_csv, n_classes)

    print("2. pre processing")
    # Convert all three splits to plain lists first, then clip them in the
    # same train / val / test order.
    raw_splits = (train_x.tolist(), val_x.tolist(), test_x.tolist())
    train_x, val_x, test_x = [as_clipped_column(split) for split in raw_splits]

    tokenizer = create_tokenizer_from_hub_module()

    train_examples = convert_text_to_examples(train_x, train_y)
    val_examples = convert_text_to_examples(val_x, val_y)
    test_examples = convert_text_to_examples(test_x, test_y)

    train_features = convert_examples_to_features(
        tokenizer, train_examples, max_len)
    train_input_ids, train_input_masks, train_segment_ids, train_labels = \
        train_features
    val_features = convert_examples_to_features(
        tokenizer, val_examples, max_len)
    val_input_ids, val_input_masks, val_segment_ids, val_labels = val_features
    test_features = convert_examples_to_features(
        tokenizer, test_examples, max_len)
    # NOTE(review): test_labels is unpacked but never used; the evaluation
    # below is given test_y instead.
    test_input_ids, test_input_masks, test_segment_ids, test_labels = \
        test_features

    print("3. build model")
    model = BERT(max_len, data_type="multi", category_size=n_classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # NOTE(review): ``sess`` is not defined in this function; it must be
    # provided at module level.
    initialize_vars(sess)

    # The placeholders in the file name are left for the checkpoint callback
    # to fill in, so this must stay a plain (non-f) string.
    weights_path = model_dir_join = checkpoint_dir + "/model-weights.{epoch:02d}-{val_acc:.6f}.hdf5"
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=weights_path,
        monitor='val_acc',
        save_best_only=True,
        verbose=1)

    train_inputs = [train_input_ids, train_input_masks, train_segment_ids]
    val_inputs = [val_input_ids, val_input_masks, val_segment_ids]
    model.fit(
        train_inputs,
        train_labels,
        validation_data=(val_inputs, val_labels),
        epochs=n_epochs,
        batch_size=batch_size,
        callbacks=[cp_callback]
    )

    print("4. evaluation")
    test_inputs = [test_input_ids, test_input_masks, test_segment_ids]
    evaluator = Evaluation(model, test_inputs, test_y)
    accuracy, cf_matrix, report = evaluator.eval_classification_bert(
        data_type="multi")

    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
import os
import uuid
import threading
from threading import Thread, Event, Lock
import time

from flask import Flask, request, make_response
import pandas as pd
import taxcalc
# NOTE(review): pandas.util.testing is deprecated (pandas 1.0) and removed in
# pandas 2.0; the supported location is pandas.testing.assert_frame_equal.
from pandas.util.testing import assert_frame_equal
import requests
import redis
from retrying import retry

from utils import set_env
# Copy every key of the mapping returned by set_env() into module globals.
# This runs before the celery_tasks import below; presumably that module
# depends on the environment being configured first -- confirm.
globals().update(set_env())
from celery_tasks import (celery_app, dropq_task_async,
                          dropq_task_small_async,
                          ogusa_async, elasticity_gdp_task_async,
                          btax_async, example_async, MOCK_CELERY)

# Flask application object for this module.
app = Flask(__name__)

server_url = "http://localhost:5050"
queue_name = "celery"
# Redis client bound to the host name "redis" on port 6379.
client = redis.Redis(host="redis", port=6379)
# Module-level dicts, initially empty; how they are filled is outside this
# excerpt.
RUNNING_JOBS = {}
TRACKING_TICKETS = {}
# NOTE(review): the statements down to ``return 0`` are the tail of a
# function whose header and original nesting lie before this excerpt. They
# are laid out flat here; restore the indentation against the full file.

# Track the best score seen so far; ``ascore`` is the comparison metric.
comscore = ascore
is_best = comscore > best_score
best_score = max(comscore,best_score)
# NOTE(review): the outer braces make this a one-element set, so the logged
# value is the set's repr rather than the bare string. ``{:.4f}`` is also
# applied to ``epoch+1``, which prints it as a float.
infostr = {'Epoch: {:.4f} loss: {:.4f},gs: {:.4f},bs:{:.4f} ,ms:{:.4f},as:{:.4f}'.format(epoch+1,val_loss,val_score,best_score,mscore,ascore)}
logging.info(infostr)
# Checkpoint payload: next epoch number, model/optimizer/scheduler state
# dicts and the best score. ``model.module`` is read, so the model is
# presumably wrapped (e.g. DataParallel) -- confirm.
save_checkpoint(
    {'epoch': epoch + 1,
     'state_dict': model.module.state_dict(),
     'optimizer' : optimizer.state_dict(),
     'scheduler' : scheduler.state_dict(),
     'best_score': best_score
     }, is_best, outdir=conf['outdir'])
# NOTE(review): ``test`` is called with ``train_loader``, not a test loader
# -- confirm this is intended.
test(model , train_loader)
print('Best val acc: {}'.format(best_score))
return 0


if __name__ == '__main__':
    # get configs and set envs
    conf = get_config()
    set_env(conf)
    # generate outdir name
    set_outdir(conf)
    # Set the logger
    set_logger(conf)
    main(conf)
# Run from the base directory so the relative paths below resolve.
if os.getcwd() != bd:
    os.chdir(bd)

import utils

# Locations of the result folders and input files, plus flags selecting
# which data sets utils.set_env should read.
info_dict = {
    "base_dir": bd,
    "gtoi_res": "dev_for_container/gtoi_res",
    "itod_res": "dev_for_container/fxvb_run_cv_{}",
    "data_file": "brain_snp_covars_meancentered_scaled.h5",
    "cv_idx": "dev_for_container/cv_splits.csv",
    "bf_cv_file": "dev_for_container/bf_info_cv_{}_for_fxvb.csv",
    "fxvb_cv": "dev_for_container/fxvb_run_cv_{}",
    "read_imaging": True,
    "read_genomic": False,
    "read_depvar": True,
}

# Required sizes: int(10e4) evaluates to 100000.
nsnp = int(10e4)
nimg = 170

# Load the data described by info_dict.
data = utils.set_env(info_dict)

# Prediction accuracies across all folds.
fold_results = utils.predict_all_folds(info_dict, data, "beta", False)
classif_results, roc_auc_insample, roc_auc_outsample = fold_results

# PIP values for plotting on the brain.
df_with_itod_pips = utils.get_itod_pip(info_dict, data, nimg)

# PIP values for the Manhattan plots.
gtoi_pip = utils.get_gtoi_pip(info_dict, 1, nsnp, nimg)

# Bayes factor for each brain region after the first regression.
gtoi_bf = utils.get_gtoi_bayes_factor(info_dict, data, nimg)