def main():
    """
    capture the config path from the run arguments
    then process the json configuration file
    """
    try:
        args = get_args()
        config = process_config(args.config)
    except FileNotFoundError as e:
        print('missing or invalid arguments')
        exit(0)

    create_dirs([
        config.callbacks.tensorboard_log_dir, config.callbacks.checkpoint_dir
    ])

    print('Get dataframe from csv')
    train_df, test_df = get_df_from_config(config)
    train_df['class'] = train_df['class'].astype('str')
    train_df = train_df[['img_file', 'class']]
    test_df = test_df[['img_file']]

    print('Create model')
    model = Resnet50Model(config)

    print('Create trainer')
    trainer = Resnet50Trainer(model, train_df, config)

    print('Start training model')
    trainer.train_with_cv(k=4)
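Nearly every snippet on this page leans on the same three helpers: get_args() reads the config path from the command line, process_config() turns that JSON file into an attribute-accessible config object, and create_dirs() creates the experiment directories. The following is a minimal sketch of what these helpers commonly look like (argparse, json, and DotMap); it is an illustrative assumption, not the exact implementation used by the projects above.

import argparse
import json
import os

from dotmap import DotMap  # assumed: any attribute-style dict wrapper works


def get_args():
    # Parse the -c/--config argument that points to the JSON config file.
    parser = argparse.ArgumentParser(description='Experiment runner')
    parser.add_argument('-c', '--config', metavar='C', default=None,
                        help='path to the configuration file')
    return parser.parse_args()


def process_config(json_file):
    # Load the JSON config and expose it with dotted access,
    # e.g. config.callbacks.tensorboard_log_dir.
    with open(json_file, 'r') as f:
        config_dict = json.load(f)
    return DotMap(config_dict)


def create_dirs(dirs):
    # Create each experiment directory, ignoring ones that already exist.
    for d in dirs:
        os.makedirs(d, exist_ok=True)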
Example #2
def style_transfer(	content_url, 
					style_url, 
					output_file='output/output.jpg', 
					ngpu=1, 
					content_weight=1, 
					style_weight=1e6, 
					save_every=400,
					make_new=False, 
					steps=2000):

	# get default args
	args = get_args(base_args=True)

	# update args 
	args.content_url = content_url
	args.style_url = style_url
	args.output_file = output_file
	args.ngpu = ngpu
	args.content_weight = content_weight
	args.style_weight = style_weight
	args.save_every = save_every
	args.make_new = make_new
	args.steps = steps

	# transfer style
	transfer(args)
Example #3
File: play.py  Project: minostauros/READ
def play_main():
  args = get_args(ALL_MODELS)

  try:
    main(args)
  except Exception as e:
    raise
Example #4
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)

        # create the experiments dirs
        create_dirs([
            config.callbacks.tensorboard_log_dir,
            config.callbacks.checkpoint_dir
        ])

        print('Create the data generator.')
        data_loader = factory.create("data_loader." +
                                     config.data_loader.name)(config)

        # generator = data_loader.get_train_generator()
        # generator.__getitem__(0)
        print('Create the model.')
        model = factory.create("models." + config.model.name)(config)

        print('Create the trainer')
        trainer = factory.create("trainers." + config.trainer.name)(
            model.model, data_loader.get_train_data(),
            data_loader.get_validation_data(), config)

        print('Start training the model.')
        trainer.train()

        trainer.save_model()

    except Exception as e:
        print(e)
        sys.exit(1)
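Several examples instantiate their components through factory.create(...), passing a dotted "module.ClassName" path such as "models." + config.model.name. A common way to implement such a factory is with importlib, roughly as sketched below; the actual factory module in these projects may differ.

import importlib


def create(cls_path):
    # Split "package.module.ClassName" into module path and class name,
    # import the module, and return the class so the caller can instantiate it:
    #     factory.create("models." + config.model.name)(config)
    module_name, class_name = cls_path.rsplit('.', 1)
    module = importlib.import_module(module_name)
    return getattr(module, class_name)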
Example #5
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    args = get_args()
    config = process_config(args.config)

    # if hasattr(config,"comet_api_key"):
    #     from comet_ml import Experiment

    # create the experiments dirs
    create_dirs([
        config.callbacks.tensorboard_log_dir, config.callbacks.checkpoint_dir,
        config.preprocessor.data_dir
    ])

    print('Creating the preprocessor.')
    preprocessor = factory.create("preprocessors." +
                                  config.preprocessor.name)(config)
    preprocessor.preprocess()

    print('Create the data generator.')
    data_loader = factory.create("data_loaders." +
                                 config.data_loader.name)(config)

    print('Create the model.')
    model = factory.create("models." + config.model.name)(config)

    print('Create the trainer')
    trainer = factory.create("trainers." + config.trainer.name)(
        model.model, data_loader.get_train_data(), config)

    print('Start training the model.')
    trainer.train()
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)

        amp, sr = librosa.load(config.predictor.predict_file_path, sr=8000)
        amp = audio_norm(amp)  # normalize
        if amp.shape[0] < config.model.duration * sr:
            # reflect-pad so the left and right sides get equal padding
            amp = np.pad(amp,
                         int(np.ceil((10 * sr - amp.shape[0]) / 2)),
                         mode='reflect')
        amp = amp[:config.model.duration * sr]
        data = np.expand_dims(amp, axis=0)
        data = np.expand_dims(data, axis=-1)

        print('Create the model.')
        model = factory.create("models." + config.model.name)(config)

        predictor = factory.create("predictors." + config.predictor.name)(
            model.model, data, config)
        predictor.predict()
        sys.stdout.flush()
    except Exception as e:
        print(e)
        sys.stdout.flush()
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)

        # create the experiments dirs
        create_dirs([config.callbacks.tensorboard_log_dir, config.callbacks.checkpoint_dir])

        print('Create the data generator.')
        data_loader = factory.create("data_loader."+config.data_loader.name)(config)

        print('Create the model.')
        model = factory.create("models."+config.model.name)(config)

        if config.skip_train!=True:
            print('Create the trainer')
            trainer = factory.create("trainers."+config.trainer.name)(model.model, data_loader.get_train_data(), config)

            print('Start training the model.')
            trainer.train()
        
        # evaluator = factory.create("evaluators."+config.evaluator.name)(model.model, data_loader.get_test_data(), config)
        # evaluator.evaluate()
        evaluator = factory.create("evaluators."+config.evaluator.name)(model.model, data_loader.get_test_data(), config)
        
        result, y = evaluator.evaluate()
        result_idx = np.argmax(result,axis=1)
        y_idx = np.argmax(y,axis=1)
        print(classification_report(y_idx, result_idx))  # y_true first, then y_pred
    except Exception as e:
        print(e)
        sys.exit(1)
Example #8
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)

    except:
        print("missing or invalid arguments")
        exit(0)

    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])
    # create tensorflow session
    sess = tf.Session()
    # create your data generator
    data = ExampleDataLoader(config)
    
    # create an instance of the model you want
    model = ExampleModel(config)
    # create tensorboard logger
    logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    trainer = ExampleTrainer(sess, model, data, config, logger)
    # load model if exists
    model.load(sess)
    # here you train your model
    trainer.train()
    # close session
    sess.close()
    def __init__(
        self,
        data_path='data/soccer/',
        vec_dim=300,
        # fasttext_model='/home/debanjan/acl_submissions/soccerbot_acl/vocab/wiki.simple.bin'):
        fasttext_model='/data/dchaudhu/soccerbot_acl/vocab/wiki.en.bin'):
        self.data_path = data_path
        self.max_similarity = 85
        self.vec_dim = vec_dim
        cap_path = datapath(fasttext_model)
        self.word_emb = load_facebook_model(cap_path)
        # print (self.max_er_vec)
        self.stop = set(stopwords.words('english'))
        self.punc = string.punctuation
        self.train_dataset = self.get_data('train')
        self.val_dataset = self.get_data('val')
        self.test_dataset = self.get_data('test')
        self.max_er_vec = []  # max er vector combination size
        for dat in self.train_dataset:
            self.max_er_vec.append(sum(len(v) for k, v in dat['kgER'].items()))
        self.max_out_reln = np.max(self.max_er_vec)
        self.inp_graph_max_size = np.max(
            [len(getER_vec(kg['kgER'])) for kg in self.train_dataset])
        print('input graph size:' + str(self.inp_graph_max_size))
        print(self.max_out_reln)
        self.objects = ['o' + str(j) for j in range(self.max_out_reln)]
        self.args = get_args()
        # Create vocabulary and word2id
        self.vocab = defaultdict(float)
        self.get_vocab(self.train_dataset)
        self.get_vocab(self.test_dataset)
        self.get_vocab(self.val_dataset)
        self.vocab[self.args.unk_tok] += 1.0
        self.vocab[self.args.sos_tok] += 1.0
        self.vocab[self.args.eos_tok] += 1.0
        for o in self.objects:
            self.vocab[o] += 1.0

        self.stoi = dict(zip(self.vocab.keys(), range(0, len(self.vocab))))
        # add additional tokens
        # self.stoi[self.args.unk_tok] = len(self.stoi)
        # self.stoi[self.args.sos_tok] = len(self.stoi)
        # self.stoi[self.args.eos_tok] = len(self.stoi)
        # print(len(self.stoi))
        # self.itos = {v: k for k, v in self.stoi.items()}

        # for j in range(self.max_out_reln):
        #     self.stoi['o'+str(j)] = len(self.stoi)+1
        # del self.stoi

        self.itos = {v: k for k, v in self.stoi.items()}
        print(len(self.stoi))
        self.n_words = len(self.stoi)

        self.vectors = np.zeros((len(self.stoi), vec_dim))
        for w, w2i in self.stoi.items():
            if w2i < self.stoi[self.args.eos_tok]:
                self.vectors[w2i] = self.word_emb.wv[w]
Example #10
    def __init__(
        self,
        data_path='data/incar/',
        vec_dim=300,
        # fasttext_model='/home/debanjan/acl_submissions/soccerbot_acl/vocab/wiki.simple.bin'):
        fasttext_model='/home/deep/Emphatic_VW/emotion_classifer-cnn/vectors/wiki.en.bin'
    ):
        self.data_path = data_path
        # self.max_similarity = 85
        self.vec_dim = vec_dim

        cap_path = datapath(fasttext_model)
        self.word_emb = load_facebook_model(cap_path)
        # print (self.max_er_vec)
        self.stop = set(stopwords.words('english'))
        self.punc = string.punctuation
        self.er_dict, self.global_ent, self.eo_dict = self.get_kg(
            self.data_path + 'KG/')

        self.args = get_args()
        self.train_dataset = self.get_data('train')
        self.val_dataset = self.get_data('val')
        self.test_dataset = self.get_data('test')

        self.entitites = [d['e'] for d in self.train_dataset]
        self.entitites = list(set(self.entitites))
        #  Create the vocab
        self.vocab = defaultdict(float)

        # self.vocab[pos]
        self.get_vocab(self.train_dataset)
        self.get_vocab(self.test_dataset)
        self.get_vocab(self.val_dataset)

        # Add additional tokens to vocab
        self.vocab[self.args.unk_tok] += 1.0
        self.vocab[self.args.sos_tok] += 1.0
        # self.vocab[self.args.ent_tok] += 1.0
        if not self.args.use_bert:
            self.vocab[self.args.eou_tok] += 1.0
        self.vocab[self.args.eos_tok] += 1.0

        self.stoi = dict(zip(self.vocab.keys(), range(1, len(self.vocab) + 1)))
        self.stoi[self.args.pad_tok] = 0

        self.itos = {v: k for k, v in self.stoi.items()}
        print(len(self.stoi))
        self.n_words = len(self.stoi)

        self.vectors = np.zeros((len(self.stoi), vec_dim))
        for w, w2i in self.stoi.items():
            # if w2i < self.stoi[self.args.eos_tok]:
            self.vectors[w2i] = self.get_w2v(w)
        self.ent_dict = dict(
            zip(list(self.entitites), range(0, len(self.entitites))))
    def __init__(self,
                 data_path='data/incar/',
                 fasttext_model=os.path.join(os.getcwd(),
                                             'data/wiki.simple.bin'),
                 batch_size=32,
                 max_sent_len=20,
                 vec_dim=300,
                 max_resp_len=15,
                 gpu=False):
        self.args = get_args()
        self.data_path = data_path
        self.batch_size = batch_size
        self.max_sent_len = max_sent_len
        self.max_out_len = max_resp_len
        self.vec_dim = vec_dim
        self.gpu = gpu
        self.n_graph_features = 1
        cap_path = datapath(fasttext_model)
        # self.word_emb = load_facebook_model(cap_path)

        # Load Datasets and preprocess files
        self.train_dataset = np.load(self.data_path + 'preproc_files/' +
                                     'train.npy',
                                     allow_pickle=True)
        self.val_dataset = np.load(self.data_path + 'preproc_files/' +
                                   'val.npy',
                                   allow_pickle=True)
        self.test_dataset = np.load(self.data_path + 'preproc_files/' +
                                    'test.npy',
                                    allow_pickle=True)

        self.stoi = np.load(self.data_path + 'preproc_files/' + 'stoi.npy',
                            allow_pickle=True).item()
        self.etoi = np.load(self.data_path + 'preproc_files/' + 'etoi.npy',
                            allow_pickle=True).item()
        self.vectors = np.load(self.data_path + 'preproc_files/' + 'wemb.npy',
                               allow_pickle=True)
        self.itos = {v: k for k, v in self.stoi.items()}
        self.itoe = {v: k for k, v in self.etoi.items()}
        self.er_dict, self.ent_list, self.eo_dict = self.get_kg(data_path +
                                                                'KG/')

        # Maximum graph input feature
        # self.max_er_vec = []  # max er vector combination size
        # for dat in self.train_dataset:
        #     self.max_er_vec.append(sum(len(v) for k, v in dat['kgER'].items()))
        # self.max_out_reln = np.max(self.max_er_vec)
        # Data Statistics

        self.n_words = len(self.stoi)
        self.n_train = len(self.train_dataset)
        self.n_val = len(self.val_dataset)
        self.n_test = len(self.test_dataset)
Example #12
def main():
    ##########################################################
    # TensorFlow configuration
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    k.tensorflow_backend.set_session(tf.Session(config=tf_config))
    ##########################################################

    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)

        # create the experiments dirs
        create_dirs([
            config.callbacks.tensorboard_log_dir,
            config.callbacks.checkpoint_dir
        ])

        logger('Creating data generators ...'.format(datetime.now()))
        data_loader = {
            'train':
            factory.create("data_loader." + config.data_loader.name)(
                config, subset='train', shuffle=True),
            'eval':
            factory.create("data_loader." + config.data_loader.name)(
                config, subset='eval')
        }

        logger('Creating the model ...'.format(datetime.now()))
        model = factory.create("models." + config.model.name)(config)

        logger('Creating the trainer ...'.format(datetime.now()))
        if config.model.num_gpus > 1:
            trainer = factory.create("trainers." + config.trainer.name)(
                model.parallel_model, data_loader, config)
        else:
            trainer = factory.create("trainers." + config.trainer.name)(
                model.model, data_loader, config)

        logger('Starting model training ...'.format(datetime.now()))
        trainer.train()

        logger('Training has finished!'.format(datetime.now()))

    except Exception as e:
        logger(e)
        sys.exit(1)
Example #13
def main():
    # Get the command line arguments.
    args = get_args()

    # Build the vocabulary.
    vocab, rev_vocab = make_vocab()

    if args.data_file is not None:
        # Get training data from the user.
        add_data(os.path.join(args.data_folder, args.data_file), vocab)
    else:
        # Run the bot.
        talk(args, vocab, rev_vocab)

    return
Example #14
def main():

    try:
        args = get_args()
        config = process_config(args.config)

        logger = configure_logger(level=logging.DEBUG)
        logger.info('Starting train.py...')
        logger.debug(config)

        # create the experiments dirs
        create_dirs([config.callbacks.checkpoint_dir])

        logger.info('Creating the data generator.')
        data_loader = factory.create("data_loaders." +
                                     config.data_loader.name)(config)

        # For this project the number of classes is only known
        # at runtime so we add that to the configuration.
        config.n_classes = data_loader.n_classes
        logger.debug('Running with {} classes.'.format(config.n_classes))

        logger.info('Creating model.')
        model = factory.create("models." + config.model.name)(config)

        logging.info('Creating trainer')
        trainer = factory.create("trainers." + config.trainer.name)(
            model, data_loader, config)

        logging.info('Running trainer')
        trainer.train()

        logging.info('Loading evaluators')
        evaluators = []
        for evaluator in config.evaluators:
            evaluators.append(
                factory.create("evaluators." + evaluator.name)(model,
                                                               data_loader,
                                                               evaluator))

        logging.info('Evaluating...')
        for evaluator in evaluators:
            evaluator.evaluate()

    except Exception as e:
        print(e)
        sys.exit(1)
Example #15
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    args = get_args()
    config = process_config(args.config)

    # if hasattr(config,"comet_api_key"):
    #     from comet_ml import Experiment

    # create the experiments dirs
    create_dirs([
        config.callbacks.tensorboard_log_dir, config.callbacks.checkpoint_dir,
        config.preprocessor.data_dir
    ])

    print('Creating the preprocessor.')
    preprocessor = factory.create("preprocessors." +
                                  config.preprocessor.name)(config)
    preprocessor.preprocess()
def data_dump():
    args = get_args()

    es = Elasticsearch([dict(host=IP, port=PORT)])

    time_frame_start = args.time_frame_start
    time_frame = args.time_frame
    search_term = args.search_term

    request = construct_request(es, search_term, time_frame_start, time_frame)

    result = es.search(
        size=10000,  # hard limit for es.search()
        body=request)

    results = [i['_source'] for i in result['hits']['hits']]

    with open(args.output, 'w') as f:
        f.write(json.dumps(results))

    return results
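The construct_request helper used by data_dump above is not shown. A plausible sketch, assuming numeric epoch timestamps and hypothetical field names ("message" and "@timestamp"), combines a full-text match with a time-range filter in standard Elasticsearch query DSL:

def construct_request(es, search_term, time_frame_start, time_frame):
    # `es` is unused here but kept to match the call site above.
    # Assumes numeric epoch timestamps; the field names are assumptions.
    time_frame_end = time_frame_start + time_frame
    return {
        "query": {
            "bool": {
                "must": [
                    {"match": {"message": search_term}}
                ],
                "filter": [
                    {"range": {"@timestamp": {"gte": time_frame_start,
                                              "lte": time_frame_end}}}
                ]
            }
        }
    }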
Example #17
def check(hamsters):

    args = get_args()
    for hamster in hamsters:
        potato = hamster.split(',')
        potato[-1] = potato[-1].strip()
        try:
            un = potato[1]
            pw = potato[2]
            driver = webdriver.PhantomJS()
            driver.get(LOGIN_URL)
            WebDriverWait(driver, args.timeout).until(
                EC.title_contains('Trainer Club'))
            user = driver.find_element_by_id('username')
            passw = driver.find_element_by_id('password')
            user.clear()
            user.send_keys(un)
            passw.clear()
            passw.send_keys(pw)
            passw.send_keys(Keys.RETURN)
            try:
                WebDriverWait(driver, args.timeout).until(
                    EC.title_contains('Official'))
                if args.ignoreunactivated:
                    try:
                        if driver.find_element_by_id('id_country') > 0:
                            print ','.join(potato)
                    except Exception:
                        driver.quit()
                        continue
                else:
                    print ','.join(potato)
                    driver.quit()
            except TimeoutException:
                continue
            finally:
                driver.quit()

        except IndexError:
            continue
Example #18
def main():
    # capture the config path from the run arguments
    # then process the json configuration file

    try:
        args = get_args()
        config, config_dict = process_config(args.config)

    except:
        print("missing or invalid arguments")
        exit(0)

    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])

    with open(os.path.join(config.experiment_dir, 'config.json'), 'w') as fp:
        json.dump(config, fp)

    print('Create the data generator.')
    data = factory.create("data_loaders." + config.data['name'])(config)

    print('Create the model.')
    model = factory.create("models." + config.model['name'])(config)

    # create tensorflow session
    tfconfig = tf.ConfigProto()
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)

    # create tensorboard logger
    logger = Logger(sess, config)

    print('Create the trainer')
    trainer = factory.create("trainers." + config.trainer['name'])(sess, model,
                                                                   data,
                                                                   config,
                                                                   logger)

    print('Start training the model.')
    trainer.train()
Example #19
def main():
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except:
        print("missing or invalid arguments")
        #exit(0)
    # create the experiments dirs
    create_dirs([config.callbacks.checkpoint_dir])

    # preprocess required data
    # build dictionary
    unique_concepts = get_intersect_concepts(config.data.csvpair, config.data.glove_condition_emb, config.data.glove_drug_emb)
    build_dictionary(unique_concepts, config.data.save_dir)

    # build weight matrices
    build_n2v_matrix(config.data.n2v_emb, os.path.join(config.data.save_dir, "concept2id"), config.data.save_dir)
    build_glove_matrix(config.data.glove_condition_emb, config.data.glove_drug_emb,
                       os.path.join(config.data.save_dir, "concept2id"),
                       config.data.save_dir)

    # generate training pairs
    generate_training_pairs(config.data.csvpair,
                            os.path.join(config.data.save_dir, "concept2id"),
                            config.data.glove_condition_emb, config.data.glove_drug_emb,
                            config.data.save_dir)

    # split training pairs into batch
    split_into_batch(config.data.training_pairs, 500000, config.data.training_batch)

    # set the number of GPU that will be used
    os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
    print('Create the model')
    enhanced_model = EnhancedModel(config)
    print('Create the trainer')
    trainer = EnhancedModelTrainer(enhanced_model.model, config)
    print('Start training')
    trainer.train()
    print('Start generating enhanced representations')
    generator = EnhancedModelGenerator(config)
    generator.generate_enhanced_rep()
    print('Enhanced representations have been generated')
Example #20
def main():

    try:
        args = get_args()
        config = process_config(args.config)

        # create the experiments dirs
        create_dirs([config.callbacks.checkpoint_dir])

        print('Create the data generator.')
        data_loader = factory.create("data_loader." +
                                     config.data_loader.name)(config)

        print('Create the model.')
        model = factory.create("models." + config.model.name)(config)

        print('Create the trainer')
        trainer = factory.create("trainers." + config.trainer.name)(
            model, data_loader, config)

        print('Loading evaluators')
        evaluators = []
        for evaluator in config.evaluators:
            evaluators.append(
                factory.create("evaluators." + evaluator.name)(model,
                                                               data_loader,
                                                               evaluator))

        print('Start training the model.')
        trainer.train()

        print('Evaluating...')
        for evaluator in evaluators:
            evaluator.evaluate()

    except Exception as e:
        print(e)
        sys.exit(1)
Example #21
def main():

    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)

        # create the experiments dirs
        create_dirs([
            config.callbacks.tensorboard_log_dir,
            config.callbacks.checkpoint_dir,
        ])

        print("Create the data generator.")
        data_loader = factory.create("VisionEngine.data_loaders." +
                                     config.data_loader.name)(config)

        print("Create the model.")
        model = factory.create("VisionEngine.models." +
                               config.model.name)(config)

        if config.model.loadckpt:
            print("loading model checkpoint")
            model.load(config.model.ckpt_path)

        print("Create the trainer")
        trainer = factory.create("VisionEngine.trainers." +
                                 config.trainer.name)(
                                     model.model, data_loader.get_train_data(),
                                     config)

        print("Start training the model.")
        trainer.train()

    except Exception as e:
        print(e)
        sys.exit(1)
def main():

    args = get_args()
    config = process_config(args)

    print '\n', config, '\n'

    if config.rgb:
        print '-' * 60
        print 'Training on RGB images'
        print '-' * 60
        train_rgb(config)

    elif config.combined:
        print '-' * 60
        print 'Training on Combined images'
        print '-' * 60
        train_combined(config)

    else:
        print '-' * 60
        print 'Training on Persistence images'
        print '-' * 60
        train_persistence(config)
                eval_drift(generator, train_loader, args)

        print(RESULTS[run, 0, -1].mean(), RESULTS[run, 1, -1].mean())

        # calculate forgetting:
        max_valid = RESULTS[run, 0].max(axis=0)
        fgt_valid = (max_valid - RESULTS[run, 0, -1])[:-1].mean()

        max_test = RESULTS[run, 1].max(axis=0)
        fgt_test = (max_test - RESULTS[run, 1, -1])[:-1].mean()

        wandb.log({
            'fgt_valid': fgt_valid,
            'acc_valid': RESULTS[run, 0, -1].mean(),
            'fgt_test': fgt_test,
            'acc_test': RESULTS[run, 1, -1].mean()
        })

        if not args.debug:
            # save model
            os.makedirs('/checkpoint/lucaspc/aqm/' + args.name, exist_ok=True)
            save_path = os.path.join('/checkpoint/lucaspc/aqm/', args.name,
                                     'gen.pth')
            torch.save(generator.state_dict(), save_path)


if __name__ == '__main__':
    args = get_args()
    print(args)
    main(args)
Example #24
    def __init__(self,
                 data_path='data/incar/',
                 pretrained_weights='bert-base-uncased',
                 use_bert=True,
                 fasttext_model='data/wiki.simple.bin',
                 batch_size=32,
                 max_sent_len=20,
                 vec_dim=300,
                 max_resp_len=15,
                 gpu=False,
                 domain='incar'):
        # fasttext_model='/home/debanjan/acl_submissions/soccerbot_acl/vocab/wiki.simple.bin',
        self.args = get_args()
        self.data_path = data_path
        self.batch_size = batch_size
        self.max_sent_len = max_sent_len
        self.max_out_len = max_resp_len
        self.vec_dim = vec_dim
        self.gpu = gpu
        self.n_graph_features = 1
        cap_path = datapath(fasttext_model)
        self.word_emb = load_facebook_model(cap_path)
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
        # SRC and TRG vocabularies
        self.src_vocab = defaultdict(float)
        self.trg_vocab = defaultdict(float)
        self.trg_vocab[self.args.sos_tok] = 1.0
        self.trg_vocab[self.args.eos_tok] = 1.0
        self.trg_vocab[self.args.unk_tok] = 1.0
        self.src_vocab[self.args.unk_tok] = 1.0
        # Load Datasets and preprocess files
        self.train_dataset = np.load(self.data_path + 'preproc_files_kg/' +
                                     'train.npy',
                                     allow_pickle=True)
        # random.shuffle(self.train_dataset)
        self.val_dataset = np.load(self.data_path + 'preproc_files_kg/' +
                                   'val.npy',
                                   allow_pickle=True)
        self.test_dataset = np.load(self.data_path + 'preproc_files_kg/' +
                                    'test.npy',
                                    allow_pickle=True)
        # Create vocabularies
        self.create_vocab(self.train_dataset)
        self.create_vocab(self.val_dataset)
        self.create_vocab(self.test_dataset)
        self.src_stoi = dict(
            zip(self.src_vocab.keys(), range(0, len(self.src_vocab.keys()))))
        self.src_itos = {v: k for k, v in self.src_stoi.items()}
        self.trg_stoi = dict(
            zip(self.trg_vocab.keys(), range(0, len(self.trg_vocab.keys()))))
        self.trg_itos = {v: k for k, v in self.trg_stoi.items()}

        # self.stoi = np.load(self.data_path+'preproc_files_kg/'+'stoi.npy', allow_pickle=True).item()
        self.etoi = np.load(self.data_path + 'preproc_files_kg/' + 'etoi.npy',
                            allow_pickle=True).item()
        # self.vectors = np.load(self.data_path+'preproc_files_kg/'+'wemb.npy', allow_pickle=True)
        # Remove vectors which are not present in source stoi
        self.src_vectors = np.zeros((len(self.src_stoi), self.vec_dim))
        self.trg_vectors = np.zeros((len(self.trg_stoi), self.vec_dim))
        # for w, i in self.src_stoi.items():
        #     self.src_vectors[i] = self.get_w2v(w)
        # for w, i in self.trg_stoi.items():
        #     self.trg_vectors[i] = self.get_w2v(w)
        # self.itos = {v: k for k, v in self.stoi.items()}
        self.itoe = {v: k for k, v in self.etoi.items()}

        self.er_dict, self.global_ent, self.eo_dict, self.e_o_1hop, self.e_r_l = self.get_kg(
            data_path + 'KG/', dat=domain)

        # Maximum graph input feature
        # self.max_er_vec = []  # max er vector combination size
        # for dat in self.train_dataset:
        #     self.max_er_vec.append(sum(len(v) for k, v in dat['kgER'].items()))
        # self.max_out_reln = np.max(self.max_er_vec)
        # Data Statistics

        # self.n_words = len(self.stoi)
        self.n_train = len(self.train_dataset)
        self.n_val = len(self.val_dataset)
        self.n_test = len(self.test_dataset)
Example #25
def main():
    try:
        args = get_args()
        config = process_config(args.config)
    except:
        print("missing or invalid arguments")
        exit(0)

    # create the experiments dirs
    create_dirs([
        config.callbacks.tensorboard_log_dir, config.callbacks.checkpoint_dir
    ])

    print('Create the data generator.')
    semcor_dl = SemcorDataLoader(config)
    # X_trn, y_trn = semcor_dl.get_data()
    X_trn, y_trn = semcor_dl.read_data()
    # y_trn = y_trn[0]  # get senses only

    senses = semcor_dl.read_senses()
    poses = semcor_dl.get_pos()
    lexes = semcor_dl.get_lexes()

    # build vocab
    t_x = Tokenizer(oov_token='<UNK>')
    t_x.fit_on_texts(X_trn)

    input_vocab = list(t_x.word_index.keys())
    senses_vocab = input_vocab + senses

    # build output tokenizer
    t_y_senses = create_output_vocab_dict(senses_vocab)
    t_y_pos = create_output_vocab_dict(poses, start=2)
    t_y_lex = create_output_vocab_dict(lexes, start=2)

    mask = np.asarray(semcor_dl.create_mask(t_y_senses.word_index, len(X_trn)))

    # save tokenizer
    with open('tokenizer.pic', 'wb') as f:
        import pickle
        pickle.dump(t_y_senses, f)

    # set config params
    config.model.vocab_size = len(input_vocab)
    config.model.output_size = config.model.vocab_size + len(senses) + 1
    config.model.pos_size = len(poses) + 2
    config.model.lex_size = len(lexes) + 2
    config.trainer.examples_size = len(X_trn)

    print('Create the model.')
    model = MultiTaskModel(config).model
    model.summary()

    print('Create the trainer')
    trainer = BlstmTrainer(model, config)

    print('Start training the model.')
    # convert to sequences
    use_elmo = bool(config.model.use_elmo)

    if not use_elmo:
        X_trn = t_x.texts_to_sequences(X_trn)

    X_trn = np.asarray(X_trn)
    y_trn = [
        np.asarray(t_y_senses.texts_to_sequences(y_trn[0])),
        np.asarray(t_y_pos.texts_to_sequences(y_trn[1])),
        np.asarray(t_y_lex.texts_to_sequences(y_trn[2]))
    ]

    trainer.train((X_trn, y_trn, mask))

    model.save('model.h5')
Example #26
        }, {
            "short": "-f",
            "help": "absolute path of csv file",
            "dest": "file_path",
            "type": str
        }, {
            "short": "-s",
            "help": "row number to start with",
            "dest": "start_row",
            "type": int,
            "default": 0
        }, {
            "short": "-t",
            "help": "write to table",
            "dest": "write_table",
            "type": str
        }]
        parsed_args = get_args(cli_args)

        connection = connect_to_mysql_db(
            config.DATABASES['cmos_db']['connection'])
        cursor = connection.cursor()
        process(parsed_args)

    except Exception as e:
        print(e)
    finally:
        connection.close()
        print("Connection closed. Total affected rows = {}".format(
            cursor.rowcount))
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        # Parse args
        args = get_args()
        config = process_config(args.config)

        # if debug mode is on, add it on the config dotmap
        if args.debug == True:
            from dotmap import DotMap
            config.debug = True
            print("==== Debugging on ====")

        # comet_ml needs to be imported before Keras
        # if the config file has a comet_ml key, log on comet
        if hasattr(config, "comet_api_key"):
            from comet_ml import Experiment  # PUT the import in main
            experiment = Experiment(api_key=config.exp.comet_api_key,
                                    project_name=config.exp.name)
            config.exp_handle = experiment

        ## extra imports to set GPU options
        import tensorflow as tf
        from keras import backend as k
        ###################################
        # TensorFlow wizardry
        tf_config = tf.ConfigProto()

        # Don't pre-allocate memory; allocate as-needed
        tf_config.gpu_options.allow_growth = True

        # Allow up to 90% of the GPU memory to be allocated
        tf_config.gpu_options.per_process_gpu_memory_fraction = 0.9

        # Create a session with the above options specified.
        k.tensorflow_backend.set_session(tf.Session(config=tf_config))

        # create the experiments dirs
        create_dirs([
            config.callbacks.tensorboard_log_dir,
            config.callbacks.checkpoint_dir
        ])

        print('Create the data generator.')
        data_loader = factory.create("data_loader." +
                                     config.data_loader.name)(config)

        print('Create the model.')
        model = factory.create("models." + config.model.name)(config)

        print('Create the trainer')
        # fit_generator needs a validation data generator
        if (config.trainer.validation_split):  # don't give test data
            trainer = factory.create("trainers." + config.trainer.name)(
                model.model, data_loader.get_train_data(), config)
        else:
            trainer = factory.create("trainers." + config.trainer.name)(
                model.model,
                data_loader.get_train_data(),
                config,
                valid_data=data_loader.get_test_data())

        print('Start training the model.')
        trainer.train()

    except Exception as e:
        print(e)
        sys.exit(1)
Example #28
def main():
    args = get_args()

    torch.manual_seed(0)
    np.random.seed(0)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using:", device)

    # Logs vars
    metadata_path = '../metadata'

    # Train vars
    ini_epoch = 0
    n_epochs = args["n_epochs"]
    lr = args["lr"]
    wd = args["wd"]
    opt_name = args["opt_name"]

    # Network vars
    dropout_prob = args["dropout_prob"]

    # Get optimizer vars
    optimizer_kwargs = get_optimizer_parameters(opt_name, lr=lr, wd=wd)

    # Dataset vars
    dataset_name = args["dataset_name"]

    dataset_params = get_dataset_params(dataset_name)

    model_name = args["model_name"]
    pretrained = args["pretrained"]

    model_params, batch_size = get_model_params(model_name,
                                                dataset_name,
                                                dataset_params["n_labels"],
                                                pretrained=pretrained,
                                                bh=False,
                                                dropout_prob=dropout_prob)

    data_augmentation_kwargs = {
        "data_augmentation": args["data_augmentation"],
        "rotation_angle": 30,
        "img_size": model_params["img_size"],
        "crop_ratio": 0.8
    }

    dataset_type = get_dataset_by_model(model_name, dataset_name)

    # Dataset loaders
    dataset_path_pattern = '../datasets/{}/kfold_lists/without_crop/groups/{}-600-{}.list'

    # Creating model
    model = ModelFactory.factory(**model_params)
    model.to(device)

    print(model.parameters)

    # Train set
    # files = [dataset_path_pattern.format(dataset_name, mode, 0) for mode in ["train", "validation"]]
    files = dataset_path_pattern.format(dataset_name, "train", 0)

    train_set = DatasetFactory.factory(
        dataset_type=dataset_type,
        csv_file=files,
        n_frames=model_params["n_frames"],
        n_blocks=model_params["n_blocks"],
        frames_per_block=model_params["frames_per_block"],
        train=True,
        **dataset_params,
        **data_augmentation_kwargs)

    trainloader = torch.utils.data.DataLoader(train_set,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=20)

    # Test set
    files = dataset_path_pattern.format(dataset_name, "validation", 0)
    test_set = DatasetFactory.factory(
        dataset_type=dataset_type,
        csv_file=files,
        n_frames=model_params["n_frames"],
        n_blocks=model_params["n_blocks"],
        frames_per_block=model_params["frames_per_block"],
        train=False,
        **dataset_params,
        **data_augmentation_kwargs)

    testloader = torch.utils.data.DataLoader(test_set,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=20)

    # Create optimizer
    optimizer = OptimizerFactory.factory(model.parameters(),
                                         **optimizer_kwargs)

    filename = args["checkpoint_path"]
    if filename != "":
        if os.path.isfile(filename):
            ini_checkpoint_experiment(filename, model_name, dataset_name)

            print("=> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            ini_epoch = checkpoint['epoch']

            if isinstance(model, torch.nn.DataParallel):
                model.module.load_state_dict(checkpoint['state_dict'])
            else:
                model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                filename, checkpoint['epoch']))

        else:
            print("=> no checkpoint found at '{}'".format(filename))
            exit()
    else:
        # Init experiment
        model_dir, log_dir, experiment_id = init_experiment(
            metadata_path, dataset_name, model_name)

    train_metrics = train(model=model,
                          optimizer=optimizer,
                          ini_epoch=ini_epoch,
                          n_epochs=n_epochs,
                          device=device,
                          trainloader=trainloader,
                          testloader=testloader,
                          model_dir=model_dir,
                          log_dir=log_dir)

    print("test acc:", train_metrics["test"]["acc"])
    print("Kfold finished log path:", log_dir)

    msg = """
    ```
    Exp Name: `{}`
    Host Machine: `{}`
    acc list: `{}`
    log path: `{}`
    ```
    """.format(experiment_id, os.environ["HOSTNAME"],
               train_metrics["test"]["acc"], log_dir)
    send_slack(msg)
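The training run above ends by pushing a summary message through send_slack. A typical implementation posts to a Slack incoming webhook with requests; the sketch below assumes the webhook URL comes from an environment variable (SLACK_WEBHOOK_URL is hypothetical, not taken from the project).

import os

import requests


def send_slack(msg):
    # Post `msg` to a Slack incoming webhook. The SLACK_WEBHOOK_URL
    # environment variable is an assumption for this sketch.
    webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
    if not webhook_url:
        return
    requests.post(webhook_url, json={"text": msg}, timeout=10)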
Example #29
    def __init__(self, dataset, model):
        self.dataset = dataset
        self.model = model
        self.args = get_args()
        self.loss_func = mdn_loss_func
        self.optimizer = RMSprop(self.model.parameters())
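Example #29 wires mdn_loss_func in as the trainer's loss. For a mixture density network the usual choice is the negative log-likelihood of a Gaussian mixture; a minimal one-dimensional PyTorch sketch is shown below, assuming the model outputs mixture weights pi, standard deviations sigma, and means mu (the project's own implementation may differ).

import math

import torch


def mdn_loss_func(pi, sigma, mu, target):
    # Negative log-likelihood of a 1-D Gaussian mixture.
    # pi, sigma, mu: (batch, n_components); target: (batch,)
    target = target.unsqueeze(1).expand_as(mu)
    gauss = torch.exp(-0.5 * ((target - mu) / sigma) ** 2) / (sigma * math.sqrt(2.0 * math.pi))
    likelihood = torch.sum(pi * gauss, dim=1)  # sum over mixture components
    return -torch.log(likelihood + 1e-8).mean()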
Example #30
    def smooth_uniform(self, verts, smoothness=0.03):
        new_verts = verts + smoothness * self.L.dot(verts)
        return new_verts

    def smooth(self, verts, smoothness=0.03):
        return self.smooth_uniform(verts, smoothness) if self.Ltype == "uniform" else self.smooth_cotlap(verts, smoothness)


if __name__ == "__main__":
    import os
    import global_var
    from utils.args import get_args
    from tqdm import tqdm

    garment_class = 'smooth_TShirtNoCoat'
    gender, list_name = get_args()

    with open(os.path.join(global_var.ROOT, '{}_{}.txt').format(gender, list_name)) as f:
        avail_items = f.read().splitlines()
    avail_items = [k.split('\t') for k in avail_items]
    people_names = [k[0] for k in avail_items if k[1] == garment_class]
    shape_root = os.path.join(global_var.ROOT, 'neutral_shape_static_pose_new')
    smoothing = None

    shape_names = ["{:02d}".format(k) for k in range(0, 100)]

    for people_name, garment_class in tqdm(avail_items):
        shape_static_pose_people = os.path.join(shape_root, people_name)
        for shape_name in shape_names:
            garment_path = os.path.join(shape_static_pose_people, '{}_{}.obj'.format(shape_name, garment_class))
            if not os.path.exists(garment_path):