示例#1
0
def main():
    """Run the end-to-end text-classification pipeline.

    Loads the data, splits train/validation, tokenizes, builds and trains
    the model, validates it, and predicts on the test set.
    """
    init_logger(_user_logs_file)
    logging.info('======================start==========================')
    train_df, test_df = load_data()

    # split train and valid
    train_df, val_df, train_X, val_X, test_X = split_train_val(
        train_df, test_df)
    train_y = train_df['target'].values
    val_y = val_df['target'].values

    # tokenizing and padding
    train_X, val_X, test_X = token_sentence(train_X, val_X, test_X)
    logging.info(train_df['target'].count())
    # NOTE(review): the groupby result is discarded — presumably leftover
    # notebook exploration; confirm before removing entirely.
    train_df.groupby('target').count()
    train_df['target'].plot.hist(
        bins=2, title='Distribution of label in trainning set')

    # Build the model exactly once. The original called build_model() twice
    # and discarded the first instance, doubling construction cost.
    model = build_model()

    # train the model and plot
    train_model(model, train_X, train_y, val_X, val_y)

    # validate the model
    val_model(model, val_X, val_y)

    # predict test (fixed the 'pred__test_y' double-underscore typo)
    pred_test_y = model.predict([test_X], batch_size=1024, verbose=1)

    logging.info('=========done===========')
示例#2
0
def main():
    """Drive the whole pipeline: load, preprocess, train, then evaluate."""
    init_logger(_user_logs_file)
    logging.info('======================start==========================')
    # Load every split up front; test ids are kept for later use.
    x_tr, y_tr, x_va, y_va, x_te, test_id = load_data()
    # Apply the same preprocessing to train, validation and test inputs.
    x_tr, x_va, x_te = preprocess(x_tr, x_va, x_te)
    train(x_tr, y_tr, x_va, y_va)
    eval(x_va, y_va)
    logging.info('Done!')
示例#3
0
def preprocess():
    """Configure logging and build the vocabulary dictionary."""
    # Emit INFO-level messages from here on.
    logger.setLevel(logging.INFO)
    init_logger(logging.INFO)

    cfg = Config('.', 'temp')

    logger.info('building dict...')
    build_dict(cfg)
示例#4
0
def preprocess():
    """Set up logging, build the dictionary, then generate all datasets."""
    logger.setLevel(logging.INFO)
    init_logger(logging.INFO, 'temp.log.txt', 'w')

    cfg = Config('.', 'temp')

    logger.info('building dict...')
    build_dict('data/train.txt', cfg)

    logger.info('generating data...')
    # Convert each raw split into its serialized dataset form; only the
    # test split is flagged as test data.
    for src_path, dst_path, extra in (
            ('data/train.txt', cfg.train_data, {}),
            ('data/dev.txt', cfg.valid_data, {}),
            ('data/test_1.txt', cfg.test_data, {'is_test': True})):
        generate_data(src_path, dst_path, **extra)
示例#5
0
def main():
    """Evaluate a trained speech model on the test set.

    Parses CLI options, loads the YAML config, builds the test data loader,
    restores the checkpoint, and runs beam-search decoding without gradients.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-config', type=str, default='config/aishell.yaml')
    parser.add_argument('-log', type=str, default='eval.log')
    parser.add_argument('-mode', type=str, default='retrain')
    opt = parser.parse_args()

    # Close the config file deterministically; the original leaked the handle.
    with open(opt.config) as configfile:
        config = AttrDict(yaml.load(configfile, Loader=yaml.FullLoader))

    exp_name = os.path.join('egs', config.data.name, 'exp', config.model.type,
                            config.training.save_model)
    if not os.path.isdir(exp_name):
        os.makedirs(exp_name)
    logger = init_logger(os.path.join(exp_name, opt.log))

    os.environ["CUDA_VISIBLE_DEVICES"] = config.evaling.gpus
    config.evaling.num_gpu = num_gpus(config.evaling.gpus)
    logger.info('Number of gpu:' + str(config.evaling.num_gpu))
    # Scale workers and batch size by GPU count; fall back to CPU sizes.
    num_workers = 6 * (config.evaling.num_gpu
                       if config.evaling.num_gpu > 0 else 1)
    batch_size = (config.data.batch_size * config.evaling.num_gpu
                  if config.evaling.num_gpu > 0 else config.data.batch_size)

    dev_dataset = AudioDataset(config.data, 'test')
    dev_sampler = Batch_RandomSampler(len(dev_dataset),
                                      batch_size=batch_size,
                                      shuffle=config.data.shuffle)
    validate_data = AudioDataLoader(dataset=dev_dataset,
                                    num_workers=num_workers,
                                    batch_sampler=dev_sampler)
    logger.info('Load Test Set!')

    # Seed RNGs for reproducible decoding.
    if config.evaling.num_gpu > 0:
        torch.cuda.manual_seed(config.evaling.seed)
        torch.backends.cudnn.deterministic = True
    else:
        torch.manual_seed(config.evaling.seed)
    logger.info('Set random seed: %d' % config.evaling.seed)

    # Map checkpoint tensors to CPU when no GPU is available.
    if config.evaling.num_gpu == 0:
        checkpoint = torch.load(config.evaling.load_model, map_location='cpu')
    else:
        checkpoint = torch.load(config.evaling.load_model)
    logger.info(str(checkpoint.keys()))

    # Inference only: disable gradient tracking for the whole decode pass.
    with torch.no_grad():
        model = new_model(config, checkpoint).eval()
        beam_rnnt_decoder = build_beam_rnnt_decoder(config, model)
        beamctc_decoder = build_ctc_beam_decoder(config, model)
        if config.evaling.num_gpu > 0:
            model = model.cuda()

        _ = eval(config,
                 model,
                 validate_data,
                 logger,
                 beamctc_decoder=beamctc_decoder,
                 beam_rnnt_decoder=beam_rnnt_decoder)
from src.utils import init_logger

# Module-level logger, created once at import time with default settings.
LOGGER = init_logger()
示例#7
0
文件: train.py 项目: rpash/sure_sign
def train():
    """
    Train AdaBoost on given classifier.

    Loads ``config.yaml``, reads the ASL alphabet dataset, extracts features
    with the configured featurizer, cross-validates, fits the classifier,
    then predicts on and visualizes the test set.
    """
    with open(r"config.yaml") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

        train_path = full_path(config["dataset"]["train"])
        test_path = full_path(config["dataset"]["test"])

        ft_name = config["featurizers"]["featurizer"]
        ft_config = config["featurizers"][ft_name]
        clf_config = config["classification"]
        train_len = config["dataset"]["train_examples"]
        kfold = config["classification"]["k-fold"]
        utils.ask_for_load(config["always_load"])
        utils.init_logger(config)

        # Read images from dataset
        X_train, y_train, X_test, y_test = dataset.load_asl_alphabet(
            train_path, test_path, train_len=train_len)

        N, H, W = X_train.shape
        logging.info("Loaded {} training images of size ({}, {})".format(
            N, H, W))
        N, H, W = X_test.shape
        logging.info("Loaded {} test images of size ({}, {})".format(N, H, W))

        # Extract features based on config
        featurizer = Featurizer()
        if ft_name == "fft":
            features = featurizer.fft(X_train, ft_config)
            test_features = featurizer.fft(X_test, ft_config)
        elif ft_name == "rgb":
            # NOTE(review): this branch calls the FFT featurizer; it looks
            # like a copy-paste slip and presumably should call an RGB
            # featurizer — confirm against Featurizer's API before changing.
            features = featurizer.fft(X_train, ft_config)
            test_features = featurizer.fft(X_test, ft_config)
        elif ft_name == "dwt":
            features = featurizer.dwt(X_train, ft_config)
            test_features = featurizer.dwt(X_test, ft_config)
        elif ft_name == "sift":
            features = featurizer.sift(X_train, ft_config)
            test_features = featurizer.sift(X_test, ft_config)
        elif ft_name == "surf":
            features = featurizer.surf(X_train, ft_config)
            test_features = featurizer.surf(X_test, ft_config)
        elif ft_name == "orb":
            features = featurizer.orb(X_train, ft_config)
            test_features = featurizer.orb(X_test, ft_config)
        else:
            # An unrecognized featurizer previously left `features` undefined
            # and crashed below with a NameError; fail fast instead.
            raise ValueError("Unknown featurizer: {}".format(ft_name))

        N, M = features.shape
        logging.info("Extracted {} training features of length {}".format(
            N, M))
        N, M = test_features.shape
        logging.info("Extracted {} test features of length {}".format(N, M))

        # Cross validations
        clf = ASLClassifier(clf_config)
        xval_res = clf.cross_val_score(features, y_train, kfold, 11)
        logging.critical(xval_res)
        logging.critical(np.mean(xval_res))

        # Predictions on test images, timing both fit and predict.
        start = time.time()
        clf.fit(features, y_train)
        end = time.time()
        logging.info("Fit model in {} seconds".format(end - start))
        start = time.time()
        pred = clf.predict(test_features)
        end = time.time()
        logging.info("Predicted test dataset in {} seconds".format(end -
                                                                   start))

        # Plot each prediction and ground truth for test images
        for y_true, y_pred, img in zip(y_test, pred, X_test):
            plt.imshow(img, cmap="gray")
            plt.title("True: {}     Predicted: {}".format(
                utils.number_to_label(y_true), utils.number_to_label(y_pred)))
            plt.show()

        logging.critical(pred)
        logging.critical(y_test)
        # Overall test accuracy.
        logging.critical(np.sum(pred == y_test) / len(pred))
示例#8
0
    get_Into_User_Page
from src.file_processing import Merge
from src.map import Draw_Map

if __name__ == '__main__':
    # Output directories for scraped data, log files and saved figures.
    data_dir = 'data'
    log_dir = 'log'
    picture_dir = 'picture'

    Exist_or_Make_Dir(data_dir)
    Exist_or_Make_Dir(log_dir)
    Exist_or_Make_Dir(picture_dir)
    # Create the logger; the log file name carries a run timestamp.
    cur_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
    log_file_path = os.path.join(log_dir, 'spider_log' + cur_time + '.txt')
    logger = init_logger(log_file=log_file_path)

    # Obtain the Chrome webdriver options ("Chorme" spelling is the
    # project's own helper name).
    chrome_options = get_Chorme_Option()

    # Course-category related information.
    # Parameter configuration.
    main_url = 'https://www.icourse163.org'

    # Spreadsheet column headers (site language, kept verbatim).
    class_cate_col_name = '类名'
    class_cate_url_col_name = '链接'
    school_page_url = main_url + '/university/view/all.htm'
    # Course categories to crawl (site labels, kept verbatim; note the
    # misspelled variable name 'categroys' is preserved for compatibility).
    categroys = [
        '计算机', '外语', '理学', '工学', '经济管理', '心理学', '文史哲', '艺术设计', '医药卫生', '教育教学',
        '法学', '农林园艺', '音乐与舞蹈'
    ]
示例#9
0
import wave
import deepspeech
import numpy as np
import os
from src.utils import init_logger

# Module-level logger for this speech-to-text component.
logger = init_logger("SpeechtoText")
# NOTE(review): this import sits after the logger is created — presumably an
# intentional ordering; confirm before regrouping imports at the top.
from application_properties import ApplicationProperties as props


class SpeechtoText:
    def __init__(self,
                 model_file_path,
                 lm_file_path=None,
                 trie_file_path=None,
                 lm_alpha=0.75,
                 lm_beta=1.85,
                 beam_width=500):
        """Load the DeepSpeech model and optionally enable LM decoding.

        Args:
            model_file_path: path to the DeepSpeech model file.
            lm_file_path: optional path to the language-model file.
            trie_file_path: optional path to the LM trie file.
            lm_alpha: alpha value passed to enableDecoderWithLM.
            lm_beta: beta value passed to enableDecoderWithLM.
            beam_width: beam width given to deepspeech.Model.
        """
        self.model = deepspeech.Model(model_file_path, beam_width)
        # LM decoding is enabled only when every LM-related argument is
        # truthy (note: an explicit lm_alpha/lm_beta of 0 disables it).
        if lm_file_path and trie_file_path and lm_alpha and lm_beta:
            self.model.enableDecoderWithLM(lm_file_path, trie_file_path,
                                           lm_alpha, lm_beta)
        logger.info("Model sample rate : {}".format(self.model.sampleRate()))

    def get_text_from_speech(self, audio_file_path):
        w = wave.open(audio_file_path, 'r')
        audio_file_rate = w.getframerate()
        audio_file_frames = w.getnframes()
        audio_file_buffer = w.readframes(audio_file_frames)

        if audio_file_rate != self.model.sampleRate():
示例#10
0
def main():
    """Train a speech model (transducer / CTC / LM) from a YAML config.

    Builds train/dev data loaders, optionally restores checkpoints, then
    runs the epoch loop with periodic saving, evaluation and LR decay.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-config', type=str, default='config/aishell.yaml')
    parser.add_argument('-log', type=str, default='train.log')
    parser.add_argument('-mode', type=str, default='retrain')
    opt = parser.parse_args()

    # Close the config file deterministically; the original leaked the handle.
    with open(opt.config) as configfile:
        config = AttrDict(yaml.load(configfile, Loader=yaml.FullLoader))

    exp_name = os.path.join('egs', config.data.name, 'exp', config.model.type,
                            config.training.save_model)
    if not os.path.isdir(exp_name):
        os.makedirs(exp_name)
    logger = init_logger(os.path.join(exp_name, opt.log))

    # Keep a copy of the config next to the experiment outputs.
    shutil.copyfile(opt.config, os.path.join(exp_name, 'config.yaml'))
    logger.info('Save config info.')
    os.environ["CUDA_VISIBLE_DEVICES"] = config.training.gpus

    config.training.num_gpu = num_gpus(config.training.gpus)
    # Scale workers and batch size by GPU count; fall back to CPU sizes.
    num_workers = 6 * (config.training.num_gpu
                       if config.training.num_gpu > 0 else 1)
    batch_size = (config.data.batch_size * config.training.num_gpu
                  if config.training.num_gpu > 0 else config.data.batch_size)

    train_dataset = LmDataset(config.data, 'train')
    train_sampler = Batch_RandomSampler(len(train_dataset),
                                        batch_size=batch_size,
                                        shuffle=config.data.shuffle)
    training_data = AudioDataLoader(dataset=train_dataset,
                                    num_workers=num_workers,
                                    batch_sampler=train_sampler)
    logger.info('Load Train Set!')

    dev_dataset = LmDataset(config.data, 'dev')
    dev_sampler = Batch_RandomSampler(len(dev_dataset),
                                      batch_size=batch_size,
                                      shuffle=config.data.shuffle)
    validate_data = AudioDataLoader(dataset=dev_dataset,
                                    num_workers=num_workers,
                                    batch_sampler=dev_sampler)
    logger.info('Load Dev Set!')

    # Seed RNGs for reproducible training.
    if config.training.num_gpu > 0:
        torch.cuda.manual_seed(config.training.seed)
        torch.backends.cudnn.deterministic = True
    else:
        torch.manual_seed(config.training.seed)
    logger.info('Set random seed: %d' % config.training.seed)

    if config.model.type == "transducer":
        model = Transducer(config.model)
    elif config.model.type == "ctc":
        model = CTC(config.model)
    elif config.model.type == "lm":
        model = LM(config.model)
    else:
        raise NotImplementedError

    if config.training.load_model:
        if config.training.num_gpu == 0:
            checkpoint = torch.load(config.training.load_model,
                                    map_location='cpu')
        else:
            checkpoint = torch.load(config.training.load_model)
        logger.info(str(checkpoint.keys()))
        load_model(model, checkpoint)
        # Bug fix: the checkpoint is read from `load_model`, but the original
        # log line reported `config.training.new_model`.
        logger.info('Loaded model from %s' % config.training.load_model)

    # Optionally warm-start the encoder/decoder from separate checkpoints.
    if config.training.load_encoder or config.training.load_decoder:
        if config.training.load_encoder:
            checkpoint = torch.load(config.training.load_encoder)
            model.encoder.load_state_dict(checkpoint['encoder'])
            logger.info('Loaded encoder from %s' %
                        config.training.load_encoder)
        if config.training.load_decoder:
            checkpoint = torch.load(config.training.load_decoder)
            model.decoder.load_state_dict(checkpoint['decoder'])
            logger.info('Loaded decoder from %s' %
                        config.training.load_decoder)

    if config.training.num_gpu > 0:
        model = model.cuda()
        if config.training.num_gpu > 1:
            device_ids = list(range(config.training.num_gpu))
            model = torch.nn.DataParallel(model, device_ids=device_ids)
        logger.info('Loaded the model to %d GPUs' % config.training.num_gpu)

    optimizer = Optimizer(model.parameters(), config.optim)
    logger.info('Created a %s optimizer.' % config.optim.type)

    if opt.mode == 'continue':
        if not config.training.load_model:
            raise Exception(
                "if mode is 'continue', need 'config.training.load_model'")
        # `checkpoint` here comes from the load_model branch above.
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        logger.info('Load Optimizer State!')
    else:
        start_epoch = 0

    # create a visualizer
    if config.training.visualization:
        visualizer = SummaryWriter(os.path.join(exp_name, 'log'))
        logger.info('Created a visualizer.')
    else:
        visualizer = None

    logger.info(model)
    for epoch in range(start_epoch, config.training.epochs):

        train(epoch, config, model, training_data, optimizer, logger,
              visualizer)

        save_name = os.path.join(
            exp_name, '%s.epoch%d.chkpt' % (config.training.save_model, epoch))
        save_model(model, optimizer, config, save_name)
        logger.info('Epoch %d model has been saved.' % epoch)

        if config.training.eval_or_not:
            _ = eval(epoch, config, model, validate_data, logger, visualizer)

        if epoch >= config.optim.begin_to_adjust_lr:
            optimizer.decay_lr()
            # early stop
            if optimizer.lr < 1e-6:
                logger.info('The learning rate is too low to train.')
                break
            logger.info('Epoch %d update learning rate: %.6f' %
                        (epoch, optimizer.lr))

    logger.info('The training process is OVER!')
示例#11
0
def main():
    """Train / evaluate / test a seq2seq model per the `args` flags.

    Builds vocab and (optionally) GloVe embeddings, constructs the model,
    and runs the requested phases inside separate TF sessions.
    """
    os.makedirs(config.temp_dir, exist_ok=True)
    os.makedirs(config.result_dir, exist_ok=True)
    os.makedirs(config.train_log_dir, exist_ok=True)

    logger.setLevel(logging.INFO)
    init_logger(logging.INFO, 'temp.log.txt', 'w')

    logger.info('preparing data...')
    # Cap vocab sizes at what the dictionary actually contains.
    config.word_2_id, config.id_2_word = read_json_dict(config.vocab_dict)
    config.vocab_size = min(config.vocab_size, len(config.word_2_id))
    config.oov_vocab_size = min(config.oov_vocab_size,
                                len(config.word_2_id) - config.vocab_size)

    embedding_matrix = None
    if args.do_train:
        # Training: load the full GloVe matrix for initialization.
        if os.path.exists(config.glove_file):
            logger.info('loading embedding matrix from file: {}'.format(
                config.glove_file))
            embedding_matrix, config.word_em_size = load_glove_embedding(
                config.glove_file, list(config.word_2_id.keys()))
            logger.info('shape of embedding matrix: {}'.format(
                embedding_matrix.shape))
    else:
        # Not training: only infer the embedding width from the first line.
        if os.path.exists(config.glove_file):
            with open(config.glove_file, 'r', encoding='utf-8') as fin:
                line = fin.readline()
                config.word_em_size = len(line.strip().split()) - 1

    data_reader = DataReader(config)
    evaluator = Evaluator('tgt')

    logger.info('building model...')
    model = get_model(config, embedding_matrix)
    saver = tf.train.Saver(max_to_keep=10)

    if args.do_train:
        logger.info('loading data...')
        train_data = data_reader.read_train_data()
        valid_data = data_reader.read_valid_data()

        logger.info(log_title('Trainable Variables'))
        for v in tf.trainable_variables():
            logger.info(v)

        logger.info(log_title('Gradients'))
        for g in model.gradients:
            logger.info(g)

        with tf.Session(config=sess_config) as sess:
            # Resume from the latest checkpoint when available, else init.
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)
            else:
                logger.info('initializing from scratch...')
                tf.global_variables_initializer().run()

            train_writer = tf.summary.FileWriter(config.train_log_dir,
                                                 sess.graph)

            valid_log_history = run_train(sess, model, train_data, valid_data,
                                          saver, evaluator, train_writer)
            save_json(
                valid_log_history,
                os.path.join(config.result_dir, config.current_model,
                             'valid_log_history.json'))

    if args.do_eval:
        logger.info('loading data...')
        valid_data = data_reader.read_valid_data()

        with tf.Session(config=sess_config) as sess:
            # Evaluation requires an existing checkpoint.
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids, valid_loss, valid_accu = run_evaluate(
                    sess, model, valid_data)
                logger.info(
                    'average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'
                    .format(valid_loss, valid_accu))

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_word,
                             config.valid_data, config.valid_outputs)
                results = evaluator.evaluate(config.valid_data,
                                             config.valid_outputs,
                                             config.to_lower)
                save_json(results, config.valid_results)
            else:
                logger.info('model not found!')

    if args.do_test:
        logger.info('loading data...')
        test_data = data_reader.read_test_data()

        with tf.Session(config=sess_config) as sess:
            # Testing also requires an existing checkpoint.
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids = run_test(sess, model, test_data)

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_word, config.test_data,
                             config.test_outputs)
                results = evaluator.evaluate(config.test_data,
                                             config.test_outputs,
                                             config.to_lower)
                save_json(results, config.test_results)
            else:
                logger.info('model not found!')
示例#12
0
文件: main.py 项目: TheWall9/DRHGCN
def train(config, model_cls=DRHGCN):
    """Run cross-validated training and evaluation of ``model_cls``.

    Args:
        config: experiment configuration namespace; mutated in place with
            derived fields (log_dir, sizes, pos_weight, split_id, ...).
        model_cls: model class instantiated once per CV fold.
    """
    time_stamp = time.asctime()
    datasets = DRDataset(dataset_name=config.dataset_name,
                         drug_neighbor_num=config.drug_neighbor_num,
                         disease_neighbor_num=config.disease_neighbor_num)
    log_dir = os.path.join(f"{config.comment}",
                           f"{config.split_mode}-{config.n_splits}-fold",
                           f"{config.dataset_name}", f"{config.seed}",
                           f"{model_cls.__name__}", f"{time_stamp}")
    config.log_dir = log_dir
    config.n_drug = datasets.drug_num
    config.n_disease = datasets.disease_num

    config.size_u = datasets.drug_num
    config.size_v = datasets.disease_num

    config.gpus = 1 if torch.cuda.is_available() else 0
    config.pos_weight = datasets.pos_weight
    config.time_stamp = time_stamp
    logger = init_logger(log_dir)
    logger.info(pformat(vars(config)))
    # Bug fix: the original read `config.dataset_dype` (typo), raising
    # AttributeError whenever dataset_type was explicitly set.
    config.dataset_type = (config.dataset_type
                           if config.dataset_type is not None
                           else model_cls.DATASET_TYPE)
    cv_spliter = CVDataset(datasets, split_mode=config.split_mode,
                           n_splits=config.n_splits,
                           drug_idx=config.drug_idx,
                           disease_idx=config.disease_idx,
                           train_fill_unknown=config.train_fill_unknown,
                           global_test_all_zero=config.global_test_all_zero,
                           seed=config.seed,
                           dataset_type=config.dataset_type)
    pl.seed_everything(config.seed)
    scores, labels, edges, split_idxs = [], [], [], []
    metrics = {}
    start_time_stamp = time.time()
    for split_id, datamodule in enumerate(cv_spliter):
        config.split_id = split_id
        split_start_time_stamp = time.time()

        datamodule.prepare_data()
        train_loader = datamodule.train_dataloader()
        val_loader = datamodule.val_dataloader()
        # Each fold has its own class balance.
        config.pos_weight = train_loader.dataset.pos_weight
        model = model_cls(**vars(config))
        model = model.cuda() if config.gpus else model

        # Log the model architecture only once.
        if split_id == 0:
            logger.info(model)
        logger.info(f"begin train fold {split_id}/{len(cv_spliter)}")
        train_fn(config, model, train_loader=train_loader, val_loader=val_loader)
        logger.info(f"end train fold {split_id}/{len(cv_spliter)}")
        save_file_format = os.path.join(
            config.log_dir,
            f"{config.dataset_name}-{config.split_id} fold-{{auroc}}-{{aupr}}.mat")
        score, label, edge, metric = test_fn(model, val_loader, save_file_format)
        metrics[f"{split_id}"] = metric
        scores.append(score)
        labels.append(label)
        edges.append(edge)
        split_idxs.append(np.ones(len(score), dtype=int) * split_id)
        logger.info(f"{split_id}/{len(cv_spliter)} folds: {metric}")
        logger.info(f"{split_id}/{len(cv_spliter)} folds time cost: {time.time()-split_start_time_stamp}")

        # Debug mode: only run the first fold.
        if config.debug:
            break
    end_time_stamp = time.time()
    logger.info(f"total time cost:{end_time_stamp-start_time_stamp}")
    # Aggregate per-fold results and compute the final metrics.
    scores = np.concatenate(scores, axis=0)
    labels = np.concatenate(labels, axis=0)
    edges = np.concatenate(edges, axis=1)
    split_idxs = np.concatenate(split_idxs, axis=0)
    final_metric = metric_fn.evaluate(predict=scores, label=labels, is_final=True)
    metrics["final"] = final_metric
    metrics = pd.DataFrame(metrics).T
    metrics.index.name = "split_id"
    metrics["seed"] = config.seed
    logger.info(f"final {config.dataset_name}-{config.split_mode}-{config.n_splits}-fold-auroc:{final_metric['auroc']}-aupr:{final_metric['aupr']}")
    output_file_name = f"final-{config.dataset_name}-{config.split_mode}-{config.n_splits}-auroc:{final_metric['auroc']}-aupr:{final_metric['aupr']}-fold"
    scio.savemat(os.path.join(log_dir, f"{output_file_name}.mat"),
                 {"row": edges[0],
                  "col": edges[1],
                  "score": scores,
                  "label": labels,
                  "split_idx": split_idxs}
                 )
    with pd.ExcelWriter(os.path.join(log_dir, f"{output_file_name}.xlsx")) as f:
        metrics.to_excel(f, sheet_name="metrics")
        params = pd.DataFrame({key: str(value) for key, value in vars(config).items()},
                              index=[str(time.time())])
        for key, value in final_metric.items():
            params[key] = value
        params["file"] = output_file_name
        params.to_excel(f, sheet_name="params")

    logger.info(f"save final results to r'{os.path.join(log_dir, output_file_name)}.mat'")
    logger.info(f"final results: {final_metric}")
示例#13
0
def main():
    """Train / evaluate / test a src→tgt seq2seq model per the `args` flags.

    Loads both vocabularies, builds the model, and runs the requested
    phases inside separate TF sessions.
    """
    os.makedirs(config.temp_dir, exist_ok=True)
    os.makedirs(config.result_dir, exist_ok=True)
    os.makedirs(config.train_log_dir, exist_ok=True)

    logger.setLevel(logging.INFO)
    init_logger(logging.INFO)

    logger.info('loading dict...')
    # Cap each vocab size at what its dictionary actually contains.
    config.src_2_id, config.id_2_src = read_json_dict(config.src_vocab_dict)
    config.src_vocab_size = min(config.src_vocab_size, len(config.src_2_id))
    config.tgt_2_id, config.id_2_tgt = read_json_dict(config.tgt_vocab_dict)
    config.tgt_vocab_size = min(config.tgt_vocab_size, len(config.tgt_2_id))

    data_reader = DataReader(config)
    evaluator = Evaluator('tgt')

    logger.info('building model...')
    model = get_model(config)
    saver = tf.train.Saver(max_to_keep=10)

    if args.do_train:
        logger.info('loading data...')
        train_data = data_reader.load_train_data()
        valid_data = data_reader.load_valid_data()

        logger.info(log_title('Trainable Variables'))
        for v in tf.trainable_variables():
            logger.info(v)

        logger.info(log_title('Gradients'))
        for g in model.gradients:
            logger.info(g)

        with tf.Session(config=sess_config) as sess:
            # Resume from the latest checkpoint when available, else init.
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)
            else:
                logger.info('initializing from scratch...')
                tf.global_variables_initializer().run()

            train_writer = tf.summary.FileWriter(config.train_log_dir,
                                                 sess.graph)
            valid_log_history = run_train(sess, model, train_data, valid_data,
                                          saver, evaluator, train_writer)
            save_json(
                valid_log_history,
                os.path.join(config.result_dir, config.current_model,
                             'valid_log_history.json'))

    if args.do_eval:
        logger.info('loading data...')
        valid_data = data_reader.load_valid_data()

        with tf.Session(config=sess_config) as sess:
            # Evaluation requires an existing checkpoint.
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids, valid_loss, valid_accu = run_evaluate(
                    sess, model, valid_data)
                logger.info(
                    'average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'
                    .format(valid_loss, valid_accu))

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_tgt, config.valid_data,
                             config.valid_outputs)
                results = evaluator.evaluate(config.valid_data,
                                             config.valid_outputs,
                                             config.to_lower)
                save_json(results, config.valid_results)
            else:
                logger.info('model not found!')

    if args.do_test:
        logger.info('loading data...')
        test_data = data_reader.load_test_data()

        with tf.Session(config=sess_config) as sess:
            # Testing also requires an existing checkpoint.
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids = run_test(sess, model, test_data)

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_tgt, config.test_data,
                             config.test_outputs)
                results = evaluator.evaluate(config.test_data,
                                             config.test_outputs,
                                             config.to_lower)
                save_json(results, config.test_results)
            else:
                logger.info('model not found!')
示例#14
0
# model parameters
parser.add_argument("--dim_embedding", type=int, default=300, help="")
parser.add_argument("--dim_output", type=int, default=2, help="")

parser.add_argument("--conv1_size", type=str, default="5_5_8", help="")
parser.add_argument("--pool1_size", type=str, default="10_10", help="")
parser.add_argument("--conv2_size", type=str, default="3_3_16", help="")
parser.add_argument("--pool2_size", type=str, default="5_5", help="")
# Use integer defaults for int-typed options. The originals were strings
# ("128", "2"), which only worked because argparse re-parses string defaults
# through `type`.
parser.add_argument("--mp_hidden", type=int, default=128, help="")
parser.add_argument("--dim_out", type=int, default=2, help="")

# parse arguments
params = parser.parse_args()

# check parameters

logger = init_logger(params)

# Load pretrained word vectors and the word→index mapping.
params.word2idx, params.glove_weight = load_w2v(params.embedding_path,
                                                params.dim_embedding)

train_data = MSRPDataset(params.data_path, data_type="train")
test_data = MSRPDataset(params.data_path, data_type="test")

params.train_data = train_data
params.test_data = test_data

# Build and run the MatchPyramid classifier on the loaded data.
mp_model = MatchPyramidClassifier(params)
mp_model.run()
示例#15
0
文件: app.py 项目: rpash/sure_sign
def run():
    """Run the live ASL-classification camera loop.

    Loads the config, builds the featurizer/classifier, then classifies a
    cropped region of each webcam frame until the user presses 'q'.
    Press 'y' to save the current frame.
    """
    yaml_path = full_path(r"config.yaml")
    with open(yaml_path) as yaml_file:
        config = yaml.load(yaml_file, Loader=yaml.FullLoader)

    if config is None:
        logging.warning("Could not load config file.")
        return

    ft_name = config["featurizers"]["featurizer"]
    ft_config = config["featurizers"][ft_name]
    clf_config = config["classification"]
    # Frame period in milliseconds derived from the configured FPS.
    dt = 1000.0 / config["app"]["fps"]
    save_path = full_path(config["app"]["save_path"])
    utils.init_logger(config)
    utils.ask_for_load(config["always_load"])
    img_num = get_img_num(save_path)

    featurizer = Featurizer()
    featurize = choose_featurizer(featurizer, ft_name)
    classifier = ASLClassifier(clf_config)
    cap = cv2.VideoCapture(0)

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 2.5
    font_color = (0, 0, 255)
    font_thickness = 2
    font_line_type = cv2.LINE_AA

    if not cap.isOpened():
        logging.warning("Cannot open camera")
        # Release the capture object even on a failed open (safe no-op).
        cap.release()
        return

    # Centered 200x200 crop region used for classification.
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    xmin = int((height / 2) - 100)
    xmax = xmin + 200
    ymin = int((width / 2) - 100)
    ymax = ymin + 200

    font_y = ymin
    font_x = int(0.1 * height)

    # Smooth predictions over the last few frames.
    buf = Buffer(5)
    while True:
        ret, frame = cap.read()
        if not ret:
            continue
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        img = img[xmin:xmax, ymin:ymax]
        feature = featurize(img[np.newaxis, ...], ft_config)
        pred = utils.number_to_label(buf.push(classifier.predict(feature)[0]))

        frame = cv2.putText(frame, pred, (font_y, font_x), font, font_scale,
                            font_color, font_thickness, font_line_type)
        frame = cv2.rectangle(frame, (ymin, xmin), (ymax, xmax), font_color,
                              font_thickness)

        cv2.imshow("ASL Classifier", frame)
        key = cv2.waitKey(int(dt)) & 0xff
        if key == ord('y'):
            cv2.imwrite(save_file(save_path, img_num), frame)
            img_num += 1
        elif key == ord('q'):
            # Release the camera before exiting; the original leaked it.
            cap.release()
            cv2.destroyAllWindows()
            return