예제 #1
0
def main(args):
    """Run one text-classification mission (train, test or predict).

    The tokenizer, the model and the dataset processor class are obtained
    from ``select_tokenizer`` / ``select_task``; the dataset split(s) needed
    for ``args.mission`` are loaded; a ``TextClassification`` controller is
    initialised with the model and then asked to train, test or predict.

    Args:
        args: Parsed options. Attributes read here: ``mission``,
            ``dataset_dir``, ``batch_size``, ``save_last`` (train only) and
            ``pred_file_dir`` (predict only). The whole object is also
            forwarded to ``set_seed``, ``select_tokenizer``, ``select_task``
            and ``TextClassification``.

    Any mission other than 'train', 'test' or 'predict' still loads the
    'test' split and builds the controller, but runs nothing afterwards.
    """
    started_at = time.time()
    set_seed(args)
    print(f"start in {started_at}")

    # Tokenizer first, then the model together with its processor class.
    print("loading tokenizer")
    tokenizer = select_tokenizer(args)
    model, Processor = select_task(args)

    def load_split(split, *loader_extras):
        """Load one dataset split and return (processor, dataloader)."""
        split_processor = Processor(args.dataset_dir, split)
        split_processor.load_swtc()
        # NOTE(review): the meaning of the positional `False` / `128`
        # arguments is defined by make_dataloader, which is not visible
        # here (128 is presumably a max sequence length -- confirm).
        loader = split_processor.make_dataloader(
            tokenizer, args.batch_size, False, 128, *loader_extras)
        return split_processor, loader

    if args.mission == 'train':
        print("loading train set")
        _, train_loader = load_split('train')

        print('loading dev set')
        _, dev_loader = load_split('dev')
    else:
        print(f'loading {args.mission} set')
        # Prediction reads the 'target' split; everything else reads 'test'.
        split = 'target' if args.mission == 'predict' else 'test'
        # The processor is kept: the predict mission writes results via it.
        processor, eval_loader = load_split(split, False)

    # Build the controller around the selected model.
    controller = TextClassification(args)
    controller.init(model)

    # Dispatch on the requested mission.
    if args.mission == 'train':
        controller.train(train_loader, dev_loader, save_last=args.save_last)
    elif args.mission == 'test':
        controller.test(eval_loader)
    elif args.mission == "predict":
        _, predictions = controller.predict(eval_loader)
        processor.add_predict(predictions)
        processor.save_data(args.pred_file_dir)

    finished_at = time.time()
    logger.info(f"start in {started_at:.0f}, end in {finished_at:.0f}")
    # The message below reads "running time: <seconds> s".
    logger.info("运行时间:%.2f秒" % (finished_at - started_at))
예제 #2
0
            estimated[estimator_name].append(estimator_value)
    value_true = estimated.pop('true')
    plt.figure()
    plt.plot(parameters, value_true, label='true', ls='--', marker='x', markersize=8)
    for estimator_name, estimator_value in estimated.items():
        plt.plot(parameters, estimator_value, label=estimator_name)
    plt.xlabel(xlabel)
    plt.ylabel('Estimated Mutual Information, bits')
    plt.title(f"{generator.__name__.lstrip('_mi_')}: len(X)={n_samples}, dim(X)={n_features}")
    plt.legend()
    plt.savefig(IMAGES_MUTUAL_INFO_DISTRIBUTIONS_DIR / f"{generator.__name__}.png")
    # plt.show()


def mi_all_tests(n_samples=10_000, n_features=10):
    """Run ``mi_test`` once per synthetic data generator, in a fixed order.

    The seed is set to 26 before the first experiment and
    ``Timer.checkpoint()`` is called after the last one.

    Args:
        n_samples: Forwarded unchanged to every ``mi_test`` call.
        n_features: Forwarded unchanged to every ``mi_test`` call.
    """
    set_seed(26)

    # (generator, x-axis label) pairs; the order is the execution order.
    experiments = (
        (_mi_uniform_squared,
         r'$X \sim $Uniform$(0, x); Y = X^2$'),
        (_mi_squared_integers,
         r'$X \sim $Randint$(0, x); Y = X^2$'),
        (_mi_normal_correlated,
         r'$XY \sim \mathcal{N}(0, \Sigma^\top \Sigma), \Sigma_{ij} \sim $Uniform$(0, x)$'),
        (_mi_additive_normal_noise,
         r'$X \sim \mathcal{N}(0, x^2); Y = X + \epsilon,'
         r'\epsilon \sim \mathcal{N}(0,\left(\frac{x}{2}\right)^2$)'),
        (_mi_normal_different_location,
         r'$XY \sim \mathcal{N}(\mu, 10^2), \mu \sim $Uniform$(0, x)$'),
    )
    for generator, axis_label in experiments:
        mi_test(generator, n_samples=n_samples, n_features=n_features,
                xlabel=axis_label)

    Timer.checkpoint()


if __name__ == '__main__':
        estimated_test = MutualInfoTest(x=x,
                                        y=y,
                                        mi_true=mi_true,
                                        verbose=False).run_all()
        estimated['true'].append(mi_true)
        for estimator_name, estimator_value in estimated_test.items():
            estimated[estimator_name].append(estimator_value)
    value_true = estimated.pop('true')
    plt.figure()
    plt.plot(parameters, value_true, label='true', ls='--', lw=2, marker='x')
    for estimator_name, estimator_value in estimated.items():
        plt.plot(parameters, estimator_value, label=estimator_name)
    plt.xlabel(xlabel)
    plt.ylabel('Estimated Mutual Information, bits')
    title = f"{method_name}: len(X)={n_samples}"
    if n_features is not None:
        title = f"{title}, dim(X)={n_features}"
    plt.title(title)
    plt.legend()
    plt.savefig(IMAGES_MUTUAL_INFO_CLASSIFIER_DIR / f"{method_name}.png")
    # plt.show()


if __name__ == '__main__':
    # NOTE(review): this entry point first runs mutual-information
    # experiments and then an "ECG classifier" command line -- it looks like
    # two scripts were merged here; confirm that both parts are intended.

    # --- Mutual-information experiments (seeded with 26) ---
    set_seed(26)
    # plot_gmm() is left disabled, as before.
    mi_test(method='sample_gaussian_mixture')
    mi_test(method='sample_gaussian_mixture_softmax')
    mi_test(sample_softmax_argmax)
    Timer.checkpoint()

    # --- ECG classifier command line ---
    parser = argparse.ArgumentParser(description="ECG classifier")
    parser.add_argument("-d", "--dir", default="data",
                        help="Directory with the data")
    parser.add_argument("-m", "--mode",
                        help="Script working mode. One of [train, classify]")
    parser.add_argument("-r", "--record", default="",
                        help="Name of the record to be classified")
    parser.add_argument("-o", "--output", default="answers.txt",
                        help="File where to write classification results")
    args = parser.parse_args()

    # Re-seed (42) before the selected mode runs.
    set_seed(42)

    if args.mode == "train":
        logger.enable_logging('ecg', True)
        train(args)
    elif args.mode != "classify":
        # Missing or unrecognised mode: show usage.
        parser.print_help()
    elif args.record:
        # A record name was supplied: classify just that record.
        main_classify_single(args)
    else:
        main_classify_all(args)