def main(args):
    """Run one text-classification mission from start to finish.

    Seeds the run, builds the tokenizer and the task's model/processor pair,
    prepares the dataloaders required by ``args.mission`` and hands them to a
    ``TextClassification`` controller.

    Args:
        args: Run configuration object. This function reads ``mission``,
            ``dataset_dir`` and ``batch_size`` from it, plus ``save_last``
            (train mission) and ``pred_file_dir`` (predict mission). The
            whole object is also passed to ``set_seed``,
            ``select_tokenizer``, ``select_task`` and ``TextClassification``.
    """
    start = time.time()
    set_seed(args)
    print("start in {}".format(start))

    # Tokenizer first, then the model together with its dataset processor class.
    print("loading tokenizer")
    tokenizer = select_tokenizer(args)
    model, Processor = select_task(args)

    def build_loader(split, *extra):
        """Load one dataset split and wrap it in a dataloader.

        ``False`` and ``128`` (and anything in ``extra``) are forwarded
        positionally; their meaning is defined by
        ``Processor.make_dataloader``, which is not visible here.
        """
        loaded = Processor(args.dataset_dir, split)
        loaded.load_swtc()
        loader = loaded.make_dataloader(
            tokenizer, args.batch_size, False, 128, *extra
        )
        return loaded, loader

    if args.mission != 'train':
        print(f'loading {args.mission} set')
        # Prediction reads the 'target' split; any other non-train mission
        # reads the 'test' split.
        split = 'target' if args.mission == 'predict' else 'test'
        processor, test_dataloader = build_loader(split, False)
    else:
        print("loading train set")
        processor, train_dataloader = build_loader('train')
        print('loading dev set')
        processor, deval_dataloader = build_loader('dev')

    # Choose the model and initialize the controller.
    controller = TextClassification(args)
    controller.init(model)

    # Run the task according to the mission; an unrecognized mission runs
    # nothing beyond the loading done above.
    mission = args.mission
    if mission == 'train':
        controller.train(
            train_dataloader, deval_dataloader, save_last=args.save_last
        )
    elif mission == 'test':
        controller.test(test_dataloader)
    elif mission == "predict":
        _, predicts = controller.predict(test_dataloader)
        # The processor that loaded the split also stores and saves the
        # predictions.
        processor.add_predict(predicts)
        processor.save_data(args.pred_file_dir)

    end = time.time()
    logger.info("start in {:.0f}, end in {:.0f}".format(start, end))
    # The message below reads "running time: N seconds".
    logger.info("运行时间:%.2f秒" % (end - start))
estimated[estimator_name].append(estimator_value) value_true = estimated.pop('true') plt.figure() plt.plot(parameters, value_true, label='true', ls='--', marker='x', markersize=8) for estimator_name, estimator_value in estimated.items(): plt.plot(parameters, estimator_value, label=estimator_name) plt.xlabel(xlabel) plt.ylabel('Estimated Mutual Information, bits') plt.title(f"{generator.__name__.lstrip('_mi_')}: len(X)={n_samples}, dim(X)={n_features}") plt.legend() plt.savefig(IMAGES_MUTUAL_INFO_DISTRIBUTIONS_DIR / f"{generator.__name__}.png") # plt.show() def mi_all_tests(n_samples=10_000, n_features=10): set_seed(26) mi_test(_mi_uniform_squared, n_samples=n_samples, n_features=n_features, xlabel=r'$X \sim $Uniform$(0, x); Y = X^2$') mi_test(_mi_squared_integers, n_samples=n_samples, n_features=n_features, xlabel=r'$X \sim $Randint$(0, x); Y = X^2$') mi_test(_mi_normal_correlated, n_samples=n_samples, n_features=n_features, xlabel=r'$XY \sim \mathcal{N}(0, \Sigma^\top \Sigma), \Sigma_{ij} \sim $Uniform$(0, x)$') mi_test(_mi_additive_normal_noise, n_samples=n_samples, n_features=n_features, xlabel=r'$X \sim \mathcal{N}(0, x^2); Y = X + \epsilon,' r'\epsilon \sim \mathcal{N}(0,\left(\frac{x}{2}\right)^2$)') mi_test(_mi_normal_different_location, n_samples=n_samples, n_features=n_features, xlabel=r'$XY \sim \mathcal{N}(\mu, 10^2), \mu \sim $Uniform$(0, x)$') Timer.checkpoint() if __name__ == '__main__':
estimated_test = MutualInfoTest(x=x, y=y, mi_true=mi_true, verbose=False).run_all() estimated['true'].append(mi_true) for estimator_name, estimator_value in estimated_test.items(): estimated[estimator_name].append(estimator_value) value_true = estimated.pop('true') plt.figure() plt.plot(parameters, value_true, label='true', ls='--', lw=2, marker='x') for estimator_name, estimator_value in estimated.items(): plt.plot(parameters, estimator_value, label=estimator_name) plt.xlabel(xlabel) plt.ylabel('Estimated Mutual Information, bits') title = f"{method_name}: len(X)={n_samples}" if n_features is not None: title = f"{title}, dim(X)={n_features}" plt.title(title) plt.legend() plt.savefig(IMAGES_MUTUAL_INFO_CLASSIFIER_DIR / f"{method_name}.png") # plt.show() if __name__ == '__main__': set_seed(26) # plot_gmm() mi_test(method='sample_gaussian_mixture') mi_test(method='sample_gaussian_mixture_softmax') mi_test(sample_softmax_argmax) Timer.checkpoint()
# Command-line entry script for the ECG classifier: parse the options, seed
# the run, then dispatch on the requested mode.
parser = argparse.ArgumentParser(description="ECG classifier")
parser.add_argument(
    "-d", "--dir",
    default="data",
    help="Directory with the data",
)
parser.add_argument(
    "-m", "--mode",
    help="Script working mode. One of [train, classify]",
)
parser.add_argument(
    "-r", "--record",
    default="",
    help="Name of the record to be classified",
)
parser.add_argument(
    "-o", "--output",
    default="answers.txt",
    help="File where to write classification results",
)
args = parser.parse_args()

set_seed(42)

if args.mode == "classify":
    # A non-empty record name selects single-record classification;
    # otherwise all records are classified.
    (main_classify_single if args.record else main_classify_all)(args)
elif args.mode == "train":
    logger.enable_logging('ecg', True)
    train(args)
else:
    # Missing or unrecognized mode: show the usage text.
    parser.print_help()