Example #1
    def _test_8x_v100_half_precision(self, num_epochs, min_accuracy,
                                     max_accuracy):
        """Utility to benchmark ImageNet on 8xV100 GPUs. Use in your test func."""
        model_dir = self.get_tmp_model_dir()
        FLAGS.batch_size = 2048
        FLAGS.half_precision = True
        FLAGS.num_epochs = num_epochs
        FLAGS.model_dir = model_dir

        start_time = time.time()
        train.main([])
        benchmark_time = time.time() - start_time
        summaries = self.read_summaries(model_dir)

        # Summaries contain all the information necessary for the regression
        # metrics.
        wall_time, _, eval_accuracy = zip(*summaries['eval_accuracy'])
        wall_time = np.array(wall_time)
        sec_per_epoch = np.mean(wall_time[1:] - wall_time[:-1])
        end_accuracy = eval_accuracy[-1]

        # Assertions are deferred until the test finishes, so the metrics are
        # always reported and benchmark success is determined based on *all*
        # assertions.
        self.assertBetween(end_accuracy, min_accuracy, max_accuracy)

        # Use the reporting API to report single or multiple metrics/extras.
        self.report_wall_time(benchmark_time)
        self.report_metrics({
            'sec_per_epoch': sec_per_epoch,
            'accuracy': end_accuracy
        })
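A hypothetical concrete test built on this utility; the epoch count and accuracy bounds mirror Example #7 below.

    def test_8x_v100_half_precision_2_epochs(self):
        """Run ImageNet on 8x V100 GPUs in half precision for 2 epochs."""
        self._test_8x_v100_half_precision(num_epochs=2, min_accuracy=0.06,
                                          max_accuracy=0.09)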
Example #2
    def __call__(self, seed):
        # algs, lrs, n_epochs = ['sgd'], [1e-1], [ 5000]
        algs, lrs, n_epochs = ['adam'], [1e-3], [5000]

        for a, l, e in zip(algs, lrs, n_epochs):
            params = deepcopy(default_D2_pars)
            params['net_params']['saturations'] = [0, 1e8]
            params['net_params']['save_folder'] = 'out/D2/relu_avg/'
            params['train_params'].update({
                'optimizer_name': a,
                'lr': l,
                'n_epochs': e,
                'stop_loss': 1e-8,
                'loss_name': 'avg_d2'
            })
            params['test_suite']['fit_internal_representation'][
                'batch_size'] = 512
            for test_name in params['test_suite'].keys():
                params['test_suite'][test_name]['period'] = e // 4

            os.makedirs(params['net_params']['save_folder'], exist_ok=True)
            with open(params['net_params']['save_folder'] + 'full_params.json',
                      'w+') as f:
                json.dump(params, f, indent=4)

            main(params, seed)
            gc.collect()
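Examples #2, #3, #4 and several below share one sweep pattern: deep-copy a defaults dict, override a few fields, persist the exact parameters next to the run outputs, then call the training entry point. A minimal sketch of that pattern, where default_pars and main stand in for the project's own defaults dict and entry point:

import gc
import json
import os
from copy import deepcopy

def run_sweep(default_pars, main, seed, configs):
    for cfg in configs:
        params = deepcopy(default_pars)   # never mutate the shared defaults
        params['train_params'].update(cfg)
        save_folder = params['net_params']['save_folder']
        os.makedirs(save_folder, exist_ok=True)
        # Record the exact parameters alongside the run outputs.
        with open(os.path.join(save_folder, 'full_params.json'), 'w') as f:
            json.dump(params, f, indent=4)
        main(params, seed)
        gc.collect()                      # release memory between runs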
Example #3
    def __call__(self, seed):
        algs, lrs, n_epochs = ['adam'], [5e-4], [5000]

        for a, l, e in zip(algs, lrs, n_epochs):
            for n in [1024]:
                params = deepcopy(default_D3_pars)
                params['sampler_params']['decays'] = [0.8, 0.77, 0.75]
                params['net_params']['saturations'] = [0, 1e8]
                params['sampler_params']['batch_size'] = 4096
                params['net_params']['n'] = n
                params['net_params'][
                    'save_folder'] = 'out/D3_relu_avg/n_{}/'.format(n)
                params['train_params'].update({
                    'optimizer_name': a,
                    'lr': l,
                    'n_epochs': e,
                    'loss_name': 'avg_generic'
                })
                params['test_suite'] = { # By default, tests only run at final step
                                    'weight_analysis': {'period': 1000},
                                    'sanity_check': {'T': 200, 'period': 1000},
                                  }

                os.makedirs(params['net_params']['save_folder'], exist_ok=True)
                with open(
                        params['net_params']['save_folder'] +
                        'full_params.json', 'w+') as f:
                    json.dump(params, f, indent=4)

                main(params, seed)
                gc.collect()
Example #4
    def __call__(self, seed):

        algs, n_epochs, lr_list = ['adam'], [10000], [5e-5]
        train_d = True

        for a, e, l in zip(algs, n_epochs, lr_list):
            for train_bias in [True, False]:
                for sig_slope in [50.]:
                    for sig_thresh in [.1]:
                        params = deepcopy(default_D3_pars)

                        params['sampler_params']['decays'] = [0.8, 0.75, .75]
                        params['sampler_params']['batch_size'] = 1024
                        params['sampler_params']['epoch_length'] = 10
                        params['train_params'].update({
                            'optimizer_name': a,
                            'lr': l,
                            'n_epochs': e
                        })
                        params['train_params'].update({
                            'train_d_only': train_d,
                            'stop_loss': 1e-8,
                            'loss_name': 'batch'
                        })

                        params['net_params']['activation_type'] = 'Sigmoid'
                        params['net_params']['sigmoid_threshold'] = sig_thresh
                        params['net_params']['sigmoid_slope'] = sig_slope
                        params['net_params']['sigmoid_random_bias'] = False
                        params['net_params']['sigmoid_train_bias'] = train_bias
                        n = 1000
                        params['net_params']['n'] = n
                        params['net_params'][
                            'save_folder'] = 'out/D3_sigmoid_batch/n_{}_slope_{}_thresh_{}_train_bias_{}/'.format(
                                n, sig_slope, sig_thresh, train_bias)
                        # if params['net_params']['sigmoid_random_bias']:
                        #     params['net_params']['save_folder'] = params['net_params']['save_folder'] + 'randomly_biased/'
                        # if train_bias:
                        #     params['net_params']['save_folder'] = params['net_params']['save_folder'] +
                        # print(d_scale, e_scale)
                        params['test_suite'] = { # By default, tests only run at final step
                                            'weight_analysis': {'period': 2**20},
                                            'sanity_check': {'T': 200, 'period': 2**20},
                                            'fit_internal_representation': {'T': 200, 'period': 2**20},
                                          }

                        for test_name in params['test_suite'].keys():
                            params['test_suite'][test_name]['period'] = 1000

                        os.makedirs(params['net_params']['save_folder'],
                                    exist_ok=True)
                        with open(
                                params['net_params']['save_folder'] +
                                'full_params.json', 'w+') as f:
                            json.dump(params, f, indent=4)

                        main(params, seed)
                        gc.collect()
Example #5
    def test_keras(self):
        """Test training_with_keras."""
        output_dir = tempfile.mkdtemp()
        output_model = os.path.join(output_dir, "model.h5")

        body_pp_dpkl = os.path.join(output_dir, "body_pp.dpkl")
        title_pp_dpkl = os.path.join(output_dir, "title_pp.dpkl")

        title_vecs = os.path.join(output_dir, "title.npy")
        body_vecs = os.path.join(output_dir, "body.npy")

        this_dir = os.path.dirname(__file__)
        args = [
            "--sample_size=100",
            "--num_epochs=1",
            "--input_data=" +
            os.path.join(this_dir, "test_data", "github_issues_sample.csv"),
            "--output_model=" + output_model,
            "--output_body_preprocessor_dpkl=" + body_pp_dpkl,
            "--output_title_preprocessor_dpkl=" + title_pp_dpkl,
            "--output_train_title_vecs_npy=" + title_vecs,
            "--output_train_body_vecs_npy=" + body_vecs,
        ]

        train.main(args)

        output_files = [
            output_model, body_pp_dpkl, title_pp_dpkl, title_vecs, body_vecs
        ]

        for f in output_files:
            self.assertTrue(os.path.exists(f))
Example #6
def train_language_model(data_dir, arch, extra_flags=None):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            '--task',
            'language_modeling',
            data_dir,
            '--arch',
            arch,
            '--optimizer',
            'adam',
            '--lr',
            '0.0001',
            '--criterion',
            'adaptive_loss',
            '--adaptive-softmax-cutoff',
            '5,10,15',
            '--max-tokens',
            '500',
            '--tokens-per-sample',
            '500',
            '--save-dir',
            data_dir,
            '--max-epoch',
            '1',
            '--no-progress-bar',
            '--distributed-world-size',
            '1',
            '--ddp-backend',
            'no_c10d',
        ] + (extra_flags or []),
    )
    train.main(train_args)
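A sketch of how this helper is typically driven from a unit test; 'fconv_lm' and preprocess_lm_data are assumptions standing in for a registered fairseq LM architecture and the suite's own data-preparation helper:

import tempfile

def test_fconv_lm(self):
    with tempfile.TemporaryDirectory('test_fconv_lm') as data_dir:
        preprocess_lm_data(data_dir)  # assumed helper from the same suite
        train_language_model(data_dir, 'fconv_lm')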
Example #7
  def test_8x_v100_half_precision(self):
    """Run ImageNet on 8x V100 GPUs in half precision for 2 epochs."""
    model_dir = self.get_tmp_model_dir()
    FLAGS.batch_size = 2048
    FLAGS.half_precision = True
    FLAGS.num_epochs = 2
    FLAGS.model_dir = model_dir

    start_time = time.time()
    train.main([])
    benchmark_time = time.time() - start_time
    summaries = self.read_summaries(model_dir)

    # Summaries contain all the information necessary for the regression
    # metrics.
    wall_time, _, eval_accuracy = zip(*summaries['eval_accuracy'])
    wall_time = np.array(wall_time)
    sec_per_epoch = np.mean(wall_time[1:] - wall_time[:-1])
    end_accuracy = eval_accuracy[-1]

    # Assertions are deferred until the test finishes, so the metrics are
    # always reported and benchmark success is determined based on *all*
    # assertions.
    self.assertBetween(end_accuracy, 0.06, 0.09)

    # Use the reporting API to report single or multiple metrics/extras.
    self.report_wall_time(benchmark_time)
    self.report_metrics({'sec_per_epoch': sec_per_epoch,
                         'accuracy': end_accuracy})
    self.report_extras({
        'description': 'Toy 8 x V100 test for ImageNet ResNet50.',
        'model_name': 'resnet50',
        'parameters': 'hp=true,bs=2048',
    })
Example #8
    def __call__(self, seed):
        algs, lrs, n_epochs = ['sgd'], [1e0], [10000]

        for a, l, e in zip(algs, lrs, n_epochs):
            for T in [4]:
                params = deepcopy(default_D2_pars)
                params['sampler_params']['epoch_length'] = T
                params['net_type'] = 'DaleNet'
                params['net_params']['saturations'] = [0, 1e8]
                params['net_params'][
                    'save_folder'] = 'out/D2/dale/{}/T_{}/'.format(a, T)
                params['net_params']['inhib_proportion'] = .25
                params['net_params']['l2_penalty'] = 0.
                params['train_params'].update({
                    'optimizer_name': a,
                    'lr': l,
                    'n_epochs': e
                })
                for test_name in params['test_suite'].keys():
                    params['test_suite'][test_name]['period'] = e // 4

                os.makedirs(params['net_params']['save_folder'], exist_ok=True)
                with open(
                        params['net_params']['save_folder'] +
                        'full_params.json', 'w+') as f:
                    json.dump(params, f, indent=4)

                main(params, seed)
                gc.collect()
Example #9
def main():

    while True:

        inp = input(
            'Select: \n\t1 Configure \n\t2 Train \n\t3 Interact \n\n      > ')
        cls()

        if inp == '1':
            list_config()
            change_config()

        elif inp == '2':
            import train
            train.main()

        elif inp == '3':
            import interact
            interact.main()

        elif inp == '0':
            cls()
            break

        cls()
Example #10
    def test_cpu(self):
        """Run full training for MNIST CPU training."""
        model_dir = self.get_tmp_model_dir()
        FLAGS.model_dir = model_dir
        start_time = time.time()
        train.main([])
        benchmark_time = time.time() - start_time
        summaries = self.read_summaries(model_dir)

        # Summaries contain all the information necessary for the regression
        # metrics.
        wall_time, _, eval_accuracy = zip(*summaries['eval_accuracy'])
        wall_time = np.array(wall_time)
        sec_per_epoch = np.mean(wall_time[1:] - wall_time[:-1])
        end_eval_accuracy = eval_accuracy[-1]

        # Assertions are deferred until the test finishes, so the metrics are
        # always reported and benchmark success is determined based on *all*
        # assertions.
        self.assertBetween(end_eval_accuracy, 0.98, 1.0)

        # Use the reporting API to report single or multiple metrics/extras.
        self.report_wall_time(benchmark_time)
        self.report_metrics({
            'sec_per_epoch': sec_per_epoch,
            'accuracy': end_eval_accuracy,
        })
        self.report_extras({
            'model_name': 'MNIST',
            'description': 'CPU test for MNIST.'
        })
Example #11
def main():

    if parser.extract:
        extract.main()

    if parser.train:
        train.main()
Example #12
def train_translation_model(data_dir, arch, extra_flags=None, task='translation'):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            '--task', task,
            data_dir,
            '--save-dir', data_dir,
            '--arch', arch,
            '--lr', '0.05',
            '--max-tokens', '500',
            '--max-epoch', '1',
            '--no-progress-bar',
            '--distributed-world-size', '1',
            '--source-lang', 'in',
            '--target-lang', 'out',
        ] + (extra_flags or []),
    )
    train.main(train_args)

    # test validation
    validate_parser = options.get_validation_parser()
    validate_args = options.parse_args_and_arch(
        validate_parser,
        [
            '--task', task,
            data_dir,
            '--path', os.path.join(data_dir, 'checkpoint_last.pt'),
            '--valid-subset', 'valid',
            '--max-tokens', '500',
            '--no-progress-bar',
        ]
    )
    validate.main(validate_args)
Example #13
  def test_main(self, mock_gan_train, mock_define_train_ops, mock_cyclegan_loss,
                mock_define_model, mock_data_provider, mock_gfile):
    FLAGS.image_set_x_file_pattern = '/tmp/x/*.jpg'
    FLAGS.image_set_y_file_pattern = '/tmp/y/*.jpg'
    FLAGS.batch_size = 3
    FLAGS.patch_size = 8
    FLAGS.generator_lr = 0.02
    FLAGS.discriminator_lr = 0.3
    FLAGS.train_log_dir = '/tmp/foo'
    FLAGS.master = 'master'
    FLAGS.task = 0
    FLAGS.cycle_consistency_loss_weight = 2.0
    FLAGS.max_number_of_steps = 1

    mock_data_provider.provide_custom_datasets.return_value = (tf.zeros(
        [1, 2], dtype=tf.float32), tf.zeros([1, 2], dtype=tf.float32))

    train.main(None)
    mock_data_provider.provide_custom_datasets.assert_called_once_with(
        ['/tmp/x/*.jpg', '/tmp/y/*.jpg'], batch_size=3, patch_size=8)
    mock_define_model.assert_called_once_with(mock.ANY, mock.ANY)
    mock_cyclegan_loss.assert_called_once_with(
        mock_define_model.return_value,
        cycle_consistency_loss_weight=2.0,
        tensor_pool_fn=mock.ANY)
    mock_define_train_ops.assert_called_once_with(
        mock_define_model.return_value, mock_cyclegan_loss.return_value)
    mock_gan_train.assert_called_once_with(
        mock_define_train_ops.return_value,
        '/tmp/foo',
        get_hooks_fn=mock.ANY,
        hooks=mock.ANY,
        master='master',
        is_chief=True)
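unittest.mock passes mocks to the test bottom-up, so a decorator stack like the following sketch (the patched attribute names are assumptions about the train module) would produce the argument order in the signature above:

@mock.patch.object(train, 'gfile')             # outermost -> last argument
@mock.patch.object(train, 'data_provider')
@mock.patch.object(train, 'define_model')
@mock.patch.object(train, 'cyclegan_loss')
@mock.patch.object(train, 'define_train_ops')
@mock.patch.object(train, 'gan_train')         # innermost -> first argument
def test_main(self, mock_gan_train, mock_define_train_ops, mock_cyclegan_loss,
              mock_define_model, mock_data_provider, mock_gfile):
    ...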
Example #14
    def __call__(self, seed):
        # algs, lrs, n_epochs = ['adam', 'sgd'], [3e-4, 5e-1], [1000, 5000]
        # algs, lrs, n_epochs = ['adam'], [1e-3], [1000, ]
        algs, lrs, n_epochs = ['sgd'], [1e0], [10000]
        T = 4

        for a, l, e in zip(algs, lrs, n_epochs):
            for inhib_frac in [.5, .4, .25, .1]:
                params = deepcopy(default_D3_pars)
                params['sampler_params']['epoch_length'] = T
                params['net_type'] = 'DaleNet'
                params['net_params']['saturations'] = [0, 1e8]
                params['net_params'][
                    'save_folder'] = 'out/D3/dale/inhib_frac_{}/'.format(
                        inhib_frac)
                params['train_params'].update({
                    'optimizer_name': a,
                    'lr': l,
                    'n_epochs': e
                })
                params['net_params']['inhib_proportion'] = inhib_frac
                params['net_params']['l2_penalty'] = 0
                for test_name in params['test_suite'].keys():
                    params['test_suite'][test_name]['period'] = e // 4

                os.makedirs(params['net_params']['save_folder'], exist_ok=True)
                with open(
                        params['net_params']['save_folder'] +
                        'full_params.json', 'w+') as f:
                    json.dump(params, f, indent=4)

                main(params, seed)
                gc.collect()
Example #15
def main():
    print("Ara Voice trainer 1.0.0 in development")
    print("checking system")
    # NOTE: `from __future__ import ...` statements are only legal at module
    # top level; placing them inside a function raises a SyntaxError, so they
    # are omitted here, along with the imports this function never uses.
    import train
    import freeze
    print("Starting...")
    train.main()
    print("Exporting to .tflite")
    freeze.main()
    print("done")
Example #16
def run_experiment(run_config):
    for rt in replay_types:
        for bs in buffer_size:
            for i in range(num_of_runs):
                train_config = create_config(run_config, bs, rt)
                print(f'Starting training on: {train_config.name}')
                train.main(train_config)
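run_experiment reads replay_types, buffer_size and num_of_runs as module-level globals; plausible definitions, with illustrative values that are assumptions rather than the project's:

replay_types = ['uniform', 'prioritized']
buffer_size = [10000, 100000]
num_of_runs = 3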
Example #17
def main():
    csv_list = cvt_midi_2_csv('midi')
    with open('data/input.txt', 'w+', encoding='utf-8'):
        pass
    for csv in csv_list:
        cvt_csv_2_chrmid(csv)
    train.main()
Example #18
    def __call__(self, seed):
        algs, lrs, n_epochs = ['adam', 'sgd'], [3e-4, 5e-1], [1000, 5000]
        T = 4

        for a, l, e in zip(algs, lrs, n_epochs):
            for ed_init in [
                    'random', 'support_same', 'support_disjoint',
                    'support_random_e', 'support_same_with_overlap'
            ]:
                params = deepcopy(default_D2_pars)
                params['sampler_params']['epoch_length'] = T
                params['net_params']['saturations'] = [0, 1e8]
                params['net_params']['init_vectors_type'] = ed_init
                params['net_params'][
                    'save_folder'] = 'out/D2/relu_support/{}/{}/'.format(
                        ed_init, a)
                params['train_params'].update({
                    'optimizer_name': a,
                    'lr': l,
                    'n_epochs': e
                })
                for test_name in params['test_suite'].keys():
                    params['test_suite'][test_name]['period'] = e // 4

                os.makedirs(params['net_params']['save_folder'], exist_ok=True)
                with open(
                        params['net_params']['save_folder'] +
                        'full_params.json', 'w+') as f:
                    json.dump(params, f, indent=4)

                main(params, seed)
                gc.collect()
Example #19
    def test_1x_v100(self):
        """Run Wide ResNet CIFAR10 on 1x V100 GPUs for 2 epochs."""
        model_dir = tempfile.mkdtemp()
        FLAGS.num_epochs = 2
        FLAGS.arch = 'wrn26_10'
        FLAGS.model_dir = model_dir

        start_time = time.time()
        train.main([])
        benchmark_time = time.time() - start_time
        summaries = self.read_summaries(model_dir)

        # Summaries contain all the information necessary for the regression
        # metrics.
        wall_time, _, eval_error_rate = zip(*summaries['eval_error_rate'])
        wall_time = np.array(wall_time)
        sec_per_epoch = np.mean(wall_time[1:] - wall_time[:-1])
        end_error_rate = eval_error_rate[-1]

        # Assertions are deferred until the test finishes, so the metrics are
        # always reported and benchmark success is determined based on *all*
        # assertions.
        self.assertBetween(sec_per_epoch, 80., 84.)
        self.assertBetween(end_error_rate, 0.30, 0.36)

        # Use the reporting API to report single or multiple metrics/extras.
        self.report_wall_time(benchmark_time)
        self.report_metrics({
            'sec_per_epoch': sec_per_epoch,
            'error_rate': end_error_rate
        })
        self.report_extra('description',
                          'Toy 1 x V100 test for CIFAR10 WideResNet26_10.')
Example #20
def run_grid_search(run_config):
    for lr in lrs:
        for df in dfs:
            for i in range(num_of_runs):
                train_config = create_config_gs(run_config, lr, df)
                print(f'Starting training on: {train_config.name}')
                train.main(train_config)
Example #21
    def __call__(self, seed):
        algs, lrs, n_epochs = ['sgd'], [5e-1], [5000]
        # algs, lrs, n_epochs = ['adam', 'sgd'], [3e-4, 5e-1], [1000, 5000]

        for a, l, e in zip(algs, lrs, n_epochs):
            # for T in [3,10]:
            for T in [3]:
                params = deepcopy(default_D2_pars)
                params['sampler_params']['epoch_length'] = T
                params['net_params']['saturations'] = [0, 1e8]
                params['net_params'][
                    'save_folder'] = 'out/D2/relu/{}/T_{}/'.format(a, T)
                params['train_params'].update({
                    'optimizer_name': a,
                    'lr': l,
                    'n_epochs': e
                })
                params['test_suite']['fit_internal_representation'][
                    'batch_size'] = 512
                for test_name in params['test_suite'].keys():
                    params['test_suite'][test_name]['period'] = e // 4

                os.makedirs(params['net_params']['save_folder'], exist_ok=True)
                with open(
                        params['net_params']['save_folder'] +
                        'full_params.json', 'w+') as f:
                    json.dump(params, f, indent=4)

                main(params, seed)
                gc.collect()
Example #22
def train_translation_model(data_dir,
                            arch,
                            extra_flags=None,
                            task='translation'):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            '--task',
            task,
            data_dir,
            '--save-dir',
            data_dir,
            '--arch',
            arch,
            '--lr',
            '0.05',
            '--max-tokens',
            '500',
            '--max-epoch',
            '1',
            '--no-progress-bar',
            '--distributed-world-size',
            '1',
            '--source-lang',
            'in',
            '--target-lang',
            'out',
        ] + (extra_flags or []),
    )
    train.main(train_args)
Example #23
    def test_main(self, mock_gan_train, mock_define_train_ops,
                  mock_cyclegan_loss, mock_define_model, mock_data_provider,
                  mock_gfile):
        FLAGS.image_set_x_file_pattern = '/tmp/x/*.jpg'
        FLAGS.image_set_y_file_pattern = '/tmp/y/*.jpg'
        FLAGS.batch_size = 3
        FLAGS.patch_size = 8
        FLAGS.generator_lr = 0.02
        FLAGS.discriminator_lr = 0.3
        FLAGS.train_log_dir = '/tmp/foo'
        FLAGS.master = 'master'
        FLAGS.task = 0
        FLAGS.cycle_consistency_loss_weight = 2.0
        FLAGS.max_number_of_steps = 1

        mock_data_provider.provide_custom_datasets.return_value = (tf.zeros(
            [1, 2], dtype=tf.float32), tf.zeros([1, 2], dtype=tf.float32))

        train.main(None)
        mock_data_provider.provide_custom_datasets.assert_called_once_with(
            ['/tmp/x/*.jpg', '/tmp/y/*.jpg'], batch_size=3, patch_size=8)
        mock_define_model.assert_called_once_with(mock.ANY, mock.ANY)
        mock_cyclegan_loss.assert_called_once_with(
            mock_define_model.return_value,
            cycle_consistency_loss_weight=2.0,
            tensor_pool_fn=mock.ANY)
        mock_define_train_ops.assert_called_once_with(
            mock_define_model.return_value, mock_cyclegan_loss.return_value)
        mock_gan_train.assert_called_once_with(
            mock_define_train_ops.return_value,
            '/tmp/foo',
            get_hooks_fn=mock.ANY,
            hooks=mock.ANY,
            master='master',
            is_chief=True)
Example #24
def main(args):
    args = parser.parse_args(args)

    config = {
        'name': args.name,
        'n_gpu': args.gpus,
        'weights_path': args.weights_path,
        'arch': {
            'type': args.model_type,
            'args': {
                'variant': args.model_layers,
                'num_classes': 2,
                'print_model': True
            }
        },
        'loss': 'cross_entropy_loss',
        'metrics': ['accuracy'],
        'data_loader': {
            'type': 'COVID_Dataset',
            'args': {
                'root': args.dataset,
                'k_fold_idx': args.k_fold_idx,
                'mode': 'ct',
                'pos_neg_file': args.labels,
                'splits': [0.7, 0.15, 0.15],
                'replicate_channel': 1,
                'batch_size': 32,
                'input_size': 224,
                'num_workers': 2,
                'self_supervised': 0
            }
        },
        'optimizer': {
            'type': 'Adam',
            'args': {
                'lr': args.learning_rate,
                'weight_decay': args.weight_decay,
                'amsgrad': True
            }
        },
        'lr_scheduler': {
            'type': 'StepLR',
            'args': {
                'step_size': args.lr_step_size,
                'gamma': 0.1
            }
        },
        'trainer': {
            'epochs': args.epochs,
            'save_dir': args.outdir,
            'save_period': 1,
            'verbosity': 2,
            'monitor': 'min val_loss',
            'early_stop': 10,
            'tensorboard': False
        }
    }

    config = ConfigParser(config)
    train.main(config)
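A hypothetical command line for this entry point; the flag names are guesses inferred from the attributes the config reads (the real parser is defined elsewhere in the module):

main([
    '--name', 'covid_baseline',   # hypothetical flag names and values
    '--gpus', '1',
    '--dataset', 'data/covid_ct',
    '--epochs', '50',
])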
Example #25
def objective(params):
    try:
        bounding_box_fbeta = params["bounding_box_fbeta"]
        ir_coverage = params["ir_coverage"]
        crop = params["crop"]
        dropout = params["dropout"]
        global workspace, data_dir, preprocess_config, training_config, analysis_results
        # preprocess data
        pp_data_dir = os.path.join(workspace, "data")
        if not os.path.exists(pp_data_dir):
            os.mkdir(pp_data_dir)
        f_scores = visualize_analysis_results.f_score(
            analysis_results["precisions"], analysis_results["recalls"],
            bounding_box_fbeta)
        i = np.argmax(f_scores)
        bounding_box = np.unravel_index(i, f_scores.shape) + np.array(
            analysis_results["bb_range"][0])
        if tuple(bounding_box) != tuple(
                preprocess_config["cropping"]["bb_size"]):
            preprocess_config["cropping"]["bb_size"] = bounding_box
            preprocess_dataset.main(preprocess_config,
                                    data_dir,
                                    pp_data_dir,
                                    do_resample=False,
                                    do_normalize=False,
                                    crops=[crop])
        histogram = analyze_dataset.analyze(
            pp_data_dir,
            do_bounding_boxes=False)["histograms"][LABELS.index(crop)]
        _, i_min, i_max = visualize_analysis_results.crop_histogram(
            histogram, ir_coverage)
        if (i_min, i_max) != preprocess_config["normalization"][f"ir_{crop}"]:
            preprocess_config["normalization"][f"ir_{crop}"] = (i_min, i_max)
            preprocess_dataset.main(preprocess_config,
                                    data_dir,
                                    pp_data_dir,
                                    do_resample=False,
                                    do_crop=False,
                                    crops=[crop])

        # train
        if os.path.exists(training_config["workspace"]):
            shutil.rmtree(training_config["workspace"])
        os.mkdir(training_config["workspace"])

        def get_model(input_shape):
            return models.simple_net.get_model(input_shape, dropout=dropout)

        train.main(training_config, custom_model_generator=get_model)
        with open(os.path.join(training_config["workspace"], "summary.json"),
                  "r") as file:
            summary = json.load(file)
        return {
            "loss": -summary["auc_mean"],
            "status": STATUS_OK,
            "epochs": summary["epochs_mean"],
        }
    except Exception as e:
        return {"status": STATUS_FAIL, "attachments": {"exception": str(e)}}
Example #26
def main():
    for i in range(7, 10):
        train.main()

        src = Path('./model/cost.h5')
        to = Path(f'./model/cost-1024x4-1000x1000x{i + 1:02d}00.h5')

        copy(str(src), str(to))
Example #27
    def test_experiment_vhrs1s2(self):

        args = self.get_common_args()
        args["experiment"] = "vhrs1s2"
        try:
            main(**args)
        except Exception as err:
            self.fail(err)
Example #28
def main(args):
    """
    Main running script
    """

    # Get the config file
    config = util.get_config(args.config)
    root_dir = config['ROOT_DIR']
    # fill out initial folders
    if not os.path.isdir('{}/metadata'.format(root_dir)):
        os.mkdir('{}/metadata'.format(root_dir))
        print('created metadata dir')
    if not os.path.isdir('{}'.format(config['OBS_ROOT'])):
        os.mkdir('{}'.format(config['OBS_ROOT']))
        print('created OBS dir')
    if not os.path.isdir('{}'.format(config['ESTIMATORS_ROOT'])):
        os.mkdir('{}'.format(config['ESTIMATORS_ROOT']))
        print('created ESTIMATORS dir')
    if not os.path.isdir('{}'.format(config['PREDICTIONS_ROOT'])):
        os.mkdir('{}'.format(config['PREDICTIONS_ROOT']))
        print('created PREDICTIONS dir')
    if not os.path.isdir('{}'.format(config['QAQC_ROOT'])):
        os.mkdir('{}'.format(config['QAQC_ROOT']))
        print('created QAQC dir')
    if not os.path.isdir('{}'.format(config['PLOT_ROOT'])):
        os.mkdir('{}'.format(config['PLOT_ROOT']))
        print('created PLOT dir')

    # --- clean database ---
    if args.clean:
        clean.main(config)
    else:
        print('skipping database cleaning')
    # --- download data ---
    if args.download:
        download.main(config)
    else:
        print('skipping download of new data')
    # --- train models
    if args.train:
        train.main(config)
    else:
        print('skip training')
    # --- make predictions ---
    if args.predict:
        predict.main(config)
    else:
        print('skipping predictions')
    # --- run qaqc checks ---
    if args.qaqc:
        qaqc.main(config)
    else:
        print('skipping qaqc')
    # --- plot ---
    if args.plot:
        plot.main(config)
    else:
        print('skipping plots')
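The repeated isdir/mkdir/print blocks above can be collapsed; a compact equivalent under the same config keys:

os.makedirs('{}/metadata'.format(root_dir), exist_ok=True)
for key in ('OBS_ROOT', 'ESTIMATORS_ROOT', 'PREDICTIONS_ROOT',
            'QAQC_ROOT', 'PLOT_ROOT'):
    os.makedirs(config[key], exist_ok=True)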
Example #29
File: main.py  Project: daandouwe/glove
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('mode', choices=['train', 'plot', 'eval', 'hogwild'])

    # IO arguments.
    parser.add_argument('--name', type=str, default='text8.10k',
                        help='name for model')
    parser.add_argument('--vocab-dir', type=str, default='data/vocab',
                        help='input path for vocabulary')
    parser.add_argument('--matrix-dir', type=str, default='data/cooccur',
                        help='input path for co-occurrence matrix')
    parser.add_argument('--out-dir', type=str, default='vec',
                        help='output directory to write vectors')
    parser.add_argument('--model-dir', type=str, default='models',
                        help='directory to save model')
    parser.add_argument('--log-dir', type=str, default='log',
                        help='directory to log losses')
    parser.add_argument('--vec-path', type=str, default='',
                        help='path to load vectors for plotting')
    parser.add_argument('--gensim-format', action='store_true',
                        help='save vectors in gensim format')

    # Model arguments.
    parser.add_argument('--emb-dim', type=int, default=50,
                        help='dimension of vectors')

    # Train arguments.
    parser.add_argument('--num-updates', type=int, default=10000,
                        help='number of parameter updates')
    parser.add_argument('--batch-size', type=int, default=512,
                        help='size of minibatches')
    parser.add_argument('--lr', type=float, default=1e-2,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed')
    parser.add_argument('--use-schedule', action='store_true',
                        help='using scheduler for optimizer')
    parser.add_argument('--save-every', type=int, default=1000,
                        help='how often to save the model parameters')
    parser.add_argument('--print-every', type=int, default=100,
                        help='how often to print loss to screen')

    # Plot arguments.
    parser.add_argument('--tsne', action='store_true',
                        help='plot tsne')
    parser.add_argument('--matrices', action='store_true',
                        help='plot matrices and decomposition')

    args = parser.parse_args()

    if args.mode == 'train':
        train.main(args)
    if args.mode == 'plot':
        plot.main(args)
    if args.mode == 'hogwild':
        hogwild.main(args)
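    # NOTE: the parser accepts mode 'eval', but no branch above dispatches it.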
Example #30
def main():
    logger.info("Running main ...")

    train.main()
    predict.main()

    logger.info("Run complete ...")

    return
Example #31
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--type", default=0, help="number of param")

    args = parser.parse_args()

    r.reset()
    train.main(args.type)
    m.main(args.type)
Example #32
def main():

    print("TRAINING")
    train.main()

    print("PREDICTING")
    predict.main()

    print("EVALUATING")
    evaluate.main()
Example #33
  def test_main(self):

    FLAGS.image_file_patterns = [
        os.path.join(FLAGS.test_srcdir, TESTDATA_DIR, 'black/*.jpg'),
        os.path.join(FLAGS.test_srcdir, TESTDATA_DIR, 'blond/*.jpg'),
        os.path.join(FLAGS.test_srcdir, TESTDATA_DIR, 'brown/*.jpg'),
    ]
    FLAGS.max_number_of_steps = 1
    FLAGS.steps_per_eval = 1
    FLAGS.batch_size = 1
    train.main(None, _test_generator, _test_discriminator)
Example #34
  def test_main(self, mock_provide_data):
    FLAGS.image_file_patterns = ['/tmp/A/*.jpg', '/tmp/B/*.jpg', '/tmp/C/*.jpg']
    FLAGS.max_number_of_steps = 10
    FLAGS.batch_size = 2
    num_domains = 3

    images_shape = [FLAGS.batch_size, FLAGS.patch_size, FLAGS.patch_size, 3]
    img_list = [tf.zeros(images_shape)] * num_domains
    lbl_list = [tf.one_hot([0] * FLAGS.batch_size, num_domains)] * num_domains
    mock_provide_data.return_value = (img_list, lbl_list)

    train.main(None)
Example #35
  def _test_build_graph_helper(self, weight_factor):
    FLAGS.max_number_of_steps = 0
    FLAGS.weight_factor = weight_factor
    FLAGS.batch_size = 9
    FLAGS.patch_size = 32

    mock_imgs = np.zeros(
        [FLAGS.batch_size, FLAGS.patch_size, FLAGS.patch_size, 3],
        dtype=np.float32)
    with mock.patch.object(train, 'data_provider') as mock_data_provider:
      mock_data_provider.provide_data.return_value = mock_imgs
      train.main(None)
Example #36
  def testTrainingAndInferenceGraphsAreCompatible(
      self, mock_provide_custom_data, unused_mock_gan_train):
    # Training and inference graphs can get out of sync if changes are made
    # to one but not the other. This test will keep them in sync.

    # Save the training graph
    train_sess = tf.Session()
    FLAGS.image_set_x_file_pattern = '/tmp/x/*.jpg'
    FLAGS.image_set_y_file_pattern = '/tmp/y/*.jpg'
    FLAGS.batch_size = 3
    FLAGS.patch_size = 128
    FLAGS.generator_lr = 0.02
    FLAGS.discriminator_lr = 0.3
    FLAGS.train_log_dir = self._export_dir
    FLAGS.master = 'master'
    FLAGS.task = 0
    FLAGS.cycle_consistency_loss_weight = 2.0
    FLAGS.max_number_of_steps = 1
    mock_provide_custom_data.return_value = (
        tf.zeros([3, 4, 4, 3,]), tf.zeros([3, 4, 4, 3]))
    train.main(None)
    init_op = tf.global_variables_initializer()
    train_sess.run(init_op)
    train_saver = tf.train.Saver()
    train_saver.save(train_sess, save_path=self._ckpt_path)

    # Create inference graph
    tf.reset_default_graph()
    FLAGS.patch_dim = FLAGS.patch_size
    logging.info('dir_path: %s', os.listdir(self._export_dir))
    FLAGS.checkpoint_path = self._ckpt_path
    FLAGS.image_set_x_glob = self._image_glob
    FLAGS.image_set_y_glob = self._image_glob
    FLAGS.generated_x_dir = self._genx_dir
    FLAGS.generated_y_dir = self._geny_dir

    inference_demo.main(None)
    logging.info('gen x: %s', os.listdir(self._genx_dir))

    # Check that the image names match
    self.assertSetEqual(
        set(_basenames_from_glob(FLAGS.image_set_x_glob)),
        set(os.listdir(FLAGS.generated_y_dir)))
    self.assertSetEqual(
        set(_basenames_from_glob(FLAGS.image_set_y_glob)),
        set(os.listdir(FLAGS.generated_x_dir)))

    # Check that each image in the directory looks as expected
    for directory in [FLAGS.generated_x_dir, FLAGS.generated_y_dir]:
      for base_name in os.listdir(directory):
        image_path = os.path.join(directory, base_name)
        self.assertRealisticImage(image_path)
Example #37
  def test_build_graph(self, gan_type):
    FLAGS.max_number_of_steps = 0
    FLAGS.gan_type = gan_type

    # Mock input pipeline.
    mock_imgs = np.zeros([FLAGS.batch_size, 28, 28, 1], dtype=np.float32)
    mock_lbls = np.concatenate(
        (np.ones([FLAGS.batch_size, 1], dtype=np.int32),
         np.zeros([FLAGS.batch_size, 9], dtype=np.int32)), axis=1)
    with mock.patch.object(train, 'data_provider') as mock_data_provider:
      mock_data_provider.provide_data.return_value = (
          mock_imgs, mock_lbls, None)
      train.main(None)
Example #38
  def test_full_flow(self, mock_data_provider):
    FLAGS.eval_dir = self.get_temp_dir()
    FLAGS.batch_size = 16
    FLAGS.max_number_of_steps = 2
    FLAGS.noise_dims = 3

    # Construct mock inputs.
    mock_imgs = np.zeros([FLAGS.batch_size, 28, 28, 1], dtype=np.float32)
    mock_lbls = np.concatenate(
        (np.ones([FLAGS.batch_size, 1], dtype=np.int32),
         np.zeros([FLAGS.batch_size, 9], dtype=np.int32)), axis=1)
    mock_data_provider.provide_data.return_value = (mock_imgs, mock_lbls, None)

    train.main(None)
Example #39
  def test_build_graph(self, conditional, use_sync_replicas):
    FLAGS.max_number_of_steps = 0
    FLAGS.conditional = conditional
    FLAGS.use_sync_replicas = use_sync_replicas
    FLAGS.batch_size = 16

    # Mock input pipeline.
    mock_imgs = np.zeros([FLAGS.batch_size, 32, 32, 3], dtype=np.float32)
    mock_lbls = np.concatenate(
        (np.ones([FLAGS.batch_size, 1], dtype=np.int32),
         np.zeros([FLAGS.batch_size, 9], dtype=np.int32)), axis=1)
    with mock.patch.object(train, 'data_provider') as mock_data_provider:
      mock_data_provider.provide_data.return_value = (
          mock_imgs, mock_lbls, None, None)
      train.main(None)
Example #40
  def test_run_one_train_step(self, mock_data_provider):
    FLAGS.max_number_of_steps = 1
    FLAGS.gan_type = 'unconditional'
    FLAGS.batch_size = 5
    FLAGS.grid_size = 1
    tf.set_random_seed(1234)

    # Mock input pipeline.
    mock_imgs = np.zeros([FLAGS.batch_size, 28, 28, 1], dtype=np.float32)
    mock_lbls = np.concatenate(
        (np.ones([FLAGS.batch_size, 1], dtype=np.int32),
         np.zeros([FLAGS.batch_size, 9], dtype=np.int32)), axis=1)
    mock_data_provider.provide_data.return_value = (mock_imgs, mock_lbls, None)

    train.main(None)
Example #41
def train(query):
    ans = "<NA>"
    import train
    ans = list(train.main(query))
    if ans:
        ans = {"train": ans}
    return [ans]
Example #42
def train_translation_model(data_dir, arch, extra_flags=None):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            '--task', 'translation',
            data_dir,
            '--save-dir', data_dir,
            '--arch', arch,
            '--optimizer', 'nag',
            '--lr', '0.05',
            '--max-tokens', '500',
            '--max-epoch', '1',
            '--no-progress-bar',
            '--distributed-world-size', '1',
            '--source-lang', 'in',
            '--target-lang', 'out',
        ] + (extra_flags or []),
    )
    train.main(train_args)
Example #43
def train_language_model(data_dir, arch):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            '--task', 'language_modeling',
            data_dir,
            '--arch', arch,
            '--optimizer', 'nag',
            '--lr', '1.0',
            '--criterion', 'adaptive_loss',
            '--adaptive-softmax-cutoff', '5,10,15',
            '--decoder-layers', '[(850, 3)] * 2 + [(1024,4)]',
            '--decoder-embed-dim', '280',
            '--max-tokens', '500',
            '--tokens-per-sample', '500',
            '--save-dir', data_dir,
            '--max-epoch', '1',
            '--no-progress-bar',
            '--distributed-world-size', '1',
        ],
    )
    train.main(train_args)
Example #44
    def test_train(self):
        argv = ["train.py", "-niter", "2", "-batch", "2", "tests/test_train_data.arff", "tests/test_train_model.model"]
        train.main(argv)

        argv = ["train.py", "-niter", "2", "-batch", "2", "tests/test_train_data.arff", "tests/test_train_model.model"]
        train.main(argv)

        argv = ["train.py", "-niter", "2", "-batch", "2", "tests/test_train_data.arff", "tests/test_train_model.model"]
        train.main(argv)
Example #45
def main(config_file):

    with open(config_file) as f:
        # use safe_load instead of load
        dataMap = yaml.safe_load(f)


    filebase = os.path.splitext(os.path.basename(config_file))[0]

    sig_file = os.path.join('../data', filebase + '_data.mat')
    scat_file = os.path.join('../data', filebase + '_scat.mat')

    labels = dataMap["data"]["labels"]
    Nsig = dataMap["data"]["N"]

    if (('derivative' in dataMap["data"]) and
        dataMap['data']["derivative"]):
        extract_derivatives(labels, Nsig, sig_file)
    else:
        extract_signals(labels, Nsig, sig_file)

    if dataMap["features"] == 'holder':
        holder_exp(sig_file, scat_file)
    elif 'scattering_transfer' in dataMap["features"]:
        scat = dataMap["features"]["scattering_transfer"]
        scatter_transfer(sig_file, scat_file, scat["N"], scat["T"])
    else:
        scat = dataMap["features"]["scatter"]
        fractal_scatter(sig_file, scat_file, scat["N"], scat["M"], scat["T"])

    score, X, y, yhat = train.main(scat_file, dataMap["machine_learning"])

    # Make the summary plot
    fig = plot_results(y, yhat, score)
    fig.savefig(os.path.join('../results/', filebase + '.eps'))

    dataMap["results"] = str(score)

    outyaml = os.path.join('../results', filebase+'.yaml')

    with open(outyaml, 'w') as f:
        f.write(yaml.dump(dataMap, default_flow_style=False))
Example #46
theano.config.floatX = 'float32'


np.random.seed(0)


if __name__ == '__main__':
    import argparse
    import train

    parser = argparse.ArgumentParser(description='Train/Test A Language Model.')

    parser.add_argument('-data',  help='path to data')

    # NN architecture
    parser.add_argument('--vocab',  type=int, default=100000000, help='vocabulary size')
    parser.add_argument('--emb',    type=int, default=32,        help='dimension of embeddings')
    parser.add_argument('--hidden', type=int, default=32,        help='dimension of hidden layer')
    parser.add_argument('--layer',  type=int, default=1,         help='number of layers')

    # Training Parameters
    parser.add_argument('--batch', type=int, default=10, help='batch size')
    parser.add_argument('--opt', default='adagrad', help='optimization method')
    parser.add_argument('--epoch', type=int, default=100, help='number of epochs')
    parser.add_argument('--lr', type=float, default=0.0001, help='learning rate')
    parser.add_argument('--save', type=bool, default=False, help='parameters to be saved or not')
    parser.add_argument('--init_emb', default=None, help='Initial embedding to be loaded')
    parser.add_argument('--n_words',  type=int, default=100, help='number of words')

    train.main(parser.parse_args())
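For testing, parse_args also accepts an explicit argv list instead of reading sys.argv; a sketch with placeholder values:

args = parser.parse_args(['-data', 'data/corpus.txt', '--emb', '64',
                          '--hidden', '64', '--epoch', '10'])
train.main(args)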
Example #47
#!/usr/bin/env python
from secrets import *
from twython import TwythonStreamer
import train
train.main()
from colorama import init, Fore, Back 
train.categories[0].colour = Fore.GREEN + Back.BLACK
train.categories[1].colour = Fore.RED + Back.WHITE
reset = Fore.RESET + Back.RESET

class SentimentStreamer(TwythonStreamer):
    def on_success(self, data):
        if 'text' in data:
            mood = train.classify(data['text'])
            if mood is None:
                print(data['text'])
                print("neutral")
            else:
                print(mood.colour + data['text'] + reset)
                print(mood)

    def on_error(self, status_code, data):
        print(status_code)

stream = SentimentStreamer(APP_KEY, APP_SECRET, 
           OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
stream.statuses.filter(locations=[-6.833496, 49.894634, 2.087402, 59.623325])

Example #48
File: test.py  Project: pdekker12/nlp2
    def test_pipeline(self):
        train.corpus_paths = ['../data/en-cs-test.txt']
        train.main()
Example #49
    parser.add_argument('--init_emb', default=None, help='Initial embedding to be loaded')
    parser.add_argument('--opt', default='adam', help='optimization method')
    parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
    parser.add_argument('--reg', type=float, default=0.0001, help='L2 Reg rate')
    parser.add_argument('--batch', type=int, default=32, help='batch size')
    parser.add_argument('--epoch', type=int, default=500, help='number of epochs to train')
    parser.add_argument('--no-shuffle', action='store_true', default=False, help='don\'t shuffle training data')

    """ test options """
    parser.add_argument('--model', default=None, help='path to model')
    parser.add_argument('--arg_dict', default=None, help='path to arg dict')
    parser.add_argument('--vocab_dict', default=None, help='path to vocab dict')
    parser.add_argument('--emb_dict', default=None, help='path to emb dict')

    argv = parser.parse_args()

    print()
    print(argv)
    print()

    if argv.mode == 'train':
        import train
        train.main(argv)
    else:
        import test
        assert argv.model is not None
        assert argv.arg_dict is not None
        assert argv.vocab_dict is not None
        assert argv.emb_dict is not None
        test.main(argv)
Example #50
File: ui.py  Project: guanw/GP
    def OnTrain(self, event):
        train.main("master")
Example #51
def main():

  parser = argparse.ArgumentParser(description="CliRel (Clinical Relation) \
                                    extractor- trains a classifier able to \
                                    determine the type of relation between \
                                    two medical concepts in a sentence.")

  # Add arguments here
  parser.add_argument("--train", nargs=3, 
                      metavar=("train_dir", "model_file", "model_type"), type=str, 
                      help="Directory should contain three subdirs (txt, \
                            concept, rel) containing .txt, .con, .rel files. \
                            Will train a classifier on this data. \
                            Trained model will be written to specified model file.\n \
                            Current model types:[svm-spt, svm-insert, svm-suffix]",
                      default=None)
  parser.add_argument("--predict", nargs=3,
                      metavar=("test_dir", "model_file", "results_dir"), type=str,
                      help="Directory contains concept and text files \
                            that the specified (or default) model will predict. \
                            Resulting relation files will be written to \
                            the specified results directory.",
                      default=None)
  parser.add_argument("--evaluate", nargs=3,
                      metavar=("test_dir", "gold_dir", "eval_file"), type=str,
                      help="Evaluate the relation files in the test directory \
                      in comparison with those in the gold directory. The \
                      results will be written to the evaluation file.", 
                      default=None)
  parser.add_argument("--verbose", action="store_true",
                      default=False, help="Show debugging info.")
  
  args = parser.parse_args()

  if not args.predict and not args.train and not args.evaluate:
    sys.stderr.write("ERROR: No valid flag specified.\n")
    parser.print_help()
    sys.exit(1)

  if args.train:
    checkDir(args.train[0])
    checkDir(os.path.dirname(args.train[1]))
    if (os.path.isdir(args.train[1])):
      sys.stderr.write("ERROR: Model expected to be a file, %s is a directory\n"
                 % args.train[1])
      sys.exit(1)

    train.main(args.train[0], args.train[1], args.train[2], args.verbose)

  if args.predict:
    checkDir(args.predict[0])
    checkFile(args.predict[1])
    checkDir(args.predict[2])
    predict.main(args.predict[0], args.predict[1], args.predict[2], args.verbose)

  if args.evaluate:
    checkDir(args.evaluate[0])
    checkDir(args.evaluate[1])
    checkDir(os.path.dirname(args.evaluate[2]))
    if (os.path.isdir(args.evaluate[2])):
      sys.stderr.write("ERROR: eval_file expected to be a file, %s is a \
      directory\n" % args.evaluate[2])
      sys.exit(1)

    evaluate.main(args.evaluate[0], args.evaluate[1], args.evaluate[2], args.verbose)
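A hypothetical invocation of this CLI using the flags defined above (the script name, paths and model file are placeholders):

import sys

sys.argv = ['clirel.py', '--verbose', '--train',
            'data/train_dir', 'models/svm_spt.model', 'svm-spt']
main()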
Example #52
from bionlp.utils.crf_arguments import crf_model_arguments
from train import main

if __name__=="__main__":
    config_params=crf_model_arguments()
    # Required for choosing CRF network models.
    config_params['CRF_MODEL_ON']=True
    main(config_params)
Example #53
def main(run_dir="rfe_chain", start=None, start_auc=None,
         verbose=None, logfile=None):
    """
    Main function to run the chain.
    """
    if logfile is not None:
        sys.stdout = open(logfile, "w")

    # load starting json
    with open(start) as f:
        start = json.load(f)
    if start_auc is None:
        start_auc = 0.8

    start['AUC_SCORE_PATH'] = run_dir

    # have to load a list of possible features to replace with
    if all("10feat" in feature for feature in start['FEATURES']):
        with open("10featlist.json") as fh:
            featlist = json.load(fh)['FEATURES']
    else:
        featlist = get_featlist()

    # and possible preceding modifiers
    modlist = get_modlist()

    # create a list of combinations of these two lists
    comblist = []
    for mod in modlist:
        for feature in featlist:
            comblist.append('{0}_{1}_'.format(mod, feature))

    # define sampled json
    prevsample = copy.deepcopy(start)

    # initialise auc
    prevauc = start_auc

    first = 1
    counter = 0
    converged = False
    # will decide what constitutes converged later
    while not converged:

        sample = copy.deepcopy(prevsample)
        # If this isn't the first one, sample new settings
        if not first:
            # Sample a new hdf5 and replace existing at random
            #   Or, just push it in, or just drop a hdf5 at random
            utils.print_verbose("===== Sampling new proposal "
                                "settings ======", flag=verbose)
            # sample new settings
            # shuffle combinations
            random.shuffle(comblist)

            # pop 3 features off this
            added = [comblist.pop() for i in range(3)]

            # add them to the settings
            sample['FEATURES'] = added

            utils.print_verbose("============================"
                                "===============", flag=verbose)

        # ensure that ordering of the features is the same between jsons
        sample['FEATURES'].sort()

        # Then save this new json with a descriptive name
        # unless it's already been generated
        if first:
            featurerecord = "".join(sample['FEATURES'])
        else:
            featurerecord = featurerecord + "".join(sample['FEATURES'])
        md5name = hashlib.md5(featurerecord.encode('UTF-8')).hexdigest()
        # get a list of the json filenames already present in run_dir
        # (glob returns full paths, so compare basenames)
        existingjsons = [os.path.basename(p)
                         for p in glob.glob(run_dir + "/*.json")]
        # check if the md5 exists
        if md5name + ".json" in existingjsons:
            # then load the results of that run
            with open(os.path.join(run_dir, "AUC_scores.csv"), "r") as fh:
                c = csv.reader(fh, delimiter="\t")
                utils.print_verbose("Already ran {0},"
                                    "reading from results.".format(md5name), flag=verbose)
                for line in c:
                    # look for that md5sum
                    if md5name in line[0]:
                        auc_score = line[-1]
        else:
            # save a json with this name and run train.py on it
            samplefname = os.path.join(run_dir, md5name + ".json")
            utils.print_verbose("Creating new settings"
                                " file for {0}".format(samplefname), flag=verbose)
            with open(samplefname, "w") as fh:
                json.dump(sample, fh)
            # call train.py
            try:
                if first:
                    auc_score_dict = train.main(samplefname, verbose=verbose,
                                                store_models=False, store_features=True)
                else:
                    picklefname = prevsamplefname.split(".")[0] + \
                        "_feature_dump.pickle"
                    # load the features saved in the last run
                    auc_score_dict = train.main(samplefname, verbose=verbose,
                                                store_models=False, store_features=True,
                                                load_pickled=picklefname)
                prevsamplefname = samplefname
                auc_score = auc_score_dict['all']
            except IndexError:
                print("Warning: accidentally added invalid feature.")
                os.remove(samplefname)
                # set auc to zero so these settings are not accepted
                auc_score = 0

        prevsample = sample

        # can't be first anymore
        first = 0

        # as it may be bad manners to run infinite loops
        counter += 1
        if counter > 100:
            converged = True

    return None
Example #54
import argparse
import logging
import pprint

import configurations
from train import main

logger = logging.getLogger(__name__)

# Get the arguments
parser = argparse.ArgumentParser()
parser.add_argument(
    "--proto",  default="get_config_ch_topical_former_douban",
    help="Prototype config to use for config")
parser.add_argument(
    "--bokeh",  default=False, action="store_true",
    help="Use bokeh server for plotting")
parser.add_argument(
    "--mode", choices=["train", "translate"], default='translate',
    help="The mode to run. In the `train` mode a model is trained."
         " In the `translate` mode a trained model is used to translate"
         " an input file and generates tokenized translation.")
parser.add_argument(
    "--test-file", default='', help="Input test file for `translate` mode")
args = parser.parse_args()


if __name__ == "__main__":
    # Get configurations for model
    configuration = getattr(configurations, args.proto)()
    # configuration['test_set'] = args.test_file
    logger.info("Model options:\n{}".format(pprint.pformat(configuration)))
    # Get data streams and call main
    main(args.mode, configuration, args.bokeh)
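# Assuming the script above is saved as, say, run.py (the file name is an
# assumption), a run might look like:
#
#     python run.py --proto get_config_ch_topical_former_douban --mode translate
#
# Note that --test-file is parsed but, with the configuration['test_set']
# line commented out above, it is not forwarded to the configuration.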
Example #55
def main(mcmcdir="hdf5mcmc", start=None, start_auc=None,
         verbose=True, logfile=None, discr_flag=False):
    """
    Contains the main loop for this script: a pseudo-Metropolis-Hastings
    MCMC over feature sets, searching for AUC-optimal combinations of HDF5s.
    mcmcdir - directory where settings jsons and AUC results are stored
    start - location of the json settings file to begin at
    start_auc - AUC of the starting settings (defaults to 0.8)
    logfile - if given, stdout is redirected to this file
    discr_flag - score with discriminate.main instead of train.main
    """
    if logfile is not None:
        sys.stdout = open(logfile, "w")
    # pseudo-code for the MCMC iteration
    # want it to start with the probably good features
    with open(start) as f:
        start = json.load(f)
    # default the starting AUC if none was given
    startauc = 0.8 if start_auc is None else start_auc

    # hardcode AUC results to the hdf5mcmc directory
    start['AUC_SCORE_PATH'] = mcmcdir

    # have to load a list of possible features to replace with
    if all("10feat" in feature for feature in start['FEATURES']):
        with open("10featlist.json") as fh:
            featlist = json.load(fh)['FEATURES']
    else:
        featlist = get_featlist()

    # and possible preceding modifiers
    modlist = get_modlist()

    # define sampled json
    prevsample = copy.deepcopy(start)

    # initialise auc
    prevauc = startauc

    counter = 0
    converged = False
    # will decide what constitutes converged later
    while not converged:

        sample = copy.deepcopy(prevsample)
        # Sample a new hdf5 and replace existing at random
        #   Or, just push it in, or just drop a hdf5 at random
        utils.print_verbose("===== Sampling new proposal "
                            "settings ======", flag=verbose)
        u = np.random.rand()
        if u < 0.25:
            # drop an element at random
            features = sample['FEATURES'][:]
            random.shuffle(features)
            dropped = features.pop()
            sample['FEATURES'] = features
            utils.print_verbose(
                "Dropped feature {0}".format(dropped),
                flag=verbose)
        elif u > 0.25 and u < 0.5:
            # keep trying to sample a new feature until we
            # find one that's not in there already
            while True:
                # push a new feature, but don't remove an old one
                newfeature = random.sample(featlist, 1)[0]
                newmod = random.sample(modlist, 1)[0]
                added = '{0}_{1}_'.format(newmod, newfeature)
                if added not in sample['FEATURES']:
                    break
            sample['FEATURES'].append(added)
            utils.print_verbose(
                "Added feature {0}".format(added),
                flag=verbose)
        elif u > 0.5:
            # push a new feature and remove an old one
            features = sample['FEATURES'][:]
            random.shuffle(features)
            dropped = features.pop()
            # keep trying to sample a new feature until we
            # find one that's not in there already
            while True:
                # sample a replacement for the dropped feature
                newfeature = random.sample(featlist, 1)[0]
                newmod = random.sample(modlist, 1)[0]
                added = '{0}_{1}_'.format(newmod, newfeature)
                if added not in sample['FEATURES']:
                    break
            features.append(added)
            sample['FEATURES'] = features
            utils.print_verbose("Switched feature {0} for "
                                "{1}".format(dropped, added), flag=verbose)
        utils.print_verbose("============================"
                            "===============", flag=verbose)
        # ensure that ordering of the features is the same between jsons
        sample['FEATURES'].sort()

        # Then save this new json with a descriptive name
        # unless it's already been generated
        md5name = hashlib.md5(
            "".join(sample['FEATURES']).encode('UTF-8')).hexdigest()
        # get a list of the json file names in the mcmcdir
        # (glob returns full paths, so compare basenames)
        existingjsons = [os.path.basename(p)
                         for p in glob.glob(mcmcdir + "/*.json")]
        # check if the md5 exists
        if md5name + ".json" in existingjsons:
            # then load the results of that run
            with open(os.path.join(mcmcdir, "AUC_scores.csv"), "r") as fh:
                c = csv.reader(fh, delimiter="\t")
                utils.print_verbose("Already ran {0},"
                                    "reading from results.".format(md5name), flag=verbose)
                for line in c:
                    # look for that md5sum
                    if md5name in line[0]:
                        auc_score = line[-1]
        else:
            # save a json with this name and run train.py on it
            samplefname = os.path.join(mcmcdir, md5name + ".json")
            utils.print_verbose("Creating new settings"
                                " file for {0}".format(samplefname), flag=verbose)
            with open(samplefname, "w") as fh:
                json.dump(sample, fh)
            # call train.py or discriminate.py
            if discr_flag:
                try:
                    auc_score_dict = discriminate.main(samplefname,
                                                       verbose=verbose)
                    # keep the variable name even though this is no longer
                    # an AUC score: here a *low* accuracy is desirable, so
                    # score with 1 - accuracy
                    auc_score = 1 - auc_score_dict['all']
                except IndexError:
                    print("Warning: accidentally added invalid feature.")
                    os.remove(samplefname)
                    # set auc to zero so these settings are not accepted
                    auc_score = 0
            else:
                try:
                    auc_score_dict = train.main(samplefname,
                                                verbose=verbose, store_models=False)
                    auc_score = auc_score_dict['all'] - 0.5
                except IndexError:
                    print("Warning: accidentally added invalid feature.")
                    os.remove(samplefname)
                    # set auc to zero so these settings are not accepted
                    auc_score = 0

        utils.print_verbose("==== Acceptance calculation ====", flag=verbose)
        # compute acceptance probability from AUC, clipped to [0, 1]:
        #     r = max(min(1, AUC / previous AUC), 0)
        acceptance = np.max([np.min([1, auc_score / prevauc]), 0])

        u = np.random.rand()
        # accept new point with probability r
        if u < acceptance:
            prevsample = sample
            # save current auc
            prevauc = auc_score
            utils.print_verbose("accepting new settings with probability "
                                "{0}".format(acceptance), flag=verbose)
        else:
            utils.print_verbose("rejecting new settings with probability "
                                "{0}".format(1.0 - acceptance), flag=verbose)
        utils.print_verbose("================================", flag=verbose)
        # on rejection prevsample is left untouched, so the chain continues
        # from where it was

        # as it may be bad manners to run infinite loops
        counter += 1
        if counter > 100:
            converged = True
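# The accept/reject step above is the standard Metropolis rule with the
# (shifted) AUC playing the role of an unnormalised density: a proposal
# scoring at least as well as the current settings is always accepted, a
# worse one with probability new/previous. A minimal self-contained sketch
# (metropolis_accept is an illustrative name; prev_score is assumed > 0):
import numpy as np


def metropolis_accept(new_score, prev_score, rng=np.random):
    # r = max(min(1, new/prev), 0); the clip at 0 means invalid (zeroed)
    # scores are never accepted, mirroring the calculation above.
    r = max(min(1.0, new_score / prev_score), 0.0)
    return rng.rand() < r


# Example: a proposal scoring 0.30 against a current 0.35 is kept with
# probability 0.30 / 0.35, i.e. about 86% of the time.
print(metropolis_accept(0.30, 0.35))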
Example #56
import train
import config
import sys

if __name__ == "__main__":
    jobid = sys.argv[1]

    # the hidden .<jobid>.id file holds the 1-based array index
    with open("." + jobid + ".id") as f:
        arrayid = int(f.readline()) - 1

    # pick this job's parameter set and tag it with the job id
    params = config.parameters_list[arrayid]
    params.append("-j " + jobid)

    # flatten into an argv-style token list ("-j 123" becomes two tokens)
    params = " ".join(params).split()

    train.main(params)
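# This launcher expects a config.py module exposing parameters_list, a list
# of per-job argument lists. A hypothetical sketch of such a module (only
# the parameters_list name is taken from the code above; the flags are
# illustrative):

# config.py
parameters_list = [
    ["--lr", "0.001", "--epochs", "50"],
    ["--lr", "0.0005", "--epochs", "50"],
]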