Example #1
    def __init__(self,
                 hparams=DotDict({
                     'model_type': 'transformer',
                     'ninp': 128,
                     'nhead': 2,
                     'nhid': 512,
                     'nlayers': 2,
                     'tie_layers': True,
                     'tie_encoder_decoder': True,
                     'dropout': 0.1,
                 })):
        super(LanguageModelTrainer, self).__init__()

        self.hparams = hparams if isinstance(hparams, DotDict) \
                        else DotDict(hparams)

        from utils import get_default_tokenizer
        self.vocab_size = get_default_tokenizer()._tokenizer.get_vocab_size()

        self.model_type = hparams.get('model_type', 'transformer')
        assert self.model_type in ['transformer', 'lstm']

        if self.model_type == 'transformer':
            self.model = TransformerModel(ntoken=self.vocab_size, **hparams)
        else:
            self.model = LSTMModel(ntoken=self.vocab_size, **hparams)

        self.batch_size = hparams.get('batch_size', 64)
        self.bptt = hparams.get('bptt', 128)
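Example #1 above, like every snippet in this collection, relies on a DotDict helper: a dict whose keys can also be read and written as attributes (the individual source repos add their own extras such as from_json, to_json, copy or recursive_update). As a point of reference only, a minimal sketch of the core idea could look like the following; it is not the implementation used by any particular example:

class DotDict(dict):
    """Minimal illustrative sketch: a dict whose keys double as attributes."""

    def __getattr__(self, key):
        # only called when normal attribute lookup fails, so dict methods keep working
        try:
            return self[key]
        except KeyError as exc:
            raise AttributeError(key) from exc

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

With such a sketch, opt = DotDict(); opt.nt = 10 stores a regular dict entry, so both opt.nt and opt['nt'] return 10.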
Example #2
    def __init__(self, **kwargs):

        self.conf = DotDict(kwargs)
        self.current = DotDict()
        self.modules = {}
        self.workflow = BpmnWorkflow
        self.workflow_spec = WorkflowSpec
        self.load_or_create_workflow()
Example #3
def main(args, hparams):
    """ Main function for Keras Sketch-RNN"""
    # Logger:
    logsdir = os.path.join(args.experiment_dir, 'logs')
    os.makedirs(logsdir)
    os.makedirs(os.path.join(args.experiment_dir, 'checkpoints'))
    sys.stdout = Logger(logsdir)

    # Add support for dot access for auxiliary function use:
    hparams_dot = DotDict(hparams)
    hparams_dot.epochs = args.epochs

    # Load dataset:
    hparams_dot.data_set = args.data_set
    datasets = load_dataset(args.data_dir, hparams_dot)

    train_set = datasets[0]
    valid_set = datasets[1]
    #test_set = datasets[2]
    model_params = datasets[3]

    # Build and compile model:
    seq2seq = Seq2seqModel(model_params)
    seq2seq.compile()
    model = seq2seq.model

    # Create a data generator:
    train_generator = batch_generator(train_set, train=True)
    val_generator = batch_generator(valid_set, train=False)

    # Callbacks:
    model_callbacks = get_callbacks_dict(seq2seq=seq2seq,
                                         model_params=model_params,
                                         experiment_path=args.experiment_dir)

    # Load checkpoint:
    if args.checkpoint is not None:
        # Load weights:
        seq2seq.load_trained_weights(args.checkpoint)
        # Initial batch (affects LR and KL weight decay):
        num_batches = model_params.save_every if model_params.save_every is not None else train_set.num_batches
        count = args.initial_epoch * num_batches
        model_callbacks['lr_schedule'].count = count
        model_callbacks['kl_weight_schedule'].count = count

    # Write config file to json file
    with open(os.path.join(logsdir, 'model_config.json'), 'w') as f:
        json.dump(model_params, f, indent=True)

    # Train
    steps_per_epoch = model_params.save_every if model_params.save_every is not None else train_set.num_batches
    model.fit(train_generator,
              steps_per_epoch=steps_per_epoch,
              epochs=model_params.epochs,
              validation_data=val_generator,
              validation_steps=valid_set.num_batches,
              callbacks=[cbk for cbk in model_callbacks.values()],
              initial_epoch=args.initial_epoch)
Example #4
def get_multi_stnn_data(data_dir, disease_name, nt_train, k=1, start_time=0):
    # get dataset
    data = get_time_data(data_dir, disease_name, start_time)
    opt = DotDict()
    opt.nt, opt.nx, opt.nd = data.size()
    opt.periode = opt.nt
    relations = get_multi_relations(data_dir, disease_name, k)
    # ! have to set nt_train = opt.nt - 1
    nt_train = opt.nt - 1
    # make k hop
    # split train / test
    train_data = data[:nt_train]
    test_data = data[nt_train:]
    return opt, (train_data, test_data), relations
Example #5
    def fetch(self):
        self.logger.info('fetching weather greeting')
        if not self.weather_url:
            return
        try:
            req = requests.get(self.weather_url)
        except Exception:
            self.logger.error('Failed to retrieve weather information')
            return  # without a response there is nothing to parse
        data = DotDict(req.json())
        print(data)
        if 'error' in data:
            print('error fetching')
            self.logger.error('Error fetching weather: {}'.format(
                data['error']))
            return

        weather_details = data.weather
        if len(weather_details) < 1:
            self.logger.error(
                'Lack of weather details with call. Received: {}'.format(data))
            return

        description = weather_details[0]['description']
        todays_high = data.main['temp_max']
        todays_low = data.main['temp_min']
        temp = data.main['temp']
        location = data.name

        self.data = data

        self.read_text = "It's {} degrees in {} with {}. Today's high is {}, and the low will be {}".format(
            int(temp), location, description, int(todays_high),
            int(todays_low))
        self.sound_bit = self.generate_sound_bit(self.read_text)
Example #6
    def _setup(self, config):
        self.FLAGS = FLAGS = DotDict(config)
        self._setup_tf_resource(FLAGS.gpu)

        tf.random.set_seed(FLAGS.seed)
        self.rng = np.random.RandomState(FLAGS.seed)

        self.data_generator = DataGenerator(FLAGS=FLAGS)

        self.model = model = MyModel(FLAGS=FLAGS,
                                     num_nodes=self.data_generator.num_nodes)

        self.loss_object = LossObject(model=model, FLAGS=FLAGS)

        learning_rate_struc = FLAGS.learning_rate_struc
        learning_rate_meta = FLAGS.learning_rate_meta
        self.optimizer_struc = tf.keras.optimizers.Adam(
            learning_rate=learning_rate_struc)
        self.optimizer_meta = tf.keras.optimizers.Adam(
            learning_rate=learning_rate_meta)

        if FLAGS.autograph:
            self.train_one_step_struc = tf.function(self.train_one_step_struc)
            self.train_one_step_meta = tf.function(self.train_one_step_meta)
            self.ag_model = tf.function(self.model)
        else:
            self.ag_model = self.model
Example #7
def input_config():
    ### Input config for Dispatch
    args = DotDict({
        'num_officer': int(input("Number of Officers: ")),
        'num_event': int(input("Number of Events: ")),
        'num_task': int(input("Number of Tasks: ")),
    })
    return args
Example #8
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Setup required for unit tests.
        print("Unit testing CWD:", os.getcwd())
        self.config = DotDict.from_json("../Configurations/ModelConfigs/MuzeroBoard.json")
        self.g = HexGame(self.hex_board_size)
        self.net = HexNet(self.g, self.config.net_args)
        self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)
Example #9
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Setup required for unit tests.
        print("Unit testing CWD:", os.getcwd())
        self.config = DotDict.from_json("../Configurations/ModelConfigs/MuzeroCartpole.json")
        self.g = GymGame('CartPole-v1')
        self.net = GymNet(self.g, self.config.net_args)
        self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)
Example #10
def import_data(data_dir, file, dims, makerel):
    # dataset configuration
    print(dims[0], dims[1])
    opt = DotDict()
    opt.nt = 18
    opt.nt_train = 15
    opt.nx = dims[0] * dims[1]
    opt.nd = 1
    opt.periode = opt.nt
    # loading data
    csv_nan = os.path.join(data_dir, file)
    csv = os.path.join(data_dir, file[:-8] + '.csv')

    # exclude_dir = os.path.join(data_dir, "tree_cover", file)
    # exclude = np.genfromtxt(exclude_dir, delimiter = ",")
    # if opt.exclude:
    # ex = np.genfromtxt(csv_nan, delimiter = ",")
    # exclude = np.argwhere(np.isnan(ex))
    exclude = np.empty((0))
    area = np.genfromtxt(csv, delimiter=",")
    area_final = np.nan_to_num(area)
    data = torch.from_numpy(np.expand_dims(area_final, axis=2)).float()
    if makerel:
        x = du.make_relation(["all"], dims, exclude, save=False, combine=False)
        relations = x.float()
        # normalize each relation matrix; re-assigning the loop variable alone
        # (as the original code did) leaves `relations` unchanged
        relations = torch.stack([normalize(rel) for rel in relations], dim=1)
        print(relations[:9, 0, :9], relations.size())
    else:
        relations = []
    return opt, data, relations
Example #11
def dataset_factory(opt):
    # get dataset
    parm = DotDict(opt)
    try:
        opt, data = import_data(parm.datadir, '{}.csv'.format(parm.dataset),
                                parm)
    except Exception as exc:
        raise ValueError('No dataset named `{}`.'.format(parm.dataset)) from exc
    print(parm.datadir)
    return opt, data
Example #12
    def __enter__(self) -> AblationAnalysis:
        """
        Initialize experiment by generating all ModelConfigs as specified by the hyperparameter grid,
        and storing them in a temporary folder. All config files will be assigned a unique name, which will later
        be accessed for training agents asynchronously.
        """
        if not os.path.exists(self.config_dir):
            os.makedirs(self.config_dir)

        # First construct all possible hyperparameter configuration JSON contents.
        self.configs = list()
        base_config = DotDict.from_json(self.experiment.ablation_base.config)
        for param in self.experiment.ablation_grid:
            config = base_config.copy()
            config.recursive_update(param)
            self.configs.append(config)

        # Save ablation analysis configuration using time annotation.
        dt = datetime.now().strftime("%Y%m%d-%H%M%S")
        schedule = DotDict({i: self.experiment.ablation_grid[i] for i in range(len(self.experiment.ablation_grid))})
        schedule.to_json(os.path.join(self.experiment.output_directory, f'ablation_schedule_{dt}.json'))

        # Store/ generate all unique JSON config files annotated by time and repetition number.
        for run in range(self.experiment.experiment_args.num_repeat):
            for i, config in enumerate(self.configs):
                c = config.copy()  # Note: shallow copy.

                run_config_name = f'rep{run}_config{i}_dt{dt}'
                c.name = f'{c.name}_{run_config_name}'

                out = os.path.join(self.experiment.output_directory, c.args.checkpoint, run_config_name)
                c.args.checkpoint = out
                c.args.load_folder_file = (out, c.args.load_folder_file[1])

                if not os.path.exists(c.args.checkpoint):
                    os.makedirs(c.args.checkpoint)

                config_file = os.path.join(self.config_dir, run_config_name) + '.json'
                c.to_json(config_file)

                self.files.append(config_file)

        return self
Example #13
    def __enter__(self) -> AblationAnalysis:
        if not os.path.exists(self.config_dir):
            os.makedirs(self.config_dir)

        self.configs = list()
        base_config = DotDict.from_json(self.experiment.ablation_base.config)
        for param in self.experiment.ablation_grid:
            config = base_config.copy()
            config.recursive_update(param)
            self.configs.append(config)

        dt = datetime.now().strftime("%Y%m%d-%H%M%S")

        schedule = DotDict({
            i: self.experiment.ablation_grid[i]
            for i in range(len(self.experiment.ablation_grid))
        })
        schedule.to_json(
            os.path.join(self.experiment.output_directory,
                         f'ablation_schedule_{dt}.json'))

        for run in range(self.experiment.experiment_args.num_repeat):
            for i, config in enumerate(self.configs):
                c = config.copy()  # Note: shallow copy.

                run_config_name = f'rep{run}_config{i}_dt{dt}'
                c.name = f'{c.name}_{run_config_name}'

                out = os.path.join(self.experiment.output_directory,
                                   c.args.checkpoint, run_config_name)
                c.args.checkpoint = out
                c.args.load_folder_file = (out, c.args.load_folder_file[1])

                if not os.path.exists(c.args.checkpoint):
                    os.makedirs(c.args.checkpoint)

                config_file = os.path.join(self.config_dir,
                                           run_config_name) + '.json'
                c.to_json(config_file)

                self.files.append(config_file)

        return self
Example #14
    def fetch(self, val=0):
        # TODO: Avoid hard coding
        req = requests.get("http://api.openweathermap.org/data/2.5/weather?q=Austin&appid=1c51e68c4823e92a75f2590404fd6634&units=imperial")
        data = DotDict(req.json())
        if 'error' in data:
            self.logger.error('Error fetching weather: {}'.format(data['error']))
            return

        # TODO: fix this, it's hacky
        temp = int(data['main']['temp'])
        self.logger.debug('Parsed temperature: {}'.format(temp))
        self.text = '{} °'.format(temp)
Example #15
    def __init__(self,
                 game,
                 arg_file: typing.Optional[str] = None,
                 name: str = "") -> None:
        super().__init__(game, arg_file, name, parametric=True)
        if self.player_args is not None:
            self.args = DotDict.from_json(self.player_args)

            self.model = DefaultMuZero(self.game, self.args.net_args,
                                       self.args.architecture)
            self.search_engine = MuZeroMCTS(self.game, self.model,
                                            self.args.args)
            self.name = self.args.name
Example #16
def create_parameter_grid(content: DotDict) -> typing.List:
    """
    Recursively build up a parameter-grid using itertools.product on all dict values within content
    :param content: DotDict dictionary with keys accessible as object attributes
    :return: List of DotDict objects containing a parameter grid for each value given per key.
    :reference: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterGrid.html
    """
    # Recursively unpack dictionary to create flat grids at each level.
    base = DotDict()
    for key, value in content.items():
        if isinstance(value, DotDict):
            base[key] = create_parameter_grid(value)
        else:
            base[key] = value

    # Build up a list of dictionaries for each possible value combination.
    grid = list()
    keys, values = zip(*base.items())
    for v in product(*values):
        grid.append(DotDict(zip(keys, v)))

    return grid
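Below is a hedged usage sketch for create_parameter_grid; the search space and its keys (lr, net_args) are invented for illustration, and plain values are expected to be lists because itertools.product iterates over them:

# Hypothetical usage sketch; keys and values are illustrative only.
search_space = DotDict({
    'lr': [0.1, 0.01],                        # plain values are fed to product() as-is
    'net_args': DotDict({'layers': [1, 2]}),  # nested DotDicts are expanded into sub-grids first
})
grid = create_parameter_grid(search_space)
# len(grid) == 4, and e.g. grid[0].lr == 0.1 while grid[0].net_args.layers == 1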
Example #17
    def __init__(self,
                 game,
                 arg_file: typing.Optional[str] = None,
                 name: str = "") -> None:
        super().__init__(game, arg_file, name, parametric=True)
        if self.player_args is not None:
            # Initialize AlphaZero by loading its parameter config and constructing the network and search classes.
            self.args = DotDict.from_json(self.player_args)

            self.model = DefaultAlphaZero(self.game, self.args.net_args,
                                          self.args.architecture)
            self.search_engine = AlphaZeroMCTS(self.game, self.model,
                                               self.args.args)
            self.name = self.args.name
Example #18
def from_numpy_data(data_dir):
    data = torch.Tensor(np.load(data_dir)).unsqueeze(-1) #(Time,Series,1)
    opt = DotDict()
    opt.nt = data.size(0)
    opt.nt_train = opt.nt//4
    opt.nx = data.size(1)
    opt.nd = 1
    opt.periode = 1
    relations = torch.zeros(data.size(1),1,data.size(1))
    train_data = data[:opt.nt_train]
    test_data = data[opt.nt_train:]

    return opt, (train_data, test_data), relations
Example #19
def inference_only(param_path):
    # -----
    # load data
    with open(param_path) as f:
        args = json.load(f)

    args = DotDict(args)
    out_dir = args.model_dir.replace('model', 'output')
    doc_vecs_path = os.path.join(out_dir, 'doc_vecs.npy')

    pub_med_ids, _ = read_file(args.documents_path)
    labels = load(args.labels_path)
    index2word = load(args.index2word_path)
    terms = load(args.terms_path)
    doc_vecs = np.load(doc_vecs_path)

    # ---------
    # Inference
    doc_tfidf_reps = labels
    if len(args.doc_tfidf_reps_path) > 0:
        doc_tfidf_reps = load(args.doc_tfidf_reps_path)

    fused_docs, expanded, top_k_indices = inference.main(
        doc_vecs, doc_tfidf_reps, args.k, args.fuse_doc_type)

    save(os.path.join(out_dir, 'top_k_indices'), top_k_indices)
    if args.keep_model_files:
        np.save(os.path.join(out_dir, 'fused_docs'), fused_docs)
        np.save(os.path.join(out_dir, 'doc_vecs'), doc_vecs)
    del doc_vecs, top_k_indices, fused_docs

    # ----------------------------
    # Save expanded labels to disk
    # convert to word ids
    labels = [[terms[l] for l in lab] for lab in labels]

    if len(args.doc_tfidf_reps_path) == 0:
        expanded = [[terms[l] for l in lab] for lab in expanded]

    expanded_labels = []
    for p_id, l, ex in zip(pub_med_ids, labels, expanded):
        e_words = ', '.join([index2word[e] for e in ex])
        original = ', '.join([index2word[i] for i in l])
        line = str(p_id) + '\tORIGINAL: ' + original + '\tEXPANDED: ' + e_words
        expanded_labels.append(line)

    fname = os.path.split(out_dir)[-1] + '_expanded_labels.txt'
    expanded_labels_dir = os.path.join(out_dir, fname)
    save_list(expanded_labels_dir, expanded_labels)
Example #20
def perform_tournament(experiment: ExperimentConfig,
                       by_checkpoint: bool = True) -> None:
    """
    Helper function to unpack the player configs provided in the ExperimentConfig into a pool (list) of player-data
    tuples that is given to the tourney function. If 'by_checkpoint' is set to True, we check the directory
    of the provided model path and create individual players for each of the available model checkpoints.
    Otherwise we just take the (latest) model specified in the config.

    The experiment config must contain a 'checkpoint_resolution' integer argument that sets a step size for skipping
    checkpoints to reduce computation time, i.e., only every 'checkpoint_resolution'-th checkpoint is included.

    We expect model checkpoint files to be unaltered from the source code, meaning the format follows:
     - prefix_checkpoint_(int).pth.tar

    :param experiment: ExperimentConfig Contains the players to be pitted against each other.
    :param by_checkpoint: bool Whether to include every model checkpoint in the player pool (or just the specified one)
    """
    args = experiment.experiment_args  # Helper variable to reduce verbosity.
    # Collect player configurations
    player_checkpoint_pool = get_player_pool(
        experiment.player_configs,
        by_checkpoint=by_checkpoint,
        resolution=args.checkpoint_resolution)
    results, trajectories = tourney(player_checkpoint_pool, experiment.game,
                                    args.num_repeat, args.num_trials,
                                    args.num_opponents, args.return_data)

    # Save results to output file.
    dt = datetime.now().strftime("%Y%m%d-%H%M%S")
    data = DotDict({'results': results, 'args': experiment.experiment_args})
    data.to_json(experiment.output_directory + f'{experiment.name}_{dt}.json')

    if trajectories:
        with open(experiment.output_directory + f'{experiment.name}_{dt}.out',
                  'wb') as f:
            pickle.dump(trajectories, f)
Example #21
def crash_ex(data_dir, file='crash_.csv'):
    # dataset configuration
    opt = DotDict()
    opt.nt = 1085
    opt.nt_train = 1080
    opt.nx = 131
    opt.np = 8
    opt.nd = 1
    opt.periode = opt.nt
    # loading data
    data = torch.Tensor(np.genfromtxt(os.path.join(data_dir, file))).view(
        opt.nt, opt.nx, opt.np + opt.nd)
    # load relations
    relations = torch.Tensor(
        np.genfromtxt(os.path.join(data_dir, 'crash_relations.csv')))
    relations = normalize(relations).unsqueeze(1)
    return opt, data, relations
Example #22
def get_keras_dataset(data_dir,
                      disease_name,
                      nt_train,
                      seq_len,
                      start_time=0,
                      normalize='variance'):
    # get dataset
    # data_dir = 'data', disease_name = 'ncov_confirmed'
    # return (nt, nx, nd) time series data
    time_data_dir = os.path.join(data_dir, disease_name, 'time_data')
    time_datas = os.listdir(time_data_dir)
    data = []
    for time_data in time_datas:
        data_path = os.path.join(time_data_dir, time_data)
        new_data = np.genfromtxt(data_path, encoding='utf-8',
                                 delimiter=',')[start_time:][..., np.newaxis]
        data.append(new_data)
    data = np.concatenate(data, axis=2).astype(np.float64)
    # get option
    opt = DotDict()
    opt.nt, opt.nx, opt.nd = data.shape
    opt.normalize = normalize
    train_data = data[:nt_train]
    opt.mean = np.mean(train_data)
    if normalize == 'max_min':
        opt.min = np.min(train_data)
        opt.max = np.max(train_data)
        data = (data - opt.mean) / (opt.max - opt.min)
    elif normalize == 'variance':
        opt.std = np.std(train_data) * np.sqrt(
            train_data.size) / np.sqrt(train_data.size - 1)
        data = (data - opt.mean) / opt.std
    # split train / test
    data = np.reshape(data, (opt.nt, opt.nx * opt.nd))
    train_data = data[:nt_train]
    train_input = []  # (batch, sequence_length, opt.nx*opt.nd)
    train_output = []  # (batch, opt.nx*opt.nd)
    for i in range(nt_train - seq_len):
        train_input.append(train_data[i:i + seq_len][np.newaxis, ...])
        train_output.append(train_data[i + seq_len][np.newaxis, ...])
    train_input = np.concatenate(train_input, axis=0)
    train_output = np.concatenate(train_output, axis=0)
    test_data = data[nt_train:]
    test_input = data[nt_train - seq_len:nt_train]
    return opt, (train_input, train_output, test_input, test_data)
Example #23
    def __init__(self,
                 game,
                 nested_config: typing.Optional[DotDict] = None,
                 name: str = "") -> None:
        super().__init__(game, nested_config.file, name, parametric=True)
        if self.player_args is not None:
            self.args = DotDict.from_json(self.player_args)

            self.model = BlindMuZero(self.game, self.args.net_args,
                                     self.args.architecture,
                                     nested_config.refresh_freq)
            self.model.bind(self.history.actions)

            self.search_engine = MuZeroMCTS(self.game, self.model,
                                            self.args.args)
            self.name = self.args.name
Example #24
def heat(data_dir, file='heat.csv'):
    # dataset configuration
    opt = DotDict()
    opt.nt = 200
    opt.nt_train = 100
    opt.nx = 41
    opt.nd = 1
    opt.periode = opt.nt
    # loading data
    data = torch.Tensor(np.genfromtxt(os.path.join(data_dir, file))).view(opt.nt, opt.nx, opt.nd)
    # load relations
    relations = torch.Tensor(np.genfromtxt(os.path.join(data_dir, 'heat_relations.csv')))
    relations = normalize(relations).unsqueeze(1)
    return opt, data, relations
Example #25
    def __init__(self, experiment_file: str) -> None:
        """
        Initialize the experiment data container using a string path to a .json settings file.
        Not all variables are initialized directly (they require an explicit call to construct), since doing so
        could bloat memory with large implementations when running a large number of experiments sequentially.

        :param experiment_file: str Path to .json file containing experiment details.
        """
        self.experiment_args = DotDict.from_json(experiment_file)
        self.type = self.experiment_args.experiment
        self.name = self.experiment_args.name

        self.output_directory = f'./out/{self.experiment_args.output_dir}/'
        self.game_config = None
        self.game = None
        self.ablation_base = None
        self.ablation_grid = None
        self.player_configs = list()
Example #26
def init(seed, _config, _run):
    # Flatten nested config sections so that e.g. args.seq_length can be used instead of args.common.seq_length
    config = {k: v for k, v in _config.items()}
    common_config = config['common']
    config.pop('common')
    for k, v in common_config.items():
        assert k not in config
        config[k] = v

    dataset_config = config['dataset']
    config.pop('dataset')
    for k, v in dataset_config.items():
        assert k not in config
        config[k] = v

    args = DotDict(config)
    # utils.seedAll(seed) # TODO: implement seedAll
    return args
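To make the flattening concrete, here is a hypothetical input/output pair (all keys and values below are made up for illustration):

# _config = {'lr': 1e-3, 'common': {'seq_length': 64}, 'dataset': {'name': 'ptb'}}
# init(seed, _config, _run) then returns a DotDict equivalent to
# {'lr': 1e-3, 'seq_length': 64, 'name': 'ptb'}, so args.seq_length works directly
# instead of args.common.seq_length.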
Example #27
    def __init__(self,
                 game,
                 nested_config: typing.Optional[DotDict] = None,
                 name: str = "") -> None:
        super().__init__(game, nested_config.file, name, parametric=True)
        if self.player_args is not None:
            # Initialize MuZero by loading its parameter config and constructing the network and search classes.
            # Additionally assign/ bind internal MDP memory to enable planning strictly within the learned model.
            self.args = DotDict.from_json(self.player_args)

            self.model = BlindMuZero(self.game, self.args.net_args,
                                     self.args.architecture,
                                     nested_config.refresh_freq)
            self.model.bind(self.history.actions)

            self.search_engine = MuZeroMCTS(self.game, self.model,
                                            self.args.args)
            self.name = self.args.name
Example #28
    def parse_node(self, node):
        """
        Overrides ProcessParser.parse_node.
        Parses and attaches the inputOutput tags created by the Camunda Modeler.
        :param node: xml task node
        :return: TaskSpec
        """
        spec = super(CamundaProcessParser, self).parse_node(node)
        spec.data = DotDict()
        try:
            input_nodes = self._get_input_nodes(node)
            if input_nodes:
                for nod in input_nodes:
                    spec.data.update(self._parse_input_node(nod))
        except Exception:
            LOG.exception("Error while processing node: %s" % node)
        spec.defines = spec.data
        # spec.ext = self._attach_properties(node, spec)
        return spec
Example #29
def get_rnn_dataset(data_dir,
                    disease,
                    nt_train,
                    seq_len,
                    start_time=0,
                    normalize='variance'):
    # get dataset
    data = get_time_data(data_dir, disease, start_time)  #(nt, nx, nd)
    # get option
    opt = DotDict()
    opt.nt, opt.nx, opt.nd = data.size()
    opt.normalize = normalize
    opt.mean = data.mean().item()
    if normalize == 'max_min':
        opt.min = data.min().item()
        opt.max = data.max().item()
        data = (data - opt.mean) / (opt.max - opt.min)
    elif normalize == 'variance':
        opt.std = torch.std(data).item()
        data = (data - opt.mean) / opt.std
    # split train / test
    train_input = []
    train_output = []
    for i in range(nt_train - seq_len):
        new_input = []
        for j in range(seq_len):
            new_input.append(data[i + j])
        train_input.append(torch.stack(new_input, dim=0))
        train_output.append(data[i + seq_len])
    train_input = torch.stack(train_input, dim=0)
    train_output = torch.stack(train_output, dim=0)
    test_input = []
    for i in range(seq_len):
        test_input.append(data[nt_train - seq_len + i])
    test_data = data[nt_train:]
    test_input = torch.stack(test_input, dim=0)
    return opt, (train_input, train_output), (test_input, test_data)
Example #30
def import_data(data_dir, file, parm):
    # dataset configuration
    dims = [parm.height, parm.width]
    tsize = parm.tsize
    if (parm.stride is None):
        stride = tsize
    else:
        stride = parm.stride
    numtrain = parm.nt_train
    print(dims[0], dims[1])
    opt = DotDict()
    opt.nx = tsize**2
    opt.nd = 1
    opt.periode = parm.nt
    # loading data
    csv = os.path.join(data_dir, file)
    reduced = np.genfromtxt(csv, delimiter=",")
    print(reduced.shape)
    data = reduced.reshape(parm.nt_data, dims[0], dims[1])
    new_dims = [
        roundup(dims[0], tsize, stride),
        roundup(dims[1], tsize, stride)
    ]
    opt.new_dims = new_dims
    pad_data = np.empty((parm.nt_data, new_dims[0], new_dims[1]))
    pad_data[:] = np.nan
    step_x = int((new_dims[1] - tsize) / stride) + 1
    step_y = int((new_dims[0] - tsize) / stride) + 1
    xmin = int((new_dims[1] - dims[1]) / 2)
    xmax = new_dims[1] - (new_dims[1] - dims[1] - xmin)
    ymin = int((new_dims[0] - dims[0]) / 2)
    ymax = new_dims[0] - (new_dims[0] - dims[0] - ymin)
    pad_data[:, ymin:ymax, xmin:xmax] = data
    broken_data = []
    count = 0
    for j in np.arange(0, step_x * stride, stride):
        for i in np.arange(0, step_y * stride, stride):
            data = np.expand_dims(pad_data[:, i:i + tsize, j:j + tsize],
                                  axis=0)
            data = data.reshape(1, parm.nt_data, -1)
            if count == 0:
                broken_data = data
            else:
                broken_data = np.append(broken_data, data, axis=0)
            count += 1
    broken_data = np.array(broken_data)
    return opt, broken_data
Example #31
RandPadder = namedtuple_with_defaults('RandPadder',
    'rand_pad_prob, max_pad_scale, fill_value',
    [0.0, 1.0, 127])

ColorJitter = namedtuple_with_defaults('ColorJitter',
    'random_hue_prob, max_random_hue, \
    random_saturation_prob, max_random_saturation, \
    random_illumination_prob, max_random_illumination, \
    random_contrast_prob, max_random_contrast',
    [0.0, 18,
    0.0, 32,
    0.0, 32,
    0.0, 0.5])


cfg = DotDict()
cfg.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# training configs
cfg.train = DotDict()
# random cropping samplers
cfg.train.rand_crop_samplers = [
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.1),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.3),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.5),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.7),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.9),]
cfg.train.crop_emit_mode = 'center'
# cfg.train.emit_overlap_thresh = 0.4
# random padding
cfg.train.rand_pad = RandPadder(rand_pad_prob=0.5, max_pad_scale=4.0)
Example #32
def _load_config():
    config = DotDict()
    with open("config.yml", "r") as fp:
        config.update(yaml.load(fp))
    return config
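A hedged usage sketch for _load_config; the config.yml contents in the comment are invented for illustration:

# Suppose config.yml contains:
#   epochs: 10
#   model:
#     hidden_size: 256
config = _load_config()
print(config.epochs)                    # 10, via DotDict attribute access
print(config['model']['hidden_size'])   # 256; nested mappings stay plain dicts unless DotDict converts them recursively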