def __init__(self, hparams=DotDict({
        'model_type': 'transformer',
        'ninp': 128,
        'nhead': 2,
        'nhid': 512,
        'nlayers': 2,
        'tie_layers': True,
        'tie_encoder_decoder': True,
        'dropout': 0.1,
    })):
    super(LanguageModelTrainer, self).__init__()
    self.hparams = hparams if isinstance(hparams, DotDict) \
        else DotDict(hparams)

    from utils import get_default_tokenizer
    self.vocab_size = get_default_tokenizer()._tokenizer.get_vocab_size()

    self.model_type = hparams.get('model_type', 'transformer')
    assert self.model_type in ['transformer', 'lstm']

    if self.model_type == 'transformer':
        self.model = TransformerModel(ntoken=self.vocab_size, **hparams)
    else:
        self.model = LSTMModel(ntoken=self.vocab_size, **hparams)

    self.batch_size = hparams.get('batch_size', 64)
    self.bptt = hparams.get('bptt', 128)
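# The snippets in this file all rely on a `DotDict` helper whose implementation is not
# included here. The calls assume construction from a plain dict or keyword arguments,
# the usual dict methods (.get(), .items(), ...), and attribute-style access to keys.
# The sketch below is an assumption for illustration, not the actual class used by these
# projects (some of which add extras such as from_json/to_json/recursive_update).
class DotDict(dict):
    """Minimal dict subclass exposing keys as attributes (illustrative sketch only)."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name)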
def __init__(self, **kwargs):
    self.conf = DotDict(kwargs)
    self.current = DotDict()
    self.modules = {}
    self.workflow = BpmnWorkflow
    self.workflow_spec = WorkflowSpec
    self.load_or_create_workflow()
def _setup(self, config):
    self.FLAGS = FLAGS = DotDict(config)
    self._setup_tf_resource(FLAGS.gpu)

    tf.random.set_seed(FLAGS.seed)
    self.rng = np.random.RandomState(FLAGS.seed)

    self.data_generator = DataGenerator(FLAGS=FLAGS)
    self.model = model = MyModel(FLAGS=FLAGS,
                                 num_nodes=self.data_generator.num_nodes)
    self.loss_object = LossObject(model=model, FLAGS=FLAGS)

    learning_rate_struc = FLAGS.learning_rate_struc
    learning_rate_meta = FLAGS.learning_rate_meta
    self.optimizer_struc = tf.keras.optimizers.Adam(
        learning_rate=learning_rate_struc)
    self.optimizer_meta = tf.keras.optimizers.Adam(
        learning_rate=learning_rate_meta)

    if FLAGS.autograph:
        self.train_one_step_struc = tf.function(self.train_one_step_struc)
        self.train_one_step_meta = tf.function(self.train_one_step_meta)
        self.ag_model = tf.function(self.model)
    else:
        self.ag_model = self.model
def annotate_series(series):
    period = calculate_periodicity(series)
    if period is not None:
        waveform = DotDict(period=period, frequency=1 / period)
        wave, start, count, underlying = extract_waveform(series, period)
        wave, offset, scale, first, crossings = normalize_waveform(wave)
        waveform.samples = wave
        waveform.beginning = start + first
        waveform.count = count
        waveform.amplitude = scale
        waveform.offset = underlying.mean() + offset
        waveform.timestamps = np.arange(len(wave)) * series.sample_period
        waveform.sample_period = series.sample_period
        waveform.sample_rate = series.sample_rate
        waveform.capture_start = series.capture_start + waveform.beginning * series.sample_period
        possibles = characterize_waveform(wave, crossings)
        if possibles:
            error, shape, duty_cycle = possibles[0]
            waveform.error = error
            waveform.shape = shape
            if duty_cycle is not None:
                waveform.duty_cycle = duty_cycle
        else:
            waveform.shape = 'unknown'
        series.waveform = waveform
        return True
    return False
def import_data(data_dir, file, dims, makerel):
    # dataset configuration
    print(dims[0], dims[1])
    opt = DotDict()
    opt.nt = 18
    opt.nt_train = 15
    opt.nx = dims[0] * dims[1]
    opt.nd = 1
    opt.periode = opt.nt

    # loading data
    csv_nan = os.path.join(data_dir, file)
    csv = os.path.join(data_dir, file[:-8] + '.csv')
    # exclude_dir = os.path.join(data_dir, "tree_cover", file)
    # exclude = np.genfromtxt(exclude_dir, delimiter=",")
    # if opt.exclude:
    #     ex = np.genfromtxt(csv_nan, delimiter=",")
    #     exclude = np.argwhere(np.isnan(ex))
    exclude = np.empty((0))
    area = np.genfromtxt(csv, delimiter=",")
    area_final = np.nan_to_num(area)
    data = torch.from_numpy(np.expand_dims(area_final, axis=2)).float()

    if makerel:
        x = du.make_relation(["all"], dims, exclude, save=False, combine=False)
        relations = x.float()
        # normalize each relation and collect the results (a plain loop over
        # `relations` would rebind the loop variable without updating the tensor)
        relations = torch.stack([normalize(i) for i in relations]).unsqueeze(1)
        print(relations[:9, 0, :9], relations.size())
    else:
        relations = []
    return opt, data, relations
def fetch(self):
    self.logger.info('fetching weather greeting')
    if not self.weather_url:
        return
    try:
        req = requests.get(self.weather_url)
    except Exception:
        self.logger.error('Failed to retrieve weather information')
        return

    data = DotDict(req.json())
    print(data)
    if 'error' in data:
        print('error fetching')
        self.logger.error('Error fetching weather: {}'.format(data['error']))
        return

    weather_details = data.weather
    if len(weather_details) < 1:
        self.logger.error(
            'Lack of weather details with call. Received: {}'.format(data))
        return

    description = weather_details[0]['description']
    todays_high = data.main['temp_max']
    todays_low = data.main['temp_min']
    temp = data.main['temp']
    location = data.name

    self.data = data
    self.read_text = ("It's {} degrees in {} with {}. Today's high is {}, "
                      "and the low will be {}").format(
        int(temp), location, description, int(todays_high), int(todays_low))
    self.sound_bit = self.generate_sound_bit(self.read_text)
def input_config():
    ### Input config for Dispatch
    args = DotDict({
        'num_officer': int(input("Number of Officers: ")),
        'num_event': int(input("Number of Events: ")),
        'num_task': int(input("Number of Tasks: ")),
    })
    return args
def main(args, hparams):
    """ Main function for Keras Sketch-RNN """
    # Logger:
    logsdir = os.path.join(args.experiment_dir, 'logs')
    os.makedirs(logsdir)
    os.makedirs(os.path.join(args.experiment_dir, 'checkpoints'))
    sys.stdout = Logger(logsdir)

    # Add support for dot access for auxiliary function use:
    hparams_dot = DotDict(hparams)
    hparams_dot.epochs = args.epochs

    # Load dataset:
    hparams_dot.data_set = args.data_set
    datasets = load_dataset(args.data_dir, hparams_dot)
    train_set = datasets[0]
    valid_set = datasets[1]
    # test_set = datasets[2]
    model_params = datasets[3]

    # Build and compile model:
    seq2seq = Seq2seqModel(model_params)
    seq2seq.compile()
    model = seq2seq.model

    # Create a data generator:
    train_generator = batch_generator(train_set, train=True)
    val_generator = batch_generator(valid_set, train=False)

    # Callbacks:
    model_callbacks = get_callbacks_dict(seq2seq=seq2seq,
                                         model_params=model_params,
                                         experiment_path=args.experiment_dir)

    # Load checkpoint:
    if args.checkpoint is not None:
        # Load weights:
        seq2seq.load_trained_weights(args.checkpoint)
        # Initial batch (affects LR and KL weight decay):
        num_batches = model_params.save_every if model_params.save_every is not None else train_set.num_batches
        count = args.initial_epoch * num_batches
        model_callbacks['lr_schedule'].count = count
        model_callbacks['kl_weight_schedule'].count = count

    # Write config file to json file
    with open(os.path.join(logsdir, 'model_config.json'), 'w') as f:
        json.dump(model_params, f, indent=True)

    # Train
    steps_per_epoch = model_params.save_every if model_params.save_every is not None else train_set.num_batches
    model.fit(train_generator,
              steps_per_epoch=steps_per_epoch,
              epochs=model_params.epochs,
              validation_data=val_generator,
              validation_steps=valid_set.num_batches,
              callbacks=[cbk for cbk in model_callbacks.values()],
              initial_epoch=args.initial_epoch)
def dataset_factory(opt):
    # get dataset
    parm = DotDict(opt)
    try:
        opt, data = import_data(parm.datadir, '{}.csv'.format(parm.dataset), parm)
    except Exception as e:
        raise ValueError('No dataset named `{}`.'.format(parm.dataset)) from e
    print(parm.datadir)
    return opt, data
def fetch(self, val=0):
    # TODO: Avoid hard coding
    req = requests.get("http://api.openweathermap.org/data/2.5/weather?q=Austin&appid=1c51e68c4823e92a75f2590404fd6634&units=imperial")
    data = DotDict(req.json())
    if 'error' in data:
        self.logger.error('Error fetching weather: {}'.format(data['error']))
        return
    # TODO: fix this, it's hacky
    temp = int(data['main']['temp'])
    self.logger.debug('Parsed temperature: {}'.format(temp))
    self.text = '{} °'.format(temp)
def from_numpy_data(data_dir):
    data = torch.Tensor(np.load(data_dir)).unsqueeze(-1)  # (Time, Series, 1)
    opt = DotDict()
    opt.nt = data.size(0)
    opt.nt_train = opt.nt // 4
    opt.nx = data.size(1)
    opt.nd = 1
    opt.periode = 1
    relations = torch.zeros(data.size(1), 1, data.size(1))
    train_data = data[:opt.nt_train]
    test_data = data[opt.nt_train:]
    return opt, (train_data, test_data), relations
def create_parameter_grid(content: DotDict) -> typing.List:
    """
    Recursively build up a parameter grid using itertools.product on all dict values within content.
    :param content: DotDict Dictionary with keys accessible as object attributes.
    :return: List of DotDict objects containing a parameter grid for each value given per key.
    :reference: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterGrid.html
    """
    # Recursively unpack dictionary to create flat grids at each level.
    base = DotDict()
    for key, value in content.items():
        if isinstance(value, DotDict):
            base[key] = create_parameter_grid(value)
        else:
            base[key] = value

    # Build up a list of dictionaries for each possible value combination.
    grid = list()
    keys, values = zip(*base.items())
    for v in product(*values):
        grid.append(DotDict(zip(keys, v)))

    return grid
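# Hypothetical usage of create_parameter_grid (values invented for illustration), assuming
# the minimal DotDict sketch above and `from itertools import product`: nested DotDicts are
# expanded recursively, and the result is the Cartesian product of all value lists.
space = DotDict({
    'lr': [1e-3, 1e-4],
    'net': DotDict({'layers': [2, 4]}),
})
for cfg in create_parameter_grid(space):
    print(cfg.lr, cfg.net.layers)  # four configurations in total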
def get_multi_stnn_data(data_dir, disease_name, nt_train, k=1, start_time=0):
    # get dataset
    data = get_time_data(data_dir, disease_name, start_time)
    opt = DotDict()
    opt.nt, opt.nx, opt.nd = data.size()
    opt.periode = opt.nt
    relations = get_multi_relations(data_dir, disease_name, k)  # make k hop

    # ! have to set nt_train = opt.nt - 1
    nt_train = opt.nt - 1

    # split train / test
    train_data = data[:nt_train]
    test_data = data[nt_train:]
    return opt, (train_data, test_data), relations
def heat(data_dir, file='heat.csv'):
    # dataset configuration
    opt = DotDict()
    opt.nt = 200
    opt.nt_train = 100
    opt.nx = 41
    opt.nd = 1
    opt.periode = opt.nt
    # loading data
    data = torch.Tensor(np.genfromtxt(os.path.join(data_dir, file))).view(opt.nt, opt.nx, opt.nd)
    # load relations
    relations = torch.Tensor(np.genfromtxt(os.path.join(data_dir, 'heat_relations.csv')))
    relations = normalize(relations).unsqueeze(1)
    return opt, data, relations
def inference_only(param_path):
    # -----
    # load data
    with open(param_path) as f:
        args = json.load(f)
    args = DotDict(args)

    out_dir = args.model_dir.replace('model', 'output')
    doc_vecs_path = os.path.join(out_dir, 'doc_vecs.npy')

    pub_med_ids, _ = read_file(args.documents_path)
    labels = load(args.labels_path)
    index2word = load(args.index2word_path)
    terms = load(args.terms_path)
    doc_vecs = np.load(doc_vecs_path)

    # ---------
    # Inference
    doc_tfidf_reps = labels
    if len(args.doc_tfidf_reps_path) > 0:
        doc_tfidf_reps = load(args.doc_tfidf_reps_path)

    fused_docs, expanded, top_k_indices = inference.main(
        doc_vecs, doc_tfidf_reps, args.k, args.fuse_doc_type)

    save(os.path.join(out_dir, 'top_k_indices'), top_k_indices)
    if args.keep_model_files:
        np.save(os.path.join(out_dir, 'fused_docs'), fused_docs)
        np.save(os.path.join(out_dir, 'doc_vecs'), doc_vecs)
    del doc_vecs, top_k_indices, fused_docs

    # ----------------------------
    # Save expanded labels to disk
    # convert to word ids
    labels = [[terms[l] for l in lab] for lab in labels]
    if len(args.doc_tfidf_reps_path) == 0:
        expanded = [[terms[l] for l in lab] for lab in expanded]

    expanded_labels = []
    for p_id, l, ex in zip(pub_med_ids, labels, expanded):
        e_words = ', '.join([index2word[e] for e in ex])
        original = ', '.join([index2word[i] for i in l])
        line = str(p_id) + '\tORIGINAL: ' + original + '\tEXPANDED: ' + e_words
        expanded_labels.append(line)

    fname = os.path.split(out_dir)[-1] + '_expanded_labels.txt'
    expanded_labels_dir = os.path.join(out_dir, fname)
    save_list(expanded_labels_dir, expanded_labels)
def initopts():
    o = DotDict()
    o.stopwords_file = ""
    o.remove_puncuation = False
    o.remove_stop_words = False
    o.lemmatize_words = False
    o.num_replacement = "[NUM]"
    o.to_lowercase = False
    o.replace_nums = False  # Nums are important, since rumour may be lying about count
    o.eos = "[EOS]"
    o.add_eos = True
    o.returnNERvector = True
    o.returnDEPvector = True
    o.returnbiglettervector = True
    o.returnposvector = True
    return o
def import_data(data_dir, file, parm):
    # dataset configuration
    dims = [parm.height, parm.width]
    tsize = parm.tsize
    if parm.stride is None:
        stride = tsize
    else:
        stride = parm.stride
    numtrain = parm.nt_train
    print(dims[0], dims[1])

    opt = DotDict()
    opt.nx = tsize**2
    opt.nd = 1
    opt.periode = parm.nt

    # loading data
    csv = os.path.join(data_dir, file)
    reduced = np.genfromtxt(csv, delimiter=",")
    print(reduced.shape)
    data = reduced.reshape(parm.nt_data, dims[0], dims[1])

    # pad the grid so that it can be tiled by (tsize x tsize) patches
    new_dims = [
        roundup(dims[0], tsize, stride),
        roundup(dims[1], tsize, stride)
    ]
    opt.new_dims = new_dims
    pad_data = np.empty((parm.nt_data, new_dims[0], new_dims[1]))
    pad_data[:] = np.nan
    step_x = int((new_dims[1] - tsize) / stride) + 1
    step_y = int((new_dims[0] - tsize) / stride) + 1
    xmin = int((new_dims[1] - dims[1]) / 2)
    xmax = new_dims[1] - (new_dims[1] - dims[1] - xmin)
    ymin = int((new_dims[0] - dims[0]) / 2)
    ymax = new_dims[0] - (new_dims[0] - dims[0] - ymin)
    pad_data[:, ymin:ymax, xmin:xmax] = data

    # break the padded grid into flattened patches
    broken_data = []
    count = 0
    for j in np.arange(0, step_x * stride, stride):
        for i in np.arange(0, step_y * stride, stride):
            data = np.expand_dims(pad_data[:, i:i + tsize, j:j + tsize], axis=0)
            data = data.reshape(1, parm.nt_data, -1)
            if count == 0:
                broken_data = data
            else:
                broken_data = np.append(broken_data, data, axis=0)
            count += 1
    broken_data = np.array(broken_data)
    return opt, broken_data
def get_keras_dataset(data_dir, disease_name, nt_train, seq_len, start_time=0, normalize='variance'):
    # get dataset
    # data_dir = 'data', disease_name = 'ncov_confirmed'
    # return (nt, nx, nd) time series data
    time_data_dir = os.path.join(data_dir, disease_name, 'time_data')
    time_datas = os.listdir(time_data_dir)
    data = []
    for time_data in time_datas:
        data_path = os.path.join(time_data_dir, time_data)
        new_data = np.genfromtxt(data_path, encoding='utf-8', delimiter=',')[start_time:][..., np.newaxis]
        data.append(new_data)
    data = np.concatenate(data, axis=2).astype(np.float64)

    # get option
    opt = DotDict()
    opt.nt, opt.nx, opt.nd = data.shape
    opt.normalize = normalize

    train_data = data[:nt_train]
    opt.mean = np.mean(train_data)
    if normalize == 'max_min':
        opt.min = np.min(train_data)
        opt.max = np.max(train_data)
        data = (data - opt.mean) / (opt.max - opt.min)
    elif normalize == 'variance':
        opt.std = np.std(train_data) * np.sqrt(train_data.size) / np.sqrt(train_data.size - 1)
        data = (data - opt.mean) / opt.std

    # split train / test
    data = np.reshape(data, (opt.nt, opt.nx * opt.nd))
    train_data = data[:nt_train]
    train_input = []   # (batch, sequence_length, opt.nx*opt.nd)
    train_output = []  # (batch, opt.nx*opt.nd)
    for i in range(nt_train - seq_len):
        train_input.append(train_data[i:i + seq_len][np.newaxis, ...])
        train_output.append(train_data[i + seq_len][np.newaxis, ...])
    train_input = np.concatenate(train_input, axis=0)
    train_output = np.concatenate(train_output, axis=0)
    test_data = data[nt_train:]
    test_input = data[nt_train - seq_len:nt_train]
    return opt, (train_input, train_output, test_input, test_data)
def crash_ex(data_dir, file='crash_.csv'):
    # dataset configuration
    opt = DotDict()
    opt.nt = 1085
    opt.nt_train = 1080
    opt.nx = 131
    opt.np = 8
    opt.nd = 1
    opt.periode = opt.nt
    # loading data
    data = torch.Tensor(np.genfromtxt(os.path.join(data_dir, file))).view(
        opt.nt, opt.nx, opt.np + opt.nd)
    # load relations
    relations = torch.Tensor(
        np.genfromtxt(os.path.join(data_dir, 'crash_relations.csv')))
    relations = normalize(relations).unsqueeze(1)
    return opt, data, relations
def init(seed, _config, _run):
    # Flatten the nested 'common' and 'dataset' configs so that, e.g.,
    # args.seq_length can be used instead of args.common.seq_length
    config = {k: v for k, v in _config.items()}

    common_config = config['common']
    config.pop('common')
    for k, v in common_config.items():
        assert k not in config
        config[k] = v

    dataset_config = config['dataset']
    config.pop('dataset')
    for k, v in dataset_config.items():
        assert k not in config
        config[k] = v

    args = DotDict(config)
    # utils.seedAll(seed)  # TODO: implement seedAll
    return args
def __enter__(self) -> AblationAnalysis:
    """
    Initialize the experiment by generating all ModelConfigs specified by the hyperparameter
    grid and storing them in a temporary folder. Every config file is assigned a unique name,
    which is later accessed when training agents asynchronously.
    """
    if not os.path.exists(self.config_dir):
        os.makedirs(self.config_dir)

    # First construct all possible hyperparameter configuration JSON contents.
    self.configs = list()
    base_config = DotDict.from_json(self.experiment.ablation_base.config)
    for param in self.experiment.ablation_grid:
        config = base_config.copy()
        config.recursive_update(param)
        self.configs.append(config)

    # Save the ablation analysis schedule, annotated with the current time.
    dt = datetime.now().strftime("%Y%m%d-%H%M%S")
    schedule = DotDict({i: self.experiment.ablation_grid[i]
                        for i in range(len(self.experiment.ablation_grid))})
    schedule.to_json(os.path.join(self.experiment.output_directory,
                                  f'ablation_schedule_{dt}.json'))

    # Store/generate all unique JSON config files annotated by time and repetition number.
    for run in range(self.experiment.experiment_args.num_repeat):
        for i, config in enumerate(self.configs):
            c = config.copy()  # Note: shallow copy.
            run_config_name = f'rep{run}_config{i}_dt{dt}'
            c.name = f'{c.name}_{run_config_name}'

            out = os.path.join(self.experiment.output_directory, c.args.checkpoint, run_config_name)
            c.args.checkpoint = out
            c.args.load_folder_file = (out, c.args.load_folder_file[1])

            if not os.path.exists(c.args.checkpoint):
                os.makedirs(c.args.checkpoint)

            config_file = os.path.join(self.config_dir, run_config_name) + '.json'
            c.to_json(config_file)
            self.files.append(config_file)

    return self
def parse_node(self, node):
    """
    Overrides ProcessParser.parse_node.
    Parses and attaches the inputOutput tags that are created by Camunda Modeler.
    :param node: xml task node
    :return: TaskSpec
    """
    spec = super(CamundaProcessParser, self).parse_node(node)
    spec.data = DotDict()
    try:
        input_nodes = self._get_input_nodes(node)
        if input_nodes:
            for nod in input_nodes:
                spec.data.update(self._parse_input_node(nod))
    except Exception:
        LOG.exception("Error while processing node: %s" % node)
    spec.defines = spec.data
    # spec.ext = self._attach_properties(node, spec)
    return spec
def get_data_set(data_dir, disease):
    # dataset configuration
    opt = DotDict()
    opt.nt = 156
    opt.nx = 29
    opt.nd = 1
    opt.periode = opt.nt
    file = disease + ".csv"
    # loading data
    data = np.genfromtxt(os.path.join(data_dir, file), encoding="utf-8-sig", delimiter=",")
    # change data
    data = change(data)
    data = torch.Tensor(data).view(opt.nt, opt.nx, opt.nd)
    # load relations for the given disease
    relations_file = disease + "_relations.csv"
    relations = torch.Tensor(
        np.genfromtxt(os.path.join(data_dir, relations_file),
                      encoding="utf-8-sig", delimiter=","))
    relations = normalize(relations).unsqueeze(1)
    return opt, data, relations
def get_rnn_dataset(data_dir, disease, nt_train, seq_len, start_time=0, normalize='variance'):
    # get dataset
    data = get_time_data(data_dir, disease, start_time)  # (nt, nx, nd)

    # get option
    opt = DotDict()
    opt.nt, opt.nx, opt.nd = data.size()
    opt.normalize = normalize
    opt.mean = data.mean().item()
    if normalize == 'max_min':
        opt.min = data.min().item()
        opt.max = data.max().item()
        data = (data - opt.mean) / (opt.max - opt.min)
    elif normalize == 'variance':
        opt.std = torch.std(data).item()
        data = (data - opt.mean) / opt.std

    # split train / test
    train_input = []
    train_output = []
    for i in range(nt_train - seq_len):
        new_input = []
        for j in range(seq_len):
            new_input.append(data[i + j])
        train_input.append(torch.stack(new_input, dim=0))
        train_output.append(data[i + seq_len])
    train_input = torch.stack(train_input, dim=0)
    train_output = torch.stack(train_output, dim=0)

    test_input = []
    for i in range(seq_len):
        test_input.append(data[nt_train - seq_len + i])
    test_data = data[nt_train:]
    test_input = torch.stack(test_input, dim=0)
    return opt, (train_input, train_output), (test_input, test_data)
def perform_tournament(experiment: ExperimentConfig, by_checkpoint: bool = True) -> None:
    """
    Helper function to unpack the player configs provided in the ExperimentConfig into a pool
    (list) of player-data tuples that is given to the tourney function.

    If 'by_checkpoint' is True, we check the directory of the provided model path and create
    an individual player for each available model checkpoint. Otherwise we just take the
    (latest) model specified in the config. The experiment config must contain a
    'checkpoint_resolution' integer argument giving a step size for skipping checkpoints to
    reduce computation time, i.e., only every 'checkpoint_resolution'-th checkpoint is used.

    We expect model checkpoint files to be unaltered from the source code, meaning the format
    follows:
    - prefix_checkpoint_(int).pth.tar

    :param experiment: ExperimentConfig Contains the players to be pitted against each other.
    :param by_checkpoint: bool Whether to include every model checkpoint in the player pool
                          (or just the specified one).
    """
    args = experiment.experiment_args  # Helper variable to reduce verbosity.

    # Collect player configurations
    player_checkpoint_pool = get_player_pool(
        experiment.player_configs,
        by_checkpoint=by_checkpoint,
        resolution=args.checkpoint_resolution)

    results, trajectories = tourney(player_checkpoint_pool, experiment.game, args.num_repeat,
                                    args.num_trials, args.num_opponents, args.return_data)

    # Save results to output file.
    dt = datetime.now().strftime("%Y%m%d-%H%M%S")
    data = DotDict({'results': results, 'args': experiment.experiment_args})
    data.to_json(experiment.output_directory + f'{experiment.name}_{dt}.json')

    if trajectories:
        with open(experiment.output_directory + f'{experiment.name}_{dt}.out', 'wb') as f:
            pickle.dump(trajectories, f)
    min_crop_overlaps, max_crop_overlaps, \
    min_crop_sample_coverages, max_crop_sample_coverages, \
    min_crop_object_coverages, max_crop_object_coverages, \
    max_crop_trials',
    [0.0, 1.0, 0.5, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 25])

RandPadder = namedtuple_with_defaults(
    'RandPadder',
    'rand_pad_prob, max_pad_scale, fill_value',
    [0.0, 1.0, 127])

ColorJitter = namedtuple_with_defaults(
    'ColorJitter',
    'random_hue_prob, max_random_hue, \
    random_saturation_prob, max_random_saturation, \
    random_illumination_prob, max_random_illumination, \
    random_contrast_prob, max_random_contrast',
    [0.0, 18, 0.0, 32, 0.0, 32, 0.0, 0.5])

cfg = DotDict()
cfg.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# training configs
cfg.train = DotDict()
# random cropping samplers
cfg.train.rand_crop_samplers = [
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.1),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.3),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.5),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.7),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.9),
]
cfg.train.crop_emit_mode = 'center'
# cfg.train.emit_overlap_thresh = 0.4
# random padding
p.add('--l1_rel', type=float, help='l1 regularization on relation discovery mode', default=0.)
# -- learning
p.add('--batch_size', type=int, default=1131, help='batch size')
p.add('--patience', type=int, default=150,
      help='number of epochs to wait before triggering lr decay')
p.add('--nepoch', type=int, default=10, help='number of epochs to train for')
p.add('--test', type=boolean_string, default=False, help='test during training')
# -- gpu
p.add('--device', type=int, default=-1, help='-1: cpu; > -1: cuda device id')
# -- seed
p.add('--manualSeed', type=int, help='manual seed')
# -- logs
p.add('--checkpoint_interval', type=int, default=100, help='checkpoint interval')

# parse
opt = DotDict(vars(p.parse_args()))
if opt.dir_auto:
    opt.outputdir = opt.dataset + "_" + opt.mode
if opt.xp_time:
    opt.xp = opt.xp + "_" + get_time()
if opt.xp_auto:
    opt.xp = get_time()
if opt.auto_all:
    opt.outputdir = opt.dataset + "_" + opt.mode
    opt.xp = get_time()
opt.outputdir = get_dir(opt.outputdir)
opt.mode = opt.mode if opt.mode in ('refine', 'discover') else None
opt.start = time_dir()
start_st = datetime.datetime.now()
opt.start_time = datetime.datetime.now().strftime('%y-%m-%d-%H-%M-%S')
def prepare_submission(experiment_name, epoch, stage):
    model_str = experiment_name
    cfg = load_config_data(experiment_name)
    pprint.pprint(cfg)
    checkpoints_dir = f"./checkpoints/{model_str}"
    print("\n", experiment_name, "\n")

    model_info = DotDict(cfg["model_params"])
    model = build_model(model_info, cfg)
    model = model.cuda()
    checkpoint = torch.load(f"{checkpoints_dir}/{epoch:03}.pt")
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    torch.set_grad_enabled(False)

    eval_dataset = dataset.LyftDatasetPrerendered(dset_name=stage, cfg_data=cfg)
    # eval_dataset[0]
    eval_dataloader = DataLoader(eval_dataset, shuffle=False, batch_size=32, num_workers=16)
    # print(eval_dataset.agent_dataset)

    def run_prediction(predictor, data_loader):
        predictor.eval()
        pred_coords_list = []
        confidences_list = []
        timestamps_list = []
        track_id_list = []
        with torch.no_grad():
            for data in tqdm(data_loader):
                image = data["image"].cuda()
                # agent_state = data["agent_state"].float().cuda()
                agent_state = None
                pred, confidences = predictor(image, agent_state)
                confidences = torch.exp(confidences)

                pred_world = []
                pred = pred.cpu().numpy().copy()
                if model_info.target_space == "image":
                    # convert image-space predictions to world coordinates
                    world_from_agents = data["world_from_agent"].numpy()
                    centroids = data["centroid"].numpy()
                    for idx in range(pred.shape[0]):
                        pred[idx] = (transform_points(
                            pred[idx].copy().reshape(-1, 2),
                            world_from_agents[idx],
                        ) - centroids[idx]).reshape(-1, 50, 2)

                for img_idx in range(pred.shape[0]):
                    pred_world.append(pred[img_idx])

                pred_coords_list.append(np.array(pred_world))
                confidences_list.append(confidences.cpu().numpy().copy())
                timestamps_list.append(data["timestamp"].numpy().copy())
                track_id_list.append(data["track_id"].numpy().copy())

        timestamps = np.concatenate(timestamps_list)
        track_ids = np.concatenate(track_id_list)
        coords = np.concatenate(pred_coords_list)
        confs = np.concatenate(confidences_list)
        return timestamps, track_ids, coords, confs

    timestamps, track_ids, coords, confs = run_prediction(model, eval_dataloader)

    os.makedirs("submissions", exist_ok=True)
    pred_path = f"submissions/sub_{experiment_name}_{epoch}_{stage}.csv"
    print(f"Coords: {coords.shape} conf: {confs.shape}")
    np.savez_compressed(
        f"submissions/sub_{experiment_name}_{epoch}_{stage}.npz",
        timestamps=timestamps,
        track_ids=track_ids,
        coords=coords,
        confs=confs)
    write_pred_csv(
        pred_path,
        timestamps=timestamps,
        track_ids=track_ids,
        coords=coords,
        confs=confs,
    )
    print(f"Saved to {pred_path}")
import torch
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tensorboard_logger import configure, log_value

from models import Generator, Discriminator, FeatureExtractor

if __name__ == '__main__':
    # Options are stored as attributes on a DotDict instead of argparse arguments.
    opt = DotDict({})
    opt.dataset = 'folder'
    opt.dataroot = '/root/palm/PycharmProjects/DATA/SRGAN_HR/'
    opt.workers = 0
    opt.batchSize = 1
    opt.imageSize = 224
    opt.upSampling = 2
    opt.nEpochs = 100
    opt.generatorLR = 0.0001
    opt.discriminatorLR = 0.0001
    opt.cuda = True
    opt.nGPU = 1
    opt.generatorWeights = ''
    opt.discriminatorWeights = ''
    opt.out = 'checkpoints'