Example #1
def TrainTestValHoldout(dataset,
                        sample_size,
                        random_seed,
                        return_holdouts=False):
    name = '_'.join(dataset.split(' ') + [str(sample_size), str(random_seed)])
    ds = Dataset(dataset)
    names = ds.names
    df = ds.df

    # these are manually selected label combinations for holdouts A-E
    combinations = [['GO:0008144', 'GO:0022857'], ['GO:0003677', 'GO:0003723'],
                    ['GO:0043169', 'GO:0015075'],
                    ['GO:0036094', 'GO:0016301', 'GO:0140096', 'GO:0038023'],
                    ['GO:0003824', 'GO:0043168', 'GO:0048037']]

    holdouts = []
    for comb in combinations:
        _labels = set(comb)
        mask = df['labels'].map(lambda x: _labels.issubset(x))
        holdouts.append(df[mask])

    for comb in combinations:
        _labels = set(comb)
        mask = df['labels'].map(lambda x: _labels.issubset(x))
        df = df[~mask]

    ds = BaseDataset().from_df(df)
    #print(len(ds.df.index))
    #print(ds.terms['count'].min())
    ds.names = names

    if return_holdouts:
        return combinations, holdouts
    else:
        return _TrainTestVal(ds, sample_size, random_seed)
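The holdout filtering above hinges on a per-row subset test over sets of GO-term labels. A minimal, self-contained pandas sketch of that masking idea (toy rows, not the original dataset):

import pandas as pd

# Toy illustration of the subset mask used above: a row belongs to a holdout
# when its label set contains every label of the holdout combination.
df = pd.DataFrame({'labels': [{'GO:0008144', 'GO:0022857'},
                              {'GO:0003677'},
                              {'GO:0008144', 'GO:0022857', 'GO:0003723'}]})
holdout_labels = {'GO:0008144', 'GO:0022857'}
mask = df['labels'].map(lambda s: holdout_labels.issubset(s))
holdout, remainder = df[mask], df[~mask]
print(len(holdout), len(remainder))  # 2 1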
Example #2
 def __init__(self, task_config):
     self.model_config = load_json(task_config['model_config_file'])
     self.model_config['voc_data_dir'] = task_config['data_path']
     self.opt = opt
     self.opt.log_filename = task_config['log_filename']
     self.opt._parse(self.model_config)
     self.dataset = Dataset(self.opt)
     logging.info('load data')
     self.dataloader = DataLoader(self.dataset,
                                  batch_size=self.model_config['batch_size'],
                                  shuffle=True,
                                  num_workers=self.opt.num_workers)
     # TODO: add a valset for validate
     self.testset = TestDataset(self.opt)
     self.test_dataloader = DataLoader(
         self.testset,
         batch_size=self.model_config['batch_size'],
         num_workers=self.opt.test_num_workers,
         shuffle=False,
         pin_memory=True
     )
     self.train_size = len(self.dataset)
     self.valid_size = len(self.testset)
     self.faster_rcnn = FasterRCNNVGG16()
     logging.info('model construct completed')
     self.trainer = FasterRCNNTrainer(
         self.faster_rcnn, self.opt.log_filename
     ).cuda()
     if self.opt.load_path:
         self.trainer.load(self.opt.load_path)
         logging.info('load pretrained model from %s' % self.opt.load_path)
     self.best_map = 0
     self.lr_ = self.opt.lr
Example #3
class PairwiseSampler(object):
    def __init__(self, batch_size=512, data_name="ml_100k", num_neg=1):
        self.batch_size = batch_size
        self.data_name = data_name
        self.data = Data()
        self.num_neg = num_neg
        self.dataset = Dataset(data_name=self.data_name)

    def get_train_data(self):
        user_movie = self.dataset.get_user_movie()
        num_item = self.dataset.get_max_movie_id()
        data_value = self.data.get_train_data(data_name=self.data_name)
        for idx in range(len(data_value)):
            j = np.random.choice(num_item) + 1
            while j in user_movie[data_value[idx, 0]]:
                j = np.random.choice(num_item) + 1
            data_value[idx, 2] = j
        return data_value

    def get_train_batch(self):
        data_value = self.get_train_data()
        for start in range(0, len(data_value), self.batch_size):
            end = min(start + self.batch_size, len(data_value))
            yield data_value[start:end]

    def get_test_batch(self):
        data_value = self.data.get_test_data(data_name=self.data_name)
        for start in range(0, len(data_value), self.batch_size):
            end = min(start + self.batch_size, len(data_value))
            yield data_value[start:end]

    def get_batch_number(self):
        data_value = self.data.get_train_data(data_name=self.data_name)
        return (len(data_value) + self.batch_size - 1) // self.batch_size
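A hypothetical call site for the sampler above; the import path and the downstream pairwise loss are assumptions, not part of the original code:

# Hypothetical usage sketch; the module path `pairwise_sampler` is assumed.
from pairwise_sampler import PairwiseSampler

sampler = PairwiseSampler(batch_size=256, data_name="ml_100k", num_neg=1)
print(sampler.get_batch_number(), "train batches per epoch")
for batch in sampler.get_train_batch():
    # each row is (user, positive_item, sampled_negative_item)
    users, pos_items, neg_items = batch[:, 0], batch[:, 1], batch[:, 2]
    # feed the triplets to a BPR-style pairwise ranking loss here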
Example #4
    def _filter_matches(self,
                        matches,
                        jm_dataset: Dataset,
                        wm_dataset,
                        jmdf_prefix='jmdf_',
                        wmdf_prefix='wmdf_'):

        timestamp_metrics = [Metric.START_TIME, Metric.STOP_TIME]

        for metric in timestamp_metrics:
            jmdf_ts_col = jmdf_prefix + jm_dataset.col(metric)
            wmdf_ts_col = wmdf_prefix + wm_dataset.col(metric)
            matches = matches[
                (self._timestamp_diff_series(matches[jmdf_ts_col],
                                             matches[wmdf_ts_col]) <
                 self.timestamp_tolerance)
                # |
                # (matches[jmdf_ts_col].isnull()) | (matches[wmdf_ts_col].isnull())
            ]

        jm_workflow_col = jmdf_prefix + jm_dataset.col(Metric.WORKFLOW)
        wm_workflow_col = wmdf_prefix + wm_dataset.col(Metric.WORKFLOW)

        # Only accept jobs that match in their workflow
        matches = matches[matches[jm_workflow_col] == matches[wm_workflow_col]]

        return matches
Example #5
	def __init__(self, view, parent=None):
		'''Constructor of the Controlador class. Initializes its attributes and connects the
		signals of the screen's buttons to methods.

		Parameters:

		view:	MainGui that the controller is controlling
		'''
		super(ControladorDataset, self).__init__(parent)

		# connect the controller to the view
		self._view = view
		self._view.setupUi(self)

		self._dataset = Dataset()

		# connect the controller to the data model used
		# by the view's elements
		self._datasetmodel = DatasetModel(self._dataset)

		# connect the signals of the buttons and of the user's actions on the screen to functions
		self._view.abrirButton.clicked.connect(self.abrir_janela_para_escolher_arquivo)
		self._view.tabelaAtributos.entered.connect(self.atualizar_atributo_selecionado)
		self._view.tabelaAtributos.clicked.connect(self.atualizar_atributo_selecionado)
		self._view.removerButton.clicked.connect(self.remover_atributos)

		# adjust the view's parameters
		self._view.tabelaAtributos.setSelectionBehavior(QTableView.SelectRows)
		self._view.tabelaAtributos.setSelectionMode(QTableView.SingleSelection)
		self._view.tabelaEstatistica.setFocusPolicy(Qt.NoFocus)

		# assign the models to the screen's elements
		self._view.tabelaAtributos.setModel(self._datasetmodel)
Example #6
    def _match_on_cpu_time(self,
                           jm_dataset: Dataset,
                           wm_dataset: Dataset,
                           jm_subset=None,
                           wm_subset=None):
        jmdf = jm_subset if jm_subset is not None else jm_dataset.df
        wmdf = wm_subset if wm_subset is not None else wm_dataset.df

        # Round CPU time to account for rounding errors while matching float values
        jmdf['cpuApprox'] = jmdf[jm_dataset.col(Metric.CPU_TIME)].round()
        wmdf['cpuApprox'] = wmdf[wm_dataset.col(Metric.CPU_TIME)].round()

        jmdf_index = jmdf.index.name
        wmdf_index = wmdf.index.name

        self._prefix_columns(jmdf, 'jmdf_')
        self._prefix_columns(wmdf, 'wmdf_')

        matches = jmdf.reset_index().merge(wmdf.reset_index(),
                                           left_on='jmdf_cpuApprox',
                                           right_on='wmdf_cpuApprox')

        filtered = self._filter_matches(matches,
                                        jm_dataset,
                                        wm_dataset,
                                        jmdf_prefix='jmdf_',
                                        wmdf_prefix='wmdf_')

        perfect_matches = filtered.groupby(jmdf_index).filter(
            lambda x: len(x) == 1)

        return perfect_matches[[jmdf_index, wmdf_index]]
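The matching above boils down to rounding CPU time, joining the two frames on the rounded value, and keeping only unambiguous matches. A self-contained pandas sketch of that idea with made-up column names:

import pandas as pd

# Round-then-merge matching on a toy pair of frames (column names are illustrative).
jm = pd.DataFrame({'job_id': [1, 2], 'cpu_time': [10.02, 20.51]})
wm = pd.DataFrame({'wf_id': ['a', 'b'], 'cpu_time': [9.98, 20.49]})
jm['cpuApprox'] = jm['cpu_time'].round()
wm['cpuApprox'] = wm['cpu_time'].round()
matches = jm.merge(wm, on='cpuApprox', suffixes=('_jm', '_wm'))
# Keep only jobs that matched exactly one workflow entry ("perfect matches").
perfect = matches.groupby('job_id').filter(lambda g: len(g) == 1)
print(perfect[['job_id', 'wf_id']])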
Example #7
 def sample_mini_dataset(self,
                         num_classes,
                         num_shots,
                         test_shots,
                         classes=None):
     if classes is None:
         # sample distinct class ids for this episode
         classes = np.random.choice(10, num_classes, replace=False)
     X, y, X_test, y_test = [], [], [], []
     for idx, c in enumerate(classes):
         X_c = self.X[self.y == c]
         p = np.random.choice(X_c.shape[0],
                              size=num_shots + test_shots,
                              replace=False)
         X.append(X_c[p[:num_shots]])
         X_test.append(X_c[p[num_shots:]])
         y.append(np.ones(num_shots, ) * idx)
         y_test.append(np.ones(test_shots, ) * idx)
     X = np.concatenate(X, axis=0)
     X_test = np.concatenate(X_test, axis=0)
     y = np.concatenate(y, axis=0)
     y_test = np.concatenate(y_test, axis=0)
     if self.one_hot:
         y = helpers.one_hot(y, num_classes)
         y_test = helpers.one_hot(y_test, num_classes)
     train_set = Dataset(batch_size=self.inner_batch_size,
                         X=X,
                         y=y,
                         shuffle=True)
     test_set = Dataset(batch_size=self.inner_batch_size,
                        X=X_test,
                        y=y_test,
                        shuffle=False)
     return train_set, test_set
Example #8
def create_data_loaders(opt, split='train'):
    """
    Create the data loader for the requested split ('train', 'val' or 'test')
    """
    if split == 'train':
        tr_dataset = Dataset(opt.data, opt, 'train')
        train_loader = torch.utils.data.DataLoader(tr_dataset,
                                                   batch_size=opt.batchSize,
                                                   shuffle=True,
                                                   drop_last=True,
                                                   num_workers=opt.nThreads,
                                                   pin_memory=True)
        return train_loader

    elif split == 'val':
        val_dataset = Dataset(opt.data, opt, 'val')
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=opt.batchSize,
                                                 shuffle=False,
                                                 num_workers=opt.nThreads,
                                                 pin_memory=True)
        return val_loader

    elif split == 'test':
        te_dataset = Dataset(opt.data, opt, 'test')
        test_loader = torch.utils.data.DataLoader(te_dataset,
                                                  batch_size=opt.batchSize,
                                                  shuffle=False,
                                                  num_workers=opt.nThreads,
                                                  pin_memory=True)
        return test_loader
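A hypothetical call site, assuming opt carries the fields used above (data, batchSize, nThreads) and that the dataset yields (input, target) pairs:

# Hypothetical usage; `opt` is whatever option object the surrounding project builds.
train_loader = create_data_loaders(opt, split='train')
val_loader = create_data_loaders(opt, split='val')
test_loader = create_data_loaders(opt, split='test')
for inputs, targets in train_loader:
    pass  # one pass over the shuffled training batches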
Example #9
    def sample_mini_dataset(self, num_classes, num_shots, test_shots):
        shuffled = list(self.data_source)
        random.shuffle(shuffled)
        cs = shuffled[:num_classes]
        X = []
        y = []
        X_test = []
        y_test = []
        for idx, c in enumerate(cs):
            inputs = c.sample(num_shots+test_shots)
            targets = np.array([idx for i in range(num_shots+test_shots)])
            X.append(inputs[:num_shots])
            y.append(targets[:num_shots])
            X_test.append(inputs[num_shots:])
            y_test.append(targets[num_shots:])
        X = np.concatenate(X, axis=0)
        y = np.concatenate(y, axis=0)
        X_test = np.concatenate(X_test, axis=0)
        y_test = np.concatenate(y_test, axis=0)

        if self.one_hot:
            y = helpers.one_hot(y, num_classes)
            y_test = helpers.one_hot(y_test, num_classes)

        train_set = Dataset(batch_size=self.inner_batch_size, X=X, y=y, shuffle=True)
        test_set = Dataset(batch_size=self.inner_batch_size, X=X_test, y=y_test, shuffle=False)
        # train_set = self._load_dataset(X, y, num_classes, self.inner_batch_size, self.one_hot)
        # test_set = self._load_dataset(X_test, y_test, num_classes, self.inner_batch_size, self.one_hot)
        return train_set, test_set
Example #10
 def predict(self, sine=None, z_value=None):
     if sine is None:
         sine = self.eval_set.sample(1)[0]
     samples = sine.sample(2000)
     train_set, val_set = samples[:1000], samples[1000:]
     train_set = Dataset(batch_size=100,
                         X=train_set[:, 0:1],
                         y=train_set[:, 1])
     val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
     Xs, ys, ps = [], [], []
     for data in train_set:
         if z_value is None:
             feed_dict = self._make_feed_dict(data, is_training=False)
         else:
             feed_dict = self._make_feed_dict(data,
                                              is_training=False,
                                              z_value=z_value,
                                              use_z_ph=True)
         data = self._data_preprocessing(data)
         X, y = data
         p = self.session.run([m.predictions for m in self.parallel_models],
                              feed_dict=feed_dict)
         Xs.append(X)
         ys.append(y)
         ps += p
     Xs = np.concatenate(Xs, axis=0)
     ys = np.concatenate(ys, axis=0)
     ps = np.concatenate(ps, axis=0)
     return Xs, ys, ps
Example #11
 def load_data(self, *args):
     x_train, y_train, x_valid, y_valid = get_data(*args)
     x_train_normalized, x_valid_normalized = normalize_train_eval(
         x_train, x_valid)
     train_ds = Dataset(x_train_normalized, y_train)
     valid_ds = Dataset(x_valid_normalized, y_valid)
     c = y_train.max().item() + 1
     return train_ds, valid_ds, c
Example #12
def main():
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    # name
    name = '{}-{}_{}-{}-{}-{}'.format(args.env, args.model, args.hidden_dim,
                                      args.num_layers, args.T, args.lr)
    wandb.init(name=name, project="finance", entity="liuyuezhang", config=args)

    # dim
    dims = {'single': 6, 'pair': 4}
    dim = dims[args.env]

    # data
    train_dataset = Dataset(dir=args.dir + args.env + '/train.pkl', T=args.T)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True)

    # model
    model = VanillaLSTM(input_dim=dim,
                        hidden_dim=args.hidden_dim,
                        output_dim=dim,
                        num_layers=args.num_layers).to("cuda")
    loss_fn = torch.nn.MSELoss()
    optimiser = torch.optim.Adam(model.parameters(), lr=args.lr)

    for e in range(args.epochs):
        # train
        print("\n================Epoch: {}================\n".format(e))
        model.train()
        for data in tqdm(train_loader):
            x_train, y_train = data
            x_train = (x_train.float()).to("cuda")
            y_train = (y_train.float()).to("cuda")

            # forward
            y_pred = model(x_train)
            loss = loss_fn(y_pred, y_train)
            # backward
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            # log
            wandb.log({"loss": loss.item()})

        # if (e % args.save_epochs) == 9:
        # save
        print("model saved.")
        torch.save(model.state_dict(), os.path.join(wandb.run.dir, 'model.pt'))

        # test
        test_dataset = Dataset(dir=args.dir + args.env + '/test.pkl', T=args.T)
        scale = np.array(pd.read_pickle(args.dir + args.env + '/test_max.pkl'))
        test_loader = torch.utils.data.DataLoader(test_dataset)
        model.eval()
        test(model, test_loader, scale)
Example #13
 def __init__(self):
     self.data_path = path_params['data_path']
     self.tfrecord_dir = path_params['tfrecord_dir']
     self.train_tfrecord_name = path_params['train_tfrecord_name']
     self.input_width = model_params['input_width']
     self.input_height = model_params['input_height']
     self.channels = model_params['channels']
     self.class_num = len(model_params['classes'])
     self.batch_size = solver_params['batch_size']
     self.dataset = Dataset()
Example #14
 def train_epoch(self):
     for k in range(100):
         sines = self.train_set.sample(1)
         samples = sines[0].sample(200)
         train_set, val_set = samples[:100], samples[100:]
         train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
         val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
         data = next(train_set)
         feed_dict = self._make_feed_dict(data, is_training=True)
         self.session.run(self.optimize_op, feed_dict=feed_dict)
Example #15
    def __init__(self, trainer_type, **kwargs):

        self.dataset = Dataset(**kwargs)

        additional_args = {
            "tot_speakers": self.dataset.tot_speakers,
            "type": trainer_type
        }

        kwargs.update(additional_args)
        self.args = kwargs
Example #16
class PointSampler(object):
    def __init__(self, batch_size=512, data_name="ml_100k", num_neg=1):
        self.batch_size = batch_size
        self.data = Data()
        self.data_name = data_name
        self.num_neg = num_neg
        self.dataset = Dataset(data_name=self.data_name)

    def get_train_data(self, keep_label=False, value_for_negative=0.):
        self.data_value = self.data.get_train_data(data_name=self.data_name)
        user_movie_train = self.dataset.get_user_movie_for_train()
        num_item = self.dataset.get_max_movie_id()
        new_data_value = []
        for user_item in self.data_value:
            user, item, label = user_item[0], user_item[1], user_item[2]
            if keep_label:
                new_data_value.append([user, item, label])
            else:
                new_data_value.append([user, item, 1])
            for i in range(self.num_neg):
                j = np.random.choice(num_item)
                while j in user_movie_train[user]:
                    j = np.random.choice(num_item)
                new_data_value.append([user, j, value_for_negative])
        # build as float32 so the label column is stored as a float
        new_data_value = np.array(new_data_value, dtype=np.float32)
        return new_data_value

    def get_train_batch(self,
                        shuffle=False,
                        keep_label=False,
                        value_for_negative=0.):
        self.data_value_batch = self.get_train_data(
            keep_label=keep_label, value_for_negative=value_for_negative)
        if shuffle:
            index = [i for i in range(len(self.data_value_batch))]
            random.shuffle(index)
            self.data_value_batch = self.data_value_batch[index]
        for start in range(0, len(self.data_value_batch), self.batch_size):
            end = min(start + self.batch_size, len(self.data_value_batch))
            yield self.data_value_batch[start:end]

    def get_test_batch(self):
        test_data = self.data.get_test_data()
        len_test_data = len(test_data)
        batch_index = np.random.choice(len_test_data, size=self.batch_size)
        batch_data = test_data[batch_index, :]
        return batch_data

    def get_batch_number(self):
        # each interaction contributes one positive plus num_neg negatives
        data_value = self.data.get_train_data(data_name=self.data_name)
        return len(data_value) // self.batch_size * (1 + self.num_neg)
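As with the pairwise sampler in Example #3, a hypothetical call site for this pointwise variant; the import path and the loss are assumptions:

# Hypothetical usage sketch; the module path `point_sampler` is assumed.
from point_sampler import PointSampler

sampler = PointSampler(batch_size=512, data_name="ml_100k", num_neg=4)
for batch in sampler.get_train_batch(shuffle=True):
    # each row is (user, item, label): label 1 for observed pairs, 0 for sampled negatives
    users, items, labels = batch[:, 0], batch[:, 1], batch[:, 2]
    # feed (users, items, labels) to a pointwise loss, e.g. binary cross-entropy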
Example #17
 def __init__(self):
     self.data_path = path_params['data_path']
     self.tfrecord_dir = path_params['tfrecord_dir']
     self.train_tfrecord_name = path_params['train_tfrecord_name']
     self.test_tfrecord_name = path_params['test_tfrecord_name']
     self.image_size = model_params['image_size']
     self.cell_size = model_params['cell_size']
     self.class_num = model_params['num_classes']
     self.class_ind = dict(zip(CLASSES, range(self.class_num)))
     self.batch_size = solver_params['batch_size']
     self.flipped = solver_params['flipped']
     self.dataset = Dataset()
Example #18
 def evaluate(self):
     ls = []
     for k in range(100):
         sines = self.eval_set.sample(1)
         samples = sines[0].sample(200)
         train_set, val_set = samples[:100], samples[100:]
         train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
         val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
         data = next(train_set)
         feed_dict = self._make_feed_dict(data, is_training=False)
         l = self.session.run([m.loss for m in self.parallel_models], feed_dict=feed_dict)
         ls.append(l)
     return np.mean(ls)
Example #19
def main():
    opt = Config(os.getcwd())
    if opt.backbone == 'resnet18':
        model = resnet_face18(opt.use_se)
    elif opt.backbone == 'resnet34':
        model = resnet34()
    elif opt.backbone == 'resnet50':
        model = resnet50()

    model = DataParallel(model)
    # load_model(model, opt.test_model_path)
    model.load_state_dict(
        torch.load(opt.test_model_path, map_location={'cuda:0': 'cpu'}))
    model.to(torch.device(device))
    model.eval()
    global args

    train_dataset = Dataset(opt.train_root,
                            opt.train_list,
                            phase='train',
                            input_shape=opt.input_shape)
    trainloader = data.DataLoader(train_dataset,
                                  batch_size=opt.train_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    # centroid_map = create_centroid(model, trainloader)

    test_dataset = Dataset(opt.test_root,
                           opt.test_list,
                           phase='test',
                           input_shape=opt.input_shape)
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=1000,
        # batch_size=opt.test_batch_size,
        shuffle=True,
        num_workers=opt.num_workers)

    for x, y in test_loader:

        latent_vecs = model(x)
        print(latent_vecs.shape, y.shape)
        target = y
        plot3d_tsne(
            latent_vecs,
            target,
        )
        show_umap(latent_vecs, target)
        t_sne(latent_vecs, target)
Example #20
 def _test(self):
     print("testing ......")
     ls = []
     for k in range(1):
         sines = self.eval_set.sample(1)
         samples = sines[0].sample(200)
         train_set, val_set = samples[:100], samples[100:]
         train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
         val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
         data = next(train_set)
         feed_dict = self._make_feed_dict(data, is_training=False)
         mean = self.session.run([m.z_mu for m in self.parallel_models], feed_dict=feed_dict)
         std = self.session.run([m.z_sigma for m in self.parallel_models], feed_dict=feed_dict)
         print(mean)
         print(std)
Example #21
def data_processor(bs, url="MNIST_URL"):
    x_train, y_train, x_valid, y_valid = get_data(url)
    train_mean, train_std = x_train.mean(), x_train.std()
    x_train = normalize(x_train, train_mean, train_std)
    # NB: Use training, not validation mean for validation set
    x_valid = normalize(x_valid, train_mean, train_std)

    train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_valid, y_valid)

    train_samp = Sampler(train_ds, bs, shuffle=True)
    valid_samp = Sampler(valid_ds, bs, shuffle=False)

    train_dl = DataLoader(train_ds, sampler=train_samp)
    valid_dl = DataLoader(valid_ds, sampler=valid_samp)
    return train_dl, valid_dl
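For comparison, a rough equivalent built on torch.utils.data; this sketch assumes get_data and normalize behave as in the snippet above and return tensors:

from torch.utils.data import TensorDataset, DataLoader

def data_processor_torch(bs, url="MNIST_URL"):
    # Assumes get_data/normalize from the snippet above are importable and tensor-valued.
    x_train, y_train, x_valid, y_valid = get_data(url)
    train_mean, train_std = x_train.mean(), x_train.std()
    x_train = normalize(x_train, train_mean, train_std)
    x_valid = normalize(x_valid, train_mean, train_std)  # training statistics, as above
    train_dl = DataLoader(TensorDataset(x_train, y_train), batch_size=bs, shuffle=True)
    valid_dl = DataLoader(TensorDataset(x_valid, y_valid), batch_size=bs * 2, shuffle=False)
    return train_dl, valid_dl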
Example #22
def predict(_):
    data = Dataset(feature_sets, files)
    data.split(batch_size,
               sequence_length,
               test_size=test_size,
               shuffle=False,
               include_std=False)

    predictor_a = TestModel(model_a, batch_size, 'model_A')
    predictor_v = TestModel(model_v, batch_size, 'model_V')

    rmse_a = []
    rmse_v = []
    angle_err = []
    angle_no_small = []

    # plotter = AnimatedPredictionAndColorPlotter(config_str_a, song_names,
    #                                             interpolation_factor=interp,
    #                                             num_plotted_frames=num_plotted_frames)

    for _ in range(0, data.test.num_batches):
        data.test.next_batch()
        # data.test.normalize_mode_test(predictor_a.f_means, predictor_a.f_std)

        for seq_x, seq_y in data.test.sequences:
            pa = predictor_a.predict(seq_x)
            pv = predictor_v.predict(seq_x)

            ya = seq_y[0, 0, 0]
            yv = seq_y[0, 0, 1]

            rmse_a.append((pa - ya) / 1490)
            rmse_v.append((pv - yv) / 1430)

            p_angle = renormalize_angle(np.angle(pa + 1j * pv), deg=False)
            y_angle = renormalize_angle(np.angle(ya + 1j * yv), deg=False)
            err = p_angle - y_angle

            if -np.pi < err < np.pi:
                angle_err.append(err)
                if (abs(ya) > 90) and (abs(yv) > 90):
                    angle_no_small.append(err)
            else:
                angle_err.append(2 * np.pi - abs(err))
                if (abs(ya) > 90) and (abs(yv) > 90):
                    angle_no_small.append(2 * np.pi - abs(err))

    return rmse_a, rmse_v, angle_err, angle_no_small
Example #23
def runCV():
    config.new_experiment()
    start = timeit.default_timer()  # -----------------

    model = AE(n_input=1, n_hidden=config.n_hidden, n_output=1, n_layers=1)
    dataset = Dataset()
    data = dataset[:, [config.CHANNEL]]
    target = dataset[:, [config.CHANNEL]]
    mean = cv(model,
              data,
              target,
              temperature=config.temperature,
              weight_decay=config.weight_decay,
              learning_rate=config.learning_rate,
              sparsity=config.sparsity,
              sparsity_penalty=config.sparsity_penalty,
              n_epochs=config.MAX_TRAINING_EPOCHS,
              n_splits=config.CV_N_SPLITS,
              seed=config.SEED,
              batch_size=config.batch_size,
              shuffle=False)

    stop = timeit.default_timer()  # -----------------
    print(stop - start)
    # save_result(mean)
    print('OK')
Example #24
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
Example #25
def train(**kwargs):
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    print('Loading Model')
    # faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    lr_ = opt.lr
    extractor, classifier = decom_vgg16()
    img, bbox_, label_, scale = dataset[1]
    _, H, W = img.shape
    img_size = (H, W)
    img, bbox_, label_ = to_tensor(img), to_tensor(bbox_), to_tensor(label_)
    scale = at.scalar(scale)
    img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
    img, bbox, label = Variable(img), Variable(bbox), Variable(label)
    pdb.set_trace()
    features = extractor(img)

    # standalone function (no self), so use explicit anchor settings
    # (the usual Faster R-CNN VGG16 defaults)
    rpn = RegionProposalNetwork(512,
                                512,
                                ratios=[0.5, 1, 2],
                                anchor_scales=[8, 16, 32],
                                feat_stride=16)

    rpn_locs, rpn_scores, rois, roi_indices, anchor = rpn(features,
                                                          img_size,
                                                          scale)
Example #26
    def merge_datasets(self,
                       matches,
                       left: Dataset,
                       right: Dataset,
                       left_index,
                       right_index,
                       left_suffix='left',
                       right_suffix='right'):

        left_df = left.df
        right_df = right.df

        # Join with matches, preserving all entries in base dataframe
        # Right is base data frame, so use right merge
        half_joined = matches.join(left_df, on=left_index, how='right')

        # Preserve all entries, even those not present in the matches
        joined = half_joined.join(right_df,
                                  on=right_index,
                                  how='outer',
                                  lsuffix=left_suffix,
                                  rsuffix=right_suffix)

        # Reset index to the original index: ID of the left dataframe
        # Todo Create new identifier?
        # joined = joined.set_index(left_index)

        # Todo Actually join values

        # Columns that are suffixed are overlapping, included in both data sets
        for left_col in [
                col for col in joined.columns if col.endswith(left_suffix)
        ]:
            col_name = self.remove_trailing(left_col, left_suffix)
            right_col = col_name + right_suffix

            # if right_col in right_df.columns:
            # If suffixed column exists, it also exists in other data set
            joined = self.merge_cols(joined, col_name, left_col, right_col)

        # for right_col in set(right_df.columns) - set(left_df.columns):
        #     # col_name = self.remove_trailing(right_col, right_suffix)
        #     joined[right_col] = joined[right_col + right_suffix]

        # Put result back into a dataset

        # Compute union of dates
        start_date = min(left.start, right.start)
        end_date = max(left.end, right.end)

        # Only retain extra data frames of the base dataset
        extra_dfs = left.extra_dfs

        result = Dataset(joined,
                         left.name,
                         start=start_date,
                         end=end_date,
                         sep=self.part_sep,
                         extra_dfs=extra_dfs)
        return result
Example #27
 def get_test_inputs(self):
     """Get the inputs of all test samples.
     
     Returns:
         An np.chararray, where each row corresponds to an image file name.
     """
     return Dataset.get_test_inputs(self)
Example #28
def get_feature_vectors(real, fakes):
    train_data = Dataset(namedtuple('Conf', 'batch_size')(50),
                         only_plain=True).get_plain_values()

    labels = tf.placeholder(tf.int64, [None])
    input, keep_prob, feature_vectors, logits = classifier()

    cross_entropy = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('ckpts', latest_filename='metric')
        if ckpt and ckpt.model_checkpoint_path:
            print('restoring classifier')
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
            print('training classifier')
            for i in range(20000):
                X, y = sess.run(train_data)
                train_step.run(feed_dict={input: X, labels: y, keep_prob: 0.5})
                if i % 100 == 0:
                    print(' iteration', i, 'of', 20000)
            saver.save(sess, 'ckpts/metric.ckpt', latest_filename='metric')
        print('evaluating feature vectors')
        real = feature_vectors.eval(feed_dict={input: real})
        fakes = [
            feature_vectors.eval(feed_dict={input: fake}) for fake in fakes
        ]
    return real, fakes
Example #29
    def input_to_torch_tensor(self, x, device, mode='inference',
                              force_no_preprocessing=False, sample_ids=None):
        """This method can be used to map the internal numpy arrays to PyTorch
        tensors.

        Note, this method has been overwritten from the base class.

        The input images are preprocessed if data augmentation is enabled.
        Preprocessing involves normalization and (for training mode) random
        perturbations.

        Args:
            (....): See docstring of method
                :meth:`data.dataset.Dataset.input_to_torch_tensor`.

        Returns:
            (torch.Tensor): The given input ``x`` as PyTorch tensor.
        """
        # FIXME Method copied from `CIFAR100Data`.
        if self._augment_inputs and not force_no_preprocessing:
            if mode == 'inference':
                transform = self._test_transform
            elif mode == 'train':
                transform = self._train_transform
            else:
                raise ValueError('"%s" not a valid value for argument "mode".'
                                 % mode)

            return CIFAR10Data.torch_augment_images(x, device, transform)

        else:
            return Dataset.input_to_torch_tensor(self, x, device,
                mode=mode, force_no_preprocessing=force_no_preprocessing,
                sample_ids=sample_ids)
Example #30
def main(_):
    # create global configuration object
    model_config = Configuration(FLAGS.config)
    model = create_model(FLAGS, model_config)
    placeholders = {
        'l': tf.placeholder(tf.float32, (1, None, None, 3)),
        'r': tf.placeholder(tf.float32, (1, None, None, 3)),
        'd': tf.placeholder(tf.float32, (1, None, None, 1)),
    }
    x = {
        'l': tf.placeholder(tf.float32, (1, None, None, 3)),
        'r': tf.placeholder(tf.float32, (1, None, None, 3)),
        'd': tf.placeholder(tf.float32, (1, None, None, 1)),
    }
    p = namedtuple('Placeholders', placeholders.keys())(**placeholders)
    px = namedtuple('Placeholders', x.keys())(**x)
    model.build(px, True, None, build_loss=False)
    model.build(p, False, True, build_loss=False)
    session = tf.Session()
    saver = tf.train.Saver()
    # init variables
    session.run(tf.local_variables_initializer())
    session.run(tf.global_variables_initializer())
    # restore model if provided a checkpoint
    if model_config.checkpoint is not None:
        print("Restoring model from {}".format(model_config.checkpoint))
        saver.restore(session, model_config.checkpoint)
    # init dataset
    paths = get_paths_for_dataset(FLAGS.dataset)
    ratios = {
        'train_ratio': FLAGS.train_ratio,
        'train_valid_ratio': FLAGS.train_valid_ratio,
        'valid_ratio': FLAGS.valid_ratio,
        'test_ratio': FLAGS.test_ratio,
    }
    paths = split_dataset_paths(paths, **ratios)
    dataset = Dataset(get_example_class(FLAGS.dataset), paths, FLAGS.dataset)
    results = {}
    fd = lambda x: {p.l: x.left, p.r: x.right}
    phases = ['valid', 'train', 'train_valid']
    reconstructions = os.path.join(model_config.directory, 'results')
    directories = [os.path.join(reconstructions, phase) for phase in phases]
    for dirname in directories:
        os.makedirs(dirname, exist_ok=True)
    f = open(os.path.join(model_config.directory, 'results.txt'), 'w')
    sys.stdout = Logger(sys.stdout, f)
    subset_iterator = zip(phases, [dataset.valid, dataset.train, dataset.train_valid], directories)
    for phase, subset, store_dir in subset_iterator:
        for example in subset:
            gt = example.disparity.squeeze()
            start = time()
            d = session.run(model.outputs[p], fd(example)).squeeze()
            print("Time: {}".format(1000 * (time() - start)), file=sys.stderr)
            hits, total = disp_precision(gt, d, model_config.get('max_disp', FLAGS.max_disp), 3)
            all_hits, all_total = results.get(phase, (0, 0))
            results[phase] = (hits + all_hits, total + all_total)
            store_disparity(d, os.path.join(store_dir, '{}.png'.format(example.name)))
            print('{} {} {}%'.format(phase, example.name, 100 * hits / total))
    for phase in results:
        print('Total {} {}'.format(phase, 100 * results[phase][0] / results[phase][1]))
Example #31
class ControladorDataset(QMainWindow):
	'''Class that controls and updates the screens based on the data obtained from the model classes.

	Attributes:

	_view			: MainGui that the controller is controlling
	_dataset		: DatasetModel with the data loaded through the screen
	'''
	
	def __init__(self, view, parent=None):
		'''Constructor of the Controlador class. Initializes its attributes and connects the
		signals of the screen's buttons to methods.

		Parameters:

		view:	MainGui that the controller is controlling
		'''
		super(ControladorDataset, self).__init__(parent)

		# connect the controller to the view
		self._view = view
		self._view.setupUi(self)

		self._dataset = Dataset()

		# connect the controller to the data model used
		# by the view's elements
		self._datasetmodel = DatasetModel(self._dataset)

		# connect the signals of the buttons and of the user's actions on the screen to functions
		self._view.abrirButton.clicked.connect(self.abrir_janela_para_escolher_arquivo)
		self._view.tabelaAtributos.entered.connect(self.atualizar_atributo_selecionado)
		self._view.tabelaAtributos.clicked.connect(self.atualizar_atributo_selecionado)
		self._view.removerButton.clicked.connect(self.remover_atributos)

		# adjust the view's parameters
		self._view.tabelaAtributos.setSelectionBehavior(QTableView.SelectRows)
		self._view.tabelaAtributos.setSelectionMode(QTableView.SingleSelection)
		self._view.tabelaEstatistica.setFocusPolicy(Qt.NoFocus)

		# assign the models to the screen's elements
		self._view.tabelaAtributos.setModel(self._datasetmodel)
	
		
	def abrir_janela_para_escolher_arquivo(self):
		'''Opens a window for the user to choose a file and calls the abrir method.'''

		# the file path + name are in the first returned field
		nome = QFileDialog.getOpenFileName(self, "Abrir", "", "Arquivos de Texto (*.csv)")[0]
		self.abrir(nome)
		
	def abrir(self, nome):
		'''Opens the file named *nome*, loads it into the DatasetModel and
		updates the Data tab of the screen.

		Parameters:

		nome: String with the complete file name (name + path)

		Returns:

		True:	if the csv dataset could be opened
		False:	if the csv file could not be opened
		'''

		self._view.statusbar.showMessage(u"Abrindo dataset...")
		# announce that the model is about to change
		
		if self._dataset.ler_csv(nome):

			self._datasetmodel.beginResetModel()
			self.atualizar_grupo_dados()
			self._view.statusbar.showMessage(u"Dataset aberto")		
			# announce that the model change is finished
			self._datasetmodel.endResetModel()
			return True
		else:
			self._view.statusbar.showMessage(u"Nenhum dataset selecionado")
			return False
		
	def atualizar_grupo_dados(self):
		'''Updates the dataset information on the screen (name, instances and attributes).'''
		
		self._view.nomeLabel.setText(self._dataset.get_nomearquivo())
		self._view.atributosLabel.setText(str(self._dataset.get_natributos()))
		self._view.instanciaLabel.setText(str(self._dataset.get_ninstancias()))
	
	def atualizar_botao_remover(self):
		'''Enables/disables *removerButton* according to the number of checked checkboxes.'''

		# if no checkbox is checked, disable the remove button
		if sum(self._dataset.get_marcados()) == 0:
			self._view.removerButton.setEnabled(False)
		else:
			self._view.removerButton.setEnabled(True)
			
	def atualizar_atributo_selecionado(self, indiceClicado=None):
		'''Updates the attribute information on the screen (name, missing, distinct, type, statistics)
		and calls atualizar_botao_remover if a checkbox is checked or unchecked.

		Parameters:

		indiceClicado: QModelIndex with the position of the clicked item
		'''

		# the method was triggered from remover_atributos;
		# in that case, show the data of the attribute on the first row
		if indiceClicado is None:
			row = 0
			column = 1
		else:
			row = indiceClicado.row()
			column = indiceClicado.column()

		# if the user checks a checkbox
		if column == 0:
			self.atualizar_botao_remover()
		
		# find the name of the selected attribute
		atributo = self._dataset.get_atributos()[row]
		# find the dataframe column for that attribute
		dados_atributo = self._dataset.get_dados()[atributo]
		# compute statistics over the column
		estatisticas = dados_atributo.describe()
		# compute the number of missing values
		ausentes = int(len(dados_atributo) - estatisticas['count'])
		pct_ausentes = int(100 * ausentes / len(dados_atributo))
		# the column contains strings
		if 'unique' in estatisticas:
			distintos = estatisticas['unique']
			tipo = "Nominal"
			self._view.tabelaEstatistica.setModel(EstatisticaNominalModel(dados_atributo))
		# the column contains numbers
		else:
			distintos = len(dados_atributo.unique())
			tipo = "Numérico"
			self._view.tabelaEstatistica.setModel(EstatisticaNumericaModel(dados_atributo))

		# update the attribute information on the screen
		self._view.nomeAtributoLabel.setText(atributo)	
		self._view.ausentesLabel.setText("%d (%d%%)" % (ausentes, pct_ausentes))
		self._view.tipoLabel.setText(tipo)
		self._view.distintosLabel.setText(str(distintos))
		
	def remover_atributos(self):
		'''Removes the attributes whose checkboxes are checked.'''
		self._datasetmodel.beginResetModel()
		self._dataset.remover_atributos()
		self._datasetmodel.endResetModel()
		self.atualizar_grupo_dados()
		self.atualizar_atributo_selecionado()