Example #1
def collect_command(
    user_handle: str,
    parent: str = ":root",
    data_types: List[str] = ["dataverses", "datasets", "datafiles"],
    filename: str = "tree.json",
    create_json: bool = True,
) -> None:
    collect_data(user_handle, parent, data_types, filename, create_json)
    typer.echo("Data collected")
Example #2
    def train(self, pi):
        self._initialize_data_buffers()
        self._record_snapshot(pi, None, None, None)

        for t in tqdm(range(self.time_steps)):
            # collect new data
            xprop, yprop, sprop, d = utils.collect_data(
                self.td,
                self.n_samples,
                policy=self.deployed["pis"][t],
                fix_proposed=self.fix_prop,
                random_state=self.data_seeds[t],
            )
            x, y, s = xprop[d], yprop[d], sprop[d]

            accepted = len(y)
            if accepted < 1:
                # didn't get any data, continue
                logger.warning(f"0 accepted; continue")
            else:
                self._update_data_buffers(x, y, s, None, t, accepted)
                # update logistic model in pi
                try:
                    pi.set_theta(
                        np.array(utils.fit_logit(self.x_buf, self.y_buf))
                    )
                except (PerfectSeparationError, LinAlgError) as err:
                    logger.info(f"Error in LogReg: {err}")

            self._record_snapshot(pi, yprop, sprop, d)
        return self._merge_and_convert_results()
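Example #2 wraps a `utils.fit_logit` call in a handler for `PerfectSeparationError`, a statsmodels exception, but the helper itself is not shown. A plausible minimal sketch, assuming statsmodels is the backend (the body below is an assumption, not the project's actual implementation):

import numpy as np
import statsmodels.api as sm

def fit_logit(x_buf, y_buf):
    # Hypothetical reconstruction: fit a logistic regression on the data
    # buffers and return its coefficient vector (intercept first).
    X = sm.add_constant(np.asarray(x_buf))
    result = sm.Logit(np.asarray(y_buf), X).fit(disp=0)
    return result.params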
Example #3
    def train(self, pi):
        self._record_snapshot(pi, None, None, None)

        for t in tqdm(range(self.time_steps)):

            xprop, yprop, sprop, d = utils.collect_data(
                self.td,
                self.n_samples,
                policy=pi,
                fix_proposed=self.fix_prop,
                random_state=self.data_seeds[t],
            )
            y = yprop[d]

            # the oracle
            if pi is None:
                d = y == 1
                y = y[d]

            accepted = len(y)
            if accepted < 1:
                logger.warning(f"0 accepted; continue")

            self._record_snapshot(pi, yprop, sprop, d)
        return self._merge_and_convert_results()
Example #4
    def train(self, pi):
        lr_timestep = self.lr_init
        self._initialize_data_buffers()
        self._record_snapshot(pi, None, None, None)

        for t in tqdm(range(self.time_steps)):
            lr_timestep = self._next_learning_rate_timestep(lr_timestep, t)

            # collect new data
            xprop, yprop, sprop, d = utils.collect_data(
                self.td,
                self.n_samples,
                policy=self.deployed["pis"][t],
                fix_proposed=self.fix_prop,
                random_state=self.data_seeds[t],
            )
            x, y, s = xprop[d], yprop[d], sprop[d]

            if self.data_type == "all":
                w = self._get_weights(x, self.deployed["pis"][t])
            else:
                w = None
            accepted = len(y)

            if accepted < 1:
                # didn't get any data, continue
                logger.warning(f"0 accepted; continue")
            else:
                self._update_data_buffers(x, y, s, w, t, accepted)

                # epochs
                for e in range(self.epochs):
                    perm = np.random.permutation(accepted)
                    xp, yp, sp = x[perm], y[perm], s[perm]
                    if self.data_type == "all":
                        wp = self.w_buf[perm]
                    # minibatches
                    for i1 in range(0, accepted, self.batchsize):
                        i2 = min(i1 + self.batchsize, accepted)
                        xb, yb, sb = xp[i1:i2], yp[i1:i2], sp[i1:i2]
                        wb = None if self.data_type != "all" else wp[i1:i2]
                        # gradient step
                        grad = self._grad_utility(
                            (xb, yb, sb),
                            pi,
                            self.deployed["pis"][t],
                            weights=wb,
                        )
                        pi.theta += lr_timestep * grad
            self._record_snapshot(pi, yprop, sprop, d)
        return self._merge_and_convert_results()
Example #5
def init_preproc_workflow(bids_dir, output_dir, work_dir, subject_list,
                          session_label, task_label, run_label):
    """
    A workflow for preprocessing complex-valued multi-echo fMRI data with
    single-band reference images and available T1s.
    """
    # setup workflow
    participant_wf = pe.Workflow(name='participant_wf')
    participant_wf.base_dir = os.path.join(work_dir, 'complex_preprocessing')
    os.makedirs(participant_wf.base_dir, exist_ok=True)

    # Read in dataset, but don't validate because phase isn't supported yet
    layout = BIDSLayout(bids_dir, validate=False)

    for subject_label in subject_list:
        # collect the necessary inputs
        subject_data = collect_data(layout,
                                    subject_label,
                                    task=task_label,
                                    run=run_label,
                                    ses=session_label)

        single_subject_wf = init_single_subject_wf(
            name='single_subject_' + subject_label + '_wf',
            output_dir=output_dir,
            layout=layout,
            bold_files=subject_data['bold_mag_files'],
            bold_metadata=subject_data['bold_mag_metadata'],
            phase_files=subject_data['bold_phase_files'],
            phase_metadata=subject_data['bold_phase_metadata'],
        )
        single_subject_wf.config['execution']['crashdump_dir'] = os.path.join(
            output_dir, 'sub-' + subject_label, 'log')

        for node in single_subject_wf._get_all_nodes():
            node.config = deepcopy(single_subject_wf.config)

        participant_wf.add_nodes([single_subject_wf])

    return participant_wf
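The `collect_data` helper used in Example #5 is project-specific and not shown. A minimal sketch of what it might look like with pybids, with the entity filters and returned keys inferred from the call site (the 'part' entity values and extensions are assumptions):

def collect_data(layout, subject, task=None, run=None, ses=None):
    # Hypothetical sketch: query magnitude and phase BOLD runs for one subject.
    query = dict(subject=subject, suffix='bold',
                 extension=['.nii', '.nii.gz'], return_type='file')
    for key, val in (('task', task), ('run', run), ('session', ses)):
        if val is not None:
            query[key] = val
    mag_files = layout.get(part='mag', **query)
    phase_files = layout.get(part='phase', **query)
    return {
        'bold_mag_files': mag_files,
        'bold_mag_metadata': [layout.get_metadata(f) for f in mag_files],
        'bold_phase_files': phase_files,
        'bold_phase_metadata': [layout.get_metadata(f) for f in phase_files],
    }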
Example #6
def main():

    torch.cuda.manual_seed(seed)
    cudnn.benchmark = CUDNN

    # model
    model = tiramisu.FcDnSubtle(in_channels=8, n_classes=N_CLASSES)
    model = model.cuda()
    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model.apply(utils.weights_init)
    optimizer = optim.RMSprop(model.parameters(),
                              lr=LEARNING_RATE,
                              weight_decay=WEIGHT_DECAY,
                              eps=1e-12)
    criterion = nn.NLLLoss2d().cuda()

    exp = experiment.Experiment(EXPNAME, EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    # for epoch in range(1):
    for epoch in range(START_EPOCH, END_EPOCH):

        since = time.time()

        ### Collect data ###
        # delete existing folder and old data
        if os.path.exists(res_root_path):
            shutil.rmtree(res_root_path)
        utils.collect_data(ori_train_base_rp, res_train_base_rp)
        utils.collect_data(ori_val_base_rp, res_val_base_rp)
        # data loader
        train_loader, val_loader = utils.data_loader(res_root_path)

        ### Train ###
        trn_loss, trn_err = utils.train(model, train_loader, optimizer,
                                        criterion, epoch)
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### val ###
        val_loss, val_err = utils.test(model, val_loader, criterion, epoch)
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print(("Early stopping at epoch %d since no " +
                   "better loss found since epoch %.3").format(
                       epoch, exp.best_val_loss))
            break

        utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer, epoch,
                                   DECAY_LR_EVERY_N_EPOCHS)

        exp.epoch += 1
Example #7
    random.shuffle(train_set)
    dev_set = train_set[:200]
    train_set = train_set[200:]

    print('Number of training samples:' + str(len(train_set)))
    print('Number of development samples:' + str(len(dev_set)))

    print("Number of node: " + str(len_node) + ", while max allowed is " + str(options.max_node_num))
    print("Number of parent node: " + str(len_in_node) + ", truncated to " + str(options.max_in_node_num))
    print("Number of child node: " + str(len_out_node) + ", truncated to " + str(options.max_out_node_num))
    print("The entity size: " + str(entity_size) + ", truncated to " + str(options.max_entity_size))

    # Build dictionary and mapping of words, characters, edges

    words, chars, edges = collect_data(train_set)
    print('Number of words:' + str(len(words)))
    print('Number of characters:' + str(len(chars)))
    print('Number of edges:' + str(len(edges)))

    dict_word, word_to_id, id_to_word = word_mapping(words)
    dict_char, char_to_id, id_to_char = char_mapping(chars)
    dict_edge, edge_to_id, id_to_edge = edge_mapping(edges)

    options.word_to_id = word_to_id
    options.char_to_id = char_to_id
    options.edge_to_id = edge_to_id

    if options.binary_classification:
        options.relation_num = 2
    else:
Example #8
def main():

  # load parameters for run...
  parameters = yaml.safe_load(open(PARAM_FILE_DIRECTORY))
  db_defs = parameters['database']
  solver_param = parameters['solver_param']
  model_param = parameters['model_param']
  model_specific_params = parameters[model_param['model_type']]

  print('Collect Data ....')
  data, count, dictionary, reverse_dictionary = collect_data(vocabulary_size=model_param['vocabulary_size'])
  print('Done Collect Data.')
  
  #skip_window = 2       # How many words to consider left and right.
  #num_skips = 2         # How many times to reuse an input to generate a label.

  # We pick a random validation set to sample nearest neighbors. Here we limit the
  # validation samples to the words that have a low numeric ID, which by
  # construction are also the most frequent.
  num_sampled = 64    # Number of negative examples to sample.
  
  
  '''
  print('trying to connect to db...')
  # create connection to the database.
  db = mysql.connect(host=db_defs['ip'],
                    user=db_defs['usrid'],
                    passwd=db_defs['password'],
                    db=db_defs['name'])
  
  
  print('connected to db...')  
  '''

  '''
  # setup params for training
  ckpt_file=os.path.join(options.dir,options.model_name)
  log_dir=os.path.join(options.logdir,'logs')
  print(ckpt_file)
  print(log_dir)
  if not os.path.exists(log_dir):
    os.makedirs(log_dir)  
  '''
  print('try to import model')
  # build model...
  model = create_model(model_param,model_specific_params)
  print('Model drawn')

  # Initialize the solver object.
  solver = Solver(model)
  
  # train model....
  solver.train(data, dictionary, reverse_dictionary, solver_param)

  print('done!')
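The comment in Example #8 about picking frequent words as a validation set is not acted on in the snippet itself; the usual selection, which Example #13 below performs, is simply:

import numpy as np

valid_size = 16     # number of words to track for nearest-neighbour checks
valid_window = 100  # restrict to the most frequent (lowest) word ids
valid_examples = np.random.choice(valid_window, valid_size, replace=False)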
Example #9
import os
import sys

import numpy as np
from theano import tensor

from utils import collect_data, plot_img_dep, load_data
# set recursion limit so pickle doesn't error
sys.setrecursionlimit(40000)

random_state = np.random.RandomState(1999)

DEBUG = True
volume_path = '../data/train/'
model_path = 'models'
if not os.path.exists(model_path):
    os.mkdir(model_path)
n_epochs = 200
minibatchsize = 20

images, dmaps = collect_data(volume_path)
# only keep minibatch divisible num data
num_images = len(images) - (len(images) % minibatchsize)
images = images[:num_images]
dmaps = dmaps[:num_images]
print("Using %s images" % num_images)
X_train, y_train = load_data(images[0:minibatchsize], dmaps[0:minibatchsize])

# theano land tensor4 for 4 dimensions
input_var = tensor.tensor4('X')
target_var = tensor.tensor4('y')
outchan = y_train.shape[1]
inchan = X_train.shape[1]
width = X_train.shape[2]
height = X_train.shape[3]
Example #10
import sys
# set recursion limit so pickle doesn't error
sys.setrecursionlimit(40000)

random_state = np.random.RandomState(1999)

DEBUG = True
volume_path = '../data/train/'
model_path = 'models'
if not os.path.exists(model_path):
    os.mkdir(model_path)
n_epochs = 200
minibatchsize = 20


images, dmaps = collect_data(volume_path)
# only keep minibatch divisible num data
num_images = len(images) - (len(images) % minibatchsize)
images = images[:num_images]
dmaps = dmaps[:num_images]
print("Using %s images" %num_images)
X_train, y_train = load_data(images[0:minibatchsize],
                             dmaps[0:minibatchsize])


# theano land tensor4 for 4 dimensions
input_var = tensor.tensor4('X')
target_var = tensor.tensor4('y')
outchan = y_train.shape[1]
inchan = X_train.shape[1]
width = X_train.shape[2]
Example #11
def main():
	cudnn.benchmark = True

	deeplab_caffe2pytorch = 'train_iter_20000.caffemodel.pth'
	print('load model:', deeplab_caffe2pytorch)
	pretrained_model = torch.load(deeplab_caffe2pytorch)
	model = UNet_deeplab(in_channels=4, feature_length=512)
	model = model.init_parameters(pretrained_model)

	# separate layers, to set different lr
	param_exist = []
	param_add = []
	for k, (name, module) in enumerate(model.named_children()):
		# existing layers including: conv1~conv5, fc6, fc7
		if k < 7:
			for param in module.parameters():
				param_exist.append(param)
		# adding layers including: fc7_1
		else:
			for param in module.parameters():
				param_add.append(param)
	model = model.cuda()

	print('  + Number of params: {}'.format(
		sum([p.data.nelement() for p in model.parameters()])))
	optimizer = optim.RMSprop([{'params': param_exist, 'lr': LEARNING_RATE*0.1},
						   {'params': param_add}], lr=LEARNING_RATE,
							  weight_decay=WEIGHT_DECAY, eps=1e-12)

	# use margin=2
	criterion = nn.TripletMarginLoss(margin=2, p=2).cuda()

	exp_dir = EXPERIMENT + 'ranking-test'
	if os.path.exists(exp_dir):
		shutil.rmtree(exp_dir)
	exp = experiment.Experiment('ranking-test', EXPERIMENT)
	exp.init()

	START_EPOCH = exp.epoch
	END_EPOCH = START_EPOCH + N_EPOCHS

	for epoch in range(START_EPOCH, END_EPOCH):

		since = time.time()

		# # ### Collect data ###
		# # delete existing folder and old data
		if os.path.exists(res_root_path):
			shutil.rmtree(res_root_path)
		utils.collect_data(ori_train_base_rp, res_train_base_rp)
		utils.collect_data(ori_val_base_rp, res_val_base_rp)
		# data loader
		train_loader, val_loader = utils.data_loader(res_root_path)

		# # ### Train ###
		trn_loss = utils.train(model, train_loader, optimizer, criterion, epoch)
		trn_err = 0
		print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(epoch, trn_loss, trn_err))
		time_elapsed = time.time() - since
		print('Train Time {:.0f}m {:.0f}s'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Test ###
		val_loss = utils.test(model, val_loader, criterion, epoch)
		val_err = 0
		print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
		time_elapsed = time.time() - since
		print('Total Time {:.0f}m {:.0f}s\n'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Save Metrics ###
		exp.save_history('train', trn_loss, trn_err)
		exp.save_history('val', val_loss, val_err)

		### Checkpoint ###
		exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
		exp.save_optimizer(optimizer, val_loss)

		## Early Stopping ##
		if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
			print(("Early stopping at epoch %d since no "
				   +"better loss found since epoch %.3").format(epoch, exp.best_val_loss))
			break

		# Adjust Lr ###--old method
		utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
							 epoch, DECAY_LR_EVERY_N_EPOCHS)

		exp.epoch += 1
Example #12
def main():

    torch.cuda.manual_seed(seed)
    cudnn.benchmark = CUDNN

    model = tiramisu.FCDenseNet57(n_classes=N_CLASSES)
    #model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model.apply(utils.weights_init)
    optimizer = optim.SGD(model.parameters(),
                          lr=LEARNING_RATE,
                          momentum=0.9,
                          weight_decay=0.0005)
    criterion = nn.NLLLoss2d().cuda()

    exp_dir = EXPERIMENT + 'Objectness'
    if os.path.exists(exp_dir):
        shutil.rmtree(exp_dir)

    exp = experiment.Experiment('Objectness', EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    for epoch in range(1, END_EPOCH):

        since = time.time()
        # # ### Collect data ###
        # # # delete existing folder and old data
        cont_rp = data_root_path + 'traincont/'
        if os.path.exists(cont_rp):
            shutil.rmtree(cont_rp)
        utils.collect_data(data_root_path, 'train')
        cont_rp = data_root_path + 'valcont/'
        if os.path.exists(cont_rp):
            shutil.rmtree(cont_rp)
        utils.collect_data(data_root_path, 'val')
        # data loader
        train_loader, val_loader = utils.data_loader(data_root_path)

        ### Train ###
        trn_loss, trn_err = utils.train(model, train_loader, optimizer,
                                        criterion, epoch)
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### Test ###
        val_loss, val_err = utils.test(model, val_loader, criterion, epoch)
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print(("Early stopping at epoch %d since no " +
                   "better loss found since epoch %.3").format(
                       epoch, exp.best_val_loss))
            break

        # Adjust Lr ###--old method
        if epoch % 4 == 0:
            utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
                                       epoch, DECAY_LR_EVERY_N_EPOCHS)

        exp.epoch += 1
Example #13
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence

from negative_sampling_model import NegativeSamplingWord2VecEmbedding

window_size = 3
vector_dim = 300
epochs = 5

valid_size = 16  # Random set of words to evaluate similarity on.
valid_window = 100  # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
vocab_size = 10000

embedding_dim = 300

data, count, dictionary, reverse_dictionary = collect_data(
    vocabulary_size=vocab_size)

sampling_table = sequence.make_sampling_table(vocab_size)

couples, labels = sequence.skipgrams(data,
                                     vocab_size,
                                     window_size=window_size,
                                     sampling_table=sampling_table)
# word_target, word_context = zip(*couples)
# word_target = np.array(word_target, dtype="int32")
# word_context = np.array(word_context, dtype="int32")

print(couples[:10], labels[:10])
train_ds = tf.data.Dataset.from_tensor_slices(
    (couples, labels)).shuffle(10000).batch(32)
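The commented-out lines in Example #13 hint at the usual next step: splitting the skip-gram couples into separate target and context arrays before batching. A minimal sketch of that step (training code is omitted because the NegativeSamplingWord2VecEmbedding API is not shown here):

import numpy as np
import tensorflow as tf

word_target, word_context = zip(*couples)
word_target = np.array(word_target, dtype="int32")
word_context = np.array(word_context, dtype="int32")
label_array = np.array(labels, dtype="int32")

train_ds = (
    tf.data.Dataset.from_tensor_slices(((word_target, word_context), label_array))
    .shuffle(10000)
    .batch(32)
)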
Example #14
def get_data():
    # Run the function only the first time it is encountered
    utils.collect_data()
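# Note: the comment above says collect_data should run only the first time
# get_data is called. A hypothetical way to get that behaviour, assuming
# get_data takes no arguments, is to memoize it:
#
#     import functools
#
#     @functools.lru_cache(maxsize=None)
#     def get_data():
#         utils.collect_data()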
def main():
    cudnn.benchmark = True

    densenet201 = torchvision.models.densenet201(pretrained=True)
    dict_densenet201 = densenet201.state_dict()
    model = DenseNet.densenet201(vector_len=512)
    # # initialize
    DenseNet_dict = model.state_dict()

    pretrained_dict = {
        k: v
        for k, v in dict_densenet201.items() if k in DenseNet_dict
    }
    # for k in pretrained_dict:
    # 	print(k)
    DenseNet_dict.update(pretrained_dict)
    model.load_state_dict(DenseNet_dict)

    # separate layers, to set different lr
    param_exist = []
    param_add = []
    for k, (name, module) in enumerate(model.named_children()):
        # existing layers including: self.features
        if k == 1:
            for param in module.parameters():
                param_exist.append(param)
        # adding layers including: self.classifier
        else:
            for param in module.parameters():
                param_add.append(param)
    model = model.cuda()
    # model = torch.nn.DataParallel(model).cuda()

    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    optimizer = optim.SGD([{
        'params': param_exist,
        'lr': LEARNING_RATE * 0.1
    }, {
        'params': param_add
    }],
                          lr=LEARNING_RATE,
                          momentum=0.9,
                          weight_decay=0.0005)

    # use margin=2
    criterion = nn.TripletMarginLoss(margin=2, p=2).cuda()

    exp_dir = EXPERIMENT + 'rankingVGG'
    if os.path.exists(exp_dir):
        shutil.rmtree(exp_dir)
    exp = experiment.Experiment('rankingVGG', EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    for epoch in range(1, END_EPOCH):

        since = time.time()

        # # ### Collect data ###
        # # delete existing folder and old data
        if os.path.exists(res_root_path):
            shutil.rmtree(res_root_path)
        utils.collect_data(ori_train_base_rp, res_train_base_rp)
        utils.collect_data(ori_val_base_rp, res_val_base_rp)
        # data loader
        train_loader, val_loader = utils.data_loader(res_root_path)

        # # ### Train ###
        trn_loss = utils.train(model, train_loader, optimizer, criterion,
                               epoch)
        trn_err = 0
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### Test ###
        val_loss = utils.test(model, val_loader, criterion, epoch)
        val_err = 0
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print(("Early stopping at epoch %d since no " +
                   "better loss found since epoch %.3").format(
                       epoch, exp.best_val_loss))
            break

        # Adjust Lr ###
        if epoch % 4 == 0:
            utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
                                       epoch, DECAY_LR_EVERY_N_EPOCHS)

        exp.epoch += 1