def multigrid_step(self, x, bc, f, step):
        '''
        One layer of multigrid. Recursive function.
        Find solution x to Ax + b = 0.
        '''
        batch_size, image_size, _ = x.size()
        # Pre smoothing
        for i in range(self.pre_smoothing):
            x = utils.fd_step(x, bc, f)

        if step > 1:
            # Downsample
            if f is not None:
                f_sub = 4 * utils.subsample(f)
            else:
                f_sub = None

            if self.is_bc_mask:
                # Subsample geometry
                bc_sub = utils.subsample(
                    bc.view(batch_size * 2, image_size, image_size))
                bc_sub = bc_sub.view(batch_size, 2, *bc_sub.size()[-2:])
            else:
                bc_sub = bc

            x_sub = utils.restriction(x, bc_sub)
            # Refine x_sub recursively
            x_sub = self.multigrid_step(x_sub, bc_sub, f_sub, step - 1)
            # Upsample
            x = utils.interpolation(x_sub, bc)

        # Post smoothing
        for i in range(self.post_smoothing):
            x = utils.fd_step(x, bc, f)
        return x
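For context, a minimal sketch of the kind of grid coarsening that utils.subsample could perform here is shown below: it keeps every other grid point, assuming an odd grid size so the boundary rows and columns survive. The names are hypothetical and the project's actual helper may differ (for example by averaging neighbours).

import torch

def subsample_grid(x):
    # Coarsen a (batch, N, N) grid by keeping every other point; an odd N preserves the boundaries.
    return x[:, ::2, ::2]

coarse = subsample_grid(torch.zeros(1, 65, 65))  # shape (1, 33, 33)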
Example No. 2
 def initialize(self, data, n_samples=30000):
     """
     Initializes the EntropyBoW object by selecting the initial centers for the dictionary and the entropy centers
     """
     data = subsample(data, self.n_feature_samples)
     self.initialize_dictionary(data, n_samples=n_samples)
     self.entropy.init_centers(self.encode_objects_theano(data))
Example No. 3
 def __init__(self,
              corpus,
              pl_dimension,
              hl_dimension,
              window_size,
              learning_rate,
              training_method='cbow'):
     """
     Initializing Word2vec class.
     :param corpus: corpus to train
     :param pl_dimension: projection layer dimension
     :param hl_dimension: hidden layer dimension
     :param window_size: window size
     :param learning_rate: learning rate
     :param training_method: training method (skip-gram or cbow) default: cbow
     """
     self.corpus = corpus
     self.pl_dimension = pl_dimension
     self.hl_dimension = hl_dimension
     self.window_size = window_size
     self.learning_rate = learning_rate
     self.loss = 0.0
     splitted_corpus = utils.split(corpus)
     subsampled_splitted_corpus = utils.subsample(splitted_corpus)
     self.corpus_window_array = utils.corpus2window(
         subsampled_splitted_corpus, 2)
     np.random.seed(100)
     self.weight1 = np.random.rand(len(self.corpus_window_array),
                                   pl_dimension)
     self.weight2 = np.random.rand(pl_dimension, hl_dimension)
     self.weight3 = np.random.rand(hl_dimension, len(splitted_corpus))
     self.training_method = training_method
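The utils.subsample call above most plausibly implements the frequency-based subsampling of frequent words from Mikolov et al. (2013). A minimal sketch under that assumption, operating on a flat list of tokens (the function name and threshold are illustrative, not the project's API):

import random
from collections import Counter

def subsample_tokens(tokens, t=1e-3):
    counts = Counter(tokens)
    total = len(tokens)
    kept = []
    for w in tokens:
        freq = counts[w] / total
        p_keep = min(1.0, (t / freq) ** 0.5)  # frequent words are dropped more often
        if random.random() < p_keep:
            kept.append(w)
    return kept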
Example No. 4
 def fit(self, data, iters=100):
     """
     Train the Soft BoW layer using the entropy objective
     """
     for iter in tqdm(range(iters)):
         subsampled_data = subsample(data, self.n_feature_samples)
         cur_loss = self.train_theano(subsampled_data)[0]
         print "Loss at iteration ", iter, " = ", cur_loss
Example No. 5
def main():

    net = SkipGramNetwork(hp.VOCAB_SIZE, hp.EMBED_SIZE).to(device)
    print(net)

    if args.restore:
        net.load_state_dict(torch.load(args.restore))
        vocab, inverse_vocab = utils.load_data(args.restore)
        print("Model restored from disk.")
    else:
        sentences = utils.load_corpus(args.corpus)
        word_freqs = utils.word_counts(sentences)
        sentences, word_freqs = utils.trunc_vocab(sentences,
                                                  word_freqs)  # TODO
        sentences = utils.subsample(sentences, word_freqs)

        vocab, inverse_vocab = utils.construct_vocab(sentences)  # TODO
        skipgrams = skip_grams(sentences, vocab)  # TODO
        utils.save_data(args.save, vocab, inverse_vocab)

        loader = DataLoader(skipgrams, batch_size=hp.BATCH_SIZE, shuffle=True)
        loss_hist = train(
            net,
            loader)  # TODO returns loss function evaluations as python list
        """ You can plot loss_hist for your writeup:
            plt.plot(loss_hist)
            plt.show()
        """
        plt.plot(loss_hist)
        plt.show()

    # the weights of the embedding matrix are the lookup table
    lookup_table = net.embeddings.weight.data.cpu().numpy()
    """ TODO: Implement what you need in order to answer the writeup questions. """

    nearest = most_similar(lookup_table, lookup_table[vocab['journeyed']])
    nearest_words = [inverse_vocab[w] for w in nearest if w in inverse_vocab]
    print('Nearest to {0}: {1}'.format('journeyed', nearest_words))

    #    print('Dimension Reduction and Plotting')
    #    reduced = TSNE().fit_transform(lookup_table)
    #    plt.scatter(reduced[:,0], reduced[:,1])
    #    plt.show()

    tsne = TSNE(perplexity=30,
                n_components=2,
                init='pca',
                n_iter=5000,
                method='exact')
    plot_only = 500
    low_dim_embs = tsne.fit_transform(lookup_table[:plot_only, :])
    labels = [inverse_vocab[i] for i in range(plot_only)]
    plot_with_labels(low_dim_embs, labels, 'tsne.png')
Example No. 6
    def H(self, r, bc):
        '''
        Return H(r).
        '''
        # Get masks first
        if self.is_bc_mask:
            bc_mask = bc[:, 1:, :, :]
            masks = [1 - bc_mask[:, :, 1:-1, 1:-1]]
            for i in range(self.n_layers - 1):
                bc_mask = utils.subsample(bc_mask.squeeze(1)).unsqueeze(1)
                masks.append(1 - bc_mask[:, :, 1:-1, 1:-1])
            # Multiply by mask
            r = r * masks[0]

        intermediates = []  # used for skip connections

        # First half
        for i in range(self.n_layers):
            for j in range(self.pre_smoothing):
                idx = i * self.pre_smoothing + j
                r = self.first_layers[idx](r)
                if self.is_bc_mask:
                    r = r * masks[i]
            # Add to intermediates
            intermediates.append(r)
            # Subsample
            if i < self.n_layers - 1:
                r = self.pooling_layers[i](r)
                if self.is_bc_mask:
                    r = r * masks[i + 1]

        # Second half
        for i in range(self.n_layers):
            for j in range(self.post_smoothing):
                idx = i * self.post_smoothing + j
                r = self.second_layers[idx](r)
                if self.is_bc_mask:
                    r = r * masks[self.n_layers - i - 1]
            # Add skip connections
            r = r + intermediates[self.n_layers - i - 1]
            # Upsample
            if i < self.n_layers - 1:
                r = F.pad(r, (1, 1, 1, 1))
                new_size = r.size(-1) * 2 - 1
                r = F.interpolate(r,
                                  size=new_size,
                                  mode='bilinear',
                                  align_corners=True)
                r = r[:, :, 1:-1, 1:-1]
                if self.is_bc_mask:
                    r = r * masks[self.n_layers - i - 2]

        return r
Example No. 7
def iter_pcds(file_names, subsample_size, max_scenes):
    """
    return an iterable of all pcd files, from which we
    take a sample.
    """
    for file_id, file_name in enumerate(file_names):
        # max number of scenes reached
        if file_id == max_scenes:
            break
        all_points = utils.readpcd(file_name)
        sample = utils.subsample(all_points, subsample_size) if \
            subsample_size < 1 else all_points
        yield file_id, sample, all_points
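Since subsample_size < 1 is treated as a fraction above, a plausible sketch of the point-cloud subsampling is a uniform random draw of that fraction of points; this is an assumption, and the repository's utils.subsample may work differently.

import numpy as np

def subsample_points(points, fraction):
    # points: (N, 3) array; keep roughly `fraction` of the rows, chosen at random.
    n_keep = max(1, int(len(points) * fraction))
    idx = np.random.choice(len(points), size=n_keep, replace=False)
    return points[idx]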
Example No. 9
def test_subsampling(geometry, image_size):
    print('########### Test subsampling ##########\n')
    x, bc_values, bc_mask = utils.get_geometry(geometry, image_size, 1, 1)

    bc_values = torch.Tensor(bc_values)
    bc_mask = torch.Tensor(bc_mask)
    plot(bc_values.squeeze(0).numpy())
    plot(bc_mask.squeeze(0).numpy())

    n_layers = 3
    for i in range(n_layers):
        bc_values = utils.subsample(bc_values)
        bc_mask = utils.subsample(bc_mask)

        mask = bc_mask.squeeze(0).numpy()
        assert np.all(np.logical_or(np.isclose(mask, 0), np.isclose(mask, 1)))

        bc_values = bc_values * bc_mask
        values = bc_values.squeeze(0).numpy()
        assert np.all(values < 1.00001)

        plot(values)
        plot(mask)
Example No. 10
def test_upsampling_poisson(x, gt, bc, f):
    print('Upsampling multigrid')
    f_sub = utils.subsample(f)
    x_sub = utils.restriction(x, bc)
    for i in range(1000):
        x_sub = utils.fd_step(x_sub, bc, f_sub)

    # Upsample
    x = utils.interpolation(x_sub, bc)

    A = utils.loss_kernel.view(1, 1, 3, 3)
    r = F.conv2d(x.unsqueeze(1), A).squeeze(1)
    r = utils.pad_boundary(r, torch.zeros(1, 4)) - f
    r = r.cpu().numpy()
    print(r.max())
Example No. 11
def test_subsampling_poisson(x, gt, bc, f):
    print('Subsampling multigrid')
    for i in range(2000):
        x = utils.fd_step(x, bc, f)

    A = utils.loss_kernel.view(1, 1, 3, 3)
    r = F.conv2d(x.unsqueeze(1), A).squeeze(1)
    r = utils.pad_boundary(r, torch.zeros(1, 4)) - f
    print(np.abs(r.cpu().numpy()).max())

    # Subsample
    x_sub = x
    f_sub = f
    for i in range(3):
        f_sub = 4 * utils.subsample(f)
        x_sub = utils.restriction(x, bc)
        r_sub = F.conv2d(x_sub.unsqueeze(1), A).squeeze(1)
        r_sub = utils.pad_boundary(r_sub, torch.zeros(1, 4)) - f_sub
        print(x_sub.size())
        print(np.abs(r_sub.cpu().numpy()).max())
Example No. 12
    def set_field_of_view(self, subsampling=[1,1,1], corner=[0,0,0], size=None, fixed_npoints=None):
        self.block_corner = np.array(corner, dtype='uint')
        if size is None:
            size = self.source.shape
        self.block_size = np.array(size, dtype='uint')
        aux = self.source_clamped[corner[0]:corner[0]+size[0]-1:subsampling[0],
                                  corner[1]:corner[1]+size[1]-1:subsampling[1],
                                  corner[2]:corner[2]+size[2]-1:subsampling[2]]
        if fixed_npoints is None:
            self.block_subsampling = np.array(subsampling, dtype='uint')
            self.source_block = aux
            self.block_npoints = (self.source_block >= 0).sum()
        else: 
            fixed_npoints = int(fixed_npoints)
            self.source_block, self.block_subsampling, self.block_npoints = \
                subsample(aux, npoints=fixed_npoints)

        ## Taux: block-to-full-array transformation
        Taux = np.diag(np.concatenate((self.block_subsampling, [1])))
        Taux[0:3, 3] = self.block_corner
        self.block_transform = np.dot(self.source_toworld, Taux)
Example No. 13
        optimizer = optim.Adam(model.parameters(), lr=0.01)
    else:
        model = MLP(x.size()[-1], out_dim = nb_classes).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    loss_func = nn.CrossEntropyLoss()
    train_mlp(model, optimizer, loss_func, x, labels, train_mask_real, val_mask, test_mask, batch_size=128, epochs = 200, iters_per_epoch=int(nb_train_real/128)+1, patience=10)

elif args.model == 'ATTR_RW_MF':
    ## ``Ours 1`` with (args.times_features == False)
    ## ``Ours 2`` with (args.times_features == True)

    if not args.saved:
        # For the ablation study of local, non-local
        if args.subsample:
            features = subsample(adj, features)

        trans_attr_rw, trans_deg_inv, vol = normalize_trans(concat_attr(adj, features))

        rank_k = 300
        window_size = 5
        '''
        vals, vecs = LA.eigsh(trans_attr_rw, k=rank_k)

        vals_power = [vals]
        for i in range(window_size):
            vals_power.append(vals_power[-1] * vals)

        vals_power = sum(vals_power) / window_size
        trans_power = vecs @ np.diag(vals_power) @ vecs.transpose()
        '''
Example No. 14
reload(utils)
reload(algo_param)
reload(param)

# TODO Add unlabeled subset functionality
# TODO Add parallelization

#####################   PERFORM GRID SEARCH    ########################
if param.optimize_params:

    # parse data
    all_X, all_Y = utils.parse(param.data_file, param.feature_file,
                               param.response_var, debug_limit=param.debug_limit)
    X, Y = utils.labeled_subset(all_X, all_Y)
    X, Y = utils.subsample((X, Y), param.labeled_subsample)
    (X_train, X_test, Y_train, Y_test) = utils.train_test_split(X, Y, test_size=param.test_size)

    # pickle data for use in other files
    saved_data = (X_train, X_test, Y_train, Y_test)
    utils.pickler(saved_data, param.optimization_data_pickle)

    # make meta pipeline for grid searching
    pipeline, parameter_space = make_meta_pipeline([
        ('imputer', param.imputer_params),
        ('scaler', param.scaler_params),
        ('dim_reducer', param.dim_reducer_params),
        ('regressor', param.regressor_params)
    ], all_X, all_Y)

    print("Opening logfiles")
Example No. 15
def train(config):
    print("Deep copy of model with margin as 1.0")
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(open(data_config["source"]["list_path"]).readlines(), \
                                transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs, \
            shuffle=True, num_workers=4, drop_last=True)
    dsets["target"] = ImageList(open(data_config["target"]["list_path"]).readlines(), \
                                transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs, \
            shuffle=True, num_workers=4, drop_last=True)

    if prep_config["test_10crop"]:
        for i in range(10):
            dsets["test"] = [ImageList(open(data_config["test"]["list_path"]).readlines(), \
                                transform=prep_dict["test"][i]) for i in range(10)]
            dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs, \
                                shuffle=False, num_workers=4) for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(open(data_config["test"]["list_path"]).readlines(), \
                                transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs, \
                                shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["call"](net_config["name"],
                                      **net_config["params"])
    base_network_teacher = net_config["call"](net_config["name"],
                                              **net_config["params_teacher"])
    base_network = base_network.cuda()
    base_network_teacher = copy.deepcopy(base_network).cuda()
    for param in base_network_teacher.parameters():
        param.detach_()
    # base_network_teacher = base_network_teacher.cuda()

    # print("check init: ", torch.equal(base_network.fc.weight, base_network_teacher.fc.weight))

    base_network.layer1[-1].relu = nn.ReLU()
    base_network.layer2[-1].relu = nn.ReLU()
    base_network.layer3[-1].relu = nn.ReLU()
    base_network.layer4[-1].relu = nn.ReLU()

    base_network_teacher.layer1[-1].relu = nn.ReLU()
    base_network_teacher.layer2[-1].relu = nn.ReLU()
    base_network_teacher.layer3[-1].relu = nn.ReLU()
    base_network_teacher.layer4[-1].relu = nn.ReLU()

    # print(base_network)

    for n, m in base_network.named_modules():
        if n in ('layer1.2.bn3', 'layer2.3.bn3', 'layer3.5.bn3', 'layer4.2.bn3'):
            m.register_forward_hook(get_activation_student(n))

    if config["loss"]["random"]:
        random_layer = network.RandomLayer(
            [base_network.output_num(), class_num],
            config["loss"]["random_dim"])
        ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(
            base_network.output_num() * class_num, 1024)
    if config["loss"]["random"]:
        random_layer.cuda()
    ad_net = ad_net.cuda()
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()
    Hloss = loss.Entropy()
    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, \
                    **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
        base_network = nn.DataParallel(base_network,
                                       device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    temperature = config["temperature"]

    for i in trange(config["num_iterations"], leave=False):
        global activation_student
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.eval()
            base_network_teacher.eval()
            temp_acc, temp_acc_teacher = image_classification_test(dset_loaders, \
                base_network, base_network_teacher, test_10crop=prep_config["test_10crop"])
            temp_model = nn.Sequential(base_network_teacher)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            log_str1 = "precision: {:.5f}".format(temp_acc_teacher)
            config["out_file"].write(log_str + "\t" + log_str1 + "\t" +
                                     str(classifier_loss.item()) + "\t" +
                                     str(dann_loss.item()) + "\t" +
                                     str(ent_loss.item()) + "\t" + "\n")
            config["out_file"].flush()
            print("ent Loss: ", ent_loss.item())
            print("Dann loss: ", dann_loss.item())
            print("Classification Loss: ", classifier_loss.item())
            print(log_str)
            print(log_str1)
        # if i % config["snapshot_interval"] == 0:
        #     torch.save(nn.Sequential(base_network), osp.join(config["output_path"], \
        #         "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]
        ## train one iter
        base_network.train(True)
        base_network_teacher.train(True)

        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])

        inputs_source, labels_source = next(iter_source)
        inputs_target, labels_target = next(iter_target)

        inputs_source1, inputs_source2, inputs_target1, inputs_target2, labels_source = utils.get_copies(
            inputs_source, inputs_target, labels_source)

        margin = 1
        loss_alter = 0

        #### For source data

        features_source, outputs_source = base_network(inputs_source1)
        # features_source2, outputs_source2 = base_network(inputs_source2)

        feature1 = base_network_teacher.features1(inputs_source2)
        feature2 = base_network_teacher.features2(feature1)
        feature3 = base_network_teacher.features3(feature2)
        feature4 = base_network_teacher.features4(feature3)
        feature4_avg = base_network_teacher.avgpool(feature4)
        feature4_res = feature4_avg.view(feature4_avg.size(0), -1)
        features_source2 = base_network_teacher.bottleneck(feature4_res)
        outputs_source2 = base_network_teacher.fc(features_source2)

        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer1.2.bn3'], feature1.detach(), margin) / (
                train_bs * activation_student['layer1.2.bn3'].size(1) * 8)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer2.3.bn3'], feature2.detach(), margin) / (
                train_bs * activation_student['layer2.3.bn3'].size(1) * 4)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer3.5.bn3'], feature3.detach(), margin) / (
                train_bs * activation_student['layer3.5.bn3'].size(1) * 2)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer4.2.bn3'], feature4.detach(),
            margin) / (train_bs * activation_student['layer4.2.bn3'].size(1))

        ## For Target data
        ramp = utils.sigmoid_rampup(i, 100004)
        ramp_confidence = utils.sigmoid_rampup(5 * i, 100004)

        features_target, outputs_target = base_network(inputs_target1)
        sample_selection_indices = get_confident_idx.confident_samples(
            base_network, inputs_target1, ramp_confidence, class_num, train_bs)

        confident_targets = utils.subsample(outputs_target,
                                            sample_selection_indices)

        feature1_teacher = base_network_teacher.features1(inputs_target2)
        feature2_teacher = base_network_teacher.features2(feature1_teacher)
        feature3_teacher = base_network_teacher.features3(feature2_teacher)
        feature4_teacher = base_network_teacher.features4(feature3_teacher)
        feature4_teacher_avg = base_network_teacher.avgpool(feature4_teacher)
        feature4_teacher_res = feature4_teacher_avg.view(
            feature4_teacher_avg.size(0), -1)
        features_target2 = base_network_teacher.bottleneck(
            feature4_teacher_res)
        outputs_target2 = base_network_teacher.fc(features_target2)

        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer1.2.bn3'], feature1_teacher.detach(),
            margin) / (train_bs * activation_student['layer1.2.bn3'].size(1) *
                       8)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer2.3.bn3'], feature2_teacher.detach(),
            margin) / (train_bs * activation_student['layer2.3.bn3'].size(1) *
                       4)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer3.5.bn3'], feature3_teacher.detach(),
            margin) / (train_bs * activation_student['layer3.5.bn3'].size(1) *
                       2)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer4.2.bn3'], feature4_teacher.detach(),
            margin) / (train_bs * activation_student['layer4.2.bn3'].size(1))

        loss_alter = loss_alter / 1000  ## maybe multiply by 4 in later tests
        loss_alter = loss_alter.unsqueeze(0).unsqueeze(1)

        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out_src = nn.Softmax(dim=1)(outputs_source)
        softmax_out_tar = nn.Softmax(dim=1)(outputs_target)
        softmax_out = nn.Softmax(dim=1)(outputs)

        features_teacher = torch.cat((features_source2, features_target2),
                                     dim=0)
        outputs_teacher = torch.cat((outputs_source2, outputs_target2), dim=0)
        softmax_out_src_teacher = nn.Softmax(dim=1)(outputs_source2)
        softmax_out_tar_teacher = nn.Softmax(dim=1)(outputs_target2)
        softmax_out_teacher = nn.Softmax(dim=1)(outputs_teacher)

        if config['method'] == 'DANN+E':
            ent_loss = Hloss(confident_targets)
            dann_loss = loss.DANN(features, ad_net)
        elif config['method'] == 'DANN':
            dann_loss = loss.DANN(features, ad_net)
            # dann_loss = 0
        else:
            raise ValueError('Method cannot be recognized.')
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        # loss_KD = -(F.softmax(outputs_teacher/ temperature, 1).detach() *
        # 	        (F.log_softmax(outputs/temperature, 1) - F.log_softmax(outputs_teacher/temperature, 1).detach())).sum() / train_bs
        # print(loss_KD)
        # total_loss =  loss_alter #+ (config["ent_loss"] * ent_loss)

        total_loss = dann_loss + classifier_loss + (
            ramp * ent_loss)  #+ (config["ent_loss"] * ent_loss)
        total_loss.backward(retain_graph=True)
        optimizer.step()
        loss.update_ema_variables(base_network, base_network_teacher,
                                  config["teacher_alpha"], i)
    torch.save(best_model, osp.join(config["output_path"],
                                    "best_model.pth.tar"))
    return best_acc
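Example No. 16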
    def load_split(self, features, split, feature_type="X", sample_rate=0.5):
        # Setup directory and filenames
        dir_features = self.feature_path(features)

        # Get splits for this partition of data
        if self.activity is None:
            file_train = open(
                self.base_dir +
                "Splits/{}/{}/train.txt".format(self.name, split)).readlines()
            file_test = open(
                self.base_dir +
                "Splits/{}/{}/test.txt".format(self.name, split)).readlines()
        else:
            file_train = open(
                self.base_dir + "Splits/{}/{}/{}/train.txt".format(
                    self.name, self.activity, split)).readlines()
            file_test = open(self.base_dir + "Splits/{}/{}/{}/test.txt".format(
                self.name, self.activity, split)).readlines()
        file_train = [f.strip() for f in file_train]
        file_test = [f.strip() for f in file_test]
        # Remove extension
        if "." in file_train[0]:
            file_train = [".".join(f.split(".")[:-1]) for f in file_train]
            file_test = [".".join(f.split(".")[:-1]) for f in file_test]

        self.trials_train = file_train
        self.trials_test = file_test

        # Get all features
        files_features = self.get_files(dir_features, split)
        X_all, Y_all = [], []
        dir_labels = '../UW_IOM_Dataset/VideoLabelsNum/'
        for f in files_features:
            if "Split_" in os.listdir(dir_features)[-1]:
                # data_tmp = sio.loadmat( closest_file("{}{}/{}".format(dir_features,split, f)) )
                # print(closest_file("{}{}/{}".format(dir_labels,split, f)))
                data_tmp_Y = np.load(
                    closest_file("{}{}/{}".format(dir_labels, split, f)))
                data_tmp_X = np.load(
                    closest_file("{}{}/{}".format(
                        dir_features, split, f)))[0:np.shape(data_tmp_Y)[0], :]
                #print('here')
                #print('data_tmp_X: ',np.shape(data_tmp_X))
                #print('data_tmp_Y: ',np.shape(data_tmp_Y))

            else:
                # data_tmp = sio.loadmat( closest_file("{}/{}".format(dir_features, f)) )

                data_tmp_Y = np.load(
                    closest_file("{}/{}".format(dir_labels, f)))
                data_tmp_X = np.load(
                    closest_file("{}/{}".format(
                        dir_features, f)))[0:np.shape(data_tmp_Y)[0], :]

            X_all += [data_tmp_X.astype(np.float32)]
            Y_all += [data_tmp_Y[:, 0]]
            # print('data_tmp_Y: ',np.shape(data_tmp_Y))
            print(f)
            print('X_all ', np.shape(X_all))
            print('Y_all ', np.shape(Y_all))

        # Make sure axes are correct (TxF not FxT for F=feat, T=time)
        print("Make sure axes are correct (TxF not FxT for F=feat, T=time)")
        print(X_all[0].shape, Y_all[0].shape)
        if X_all[0].shape[0] != Y_all[0].shape[0]:
            X_all = [x.T for x in X_all]
        self.n_features = X_all[0].shape[1]
        # print('n_features: ',self.n_features)
        self.n_classes = len(np.unique(np.hstack(Y_all)))
        # print('X_all',np.shape(X_all[0]))
        # Make sure labels are sequential
        # print('n_classes: ',self.n_classes)
        # print(np.shape(np.hstack(Y_all)))

        if self.n_classes != np.hstack(Y_all).max() + 1:
            Y_all = utils.remap_labels(Y_all)
            print("Reordered class labels")

        # Subsample the data
        if sample_rate > 1:
            # print('sample_rate',sample_rate)
            X_all, Y_all = utils.subsample(X_all, Y_all, sample_rate, dim=0)
            # print('X_all',np.shape(X_all[0]))
        #print('Y_all',np.shape(Y_all[0]))
        # ------------Train/test Splits---------------------------
        # Split data/labels into train/test splits
        fid2idx = self.fid2idx(files_features)
        #print(file_train)
        #print(fid2idx)
        X_train = [X_all[fid2idx[f]] for f in file_train if f in fid2idx]
        X_test = [X_all[fid2idx[f]] for f in file_test if f in fid2idx]
        #print(len(X_train))
        y_train = [Y_all[fid2idx[f]] for f in file_train if f in fid2idx]
        y_test = [Y_all[fid2idx[f]] for f in file_test if f in fid2idx]
        #print('Xtrain', np.shape(X_train))
        #print('Ytrain', np.shape(y_train))
        #print('Xtest', np.shape(X_test))
        #print('Ytest', np.shape(y_train))
        if len(X_train) == 0:
            print("Error loading data")

        return X_train, y_train, X_test, y_test
Example No. 17
import sys
import subprocess
from re import sub
import os
import numpy as np
import utils

mergedsortfn = sys.argv[1]
origroibamfn = sys.argv[2]

sampledbamfn = sub('.sorted.bam$', ".sampled.sorted.bam", mergedsortfn)

GAIN_FINAL = sys.argv[3]

ratio = float(utils.countReads(mergedsortfn)) / float(
    utils.countReads(origroibamfn))
samplerate = round(0.5 / (ratio * 0.98), 2)

success = False
if (samplerate < 1.0):
    utils.subsample(mergedsortfn, sampledbamfn, str(samplerate))

    success = True
elif (samplerate < 1.5):
    print('sample rate is larger than 1: ' + str(samplerate))
    os.rename(mergedsortfn, sampledbamfn)
    success = True
else:
    print("not enough number of reads found for " + mergedsortfn)

if (success):
    os.rename(sampledbamfn, GAIN_FINAL)
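The utils.subsample used above is project-specific, but subsampling a BAM file at a given rate is commonly done with samtools view -s. A hypothetical wrapper, not the repository's actual code, could look like this (the seed and function name are illustrative):

import subprocess

def subsample_bam(inbamfn, outbamfn, samplerate, seed=42):
    frac = float(samplerate)
    assert 0.0 < frac < 1.0
    # samtools view -s takes SEED.FRACTION, e.g. 42.37 keeps roughly 37% of the reads
    arg = "{}{}".format(seed, str(frac)[1:])
    subprocess.check_call(["samtools", "view", "-b", "-s", arg,
                           "-o", outbamfn, inbamfn])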
Example No. 18
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN before convolutions.
  This is the full preactivation residual unit variant proposed in [2]. See
  Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
  variant which has an extra bottleneck layer.
  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.
  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.
  Returns:
    The ResNet unit's output.
  """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs,
                                 activation_fn=tf.nn.relu,
                                 scope='preact')
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact,
                                   depth, [1, 1],
                                   stride=stride,
                                   normalizer_fn=None,
                                   activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               normalizer_fn=None,
                               activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
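For reference, resnet_utils.subsample in the TF-Slim ResNet code is essentially a 1x1 max pool with the requested stride (and a no-op when the factor is 1); a sketch for TF 1.x:

import tensorflow.contrib.slim as slim

def subsample(inputs, factor, scope=None):
    # Spatially subsample the input by `factor` without changing the channel count.
    if factor == 1:
        return inputs
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)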
Example No. 19
    def load_split(self, features, split, feature_type="X", sample_rate=1):
        # Setup directory and filenames
        dir_features = self.feature_path(features)

        # Get splits for this partition of data
        if self.activity is None:
            file_train = open(
                self.base_dir +
                "splits/{}/{}/train.txt".format(self.name, split)).readlines()
            file_test = open(
                self.base_dir +
                "splits/{}/{}/test.txt".format(self.name, split)).readlines()
        else:
            file_train = open(
                self.base_dir + "splits/{}/{}/{}/train.txt".format(
                    self.name, self.activity, split)).readlines()
            file_test = open(self.base_dir + "splits/{}/{}/{}/test.txt".format(
                self.name, self.activity, split)).readlines()
        file_train = [f.strip() for f in file_train]
        file_test = [f.strip() for f in file_test]

        # Remove extension
        if "." in file_train[0]:
            file_train = [".".join(f.split(".")[:-1]) for f in file_train]
            file_test = [".".join(f.split(".")[:-1]) for f in file_test]

        self.trials_train = file_train
        self.trials_test = file_test

        # Get all features
        files_features = self.get_files(dir_features, split)

        X_all, Y_all = [], []
        for f in files_features:
            if "Split_" in os.listdir(dir_features)[-1]:
                data_tmp = sio.loadmat(
                    closest_file("{}{}/{}".format(dir_features, split, f)))
            else:
                data_tmp = sio.loadmat(
                    closest_file("{}/{}".format(dir_features, f)))
            X_all += [data_tmp[feature_type].astype(np.float32)]
            Y_all += [np.squeeze(data_tmp["Y"])]

        # Make sure axes are correct (TxF not FxT for F=feat, T=time)
        if X_all[0].shape[0] != Y_all[0].shape[0]:
            X_all = [x.T for x in X_all]
        self.n_features = X_all[0].shape[1]
        self.n_classes = len(np.unique(np.hstack(Y_all)))

        # Make sure labels are sequential
        if self.n_classes != np.hstack(Y_all).max() + 1:
            Y_all = utils.remap_labels(Y_all)
            print("Reordered class labels")

        # Subsample the data
        if sample_rate > 1:
            X_all, Y_all = utils.subsample(X_all, Y_all, sample_rate, dim=0)

        # ------------Train/test Splits---------------------------
        # Split data/labels into train/test splits
        fid2idx = self.fid2idx(files_features)
        X_train = [X_all[fid2idx[f]] for f in file_train if f in fid2idx]
        X_test = [X_all[fid2idx[f]] for f in file_test if f in fid2idx]

        y_train = [Y_all[fid2idx[f]] for f in file_train if f in fid2idx]
        y_test = [Y_all[fid2idx[f]] for f in file_test if f in fid2idx]

        if len(X_train) == 0:
            print("Error loading data")

        return X_train, y_train, X_test, y_test
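Here utils.subsample thins each sequence along the time axis. A minimal sketch consistent with the call signature above, taking every sample_rate-th frame of each (T x F) array and its labels (the project's helper may differ, e.g. by averaging frames):

def subsample_sequences(X_all, Y_all, rate, dim=0):
    # Only dim=0 (the time axis) is handled in this sketch.
    rate = int(rate)
    X_out = [x[::rate] for x in X_all]
    Y_out = [y[::rate] for y in Y_all]
    return X_out, Y_out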