Example #1
def main(args, net=None):
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    # load from checkpoint if we're not given an external net
    load_checkpoint = net is None
    if net is None:
        net = dp.load_predefined_extract_net(args)

    totalset = torch.utils.data.ConcatDataset([trainset, testset])
    dataloader = torch.utils.data.DataLoader(totalset, batch_size=100, shuffle=False, **kwargs)

    # copying model params from checkpoint
    if load_checkpoint:
        filename = os.path.join(outputdir, args.torchmodel)
        if os.path.isfile(filename):
            print("==> loading params from checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            net.load_state_dict(checkpoint['state_dict'])
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError

    if use_cuda:
        net.cuda()

    print('Extracting features ...')
    features, features_dr, labels = extract(dataloader, net, use_cuda)
    print('Done.\n')

    feat_path = os.path.join(datadir, args.feat)
    if args.h5:
        import h5py
        fo = h5py.File(feat_path + '.h5', 'w')
        fo.create_dataset('labels', data=labels)
        fo.create_dataset('Z', data=np.squeeze(features_dr))
        fo.create_dataset('data', data=np.squeeze(features))
        fo.close()
    else:
        fo = open(feat_path + '.pkl', 'wb')
        pickle.dump({'labels': labels, 'Z': np.squeeze(features_dr), 'data': np.squeeze(features)}, fo, protocol=2)
        fo.close()
    return features, features_dr, labels
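
The features written above feed the later pipeline stages (graph construction and DCC finetuning). A minimal sketch of reading them back for the pickle branch, assuming the same 'labels'/'Z'/'data' keys; the file path is illustrative:

import pickle
import numpy as np

# read back the feature file written by main() above (illustrative path)
with open('data/mnist/features.pkl', 'rb') as fo:
    feats = pickle.load(fo)

print(feats['data'].shape)         # flattened input features, one row per sample
print(feats['Z'].shape)            # low-dimensional SDAE embedding
print(np.unique(feats['labels']))  # ground-truth label set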
Example #2
def __edit_lua_cb(self, widget):
    import shutil
    import subprocess
    path = os.path.join(GLib.get_user_config_dir(), "ibus", "libpinyin")
    if not os.path.exists(path):
        os.makedirs(path)
    path = os.path.join(path, "user.lua")
    if not os.path.exists(path):
        src = os.path.join(config.get_data_dir(), "user.lua")
        shutil.copyfile(src, path)
    # pass argv as a list so paths with spaces or shell metacharacters are safe
    subprocess.call(["xdg-open", path])
Example #3
def main(args):
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    logger = None
    if args.tensorboard:
        # create the folder for storing logs if it does not exist
        logging_dir = os.path.join(outputdir, 'runs', 'pretraining')
        if not os.path.exists(logging_dir):
            os.makedirs(logging_dir)
        logging_dir = os.path.join(logging_dir, '%s' % args.id)
        if args.clean_log:
            remove_files_in_dir(logging_dir)
        logger = Logger(logging_dir)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    nepoch = int(
        np.ceil(
            np.array(args.niter * args.batchsize, dtype=float) /
            len(trainset)))
    step = int(
        np.ceil(
            np.array(args.step * args.batchsize, dtype=float) / len(trainset)))

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batchsize,
                                              shuffle=True,
                                              **kwargs)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=100,
                                             shuffle=True,
                                             **kwargs)

    return pretrain(
        args, outputdir, {
            'nlayers': 4,
            'dropout': 0.2,
            'reluslope': 0.0,
            'nepoch': nepoch,
            'lrate': [args.lr],
            'wdecay': [0.0],
            'step': step
        }, use_cuda, trainloader, testloader, logger)
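
The nepoch and step values above convert an iteration budget into epoch counts: args.niter minibatches of args.batchsize samples each, divided by the training-set size. A quick sanity check of that arithmetic (niter, batchsize, and the dataset size below are hypothetical values, not from the source):

import numpy as np

niter, batchsize, ntrain = 50000, 256, 60000  # hypothetical values
nepoch = int(np.ceil(niter * batchsize / float(ntrain)))
print(nepoch)  # 214: 50000 iterations of 256 samples is ~213.3 passes over 60000 samples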
Example #4
def compressed_data(dataset,
                    n_samples,
                    k,
                    preprocess=None,
                    algo='mknn',
                    isPCA=None,
                    format='mat'):
    datadir = get_data_dir(dataset)
    if format == 'pkl':
        labels, features = load_train_and_validation(load_data, datadir,
                                                     n_samples)
    elif format == 'h5':
        labels, features = load_train_and_validation(load_data_h5py, datadir,
                                                     n_samples)
    else:
        labels, features = load_train_and_validation(load_matdata, datadir,
                                                     n_samples)

    features = feature_transformation(features, preprocessing=preprocess)

    # PCA is computed for the text dataset. Please refer to the RCC paper for exact details.
    features1 = features.copy()
    if isPCA is not None:
        pca = PCA(n_components=isPCA, svd_solver='full').fit(features)
        features1 = pca.transform(features)

    t0 = time()

    if algo == 'knn':
        weights = kNN(features1, k=k, measure='euclidean')
    else:
        weights = mkNN(features1, k=k, measure='cosine')

    print('The time taken for edge set computation is {}'.format(time() - t0))

    filepath = os.path.join(datadir, 'pretrained')
    if format == 'h5':
        import h5py
        fo = h5py.File(filepath + '.h5', 'w')
        fo.create_dataset('X', data=features)
        fo.create_dataset('w', data=weights[:, :2])
        fo.create_dataset('gtlabels', data=labels)
        fo.close()
    else:
        sio.savemat(filepath + '.mat',
                    mdict={
                        'X': features,
                        'w': weights[:, :2],
                        'gtlabels': labels
                    })
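
For reference, a call under the signature above might look like the following; the dataset name and parameter values are illustrative, not taken from the source:

# build a mutual-kNN edge set for a hypothetical 'mnist' setup and write
# pretrained.mat into its data directory; n_samples and k are made-up values
compressed_data('mnist', n_samples=70000, k=10, algo='mknn', format='mat')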
Example #5
def main(args):
    datadir = get_data_dir(args.db)

    featurefile = os.path.join(datadir, args.feat)
    graphfile = os.path.join(datadir, args.g)
    outputfile = os.path.join(datadir, args.out)
    if os.path.isfile(featurefile) and os.path.isfile(graphfile):

        if args.h5:
            data0 = h5py.File(featurefile, 'r')
            data1 = h5py.File(graphfile, 'r')
            data2 = h5py.File(outputfile + '.h5', 'w')
        else:
            fo = open(featurefile, 'rb')
            data0 = pickle.load(fo)
            data1 = sio.loadmat(graphfile)
            fo.close()

        x0 = data0['data'][:].astype(np.float32).reshape(
            (len(data0['labels'][:]), -1))
        x1 = data1['X'][:].astype(np.float32).reshape(
            (len(data1['gtlabels'].T), -1))

        # sanity check: the graph must have been built from exactly these features
        a, b = np.where(x0 - x1)
        assert not a.size

        joined_data = {
            'gtlabels': data0['labels'][:],
            'X': data0['data'][:].astype(np.float32),
            'Z': data0['Z'][:].astype(np.float32),
            'w': data1['w'][:].astype(np.float32)
        }

        if args.h5:
            data2.create_dataset('gtlabels', data=data0['labels'][:])
            data2.create_dataset('X', data=data0['data'][:].astype(np.float32))
            data2.create_dataset('Z', data=data0['Z'][:].astype(np.float32))
            data2.create_dataset('w', data=data1['w'][:].astype(np.float32))
            data0.close()
            data1.close()
            data2.close()
        else:
            sio.savemat(outputfile + '.mat', joined_data)
        return joined_data
    else:
        raise FileNotFoundError(
            "feature file '{}' or graph file '{}' not found".format(
                featurefile, graphfile))
Example #6
def makeDCCinp(args):
    # pretrained.mat or pretrained.h5 must be placed under the ../data/<db>/ directory, where "db" is the dataset name
    datadir = get_data_dir(args.db)
    datafile = 'pretrained'

    if args.h5:
        datafile = os.path.join(datadir, datafile + '.h5')
    else:
        datafile = os.path.join(datadir, datafile + '.mat')
    assert os.path.exists(
        datafile), 'Training data not found at `{:s}`'.format(datafile)

    if args.h5:
        import h5py
        raw_data = h5py.File(datafile, 'r')
    else:
        raw_data = sio.loadmat(datafile, mat_dtype=True)

    data = raw_data['X'][:].astype(np.float32)
    Z = raw_data['Z'][:].astype(np.float32)
    # correct special case where Z is N x 1 and it gets loaded as 1 x N
    if Z.shape[0] == 1:
        Z = np.transpose(Z)

    labels = np.squeeze(raw_data['gtlabels'][:])
    pairs = raw_data['w'][:, :2].astype(int)

    if args.h5:
        raw_data.close()

    print('\n Loaded `{:s}` dataset for finetuning'.format(args.db))

    numpairs = pairs.shape[0]
    numsamples = data.shape[0]

    # Create pairwise weights and per-sample weights for the reconstruction loss term
    R = csr_matrix(
        (np.ones(numpairs, dtype=np.float32), (pairs[:, 0], pairs[:, 1])),
        shape=(numsamples, numsamples))
    R = R + R.transpose()
    nconn = np.squeeze(np.array(np.sum(R, 1)))
    weights = np.average(nconn) / np.sqrt(
        nconn[pairs[:, 0]] * nconn[pairs[:, 1]])
    pairs = np.hstack((pairs, np.atleast_2d(weights).transpose()))

    return data, labels, pairs, Z, nconn
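
To see what the weighting at the end does, here is a self-contained toy run of the same computation (the pair list is made up): edges incident to high-degree samples are down-weighted, so graph hubs do not dominate the loss.

import numpy as np
from scipy.sparse import csr_matrix

# three samples, two edges (0,1) and (1,2); sample 1 is the hub
pairs = np.array([[0, 1], [1, 2]])
numsamples, numpairs = 3, len(pairs)

R = csr_matrix((np.ones(numpairs, dtype=np.float32),
                (pairs[:, 0], pairs[:, 1])),
               shape=(numsamples, numsamples))
R = R + R.transpose()
nconn = np.squeeze(np.array(np.sum(R, 1)))  # degrees: [1., 2., 1.]
weights = np.average(nconn) / np.sqrt(nconn[pairs[:, 0]] * nconn[pairs[:, 1]])
print(weights)  # [0.9428 0.9428]: both edges touch the degree-2 hub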
Example #7
    def __init_user_data(self):
        # page: User Data
        self.__page_user_data.show()

        self.__frame_lua_script = self.__builder.get_object("frameLuaScript")
        path = os.path.join(config.get_data_dir(), 'user.lua')
        if not os.access(path, os.R_OK):
            self.__frame_lua_script.hide()

        self.__edit_lua = self.__builder.get_object("EditLua")
        self.__edit_lua.connect("clicked", self.__edit_lua_cb)

        self.__import_dictionary = self.__builder.get_object("ImportDictionary")
        self.__import_dictionary.connect("clicked", self.__import_dictionary_cb)

        self.__clear_user_data = self.__builder.get_object("ClearUserData")
        self.__clear_user_data.connect("clicked", self.__clear_user_data_cb, "user")
        self.__clear_all_data = self.__builder.get_object("ClearAllData")
        self.__clear_all_data.connect("clicked", self.__clear_user_data_cb, "all")
Example #8
File: test.py Project: zenhumany/apisan
def test_retval(self):
    chk = RetValChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("return-value"))
    assert len(bugs) == 1
Example #9
File: test.py Project: zenhumany/apisan
def test_missing_unlock(self):
    chk = CausalityChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("missing-unlock"))
    assert len(bugs) == 1
Example #10
def main(args, net=None):
    global oldassignment

    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    logger = None
    if args.tensorboard:
        # create the folder for storing logs if it does not exist
        logging_dir = os.path.join(outputdir, 'runs', 'DCC')
        if not os.path.exists(logging_dir):
            os.makedirs(logging_dir)
        logging_dir = os.path.join(logging_dir, '%s' % args.id)
        if args.clean_log:
            remove_files_in_dir(logging_dir)
        logger = Logger(logging_dir)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True


    startepoch = 0
    kwargs = {'num_workers': 5, 'pin_memory': True} if use_cuda else {}

    # setting up dataset specific objects
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)
    numeval = len(trainset) + len(testset)

    # extracting training data from the pretrained.mat file
    data, labels, pairs, Z, sampweight = makeDCCinp(args)

    # For simplicity, a placeholder is created for each dataset and model
    load_pretraining = net is None
    if net is None:
        net = dp.load_predefined_extract_net(args)

    # reshaping data for some datasets
    if args.db == 'cmnist':
        data = data.reshape((-1, 1, 28, 28))
    elif args.db == 'ccoil100':
        data = data.reshape((-1, 3, 128, 128))
    elif args.db == 'cytf':
        data = data.reshape((-1, 3, 55, 55))
    elif args.db == 'cyale':
        data = data.reshape((-1, 1, 168, 192))

    totalset = torch.utils.data.ConcatDataset([trainset, testset])

    # computing and initializing the hyperparams
    _sigma1, _sigma2, _lambda, _delta, _delta1, _delta2, lmdb, lmdb_data = computeHyperParams(pairs, Z)
    oldassignment = np.zeros(len(pairs))
    stopping_threshold = int(math.ceil(cfg.STOPPING_CRITERION * float(len(pairs))))

    # Create dataset and random batch sampler for Finetuning stage
    trainset = DCCFT_data(pairs, data, sampweight)
    batch_sampler = DCCSampler(trainset, shuffle=True, batch_size=args.batchsize)

    # copying model params from Pretrained (SDAE) weights file
    if load_pretraining:
        load_weights(args, outputdir, net)


    # creating objects for loss functions, U's are initialized to Z here
    # Criterion1 corresponds to reconstruction loss
    criterion1 = DCCWeightedELoss(size_average=True)
    # Criterion2 corresponds to sum of pairwise and data loss terms
    criterion2 = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)

    if use_cuda:
        net.cuda()
        criterion1 = criterion1.cuda()
        criterion2 = criterion2.cuda()

    # setting up data loader for training and testing phase
    trainloader = torch.utils.data.DataLoader(trainset, batch_sampler=batch_sampler, **kwargs)
    testloader = torch.utils.data.DataLoader(totalset, batch_size=args.batchsize, shuffle=False, **kwargs)

    # setting up optimizer - the bias params get twice the learning rate of the weight params
    bias_params = [param for name, param in net.named_parameters() if 'bias' in name]
    nonbias_params = [param for name, param in net.named_parameters() if 'bias' not in name]

    optimizer = optim.Adam([{'params': bias_params, 'lr': 2*args.lr},
                            {'params': nonbias_params},
                            {'params': criterion2.parameters(), 'lr': args.lr},
                            ], lr=args.lr, betas=(0.99, 0.999))

    # this is needed for WARM START
    if args.resume:
        filename = os.path.join(outputdir, 'FTcheckpoint_%d.pth.tar' % args.level)
        if os.path.isfile(filename):
            print("==> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            net.load_state_dict(checkpoint['state_dict'])
            criterion2.load_state_dict(checkpoint['criterion_state_dict'])
            startepoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            _sigma1 = checkpoint['sigma1']
            _sigma2 = checkpoint['sigma2']
            _lambda = checkpoint['lambda']
            _delta = checkpoint['delta']
            _delta1 = checkpoint['delta1']
            _delta2 = checkpoint['delta2']
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError

    # This is the actual Algorithm
    flag = 0
    for epoch in range(startepoch, args.nepoch):
        if logger:
            logger.log_value('sigma1', _sigma1, epoch)
            logger.log_value('sigma2', _sigma2, epoch)
            logger.log_value('lambda', _lambda, epoch)

        train(trainloader, net, optimizer, criterion1, criterion2, epoch, use_cuda, _sigma1, _sigma2, _lambda, logger)
        Z, U, change_in_assign, assignment = test(testloader, net, criterion2, epoch, use_cuda, _delta, pairs, numeval, flag, logger)

        if flag:
            # DCC keeps running as long as the change in label assignment stays below the
            # threshold. The check can fire spuriously in the very first epoch after the
            # flag is set, so it must trigger on several consecutive epochs before we stop.
            if change_in_assign > stopping_threshold:
                flag += 1
            if flag == 4:
                break

        if (epoch + 1) % args.M == 0:
            _sigma1 = max(_delta1, _sigma1 / 2)
            _sigma2 = max(_delta2, _sigma2 / 2)
            if _sigma2 == _delta2 and flag == 0:
                # Start checking for stopping criterion
                flag = 1

        # Save checkpoint
        index = (epoch // args.M) * args.M
        save_checkpoint({'epoch': epoch+1,
                         'state_dict': net.state_dict(),
                         'criterion_state_dict': criterion2.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'sigma1': _sigma1,
                         'sigma2': _sigma2,
                         'lambda': _lambda,
                         'delta': _delta,
                         'delta1': _delta1,
                         'delta2': _delta2,
                         }, index, filename=outputdir)

    output = {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment}
    sio.savemat(os.path.join(outputdir, 'features'), output)
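
The saved features file can then be inspected to recover the clustering. A minimal sketch, assuming the .mat output written by sio.savemat above; the path is illustrative:

import numpy as np
import scipy.io as sio

out = sio.loadmat('output/mnist/features.mat')  # illustrative path
assignment = np.squeeze(out['cluster'])
print('clusters found:', len(np.unique(assignment)))
print('embedding Z shape:', out['Z'].shape)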
Example #11
File: test.py Project: zenhumany/apisan
def test_SSL(self):
    chk = CondChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("SSL"))
    assert len(bugs) == 2  # (X, Y), (Y, X)
Example #12

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--data',
                        dest='db',
                        type=str,
                        default='mnist',
                        help='name of the dataset')

    args = parser.parse_args()
    np.random.seed(cfg.RNG_SEED)
    random.seed(cfg.RNG_SEED)

    datadir = get_data_dir(args.db)
    strpath = osp.join(datadir, 'traindata.mat')

    if not os.path.exists(strpath):
        if args.db == 'mnist':
            make_mnist_data(datadir)
        elif args.db == 'reuters':
            make_reuters_data(datadir, 10000)
        elif args.db == 'ytf':
            make_misc_data(datadir, 'YTFrgb.pkl', [55, 55, 3])
        elif args.db == 'coil100':
            make_misc_data(datadir, 'coil100rgb.pkl', [128, 128, 3])
        elif args.db == 'yale':
            make_misc_data(datadir, 'yale_DoG.pkl', [168, 192, 1])
        elif args.db == 'rcv1':
            make_misc_data(datadir, 'reuters.pkl', [1, 1, 2000])
Example #13
import extract_feature
import copyGraph
import DCC


class IdentityNet(nn.Module):
    """Substitute for the autoencoder for visualization and debugging just the clustering part"""
    def __init__(self):
        super(IdentityNet, self).__init__()

    def forward(self, x):
        # internal encoding is x and output is also just x
        return x, x


datadir = get_data_dir(dp.easy.name)
N = 600

# first create the data
X, labels = make_data.make_easy_visual_data(datadir, N)

# visualize data
# we know there are 3 classes
for c in range(3):
    x = X[labels == c, :]
    plt.scatter(x[:, 0], x[:, 1], label=str(c))
plt.legend()
plt.show()

# then construct mkNN graph
k = 50
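
The snippet ends after choosing k; a plausible next step, assuming the compressed_data signature shown in Example #4, is to build the mutual-kNN edge set over the toy data:

# construct the mkNN graph over the toy features; format='pkl' is an assumption
compressed_data(dp.easy.name, N, k, algo='mknn', format='pkl')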
Example #14
def main():
    global args

    args = parser.parse_args()
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    reluslope = 0.0
    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)
    if args.db == 'mnist':
        net = extract_sdae_mnist(slope=reluslope, dim=args.dim)
    elif args.db == 'reuters' or args.db == 'reuters10k' or args.db == 'rcv1':
        net = extract_sdae_reuters(slope=reluslope, dim=args.dim)
    elif args.db == 'ytf':
        net = extract_sdae_ytf(slope=reluslope, dim=args.dim)
    elif args.db == 'coil100':
        net = extract_sdae_coil100(slope=reluslope, dim=args.dim)
    elif args.db == 'yale':
        net = extract_sdae_yale(slope=reluslope, dim=args.dim)
    elif args.db == 'cmnist':
        net = extract_convsdae_mnist(slope=reluslope)
    elif args.db == 'ccoil100':
        net = extract_convsdae_coil100(slope=reluslope)
    elif args.db == 'cytf':
        net = extract_convsdae_ytf(slope=reluslope)
    elif args.db == 'cyale':
        net = extract_convsdae_yale(slope=reluslope)
    else:
        raise ValueError("unsupported dataset '{}'".format(args.db))

    totalset = torch.utils.data.ConcatDataset([trainset, testset])
    dataloader = torch.utils.data.DataLoader(totalset, batch_size=100, shuffle=False, **kwargs)

    # copying model params from checkpoint
    filename = os.path.join(outputdir, args.torchmodel)
    if os.path.isfile(filename):
        print("==> loading params from checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename)
        net.load_state_dict(checkpoint['state_dict'])
    else:
        print("==> no checkpoint found at '{}'".format(filename))
        raise ValueError

    if use_cuda:
        net.cuda()

    print('Extracting features ...')
    features, features_dr, labels = extract(dataloader, net, use_cuda)
    print('Done.\n')

    feat_path = os.path.join(datadir, args.feat)
    if args.h5:
        import h5py
        fo = h5py.File(feat_path + '.h5', 'w')
        fo.create_dataset('labels', data=labels)
        fo.create_dataset('Z', data=np.squeeze(features_dr))
        fo.create_dataset('data', data=np.squeeze(features))
        fo.close()
    else:
        fo = open(feat_path + '.pkl', 'wb')
        pickle.dump({'labels': labels, 'Z': np.squeeze(features_dr), 'data': np.squeeze(features)}, fo, protocol=2)
        fo.close()
Example #15
File: test.py Project: zenhumany/apisan
def test_intovfl(self):
    chk = IntOvflChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("integer-overflow"))
    assert len(bugs) == 1
Example #16
File: test.py Project: zenhumany/apisan
def test_FSB(self):
    chk = FSBChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("format-string-bug"))
    assert len(bugs) == 1
Example #17
File: test.py Project: zenhumany/apisan
def test_memleak(self):
    chk = CausalityChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("memory-leak"))
    assert len(bugs) == 1
Example #18
File: test.py Project: zenhumany/apisan
def test_arg(self):
    chk = ArgChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("argument"))
    assert len(bugs) == 1
Example #19
def main():
    global args, oldassignment

    args = parser.parse_args()
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    if args.tensorboard:
        # create the folder for storing logs if it does not exist
        logging_dir = os.path.join(outputdir, 'runs', 'DCC')
        if not os.path.exists(logging_dir):
            os.makedirs(logging_dir)
        configure(os.path.join(logging_dir, '%s' % args.id))

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    reluslope = 0.0
    startepoch = 0
    kwargs = {'num_workers': 5, 'pin_memory': True} if use_cuda else {}

    # setting up dataset specific objects
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    numeval = len(trainset) + len(testset)


    # For simplicity, a placeholder is created for each dataset and model
    if args.db == 'mnist':
        net_s = extract_sdae_mnist(slope=reluslope, dim=args.dim)
        net_z = extract_sdae_mnist(slope=reluslope, dim=args.dim)
    else:
        print("db not supported: '{}'".format(args.db))
        raise ValueError

    totalset = torch.utils.data.ConcatDataset([trainset, testset])

    # extracting training data from the pretrained.mat file
    data, labels, pairs, Z, sampweight = makeDCCinp(args)

    # computing and initializing the hyperparams
    _sigma1, _sigma2, _lambda, _delta, _delta1, _delta2, lmdb, lmdb_data = computeHyperParams(pairs, Z, args.step)
    oldassignment = np.zeros(len(pairs))
    stopping_threshold = int(math.ceil(cfg.STOPPING_CRITERION * float(len(pairs))))

    # Create dataset and random batch sampler for Finetuning stage
    trainset = DCCFT_data(pairs, data, sampweight)
    batch_sampler = DCCSampler(trainset, shuffle=True, batch_size=args.batchsize)

    # setting up data loader for training and testing phase
    trainloader = torch.utils.data.DataLoader(trainset, batch_sampler=batch_sampler, **kwargs)
    testloader = torch.utils.data.DataLoader(totalset, batch_size=args.batchsize, shuffle=False, **kwargs)


    if args.step == 1:

        pretraining_filename = os.path.join(outputdir, args.torchmodel_pretraining)
        if os.path.isfile(pretraining_filename):
            print("==> loading params from pretraining checkpoint '{}'".format(pretraining_filename))
            pretraining_checkpoint = torch.load(pretraining_filename)
        else:
            print("==> no pretraining checkpoint found at '{}'".format(pretraining_filename))
            raise ValueError


        # setting up optimizer - the bias params get twice the learning rate of the weight params
        bias_params = [param for name, param in net_s.named_parameters() if 'bias' in name]
        nonbias_params = [param for name, param in net_s.named_parameters() if 'bias' not in name]

        # copying model params from Pretrained (SDAE) weights file
        net_s.load_state_dict(pretraining_checkpoint['state_dict'])

        criterion_sc = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)
        optimizer_sc = optim.Adam([{'params': bias_params, 'lr': 2*args.lr},
                            {'params': nonbias_params},
                            {'params': criterion_sc.parameters(), 'lr': args.lr},
                            ], lr=args.lr, betas=(0.99, 0.999))
        criterion_rec = DCCWeightedELoss(size_average=True) # OLD


        if use_cuda:
            net_s.cuda()
            criterion_sc = criterion_sc.cuda()
            criterion_rec = criterion_rec.cuda()

        # this is needed for WARM START
        if args.resume:
            filename = os.path.join(outputdir, 'FTcheckpoint_%d.pth.tar' % args.level)
            if os.path.isfile(filename):
                print("==> loading checkpoint '{}'".format(filename))
                checkpoint = torch.load(filename)
                net_s.load_state_dict(checkpoint['state_dict_s'])
                criterion_sc.load_state_dict(checkpoint['criterion_state_dict_sc'])
                startepoch = checkpoint['epoch']
                optimizer_sc.load_state_dict(checkpoint['optimizer_sc'])
                _sigma1 = checkpoint['sigma1']
                _sigma2 = checkpoint['sigma2']
                _lambda = checkpoint['lambda']
                _delta = checkpoint['delta']
                _delta1 = checkpoint['delta1']
                _delta2 = checkpoint['delta2']
            else:
                print("==> no checkpoint found at '{}'".format(filename))
                raise ValueError

        # This is the actual Algorithm
        flag = 0
        for epoch in range(startepoch, args.nepoch):
            print('sigma1', _sigma1, epoch)
            print('sigma2', _sigma2, epoch)
            print('lambda', _lambda, epoch)
            if args.tensorboard:
                log_value('sigma1', _sigma1, epoch)
                log_value('sigma2', _sigma2, epoch)
                log_value('lambda', _lambda, epoch)

            train_step_1(trainloader, net_s, optimizer_sc, criterion_rec, criterion_sc, epoch, use_cuda, _sigma1, _sigma2, _lambda)
            Z, U, change_in_assign, assignment = test(testloader, net_s, criterion_sc, epoch, use_cuda, _delta, pairs, numeval, flag)

            if flag:
                # DCC keeps running as long as the change in label assignment stays below the
                # threshold. The check can fire spuriously in the very first epoch after the
                # flag is set, so it must trigger on several consecutive epochs.
                if change_in_assign > stopping_threshold:
                    flag += 1

            if (epoch + 1) % args.M == 0:
                _sigma1 = max(_delta1, _sigma1 / 2)
                _sigma2 = max(_delta2, _sigma2 / 2)
                if _sigma2 == _delta2 and flag == 0:
                    # Start checking for stopping criterion
                    flag = 1

            # Save checkpoint
            index = (epoch // args.M) * args.M
            save_checkpoint({'epoch': epoch+1,
                             'state_dict_s': net_s.state_dict(),
                             'criterion_state_dict_sc': criterion_sc.state_dict(),
                             'optimizer_sc': optimizer_sc.state_dict(),
                             'sigma1': _sigma1,
                             'sigma2': _sigma2,
                             'lambda': _lambda,
                             'delta': _delta,
                             'delta1': _delta1,
                             'delta2': _delta2,
                             }, index, filename=outputdir)

            sio.savemat(os.path.join(outputdir, 'features_s'), {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment})

    elif args.step == 2:
        filename = os.path.join(outputdir, args.torchmodel)
        if os.path.isfile(filename):
            print("==> loading params from checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError

        # copying model params of s encoder from step 1
        net_s.load_state_dict(checkpoint['state_dict_s'])

        # freezing net_s
        for param in net_s.parameters():
            param.requires_grad = False

        net_d = DecoderNet(1)
        criterion_d = nn.MSELoss()

        # setting up optimizer - the bias params get twice the learning rate of the weight params
        bias_params = [param for name, param in net_z.named_parameters() if 'bias' in name]
        nonbias_params = [param for name, param in net_z.named_parameters() if 'bias' not in name]

        criterion_zc = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)
        optimizer_zc = optim.Adam([{'params': bias_params, 'lr': 2*args.lr},
                            {'params': nonbias_params},
                            {'params': criterion_zc.parameters(), 'lr': args.lr},
                            ], lr=args.lr, betas=(0.99, 0.999))
        optimizer_d = torch.optim.Adam(net_d.parameters(), lr=0.001)
        criterion_rec = DCCWeightedELoss(size_average=True)
        if use_cuda:
            net_d.cuda()
            net_s.cuda()
            net_z.cuda()
            criterion_zc = criterion_zc.cuda()
            criterion_d = criterion_d.cuda()
            criterion_rec = criterion_rec.cuda()

        flag = 0
        for epoch in range(startepoch, args.nepoch):
            print('sigma1', _sigma1, epoch)
            print('sigma2', _sigma2, epoch)
            print('lambda', _lambda, epoch)
            if args.tensorboard:
                log_value('sigma1', _sigma1, epoch)
                log_value('sigma2', _sigma2, epoch)
                log_value('lambda', _lambda, epoch)

            train_step_2(trainloader, net_s, net_z, net_d, optimizer_zc, optimizer_d, criterion_rec, criterion_zc, criterion_d, epoch, use_cuda, _sigma1, _sigma2, _lambda)
            Z, U, change_in_assign, assignment = test(testloader, net_z, criterion_zc, epoch, use_cuda, _delta, pairs, numeval, flag)


            if flag:
                # DCC keeps running as long as the change in label assignment stays below the
                # threshold. The check can fire spuriously in the very first epoch after the
                # flag is set, so it must trigger on several consecutive epochs.
                if change_in_assign > stopping_threshold:
                    flag += 1

            if (epoch + 1) % args.M == 0:
                _sigma1 = max(_delta1, _sigma1 / 2)
                _sigma2 = max(_delta2, _sigma2 / 2)
                if _sigma2 == _delta2 and flag == 0:
                    # Start checking for stopping criterion
                    flag = 1

            # Save checkpoint
            index = (epoch // args.M) * args.M
            save_checkpoint({'epoch': epoch+1,
                             'state_dict_s': net_s.state_dict(),
                             'state_dict_z': net_z.state_dict(),
                             'state_dict_d': net_d.state_dict(),
                             'criterion_state_dict_zc': criterion_zc.state_dict(),
                             'optimizer_zc': optimizer_zc.state_dict(),
                             'sigma1': _sigma1,
                             'sigma2': _sigma2,
                             'lambda': _lambda,
                             'delta': _delta,
                             'delta1': _delta1,
                             'delta2': _delta2,
                             }, index, filename=outputdir)

        sio.savemat(os.path.join(outputdir, 'features_z'), {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment})



    else:
        raise ValueError("step not recognized!")