Example #1
File: tester.py Project: tussock/Vault
 def fill_files(self, remaining=None, root=None):
     log.trace("fill_files")
     if not remaining:
         #    First time in...
         remaining, dummy, dummy = utils.from_readable_form(self.options.size)
         root = self.files_folder
         
     list = [root]
     done = False
     while not done:
         newlist = []
         for folder in list:
             for dname in ["dir1", "dir2", "dir3"]:
                 path = os.path.join(folder, dname)
                 utils.makedirs(path)
                 newlist.append(path)
             for fname in ["f1.avi", "f2.mp3", "f3.exe", "f4.txt"]:
                 path = os.path.join(folder, fname)
                 with open(path, "w") as f:
                     f.write(self.teststring1)
                 remaining -= len(self.teststring1)
             if remaining < 0:
                 done = True
                 break
         list = newlist
         
     return
Example #2
 def get(self, src, dest):
     """
     Get a given file from the remote location
     
     The src MUST exist, and MUST be a file on the remote system.
     If dest ends in os.sep (i.e. the path component separator, '/' on Linux):
         it will be created if it doesn't exist
         the final dest file name will be dest/basename(src)
     Otherwise:
         dest MUST be the full file name
         the folder will be created as required.

     The actual filename will be returned.
     """
     if not self.connected:
         self.connect()
     if dest[-1] == os.sep:
         folder = dest
         dest = os.path.join(folder, os.path.basename(src))
     else:
         folder = os.path.split(dest)[0]
     utils.makedirs(folder)
     retries = 0
     success = False
     while not success:
         try:
             self._get(src, dest)
             success = True
         except Exception as e:
             self.disconnect()
             self.connect()
             retries += 1
             if retries > const.Retries:
                 raise e
     return dest
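The dest handling documented above can be exercised in isolation. A minimal sketch, assuming a hypothetical resolve_dest helper that mirrors the branch at the top of get (the paths are illustrative):

import os

def resolve_dest(src, dest):
    # Mirrors the dest handling in get() above: a trailing os.sep means
    # "dest is a directory", otherwise dest is already the full target name.
    if dest[-1] == os.sep:
        return os.path.join(dest, os.path.basename(src))
    return dest

print(resolve_dest("/srv/data/report.txt", "/tmp/downloads" + os.sep))        # /tmp/downloads/report.txt
print(resolve_dest("/srv/data/report.txt", "/tmp/archive/report-copy.txt"))   # /tmp/archive/report-copy.txt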
Example #3
 def _make_dir(self, folder):
     #    Make the folder. utils.makedirs won't fail if the folder exists.
     if folder in ["", ".", "/"]:
         folder = self.root
     else:
         folder = utils.join_paths(self.root, folder)
     utils.makedirs(folder)
     if not os.path.isdir(folder):
         raise Exception("Unable to build folder")
Example #4
File: db.py Project: tussock/Vault
 def __init__(self):
     #    Make sure the folder exists. An exception is fatal
     utils.makedirs(const.DataDir)
     #    We permit a 'selected' entry to be kept fs = current file/dir
     self.sel_fs = None
     self.sel_fs_path = None
     self.cur_run_id = None
     self.cur_store = None
     self.sel_cache = {}
     self.fs_saved_cache = []
Example #5
    def testChanges(self):
        pass
        #    Full Backup
        #    change a file
        #    Incremental backup
        #    Restore most recent. ensure you get latest file
        #    Restore to just prior to incremental, ensure you get earlier file
        #    Run a full backup
        file = os.path.join(self.files_folder, "changer")
        restore_file = os.path.join(self.restore_folder, file[1:])

        #    t=0 - file does not exist
        b = Run("testbackup", const.FullBackup, self.options)
        b.run()

        #    Make sure we have ticked to another second since the start of the last backup.
        while datetime.now() - b.start_time < timedelta(seconds=1):
            time.sleep(0.01)

        #    t=1 - file exists
        with open(file, "w") as f:
            f.write("1")
        b = Run("testbackup", const.IncrBackup, self.options)
        b.run()

        #    Make sure we have ticked to another second since the start of the last backup.
        while datetime.now() - b.start_time < timedelta(seconds=1):
            time.sleep(0.01)

        #    t=2 - file changed
        with open(file, "w") as f:
            f.write("2")
        b = Run("testbackup", const.IncrBackup, self.options)
        b.run()

        #    Get the times
        runs = self.db.runs("testbackup")
        t0 = runs[0].start_time
        t1 = runs[1].start_time
        t2 = runs[2].start_time

        for t, exists, contents in [(t0, False, None), (t1, True, "1"), (t2, True, "2"), (None, True, "2")]:
            #    Attempt to restore most recent of ALL files
            #    This tests the default restore.
            r = Restore(self.restore_folder, [self.files_folder], t, self.options)
            r.run()
            if exists:
                with open(restore_file, "r") as f:
                    self.assertEqual(f.read(), contents)
            else:
                self.assertFalse(os.path.exists(restore_file))
            #    clean
            shutil.rmtree(self.restore_folder)
            utils.makedirs(self.restore_folder)
Example #6
    def test7bitFilenames(self):
        #    Make some 7 bit filenames
        strange_folder = os.path.join(self.files_folder, "strange")
        utils.makedirs(strange_folder)
        for i in xrange(1, 117, 10):
            name = "".join([chr(j) for j in xrange(i, i + 10) if chr(j) != "/"])
            path = os.path.join(strange_folder, name)
            with open(path, "w") as f:
                f.write(os.urandom(100))

        self.backup_restore_compare()
Example #7
    def testUnicodeFilenames(self):
        #    Make some unicode filenames
        #    Clean out the ordinary files
        shutil.rmtree(self.files_folder)
        utils.makedirs(self.files_folder)
        unicode_folder = os.path.join(unicode(self.files_folder), u"unicode")
        utils.makedirs(unicode_folder)
        for i in xrange(1000, 1200, 10):
            name = u"".join([unichr(j) for j in xrange(i, i + 10) if unichr(j) != u"/"])
            path = os.path.join(unicode_folder, name)
            with open(path, "w") as f:
                f.write(os.urandom(10))

        self.backup_restore_compare()
Example #8
    def setUp(self):
        self.config = Config.get_config()

        self.db = DB()
        self.db.check_upgrade()
        self.mark_db_ids()

        self.test_folder = tempfile.mkdtemp()
        self.files_folder = os.path.join(self.test_folder, "files")
        self.store_folder = os.path.join(self.test_folder, "store")
        self.restore_folder = os.path.join(self.test_folder, "restore")
        utils.makedirs(self.files_folder)
        utils.makedirs(self.store_folder)
        utils.makedirs(self.restore_folder)

        utils.build_file_structure(self.files_folder, 50 * const.Kilobyte, 500 * const.Kilobyte)

        #    Build a store object (dont save config)
        #    Note the careful size selection - we want backups to overflow the FolderStore.
        self.store = FolderStore("teststore", "2MB", True, self.store_folder)
        self.config.storage[self.store.name] = self.store

        #    Build the backup object (dont save config)
        self.backup = Backup("testbackup")
        self.backup.include_folders = [self.files_folder]
        self.backup.store = self.store.name
        self.backup.notify_msg = False
        self.include_packages = True
        self.config.backups[self.backup.name] = self.backup

        #    build an options object for use with the backup
        self.options = BlankClass()
        self.options.dry_run = False
        self.options.message = False
        self.options.email = False
        self.options.shutdown = False
        self.options.norecurse = False

        self.old_pass = self.config.data_passphrase
        self.config.data_passphrase = "banana"
Example #9
parser.add_argument('--nworkers', type=int, default=2)

#parser.add_argument('--nworkers', type=int, default=2)
#parser.add_argument('--nworkers', type=int, default=1)

parser.add_argument('--print-freq', help='Print progress every so iterations', type=int, default=20)
parser.add_argument('--vis-freq', help='Visualize progress every so iterations', type=int, default=500)

args = parser.parse_args()

# Random seed
if args.seed is None:
    args.seed = np.random.randint(100000)

# logger
utils.makedirs(args.save)
logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))
logger.info(args)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
    logger.info('Found {} CUDA devices.'.format(torch.cuda.device_count()))
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        logger.info('{} \t Memory: {:.2f}GB'.format(props.name, props.total_memory / (1024**3)))
else:
    logger.info('WARNING: Using device {}'.format(device))

np.random.seed(args.seed)
torch.manual_seed(args.seed)
Example #10
def main(conf):
    dump_dir = conf['lightgbm']['dump']['dir']
    makedirs(dump_dir)

    write_config(conf, join_path(dump_dir, 'application.conf'), 'hocon')
    write_config(conf, join_path(dump_dir, 'application.json'), 'json')
    logging.getLogger().addHandler(
        logging.FileHandler(join_path(dump_dir, 'application.log')))

    logging.info('Kaggle Talking Data')

    label = conf['lightgbm']['label']
    features = conf['lightgbm']['features']
    categorical_features = conf['lightgbm']['categorical_features']
    logging.info('Label: %s', label)
    logging.info('Features: %s', features)
    logging.info('Categorical features: %s', categorical_features)

    data_dir = abspath(conf['lightgbm']['data']['dir'])
    dfc = DataFrameCols(data_dir)
    train_index_name = conf['lightgbm']['data']['train']['index']
    train_index = dfc.load_index(train_index_name)

    df = dfc.load_df(columns=[label] + features, index=train_index)

    if conf['lightgbm']['valid_size'] > 0:
        train_df, valid_df = train_test_split(
            df, test_size=conf['lightgbm']['valid_size'])

        train_dataset = lgb.Dataset(data=train_df[features].values,
                                    label=train_df[label].values,
                                    feature_name=features,
                                    categorical_feature=categorical_features)
        valid_dataset = lgb.Dataset(data=valid_df[features].values,
                                    label=valid_df[label].values,
                                    feature_name=features,
                                    categorical_feature=categorical_features)

        del train_df
        del valid_df
        gc.collect()
    else:
        train_dataset = lgb.Dataset(data=df[features].values,
                                    label=df[label].values,
                                    feature_name=features,
                                    categorical_feature=categorical_features)
        valid_dataset = None

    params = conf['lightgbm']['params']
    options = conf['lightgbm']['options']
    model = train_lightgbm(params, train_dataset, valid_dataset, **options)
    model.save_model(join_path(dump_dir, 'model.bin'))
    del train_dataset
    del valid_dataset
    gc.collect()

    # load model
    # model = lgb.Booster(model_file=join_path(dump_dir, 'model.bin'))

    # train_label = train_df[label].values
    # train_pred = model.predict(train_df[features])
    # train_quality = quality(train_label, train_pred)
    # logging.info('Train quality: %s', train_quality)
    #
    # valid_label = valid_df[label].values
    # valid_pred = model.predict(valid_df[features])
    # valid_quality = quality(valid_label, valid_pred)
    # logging.info('Valid quality: %s', valid_quality)

    test_index_name = conf['lightgbm']['data']['test']['index']
    test_index = dfc.load_index(test_index_name)
    test_df = dfc.load_df(columns=features + ['click_id_submission'],
                          index=test_index)
    test_df['is_attributed'] = model.predict(test_df[features])
    test_df = test_df[['click_id_submission', 'is_attributed'
                       ]].rename(columns={'click_id_submission': 'click_id'})
    test_df.sort_values(by='click_id', inplace=True)
    test_df.to_csv(join_path(dump_dir, 'submission.csv'),
                   header=True,
                   index=False)

    gain = model.feature_importance('gain')
    ft = pd.DataFrame({
        'feature': model.feature_name(),
        'split': model.feature_importance('split'),
        'gain': 100 * gain / gain.sum()
    }).sort_values('gain', ascending=False)
    ft.to_csv(join_path(dump_dir, 'feature_strength.csv'),
              header=True,
              index=False,
              sep='\t')
Example #11
    if experimentID is None:
        # Make a new experiment ID
        experimentID = int(SystemRandom().random() * 100000)
    ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt')

    start = time.time()
    print("Sampling dataset of {} training examples".format(args.n))

    input_command = sys.argv
    ind = [i for i in range(len(input_command)) if input_command[i] == "--load"]
    if len(ind) == 1:
        ind = ind[0]
        input_command = input_command[:ind] + input_command[(ind + 2):]
    input_command = " ".join(input_command)

    utils.makedirs("results/")

    ##################################################################
    data_obj = parse_datasets(args, device)
    input_dim = data_obj["input_dim"]

    classif_per_tp = False
    if ("classif_per_tp" in data_obj):
        # do classification per time point rather than on a time series as a whole
        classif_per_tp = data_obj["classif_per_tp"]

    if args.classif and (args.dataset == "hopper" or args.dataset == "periodic"):
        raise Exception("Classification task is not available for MuJoCo and 1d datasets")

    n_labels = 1
    if args.classif:
Example #12
Learning the optimal transport map (between Gaussians) via CP-Flow (comparing to IAF)
"""

import gc
from scipy import linalg
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch
from lib.flows import SequentialFlow, DeepConvexFlow, LinearIAF
from lib.icnn import ICNN3
from lib import distributions
from data.toy_data import Gaussian as ToyData
from lib.utils import makedirs

makedirs('figures/OT')


def savefig(fn):
    plt.savefig(f'figures/OT/{fn}')


batch_size_train = 128
batch_size_test = 64
dimx = 2
if dimx == 2:
    m = np.array([1.5, 1.0])
    C = np.array([[0.9, -0.75], [-0.75, 0.9]])  # fixed for visualization
else:
    m = None
    C = None
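For reference, the optimal transport map between two Gaussians N(m1, C1) and N(m2, C2) has a closed form, which is the natural ground truth for a comparison like the one this script sets up. A hedged NumPy sketch (the function name and the mapping direction are assumptions, not taken from the original script):

import numpy as np
from scipy import linalg

def gaussian_ot_map(m1, C1, m2, C2):
    # Monge map T(x) = m2 + A (x - m1) with
    # A = C1^{-1/2} (C1^{1/2} C2 C1^{1/2})^{1/2} C1^{-1/2}.
    C1_half = np.real(linalg.sqrtm(C1))      # sqrtm can carry tiny imaginary parts
    C1_half_inv = np.linalg.inv(C1_half)
    middle = np.real(linalg.sqrtm(C1_half @ C2 @ C1_half))
    A = C1_half_inv @ middle @ C1_half_inv
    return lambda x: m2 + (x - m1) @ A.T

# Illustrative: map N(0, I) onto the 2-d Gaussian defined by m and C above.
T = gaussian_ot_map(np.zeros(2), np.eye(2),
                    np.array([1.5, 1.0]),
                    np.array([[0.9, -0.75], [-0.75, 0.9]]))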
Example #13
parser.add_argument('--print-freq',
                    help='Print progress every so iterations',
                    type=int,
                    default=20)
parser.add_argument('--vis-freq',
                    help='Visualize progress every so iterations',
                    type=int,
                    default=500)
args = parser.parse_args()

# Random seed
if args.seed is None:
    args.seed = np.random.randint(100000)

# logger
utils.makedirs(args.save)
logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                          filepath=os.path.abspath(__file__))
logger.info(args)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
    logger.info('Found {} CUDA devices.'.format(torch.cuda.device_count()))
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        logger.info('{} \t Memory: {:.2f}GB'.format(
            props.name, props.total_memory / (1024**3)))
else:
    logger.info('WARNING: Using device {}'.format(device))
Example #14
def main(conf):
    logging.info('Loading train dataset')
    train_df = load_train_df(conf['dataset_raw'])

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['dataset_raw'])

    class_weight = {int(c['class']): c['weight'] for c in conf['weights']}

    for w, cnf in conf['linear'].iteritems():
        if not cnf.get_bool('enabled', True):
            continue

        if w == 'dataset':
            continue

        logging.info('Start training linear model: %s', w)

        dump_dir = cnf.get('dump.dir') or '.'
        makedirs(dump_dir)

        config_file = join_path(dump_dir, 'application.conf')
        dump_config(conf, config_file)

        vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
        quality_file = join_path(dump_dir, 'quality.json')

        y = train_df[FieldsTrain.is_duplicate]

        if cnf['dump.cache.enabled']:
            logging.info('Loading vectorizer')

            try:
                vectorizer = joblib.load(vectorizer_file)
            except:
                logging.info('Unable to load vectorizer')
                vectorizer = None

            if vectorizer is None:
                logging.info('Training vectorizer')

                vectorizer = train_vectorizer(train_df, **cnf['vectorizer'])
                nf = len(vectorizer.vocabulary_)
                logging.info('Feature count: %d', nf)

                logging.info('Dumping vectorizer')
                joblib.dump(vectorizer, vectorizer_file)

            features_cache_file = join_path(dump_dir, cnf['dump.cache.train'])
            logging.info('Loading cached train feature matrix from %s',
                         features_cache_file)
            X = load_feature_matrix(features_cache_file)

            if X is None:
                logging.info('Unable to load cached train feature matrix')

                logging.info('Computing train feature matrix')
                X = compute_feature_matrix(train_df,
                                           vectorizer,
                                           combine=cnf['combine'])

                logging.info('Writing train feature matrix to %s',
                             features_cache_file)
                save_feature_matrix(X, features_cache_file)
        else:
            logging.info('Training vectorizer')
            vectorizer = train_vectorizer(train_df, **cnf['vectorizer'])
            X = compute_feature_matrix(train_df,
                                       vectorizer,
                                       combine=cnf['combine'])
            nf = len(vectorizer.vocabulary_)
            logging.info('Feature count: %d', nf)

        logging.info('Training feature matrix: %s', X.shape)

        quality, predictions = train(X,
                                     y,
                                     skfold(),
                                     class_weight,
                                     dump_dir=dump_dir,
                                     **cnf['model'])

        with open(quality_file, 'w') as qfh:
            json.dump(quality, qfh)

        logging.info('Writing train set to disk')
        train_df[FieldsTrain.linear] = predictions
        train_df[[
            FieldsTrain.id, FieldsTrain.is_duplicate, FieldsTrain.linear
        ]].to_csv(join_path(dump_dir, 'train.csv'), index=False)

        if cnf['dump.cache.enabled']:
            features_cache_file = join_path(dump_dir, cnf['dump.cache.test'])

            logging.info('Loading cached test feature matrix from %s',
                         features_cache_file)
            X = load_feature_matrix(features_cache_file)
            if X is None:
                logging.info('Unable to load cached test feature matrix')
                logging.info('Computing test feature matrix')
                X = compute_feature_matrix(test_df,
                                           vectorizer,
                                           combine=cnf['combine'])

                logging.info('Writing test feature matrix to cache')
                save_feature_matrix(X, features_cache_file)
        else:
            logging.info('Computing test feature matrix')
            X = compute_feature_matrix(test_df,
                                       vectorizer,
                                       combine=cnf['combine'])

        logging.info(
            'Computing test predictions as average logit of cross-validation models'
        )
        test_df[FieldsTest.linear_cv] = np.zeros(X.shape[0])
        for fold in quality['folds']:
            f = joblib.load(fold['dump'])
            p = logit(f.predict_proba(X)[:, 1])
            test_df[FieldsTest.linear_cv] = test_df[FieldsTest.linear_cv] + p
        test_df[FieldsTest.linear_cv] = test_df[FieldsTest.linear_cv] / len(
            quality['folds'])

        logging.info('Computing test predictions with full model')
        f = joblib.load(quality['full']['unweighted']['dump'])
        p = logit(f.predict_proba(X)[:, 1])
        test_df[FieldsTest.linear_full] = p

        logging.info('Computing test predictions with full weighted model')
        f = joblib.load(quality['full']['weighted']['dump'])
        p = logit(f.predict_proba(X)[:, 1])
        test_df[FieldsTest.linear_full_weighted] = p

        logging.info('Writing test set to disk')
        test_df[[
            FieldsTest.test_id, FieldsTest.linear_cv, FieldsTest.linear_full,
            FieldsTest.linear_full_weighted
        ]].to_csv(join_path(dump_dir, 'test.csv'), index=False)
Example #15
def main(conf):
    dump_dir = conf['xgboost.dump.dir']
    makedirs(dump_dir)

    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['xgboost.dataset'])

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['xgboost.dataset'])

    logging.info('Loading features')
    features = []
    for group, cnf in conf['features'].iteritems():
        logging.info('Loading features group: %s', group)

        features_dump_dir = cnf['dump']
        train_features_file = join_path(features_dump_dir, 'train.csv')
        test_features_file = join_path(features_dump_dir, 'test.csv')

        train_features = pd.read_csv(train_features_file)
        test_features = pd.read_csv(test_features_file)

        for fcnf in cnf['features']:
            feature = fcnf['feature']
            features.append(feature)
            train_col = fcnf.get('train_col', feature)
            test_col = fcnf.get('test_col', feature)
            train_df[feature] = train_features[train_col]
            test_df[feature] = test_features[test_col]

    feature_map_file = join_path(dump_dir, 'xgb.fmap')
    create_feature_map(features, feature_map_file)

    train_df_flipped = train_df.copy()
    for flip in conf['flip']:
        train_df_flipped[flip[0]] = train_df[[flip[1]]]
        train_df_flipped[flip[1]] = train_df[[flip[0]]]

    train_df = pd.concat([train_df, train_df_flipped],
                         axis=0,
                         ignore_index=True)
    logging.info('Train dataset: %s', train_df.shape)

    y = train_df[[FieldsTrain.is_duplicate]].values.flatten()
    logging.info('Train dataset CTR: %s', y.sum() / len(y))

    class_weight = {int(c['class']): c['weight'] for c in conf['weights']}
    w = np.vectorize(class_weight.get)(y)
    logging.info('Train dataset weighted CTR: %s', sum(y * w) / sum(w))

    q1 = train_df[Fields.question1].values
    q2 = train_df[Fields.question2].values

    train_df.drop([
        FieldsTrain.id, FieldsTrain.qid1, FieldsTrain.qid2,
        FieldsTrain.question1, FieldsTrain.question2, FieldsTrain.is_duplicate
    ],
                  axis=1,
                  inplace=True)

    logging.info('Computing test predictions')
    test_ids = test_df[[FieldsTest.test_id]]
    test_df.drop(
        [FieldsTest.test_id, FieldsTest.question1, FieldsTest.question2],
        axis=1,
        inplace=True)
    dtest = xgb.DMatrix(test_df.values)

    model = xgb.Booster({'nthread': 4})
    model.load_model(join_path(dump_dir, 'model.bin'))
    p_test = model.predict(dtest)

    logging.info('Writing submission file')
    submission_file = join_path(dump_dir, 'submission.csv')
    submission(submission_file, test_ids, p_test)
Example #16
def main(conf):
    dump_dir = conf['word2vec']['dump']['dir']
    makedirs(dump_dir)

    logging.warning('Loading train dataset')
    train_df = load_train_df(conf['word2vec']['dataset'])

    logging.warning('Loading test dataset')
    test_df = load_test_df(conf['word2vec']['dataset'])

    logging.warning('Loading embeddings')
    embeddings_dir = conf['word2vec']['embeddings']['dir']
    embeddings_file = join_path(embeddings_dir,
                                conf['word2vec']['embeddings']['file'])
    w2v = gensim.models.KeyedVectors.load_word2vec_format(embeddings_file,
                                                          binary=True)
    w2v_norm = gensim.models.KeyedVectors.load_word2vec_format(embeddings_file,
                                                               binary=True)
    w2v_norm.init_sims(replace=True)
    processor = Word2Vec(w2v, w2v_norm)

    logging.warning('Computing train features')

    train_df[Fields.w2v_wmd], \
    train_df[Fields.w2v_wmd_norm], \
    train_df[Fields.w2v_cos], \
    train_df[Fields.w2v_city], \
    train_df[Fields.w2v_jacc], \
    train_df[Fields.w2v_canb], \
    train_df[Fields.w2v_eucl], \
    train_df[Fields.w2v_mink], \
    train_df[Fields.w2v_bray], \
    train_df[Fields.w2v_skew_q1], \
    train_df[Fields.w2v_skew_q2], \
    train_df[Fields.w2v_kurt_q1], \
    train_df[Fields.w2v_kurt_q2] = \
        zip(*train_df.progress_apply(lambda r: processor.features(r['question1'], r['question2']), axis=1))

    for feature in [f for f in dir(Fields()) if f.startswith('w2v')]:
        logging.warning(
            'Feature %s AUC=%s', feature,
            roc_auc_score(train_df[FieldsTrain.is_duplicate],
                          train_df[feature]))

    logging.warning('Writing train feature dump')
    train_df.drop([
        Fields.question1, Fields.question2, FieldsTrain.qid1, FieldsTrain.qid2
    ],
                  axis=1,
                  inplace=True)
    train_df.to_csv(join_path(dump_dir, 'train.csv'), index=False)

    logging.warning('Computing test features')
    test_df[Fields.w2v_wmd], \
    test_df[Fields.w2v_wmd_norm], \
    test_df[Fields.w2v_cos], \
    test_df[Fields.w2v_city], \
    test_df[Fields.w2v_jacc], \
    test_df[Fields.w2v_canb], \
    test_df[Fields.w2v_eucl], \
    test_df[Fields.w2v_mink], \
    test_df[Fields.w2v_bray], \
    test_df[Fields.w2v_skew_q1], \
    test_df[Fields.w2v_skew_q2], \
    test_df[Fields.w2v_kurt_q1], \
    test_df[Fields.w2v_kurt_q2] = \
        zip(*test_df.progress_apply(lambda r: processor.features(r['question1'], r['question2']), axis=1))

    logging.warning('Writing test feature dump')
    test_df.drop([Fields.question1, Fields.question2], axis=1, inplace=True)
    test_df.to_csv(join_path(dump_dir, 'test.csv'), index=False)
Example #17
def train_it(
        Model,
        Data_obj,
        args,
        file_name,
        ExperimentID,
        #Trainwriter,
        Validationwriter,
        input_command,
        Devices):
    """
	parameters:
		Model, #List of Models
		Data_obj, #List of Data_objects which live on different devices
		args,
		file_name,
		ExperimentID, #List of IDs
		trainwriter, #List of TFwriters
		validationwriter, #List of TFwriters
		input_command,
		Devices #List of devices
	"""

    Ckpt_path = []
    Top_ckpt_path = []
    Best_test_acc = []
    Best_test_acc_step = []
    Logger = []
    Optimizer = []
    otherOptimizer = []
    ODEOptimizer = []

    for i, device in enumerate(Devices):

        Ckpt_path.append(
            os.path.join(args.save,
                         "experiment_" + str(ExperimentID[i]) + '.ckpt'))
        Top_ckpt_path.append(
            os.path.join(
                args.save,
                "experiment_" + str(ExperimentID[i]) + '_topscore.ckpt'))
        Best_test_acc.append(0)
        Best_test_acc_step.append(0)

        log_path = "logs/" + file_name + "_" + str(ExperimentID[i]) + ".log"
        if not os.path.exists("logs/"):
            utils.makedirs("logs/")
        Logger.append(
            utils.get_logger(logpath=log_path,
                             filepath=os.path.abspath(__file__)))
        Logger[i].info(input_command)

        Optimizer.append(
            get_optimizer(args.optimizer, args.lr, Model[i].parameters()))

    num_batches = Data_obj[0]["n_train_batches"]
    labels = Data_obj[0]["dataset_obj"].label_list

    #create empty lists for results and similar
    num_gpus = len(Devices)
    train_res = [None] * num_gpus
    batch_dict = [None] * num_gpus
    test_res = [None] * num_gpus
    label_dict = [None] * num_gpus

    # empty result placeholder
    somedict = {}
    test_res = [somedict]
    test_res[0]["accuracy"] = float(0)

    if args.v == 1 or args.v == 2:
        pbar = tqdm(range(1,
                          num_batches * (args.niters) + 1),
                    position=0,
                    leave=True,
                    ncols=160)
    else:
        pbar = range(1, num_batches * (args.niters) + 1)

    for itr in pbar:

        for i, device in enumerate(Devices):
            Optimizer[i].zero_grad()
        for i, device in enumerate(Devices):
            # default decay_rate = 0.999, lowest= args.lr/10 	# original
            # decay_rate = 0.9995, lowest = args.lr / 50 		# new
            utils.update_learning_rate(Optimizer[i],
                                       decay_rate=args.lrdecay,
                                       lowest=args.lr / 1000)

        wait_until_kl_inc = 10
        if itr // num_batches < wait_until_kl_inc:
            kl_coef = 0.01
        else:
            kl_coef = (1 - 0.99**(itr // num_batches - wait_until_kl_inc))

        for i, device in enumerate(Devices):
            batch_dict[i] = utils.get_next_batch(
                Data_obj[i]["train_dataloader"])

        for i, device in enumerate(Devices):
            train_res[i] = Model[i].compute_all_losses(batch_dict[i],
                                                       n_traj_samples=3,
                                                       kl_coef=kl_coef)

        for i, device in enumerate(Devices):
            train_res[i]["loss"].backward()

        for i, device in enumerate(Devices):
            Optimizer[i].step()

        n_iters_to_viz = 0.333
        if args.dataset == "swisscrop":
            n_iters_to_viz /= 20

        if (itr != 0) and (itr % args.val_freq) == 0:
            with torch.no_grad():

                # Calculate labels and loss on test data
                for i, device in enumerate(Devices):
                    test_res[i], label_dict[i] = compute_loss_all_batches(
                        Model[i],
                        Data_obj[i]["test_dataloader"],
                        args,
                        n_batches=Data_obj[i]["n_test_batches"],
                        experimentID=ExperimentID[i],
                        device=Devices[i],
                        n_traj_samples=3,
                        kl_coef=kl_coef)

                for i, device in enumerate(Devices):

                    #make confusion matrix
                    cm, conf_fig = plot_confusion_matrix(
                        label_dict[0]["correct_labels"],
                        label_dict[0]["predict_labels"],
                        Data_obj[0]["dataset_obj"].label_list,
                        tensor_name='dev/cm')
                    Validationwriter[i].add_figure(
                        "Validation_Confusionmatrix", conf_fig,
                        itr * args.batch_size)

                    # prepare GT labels and predictions
                    y_ref_train = torch.argmax(
                        train_res[0]['label_predictions'],
                        dim=2).squeeze().cpu()
                    y_pred_train = torch.argmax(batch_dict[0]['labels'],
                                                dim=1).cpu()
                    y_ref = label_dict[0]["correct_labels"].cpu()
                    y_pred = label_dict[0]["predict_labels"]

                    #Make checkpoint
                    torch.save(
                        {
                            'args': args,
                            'state_dict': Model[i].state_dict(),
                        }, Ckpt_path[i])

                    if test_res[i]["accuracy"] > Best_test_acc[i]:
                        Best_test_acc[i] = test_res[i]["accuracy"]
                        Best_test_acc_step[i] = itr * args.batch_size
                        torch.save(
                            {
                                'args': args,
                                'state_dict': Model[i].state_dict(),
                                'cm': cm
                            }, Top_ckpt_path[i])

                        #utils.plot_confusion_matrix2(y_ref, y_pred, Data_obj[0]["dataset_obj"].label_list, ExperimentID[i])
                        # Save trajectory here
                        #if not test_res[i]["PCA_traj"] is None:
                        #	with open( os.path.join('vis', 'traj_dict' + str(ExperimentID[i]) + '.pickle' ), 'wb') as handle:
                        #		pickle.dump(test_res[i]["PCA_traj"], handle, protocol=pickle.HIGHEST_PROTOCOL)

                    # make PCA visualization
                    if "PCA_traj" in test_res[0]:
                        #PCA_fig = get_pca_fig(test_res[0]["PCA_traj"]["PCA_trajs1"])
                        PCA_fig = None
                    else:
                        PCA_fig = None

                    logdict = {
                        'Classification_accuracy/train':
                        train_res[i]["accuracy"],
                        'Classification_accuracy/validation':
                        test_res[i]["accuracy"],
                        'Classification_accuracy/validation_peak':
                        Best_test_acc[i],
                        'Classification_accuracy/validation_peak_step':
                        Best_test_acc_step[i],
                        'loss/train':
                        train_res[i]["loss"].detach(),
                        'loss/validation':
                        test_res[i]["loss"].detach(),
                        'Other_metrics/train_cm':
                        sklearn_cm(y_ref_train, y_pred_train),
                        'Other_metrics/train_precision':
                        precision_score(y_ref_train,
                                        y_pred_train,
                                        average='macro'),
                        'Other_metrics/train_recall':
                        recall_score(y_ref_train,
                                     y_pred_train,
                                     average='macro'),
                        'Other_metrics/train_f1':
                        f1_score(y_ref_train, y_pred_train, average='macro'),
                        'Other_metrics/train_kappa':
                        cohen_kappa_score(y_ref_train, y_pred_train),
                        'Other_metrics/validation_cm':
                        sklearn_cm(y_ref, y_pred),
                        'Other_metrics/validation_precision':
                        precision_score(y_ref, y_pred, average='macro'),
                        'Other_metrics/validation_recall':
                        recall_score(y_ref, y_pred, average='macro'),
                        'Other_metrics/validation_f1':
                        f1_score(y_ref, y_pred, average='macro'),
                        'Other_metrics/validation_kappa':
                        cohen_kappa_score(y_ref, y_pred),
                    }

                    if "PCA_traj" in test_res[0]:
                        pass
                        #logdict['Visualization/latent_trajectory'] = wandb.Image( get_pca_fig(test_res[0]["PCA_traj"]) )

                    wandb.log(logdict, step=itr * args.batch_size)

                    # wandb.sklearn.plot_confusion_matrix(y_ref, y_pred, labels)
        # Write training loss and accuracy after every batch (Only recommended for debugging)
        fine_train_writer = False
        if fine_train_writer:
            if "loss" in train_res[i]:
                Validationwriter[i].add_scalar('loss/train',
                                               train_res[i]["loss"].detach(),
                                               itr * args.batch_size)
            if "accuracy" in train_res[i]:
                Validationwriter[i].add_scalar('Classification_accuracy/train',
                                               train_res[i]["accuracy"],
                                               itr * args.batch_size)

        #update progressbar
        if args.v == 2:
            pbar.set_description(
                "Train Ac: {:.3f} %  |  Test Ac: {:.3f} %, Peak Test Ac.: {:.3f} % (at {} batches)  |"
                .format(train_res[0]["accuracy"] * 100,
                        test_res[0]["accuracy"] * 100, Best_test_acc[i] * 100,
                        Best_test_acc_step[0] // args.batch_size))

        #empty all training variables
        #train_res = [None] * num_gpus
        batch_dict = [None] * num_gpus
        #test_res = [None] * num_gpus
        label_dict = [None] * num_gpus

    print(Best_test_acc[0], " at step ", Best_test_acc_step[0])
    return train_res, test_res, Best_test_acc[0], Best_test_acc_step[0]
Example #18
def main(conf):
    dump_dir = conf['svdres.dump.dir']
    makedirs(dump_dir)

    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['svdres.dataset'])

    vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
    try:
        logging.info('Loading vectorizer dump')
        vectorizer = joblib.load(vectorizer_file)
    except:
        logging.info('Loading vectorizer dump failed')
        logging.info('Training vectorizer')
        vectorizer = train_vectorizer(train_df, **conf['svdres.vectorizer'])

        logging.info('Writing vectorizer dump')
        joblib.dump(vectorizer, vectorizer_file)

    features_file = join_path(dump_dir, 'features_train.npz')
    logging.info('Loading cached train feature matrix from %s', features_file)
    X = load_feature_matrix(features_file)

    if X is None:
        logging.info('Unable to load cached train feature matrix')

        logging.info('Computing train feature matrix')
        X = compute_feature_matrix(train_df, vectorizer, combine='stack')

        logging.info('Writing train feature matrix to %s', features_file)
        save_feature_matrix(X, features_file)

    logging.info('Loading SVD decomposition')
    k = conf['svdres.svd'].get_int('k')
    singular_values_file = join_path(dump_dir, 'singular_values.txt')
    singular_vectors_file = join_path(dump_dir, 'singular_vectors.npz')
    try:
        S = np.loadtxt(singular_values_file)
        VT = np.load(singular_vectors_file)['VT']
        assert k == len(S)
    except:
        logging.info('Loading SVD decomposition failed')
        logging.info('Computing SVD decomposition')
        S, VT = compute_svd(X.asfptype(), **conf['svdres.svd'])

        logging.info('Writing singular values to file')
        np.savetxt(singular_values_file, S)
        np.savez(singular_vectors_file, VT=VT)

    logging.info('Train matrix %s', X.shape)
    logging.info('Computing train SVD residuals')
    L = X.shape[0] / 2
    Xq1 = X[:L, :]
    Xq2 = X[L:, :]

    start = 0
    batch = 100
    eucl = np.zeros(Xq1.shape[0])
    cos = np.zeros(Xq1.shape[0])
    q1res = np.zeros(Xq1.shape[0])
    q2res = np.zeros(Xq1.shape[0])
    while start < Xq1.shape[0]:
        finish = min(start + batch, Xq1.shape[0])

        Xq1_batch = Xq1[start:finish, :]
        nq1 = (Xq1_batch.multiply(Xq1_batch)).sum(axis=1).flatten()

        Rq1 = safe_sparse_dot(Xq1_batch, VT.transpose()).dot(VT) - Xq1_batch
        nrq1 = np.sum(np.multiply(Rq1, Rq1), axis=1).flatten()

        Xq2_batch = Xq2[start:finish, :]
        nq2 = (Xq2_batch.multiply(Xq2_batch)).sum(axis=1).flatten()

        Rq2 = safe_sparse_dot(Xq2_batch, VT.transpose()).dot(VT) - Xq2_batch
        nrq2 = np.sum(np.multiply(Rq2, Rq2), axis=1).flatten()

        q1res[start:finish] = np.sqrt(nrq1) / np.sqrt(nq1)
        q2res[start:finish] = np.sqrt(nrq2) / np.sqrt(nq2)
        eucl[start:finish] = euclidean(Rq1, Rq2).flatten()
        cos[start:finish] = cosine(Rq1, Rq2).flatten()

        start = finish

    train_df['svd_res_q1'] = q1res
    train_df['svd_res_q2'] = q2res
    train_df['svd_res_eucl'] = eucl
    train_df['svd_res_cos'] = cos

    train_df[[
        FieldsTrain.id, FieldsTrain.is_duplicate, 'svd_res_q1', 'svd_res_q2',
        'svd_res_eucl', 'svd_res_cos'
    ]].to_csv(join_path(dump_dir, 'train.csv'), index=False)

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['svddist.dataset'])

    logging.info('Computing test features')
    X = compute_feature_matrix(test_df, vectorizer, combine='stack')

    logging.info('Computing test SVD residuals')
    L = X.shape[0] / 2
    Xq1 = X[:L, :]
    Xq2 = X[L:, :]

    start = 0
    batch = 100
    eucl = np.zeros(Xq1.shape[0])
    cos = np.zeros(Xq1.shape[0])
    q1res = np.zeros(Xq1.shape[0])
    q2res = np.zeros(Xq1.shape[0])
    while start < Xq1.shape[0]:
        finish = min(start + batch, Xq1.shape[0])

        Xq1_batch = Xq1[start:finish, :]
        nq1 = (Xq1_batch.multiply(Xq1_batch)).sum(axis=1).flatten()

        Rq1 = safe_sparse_dot(Xq1_batch, VT.transpose()).dot(VT) - Xq1_batch
        nrq1 = np.sum(np.multiply(Rq1, Rq1), axis=1).flatten()

        Xq2_batch = Xq2[start:finish, :]
        nq2 = (Xq2_batch.multiply(Xq2_batch)).sum(axis=1).flatten()

        Rq2 = safe_sparse_dot(Xq2_batch, VT.transpose()).dot(VT) - Xq2_batch
        nrq2 = np.sum(np.multiply(Rq2, Rq2), axis=1).flatten()

        q1res[start:finish] = np.sqrt(nrq1) / np.sqrt(nq1)
        q2res[start:finish] = np.sqrt(nrq2) / np.sqrt(nq2)
        eucl[start:finish] = euclidean(Rq1, Rq2).flatten()
        cos[start:finish] = cosine(Rq1, Rq2).flatten()

        start = finish

    test_df['svd_res_q1'] = q1res
    test_df['svd_res_q2'] = q2res
    test_df['svd_res_eucl'] = eucl
    test_df['svd_res_cos'] = cos

    logging.info('Writing test dataset')
    test_df[[
        FieldsTest.test_id, 'svd_res_q1', 'svd_res_q2', 'svd_res_eucl',
        'svd_res_cos'
    ]].to_csv(join_path(dump_dir, 'test.csv'), index=False)
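The residual features computed above measure how much of each row of the sparse matrix lies outside the span of the top-k right singular vectors. A dense NumPy sketch of the same quantity (the function name is illustrative):

import numpy as np

def svd_residual_ratio(X, VT):
    # Project each row of X onto the subspace spanned by the rows of VT
    # (k x d, orthonormal), subtract, and return ||residual|| / ||row||.
    R = X @ VT.T @ VT - X
    return np.linalg.norm(R, axis=1) / np.linalg.norm(X, axis=1)

# Rows already inside the subspace give ~0; rows orthogonal to it give 1.
X = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 1.0]])
VT = np.array([[1.0, 0.0, 0.0]])
print(svd_residual_ratio(X, VT))   # -> [0. 1.]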
Example #19
def run(args, kwargs):
    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)

    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'orthogonalH':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)

    elif args.flow == 'permutation':
        snap_dir = snap_dir + '_' + 'kernelsize_' + str(args.kernel_size)
    elif args.flow == 'mixed':
        snap_dir = snap_dir + '_' + 'num_householder_' + str(
            args.num_householder)
    elif args.flow == 'cnf_rank':
        snap_dir = snap_dir + '_rank_' + str(
            args.rank) + '_' + args.dims + '_num_blocks_' + str(
                args.num_blocks)
    elif 'cnf' in args.flow:
        snap_dir = snap_dir + '_' + args.dims + '_num_blocks_' + str(
            args.num_blocks)

    if args.retrain_encoder:
        snap_dir = snap_dir + '_retrain-encoder_'
    elif args.evaluate:
        snap_dir = snap_dir + '_evaluate_'

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # logger
    utils.makedirs(args.snap_dir)

    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'),
                              filepath=os.path.abspath(__file__))
    logger.info(args)

    # SAVING
    torch.save(args, snap_dir + args.flow + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    #train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)
    args.dynamic_binarization = False
    args.input_type = 'binary'
    transform = transforms.Compose([
        transforms.Grayscale(1),
        transforms.Resize((28, 28), interpolation=2),
        transforms.ToTensor()
        #transforms.Normalize((0.5,), (0.5,))
    ])
    args.input_size = [1, 28, 28]
    train_loader = torch.utils.data.DataLoader(FashionMNIST(
        './data', train=True, download=True, transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True)
    N_mini_batches = len(train_loader)
    test_loader = torch.utils.data.DataLoader(FashionMNIST(
        './data', train=False, download=True, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=False)

    if not args.evaluate:

        # ==============================================================================================================
        # SELECT MODEL
        # ==============================================================================================================
        # flow parameters and architecture choice are passed on to model through args

        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            logger.info(f"Initializing decoder from {args.model_path}")
            dec_model = torch.load(args.model_path)
            dec_sd = {}
            for k, v in dec_model.state_dict().items():
                if 'p_x' in k:
                    dec_sd[k] = v
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()

        logger.info(model)

        if args.retrain_encoder:
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        #optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)
        optimizer = optim.Adamax(parameters, args.learning_rate, eps=1.e-7)

        # ==================================================================================================================
        # TRAINING AND EVALUATION
        # ==================================================================================================================
        def train(epoch):
            override_divergence_fn(model, "approximate")
            beta = min([(epoch * 1.) / max([args.warmup, 1.]), args.max_beta])
            model.train()
            train_loss_meter = AverageMeter()
            # NOTE: is_paired is 1 if the example is paired
            for batch_idx, (image, text) in enumerate(train_loader):

                if epoch < args.annealing_epochs:
                    # compute the KL annealing factor for the current mini-batch in the current epoch
                    annealing_factor = (
                        float(batch_idx + (epoch - 1) * N_mini_batches + 1) /
                        float(args.annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0

                if args.cuda:
                    image = image.cuda()
                    text = text.cuda()

                image = Variable(image)
                text = Variable(text)

                batch_size = len(image)

                # refresh the optimizer
                optimizer.zero_grad()
                # pass data through model
                recon_image_1, recon_text_1, mu_1, logvar_1, logj1, z01, zk1 = model(
                    image, text)
                recon_image_2, recon_text_2, mu_2, logvar_2, logj2, z02, zk2 = model(
                    image)
                recon_image_3, recon_text_3, mu_3, logvar_3, logj3, z03, zk3 = model(
                    text=text)

                # compute ELBO for each data combo
                joint_loss, rec1_1, rec1_2, kl_1 = elbo_loss(
                    recon_image_1,
                    image,
                    recon_text_1,
                    text,
                    mu_1,
                    logvar_1,
                    z01,
                    zk1,
                    logj1,
                    args,
                    lambda_image=1.0,
                    lambda_text=10.0,
                    annealing_factor=annealing_factor,
                    beta=beta)
                image_loss, rec1_2, rec2_2, kl_2 = elbo_loss(
                    recon_image_2,
                    image,
                    None,
                    None,
                    mu_2,
                    logvar_2,
                    z02,
                    zk2,
                    logj2,
                    args,
                    lambda_image=1.0,
                    lambda_text=10.0,
                    annealing_factor=annealing_factor,
                    beta=beta)
                text_loss, rec1, rec2, kl = elbo_loss(
                    None,
                    None,
                    recon_text_3,
                    text,
                    mu_3,
                    logvar_3,
                    z03,
                    zk3,
                    logj3,
                    args,
                    lambda_image=1.0,
                    lambda_text=10.0,
                    annealing_factor=annealing_factor,
                    beta=beta)
                #print("TEXT", r, "TEXTLOSS",text_loss, image_loss.shape, image_loss)
                train_loss = joint_loss + image_loss + text_loss  # joint_loss  # these are the 3 losses, one for each combination individually, but let's test with 2
                train_loss_meter.update(train_loss.item(), batch_size)
                # compute gradients and take step
                train_loss.backward()
                optimizer.step()

                if batch_idx % args.log_interval == 0:
                    print(
                        'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAnnealing-Factor: {:.3f}'
                        .format(epoch, batch_idx * len(image),
                                len(train_loader.dataset),
                                100. * batch_idx / len(train_loader),
                                train_loss_meter.avg, annealing_factor))

            print('====> Epoch: {}\tLoss: {:.4f}'.format(
                epoch, train_loss_meter.avg))

        def test(epoch):

            model.eval()
            beta = min([(epoch * 1.) / max([args.warmup, 1.]), args.max_beta])
            image_loss_meter = AverageMeter()
            text_loss_meter = AverageMeter()
            test_loss_meter = AverageMeter()
            override_divergence_fn(model, "brute_force")
            for batch_idx, (image, text) in enumerate(test_loader):
                if args.cuda:
                    image = image.cuda()
                    text = text.cuda()
                image = Variable(image, volatile=True)
                text = Variable(text, volatile=True)
                batch_size = len(image)

                recon_image_1, recon_text_1, mu_1, logvar_1, logj1, z01, zk1 = model(
                    image, text)
                recon_image_2, recon_text_2, mu_2, logvar_2, logj2, z02, zk2 = model(
                    image)
                recon_image_3, recon_text_3, mu_3, logvar_3, logj3, z03, zk3 = model(
                    text=text)

                # compute ELBO for each data combo
                joint_loss, rec1, rec2, kl = elbo_loss(recon_image_1, image,
                                                       recon_text_1, text,
                                                       mu_1, logvar_1, z01,
                                                       zk1, logj1, args)
                image_loss_meter.update(rec1.mean().item(), batch_size)
                text_loss_meter.update(rec2.mean().item(), batch_size)
                image_loss, rec1, rec2, kl = elbo_loss(recon_image_2, image,
                                                       None, None, mu_2,
                                                       logvar_2, z02, zk2,
                                                       logj2, args)
                image_loss_meter.update(rec1.mean().item(), batch_size)

                text_loss, rec1, rec2, kl = elbo_loss(None, None, recon_text_3,
                                                      text, mu_3, logvar_3,
                                                      z03, zk3, logj3, args)
                text_loss_meter.update(rec2.mean().item(), batch_size)

                test_loss = joint_loss + image_loss + text_loss
                test_loss_meter.update(test_loss.item(), batch_size)

            print('====> Test image loss: {:.4f}'.format(image_loss_meter.avg))
            print('====> Test text loss: {:.4f}'.format(text_loss_meter.avg))
            print('====> Test Loss: {:.4f}'.format(test_loss_meter.avg))
            return test_loss_meter.avg

        best_loss = sys.maxsize
        for epoch in range(1, args.epochs + 1):
            train(epoch)
            #print ("Test")
            test_loss = test(epoch)
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            # save the best model and current model
            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'args': args,
                    'best_loss': best_loss,
                    'n_latents': args.z_size,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                folder='./trained_models')
Example #20
def main(args):
    # logger
    print(args.no_display_loss)
    utils.makedirs(args.save)
    logger = utils.get_logger(
        logpath=os.path.join(args.save, "logs"),
        filepath=os.path.abspath(__file__),
        displaying=not args.no_display_loss,
    )

    if args.layer_type == "blend":
        logger.info("!! Setting time_scale from None to 1.0 for Blend layers.")
        args.time_scale = 1.0

    logger.info(args)

    device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu"
    )
    if args.use_cpu:
        device = torch.device("cpu")

    args.data = dataset.SCData.factory(args.dataset, args.max_dim)

    args.timepoints = args.data.get_unique_times()
    # Use maximum timepoint to establish integration_times
    # as some timepoints may be left out for validation etc.
    args.int_tps = (np.arange(max(args.timepoints) + 1) + 1.0) * args.time_scale

    regularization_fns, regularization_coeffs = create_regularization_fns(args)
    model = build_model_tabular(args, args.data.get_shape()[0], regularization_fns).to(
        device
    )
    growth_model = None
    if args.use_growth:
        growth_model_path = None
        if args.leaveout_timepoint == -1:
            growth_model_path = (
                "../data/externel/growth_model_v2.ckpt"
            )
        elif args.leaveout_timepoint in [1, 2, 3]:
            assert args.max_dim == 5
            growth_model_path = (
                "../data/growth/model_%d"
                % args.leaveout_timepoint
            )
        else:
            print("WARNING: Cannot use growth with this timepoint")

        if growth_model_path is not None:
            growth_model = torch.load(growth_model_path, map_location=device)
    if args.spectral_norm:
        add_spectral_norm(model)
    set_cnf_options(args, model)

    if args.test:
        state_dict = torch.load(args.save + "/checkpt.pth", map_location=device)
        model.load_state_dict(state_dict["state_dict"])
        # if "growth_state_dict" not in state_dict:
        #    print("error growth model note in save")
        #    growth_model = None
        # else:
        #    checkpt = torch.load(args.save + "/checkpt.pth", map_location=device)
        #    growth_model.load_state_dict(checkpt["growth_state_dict"])
        # TODO can we load the arguments from the save?
        # eval_utils.generate_samples(
        #    device, args, model, growth_model, timepoint=args.leaveout_timepoint
        # )
        # with torch.no_grad():
        #    evaluate(device, args, model, growth_model)
    #    exit()
    else:
        logger.info(model)
        n_param = count_parameters(model)
        logger.info("Number of trainable parameters: {}".format(n_param))

        train(
            device,
            args,
            model,
            growth_model,
            regularization_coeffs,
            regularization_fns,
            logger,
        )

    if args.data.data.shape[1] == 2:
        plot_output(device, args, model)
def trainAE(net,
            train_loader,
            val_loader,
            saveDir,
            sStartTime,
            argType=torch.float32,
            device=torch.device('cpu')):
    """

    :param net:          AutoEncoder
    :param train_loader: MNIST loader of training data
    :param val_loader:   MNIST loader of validation data
    :param saveDir:      string, path
    :param sStartTime:   string, start time
    :param argType:      torch type
    :param device:       torch device
    :return:      the trained AutoEncoder, with net.mu and net.std set from the training data
    """
    print("training auto_encoder")

    cvt = lambda x: x.type(argType).to(device, non_blocking=True)
    utils.makedirs(saveDir)

    # specify loss function
    criterion = nn.MSELoss()

    # specify optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    best_loss = float('inf')
    bestParams = None

    # number of epochs to train the model
    n_epochs = 600

    for epoch in range(1, n_epochs + 1):

        # train the encoder-decoder
        net.train()
        train_loss = 0.0
        for data in train_loader:
            # _ stands in for labels, here
            images, _ = data
            # flatten images
            images = images.view(images.size(0), -1)
            images = cvt(images)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)

        # validate the encoder-decoder (no gradient updates on validation data)
        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in val_loader:
                images, _ = data
                images = images.view(images.size(0), -1)
                images = cvt(images)

                outputs = net(images)
                loss = criterion(outputs, images)
                val_loss += loss.item() * images.size(0)

        # print average training statistics; different batch sizes will scale these differently
        train_loss = train_loss / len(train_loader)
        val_loss = val_loss / len(val_loader)
        print('Epoch: {} \tTraining Loss: {:.6f} \t Validation Loss: {:.6f}'.
              format(epoch, train_loss, val_loss))

        # save best set of parameters
        if val_loss < best_loss:
            best_loss = val_loss
            bestParams = net.state_dict()

        # plot
        if epoch % 20 == 0:
            net.eval()
            sSavePath = os.path.join(
                saveDir, 'figs',
                sStartTime + '_autoencoder{:d}.png'.format(epoch))
            xRecreate = net(images)
            plotAutoEnc(images, xRecreate, sSavePath)

        # shrink step size
        if epoch % 150 == 0:
            for p in optimizer.param_groups:
                p['lr'] /= 10.0
            print("lr: ", p['lr'])

    d = net.d

    # compute mean and std for normalization
    mu = torch.zeros((1, d), dtype=argType, device=device)
    musqrd = torch.zeros((1, d), dtype=argType, device=device)
    totImages = 0

    net.load_state_dict(bestParams)

    i = 0
    net.eval()
    with torch.no_grad():
        for data in train_loader:
            # _ stands in for labels, here
            images, _ = data
            images = images.view(images.size(0), -1)
            images = cvt(images)
            outputs = net.encode(images)
            nImages = outputs.shape[0]
            totImages += nImages
            mu += torch.mean(outputs, dim=0, keepdims=True)  # *nImages
            musqrd += torch.mean(outputs**2, dim=0, keepdims=True)  # *nImages

            # check quality
            if i == 0:
                sSavePath = os.path.join(saveDir, 'figs',
                                         sStartTime + '_autoencoder.png')
                outputs = (net.encode(images) - 2.34) / 0.005
                xRecreate = net.decode(outputs * 0.005 + 2.34)
                plotAutoEnc(images, xRecreate, sSavePath)

                sSavePath = os.path.join(saveDir, 'figs',
                                         sStartTime + '_noise_autoencoder.png')
                xRecreate = net.decode(outputs +
                                       1.0 * torch.randn_like(outputs))
                plotAutoEnc(images, xRecreate, sSavePath)

            i += 1

        mu = mu / i
        musqrd = musqrd / i
        std = torch.sqrt(torch.abs(mu**2 - musqrd))

        mu.requires_grad = False
        std.requires_grad = False
        net.mu = mu
        net.std = std

        torch.save({
            'state_dict': net.state_dict(),
        }, os.path.join(saveDir, sStartTime + '_autoenc_checkpt.pth'))

        return net
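A hedged usage sketch for trainAE follows; the AutoEncoder constructor and its arguments are illustrative assumptions (the model only needs to accept flattened 784-dimensional MNIST inputs and expose encode, decode and a d attribute, as used above):

import torch
from torchvision import datasets, transforms

# Hypothetical setup; AutoEncoder(d=..., d_latent=...) is an assumed constructor.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = transforms.ToTensor()
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transform),
    batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, download=True, transform=transform),
    batch_size=256, shuffle=False)

net = AutoEncoder(d=784, d_latent=32).to(device)   # hypothetical model class
net = trainAE(net, train_loader, val_loader,
              saveDir='experiments/autoenc', sStartTime='2021-01-01_00-00-00',
              argType=torch.float32, device=device)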
Example #22
# -*- coding: utf-8 -*-
"""
CP-Flow on toy conditional distributions
"""

import gc
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
from lib.flows import SequentialFlow, DeepConvexFlow, ActNorm
from lib.icnn import PICNN as PICNN
from data.toy_data import OneDMixtureOfGaussians as ToyData
from lib.utils import makedirs

makedirs('figures/toy/cond_MoG/')


def savefig(fn):
    plt.savefig(f'figures/toy/cond_MoG/{fn}')


torch.set_default_dtype(torch.float64)

batch_size_train = 128
batch_size_test = 64

# noinspection PyUnresolvedReferences
train_loader = torch.utils.data.DataLoader(ToyData(50000),
                                           batch_size=batch_size_train,
                                           shuffle=True)
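The OneDMixtureOfGaussians class imported above is not shown here; a hypothetical stand-in, assuming the dataset yields (sample, context) pairs from a 1-D two-component mixture (the actual data.toy_data implementation may differ), could be sketched as:

import numpy as np
import torch
from torch.utils.data import Dataset

class ToyCondMoG(Dataset):
    """Hypothetical 1-D conditional mixture of Gaussians:
    context c ~ Bernoulli(0.5), x | c ~ N(+/-2, 0.5**2)."""

    def __init__(self, n_samples):
        c = np.random.randint(0, 2, size=n_samples)
        x = np.where(c == 0, -2.0, 2.0) + 0.5 * np.random.randn(n_samples)
        self.x = torch.from_numpy(x[:, None])                    # (n, 1), float64
        self.c = torch.from_numpy(c[:, None].astype(np.float64))

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        return self.x[idx], self.c[idx]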
Example #23
def run(args, kwargs):
    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    if args.automatic_saving:
        path = '{}/{}/{}/{}/{}/{}/{}/{}/{}/'.format(args.solver, args.dataset,
                                                    args.layer_type, args.atol,
                                                    args.rtol, args.atol_start,
                                                    args.rtol_start,
                                                    args.warmup_steps,
                                                    args.manual_seed)
    else:
        path = 'test/'

    args.snap_dir = os.path.join(args.out_dir, path)

    if not os.path.exists(args.snap_dir):
        os.makedirs(args.snap_dir)

    # logger
    utils.makedirs(args.snap_dir)
    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'),
                              filepath=os.path.abspath(__file__))

    logger.info(args)

    # SAVING
    torch.save(args, args.snap_dir + 'config.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    if not args.evaluate:

        nfef_meter = utils.AverageMeter()
        nfeb_meter = utils.AverageMeter()

        # ==============================================================================================================
        # SELECT MODEL
        # ==============================================================================================================
        # flow parameters and architecture choice are passed on to model through args

        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            logger.info(f"Initializing decoder from {args.model_path}")
            dec_model = torch.load(args.model_path)
            dec_sd = {}
            for k, v in dec_model.state_dict().items():
                if 'p_x' in k:
                    dec_sd[k] = v
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()

        logger.info(model)
        logger.info("Number of trainable parameters: {}".format(
            count_parameters(model)))

        if args.retrain_encoder:
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)

        # ==================================================================================================================
        # TRAINING
        # ==================================================================================================================
        train_loss = []
        val_loss = []

        # for early stopping
        best_loss = np.inf
        best_bpd = np.inf
        e = 0
        epoch = 0

        train_times = []

        for epoch in range(1, args.epochs + 1):
            atol, rtol = update_tolerances(args, epoch, decay_factors)
            print(atol)
            set_cnf_options(args, atol, rtol, model)

            t_start = time.time()

            if 'cnf' not in args.flow:
                tr_loss = train(epoch, train_loader, model, optimizer, args,
                                logger)
            else:
                tr_loss, nfef_meter, nfeb_meter = train(
                    epoch, train_loader, model, optimizer, args, logger,
                    nfef_meter, nfeb_meter)

            train_loss.append(tr_loss)
            train_times.append(time.time() - t_start)
            logger.info('One training epoch took %.2f seconds' %
                        (time.time() - t_start))

            v_loss, v_bpd = evaluate(val_loader,
                                     model,
                                     args,
                                     logger,
                                     epoch=epoch)

            val_loss.append(v_loss)

            # early-stopping
            if v_loss < best_loss:
                e = 0
                best_loss = v_loss
                if args.input_type != 'binary':
                    best_bpd = v_bpd
                logger.info('->model saved<-')
                torch.save(model, args.snap_dir + 'model.model')
                # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')

            elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
                e += 1
                if e > args.early_stopping_epochs:
                    break

            if args.input_type == 'binary':
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(
                        e, args.early_stopping_epochs, best_loss))

            else:
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'
                    .format(e, args.early_stopping_epochs, best_loss,
                            best_bpd))

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

        train_loss = np.hstack(train_loss)
        val_loss = np.array(val_loss)

        plot_training_curve(train_loss,
                            val_loss,
                            fname=args.snap_dir + '/training_curve.pdf')

        # training time per epoch
        train_times = np.array(train_times)
        mean_train_time = np.mean(train_times)
        std_train_time = np.std(train_times, ddof=1)
        logger.info('Average train time per epoch: %.2f +/- %.2f' %
                    (mean_train_time, std_train_time))

        # ==================================================================================================================
        # EVALUATION
        # ==================================================================================================================

        logger.info(args)
        logger.info('Stopped after %d epochs' % epoch)
        logger.info('Average train time per epoch: %.2f +/- %.2f' %
                    (mean_train_time, std_train_time))

        final_model = torch.load(args.snap_dir + 'model.model')
        validation_loss, validation_bpd = evaluate(val_loader, final_model,
                                                   args, logger)

    else:
        validation_loss = "N/A"
        validation_bpd = "N/A"
        logger.info(f"Loading model from {args.model_path}")
        final_model = torch.load(args.model_path)

    test_loss, test_bpd = evaluate(test_loader,
                                   final_model,
                                   args,
                                   logger,
                                   testing=True)

    logger.info(
        'FINAL EVALUATION ON VALIDATION SET. ELBO (VAL): {:.4f}'.format(
            validation_loss))
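update_tolerances and decay_factors are not defined in this snippet; a hedged sketch of what such a schedule could look like (geometric decay from atol_start/rtol_start towards atol/rtol over warmup_steps epochs, an assumption rather than the repository's actual implementation):

def update_tolerances(args, epoch, decay_factors):
    """Hypothetical schedule matching the call above: tighten the ODE solver
    tolerances geometrically during warmup, then hold them at (atol, rtol)."""
    atol_decay, rtol_decay = decay_factors
    step = min(epoch, args.warmup_steps)
    atol = max(args.atol, args.atol_start * atol_decay ** step)
    rtol = max(args.rtol, args.rtol_start * rtol_decay ** step)
    return atol, rtol

# decay factors chosen so the start value reaches the target after warmup_steps:
# decay_factors = ((args.atol / args.atol_start) ** (1.0 / args.warmup_steps),
#                  (args.rtol / args.rtol_start) ** (1.0 / args.warmup_steps))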
Example #24
    type=float,
    default=5.,
    help="We subsample points in the interval [0, args.max_tp]")
parser.add_argument('--noise-weight',
                    type=float,
                    default=0.01,
                    help="Noise amplitude for generated traejctories")

parser.add_argument('--gpu', type=int, default=0, help="GPU")

args = parser.parse_args()

device = torch.device(
    "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")
file_name = os.path.basename(__file__)[:-3]
utils.makedirs(args.save)

#####################################################################################################

if __name__ == '__main__':
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    experimentID = args.load
    if experimentID is None:
        # Make a new experiment ID
        experimentID = int(SystemRandom().random() * 100000)
    ckpt_path = os.path.join(args.save,
                             "experiment_" + str(experimentID) + '.ckpt')

    start = time.time()
Example #25
def main(conf):
    dump_dir = conf['svddist.dump.dir']
    makedirs(dump_dir)

    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['svddist.dataset'])

    vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
    try:
        logging.info('Loading vectorizer dump')
        vectorizer = joblib.load(vectorizer_file)
    except:
        logging.info('Loading vectorizer dump failed')
        logging.info('Training vectorizer')
        vectorizer = train_vectorizer(train_df, **conf['svddist.vectorizer'])

        logging.info('Writing vectorizer dump')
        joblib.dump(vectorizer, vectorizer_file)

    features_file = join_path(dump_dir, 'features_train.npz')
    logging.info('Loading cached train feature matrix from %s', features_file)
    X = load_feature_matrix(features_file)

    if X is None:
        logging.info('Unable to load cached train feature matrix')

        logging.info('Computing train feature matrix')
        X = compute_feature_matrix(train_df, vectorizer, combine='stack')

        logging.info('Writing train feature matrix to %s', features_file)
        save_feature_matrix(X, features_file)

    logging.info('Loading SVD decomposition')
    k = conf['svddist.svd'].get_int('k')
    singular_values_file = join_path(dump_dir, 'singular_values.txt')
    singular_vectors_file = join_path(dump_dir, 'singular_vectors.npz')
    try:
        S = np.loadtxt(singular_values_file)
        VT = np.load(singular_vectors_file)['VT']
        assert k == len(S)
    except:
        logging.info('Loading SVD decomposition failed')
        logging.info('Computing SVD decomposition')
        S, VT = compute_svd(X.asfptype(), **conf['svddist.svd'])

        logging.info('Writing singular values to file')
        np.savetxt(singular_values_file, S)
        np.savez(singular_vectors_file, VT=VT)

    logging.info('Computing train SVD features')
    Sinv = np.diag(1. / S) * np.sqrt(X.shape[0])
    U = X.dot(VT.transpose().dot(Sinv))

    logging.info('Train feature matrix dimensions: %s', U.shape)

    logging.info('Symmetrizing input features')
    Uq1, Uq2 = np.vsplit(U, 2)
    del U

    logging.info('Computing euclidean')
    train_df['svd_eucl'] = euclidean(Uq1, Uq2)

    logging.info('Computing cosine')
    train_df['svd_cosine'] = cosine(Uq1, Uq2)
    del Uq1, Uq2

    train_df[[
        FieldsTrain.id,
        FieldsTrain.is_duplicate,
        'svd_eucl',
        'svd_cosine'
    ]].to_csv(join_path(dump_dir, 'train.csv'), index=False)

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['svddist.dataset'])

    logging.info('Computing test features')
    X = compute_feature_matrix(test_df, vectorizer, combine='stack')

    logging.info('Computing test SVD features')
    U = X.dot(VT.transpose().dot(Sinv))

    logging.info('Symmetrizing input features')
    Uq1, Uq2 = np.vsplit(U, 2)
    del U

    logging.info('Computing test euclidean')
    test_df['svd_eucl'] = euclidean(Uq1, Uq2)

    logging.info('Computing test cosine')
    test_df['svd_cosine'] = cosine(Uq1, Uq2)
    del Uq1, Uq2

    logging.info('Writing test dataset')
    test_df[[
        FieldsTest.test_id,
        'svd_eucl',
        'svd_cosine'
    ]].to_csv(join_path(dump_dir, 'test.csv'), index=False)
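The projection used above, U = X V diag(1/S) sqrt(n), maps each TF-IDF row onto the top-k right singular vectors and rescales the coordinates to roughly unit variance; a self-contained numpy/scipy sketch of the same whitening (independent of this repository's compute_feature_matrix / compute_svd helpers):

import numpy as np
from scipy.sparse import random as sparse_random
from scipy.sparse.linalg import svds

# Toy stand-in for the sparse TF-IDF matrix X (n_rows x n_terms).
X = sparse_random(200, 50, density=0.1, format='csr', dtype=np.float64)

k = 10
_, S, VT = svds(X, k=k)                      # truncated SVD: X ~ U diag(S) VT

# Same whitening as above: project rows onto the top-k right singular vectors
# and rescale so the resulting features have roughly unit variance.
Sinv = np.diag(1.0 / S) * np.sqrt(X.shape[0])
U = X.dot(VT.transpose().dot(Sinv))          # shape (200, k)
print(U.shape, U.var(axis=0))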
def main(conf):
    dump_dir = abspath(conf['libffm']['dump']['dir'])
    makedirs(dump_dir)

    data_dir = abspath(conf['libffm']['data']['dir'])
    dfc = DataFrameCols(data_dir)

    target = 'is_attributed'
    fields = {'ip': 0, 'app': 1, 'device': 2, 'os': 3, 'channel': 4}
    shifts = {
        'ip': 0,
        'app': 364779,
        'device': 365548,
        'os': 369776,
        'channel': 370733
    }

    # 1) write test data
    # logging.info('Writing test data in libffm format')
    # df = dfc.load_df(columns=['id', target] + list(fields.keys()))
    # df = df[df[target] == -1]
    # df[target] = 0  # do we need this?
    # df = write_libffm_data(df, target, fields, shifts)
    test_fname = join_path(dump_dir, 'test.txt')
    # df[['data']].to_csv(test_fname, header=False, index=False, quoting=csv.QUOTE_NONE)
    # del df
    # gc.collect()
    # exit()

    # 2) write training folds
    # logging.info('Writing k-fold training data')
    # df = dfc.load_df(columns=['id', target] + list(fields.keys()))
    # df = df[df[target] >= 0]
    # df = write_libffm_data(df, target, fields, shifts)
    #
    # folds = []
    # skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1337)
    # for fold_idx, valid_idx in skf.split(df['id'].values, df[target].values):
    #     folds.append((fold_idx, valid_idx))
    #
    # with open(join_path(dump_dir, 'folds.pkl'), 'wb') as f:
    #     pickle.dump(folds, f)
    #
    # for j_fold, (fold_idx, valid_idx) in enumerate(folds):
    #     logging.info('Writing fold %d in libffm format', j_fold)
    #     train_fname = join_path(dump_dir, 'train_fold_%d.txt' % j_fold)
    #     df.loc[fold_idx, ['data']].to_csv(train_fname, header=False, index=False, quoting=csv.QUOTE_NONE)
    #     valid_fname = join_path(dump_dir, 'valid_fold_%d.txt' % j_fold)
    #     df.loc[valid_idx, ['data']].to_csv(valid_fname, header=False, index=False, quoting=csv.QUOTE_NONE)
    #
    # del df
    # gc.collect()
    # exit()

    df = dfc.load_df(columns=['id', target])
    df = df[df[target] >= 0]

    with open(join_path(dump_dir, 'folds.pkl'), 'rb') as f:
        folds = pickle.load(f)

    chdir(dump_dir)
    for j_fold, (fold_idx, valid_idx) in enumerate(folds):
        logging.info('Training on fold %d', j_fold)
        train_fname = join_path(dump_dir, 'train_fold_%d.txt' % j_fold)
        valid_fname = join_path(dump_dir, 'valid_fold_%d.txt' % j_fold)
        model_fname = join_path(dump_dir, 'model_%d.bin' % j_fold)
        proc = subprocess.run([
            'ffm-train', '-p', valid_fname, '-l',
            str(conf['libffm']['options']['lambda']), '-k',
            str(conf['libffm']['options']['factor']), '-r',
            str(conf['libffm']['options']['learning_rate']), '-t',
            str(conf['libffm']['options']['num_iter']), train_fname,
            model_fname
        ],
                              stdout=subprocess.PIPE,
                              check=True)

        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)
        logging.info(proc.stdout.decode('utf-8'))

        train_pred_file = join_path(dump_dir, 'train_pred_%d.txt' % j_fold)
        proc = subprocess.run(
            ['ffm-predict', train_fname, model_fname, train_pred_file],
            stdout=subprocess.PIPE,
            check=True)

        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)

        with open(train_pred_file, 'r') as f:
            p_train = np.array([float(s) for s in f.readlines()],
                               dtype=np.float32)
            auc_train = roc_auc_score(df.loc[fold_idx, target].values, p_train)

        valid_pred_file = join_path(dump_dir, 'valid_pred_%d.txt' % j_fold)
        proc = subprocess.run(
            ['ffm-predict', valid_fname, model_fname, valid_pred_file],
            stdout=subprocess.PIPE,
            check=True)

        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)

        with open(valid_pred_file, 'r') as f:
            p_valid = np.array([float(s) for s in f.readlines()],
                               dtype=np.float32)
            auc_valid = roc_auc_score(df.loc[valid_idx, target].values,
                                      p_valid)

        logging.info('Fold quality: auc_train=%f auc_valid=%f', auc_train,
                     auc_valid)

        test_pred_file = join_path(dump_dir, 'test_pred_%d.txt' % j_fold)
        proc = subprocess.run(
            ['ffm-predict', test_fname, model_fname, test_pred_file],
            stdout=subprocess.PIPE,
            check=True)

        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)
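The fields/shifts dictionaries above encode rows in the libffm 'label field:index:value' format, where each categorical value is offset by its field's shift so that feature indices are globally unique; the repository's write_libffm_data is not shown, but a hedged sketch of such a row encoder could look like this:

def to_libffm_row(label, row, fields, shifts):
    """Hypothetical encoder matching the fields/shifts layout above; row is a
    dict-like mapping of column name to raw categorical value."""
    tokens = [str(label)]
    for name, field_id in fields.items():
        global_index = shifts[name] + int(row[name])
        tokens.append('{}:{}:1'.format(field_id, global_index))
    return ' '.join(tokens)

# to_libffm_row(1, {'ip': 123, 'app': 7, 'device': 1, 'os': 19, 'channel': 245},
#               fields, shifts)
# -> '1 0:123:1 1:364786:1 2:365549:1 3:369795:1 4:370978:1'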
Example #27
def visualize(epoch, model, gmm, itr, real_imgs, global_itr):
    print("Starting Visualisation")
    model.eval()
    gmm.eval()
    utils.makedirs(os.path.join(args.save, 'imgs'))

    for x_test, y_test in test_loader:
        # x_test = x_test[0,...].unsqueeze(0)
        # y_test = y_test[0,...].unsqueeze(0)
        x_test = x_test.to(device)
        ### TEMPLATES ###
        D = real_imgs[:, 0, ...].unsqueeze(1)
        D = rescale(D)  # Scale to [0,1] interval
        D = D.repeat(1, args.nclusters, 1, 1)
        x = real_imgs
        with torch.no_grad():
            if isinstance(model, torch.nn.DataParallel):
                z_logp = model.module(D.view(-1, *input_size[1:]),
                                      0,
                                      classify=False)
            else:
                z_logp = model(D.view(-1, *input_size[1:]), 0, classify=False)

            z, delta_logp = z_logp
            if isinstance(gmm, torch.nn.DataParallel):
                logpz, params = gmm.module(
                    z.view(-1, args.nclusters, args.imagesize, args.imagesize),
                    x.permute(0, 2, 3, 1))
            else:
                logpz, params = gmm(
                    z.view(-1, args.nclusters, args.imagesize, args.imagesize),
                    x.permute(0, 2, 3, 1))

        mu_tmpl, std_tmpl, gamma = params
        mu_tmpl = mu_tmpl.cpu().numpy()
        std_tmpl = std_tmpl.cpu().numpy()
        gamma = gamma.cpu().numpy()

        mu_tmpl = mu_tmpl[..., np.newaxis]
        std_tmpl = std_tmpl[..., np.newaxis]

        mu_tmpl = np.swapaxes(mu_tmpl, 0, 1)  # (3,4,1) -> (4,3,1)
        mu_tmpl = np.swapaxes(mu_tmpl, 1, 2)  # (4,3,1) -> (4,1,3)
        std_tmpl = np.swapaxes(std_tmpl, 0, 1)  # (3,4,1) -> (4,3,1)
        std_tmpl = np.swapaxes(std_tmpl, 1, 2)  # (4,3,1) -> (4,1,3)

        ### DEPLOY ###
        D = x_test[:, 0, ...].unsqueeze(1)
        D = rescale(D)  # Scale to [0,1] interval
        D = D.repeat(1, args.nclusters, 1, 1)
        with torch.no_grad():
            if isinstance(model, torch.nn.DataParallel):
                z_logp = model.module(D.view(-1, *input_size[1:]),
                                      0,
                                      classify=False)
            else:
                z_logp = model(D.view(-1, *input_size[1:]), 0, classify=False)

            z, delta_logp = z_logp
            if isinstance(gmm, torch.nn.DataParallel):
                logpz, params = gmm.module(
                    z.view(-1, args.nclusters, args.imagesize, args.imagesize),
                    x_test.permute(0, 2, 3, 1))
            else:
                logpz, params = gmm(
                    z.view(-1, args.nclusters, args.imagesize, args.imagesize),
                    x_test.permute(0, 2, 3, 1))

        mu, std, pi = params
        mu = mu.cpu().numpy()
        std = std.cpu().numpy()
        pi = pi.cpu().numpy()

        mu = mu[..., np.newaxis]
        std = std[..., np.newaxis]

        mu = np.swapaxes(mu, 0, 1)  # (3,4,1) -> (4,3,1)
        mu = np.swapaxes(mu, 1, 2)  # (4,3,1) -> (4,1,3)
        std = np.swapaxes(std, 0, 1)  # (3,4,1) -> (4,3,1)
        std = np.swapaxes(std, 1, 2)  # (4,3,1) -> (4,1,3)

        X_hsd = np.swapaxes(x_test.cpu().numpy(), 1, 2)
        X_hsd = np.swapaxes(X_hsd, 2, 3)

        X_conv = imgtf.image_dist_transform(X_hsd, mu, std, pi, mu_tmpl,
                                            std_tmpl, args)

        # save a random image from the batch
        im_no = random.randint(0, args.batchsize - 1)
        im_tmpl = real_imgs[im_no, ...].cpu().numpy()
        im_tmpl = np.swapaxes(im_tmpl, 0, 1)
        im_tmpl = np.swapaxes(im_tmpl, 1, -1)
        im_tmpl = imgtf.HSD2RGB_Numpy(im_tmpl)
        im_tmpl = (im_tmpl * 255).astype('uint8')
        im_tmpl = Image.fromarray(im_tmpl)
        im_tmpl.save(
            os.path.join(args.save, 'imgs', f'im_tmpl_{global_itr}.png'))

        im_test = x_test[im_no, ...].cpu().numpy()
        im_test = np.swapaxes(im_test, 0, 1)
        im_test = np.swapaxes(im_test, 1, -1)
        im_test = imgtf.HSD2RGB_Numpy(im_test)
        im_test = (im_test * 255).astype('uint8')
        im_test = Image.fromarray(im_test)
        im_test.save(
            os.path.join(args.save, 'imgs', f'im_test_{global_itr}.png'))

        im_D = D[0, 0, ...].cpu().numpy()
        im_D = (im_D * 255).astype('uint8')
        im_D = Image.fromarray(im_D, 'L')
        im_D.save(os.path.join(args.save, 'imgs', f'im_D_{global_itr}.png'))

        im_conv = X_conv[im_no, ...].reshape(args.imagesize, args.imagesize, 3)
        im_conv = Image.fromarray(im_conv)
        im_conv.save(
            os.path.join(args.save, 'imgs', f'im_conv_{global_itr}.png'))

        # gamma
        ClsLbl = np.argmax(gamma, axis=-1)
        ClsLbl = ClsLbl.astype('float32')

        ColorTable = [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0],
                      [0, 255, 255], [255, 0, 255]]
        colors = np.array(ColorTable, dtype='float32')
        Msk = np.tile(np.expand_dims(ClsLbl, axis=-1), (1, 1, 1, 3))
        for k in range(0, args.nclusters):
            #                                       1 x 256 x 256 x 1                           1 x 3
            ClrTmpl = np.einsum('anmd,df->anmf',
                                np.expand_dims(np.ones_like(ClsLbl), axis=3),
                                np.reshape(colors[k, ...], [1, 3]))
            # ClrTmpl = 1 x 256 x 256 x 3
            Msk = np.where(np.equal(Msk, k), ClrTmpl, Msk)

        im_gamma = Msk[0].astype('uint8')
        im_gamma = Image.fromarray(im_gamma)
        im_gamma.save(
            os.path.join(args.save, 'imgs', f'im_gamma_{global_itr}.png'))

        # pi
        ClsLbl = np.argmax(pi, axis=-1)
        ClsLbl = ClsLbl.astype('float32')

        ColorTable = [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0],
                      [0, 255, 255], [255, 0, 255]]
        colors = np.array(ColorTable, dtype='float32')
        Msk = np.tile(np.expand_dims(ClsLbl, axis=-1), (1, 1, 1, 3))
        for k in range(0, args.nclusters):
            #                                       1 x 256 x 256 x 1                           1 x 3
            ClrTmpl = np.einsum('anmd,df->anmf',
                                np.expand_dims(np.ones_like(ClsLbl), axis=3),
                                np.reshape(colors[k, ...], [1, 3]))
            # ClrTmpl = 1 x 256 x 256 x 3
            Msk = np.where(np.equal(Msk, k), ClrTmpl, Msk)

        im_gamma = Msk[0].astype('uint8')
        im_gamma = Image.fromarray(im_gamma)
        im_gamma.save(
            os.path.join(args.save, 'imgs', f'im_pi_{global_itr}.png'))

        model.train()
        gmm.train()
        return
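The einsum/np.where loop above paints each pixel with the colour of its argmax cluster; a simpler, vectorised equivalent (a sketch, not the repository's code) is:

import numpy as np

def labels_to_rgb(responsibilities, color_table):
    """Map per-pixel cluster responsibilities (N, H, W, K) to an RGB mask by
    colouring each pixel with its argmax cluster."""
    labels = np.argmax(responsibilities, axis=-1)        # (N, H, W)
    colors = np.asarray(color_table, dtype=np.uint8)     # (K, 3)
    return colors[labels]                                # (N, H, W, 3)

# mask = labels_to_rgb(gamma, ColorTable)
# Image.fromarray(mask[0]) should reproduce the im_gamma image saved above.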
Example #28
def main(conf):
    logging.info('Loading train dataset')
    train_df = load_train_df(conf['svd.dataset'])

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['svd.dataset'])

    for f, cnf in conf['svd'].iteritems():
        if f == 'dataset':
            continue

        if not cnf.get('enabled', True):
            continue

        logging.info('Start traning SVD model %s', f)

        dump_dir = cnf['dump.dir']
        makedirs(dump_dir)
        logging.info('Dump %s', dump_dir)

        vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
        try:
            logging.info('Loading vectorizer dump')
            vectorizer = joblib.load(vectorizer_file)
        except:
            logging.info('Loading vectorizer dump failed')
            logging.info('Training vectorizer: %s', cnf['vectorizer'])
            vectorizer = train_vectorizer(train_df, **cnf['vectorizer'])

            logging.info('Writing vectorizer dump')
            joblib.dump(vectorizer, vectorizer_file)

        train_features_matrix_file = join_path(dump_dir, 'train_features.npz')
        logging.info('Loading train features matrix')
        X = load_feature_matrix(train_features_matrix_file)
        if X is None:
            logging.info('Loading train feature matrix failed')
            logging.info('Computing train feature matrix')
            X = compute_feature_matrix(train_df, vectorizer, combine=cnf.get('model.transform', None))

            logging.info('Writing train feature matrix dump')
            save_feature_matrix(X, train_features_matrix_file)

        logging.info('Computing SVD decomposition')
        ksvd = cnf['model'].get_int('k')
        S, VT = compute_svd(X.asfptype(), **cnf['model'])
        Sinv = np.diag(1. / S) * np.sqrt(X.shape[0])
        logging.info('Singular values %s', S)

        logging.info('Computing train SVD features')
        U = X.dot(VT.transpose()).dot(Sinv)
        logging.info('Train features variance: %s', np.var(U, axis=0))

        features = map(lambda i: f + '_%d' % i, range(U.shape[1]))
        if cnf.get('model.transform', None) == 'stack':
            features_q1 = map(lambda s: s + '_q1', features)
            features_q2 = map(lambda s: s + '_q2', features)
            features = features_q1 + features_q2
            train_features_df_q1 = pd.DataFrame(U[:train_df.shape[0], :], columns=features_q1)
            train_features_df_q2 = pd.DataFrame(U[train_df.shape[0]:, :], columns=features_q2)
            train_df = pd.concat([train_df, train_features_df_q1, train_features_df_q2], axis=1)

            train_df['svd_dist_eucl'] = train_df.apply(lambda r: compute_svd_distance_eucl(r, f, ksvd), axis=1)
            features.append('svd_dist_eucl')
        else:
            train_features_df = pd.DataFrame(U, columns=features)
            train_df = pd.concat([train_df, train_features_df], axis=1)

        for feature in features:
            logging.info('Feature %s AUC=%s', feature, roc_auc_score(train_df[FieldsTrain.is_duplicate], train_df[feature]))

        logging.info('Writing train features dump')
        train_file = join_path(dump_dir, 'train.csv')
        train_df[[FieldsTrain.id, FieldsTrain.is_duplicate] + features].to_csv(train_file, index=False)

        test_features_matrix_file = join_path(dump_dir, 'test_features.npz')
        logging.info('Loading test features matrix')
        X = load_feature_matrix(test_features_matrix_file)
        if X is None:
            logging.info('Loading test feature matrix failed')
            logging.info('Computing test feature matrix')
            X = compute_feature_matrix(test_df, vectorizer, combine=cnf.get('model.transform', None))

            logging.info('Writing test feature matrix dump')
            save_feature_matrix(X, test_features_matrix_file)

        U = X.dot(VT.transpose()).dot(Sinv)
        logging.info('Test features variance: %s', np.var(U, axis=0))

        logging.info('Computing test SVD features')
        if cnf.get('model.transform', None) == 'stack':
            logging.info('Computing q1 test SVD features')
            test_features_df_q1 = pd.DataFrame(U[:test_df.shape[0], :], columns=features_q1)
            test_df = pd.concat([test_df, test_features_df_q1], axis=1)
            del test_features_df_q1

            logging.info('Computing q2 test SVD features')
            test_features_df_q2 = pd.DataFrame(U[test_df.shape[0]:, :], columns=features_q2)
            test_df = pd.concat([test_df, test_features_df_q2], axis=1)
            del test_features_df_q2

            logging.info('Computing svd distances')
            test_df['svd_dist_eucl'] = test_df.apply(lambda r: compute_svd_distance_eucl(r, f, ksvd), axis=1)
        else:
            test_features_df = pd.DataFrame(U, columns=features)
            test_df = pd.concat([test_df, test_features_df], axis=1)

        logging.info('Writing test features dump')
        test_file = join_path(dump_dir, 'test.csv')
        test_df[[FieldsTest.test_id] + features].to_csv(test_file, index=False)
Example #29
if args.extrap == "True":
    print("Running extrap mode" + "-" * 80)
    args.mode = "extrap"
elif args.extrap == "False":
    print("Running interp mode" + "-" * 80)
    args.mode = "interp"

#####################################################################################################

if __name__ == '__main__':
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    ############ Saving Path and Preload.
    file_name = os.path.basename(__file__)[:-3]  # run_models
    utils.makedirs(args.save)
    utils.makedirs(args.save_graph)

    experimentID = args.load
    if experimentID is None:
        # Make a new experiment ID
        experimentID = int(SystemRandom().random() * 100000)

    ############ Loading Data
    print("Loading dataset: " + args.dataset)
    dataloader = ParseData(args.dataset,
                           suffix=args.suffix,
                           mode=args.mode,
                           args=args)
    test_encoder, test_decoder, test_graph, test_batch = dataloader.load_data(
        sample_percent=args.sample_percent_test,
Example #30
    def setUp(self):
        self.config = Config.get_config()
        self.db = DB()
        self.db.check_upgrade()
        self.mark_db_ids()

        self.test_folder = tempfile.mkdtemp()
        self.files_folder = os.path.join(self.test_folder, "files")
        self.store_folder = os.path.join(self.test_folder, "store")
        self.restore_folder = os.path.join(self.test_folder, "restore")
        utils.makedirs(self.files_folder)
        utils.makedirs(self.store_folder)
        utils.makedirs(self.restore_folder)
        
        #    Build the base set of files
        with open(os.path.join(self.files_folder, "base"), "w") as f:
            f.write("base")

        with open(os.path.join(self.files_folder, "incr"), "w") as f:
            f.write("0")
            
        config_file = os.path.expanduser("~/.vault")
        if not os.path.exists(config_file):
            raise Exception("Vault test configuration file (~/.vault) does not exist")
        self.store_config = ConfigParser.RawConfigParser()
        self.store_config.read(config_file)

    #    FOLDER STORE
        self.store = FolderStore("teststore", "50MB", True, self.store_folder)
    #    DROPBOX STORE
#        self.login = self.store_config.get("DropBox", "login")
#        self.password = self.store_config.get("DropBox", "password")
#        self.folder = self.store_config.get("DropBox", "folder")
#        self.app_key = self.store_config.get("DropBox", "app_key")
#        self.app_secret_key = self.store_config.get("DropBox", "app_secret_key")
#        self.store = DropBoxStore("teststore", 0, False, self.folder, self.login, self.password,
#                                  self.app_key, self.app_secret_key)
    #    S3 STORE
#        self.key = self.store_config.get("Amazon", "aws_access_key_id")
#        self.secret_key = self.store_config.get("Amazon", "aws_secret_access_key")
#        self.bucket = self.store_config.get("Amazon", "bucket")
#        self.store = S3Store("teststore", 0, False, bucket=self.bucket, key=self.key, secret_key=self.secret_key)

        #    Now record the existence of this store
        self.config.storage[self.store.name] = self.store


        #    Build the backup object (don't save config)
        self.backup = Backup("testbackup")
        self.backup.include_folders = [self.files_folder]
        self.backup.store = self.store.name
        self.backup.notify_msg = False
        self.old_pass = self.config.data_passphrase
        self.config.data_passphrase = "goofy"
        self.backup.encrypt = True
        self.config.backups[self.backup.name] = self.backup

        #    build an options object for use with the backup
        self.options = BlankClass()
        self.options.dry_run = False
        self.options.message = False
        self.options.email = False
        self.options.shutdown = False
        self.options.norecurse = False
        
        #    How many cycles?
        self.cycles = 20
Example #31
def main(rank, world_size, args):
    setup(rank, world_size, args.port)

    # setup logger
    if rank == 0:
        utils.makedirs(args.save)
        logger = utils.get_logger(os.path.join(args.save, "logs"))

    def mprint(msg):
        if rank == 0:
            logger.info(msg)

    mprint(args)

    device = torch.device(
        f'cuda:{rank}' if torch.cuda.is_available() else 'cpu')

    if device.type == 'cuda':
        mprint('Found {} CUDA devices.'.format(torch.cuda.device_count()))
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            mprint('{} \t Memory: {:.2f}GB'.format(
                props.name, props.total_memory / (1024**3)))
    else:
        mprint('WARNING: Using device {}'.format(device))

    np.random.seed(args.seed + rank)
    torch.manual_seed(args.seed + rank)
    if device.type == 'cuda':
        torch.cuda.manual_seed(args.seed + rank)

    mprint('Loading dataset {}'.format(args.data))
    # Dataset and hyperparameters
    if args.data == 'cifar10':
        im_dim = 3

        transform_train = transforms.Compose([
            transforms.Resize(args.imagesize),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            add_noise if args.add_noise else identity,
        ])
        transform_test = transforms.Compose([
            transforms.Resize(args.imagesize),
            transforms.ToTensor(),
            add_noise if args.add_noise else identity,
        ])

        init_layer = flows.LogitTransform(0.05)
        train_set = vdsets.SVHN(args.dataroot,
                                download=True,
                                split="train",
                                transform=transform_train)
        sampler = torch.utils.data.distributed.DistributedSampler(train_set)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batchsize,
            sampler=sampler,
        )
        test_loader = torch.utils.data.DataLoader(
            vdsets.SVHN(args.dataroot,
                        download=True,
                        split="test",
                        transform=transform_test),
            batch_size=args.val_batchsize,
            shuffle=False,
        )

    elif args.data == 'mnist':
        im_dim = 1
        init_layer = flows.LogitTransform(1e-6)
        train_set = datasets.MNIST(
            args.dataroot,
            train=True,
            transform=transforms.Compose([
                transforms.Resize(args.imagesize),
                transforms.ToTensor(),
                add_noise if args.add_noise else identity,
            ]))
        sampler = torch.utils.data.distributed.DistributedSampler(train_set)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batchsize,
            sampler=sampler,
        )
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(args.dataroot,
                           train=False,
                           transform=transforms.Compose([
                               transforms.Resize(args.imagesize),
                               transforms.ToTensor(),
                               add_noise if args.add_noise else identity,
                           ])),
            batch_size=args.val_batchsize,
            shuffle=False,
        )
    else:
        raise Exception(f'dataset not one of mnist / cifar10, got {args.data}')

    mprint('Dataset loaded.')
    mprint('Creating model.')

    input_size = (args.batchsize, im_dim, args.imagesize, args.imagesize)

    model = MultiscaleFlow(
        input_size,
        block_fn=partial(cpflow_block_fn,
                         block_type=args.block_type,
                         dimh=args.dimh,
                         num_hidden_layers=args.num_hidden_layers,
                         icnn_version=args.icnn,
                         num_pooling=args.num_pooling),
        n_blocks=list(map(int, args.nblocks.split('-'))),
        factor_out=args.factor_out,
        init_layer=init_layer,
        actnorm=args.actnorm,
        fc_end=args.fc_end,
        glow=args.glow,
    )
    model.to(device)

    model = DDP(model, device_ids=[rank], find_unused_parameters=True)
    ema = utils.ExponentialMovingAverage(model)

    mprint(model)
    mprint('EMA: {}'.format(ema))

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=(0.9, 0.99),
                           weight_decay=args.wd)

    # Saving and resuming
    best_test_bpd = math.inf
    begin_epoch = 0

    most_recent_path = os.path.join(args.save, 'models', 'most_recent.pth')
    checkpt_exists = os.path.exists(most_recent_path)
    if checkpt_exists:
        mprint(f"Resuming from {most_recent_path}")

        # deal with data-dependent initialization like actnorm.
        with torch.no_grad():
            x = torch.rand(8, *input_size[1:]).to(device)
            model(x)

        checkpt = torch.load(most_recent_path)
        begin_epoch = checkpt["epoch"] + 1

        model.module.load_state_dict(checkpt["state_dict"])
        ema.set(checkpt['ema'])
        optimizer.load_state_dict(checkpt["opt_state_dict"])
    elif args.resume:
        mprint(f"Resuming from {args.resume}")

        # deal with data-dependent initialization like actnorm.
        with torch.no_grad():
            x = torch.rand(8, *input_size[1:]).to(device)
            model(x)

        checkpt = torch.load(args.resume)
        begin_epoch = checkpt["epoch"] + 1

        model.module.load_state_dict(checkpt["state_dict"])
        ema.set(checkpt['ema'])
        optimizer.load_state_dict(checkpt["opt_state_dict"])

    mprint(optimizer)

    batch_time = utils.RunningAverageMeter(0.97)
    bpd_meter = utils.RunningAverageMeter(0.97)
    gnorm_meter = utils.RunningAverageMeter(0.97)
    cg_meter = utils.RunningAverageMeter(0.97)
    hnorm_meter = utils.RunningAverageMeter(0.97)

    update_lr(optimizer, 0, args)

    # for visualization
    fixed_x = next(iter(train_loader))[0][:8].to(device)
    fixed_z = torch.randn(8,
                          im_dim * args.imagesize * args.imagesize).to(fixed_x)
    if rank == 0:
        utils.makedirs(os.path.join(args.save, 'figs'))
        # visualize(model, fixed_x, fixed_z, os.path.join(args.save, 'figs', 'init.png'))
    for epoch in range(begin_epoch, args.nepochs):
        sampler.set_epoch(epoch)
        flows.CG_ITERS_TRACER.clear()
        flows.HESS_NORM_TRACER.clear()
        mprint('Current LR {}'.format(optimizer.param_groups[0]['lr']))
        train(epoch, train_loader, model, optimizer, bpd_meter, gnorm_meter,
              cg_meter, hnorm_meter, batch_time, ema, device, mprint,
              world_size, args)
        val_time, test_bpd = validate(epoch, model, test_loader, ema, device)
        mprint(
            'Epoch: [{0}]\tTime {1:.2f} | Test bits/dim {test_bpd:.4f}'.format(
                epoch, val_time, test_bpd=test_bpd))

        if rank == 0:
            utils.makedirs(os.path.join(args.save, 'figs'))
            visualize(model, fixed_x, fixed_z,
                      os.path.join(args.save, 'figs', f'{epoch}.png'))

            utils.makedirs(os.path.join(args.save, "models"))
            if test_bpd < best_test_bpd:
                best_test_bpd = test_bpd
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': model.module.state_dict(),
                        'opt_state_dict': optimizer.state_dict(),
                        'args': args,
                        'ema': ema,
                        'test_bpd': test_bpd,
                    }, os.path.join(args.save, 'models', 'best_model.pth'))

        if rank == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model.module.state_dict(),
                    'opt_state_dict': optimizer.state_dict(),
                    'args': args,
                    'ema': ema,
                    'test_bpd': test_bpd,
                }, os.path.join(args.save, 'models', 'most_recent.pth'))

    cleanup()
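utils.ExponentialMovingAverage is only exercised here through its constructor, .set(), and being checkpointed; a minimal sketch of a parameter EMA with that interface (an assumption, the repository's helper may differ) is:

import copy
import torch

class ExponentialMovingAverage(object):
    """Sketch of a parameter EMA: shadow <- decay * shadow + (1 - decay) * param."""

    def __init__(self, model, decay=0.999):
        self.decay = decay
        self.shadow = {k: v.detach().clone() for k, v in model.state_dict().items()}

    @torch.no_grad()
    def update(self, model):
        for k, v in model.state_dict().items():
            if v.dtype.is_floating_point:
                self.shadow[k].mul_(self.decay).add_(v, alpha=1.0 - self.decay)
            else:
                self.shadow[k].copy_(v)

    def set(self, other):
        # restore averaged weights from a checkpointed EMA object
        self.decay = other.decay
        self.shadow = copy.deepcopy(other.shadow)

    def apply_to(self, model):
        # load the averaged weights into a model, e.g. for evaluation
        model.load_state_dict(self.shadow)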
Example #32
    print(model)
    print("Number of trainable parameters: {}".format(count_parameters(model)))

    model.eval()
    p_samples = toy_data.inf_train_gen(args.data, batch_size=800**2)

    with torch.no_grad():
        sample_fn, density_fn = get_transforms(model)

        plt.figure(figsize=(10, 10))
        ax = plt.gca()
        viz_flow.plt_samples(p_samples, ax, npts=800)
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig_filename = os.path.join(args.save, 'figs', 'true_samples.jpg')
        utils.makedirs(os.path.dirname(fig_filename))
        plt.savefig(fig_filename)
        plt.close()

        plt.figure(figsize=(10, 10))
        ax = plt.gca()
        viz_flow.plt_flow_density(standard_normal_logprob,
                                  density_fn,
                                  ax,
                                  npts=800,
                                  memory=200,
                                  device=device)
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig_filename = os.path.join(args.save, 'figs', 'model_density.jpg')
        utils.makedirs(os.path.dirname(fig_filename))
        plt.savefig(fig_filename)
Example #33
def run(args, kwargs):
    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)

    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'orthogonalH':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)

    elif args.flow == 'permutation':
        snap_dir = snap_dir + '_' + 'kernelsize_' + str(args.kernel_size)
    elif args.flow == 'mixed':
        snap_dir = snap_dir + '_' + 'num_householder_' + str(args.num_householder)
    elif args.flow == 'cnf_rank':
        snap_dir = snap_dir + '_rank_' + str(args.rank) + '_' + args.dims + '_num_blocks_' + str(args.num_blocks)
    elif 'cnf' in args.flow:
        snap_dir = snap_dir + '_' + args.dims + '_num_blocks_' + str(args.num_blocks)

    if args.retrain_encoder:
        snap_dir = snap_dir + '_retrain-encoder_'
    elif args.evaluate:
        snap_dir = snap_dir + '_evaluate_'

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # logger
    utils.makedirs(args.snap_dir)
    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'), filepath=os.path.abspath(__file__))

    logger.info(args)

    # SAVING
    torch.save(args, snap_dir + args.flow + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    if not args.evaluate:

        # ==============================================================================================================
        # SELECT MODEL
        # ==============================================================================================================
        # flow parameters and architecture choice are passed on to model through args

        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            logger.info(f"Initializing decoder from {args.model_path}")
            dec_model = torch.load(args.model_path)
            dec_sd = {}
            for k, v in dec_model.state_dict().items():
                if 'p_x' in k:
                    dec_sd[k] = v
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()

        logger.info(model)

        if args.retrain_encoder:
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)

        # ==================================================================================================================
        # TRAINING
        # ==================================================================================================================
        train_loss = []
        val_loss = []

        # for early stopping
        best_loss = np.inf
        best_bpd = np.inf
        e = 0
        epoch = 0

        train_times = []

        for epoch in range(1, args.epochs + 1):

            t_start = time.time()
            tr_loss = train(epoch, train_loader, model, optimizer, args, logger)
            train_loss.append(tr_loss)
            train_times.append(time.time() - t_start)
            logger.info('One training epoch took %.2f seconds' % (time.time() - t_start))

            v_loss, v_bpd = evaluate(val_loader, model, args, logger, epoch=epoch)

            val_loss.append(v_loss)

            # early-stopping
            if v_loss < best_loss:
                e = 0
                best_loss = v_loss
                if args.input_type != 'binary':
                    best_bpd = v_bpd
                logger.info('->model saved<-')
                torch.save(model, snap_dir + args.flow + '.model')
                # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')

            elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
                e += 1
                if e > args.early_stopping_epochs:
                    break

            if args.input_type == 'binary':
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(e, args.early_stopping_epochs, best_loss)
                )

            else:
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'.
                    format(e, args.early_stopping_epochs, best_loss, best_bpd)
                )

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

        train_loss = np.hstack(train_loss)
        val_loss = np.array(val_loss)

        plot_training_curve(train_loss, val_loss, fname=snap_dir + '/training_curve_%s.pdf' % args.flow)

        # training time per epoch
        train_times = np.array(train_times)
        mean_train_time = np.mean(train_times)
        std_train_time = np.std(train_times, ddof=1)
        logger.info('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

        # ==================================================================================================================
        # EVALUATION
        # ==================================================================================================================

        logger.info(args)
        logger.info('Stopped after %d epochs' % epoch)
        logger.info('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

        final_model = torch.load(snap_dir + args.flow + '.model')
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args, logger)

    else:
        validation_loss = "N/A"
        validation_bpd = "N/A"
        logger.info(f"Loading model from {args.model_path}")
        final_model = torch.load(args.model_path)
        test_loss, test_bpd = evaluate(test_loader, final_model, args, logger)
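The early-stopping bookkeeping in the training loop above (reset a counter on every validation improvement, give up after a fixed number of stale epochs once the warm-up phase is over) can be isolated into a small helper. The class below is a minimal sketch with a hypothetical name, not part of the code above:

class EarlyStopper:
    """Sketch of the early-stopping rule used above (hypothetical helper)."""

    def __init__(self, patience, warmup):
        self.patience = patience
        self.warmup = warmup
        self.best = float('inf')
        self.stale = 0

    def step(self, val_loss, epoch):
        # Returns True when training should stop.
        if val_loss < self.best:
            self.best = val_loss
            self.stale = 0
        elif self.patience > 0 and epoch >= self.warmup:
            self.stale += 1
        return self.stale > self.patience

With such a helper, the loop above would keep only the model-saving branch and call `if stopper.step(v_loss, epoch): break`.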
Example #34
0
def train():

    model = build_model_tabular(args, 1).to(device)
    set_cnf_options(args, model)

    logger.info(model)
    logger.info("Number of trainable parameters: {}".format(
        count_parameters(model)))

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    time_meter = utils.RunningAverageMeter(0.93)
    loss_meter = utils.RunningAverageMeter(0.93)
    nfef_meter = utils.RunningAverageMeter(0.93)
    nfeb_meter = utils.RunningAverageMeter(0.93)
    tt_meter = utils.RunningAverageMeter(0.93)

    end = time.time()
    best_loss = float('inf')
    model.train()
    for itr in range(1, args.niters + 1):
        optimizer.zero_grad()

        loss = compute_loss(args, model)
        loss_meter.update(loss.item())

        total_time = count_total_time(model)
        nfe_forward = count_nfe(model)

        loss.backward()
        optimizer.step()

        nfe_total = count_nfe(model)
        nfe_backward = nfe_total - nfe_forward
        nfef_meter.update(nfe_forward)
        nfeb_meter.update(nfe_backward)

        time_meter.update(time.time() - end)
        tt_meter.update(total_time)

        log_message = (
            'Iter {:04d} | Time {:.4f}({:.4f}) | Loss {:.6f}({:.6f}) | NFE Forward {:.0f}({:.1f})'
            ' | NFE Backward {:.0f}({:.1f}) | CNF Time {:.4f}({:.4f})'.format(
                itr, time_meter.val, time_meter.avg, loss_meter.val,
                loss_meter.avg, nfef_meter.val, nfef_meter.avg, nfeb_meter.val,
                nfeb_meter.avg, tt_meter.val, tt_meter.avg))
        logger.info(log_message)

        if itr % args.val_freq == 0 or itr == args.niters:
            with torch.no_grad():
                model.eval()
                test_loss = compute_loss(args,
                                         model,
                                         batch_size=args.test_batch_size)
                test_nfe = count_nfe(model)
                log_message = '[TEST] Iter {:04d} | Test Loss {:.6f} | NFE {:.0f}'.format(
                    itr, test_loss, test_nfe)
                logger.info(log_message)

                if test_loss.item() < best_loss:
                    best_loss = test_loss.item()
                    utils.makedirs(args.save)
                    torch.save(
                        {
                            'args': args,
                            'state_dict': model.state_dict(),
                        }, os.path.join(args.save, 'checkpt.pth'))
                model.train()

        if itr % args.viz_freq == 0:
            with torch.no_grad():
                model.eval()

                xx = torch.linspace(-10, 10, 10000).view(-1, 1).to(device)
                true_p = data_density(xx)
                plt.plot(xx.view(-1).cpu().numpy(),
                         true_p.view(-1).exp().cpu().numpy(),
                         label='True')

                true_p = model_density(xx, model)
                plt.plot(xx.view(-1).cpu().numpy(),
                         true_p.view(-1).exp().cpu().numpy(),
                         label='Model')

                utils.makedirs(os.path.join(args.save, 'figs'))
                plt.savefig(
                    os.path.join(args.save, 'figs', '{:06d}.jpg'.format(itr)))
                plt.close()

                model.train()

        end = time.time()

    logger.info('Training has finished.')
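`utils.RunningAverageMeter(0.93)` appears in several of these examples but its implementation is not shown in this listing. The sketch below is only an assumption about its behaviour, consistent with how `.val` and `.avg` are read in the log messages: `val` holds the latest sample and `avg` an exponential moving average of it.

class RunningAverageMeter:
    """Assumed behaviour only -- not the actual utils implementation."""

    def __init__(self, momentum=0.99):
        self.momentum = momentum
        self.val = None
        self.avg = 0.0

    def update(self, val):
        if self.val is None:
            self.avg = val          # first sample initialises the average
        else:
            self.avg = self.avg * self.momentum + val * (1 - self.momentum)
        self.val = val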
Example #35
0
def main():
    #os.system('shutdown -c')  # cancel previous shutdown command

    if write_log:
        utils.makedirs(args.save)
        logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                                  filepath=os.path.abspath(__file__))

        logger.info(args)

        args_file_path = os.path.join(args.save, 'args.yaml')
        with open(args_file_path, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)

    if args.distributed:
        if write_log: logger.info('Distributed initializing process group')
        torch.cuda.set_device(args.local_rank)
        distributed.init_process_group(backend=args.dist_backend,
                                       init_method=args.dist_url,
                                       world_size=dist_utils.env_world_size(),
                                       rank=env_rank())
        assert (dist_utils.env_world_size() == distributed.get_world_size())
        if write_log:
            logger.info("Distributed: success (%d/%d)" %
                        (args.local_rank, distributed.get_world_size()))

    # get device
    # device = torch.device("cuda:%d"%torch.cuda.current_device() if torch.cuda.is_available() else "cpu")
    device = "cpu"
    cvt = lambda x: x.type(torch.float32).to(device, non_blocking=True)

    # load dataset
    train_loader, test_loader, data_shape = get_dataset(args)

    trainlog = os.path.join(args.save, 'training.csv')
    testlog = os.path.join(args.save, 'test.csv')

    traincolumns = [
        'itr', 'wall', 'itr_time', 'loss', 'bpd', 'fe', 'total_time',
        'grad_norm'
    ]
    testcolumns = [
        'wall', 'epoch', 'eval_time', 'bpd', 'fe', 'total_time',
        'transport_cost'
    ]

    # build model
    regularization_fns, regularization_coeffs = create_regularization_fns(args)
    model = create_model(args, data_shape, regularization_fns)
    # model = model.cuda()
    if args.distributed:
        model = dist_utils.DDP(model,
                               device_ids=[args.local_rank],
                               output_device=args.local_rank)

    traincolumns = append_regularization_keys_header(traincolumns,
                                                     regularization_fns)

    if not args.resume and write_log:
        with open(trainlog, 'w') as f:
            csvlogger = csv.DictWriter(f, traincolumns)
            csvlogger.writeheader()
        with open(testlog, 'w') as f:
            csvlogger = csv.DictWriter(f, testcolumns)
            csvlogger.writeheader()

    set_cnf_options(args, model)

    if write_log: logger.info(model)
    if write_log:
        logger.info("Number of trainable parameters: {}".format(
            count_parameters(model)))
    if write_log:
        logger.info('Iters per train epoch: {}'.format(len(train_loader)))
    if write_log: logger.info('Iters per test: {}'.format(len(test_loader)))

    # optimizer
    if args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.weight_decay,
                              momentum=0.9,
                              nesterov=False)

    # restore parameters
    if args.resume is not None:
        checkpt = torch.load(
            args.resume,
            map_location=lambda storage, loc: storage.cuda(args.local_rank))
        model.load_state_dict(checkpt["state_dict"])
        if "optim_state_dict" in checkpt.keys():
            optimizer.load_state_dict(checkpt["optim_state_dict"])
            # Manually move optimizer state to device.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = cvt(v)

    # For visualization.
    if write_log:
        fixed_z = cvt(torch.randn(min(args.test_batch_size, 100), *data_shape))

    if write_log:
        time_meter = utils.RunningAverageMeter(0.97)
        bpd_meter = utils.RunningAverageMeter(0.97)
        loss_meter = utils.RunningAverageMeter(0.97)
        steps_meter = utils.RunningAverageMeter(0.97)
        grad_meter = utils.RunningAverageMeter(0.97)
        tt_meter = utils.RunningAverageMeter(0.97)

    if not args.resume:
        best_loss = float("inf")
        itr = 0
        wall_clock = 0.
        begin_epoch = 1
    else:
        chkdir = os.path.dirname(args.resume)
        tedf = pd.read_csv(os.path.join(chkdir, 'test.csv'))
        trdf = pd.read_csv(os.path.join(chkdir, 'training.csv'))
        wall_clock = trdf['wall'].to_numpy()[-1]
        itr = trdf['itr'].to_numpy()[-1]
        best_loss = tedf['bpd'].min()
        begin_epoch = int(tedf['epoch'].to_numpy()[-1] + 1)  # not exactly correct

    if args.distributed:
        if write_log: logger.info('Syncing machines before training')
        dist_utils.sum_tensor(torch.tensor([1.0]).float().cuda())

    for epoch in range(begin_epoch, args.num_epochs + 1):
        if not args.validate:
            model.train()

            with open(trainlog, 'a') as f:
                if write_log: csvlogger = csv.DictWriter(f, traincolumns)

                for _, (x, y) in enumerate(train_loader):
                    start = time.time()
                    update_lr(optimizer, itr)
                    optimizer.zero_grad()

                    # cast data and move to device
                    x = add_noise(cvt(x), nbits=args.nbits)
                    #x = x.clamp_(min=0, max=1)
                    # compute loss
                    bpd, (x, z), reg_states = compute_bits_per_dim(x, model)
                    if np.isnan(bpd.data.item()):
                        raise ValueError('model returned nan during training')
                    elif np.isinf(bpd.data.item()):
                        raise ValueError('model returned inf during training')

                    loss = bpd
                    if regularization_coeffs:
                        reg_loss = sum(reg_state * coeff
                                       for reg_state, coeff in zip(
                                           reg_states, regularization_coeffs)
                                       if coeff != 0)
                        loss = loss + reg_loss
                    total_time = count_total_time(model)

                    loss.backward()
                    nfe_opt = count_nfe(model)
                    if write_log: steps_meter.update(nfe_opt)
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), args.max_grad_norm)

                    optimizer.step()

                    itr_time = time.time() - start
                    wall_clock += itr_time

                    batch_size = x.size(0)
                    metrics = torch.tensor([
                        1., batch_size,
                        loss.item(),
                        bpd.item(), nfe_opt, grad_norm, *reg_states
                    ]).float()

                    rv = tuple(torch.tensor(0.) for r in reg_states)

                    total_gpus, batch_total, r_loss, r_bpd, r_nfe, r_grad_norm, *rv = dist_utils.sum_tensor(
                        metrics).cpu().numpy()

                    if write_log:
                        time_meter.update(itr_time)
                        bpd_meter.update(r_bpd / total_gpus)
                        loss_meter.update(r_loss / total_gpus)
                        grad_meter.update(r_grad_norm / total_gpus)
                        tt_meter.update(total_time)

                        fmt = '{:.4f}'
                        logdict = {
                            'itr': itr,
                            'wall': fmt.format(wall_clock),
                            'itr_time': fmt.format(itr_time),
                            'loss': fmt.format(r_loss / total_gpus),
                            'bpd': fmt.format(r_bpd / total_gpus),
                            'total_time': fmt.format(total_time),
                            'fe': r_nfe / total_gpus,
                            'grad_norm': fmt.format(r_grad_norm / total_gpus),
                        }
                        if regularization_coeffs:
                            rv = tuple(v_ / total_gpus for v_ in rv)
                            logdict = append_regularization_csv_dict(
                                logdict, regularization_fns, rv)
                        csvlogger.writerow(logdict)

                        if itr % args.log_freq == 0:
                            log_message = (
                                "Itr {:06d} | Wall {:.3e}({:.2f}) | "
                                "Time/Itr {:.2f}({:.2f}) | BPD {:.2f}({:.2f}) | "
                                "Loss {:.2f}({:.2f}) | "
                                "FE {:.0f}({:.0f}) | Grad Norm {:.3e}({:.3e}) | "
                                "TT {:.2f}({:.2f})".format(
                                    itr, wall_clock, wall_clock / (itr + 1),
                                    time_meter.val, time_meter.avg,
                                    bpd_meter.val, bpd_meter.avg,
                                    loss_meter.val, loss_meter.avg,
                                    steps_meter.val, steps_meter.avg,
                                    grad_meter.val, grad_meter.avg,
                                    tt_meter.val, tt_meter.avg))
                            if regularization_coeffs:
                                log_message = append_regularization_to_log(
                                    log_message, regularization_fns, rv)
                            logger.info(log_message)

                    itr += 1

        # compute test loss
        model.eval()
        if args.local_rank == 0:
            utils.makedirs(args.save)
            torch.save(
                {
                    "args": args,
                    "state_dict": model.module.state_dict()
                    if torch.cuda.is_available() else model.state_dict(),
                    "optim_state_dict": optimizer.state_dict(),
                    "fixed_z": fixed_z.cpu(),
                }, os.path.join(args.save, "checkpt.pth"))
        if epoch % args.val_freq == 0 or args.validate:
            with open(testlog, 'a') as f:
                if write_log: csvlogger = csv.DictWriter(f, testcolumns)
                with torch.no_grad():
                    start = time.time()
                    if write_log: logger.info("validating...")

                    lossmean = 0.
                    meandist = 0.
                    steps = 0
                    tt = 0.
                    for i, (x, y) in enumerate(test_loader):
                        sh = x.shape
                        x = shift(cvt(x), nbits=args.nbits)
                        loss, (x, z), _ = compute_bits_per_dim(x, model)
                        dist = (x.view(x.size(0), -1) - z).pow(2).mean(dim=-1).mean()
                        # running mean over batches (same form as the updates below)
                        meandist = i / (i + 1) * meandist + dist / (i + 1)
                        lossmean = i / (i + 1) * lossmean + loss / (i + 1)

                        tt = i / (i + 1) * tt + count_total_time(model) / (i + 1)
                        steps = i / (i + 1) * steps + count_nfe(model) / (i + 1)

                    loss = lossmean.item()
                    metrics = torch.tensor([1., loss, meandist, steps]).float()

                    total_gpus, r_bpd, r_mdist, r_steps = dist_utils.sum_tensor(
                        metrics).cpu().numpy()
                    eval_time = time.time() - start

                    if write_log:
                        fmt = '{:.4f}'
                        logdict = {
                            'epoch': epoch,
                            'eval_time': fmt.format(eval_time),
                            'bpd': fmt.format(r_bpd / total_gpus),
                            'wall': fmt.format(wall_clock),
                            'total_time': fmt.format(tt),
                            'transport_cost': fmt.format(r_mdist / total_gpus),
                            'fe': '{:.2f}'.format(r_steps / total_gpus)
                        }

                        csvlogger.writerow(logdict)

                        logger.info(
                            "Epoch {:04d} | Time {:.4f}, Bit/dim {:.4f}, Steps {:.4f}, TT {:.2f}, Transport Cost {:.2e}"
                            .format(epoch, eval_time, r_bpd / total_gpus,
                                    r_steps / total_gpus, tt,
                                    r_mdist / total_gpus))

                    loss = r_bpd / total_gpus

                    if loss < best_loss and args.local_rank == 0:
                        best_loss = loss
                        shutil.copyfile(os.path.join(args.save, "checkpt.pth"),
                                        os.path.join(args.save, "best.pth"))

            # visualize samples and density
            if write_log:
                with torch.no_grad():
                    fig_filename = os.path.join(args.save, "figs",
                                                "{:04d}.jpg".format(epoch))
                    utils.makedirs(os.path.dirname(fig_filename))
                    generated_samples, _, _ = model(fixed_z, reverse=True)
                    generated_samples = generated_samples.view(-1, *data_shape)
                    nb = int(np.ceil(np.sqrt(float(fixed_z.size(0)))))
                    save_image(unshift(generated_samples, nbits=args.nbits),
                               fig_filename,
                               nrow=nb)
            if args.validate:
                break
parser.add_argument('--weight-decay', type=float, default=1e-5)
parser.add_argument('--annealing-iters', type=int, default=0)

parser.add_argument('--save', type=str, default='experiments/')

parser.add_argument('--viz_freq', type=int, default=1000)
parser.add_argument('--val_freq', type=int, default=1000)
parser.add_argument('--log_freq', type=int, default=1000)

parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--gpu', type=int, default=0)

args = parser.parse_args()

# logger
utils.makedirs(args.save)

logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                          filepath=os.path.abspath(__file__))
logger.info(args)

device = torch.device('cuda:' +
                      str(args.gpu) if torch.cuda.is_available() else 'cpu')

print('')
print(device)
print(device.type)

print('')
np.random.seed(args.seed)
torch.manual_seed(args.seed)
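The `utils.makedirs` / `utils.get_logger` pair used in the setup above is not shown in this listing. A plausible minimal version, offered only as an assumption about what those helpers do, is an existence-tolerant `os.makedirs` plus a logger that writes to both the log file and the console:

import logging
import os


def makedirs(dirname):
    # Create the directory tree; succeed silently if it already exists.
    os.makedirs(dirname, exist_ok=True)


def get_logger(logpath, filepath=None, level=logging.INFO):
    logger = logging.getLogger(logpath)
    logger.setLevel(level)
    logger.addHandler(logging.FileHandler(logpath))  # persist to the log file
    logger.addHandler(logging.StreamHandler())       # echo to the console
    if filepath is not None:
        # Record which script produced the log, as the calls above suggest.
        logger.info('logging for %s', filepath)
    return logger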
def main(conf):
    dump_dir = conf['xgboost.dump.dir']
    makedirs(dump_dir)

    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['xgboost.dataset'])

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['xgboost.dataset'])

    logging.info('Loading features')
    features = []
    for group, cnf in conf['features'].iteritems():
        logging.info('Loading features group: %s', group)

        features_dump_dir = cnf['dump']
        train_features_file = join_path(features_dump_dir, 'train.csv')
        test_features_file = join_path(features_dump_dir, 'test.csv')

        train_features = pd.read_csv(train_features_file)
        test_features = pd.read_csv(test_features_file)

        for fcnf in cnf['features']:
            feature = fcnf['feature']
            features.append(feature)
            train_col = fcnf.get('train_col', feature)
            test_col = fcnf.get('test_col', feature)
            train_df[feature] = train_features[train_col]
            test_df[feature] = test_features[test_col]

    feature_map_file = join_path(dump_dir, 'xgb.fmap')
    create_feature_map(features, feature_map_file)

    train_df_flipped = train_df.copy()
    for flip in conf['flip']:
        train_df_flipped[flip[0]] = train_df[[flip[1]]]
        train_df_flipped[flip[1]] = train_df[[flip[0]]]

    train_df = pd.concat([train_df, train_df_flipped], axis=0, ignore_index=True)
    logging.info('Train dataset: %s', train_df.shape)

    y = train_df[[FieldsTrain.is_duplicate]].values.flatten()
    logging.info('Train dataset CTR: %s', y.sum() / len(y))

    class_weight = {int(c['class']): c['weight'] for c in conf['weights']}
    w = np.vectorize(class_weight.get)(y)
    logging.info('Train dataset weighted CTR: %s', sum(y * w) / sum(w))

    q1 = train_df[Fields.question1].values
    q2 = train_df[Fields.question2].values

    train_df.drop([
        FieldsTrain.id,
        FieldsTrain.qid1,
        FieldsTrain.qid2,
        FieldsTrain.question1,
        FieldsTrain.question2,
        FieldsTrain.is_duplicate], axis=1, inplace=True)

    X = train_df.values

    logging.info('Training XGBoost model')
    model, progress, quality = train_xgboost(X, y, w, **conf['xgboost.param'])

    logging.info('Writing model dump')
    model_dump_file = join_path(dump_dir, 'model_dump.txt')
    model.dump_model(model_dump_file, fmap=feature_map_file, with_stats=True)
    model_file = join_path(dump_dir, 'model.bin')
    model.save_model(model_file)

    logging.info('Writing quality')
    # plot_quality(quality, dump_dir)

    logging.info('Writing top errors')
    errors_file = join_path(dump_dir, 'errors.csv')
    with open(errors_file, 'w') as fh:
        fh.write('y,p,question1,question2,sample\n')
        for e in quality['errors']['train']['type_i']:
            fh.write('%d,%s,%s,%s,%s\n' % (0, e[0], q1[e[1]], q2[e[1]], 'train'))
        for e in quality['errors']['train']['type_ii']:
            fh.write('%d,%s,%s,%s,%s\n' % (1, e[0], q1[e[1]], q2[e[1]], 'train'))
        for e in quality['errors']['valid']['type_i']:
            fh.write('%d,%s,%s,%s,%s\n' % (0, e[0], q1[e[1]], q2[e[1]], 'valid'))
        for e in quality['errors']['valid']['type_ii']:
            fh.write('%d,%s,%s,%s,%s\n' % (1, e[0], q1[e[1]], q2[e[1]], 'valid'))

    logging.info('Writing progress file')
    # plot_progress(progress, dump_dir)
    progress_file = join_path(dump_dir, 'progress.json')
    with open(progress_file, 'w') as fh:
        json.dump(progress, fh)

    logging.info('Writing feature scores')
    score_weight = model.get_score(fmap=feature_map_file, importance_type='weight')
    score_gain = model.get_score(fmap=feature_map_file, importance_type='gain')
    score_cover = model.get_score(fmap=feature_map_file, importance_type='cover')
    split_histograms = dict()
    for f in features:
        split_histograms[f] = model.get_split_value_histogram(f, fmap=feature_map_file)

    scores = pd.DataFrame([score_weight, score_gain, score_cover]).transpose()
    scores.index.name = 'feature'
    scores.rename(columns={0: 'weight', 1: 'gain', 2: 'cover'}, inplace=True)
    weight_total = scores['weight'].sum()
    scores['weight'] = scores['weight'] / weight_total
    scores.sort_values(by='weight', ascending=False, inplace=True)
    scores.to_csv(join_path(dump_dir, 'feature_scores.csv'))

    logging.info('Computing test predictions')
    test_ids = test_df[[FieldsTest.test_id]]
    test_df.drop([FieldsTest.test_id, FieldsTest.question1, FieldsTest.question2], axis=1, inplace=True)
    dtest = xgb.DMatrix(test_df.values)
    p_test = model.predict(dtest)

    logging.info('Writing submission file')
    submission_file = join_path(dump_dir, 'submission.csv')
    submission(submission_file, test_ids, p_test)
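`create_feature_map` is referenced above but not shown. XGBoost's `fmap` files (consumed by `dump_model(fmap=...)` and `get_score(fmap=...)`) conventionally hold one tab-separated line per feature: index, name, and type, with `q` marking a quantitative feature. A minimal sketch of such a helper, assuming that conventional format:

def create_feature_map(features, feature_map_file):
    # One line per feature: "<index>\t<name>\tq" -- the layout XGBoost's
    # fmap-based APIs expect for quantitative features.
    with open(feature_map_file, 'w') as fh:
        for i, feature in enumerate(features):
            fh.write('%d\t%s\tq\n' % (i, feature))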
Example #38
0
def run(args, logger, train_loader, validation_loader, data_shape):

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = module.HouseholderSylvesterVAE(args, data_shape)
    #  model = module.OrthogonalSylvesterVAE(args, data_shape)

    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           factor=0.2,
                                                           patience=5,
                                                           min_lr=1e-8)

    start_epoch = 0

    # restore parameters
    if args.resume is not None:
        checkpt = torch.load(args.resume,
                             map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpt["state_dict"])
        optimizer.load_state_dict(checkpt["optim_state_dict"])
        args = checkpt["args"]
        start_epoch = checkpt["epoch"] + 1
        logger.info("Resuming at epoch {} with args {}.".format(
            start_epoch, args))

    time_meter = utils.RunningAverageMeter(0.97)

    beta = args.beta
    train_loader_break = 500000
    break_train = int(train_loader_break / args.batch_size)
    break_training = 50

    best_loss = float("inf")
    itr = 0
    for epoch in range(start_epoch, args.num_epochs):
        logger.info('Epoch: {}/{} \tBeta: {}'.format(epoch, args.num_epochs,
                                                     beta))

        model.train()
        num_data = 0
        end = time.time()

        for idx_count, data in enumerate(train_loader):
            #  if idx_count > break_training:
            #  break
            if args.data == 'piv':
                x_, y_ = data['ComImages'], data['AllGenDetails']

                if args.heterogen:
                    x = torch.zeros([x_.size(0), 4, 32, 32])
                    x[:, :2, :, :] = x_
                    for idx in range(x_.size(0)):
                        u_vector = torch.zeros([1, 32, 32])
                        u_vector.fill_(y_[idx][0] / 20 * 0.5 + 0.5)

                        v_vector = torch.zeros([1, 32, 32])
                        v_vector.fill_(y_[idx][1] / 20 * 0.5 + 0.5)

                        x[idx, 2, :, :] = u_vector
                        x[idx, 3, :, :] = v_vector

                else:
                    x = x_
                    y = y_

            elif args.data == 'mnist' and args.heterogen:
                x_, y_ = data

                x = torch.zeros([x_.size(0), 2, 28, 28])
                x[:, :1, :, :] = x_
                for idx in range(x_.size(0)):
                    labels = torch.zeros([1, 28, 28])
                    labels.fill_(y_[idx] / 10)

                    x[idx, 1, :, :] = labels

            elif args.data == 'cifar10' and args.heterogen:
                x_, y_ = data

                x = torch.zeros([x_.size(0), 4, 32, 32])
                x[:, :3, :, :] = x_
                for idx in range(x_.size(0)):
                    labels = torch.zeros([1, 32, 32])
                    labels.fill_(y_[idx])

                    x[idx, 3, :, :] = labels

            else:
                x, y = data

            x = x.to(device)

            start = time.time()
            optimizer.zero_grad()

            recon_images, z_mu, z_var, ldj, z0, z_k = model(x)

            loss, rec, kl = loss_function.binary_loss_function(
                recon_images, x, z_mu, z_var, z0, z_k, ldj, beta)

            loss.backward()

            optimizer.step()

            rec = rec.item()
            kl = kl.item()
            num_data += len(x)

            batch_time = time.time() - end
            end = time.time()

            if itr % args.log_freq == 0:
                log_message = (
                    "Epoch {:03d} |  [{:5d}/{:5d} ({:2.0f}%)] | Time {:.3f} | Loss: {:11.6f} |"
                    "rec:{:11.6f} | kl: {:11.6f}".format(
                        epoch, num_data, len(train_loader.sampler),
                        100. * idx_count / len(train_loader), batch_time,
                        loss.item(), rec, kl))
                logger.info(log_message)

            itr += 1

        scheduler.step(loss.item())

        # Evaluate and save model
        if args.evaluate:
            if epoch % args.val_freq == 0:
                model.eval()
                with torch.no_grad():
                    start = time.time()
                    logger.info("validating...")

                    losses_vec_recon_images = []
                    losses_vec_images_recon_images = []
                    losses = []

                    for batch_idx, data in enumerate(validation_loader):

                        if batch_idx > break_training:
                            break

                        if args.data == 'piv':
                            x_, y_ = data['ComImages'], data['AllGenDetails']

                            if args.heterogen:
                                x = torch.zeros([x_.size(0), 4, 32, 32])
                                x[:, :2, :, :] = x_
                                for idx in range(x_.size(0)):
                                    u_vector = torch.zeros([1, 32, 32])
                                    u_vector.fill_(y_[idx][0] / 20 * 0.5 + 0.5)

                                    v_vector = torch.zeros([1, 32, 32])
                                    v_vector.fill_(y_[idx][1] / 20 * 0.5 + 0.5)

                                    x[idx, 2, :, :] = u_vector
                                    x[idx, 3, :, :] = v_vector

                            else:
                                x = x_
                                y = y_

                        elif args.data == 'mnist' and args.heterogen:
                            x_, y_ = data

                            x = torch.zeros([x_.size(0), 2, 28, 28])
                            x[:, :1, :, :] = x_
                            for idx in range(x_.size(0)):
                                labels = torch.zeros([1, 28, 28])
                                labels.fill_(y_[idx] / 10)

                                x[idx, 1, :, :] = labels

                        elif args.data == 'cifar10' and args.heterogen:
                            x_, y_ = data

                            x = torch.zeros([x_.size(0), 4, 32, 32])
                            x[:, :3, :, :] = x_
                            for idx in range(x_.size(0)):
                                labels = torch.zeros([1, 32, 32])
                                labels.fill_(y_[idx])

                                x[idx, 3, :, :] = labels
                        else:
                            x, y = data

                        x = x.to(device)

                        recon_images, z_mu, z_var, ldj, z0, z_k = model(x)
                        loss, rec, kl = loss_function.binary_loss_function(
                            recon_images, x, z_mu, z_var, z0, z_k, ldj, beta)
                        losses.append(loss.item())

                        if args.data == "piv" and args.heterogen == False:
                            loss_vec_recon_images, loss_vec_images_recon_images = resnet_pretrained.run(
                                args, logger, recon_images, x, y, data_shape)
                            losses_vec_recon_images.append(
                                loss_vec_recon_images.item())
                            losses_vec_images_recon_images.append(
                                loss_vec_images_recon_images.item())

                    if args.data == "piv" and args.heterogen == False:
                        logger.info(
                            "Loss vector reconstructed images {}, Loss vector images reconstructed images {}"
                            .format(np.mean(losses_vec_recon_images),
                                    np.mean(losses_vec_images_recon_images)))

                    loss = np.mean(losses)
                    logger.info(
                        "Epoch {:04d} | Time {:.4f} | Loss {:.4f}".format(
                            epoch,
                            time.time() - start, loss))
                    if loss < best_loss:
                        best_loss = loss
                        utils.makedirs(args.save)
                        torch.save(
                            {
                                "args": args,
                                "epoch": epoch,
                                "state_dict": model.state_dict(),
                                "optim_state_dict": optimizer.state_dict(),
                            }, os.path.join(args.save, "checkpt.pth"))
                        logger.info("Saving model at epoch {}.".format(epoch))

            if beta < 1:
                beta += 0.01

            # Evaluation
            evaluation.save_recon_images(args, model, validation_loader,
                                         data_shape, logger)
            evaluation.save_fixed_z_image(args, model, data_shape, logger)
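The 'heterogen' branches above all follow the same pattern: each scalar label is rescaled and broadcast into a constant image plane that is stacked onto the input as an extra channel. A minimal sketch of that idea, with illustrative names that are not part of the project's API:

import torch


def append_label_channels(x, labels, scale=1.0):
    """Stack one constant-valued channel per label column onto a batch of images.

    x:      (N, C, H, W) float tensor
    labels: (N, K) tensor of scalar conditioning values
    """
    n, _, h, w = x.shape
    # Broadcast each label over an H x W plane: (N, K) -> (N, K, H, W).
    planes = (labels.float() * scale).view(n, -1, 1, 1).expand(n, labels.size(1), h, w)
    return torch.cat([x, planes], dim=1)

For the MNIST branch above this would correspond to `labels = y_.view(-1, 1)` with `scale = 1 / 10`.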
示例#39
0
def train(args, model, growth_model):
    logger.info(model)
    logger.info("Number of trainable parameters: {}".format(count_parameters(model)))

    #optimizer = optim.Adam(set(model.parameters()) | set(growth_model.parameters()), 
    optimizer = optim.Adam(model.parameters(), 
                           lr=args.lr, weight_decay=args.weight_decay)
    #growth_optimizer = optim.Adam(growth_model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    time_meter = utils.RunningAverageMeter(0.93)
    loss_meter = utils.RunningAverageMeter(0.93)
    nfef_meter = utils.RunningAverageMeter(0.93)
    nfeb_meter = utils.RunningAverageMeter(0.93)
    tt_meter = utils.RunningAverageMeter(0.93)

    end = time.time()
    best_loss = float('inf')
    model.train()
    growth_model.eval()
    for itr in range(1, args.niters + 1):
        optimizer.zero_grad()
        #growth_optimizer.zero_grad()

        ### Train
        if args.spectral_norm: spectral_norm_power_iteration(model, 1)
        #if args.spectral_norm: spectral_norm_power_iteration(growth_model, 1)

        loss = compute_loss(args, model, growth_model)
        loss_meter.update(loss.item())

        if len(regularization_coeffs) > 0:
            # Only regularize on the last timepoint
            reg_states = get_regularization(model, regularization_coeffs)
            reg_loss = sum(
                reg_state * coeff for reg_state, coeff in zip(reg_states, regularization_coeffs) if coeff != 0
            )
            loss = loss + reg_loss

        #if len(growth_regularization_coeffs) > 0:
        #    growth_reg_states = get_regularization(growth_model, growth_regularization_coeffs)
        #    reg_loss = sum(
        #        reg_state * coeff for reg_state, coeff in zip(growth_reg_states, growth_regularization_coeffs) if coeff != 0
        #    )
        #    loss2 = loss2 + reg_loss

        total_time = count_total_time(model)
        nfe_forward = count_nfe(model)

        loss.backward()
        #loss2.backward()
        optimizer.step()
        #growth_optimizer.step()

        ### Eval
        nfe_total = count_nfe(model)
        nfe_backward = nfe_total - nfe_forward
        nfef_meter.update(nfe_forward)
        nfeb_meter.update(nfe_backward)
        time_meter.update(time.time() - end)
        tt_meter.update(total_time)

        log_message = (
            'Iter {:04d} | Time {:.4f}({:.4f}) | Loss {:.6f}({:.6f}) | NFE Forward {:.0f}({:.1f})'
            ' | NFE Backward {:.0f}({:.1f}) | CNF Time {:.4f}({:.4f})'.format(
                itr, time_meter.val, time_meter.avg, loss_meter.val, loss_meter.avg, nfef_meter.val, nfef_meter.avg,
                nfeb_meter.val, nfeb_meter.avg, tt_meter.val, tt_meter.avg
            )
        )
        if len(regularization_coeffs) > 0:
            log_message = append_regularization_to_log(log_message, regularization_fns, reg_states)

        logger.info(log_message)

        if itr % args.val_freq == 0 or itr == args.niters:
            with torch.no_grad():
                model.eval()
                growth_model.eval()
                test_loss = compute_loss(args, model, growth_model)
                test_nfe = count_nfe(model)
                log_message = '[TEST] Iter {:04d} | Test Loss {:.6f} | NFE {:.0f}'.format(itr, test_loss, test_nfe)
                logger.info(log_message)

                if test_loss.item() < best_loss:
                    best_loss = test_loss.item()
                    utils.makedirs(args.save)
                    torch.save({
                        'args': args,
                        'state_dict': model.state_dict(),
                        'growth_state_dict': growth_model.state_dict(),
                    }, os.path.join(args.save, 'checkpt.pth'))
                model.train()

        if itr % args.viz_freq == 0:
            with torch.no_grad():
                model.eval()
                for i, tp in enumerate(timepoints):
                    p_samples = viz_sampler(tp)
                    sample_fn, density_fn = get_transforms(model, int_tps[:i+1])
                    #growth_sample_fn, growth_density_fn = get_transforms(growth_model, int_tps[:i+1])
                    plt.figure(figsize=(9, 3))
                    visualize_transform(
                        p_samples, torch.randn, standard_normal_logprob, transform=sample_fn, inverse_transform=density_fn,
                        samples=True, npts=100, device=device
                    )
                    fig_filename = os.path.join(args.save, 'figs', '{:04d}_{:01d}.jpg'.format(itr, i))
                    utils.makedirs(os.path.dirname(fig_filename))
                    plt.savefig(fig_filename)
                    plt.close()

                    #visualize_transform(
                    #    p_samples, torch.rand, uniform_logprob, transform=growth_sample_fn, 
                    #    inverse_transform=growth_density_fn,
                    #    samples=True, npts=800, device=device
                    #)

                    #fig_filename = os.path.join(args.save, 'growth_figs', '{:04d}_{:01d}.jpg'.format(itr, i))
                    #utils.makedirs(os.path.dirname(fig_filename))
                    #plt.savefig(fig_filename)
                    #plt.close()
                model.train()

        """
        if itr % args.viz_freq_growth == 0:
            with torch.no_grad():
                growth_model.eval()
                # Visualize growth transform
                growth_filename = os.path.join(args.save, 'growth', '{:04d}.jpg'.format(itr))
                utils.makedirs(os.path.dirname(growth_filename))
                visualize_growth(growth_model, data, labels, npts=200, device=device)
                plt.savefig(growth_filename)
                plt.close()
                growth_model.train()
        """

        end = time.time()
    logger.info('Training has finished.')
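All of the training loops in this listing guard their checkpoint writes with `utils.makedirs(args.save)` before `torch.save`, and several keep a separate `best.pth`. Reduced to a sketch with hypothetical names:

import os
import torch


def save_checkpoint(state, save_dir, is_best=False, filename='checkpt.pth'):
    # Make sure the target directory exists before writing (the makedirs step).
    os.makedirs(save_dir, exist_ok=True)
    torch.save(state, os.path.join(save_dir, filename))
    if is_best:
        # Keep a separate copy of the best model seen so far.
        torch.save(state, os.path.join(save_dir, 'best.pth'))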