Example No. 1
def batch_dataset(args):
    # read data from configuration file
    config = Parameters.from_config(args.path)

    # load entire dataset
    train_data = DataLoader.from_files(
        config.data.src_train,
        config.data.tgt_train,
        config.model.max_length,
        config.training.batch_size
    )

    outputfile = Path(args.output)
    with open(outputfile, "w", encoding="utf-8") as ofile:
        for i, batch in enumerate(train_data):
            for src, tgt in zip(*batch):
                s_sen = " ".join(src)
                t_sen = " ".join(tgt)
                ofile.write(f"{s_sen}\t{t_sen}\n")

            # print progress
            print(f"Batching dataset: {i}/{len(train_data)}", end="\r")

    print(" " * 50, end="\r")
    print("Batching dataset: complete")
Example No. 2
def train(args):
    # extract arguments
    resume = args.resume
    batched = args.batched
    params = Parameters.from_config(args.path)

    # initialize trainer
    trainer = Trainer(resume, batched, params)
    trainer.read_data()
    trainer.create_model()
    trainer.train_loop()
    trainer.save_model()
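
The snippet above only calls into Trainer; here is a skeleton of the interface it implies (method names come from the calls above, the bodies are placeholders, not the original class):

class Trainer:
    """Skeleton inferred from the calls in train(); bodies are placeholders."""

    def __init__(self, resume: bool, batched: bool, params):
        self.resume, self.batched, self.params = resume, batched, params

    def read_data(self):
        ...  # load the (optionally pre-batched) training data per self.params

    def create_model(self):
        ...  # build the model, restoring a checkpoint when self.resume is set

    def train_loop(self):
        ...  # iterate over epochs and batches

    def save_model(self):
        ...  # persist the trained weights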
Example No. 3
def get_params(test_config):
    """get params and save them to root dir"""
    prm = Parameters()

    # get file paths
    prm.override(test_config)
    test_parameter_file = os.path.join(prm.train.train_control.ROOT_DIR,
                                       'test_parameters.ini')
    log_file = os.path.join(prm.train.train_control.ROOT_DIR, 'test.log')

    ret = True
    if os.path.isfile(test_parameter_file):
        warnings.warn('Test parameter file {} already exists'.format(
            test_parameter_file))
        ret = query_yes_no('Overwrite parameter file?')

    if ret:
        dir_name = os.path.dirname(test_parameter_file)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        prm.save(test_parameter_file)

    logging = logging_config(log_file)
    logging.disable(logging.DEBUG)

    return prm
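
This example, the next one, and Example 16 all call query_yes_no; a common recipe for such a helper (an assumption — the original is not shown):

def query_yes_no(question, default='yes'):
    """Ask a yes/no question on stdin and return True/False (assumed helper)."""
    valid = {'yes': True, 'y': True, 'no': False, 'n': False}
    prompt = ' [Y/n] ' if default == 'yes' else ' [y/N] '
    while True:
        choice = input(question + prompt).strip().lower()
        if not choice and default is not None:
            return valid[default]
        if choice in valid:
            return valid[choice]
        print("Please answer 'yes' or 'no' ('y'/'n').")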
Example No. 4
def get_params(test_config):
    """get params and save them to root dir"""
    prm = Parameters()

    # get file paths
    prm.override(test_config)  # just to get the LOG_DIR_LIST[0]
    train_log_dir = prm.test.ensemble.LOG_DIR_LIST[0]

    parameter_file = os.path.join(train_log_dir, 'parameters.ini')
    test_parameter_file = os.path.join(prm.train.train_control.ROOT_DIR,
                                       'test_parameters.ini')
    all_parameter_file = os.path.join(prm.train.train_control.ROOT_DIR,
                                      'all_parameters.ini')
    log_file = os.path.join(prm.train.train_control.ROOT_DIR, 'test.log')

    if not os.path.isfile(parameter_file):
        raise AssertionError('Cannot find file: {}'.format(parameter_file))

    ret = True
    if os.path.isfile(test_parameter_file):
        warnings.warn('Test parameter file {} already exists'.format(
            test_parameter_file))
        ret = query_yes_no('Overwrite parameter file?')

    if ret:
        dir_name = os.path.dirname(test_parameter_file)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        prm.save(test_parameter_file)

    logging = logging_config(log_file)
    logging.disable(logging.DEBUG)

    # Done saving test parameters. Now doing the integration:
    prm = Parameters()
    prm.override(parameter_file)
    prm.override(test_parameter_file)

    ret = True
    if os.path.isfile(all_parameter_file):
        warnings.warn(
            'All parameter file {} already exists'.format(all_parameter_file))
        ret = query_yes_no('Overwrite parameter file?')

    if ret:
        dir_name = os.path.dirname(all_parameter_file)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        prm.save(all_parameter_file)

    return prm
Example No. 5
    tensor_files = [
        [
            path + f"data/super/train_word_tensor_{data_name}_2.npy",
            path + f"data/super/valid_word_tensor_{data_name}_2.npy",
        ],
        [
            path + f"data/super/train_character_tensor_{data_name}_2.npy",
            path + f"data/super/valid_character_tensor_{data_name}_2.npy",
        ],
    ]
    batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files, path)

    # batch_loader_2 = BatchLoader('')
    params = Parameters(batch_loader_2.max_word_len,
                        batch_loader_2.max_seq_len,
                        batch_loader_2.words_vocab_size,
                        batch_loader_2.chars_vocab_size, data_name, False,
                        False, False)

    neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
    if args.use_cuda:
        neg_loss = neg_loss.cuda()

    # NEG_loss is defined over two embedding matrices with shape [params.word_vocab_size, params.word_embed_size]
    optimizer = SGD(neg_loss.parameters(), 0.1)

    for iteration in range(args.num_iterations):

        input_idx, target_idx = batch_loader_2.next_embedding_seq(
            args.batch_size)
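
        # Plausible continuation (hedged sketch; Example 7 below shows the
        # same pattern): tensorize the index batches and take one SGD step.
        # The NEG_loss call signature is an assumption.
        input = Variable(t.from_numpy(input_idx).long())
        target = Variable(t.from_numpy(target_idx).long())
        if args.use_cuda:
            input, target = input.cuda(), target.cuda()

        out = neg_loss(input, target, args.num_sample).mean()

        optimizer.zero_grad()
        out.backward()
        optimizer.step()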
Example No. 6
                validate()
                # save model
                if not os.path.exists("./checkpoints"):
                    os.makedirs("./checkpoints")
                save_path = saver.save(
                    sess, "./checkpoints/{}.ckpt".format(params.checkpoint))
                print("Model saved in file: %s" % save_path)
        # builder.add_meta_graph_and_variables(sess, ["main_model"])
        if params.use_hdf5 and params.fine_tune:
            batch_gen.h5f.close()
        # run inference
        if params.mode == "inference":
            inference.inference(params, decoder, val_gen, test_gen,
                                image_f_inputs, saver, sess)


if __name__ == '__main__':
    params = Parameters()
    params.parse_args()
    coco_dir = params.coco_dir
    # save parameters for further use
    if params.save_params:
        import pickle
        param_fn = "./pickles/params_{}_{}_{}_{}.pickle".format(
            params.prior, params.no_encoder, params.checkpoint, params.use_c_v)
        print("Saving params to: ", param_fn)
        with open(param_fn, 'wb') as wf:
            pickle.dump(file=wf, obj=params)
    # train model, generate captions for val-test sets
    main(params)
Example No. 7
    idx_files = [
        path + 'super/words_vocab_2.pkl', path + 'super/characters_vocab_2.pkl'
    ]

    tensor_files = [
        [
            path + 'super/train_word_tensor_2.npy',
            path + 'super/valid_word_tensor_2.npy',
        ],
        [
            path + 'super/train_character_tensor_2.npy',
            path + 'super/valid_character_tensor_2.npy',
        ],
    ]
    batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files, path)

    # batch_loader_2 = BatchLoader('')
    params = Parameters(batch_loader_2.max_word_len,
                        batch_loader_2.max_seq_len,
                        batch_loader_2.words_vocab_size,
                        batch_loader_2.chars_vocab_size, path)

    neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
    if args.use_cuda:
        neg_loss = neg_loss.cuda()

    # NEG_loss is defined over two embedding matrices with shape [params.word_vocab_size, params.word_embed_size]
    optimizer = SGD(neg_loss.parameters(), 0.1)

    for iteration in range(args.num_iterations):

        input_idx, target_idx = batch_loader_2.next_embedding_seq(
            args.batch_size)

        input = Variable(t.from_numpy(input_idx).long())
Example No. 8
params = {
    'batch_size': 20,
    'num_epochs': 20,
    'embed_size': 464,
    'num_hidden': 337,
    'num_layers': 1,
    'learning_rate': 0.001,
    'mode_train': True,
    'sent_max_size': 228,
    'gen_length': 20,
    'temperature': 0.5,
    'keep_rate': 0.66,
    'input': ['GOT', 'PTB'][1],  # dataset selector: 0 = GOT, 1 = PTB
    'vocab_drop': 3
}
# for backward compatibility
params_c = Parameters()
params_c.batch_size = params['batch_size']
params_c.num_epochs = params['num_epochs']
params_c.embed_size = params['embed_size']
params_c.learning_rate = params['learning_rate']
params_c.pre_trained_embed = False
params_c.beam_search = False
params_c.vocab_drop = params['vocab_drop']


def online_inference(sess, data_dict, sample, seq, in_state=None, out_state=None, seed='<BOS>'):
    """ Generate sequence one character at a time, based on the previous character
    """
    sentence = [seed]
    state = None
Example No. 9
from utils.factories import Factories
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from lib.active_kmean import KMeansWrapper
from sklearn.datasets import make_blobs


logging = logging_config()

logging.disable(logging.DEBUG)
log = logger.get_logger('main')

prm_file = '/data/gilad/logs/log_2210_220817_wrn-fc2_kmeans_SGD_init_200_clusters_4_cap_204/parameters.ini'

prm = Parameters()
prm.override(prm_file)

dev = prm.network.DEVICE

factories = Factories(prm)

model = factories.get_model()
model.print_stats()  # debug

preprocessor = factories.get_preprocessor()
preprocessor.print_stats()  # debug

train_dataset = factories.get_train_dataset(preprocessor)
validation_dataset = factories.get_validation_dataset(preprocessor)
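
The imports at the top (KMeans, make_blobs, matplotlib) are unused in the visible portion; a minimal sanity check they would support (a sketch, not the author's continuation):

X, _ = make_blobs(n_samples=1000, centers=4, random_state=0)
labels = KMeans(n_clusters=4, n_init=10).fit_predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=5)
plt.title('KMeans sanity check on synthetic blobs')
plt.show()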
Example No. 10
                        metavar='BS',
                        help='batch size (default: 20)')
    parser.add_argument('--num-sample',
                        type=int,
                        default=14,
                        metavar='NS',
                        help='num sample (default: 14)')
    # note: argparse's type=bool treats any non-empty string as True,
    # so passing "--use-cuda False" would still enable CUDA
    parser.add_argument('--use-cuda',
                        type=bool,
                        default=False,
                        metavar='CUDA',
                        help='use cuda (default: False)')
    args = parser.parse_args()

    batch_loader = BatchLoader('')
    params = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)

    neg_loss = NEG_loss(
        params.vocab_size,
        params.word_embed_size,
        weights=[1 - sqrt(5e-5 / i) for i in batch_loader.words_freq])
    if args.use_cuda:
        neg_loss = neg_loss.cuda()
    """NEG_loss is defined over two embedding matrixes with shape of [params.vocab_size, params.word_embed_size]"""
    optimizer = SGD(neg_loss.parameters(), 0.1)

    for iteration in range(args.num_iterations):

        input_idx, target_idx = batch_loader.next_embedding_seq(
            args.batch_size)
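
The weights expression above echoes the word2vec-style subsampling term: a word with relative frequency f gets weight 1 - sqrt(5e-5 / f), so frequent words keep most of their weight and words at or below the 5e-5 threshold get none. A standalone illustration:

from math import sqrt

# weight(f) = 1 - sqrt(5e-5 / f)
for f in (1e-2, 1e-3, 5e-5):
    print(f'{f:g}: {1 - sqrt(5e-5 / f):.3f}')
# prints 0.01: 0.929, 0.001: 0.776, 5e-05: 0.000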
Example No. 11
    ''' ================================= BatchLoader loading ===============================================
    '''
    data_files = [args.path + args.test_file]

    idx_files = [
        args.path + 'words_vocab.pkl', args.path + 'characters_vocab.pkl'
    ]

    tensor_files = [[args.path + 'test_word_tensor.npy'],
                    [args.path + 'test_character_tensor.npy']]

    preprocess_data(data_files, idx_files, tensor_files, args.use_file, str)

    batch_loader = BatchLoader(data_files, idx_files, tensor_files)
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size, args.path)
    ''' ============================ BatchLoader for Question-2 ===============================================
    '''
    data_files = [args.path + 'super/train_2.txt']

    idx_files = [
        args.path + 'super/words_vocab_2.pkl',
        args.path + 'super/characters_vocab_2.pkl'
    ]

    tensor_files = [[args.path + 'super/train_word_tensor_2.npy'],
                    [args.path + 'super/train_character_tensor_2.npy']]
    batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files)
    parameters_2 = Parameters(batch_loader_2.max_word_len,
                              batch_loader_2.max_seq_len,
Example No. 12
if __name__ == '__main__':

    #paths = ['/var/www/lildbibio_scielo_org/proc/xml_path/new', '/var/www/lildbibio_scielo_org/proc/xml_path/inproc', '/var/www/lildbibio_scielo_org/proc/xml_path/archive' ]
    #paths = ['/var/www/lildbibio_scielo_org/proc/teste/new', 'i', 't' ]
    #python3 bhl_lilacs.py /var/www/lildbibio_scielo_org/bases/cisis1660 /var/www/lildbibio_scielo_org/bases/bhl/bhl /var/www/lildbibio_scielo_org/proc/bhl_lilacs  /var/www/lildbibio_scielo_org/bases/bhl/bhl_xml
    from utils.parameters import Parameters
    from utils.report import Report
    from configuration import Configuration

    
    configuration = Configuration('configuration.ini')
    if configuration.check_parameters(['CISIS_PATH', 'REPORT_PATH', 'INBOX_PATH', 'ARCHIVE_PATH', 'DB_FILENAME']):
        cisis_path, report_path, inbox_path, archive_path, db_filename  = configuration.return_parameters(['CISIS_PATH', 'REPORT_PATH', 'INBOX_PATH', 'ARCHIVE_PATH', 'DB_FILENAME']) 
        
        parameter_list = ['', 'source of xml files: new|archive' ]
        parameters = Parameters(parameter_list)
    
        if parameters.check_parameters(sys.argv):
            script, xml_source = sys.argv
        
            if xml_source not in ('new', 'archive'):
                xml_source = 'new'

            cisis = CISIS(cisis_path)
            files_set = BHL_Files_Set(inbox_path, archive_path)
            report = Report(report_path + '/_bhl_db.log', report_path + '/_bhl_db.err', report_path + '/_bhl_db.txt')

            proc = BHL_LILACS(cisis, files_set, report)
        
            proc.generate_db_files(xml_source)
            proc.generate_db(db_filename)
Example No. 13
from utils.evaluation import evaluate_model
from utils.sampler import WarpSampler
from utils.checkpoint import save_model
from utils import utils

warnings.filterwarnings("ignore", category=DeprecationWarning)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if __name__ == '__main__':
    args = parse_args()
    print('Loading dataset...', end="")
    args.device = device
    args.date_time = datetime.datetime.now()

    dataset = EmbedDataset(args)
    params = Parameters(args, dataset)
    print("\rDataset Statistics:")
    print(f"    Users: {params.num_user} | Lists: {params.num_list} | Items:{params.num_item}")
    print(f"    Train: {params.num_train_instances} | Valid: {params.num_valid_instances} | Test: {params.num_test_instances}")
    print(f"    Density: {100 * params.num_train_instances / (params.num_list * params.num_item):.4f} %")
    print("="*60)
    args.args_str = params.get_args_to_string()
    t1 = time()

    models = Models(params, device=device)
    model = models.get_model()
    model.to(device)
    save_model_path = os.path.join("./saved_models", params.dataset + ".pth.tar")

    criterion_li = torch.nn.BCELoss()
    optimizer_gnn = torch.optim.Adam(model.parameters(), lr=params.lr)
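
The snippet ends right after building the loss and optimizer; one plausible training step for this setup (a sketch: train_loader and the model's calling convention are assumptions):

    model.train()
    for users, items, labels in train_loader:  # train_loader is hypothetical
        optimizer_gnn.zero_grad()
        preds = model(users.to(device), items.to(device))  # calling convention assumed
        loss = criterion_li(preds, labels.float().to(device))
        loss.backward()
        optimizer_gnn.step()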
Example No. 14
from utils.arguments import parse_args
from utils.parameters import Parameters
from utils.checkpoint import load_model
from utils.valid_test_error_seq import ValidTestErrorSEQ

warnings.filterwarnings("ignore", category=DeprecationWarning)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if __name__ == '__main__':
    args = parse_args()
    print('Loading dataset...', end="")
    args.device = device
    args.date_time = datetime.datetime.now()

    dataset = EmbedDataset(args)
    params = Parameters(args, dataset)

    print("\rDataset Statistics:")
    print(
        f"    Users: {params.num_user} | Lists: {params.num_list} | Items:{params.num_item}"
    )
    print(
        f"    Train: {params.num_train_instances} | Valid: {params.num_valid_instances} | Test: {params.num_test_instances}"
    )
    print(
        f"    Density: {100 * params.num_train_instances / (params.num_list * params.num_item):.4f} %"
    )

    save_model_path = os.path.join("./saved_models",
                                   params.dataset + ".pth.tar")
Example No. 15
    parser.add_argument(
        "-c",
        "--config",
        dest='CONFIG_FILE',
        default=os.path.join(os.path.curdir, os.path.pardir, "config",
                             "train_config.yaml"),
        #                        default=os.path.join(os.path.curdir, os.path.pardir, "config", "debug_config.yaml"),
        help="config file",
    )
    parser.print_help()
    args = parser.parse_args()

    ##############################################################################
    #%% Read config file

    # Read config file
    params = Parameters(args.CONFIG_FILE)

    # Save parameters
    params.write_parameters()

    ###############################################################################
    #%% Build model

    # Init model
    model = models.models_dict[params.getp("MODEL_NAME")](
        nb_channels=params.getp("NB_CHANNELS"),
        nb_classes=params.getp("NB_CLASSES"),
        nb_scales=params.getp("NB_SCALES"))

    # Put the model on the device (GPU)
    model = model.cuda(device=params.getp("DEVICE_ID"))
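
Examples 15, 17, and 19 share the same Parameters(path) / getp(key) pattern; a minimal YAML-backed stand-in for that interface (hypothetical — the real class is not shown):

import yaml

class ParametersSketch:
    """Minimal stand-in for the Parameters interface used in these examples."""

    def __init__(self, config_file):
        with open(config_file) as f:
            self._values = yaml.safe_load(f)

    def getp(self, key):
        # keyed lookup, mirroring params.getp("MODEL_NAME") above
        return self._values[key]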
Example No. 16
def get_params(test_config, parser_args=None):
    """get params and save them to root dir"""

    # Just to get the ROOT_DIR and save prm test_config
    prm = Parameters()
    prm.override(test_config)

    # get manual test parameters from config:
    if parser_args is not None:
        # overriding some parameters manually from parser:
        prm.train.train_control.ROOT_DIR = parser_args.ROOT_DIR
        prm.train.train_control.TEST_DIR = parser_args.ROOT_DIR + '/test'
        prm.train.train_control.PREDICTION_DIR = parser_args.ROOT_DIR + '/prediction'
        prm.train.train_control.CHECKPOINT_DIR = parser_args.ROOT_DIR + '/checkpoint'
        prm.test.test_control.KNN_WEIGHTS = parser_args.KNN_WEIGHTS
        prm.test.test_control.KNN_NORM = parser_args.KNN_NORM
        prm.train.train_control.PCA_REDUCTION = (
            parser_args.PCA_REDUCTION == 'True')
        prm.train.train_control.PCA_EMBEDDING_DIMS = int(
            parser_args.PCA_EMBEDDING_DIMS)
        prm.test.test_control.KNN_NEIGHBORS = int(parser_args.KNN_NEIGHBORS)
        prm.test.test_control.DUMP_NET = (parser_args.DUMP_NET == 'True')
        prm.test.test_control.LOAD_FROM_DISK = (
            parser_args.LOAD_FROM_DISK == 'True')

    ROOT_DIR = prm.train.train_control.ROOT_DIR

    # get time stamp
    ts = get_timestamp()

    # get files paths
    parameter_file = os.path.join(ROOT_DIR, 'parameters.ini')
    test_parameter_file = os.path.join(ROOT_DIR,
                                       'test_parameters_' + ts + '.ini')
    all_parameter_file = os.path.join(ROOT_DIR,
                                      'all_parameters_' + ts + '.ini')
    log_file = os.path.join(ROOT_DIR, 'test_' + ts + '.log')
    logging = logging_config(log_file)
    logging.disable(logging.DEBUG)

    if not os.path.isfile(parameter_file):
        raise AssertionError('Cannot find file: {}'.format(parameter_file))

    dir_name = os.path.dirname(test_parameter_file)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    prm.save(test_parameter_file)

    # Done saving test parameters. Now doing the integration:
    prm = Parameters()
    prm.override(parameter_file)
    prm.override(test_parameter_file)
    if parser_args is not None:
        # overriding some parameters manually from parser:
        prm.train.train_control.ROOT_DIR = parser_args.ROOT_DIR
        prm.train.train_control.TEST_DIR = parser_args.ROOT_DIR + '/test'
        prm.train.train_control.PREDICTION_DIR = parser_args.ROOT_DIR + '/prediction'
        prm.train.train_control.CHECKPOINT_DIR = parser_args.ROOT_DIR + '/checkpoint'
        prm.test.test_control.KNN_WEIGHTS = parser_args.KNN_WEIGHTS
        prm.test.test_control.KNN_NORM = parser_args.KNN_NORM
        prm.train.train_control.PCA_REDUCTION = (
            parser_args.PCA_REDUCTION == 'True')
        prm.train.train_control.PCA_EMBEDDING_DIMS = int(
            parser_args.PCA_EMBEDDING_DIMS)
        prm.test.test_control.KNN_NEIGHBORS = int(parser_args.KNN_NEIGHBORS)
        prm.test.test_control.DUMP_NET = (parser_args.DUMP_NET == 'True')
        prm.test.test_control.LOAD_FROM_DISK = (
            parser_args.LOAD_FROM_DISK == 'True')

    dir_name = os.path.dirname(all_parameter_file)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    prm.save(all_parameter_file)

    return prm
Example No. 17
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-l",
        "--log_dir",
        dest='LOG_DIR',
        help="log directory of trained model (should contain config.yaml and subdir models/)",
    )
    parser.print_help()
    args = parser.parse_args()

    ###############################################################################
    #%% Read config file

    # Read config file (loads config file of trained model)
    training_params = Parameters(os.path.join(args.LOG_DIR, "config.yaml"))

    ###############################################################################
    ABSCISSE_COORDINATE = 0  # epoch
    #    ABSCISSE_COORDINATE = 1 # cpt_backward_pass
    #    ABSCISSE_COORDINATE = 5 # time

    ###############################################################################
    nb_classes = training_params.getp("NB_CLASSES")

    LOG_DIR = training_params.getp("LOG_DIR")

    log_files = glob.glob(os.path.join(LOG_DIR, "*.txt"))

    # Training and Validation Loss and Accuracy
    print("\n 1st figure : Loss and Accuracy")
Example No. 18
                        help="Indices to words dictionary")
    parser.add_argument('--gpu',
                        default='',
                        help="Specify GPU number if using a GPU")
    parser.add_argument(
        '--c_v_generator',
        default=None,
        help="If using cluster vectors, specify the TensorFlow API model. "
        "For more information, see the README")
    parser.add_argument('--gen_method',
                        default='greedy',
                        help='greedy, beam_search or sample')
    parser.add_argument('--params_path',
                        default=None,
                        help="specify params pickle file")
    parser.add_argument('--beam_size',
                        type=int,
                        default=2,
                        help="If using beam_search, specify beam_size")
    args = parser.parse_args()
    # CUDA settings
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    # parameter of the model
    params = Parameters()
    generator = Generator(checkpoint_path=args.checkpoint,
                          params_path=args.params_path,
                          vocab_path=args.vocab_path,
                          gen_method=args.gen_method)
    caption = generator.generate_caption(args.img_path, args.beam_size)
    print(caption[0]['caption'])
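
A hypothetical invocation of this script (flag names inferred from the parser and the attribute accesses above; the script name is an assumption):

# python generate_caption.py --img_path cat.jpg --checkpoint ./model.ckpt \
#     --gen_method beam_search --beam_size 3 --gpu 0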
Example No. 19
    )
    parser.add_argument(
        "-l",
        "--log_dir",
        dest='LOG_DIR',
        help="log directory of trained model (should contain config.yaml and subdir models/)",
    )
    parser.print_help()
    args = parser.parse_args()

    ##############################################################################
    #%% Read config file

    # Read config file (loads config file of trained model)
    params = Parameters(os.path.join(args.LOG_DIR, "config.yaml"))
    params.update_parameters(args.CONFIG_FILE)

    # Save parameters
    params.write_parameters()

    ###############################################################################
    #%% Load model

    print()

    # Init model
    model = torch.load(
        os.path.join(params.getp("MODEL_DIR"),
                     "best_train_model_checkpoint_fold_00_sample_000.tar"))
Example No. 20
    tensor_files = [
        [
            path + f"data/train_word_tensor_{args.embeddings_name}.npy",
            path + f"data/valid_word_tensor_{args.embeddings_name}.npy",
        ],
        [
            path + f"data/train_character_tensor_{args.embeddings_name}.npy",
            path + f"data/valid_character_tensor_{args.embeddings_name}.npy",
        ],
    ]

    batch_loader = BatchLoader(data_files, idx_files, tensor_files, path)
    parameters = Parameters(
        batch_loader.max_word_len,
        batch_loader.max_seq_len,
        batch_loader.words_vocab_size,
        batch_loader.chars_vocab_size,
        args.embeddings_name,
        args.res_model,
        args.hrvae,
        args.wae,
    )

    """ =================== Doing the same for encoder-2 ===============================================
    """
    data_files = [path + f"data/super/train_{args.data_name}_2.txt", path + f"data/super/test_{args.data_name}_2.txt"]

    idx_files = [
        path + f"data/super/words_vocab_{args.embeddings_name}_2.pkl",
        path + f"data/super/characters_vocab_{args.embeddings_name}_2.pkl",
    ]

    tensor_files = [
Example No. 21
from utils.split_preprocess_data import SplitPreprocessData

datasets = [
    "paysim", "paysim-custom", "ccfraud", "ieee", "nslkdd", "saperp-ek",
    "saperp-vk", "mnist", "cifar10"
]
methods = ["all", "ocan", "ocan-ae", "ae", "rbm", "vae", "dae", "cnn"]
baselines = ["both", "usv", "sv", "none"]

parser = Parser(datasets, methods, baselines)

dataset_string, verbosity, seed, method, baseline, iteration_count, use_oversampling, cross_validation_count = \
    parser.get_args()

# Set parameters
parameter_class = Parameters(dataset_string)

usv_train, sv_train, sv_train_fraud, test_benign, test_fraud = \
    parameter_class.get_main_parameters(cross_validation_count)

x_ben, x_fraud, preprocess_class = \
    LoadData(dataset_string, parameter_class.get_path(), seed, parameter_class, verbosity).get_data()

# Initialize collections for evaluation results
prec_coll = list()
reca_coll = list()
f1_coll = list()
acc_coll = list()
pr_auc_coll = list()
roc_auc_coll = list()
method_list = list()
Example No. 22
    parser = argparse.ArgumentParser(description='Sampler')
    # note: argparse's type=bool treats any non-empty string as True,
    # so passing "--use-cuda False" would still enable CUDA
    parser.add_argument('--use-cuda',
                        type=bool,
                        default=True,
                        metavar='CUDA',
                        help='use cuda (default: True)')
    parser.add_argument('--num-sample',
                        type=int,
                        default=10,
                        metavar='NS',
                        help='num samplings (default: 10)')

    args = parser.parse_args()

    batch_loader = BatchLoader('')
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE_dilated(parameters)
    rvae.load_state_dict(t.load('trained_RVAE'))
    if args.use_cuda:
        rvae = rvae.cuda()

    for iteration in range(args.num_sample):
        seed = np.random.normal(size=[1, parameters.latent_variable_size])
        result = rvae.sample(batch_loader, 50, seed, args.use_cuda)
        print(result)
        print()
Example No. 23
                        help='dropout (default: 0.12)')
    parser.add_argument('--aux',
                        type=float,
                        default=0.4,
                        metavar='DR',
                        help='aux loss coef (default: 0.4)')
    parser.add_argument('--use-trained',
                        type=bool,
                        default=False,
                        metavar='UT',
                        help='load pretrained model (default: False)')

    args = parser.parse_args()

    batch_loader = BatchLoader()
    parameters = Parameters(batch_loader.vocab_size)

    vae = VAE(parameters.vocab_size, parameters.embed_size,
              parameters.latent_size, parameters.decoder_rnn_size,
              parameters.decoder_rnn_num_layers)
    if args.use_trained:
        vae.load_state_dict(t.load('trained_VAE'))
    if args.use_cuda:
        vae = vae.cuda()

    optimizer = Adam(vae.parameters(), args.learning_rate)

    for iteration in range(args.num_iterations):
        '''Train step'''
        input, decoder_input, target = batch_loader.next_batch(
            args.batch_size, 'train', args.use_cuda)
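
        # Hedged continuation sketch (not the author's exact code): the VAE
        # call signature and the loss weighting below are assumptions.
        logits, kld = vae(args.dropout, input, decoder_input)
        reconstruction = F.cross_entropy(logits, target)  # F = torch.nn.functional, assumed imported
        loss = reconstruction + args.aux * kld            # --aux weights the auxiliary KL term

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()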