def create_counterbalance(self, n_rep, seeds_file=None):
     """ Generates seeds of all the trials """
     if seeds_file:
         self.seeds = load_data(seeds_file, 'seed')
         self.truth = load_data(seeds_file, 'answer')
     else:
         n_unique = self.n_trials // n_rep
         max_seed = np.iinfo(np.uint32).max
         self.seeds = np.tile(np.random.randint(0, high=max_seed, size=(n_unique, 1), dtype=np.uint32), (n_rep, 1))
         np.random.shuffle(self.seeds)
         self.truth = None
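
A minimal standalone sketch of the counterbalancing idea above (illustrative values, not part of the original class): each unique seed is repeated n_rep times with np.tile, then the trial order is shuffled in place.

import numpy as np

n_trials, n_rep = 12, 3
n_unique = n_trials // n_rep                 # 4 unique seeds
max_seed = np.iinfo(np.uint32).max
unique_seeds = np.random.randint(0, high=max_seed, size=(n_unique, 1), dtype=np.uint32)
seeds = np.tile(unique_seeds, (n_rep, 1))    # shape (12, 1): each unique seed appears 3 times
np.random.shuffle(seeds)                     # shuffle rows (trials) in place
assert seeds.shape == (n_trials, 1)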
Example #2
def main(arguments):

    if arguments.operation == 'train':
        # get the train data
        # features: train_data[0], labels: train_data[1]
        train_features, train_labels = data.load_data(dataset=arguments.train_dataset)

        # get the validation data
        # features: validation_data[0], labels: validation_data[1]
        validation_features, validation_labels = data.load_data(dataset=arguments.validation_dataset)

        # get the size of the dataset for slicing
        train_size = train_features.shape[0]
        validation_size = validation_features.shape[0]

        # slice the dataset to be exact as per the batch size
        # e.g. train_size = 1898322, batch_size = 256
        # [:1898322-(1898322%256)] = [:1898240]
        # 1898322 // 256 = 7415; 7415 * 256 = 1898240
        train_features = train_features[:train_size-(train_size % BATCH_SIZE)]
        train_labels = train_labels[:train_size-(train_size % BATCH_SIZE)]

        # modify the size of the dataset to be passed on model.train()
        train_size = train_features.shape[0]

        # slice the dataset to be exact as per the batch size
        validation_features = validation_features[:validation_size-(validation_size % BATCH_SIZE)]
        validation_labels = validation_labels[:validation_size-(validation_size % BATCH_SIZE)]

        # modify the size of the dataset to be passed on model.train()
        validation_size = validation_features.shape[0]

        model = GruSoftmax(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, cell_size=CELL_SIZE, dropout_rate=DROPOUT_P_KEEP,
                           num_classes=N_CLASSES, sequence_length=SEQUENCE_LENGTH)

        model.train(checkpoint_path=arguments.checkpoint_path, log_path=arguments.log_path,
                    model_name=arguments.model_name, epochs=HM_EPOCHS, train_data=[train_features, train_labels],
                    train_size=train_size, validation_data=[validation_features, validation_labels],
                    validation_size=validation_size, result_path=arguments.result_path)
    elif arguments.operation == 'test':
        test_features, test_labels = data.load_data(dataset=arguments.validation_dataset)

        test_size = test_features.shape[0]

        test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
        test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]

        test_size = test_features.shape[0]

        GruSoftmax.predict(batch_size=BATCH_SIZE, cell_size=CELL_SIZE, dropout_rate=DROPOUT_P_KEEP,
                           num_classes=N_CLASSES, test_data=[test_features, test_labels], test_size=test_size,
                           checkpoint_path=arguments.checkpoint_path, result_path=arguments.result_path)
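
The slicing above simply drops the remainder so the sample count is an exact multiple of the batch size; a small generic helper (not part of the original module) that captures the same arithmetic:

def truncate_to_batch_size(features, labels, batch_size):
    """Drop trailing samples so that len(features) is an exact multiple of batch_size."""
    exact = features.shape[0] - (features.shape[0] % batch_size)
    return features[:exact], labels[:exact]

# e.g. 1898322 samples with batch_size=256 keeps the first 1898240 rows, as in the comment above.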
Example #3
    def __init__(self, args: Namespace, logger: HtmlLogger):
        # init model
        model = self.buildModel(args)
        model = model.cuda()
        # create DataParallel model instance
        self.modelParallel = model
        # self.modelParallel = DataParallel(model, args.gpu)
        # assert (id(model) == id(self.modelParallel.module))

        self.args = args
        self.model = model
        self.logger = logger

        # load data
        self.train_queue, self.valid_queue, self.createSearchQueue = load_data(
            args)
        # init train folder path, where to save loggers, checkpoints, etc.
        self.trainFolderPath = '{}/{}'.format(args.save, args.trainFolder)

        # build statistics containers
        containers = self.buildStatsContainers()
        # build statistics rules
        rules = self.buildStatsRules()
        # init statistics instance
        self.statistics = Statistics(containers, rules, args.save)

        # log parameters
        logParameters(logger, args, model)
Example #4
    def testNormalize(self):
        df = dt.load_data(params.global_params['db_path'],
                          'C',
                          index_col='date',
                          from_date=20100101,
                          to_date=20100405,
                          limit=30)
        df = df['close']
        look_back = 20
        look_ahead = 1
        coeff = 3.0
        print(df.tail())
        data = df.values
        print('data.shape', data.shape)

        _, y_data = dt.normalize(data,
                                 look_back=look_back,
                                 look_ahead=look_ahead,
                                 alpha=coeff)

        tmp = dt.denormalize(y_data, data, look_back, look_ahead, coeff)
        print('denorma.shape', tmp.shape)

        plt.plot(data[look_back:], label='actual')
        plt.plot(tmp, label='denorm')
        plt.legend(loc='upper left')
        plt.show()
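
dt.normalize / dt.denormalize themselves are not shown here; as a stand-in, the round-trip property this test inspects visually can be illustrated with a plain z-score scaler (an assumption about the general shape of the API, not dt's actual implementation):

import numpy as np

def zscore_normalize(data):
    mu, sigma = data.mean(), data.std()
    return (data - mu) / sigma, (mu, sigma)

def zscore_denormalize(y, stats):
    mu, sigma = stats
    return y * sigma + mu

values = np.random.rand(100)
y, stats = zscore_normalize(values)
assert np.allclose(zscore_denormalize(y, stats), values)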
Example #5
def load_page():

    markets, exception = load_data()

    if exception:
        st.sidebar.text(str(exception))
        st.title("⭕️The data was not correctly loaded")
        return

    names = get_names()

    title = st.empty()
    st.sidebar.title("Crypto Explorer")

    # OHLC Visualisation
    st.sidebar.subheader('Choose your asset:')
    base = st.sidebar.selectbox('Select base', ['USDT', 'BTC', 'ETH'])
    keys = get_keys(markets, base=base)

    market = st.sidebar.selectbox('Select market', keys)
    resolution = st.sidebar.selectbox('Select resolution', ['1d', '1h', '1m'])

    code = market.split('/')[0]
    name = names[code] if code in names else code
    title.header(market + ' - ' + name)
    data = get_ohlcv(market, timeframe=resolution)
    range_ = st.sidebar.slider('Historical range',
                               min_value=min([30, data.shape[0]]),
                               max_value=min([2000, data.shape[0]]),
                               value=min([1000, int(data.shape[0] / 2)]),
                               step=10)

    plot_candlestick(data[-range_:])
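
get_keys is not defined in this snippet; presumably it selects the market symbols quoted in the chosen base currency. A hypothetical sketch of such a filter (the 'XXX/BASE' symbol format matches the market.split('/') usage above):

def get_keys(markets, base='USDT'):
    """Return market symbols such as 'BTC/USDT' whose quote currency matches `base` (hypothetical helper)."""
    return sorted(m for m in markets if m.endswith('/' + base))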
Example #6
def training_pipeline(args):
    seed(args.seed)
    set_random_seed(args.seed)
    x_train, x_test, y_test, x_val, y_val = load_data(args)

    G, D, GAN = load_model(args)
    pretrain(args, G, D, GAN, x_train, x_test, y_test, x_val, y_val)
    train(args, G, D, GAN, x_train, x_test, y_test, x_val, y_val)
Example #7
def test_load_data(tmpdir):
    """test load data from csv"""
    save_data(tmpdir.join("test.csv"),
              [["Poprad", "Poprad", "A", "Adam", "Adam"]])
    df = load_data(tmpdir.join("test.csv"))

    assert df.shape == (1, 5), "loaded data has wrong size"
    assert df.iloc[0]["okres"] == "Poprad"
    assert df.iloc[0]["katastralne uzemie"] == "Poprad"
    assert df.iloc[0]["prve pismeno"] == "A"
    assert df.iloc[0]["priezvisko"] == "Adam"
    assert df.iloc[0]["vlastnik"] == "Adam"
Example #8
def visualize(model_weights, model, dataset='val', batch_size=1, shuffle=True):

    DATADIR = 'datasets/citys'
    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    model = model.to(device)

    model.load_state_dict(model_weights['model_state_dict'])
    print('Finished loading model!')
    model.eval()

    data_generator = load_data(DATADIR, batch_size=batch_size, shuffle=shuffle)
    val_generator = data_generator[dataset]

    data = next(iter(val_generator))
    imgs, mask = data[0].to(device), data[1].to(device)

    with torch.no_grad():
        prediction = model(imgs)

    pred = torch.argmax(prediction, dim=1).cpu()

    mask = 255 * torch.squeeze(mask, dim=1)  # remove redundant channel
    imgs = imgs.permute(0, 2, 3, 1).cpu()

    fig, ax = plt.subplots(nrows=batch_size, ncols=3, squeeze=False)  # squeeze=False keeps ax 2-D even when batch_size == 1

    for j in range(batch_size):

        pred_img = get_color_image(pred[j])
        mask_img = get_color_image(mask[j])

        ax[j, 0].imshow(imgs[j])
        ax[j, 1].imshow(pred_img)
        ax[j, 2].imshow(mask_img)

    np.vectorize(lambda ax: ax.axis('off'))(ax)  # disable axis

    cols = ['image', 'prediction', 'ground truth']  # titles

    for ax, col in zip(ax[0], cols):
        ax.set_title(col)  # set titles

    plt.tight_layout()
    plt.show()

    return
def prediction(groundTruthImgList,
               file,
               model,
               dataset='val',
               batch_size=1,
               shuffle=True):

    DATADIR = 'datasets/citys'
    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    model_weights = torch.load('weights/{}.pt'.format(file),
                               map_location=device)
    model = model.to(device)

    model.load_state_dict(model_weights['model_state_dict'])
    print('Finished loading model!')
    model.eval()

    data_generator = load_data(DATADIR, batch_size=batch_size, shuffle=shuffle)
    val_generator = data_generator[dataset]

    for i, (X, y) in enumerate(val_generator):
        imgs = X.to(device)

        with torch.no_grad():
            prediction = model(imgs)

        pred = torch.argmax(prediction, dim=1).cpu()

        # convert to the right format to save the prediction image
        image_to_save = torch.squeeze(pred, dim=0).numpy()

        # get name of prediction image to save
        csFile = getCsFileInfo(groundTruthImgList[i])
        # save the prediction images in the 'results' folder
        filePattern = "results/{}/{}/{}_{}_{}_pred.png".format(
            dataset, file, csFile.city, csFile.sequenceNb, csFile.frameNb)

        # save prediction image
        cv2.imwrite(filePattern, image_to_save)


#        if i == 4:
#            break
    print('Prediction images saved.')

    return
def main(arguments):

    model_choice = arguments.model
    model_path = arguments.model_path
    dataset_path = arguments.dataset

    assert (model_choice == 1 or model_choice == 2 or model_choice
            == 3), "Invalid choice: Choose among 1, 2, and 3 only."
    assert os.path.exists(
        path=model_path), "{} does not exist!".format(model_path)
    assert os.path.exists(
        path=dataset_path), "{} does not exist!".format(dataset_path)

    dataset = np.load(dataset_path)

    features, labels = load_data(dataset=dataset)

    labels = one_hot_encode(labels=labels)

    dataset_size = features.shape[0]
    print(features.shape)

    if model_choice == 2:
        features = np.reshape(
            features,
            (
                features.shape[0],
                int(np.sqrt(features.shape[1])),
                int(np.sqrt(features.shape[1])),
            ),
        )
        predictions, accuracies = predict(
            dataset=[features, labels],
            model=model_choice,
            model_path=model_path,
            size=dataset_size,
            batch_size=256,
            cell_size=256,
        )
    else:
        predictions, accuracies = predict(
            dataset=[features, labels],
            model=model_choice,
            model_path=model_path,
            size=dataset_size,
            batch_size=256,
        )

    print("Predictions : {}".format(predictions))
    print("Accuracies : {}".format(accuracies))
    print("Average accuracy : {}".format(np.mean(accuracies)))
Example #11
File: run.py  Project: coffee-cup/yolo
def main(config):
    """The main function."""

    # ----------------------------------------
    # Load pascal voc datasets
    print("\n--- Reading PASCAL {} data".format(config.year))
    dataset_train = load_data(config.data_dir, config.record_file, config.year,
                              'train')
    dataset_val = load_data(config.data_dir, config.record_file, config.year,
                            'val')

    # ----------------------------------------
    # Create the model
    yolo = Yolo(config, dataset_train, dataset_val, debug=config.debug)

    # ----------------------------------------
    # Start training
    try:
        yolo.train()
    except tf.errors.OutOfRangeError:
        pass

    return
def evaluate(model_weights, model, dataset='val', batch_size=1):

    DATADIR = 'datasets/citys'
    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    model = model.to(device)

    model.load_state_dict(model_weights['model_state_dict'])
    print('Finished loading model!')
    model.eval()

    data_generator = load_data(DATADIR, batch_size=batch_size)
    val_generator = data_generator[dataset]

    for i, (X, y) in enumerate(val_generator):
        imgs = X.to(device)
        mask = y.to(device)

        with torch.no_grad():
            prediction = model(imgs)

        pred = torch.argmax(prediction, dim=1).cpu()

        if i == 0:
            intersection = np.zeros(34, dtype=int)
            union = np.zeros(34, dtype=int)
        # calculate intersection and union per batch and add to previous batches
        intersection, union = calculate_I_and_U(mask,
                                                pred,
                                                intersection=intersection,
                                                union=union)

    # calculate IoU over full set
    IoU = calculate_IoU(intersection, union, n_classes=34)
    IoU_dict, IoU_average = calculate_average_IoU(IoU)

    print('IoU per class: ')
    for key, value in IoU_dict.items():
        print(key, ' : ', value)
    print('IoU average for 34 classes: ', IoU_average)
    IoU_19_average = calculate_IoU_train_classes(IoU)
    print('IoU average for 19 classes: ', IoU_19_average)

    return
def count_classes(data_set='train', batch_size=100):
    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    data_generator = load_data('datasets/citys',
                               batch_size=batch_size)[data_set]

    n_classes = 34
    class_amount = {entry: 0 for entry in range(n_classes)}

    for i, (_, y) in enumerate(data_generator):
        y = (255 * y).int().to(device)
        classes, counts = torch.unique(y, return_counts=True)

        for j in range(len(classes)):
            class_amount[classes[j].item()] += counts[j].item()

    print(class_amount)

    return class_amount
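
A small follow-up sketch (not part of the original) showing how the returned pixel counts can be turned into per-class frequencies:

def class_frequencies(class_amount):
    """Convert {class_id: pixel_count} into {class_id: fraction of all pixels}."""
    total = sum(class_amount.values())
    return {k: v / total for k, v in class_amount.items()}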
def main(arguments):
    model_choice = arguments.model
    model_path = arguments.model_path
    dataset_path = arguments.dataset

    assert os.path.exists(
        path=model_path), '{} does not exist!'.format(model_path)
    assert os.path.exists(
        path=dataset_path), '{} does not exist!'.format(dataset_path)

    dataset = np.load(dataset_path)
    features, labels = load_data(dataset=dataset)
    labels = one_hot_encode(labels=labels)

    dataset_size = features.shape[0]
    print(features.shape)

    if model_choice == 2:
        features = np.reshape(
            features, (features.shape[0], int(np.sqrt(
                (features.shape[1]))), int(np.sqrt((features.shape[1])))))
        predictions, accuracies = predict(dataset=[features, labels],
                                          model=model_choice,
                                          model_path=model_path,
                                          size=dataset_size,
                                          batch_size=256,
                                          cell_size=256)
    else:
        predictions, accuracies = predict(dataset=[features, labels],
                                          model=model_choice,
                                          model_path=model_path,
                                          size=dataset_size,
                                          batch_size=256)

    print('Predictions: {}'.format(predictions))
    print('Accuracies: {}'.format(accuracies))
    print('Average accuracy: {}'.format(np.mean(accuracies)))
Example #15
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import pandas as pd
import numpy as np
import params
import utils.data as dt

data = pd.DataFrame()
for s in params.symbols:
    symbol = s['symbol']
    df = dt.load_data(params.global_params['db_path'],
                      symbol,
                      to_date=20161231,
                      index_col='date')['close']

    # cumulative sum of daily percentage changes gives each symbol a comparable return series
    data[symbol] = pd.Series(np.cumsum(df.pct_change()), index=df.index)

print(data.head())

sns.heatmap(data.corr(), annot=True)
plt.show()
Example #16
# Set Summary writer for Tensorboard
tb_logdir = 'runs/' + ts
writer = SummaryWriter(log_dir=tb_logdir)

# Information to be logged
optim_name = re.split(' ', str(optim), maxsplit=1)[0]
scheduler_name = str(scheduler).split('.')[3].split(' ')[0]
init_lr = optim.param_groups[0]['lr']

# Load Dataset, Dataloaders, etc
# Grab the stored Lesk scores
# Get and Store WordNet Synsets of each class
print(args)
print("Training ", model.name)
dataset, dataloader, dataset_sizes = load_data(Path(args.data_path),
                                               batch_size=batch_size)
# read cached Lesk scores if the file exists; otherwise compute them
lesk_scores = pd.read_csv(args.lesk_path) if Path(
    args.lesk_path).exists() else exhuastive_lesk_simarity_metric(classes)
scene_synsets = get_scene_synset_dictionary(classes)

# Each epoch has a training and validation phase
phases = ['train', 'val']
for epoch in range(epochs):
    for phase in phases:
        if phase == 'train':
            scheduler.step()
            model.train()  # Set model to training mode
            model = model.to(device)
        else:
            model.eval()  # Set model to evaluate mode
            model = model.to(device)
Example #17
import pandas as pd

from utils import data
from utils import metrics
from sklearn.naive_bayes import GaussianNB

# Dataset 1 (Latin letters)
# Training
trainX, trainY = data.load_data('train_1.csv')
clf = GaussianNB()
clf.fit(trainX, trainY)
# Testing
testX, testY = data.load_data('test_with_label_1.csv')
predictions = pd.DataFrame(clf.predict(testX))
data.generate_csv(predictions, 'GNB-DS1.csv')
metrics.compute(predictions, testY, 'GNB-DS1.csv')
data.generate_cm(predictions, testY, 'GNB-DS1.png')

# Dataset 2 (Greek letters)
# Training
trainX, trainY = data.load_data('train_2.csv')
clf = GaussianNB()
clf.fit(trainX, trainY)
# Testing
testX, testY = data.load_data('test_with_label_2.csv')
predictions = pd.DataFrame(clf.predict(testX))
data.generate_csv(predictions, 'GNB-DS2.csv')
metrics.compute(predictions, testY, 'GNB-DS2.csv')
data.generate_cm(predictions, testY, 'GNB-DS2.png')
Example #18
import argparse

from utils.config import set_gpu
from utils.data import BalancedDataGenerator, load_data
from utils.model import load_model
from utils.plot import make_confusion_matrix
from utils.utils import get_acc

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='chestx')
parser.add_argument('--model', type=str, default='inceptionv3')
parser.add_argument('--gpu', type=str, default='0')
args = parser.parse_args()

set_gpu(args.gpu)

X_train, X_test, y_train, y_test = load_data(
    dataset=args.dataset, normalize=True)

model = load_model(
    dataset=args.dataset,
    nb_class=y_train.shape[1],
    model_type=args.model,
    mode='train'
)


def step_decay(epoch):
    lr = 1e-3
    if epoch > 45:
        lr = 1e-5
    elif epoch > 40:
        lr = 1e-4
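
The snippet is cut off before step_decay returns; a sketch of how such a schedule is typically wired into Keras training (LearningRateScheduler is the standard tf.keras callback; the completed return and the fit call are assumptions):

from tensorflow.keras.callbacks import LearningRateScheduler

def step_decay(epoch):
    lr = 1e-3
    if epoch > 45:
        lr = 1e-5
    elif epoch > 40:
        lr = 1e-4
    return lr

# model.fit(X_train, y_train, validation_data=(X_test, y_test),
#           callbacks=[LearningRateScheduler(step_decay)], epochs=50)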
Example #19
    print("Saving: {}".format(path_embedding_latent))
    np.savez(path_embedding_latent, **latent_list)

    if save_loss:
        loss_file = "loss_plots/{}_loss_iters_{}_step_{}_{}.npy".format(
            basename,
            str(ITERATIONS).zfill(6),
            str(SAVE_STEP).zfill(4), init)
        path_loss = os.path.join(SAVING_DIR, loss_file)
        print("Saving Loss: {}".format(path_loss))
        np.save(path_loss, np.array(loss_list))
    return loss_list


# load images from directory
data = u_data.load_data(PATH_DIR)

# testing downsampling
test_name = 'only_embed'
options_lambdas = [0.001, 0.005, 0.01]

# for i in range(len(data)):
#   for lambda_v in options_lambdas:
#     loss_list = run_optimization(data, id = i,
#                                 init = 'w_mean',
#                                 sub_fix=f"_{test_name}_lambda_{lambda_v}",
#                                 save_loss = True,
#                                 lambda_v=lambda_v)

condition_function_options = {
    "colorization": mu.convert2grayscale,
Example #20
def main(args):
    # Setting up an experiment
    config, params = setup(args)

    # Setting up logger
    logger = get_logger(config['model_name'], config['dirs']['logs_dir'])

    # Extracting configurations
    data_config = config['data']
    logs_config = config['logs']
    training_config = config['training']
    sampling_config = config['sampling']
    dirs_config = config['dirs']
    logger.info('[SETUP] Experiment configurations')
    logger.info(
        f'[SETUP] Experiment directory: {os.path.abspath(dirs_config["exp_dir"])}'
    )

    # Loading the dataset
    (X_train, len_train), (X_valid, len_valid), (_, _) = load_data(
        data_config=data_config, step_size=training_config['num_pixels'])
    logger.info(f'[LOAD]  Dataset (shape: {X_train[0].shape})')

    # Computing beat size in time steps
    beat_size = float(data_config['beat_resolution'] /
                      training_config['num_pixels'])

    # Preparing inputs for sampling
    intro_songs, save_ids, song_labels = prepare_sampling_inputs(
        X_train, X_valid, sampling_config, beat_size)
    num_save_intro = len(save_ids) // sampling_config['num_save']
    logger.info('[SETUP] Inputs for sampling')

    # Creating the MultINN model
    tf.reset_default_graph()
    model = MultINN(config,
                    params,
                    mode=params['mode'],
                    name=config['model_name'])
    logger.info('[BUILT] Model')

    # Building the sampler and evaluator
    sampler = model.sampler(num_beats=sampling_config['sample_beats'])
    logger.info('[BUILT] Sampler')
    evaluator = model.evaluator()
    logger.info('[BUILT] Evaluator')

    # Building optimizer and training ops
    if args.sgd:
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=training_config['learning_rate'])
    else:
        optimizer = tf.train.AdamOptimizer(
            learning_rate=training_config['learning_rate'], epsilon=1e-4)

    init_ops, update_ops, metrics, metrics_upd, summaries = model.train_generators(
        optimizer=optimizer, lr=training_config['learning_rate'])
    logger.info('[BUILT] Optimizer and update ops')

    # Extracting placeholders, metrics and summaries
    placeholders = model.placeholders
    x, lengths, is_train = placeholders['x'], placeholders[
        'lengths'], placeholders['is_train']

    loss = metrics['batch/loss']
    loglik, global_loglik = metrics['log_likelihood'], metrics['global'][
        'log_likelihood']

    weights_sum, metrics_sum, gradients_sum = summaries['weights'], summaries[
        'metrics'], summaries['gradients']

    # TensorFlow Session set up
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf.set_random_seed(training_config['random_seed'])
    np.random.seed(training_config['random_seed'])

    with tf.Session(config=tf_config) as sess:
        logger.info('[START] TF Session')
        with tf.variable_scope('init_global'):
            init_global = tf.global_variables_initializer()
        with tf.variable_scope('init_local'):
            init_local = tf.local_variables_initializer()
        sess.run([init_global, init_local])

        stats = TrainingStats()

        # Loading the model's weights or using initial weights
        if not args.from_init:
            if args.from_last:
                if model.load(sess, dirs_config['model_last_dir']):
                    last_stats_file = os.path.join(
                        dirs_config['model_last_dir'], 'steps')
                    if os.path.isfile(last_stats_file):
                        stats.load(last_stats_file)
                        logger.info('[LOAD]  Training stats file')

                    logger.info(
                        f'[LOAD]  Pre-trained weights (last, epoch={stats.epoch})'
                    )
                else:
                    logger.info('[LOAD]  Initial weights')
            elif model.load(sess, dirs_config['model_dir']):
                if os.path.isfile(dirs_config['model_stats_file']):
                    stats.load(dirs_config['model_stats_file'])
                    logger.info('[LOAD]  Training file')

                logger.info(
                    f'[LOAD]  Pre-trained weights (best, epoch={stats.epoch})')
            else:
                logger.info('[LOAD]  Initial weights')

                # run initialization update if exists
                if init_ops:
                    sess.run(init_ops, feed_dict={x: X_train[:1600]})
                    logger.info('[END]   Run initialization ops')
        else:
            logger.info('[LOAD]  Initial weights')

        if args.encoders and params['encoder']['type'] != 'Pass':
            encoder_dir = os.path.join(args.encoders, 'ckpt', 'encoders')
            if model.load_encoders(sess, os.path.join(encoder_dir)):
                logger.info('[LOAD]  Encoders\' weights')
            else:
                logger.info('[WARN]  Failed to load encoders\' weights')

        stats.new_run()

        # Preparing to the training
        graph = sess.graph if logs_config['save_graph'] else None
        writer_train = tf.summary.FileWriter(
            f'{dirs_config["logs_dir"]}/Graph/run_{stats.run}/train', graph)
        writer_valid = tf.summary.FileWriter(
            f'{dirs_config["logs_dir"]}/Graph/run_{stats.run}/valid')

        batch_size = training_config['batch_size']
        piece_size = int(training_config['piece_size'] * beat_size)

        logger.info(f'[START] Training, RUN={stats.run}')
        ids = np.arange(X_train.shape[0])

        # Logging initial weights
        if logs_config['log_weights_steps'] > 0:
            writer_train.add_summary(sess.run(weights_sum), stats.steps)
            logger.info('[LOG]   Initial weights')

        loss_accum = LossAccumulator()

        # Training on all of the songs `num_epochs` times
        past_epochs = stats.epoch
        for epoch in range(past_epochs + 1,
                           past_epochs + training_config['epochs'] + 1):
            stats.new_epoch()
            tf.set_random_seed(epoch)
            np.random.seed(epoch)

            start = time.time()

            np.random.shuffle(ids)
            loss_accum.clear()
            base_info = f'\r epoch: {epoch:3d} '

            for i in range(0, X_train.shape[0], batch_size):
                for j in range(0, X_train.shape[1], piece_size):
                    len_batch = len_train[ids[i:i + batch_size]] - j
                    non_empty = np.where(len_batch > 0)[0]

                    if len(non_empty) > 0:
                        len_batch = np.minimum(len_batch[non_empty],
                                               piece_size)
                        max_length = len_batch.max()

                        songs_batch = X_train[ids[i:i + batch_size],
                                              j:j + max_length, ...][non_empty]

                        if logs_config['log_weights_steps'] > 0 \
                                and (stats.steps + 1) % logs_config['log_weights_steps'] == 0 \
                                and j + piece_size >= X_train.shape[1]:
                            _, loss_i, summary = sess.run(
                                [update_ops, loss, weights_sum],
                                feed_dict={
                                    x: songs_batch,
                                    lengths: len_batch,
                                    is_train: True
                                })

                            writer_train.add_summary(summary, stats.steps + 1)
                            del summary
                        else:
                            _, loss_i = sess.run([update_ops, loss],
                                                 feed_dict={
                                                     x: songs_batch,
                                                     lengths: len_batch,
                                                     is_train: True
                                                 })

                        del songs_batch
                        loss_accum.update(loss_i)

                stats.new_step()

                # Log the progress during training
                if logs_config[
                        'log_loss_steps'] > 0 and stats.steps % logs_config[
                            'log_loss_steps'] == 0:
                    info = f' (steps: {stats.steps:5d}) time: {time_to_str(time.time() - start)}' + str(
                        loss_accum)
                    sys.stdout.write(base_info + info)
                    sys.stdout.flush()

            info = f' (steps: {stats.steps:5d})  time: {time_to_str(time.time() - start)}\n' + str(
                loss_accum)
            logger.info(base_info + info)
            logger.info(
                f'[END]   Epoch training time {time_to_str(time.time() - start)}'
            )

            # Evaluating the model on the training and validation data
            if logs_config['evaluate_epochs'] > 0 and epoch % logs_config[
                    'evaluate_epochs'] == 0:
                num_eval = X_valid.shape[0]

                collect_metrics(sess,
                                metrics_upd,
                                data=X_train[:num_eval, ...],
                                data_lengths=len_train[:num_eval, ...],
                                placeholders=placeholders,
                                batch_size=batch_size * 2,
                                piece_size=piece_size)
                summary, loglik_val, gl_loglik_val = sess.run(
                    [metrics_sum, loglik, global_loglik])
                writer_train.add_summary(summary, epoch)
                del summary
                logger.info(
                    f'[EVAL]  Training   set log-likelihood:  '
                    f'gen.={loglik_val:7.3f}  enc.={gl_loglik_val:7.3f}')

                collect_metrics(sess,
                                metrics_upd,
                                data=X_valid,
                                data_lengths=len_valid,
                                placeholders=placeholders,
                                batch_size=batch_size * 2,
                                piece_size=piece_size)
                summary, loglik_val, gl_loglik_val = sess.run(
                    [metrics_sum, loglik, global_loglik])
                writer_valid.add_summary(summary, epoch)
                del summary
                logger.info(
                    f'[EVAL]  Validation set log-likelihood:  '
                    f'gen.={loglik_val:7.3f}  enc.={gl_loglik_val:7.3f}')

            # Sampling input using the model
            if logs_config['generate_epochs'] > 0 and epoch % logs_config[
                    'generate_epochs'] == 0:
                samples = generate_music(
                    sess,
                    sampler,
                    intro_songs,
                    placeholders,
                    num_songs=sampling_config['num_songs'])
                logger.info('[EVAL]  Generated music samples')

                summary_sample = sess.run(evaluator,
                                          feed_dict={
                                              x: samples,
                                              is_train: False
                                          })
                writer_train.add_summary(summary_sample, epoch)
                del summary_sample
                logger.info('[EVAL]  Evaluated music samples')

                samples_to_save = samples[save_ids]
                del samples
                samples_to_save = pad_to_midi(samples_to_save, data_config)

                # Saving the music
                if logs_config[
                        'save_samples_epochs'] > 0 and epoch % logs_config[
                            'save_samples_epochs'] == 0:
                    save_music(samples_to_save,
                               num_intro=num_save_intro,
                               data_config=data_config,
                               base_path=f'{model.name}_e{epoch}',
                               save_dir=dirs_config['samples_dir'],
                               song_labels=song_labels)
                    logger.info('[SAVE]  Saved music samples')

            # Saving the model if the monitored metric decreased
            if loglik_val < stats.metric_best:
                stats.update_metric_best(loglik_val)
                stats.reset_idle_epochs()

                if logs_config['generate_epochs'] > 0 and epoch % logs_config[
                        'generate_epochs'] == 0:
                    save_music(samples_to_save,
                               num_intro=num_save_intro,
                               data_config=data_config,
                               base_path=f'{model.name}_best',
                               save_dir=dirs_config['samples_dir'],
                               song_labels=song_labels)

                if logs_config[
                        'save_checkpoint_epochs'] > 0 and epoch % logs_config[
                            'save_checkpoint_epochs'] == 0:
                    model.save(sess,
                               dirs_config['model_dir'],
                               global_step=stats.steps)
                    stats.save(dirs_config['model_stats_file'])

                    logger.info(
                        f'[SAVE]  Saved model after {epoch} epoch(-s) ({stats.steps} steps)'
                    )
            else:
                stats.new_idle_epoch()

                if stats.idle_epochs >= training_config['early_stopping']:
                    # Early stopping after no improvement
                    logger.info(
                        f'[WARN]  No improvement after {training_config["early_stopping"]} epochs, quitting'
                    )

                    save_music(samples_to_save,
                               num_intro=num_save_intro,
                               data_config=data_config,
                               base_path=f'{model.name}_last',
                               save_dir=dirs_config['samples_dir'],
                               song_labels=song_labels)

                    break

            del samples_to_save
            logger.info(
                f'[END]   Epoch time {time_to_str(time.time() - start)}')

        if not args.save_best_only:
            model.save(sess,
                       dirs_config['model_last_dir'],
                       global_step=stats.steps)
            stats.save(os.path.join(dirs_config['model_last_dir'], 'steps'))
            logger.info(
                f'[SAVE]  Saved model after {epoch} epoch(-s) ({stats.steps} steps)'
            )

        writer_train.close()
        writer_valid.close()
Example #21
File: hmm.py  Project: Dynami/py-shibumi
import numpy as np
from hmmlearn.hmm import GaussianHMM
import utils.data as dt
import params
from matplotlib import cm, pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator

np.random.seed(123)
''' Input parameters '''
symbol = 'GOOG'
look_back = 15  #15
look_ahead = 1
''' Loading data '''
df = dt.load_data(params.global_params['db_path'],
                  symbol,
                  from_date=20100101,
                  to_date=20110101,
                  index_col='date')
df = df[['open', 'high', 'low', 'close', 'volume']]

dates = df.index.values
close_v = df['close'].values
volume = df['volume'].values

#_log_returns = np._log_returns(close_v)
_log_returns = np.diff(np.log(close_v))

dates = dates[1:]
close_v = close_v[1:]
volume = volume[1:]
#
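
As a quick aside (illustrative, not in the original script): np.diff(np.log(close_v)) is exactly the per-step log return log(close_v[1:] / close_v[:-1]).

x = np.array([100.0, 101.0, 99.5])
assert np.allclose(np.diff(np.log(x)), np.log(x[1:] / x[:-1]))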
Example #22
    elif current_argument in ("--adv"):
        print("training the shared model with adv loss function")
        ADV_LOSS = True

    elif current_argument in ("--firstshared"):
        ADV_LOSS = False
        SHARE_FIRST = True

if LABEL_OF_ALL_1 and NON_NEG_GRAD:
    raise Exception("ONLY ONE OF THE TWO CAN BE TRUE: LABEL_OF_ALL_1, NON_NEG_GRAD")
        

if __name__ == "__main__":

    # load data
    data = load_data(DATASET_NAME, N_CLUSTERS)
    complete_data  = get_complete_data(DATASET_NAME, N_CLUSTERS, LEAVE_INTRA_CLUSTERS)

    # extract the information stored in data
    adjs, features_, tests, valids, clust_to_node, node_to_clust, com_idx_to_clust_idx = data

    # get the false edges and save them for each couple of clusters
    complete_train_matrix, _, complete_test_matrix, complete_valid_matrix  = complete_data
    
    # turn the dictionary into a list of features
    features = [features_[i] for i in range(len(features_))]

    train_edges = get_edges_formatted(complete_train_matrix, clust_to_node, N_CLUSTERS)
    valid_edges = get_edges_formatted(complete_valid_matrix, clust_to_node, N_CLUSTERS)
    test_edges = get_edges_formatted(complete_test_matrix, clust_to_node, N_CLUSTERS)
Example #23
def main(cfg: DictConfig):
    print('Nishika Second-hand Apartment Price Training')
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)
    data_dir = './input'

    seed_everything(cfg.data.seed)

    experiment = Experiment(api_key=cfg.exp.api_key,
                            project_name=cfg.exp.project_name,
                            auto_output_logging='simple',
                            auto_metric_logging=False)

    experiment.log_parameters(dict(cfg.data))

    # Config  ####################################################################################
    del_tar_col = ['取引時点']  # '取引時点' = transaction time; excluded from the feature list below
    id_col = 'ID'
    tar_col = '取引価格(総額)_log'  # target: log of the total transaction price
    g_col = 'year'
    criterion = MAE
    cv = KFold(n_splits=cfg.data.n_splits,
               shuffle=True,
               random_state=cfg.data.seed)
    # cv = GroupKFold(n_splits=5)

    # Load Data  ####################################################################################
    if cfg.exp.use_pickle:
        # load from the pickle file
        df = unpickle('./input/data.pkl')

    else:
        df = load_data(data_dir,
                       sampling=cfg.data.sampling,
                       seed=cfg.data.seed,
                       id_col=id_col,
                       target_col=tar_col)
        # Preprocessing
        print('Preprocessing')
        df = preprocessing(df, cfg)

        # save in pickle format
        to_pickle('./input/data.pkl', df)
        try:
            experiment.log_asset(file_data='./input/data.pkl',
                                 file_name='data.pkl')
        except:
            pass

    features = [c for c in df.columns if c not in del_tar_col]

    # Model  ####################################################################################
    model = None
    if cfg.exp.model == 'lgb':
        model = LGBMModel(dict(cfg.lgb))
    elif cfg.exp.model == 'cat':
        model = CatBoostModel(dict(cfg.cat))

    # Train & Predict  ##############################################################################
    trainer = Trainer(model, id_col, tar_col, g_col, features, cv, criterion,
                      experiment)
    trainer.fit(df)
    trainer.predict(df)
    trainer.get_feature_importance()
Example #24
DATASET = "chipwhisperer"  # ascad_fixed, ascad_variable, ches_ctf, chipwhisperer
TARGET_BYTE = 0

NUM_OF_FEATURES = 100
FEATURE_SPACING = 1

GE_NUMBER_OF_EXPERIMENTS = 100
GE_NUMBER_OF_TRACES = 10

LEAKAGE_MODEL = LeakageModel.HW  # intermediate, HW

########################################################################
############################## Load data ###############################
########################################################################

train, test = load_data(DATA_ROOT / DATASET, TARGET_BYTE)

(tracesTrain, ptTrain, keyTrain) = train
(tracesTest, ptTest, keyTest) = test

# X = (traces | plain_text)
# y = key
X_train = np.hstack((tracesTrain, ptTrain.reshape(-1, 1)))
y_train = keyTrain
X_test = np.hstack((tracesTest, ptTest.reshape(-1, 1)))
y_test = keyTest

########################################################################
############################## Profiling ###############################
########################################################################
                        central),
         "ocean":
         determine_path("ocean", config, glacier_name, central)
         if config["ocean_PATH"] else None
     }
 except FileNotFoundError as e:
     if "data path not exists" in str(e):
         print(str(e))
         continue
     else:
         traceback.print_exc()
         sys.exit()
 try:
     x_all, y_all = load_data(glacier_name,
                              logger=logger,
                              use_summary=config["use_summary"],
                              use_pca=config["use_pca"],
                              n=config["n"],
                              **path_dict)
     target_shape = 1
     if config["combine"]:
         test_size = int(len(y_all) / 3) % 7
         if first:
             (x_combine_train, x_combine_test, y_combine_train,
              y_combine_test) = train_test_split(
                  x_all, y_all, test_size=test_size)
             first = False
         else:
             (x_train, x_test, y_train,
              y_test) = train_test_split(x_all,
                                         y_all,
                                         test_size=test_size)
Example #26
def train_model(symbol='C',
                look_back=5,
                look_ahead=1,
                train_size=0.95,
                plot=True):
    np.random.seed(123)
    ''' Input parameters '''
    input_fields = [0, 1, 2, 3, 4]  # open, high, low, close, volume
    output_fields = [5]  # returns
    ''' Internal parameters'''
    saved_models_dir = params.global_params['models_dir']
    save_models = bool(params.global_params['save_models'])
    ''' Hyper parameters '''
    epochs = 100
    validation_split = 0.05  # part of the training set
    ''' Loading data '''
    df = dt.load_data(params.global_params['db_path'],
                      symbol,
                      index_col='date')

    df = df[['open', 'high', 'low', 'close', 'volume']]
    df = df.join(pd.Series(df['close'].diff(1), name='returns'))
    ''' Preparing data '''
    c_w = df['close'].rolling(center=False, window=look_back)
    c_mean = c_w.mean()
    c_std = c_w.std()

    df['close'] = (df['close'] - c_mean) / (2 * c_std)
    df['open'] = (df['open'] - c_mean) / (2 * c_std)
    df['high'] = (df['high'] - c_mean) / (2 * c_std)
    df['low'] = (df['low'] - c_mean) / (2 * c_std)

    df['returns'] = df['returns'].fillna(0)
    df['returns'] = np.where(df['returns'].values > 0, 0,
                             1)  # 0 upward # 1 downward
    df['returns'] = df['returns'].shift(-look_ahead)
    v_w = df['volume'].rolling(center=False, window=look_back)
    v_mean = v_w.mean()
    v_std = v_w.std()
    df['volume'] = (df['volume'] - v_mean) / (v_std)

    df = df[look_back:]
    ''' Inline data as input parameters '''
    data = df.values

    x_data = []
    y_data = []
    for index in range(data.shape[0] - look_back):
        x_data.append(
            np.reshape(data[index:index + look_back, input_fields],
                       (look_back * len(input_fields), 1)))
        y_data.append(
            np.reshape(data[index, output_fields], (len(output_fields), 1)))

    x_data = np.array(x_data)
    y_data = np.array(y_data)

    train_rows = int(round(x_data.shape[0] * train_size))

    x_close_train = x_data[:train_rows]
    y_train = y_data[:train_rows]

    x_close_test = x_data[train_rows:]
    y_test = y_data[train_rows:]

    y_train = y_train.astype(int)
    y_train = np.reshape(y_train, (y_train.shape[0]))
    y_train = dt.onehottify(y_train, dtype=float)

    y_test = y_test.astype(int)
    y_test = np.reshape(y_test, (y_test.shape[0]))
    y_test = dt.onehottify(y_test, dtype=float)

    x_close_train = np.reshape(
        x_close_train, (x_close_train.shape[0], x_close_train.shape[1]))
    x_close_test = np.reshape(x_close_test,
                              (x_close_test.shape[0], x_close_test.shape[1]))
    ''' Build model '''
    model_file = saved_models_dir + 'model.' + symbol + '.json'

    if (save_models and os.path.exists(model_file)):
        json_file = open(model_file, 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        model = model_from_json(loaded_model_json)
    else:
        model = Sequential()
        model.add(
            Dense(100,
                  activation='relu',
                  input_shape=(x_close_train.shape[1], )))
        model.add(Dropout(0.3))
        model.add(Dense(2, activation='softmax'))

        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        model_json = model.to_json()
        with open(model_file, "w") as json_file:
            json_file.write(model_json)
            json_file.close()
    ''' Train model '''
    model_weights = saved_models_dir + 'model.' + symbol + '.h5'

    history = None
    if (save_models and os.path.exists(model_weights)):
        model.load_weights(model_weights)
    else:
        history = model.fit(
            x_close_train,
            y_train,
            epochs=epochs,
            #callbacks=[utils.plot_learning.plot_learning],
            validation_split=validation_split
            #validation_data=(x_close_test, y_test)
        )

        if (save_models and not os.path.exists(model_weights)):
            model.save_weights(model_weights)
            print("Saved model to disk")
    ''' Predictions on test set (different from validation set) '''
    predictions = model.predict(x_close_test)
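    # y_test is one-hot, so predictions * y_test keeps only the probability assigned to the true
    # class; the accuracy below is the fraction of test samples where that probability exceeds 0.5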
    tmp = predictions * y_test
    tmp = np.sum(tmp, axis=1)
    tmp = np.where(tmp > 0.5, 1, 0)
    accuracy = np.sum(tmp) / len(tmp)
    print(symbol, accuracy)
    ''' Print model output '''

    if plot and history is not None:
        print(history.history.keys())
        plt.figure(1)
        plt.subplot(211)

        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        #plt.show()
        # summarize history for loss
        plt.subplot(212)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()
Example #27
def join(path: str):
    """join all csv to one"""
    tables = [load_data(file) for file in os.listdir(path) if file.endswith(".csv") and file.startswith("cica-")]
    df = join_tables(tables)
    df.to_csv(f"result-cica-{datetime.now():%Y%m%d-%H%M}.csv", index=False, sep=";")
    # set random behavior
    rng = check_random_state(args.seed)

    # load model configuration
    model = select_model(args.model)

    # prepare output directory
    data_name = os.path.basename(os.path.normpath(args.dataset))
    out_dir = os.path.join('params', 'membership',
                           model.name + '_' + data_name + '_weak')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # load data: playlists, splits, features and artist info
    data = load_data(args.dataset, args.msd, model)
    playlists_coo, split_weak, split_strong, features, song2artist = data

    # playlists_coo are the playlists stored in coordinate format
    playlists_idx, songs_idx, position, idx2song = playlists_coo

    # split_strong defines a playlist-disjoint split
    # this is just to validate the model, use any disjoint split
    fold_strong = split_strong[0]
    train_idx_dsj, test_idx_dsj = np.hstack(fold_strong[:2]), fold_strong[2]

    # split_weak provides a query/continuation split
    query_idx, cont_idx = np.hstack(split_weak[:2]), split_weak[2]

    # define splits for this experiment
    # train model on intersection of disjoint training split and queries
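    # the snippet ends here; a minimal sketch of that intersection (an assumption,
    # not the original continuation)
    train_idx = np.intersect1d(train_idx_dsj, query_idx)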
Example #29
np.random.seed(123)

''' Input parameters '''
symbol = 'C'
look_back = 5
look_ahead = 1
train_size = 0.95
input_fields = [0, 1, 2, 3, 4, 6]  # open, high, low, close, volume
output_fields = [5]  # returns

''' Hyper parameters '''
epochs = 100
validation_split = 0.05  # part of the training set

''' Loading data '''
df = dt.load_data(params.global_params['db_path'], symbol, index_col='date')

df = df[['open', 'high', 'low', 'close', 'volume']]
print('df[high].at[0]', df['high'].iat[0])
df = ta.myRSI(df, 5)
df = df.join(pd.Series(df['close'].diff(1), name='returns'))

''' Preparing data '''
c_w = df['close'].rolling(center=False, window=look_back)
c_mean = c_w.mean()
c_std = c_w.std()
stds = 2.

df = df.join(pd.Series((df['close'] - c_mean)/(stds*c_std), name='bb'))

df['open'] =  (df['open'] - df['close'].shift(1)) / c_std
Example #30
import argparse

from utils.model import load_model
from utils.plot import make_adv_img, make_confusion_matrix
from utils.utils import get_targeted_success_rate, set_art

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='chestx')
parser.add_argument('--model', type=str, default='inceptionv3')
parser.add_argument('--norm', type=str, default='l2')
parser.add_argument('--eps', type=float, default=0.04)
parser.add_argument('--target', type=str, default='PNEUMONIA')
parser.add_argument('--gpu', type=str, default='0')
args = parser.parse_args()

set_gpu(args.gpu)

X_train, X_test, y_train, y_test, mean_l2_train, mean_linf_train = load_data(
    dataset=args.dataset, normalize=True, norm=True)

model = load_model(dataset=args.dataset,
                   nb_class=y_train.shape[1],
                   model_type=args.model,
                   mode='inference')

# # Generate adversarial examples

classifier, norm, eps = set_art(model, args.norm, args.eps, mean_l2_train,
                                mean_linf_train)

adv_crafter = TargetedUniversalPerturbation(classifier,
                                            attacker='fgsm',
                                            delta=0.000001,
                                            attacker_params={