Example #1
    def __init__(self, cfg):
        self.cfg = cfg
        self.nEpochs = cfg['nEpochs']
        self.checkpoint_dir = cfg['checkpoint']
        self.epoch = 1

        self.timestamp = int(time.time())

        if cfg['gpu_mode']:
            self.num_workers = cfg['threads']
        else:
            self.num_workers = 0

        self.train_dataset = get_data(cfg, cfg['train_dataset'],
                                      cfg['data']['upsacle'])
        self.train_loader = DataLoader(self.train_dataset,
                                       cfg['data']['batch_size'],
                                       shuffle=True,
                                       num_workers=self.num_workers)
        self.val_dataset = get_data(cfg, cfg['valid_dataset'],
                                    cfg['data']['upsacle'])
        self.val_loader = DataLoader(self.val_dataset,
                                     cfg['data']['batch_size'],
                                     shuffle=False,
                                     num_workers=self.num_workers)

        self.records = {'Epoch': [], 'PSNR': [], 'SSIM': [], 'Loss': []}

        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)
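
For reference, a hypothetical minimal cfg covering only the keys this constructor reads; all values are placeholders, and 'upsacle' is the scale key exactly as this codebase spells it.

cfg = {
    'nEpochs': 100,
    'checkpoint': 'checkpoints/',
    'gpu_mode': True,            # when False, num_workers falls back to 0
    'threads': 4,
    'train_dataset': 'data/train',
    'valid_dataset': 'data/valid',
    'data': {'upsacle': 4, 'batch_size': 16},
}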
Example #2
def train(**kwargs):

    #Set attributes
    for k, v in kwargs.items():
        setattr(opt, k, v)
    if opt.vis:
        visualizer = Visualizer()

    # Data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = DataLoader(data, batch_size=opt.batch_size, shuffle=True)

    # Model
    model = LyricsModel(len(word2ix), opt.embedding_dim, opt.latent_dim)
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path, map_location="cpu"))

    # Define optimizer and loss
    optimizer = Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    #================================================#
    #               Start Training                   #
    #================================================#

    for epoch in tqdm.tqdm(range(opt.num_epoch)):

        for (ii, data) in enumerate(dataloader):
            # Prepare data
            data = data.long().transpose(1, 0).contiguous()
            if opt.use_gpu: data = data.cuda()
            inputs, targets = Variable(data[:-1, :]), Variable(data[1:, :])
            outputs, hidden = model(inputs)

            # Initialize and backward
            optimizer.zero_grad()
            loss = criterion(outputs, targets.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            if (1 + ii) % opt.print_every == 0:
                print("Current Loss: %d" % loss.item())
                if opt.vis:
                    visualizer.plot('loss', loss_meter.value()[0])
        if (epoch + 1) % 20 == 0:
            t.save(model.state_dict(), 'checkpoints/%s.pth' % epoch)
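
A hypothetical invocation; the keyword names mirror the opt attributes read inside train() and every value below is a placeholder.

# Each kwarg is copied onto opt via setattr before training starts.
train(num_epoch=20, batch_size=128, lr=1e-3, use_gpu=False, vis=False,
      model_path=None, print_every=100)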
Example #3
def egg(key):
    data = get_data()
    if (key in data):
        datum = data[key]
        egg_no = datum['egg_number']
        hints = shuffle_hints(datum['hints'])

        return render_template('egg.html', egg_no=egg_no, hints=hints)

    else:
        return render_template('404.html', title='404'), 404
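
A minimal sketch of how this view might be registered, assuming a standard Flask app; the '/egg/<key>' URL rule is an assumption, though render_template implies Flask.

from flask import Flask

app = Flask(__name__)
# Hypothetical URL rule; the real route decorator is not shown in the snippet.
app.add_url_rule('/egg/<key>', view_func=egg)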
Example #4
def generate(**kwargs):

    #Set attributes
    for k, v in kwargs.items():
        setattr(opt, k, v)

    # Data
    data, word2ix, ix2word = get_data(opt)

    # Load model
    model = LyricsModel(len(word2ix), opt.embedding_dim, opt.latent_dim)
    if opt.model_path:
        model.load_state_dict(
            t.load(opt.model_path, map_location=lambda s, l: s))

    #================================================#
    #               Start Decoding                   #
    #================================================#

    results = list(opt.start_words)
    input_ = Variable(t.LongTensor([word2ix["<START>"]])).view(1, 1)
    hidden = None

    if opt.use_gpu:
        model.cuda()
        input_ = input_.cuda()

    if opt.prefix_words:
        for w in opt.prefix_words:
            output, hidden = model(input_, hidden)
            input_ = Variable(t.LongTensor([word2ix[w]])).view(1, 1)

    for i in range(opt.max_gen_len):

        output, hidden = model(input_, hidden)

        if i < len(opt.start_words):
            word = opt.start_words[i]
            input_ = Variable(t.LongTensor([word2ix[word]])).view(1, 1)
        else:
            # Greedy decoding: take the most likely next token as a Python int.
            top_index = output.data[0].topk(1)[1].item()
            word = ix2word[top_index]
            results.append(word)
            input_ = Variable(t.LongTensor([top_index])).view(1, 1)
        if word == '<EOS>':
            break

    if "<EOS>" in results:
        results.remove("<EOS>")
    print(''.join(results).rstrip())
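
A hypothetical call; the keyword names mirror the opt fields read above and the values are placeholders.

# start_words must consist of tokens present in word2ix.
generate(start_words='春风', prefix_words=None, max_gen_len=100, use_gpu=False)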
Example #5
    def post(self):
        list_of_names = [
            'exons', 'cpg', 'fStomach-DS17659', 'fSkin_fibro_bicep_R-DS19745',
            'fKidney_renal_cortex_L-DS17550', 'fLung_R-DS15632'
        ]
        list_of_bedtools = get_data()
        data = request.json
        firstGene = data['firstGene']
        secondGene = data['secondGene']
        indexOfFirstGene = 0
        indexOfSecondGene = 0
        for i in range(len(list_of_names)):
            if firstGene == list_of_names[i]:
                indexOfFirstGene = i
            elif secondGene == list_of_names[i]:
                indexOfSecondGene = i
        random_shuffle_second_gene = list_of_bedtools[indexOfSecondGene].shuffle(
            genome='hg19', chrom=True)
        jaccard = list_of_bedtools[indexOfFirstGene].jaccard(
            random_shuffle_second_gene.sort())
        return jaccard
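
A hypothetical client request against this handler: the JSON keys firstGene/secondGene come from the code above, while the mount URL is an assumption.

import requests

# '/jaccard' is a made-up endpoint path for illustration only.
resp = requests.post('http://localhost:5000/jaccard',
                     json={'firstGene': 'exons', 'secondGene': 'cpg'})
print(resp.json())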
Example #6
    def __init__(self, cfg, name):
        super(Solver, self).__init__(cfg)
        self.init_epoch = self.cfg['schedule']

        net_name = self.cfg['algorithm'].lower()
        lib = importlib.import_module('model.' + net_name)
        net = lib.Net

        self.model = net(num_channels=self.cfg['data']['n_colors'],
                         base_filter=64,
                         scale_factor=self.cfg['data']['upsacle'],
                         args=self.cfg)

        self.train_dataset = get_data(
            self.cfg,
            str(self.cfg['train_dataset']) + '/' + str(name) + '.png',
            str(self.cfg['train_dataset']) + '/' + str(name) + '.png',
            self.cfg['data']['upsacle'])
        self.train_loader = DataLoader(self.train_dataset,
                                       self.cfg['data']['batch_size'],
                                       shuffle=False,
                                       num_workers=self.num_workers)

        for iteration, batch in enumerate(self.train_loader, 1):
            lr, hr, bic, hr_ref, bic_ref, file_name = Variable(
                batch[0]), Variable(batch[1]), Variable(batch[2]), Variable(
                    batch[4]), Variable(batch[5]), (batch[6])
        self.hr_ref = hr_ref
        self.lr = lr
        self.file_name = file_name

        self.noise_init = get_noise(
            32, 'noise',
            (self.cfg['data']['patch_size'] * self.cfg['data']['upsacle'],
             self.cfg['data']['patch_size'] * self.cfg['data']['upsacle']))
        self.noise = self.noise_init.detach().clone()

        self.optimizer = maek_optimizer(self.cfg['schedule']['optimizer'], cfg,
                                        self.model.parameters())
        self.loss = CycleLoss(scale=1 / 4, loss_type='MSE')

        self.log_name = self.cfg['algorithm'] + '_' + str(
            self.cfg['data']['upsacle']) + '_' + str(self.timestamp)
        # save log
        self.writer = SummaryWriter('log/' + str(self.log_name))
        save_net_config(self.log_name, self.model)
        save_yml(cfg, os.path.join('log/' + str(self.log_name), 'config.yml'))
Example #7
def main():
    """
    Main execution function to train machine learning models to predict pointsWon in Cy Young races.
    """
    create_directories([
        DIAGNOSTICS_DIRECTORY, MODELS_DIRECTORY, PLOTS_DIRECTORY,
        TEST_SET_DIRECTORY, SHAP_VALUES_DIRECTORY
    ], parent=DIAGNOSTICS_DIRECTORY)
    df = get_data()
    make_diagnostic_plots(df)
    x_train, y_train, x_test, y_test = create_custom_train_test_split(
        df, TARGET, 0.2, INDIVIDUAL_ID)
    custom_cv = create_custom_cv(x_train, INDIVIDUAL_ID, CV_SPLITS)
    for key, value in MODEL_TRAINING_DICT.items():
        train_model(x_train, y_train, x_test, y_test, key, construct_pipeline,
                    value[0], value[1], custom_cv, value[2])
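
A guessed shape for MODEL_TRAINING_DICT, inferred only from how it is unpacked above (each key names a model, each value supplies value[0], value[1], value[2] to train_model); the element meanings are assumptions.

from sklearn.ensemble import RandomForestRegressor

# Entirely hypothetical entry: estimator, hyperparameter grid, scoring metric.
MODEL_TRAINING_DICT = {
    'random_forest': (RandomForestRegressor(),
                      {'model__n_estimators': [100, 500]},
                      'neg_mean_squared_error'),
}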
Example #8
    def post(self):
        list_of_names = [
            'exons', 'cpg', 'fStomach-DS17659', 'fSkin_fibro_bicep_R-DS19745',
            'fKidney_renal_cortex_L-DS17550', 'fLung_R-DS15632'
        ]
        list_of_bedtools = get_data()
        data = request.json
        firstGene = data['firstGene']
        secondGene = data['secondGene']
        indexOfFirstGene = 0
        indexOfSecondGene = 0
        for i in range(len(list_of_names)):
            if firstGene == list_of_names[i]:
                indexOfFirstGene = i
            elif secondGene == list_of_names[i]:
                indexOfSecondGene = i

        intersect = list_of_bedtools[indexOfFirstGene].jaccard(
            list_of_bedtools[indexOfSecondGene])
        return intersect
Example #9
def main(args):
    all_X, all_Y, column_names = data.get_data(get_feature_names=True)
    input_dim = len(all_X[0])

    # Load the models
    nn = models.get_nn_model(input_dim)
    rf = models.get_random_forest_model()

    # Create training set
    n_train = 1000
    X = all_X[:n_train]
    Y = all_Y[:n_train]
    # Fit the model
    nn.fit(X, Y, epochs=50, batch_size=10, verbose=2)

    # Get some samples to test our model on, our test set
    test_set_range = (10000, 20000)
    x_test = all_X[test_set_range[0]:test_set_range[1]]
    y_test = all_Y[test_set_range[0]:test_set_range[1]]

    # Calculate predictions and flatten Keras' (n, 1) output to 1-D
    nn_predictions = nn.predict(x_test).ravel()

    # Change to 0/1 predictions and get accuracy (scaled to a percentage)
    nn_predictions = [int(round(x)) if not math.isnan(x) else 0
                      for x in nn_predictions]
    print("\nNeural Network Accuracy: %.4f%%" %
          (accuracy_score(y_test, nn_predictions) * 100))

    rf = rf.fit(X, Y.reshape(n_train, ))
    rf_predictions = rf.predict(x_test)

    print "Random Forest Accuracy: %.4f%%" % (accuracy_score(y_test, rf_predictions))

    # Importance as found by random forest model
    if args.feature_importance:
        print "\nRandom Forest Feature importance:"
        for i in zip(column_names, rf.feature_importances_):
            print "%s importance: %.2f" % i
Example #10
path_check = '{}/check/checkpoint.pt'.format(eval_args.model)

torch.manual_seed(eval_args.seed)

###############
## Load args ##
###############

with open(path_args, 'rb') as f:
    args = pickle.load(f)

##################
## Specify data ##
##################

_, _, data_shape = get_data(args)

###################
## Specify model ##
###################

model = get_model(args, data_shape=data_shape)
if args.parallel == 'dp':
    model = DataParallelDistribution(model)
checkpoint = torch.load(path_check)
model.load_state_dict(checkpoint['model'])
print('Loaded weights for model at {}/{} epochs'.format(
    checkpoint['current_epoch'], args.epochs))

############
## Sample ##
Example #11
def dataConstructor(data=None, w=None):
    """Add weather, holiday and daylight-hour variables to the bicing dataframe.
    Parameters
    ----------
    data : pandas.DataFrame from get_data(); fetched lazily when None
    w : pandas.DataFrame from get_weather(); fetched lazily when None
    Returns
    -------
    X : pandas.DataFrame with the new, encoded columns
    """
    # None defaults avoid evaluating get_data()/get_weather() once at import
    # time, which call expressions in the signature would do.
    if data is None:
        data = get_data()
    if w is None:
        w = get_weather()

    data = add_holidays(data)
    data = add_daylight_hrs(data)
    data = add_weather(data, w)

    smallDataFrames = False
    if smallDataFrames:
        # https://www.reddit.com/r/learnpython/comments/5err0o/memoryerror_merging_two_dataframes_with_pandas/
        # MemoryError merging two dataframes with pandas will happen in line: X = X.join(pd.get_dummies(data[k]))
        # Therefore, just skip indeces
        data["day_of_week"] = data.index.day_name()
        data["hour"] = data.index.hour
    else:
        data.reset_index(level=0, inplace=True)
        data["day_of_week"] = data.loc[:, "updateTime"].dt.day_name()
        data["hour"] = data.loc[:, "updateTime"].dt.hour
        try:
            data.drop(columns=["updateTime"], inplace=True)
        except Exception as err:
            print(f"\tError: {err}" + "\n" + 80 * "~")

    boolVars = ["status", "type"]
    dummyVars = ["day_of_week"]

    X = data.drop(columns=[
        *boolVars,
        *dummyVars,
        "latitude",
        "longitude",
        "nearbyStations",
        "streetName",
        "streetNumber",
    ])  # .copy()

    for k in boolVars:
        X[k] = LabelEncoder().fit_transform(data[k].values)
    for k in dummyVars:
        X = X.join(pd.get_dummies(data[k]))

    renameDayColumns = False  # optional: rename weekday dummy columns to Day1..Day7
    if renameDayColumns:
        X.rename(
            columns={
                "Monday": "Day1",
                "Tuesday": "Day2",
                "Wednesday": "Day3",
                "Thursday": "Day4",
                "Friday": "Day5",
                "Saturday": "Day6",
                "Sunday": "Day7",
            },
            inplace=True,
        )

    return X
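
Hypothetical usage; with the None defaults above, the data and weather frames are fetched lazily.

X = dataConstructor()  # pulls fresh frames via get_data() and get_weather()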
Example #12
## Setup ##
###########

parser = argparse.ArgumentParser()
add_exp_args(parser)
add_data_args(parser)
add_model_args(parser)
add_optim_args(parser)
args = parser.parse_args()
set_seeds(args.seed)

##################
## Specify data ##
##################

train_loader, eval_loader = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, scheduler_iter, scheduler_epoch = get_optim(args,
                                                       model.parameters())
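
The snippet stops once the optimizer is built. Below is a generic sketch, not this repository's actual loop, of how the two schedulers returned by get_optim are commonly stepped; compute_loss is a placeholder for the unshown objective.

for epoch in range(args.epochs):
    for batch in train_loader:
        optimizer.zero_grad()
        loss = compute_loss(model, batch)  # placeholder objective
        loss.backward()
        optimizer.step()
        if scheduler_iter is not None:
            scheduler_iter.step()          # per-iteration LR schedule
    if scheduler_epoch is not None:
        scheduler_epoch.step()             # per-epoch LR schedule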
Example #13
# Adjust args
args.name = time.strftime("%Y-%m-%d_%H-%M-%S")
args.epochs = more_args.new_epochs
args.lr = more_args.new_lr
args.resume = None

# Store more_args
args.start_model = more_args.model
args.new_epochs = more_args.new_epochs
args.new_lr = more_args.new_lr

##################
## Specify data ##
##################

train_loader, eval_loader, data_shape = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args, data_shape=data_shape)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, _, _ = get_optim(args, model)
optim_id = 'more'
Example #14
    raise Exception("framework not supported!")
from data.data import get_data_npz, get_data
from model_search import run_model_search_cnn, run_model_search_mlp

if args.dataset[-4:] == '.npz':
    # use .npz files
    dataset = args.dataset[:-4]
    data = get_data_npz(data_folder=args.data_folder,
                        dataset=args.dataset,
                        val_split=args.val_split,
                        problem_type=args.problem_type)
else:
    # use framework built-in datasets
    dataset = args.dataset
    data = get_data(data_folder=args.data_folder,
                    dataset=args.dataset,
                    val_split=args.val_split,
                    augment=args.augment)
dataset_code = {'mnist': 'M', 'fmnist': 'F', 'rcv1_2000': 'R'}.get(dataset, 'XXX')

if args.network == 'cnn':
    run_model_search_cnn(data=data,
                         dataset_code=dataset_code,
                         input_size=args.input_size,
                         output_size=args.output_size,
                         problem_type=args.problem_type,
                         verbose=args.verbose,
                         wc=args.wc,
                         tbar_epoch=args.tbar_epoch,
                         numepochs=args.numepochs,
                         val_patience=args.val_patience,
                         bo_prior_states=args.bo_prior_states,
Example #15
torch.manual_seed(eval_args.seed)

###############
## Load args ##
###############

with open(path_args, 'rb') as f:
    args = pickle.load(f)

args.batch_size = eval_args.samples

##################
## Specify data ##
##################

eval_loader, data_shape, cond_shape = get_data(args, eval_only=True)

###################
## Specify model ##
###################

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = get_model(args, data_shape=data_shape, cond_shape=cond_shape)
if args.parallel == 'dp':
    model = DataParallelDistribution(model)
checkpoint = torch.load(path_check, map_location=torch.device(device))
model.load_state_dict(checkpoint['model'])
model = model.to(device)
model = model.eval()
print('Loaded weights for model at {}/{} epochs'.format(
    checkpoint['current_epoch'], args.epochs))
Example #16
import sys
sys.path.append('/home/polichism/test/')

from data import data as _data
if __name__ == "__main__":
    print(_data.get_data('http://www.google.com'))