Example #1
    def testPinObject(self):
        X = pin_in_object_store("hello")

        @ray.remote
        def f():
            return get_pinned_object(X)

        self.assertEqual(ray.get(f.remote()), "hello")
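
A minimal, self-contained sketch of the same pin/retrieve round-trip, assuming the `ray.tune.util` imports that the later examples (e.g. Example #11) use:

import ray
from ray.tune.util import pin_in_object_store, get_pinned_object

ray.init()

# Pin once in the driver; the returned id is a small, serializable handle.
pinned_id = pin_in_object_store("hello")


@ray.remote
def fetch():
    # Workers resolve the handle back to the original object.
    return get_pinned_object(pinned_id)


assert ray.get(fetch.remote()) == "hello"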
Example #2
    def testFetchPinned(self):
        X = pin_in_object_store("hello")

        def train(config, reporter):
            get_pinned_object(X)
            reporter(timesteps_total=100, done=True)

        register_trainable("f1", train)
        [trial] = run_experiments({"foo": {
            "run": "f1",
        }})
        self.assertEqual(trial.status, Trial.TERMINATED)
        self.assertEqual(trial.last_result[TIMESTEPS_TOTAL], 100)
Example #3
if __name__ == "__main__":
    parser = argparse.ArgumentParser("Speech Verification")
    parser.add_argument("--ray", action='store_true', default=False)
    parser.add_argument("--data-parallel", action='store_true', default=False)
    parser.add_argument("--chunks", type=int, default=1)

    args = parser.parse_args()
    print(args)
    ray.init(num_gpus=1)
    ray.register_custom_serializer(torch.Tensor,
                                   serializer=serializer,
                                   deserializer=deserializer)

    # Load train set
    nspeakers, train_set = load_train_set(args)
    train_set_id = pin_in_object_store(train_set)

    print("Loaded train. pinned={}".format(True))

    # Load dev set
    dev_set = load_dev_set(args)
    dev_set_id = pin_in_object_store(dev_set)
    print("Loaded dev. pinned={}".format(True))

    tune.register_trainable('train_sc', train.Trainer)
    exp = Experiment(
        name="speaker classification",
        run='train_sc',
        config={
            "stop": {
                'training_iteration': 500
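
The `serializer` and `deserializer` callables passed to `ray.register_custom_serializer` above are not shown in the snippet. A plausible pair, sketched here as an assumption (round-tripping tensors through NumPy; not part of the original source):

import numpy as np
import torch


def serializer(tensor):
    # Hypothetical: move to CPU and hand Ray a NumPy array it can pickle.
    return tensor.detach().cpu().numpy()


def deserializer(array):
    # Hypothetical: rebuild the tensor on the receiving side.
    return torch.from_numpy(np.asarray(array))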
Example #4
def run_ray_logistic(latents_path, tags, kf, idx, log_name):

    ray.init(num_cpus=5, num_gpus=1)
    data_train_list = []
    data_val_list = []
    for train_idx, val_idx in kf.split(idx):
        train_idx = idx[train_idx]  # indices into the full tensor
        val_idx = idx[val_idx]  # indices into the full tensor

        latents_train, latents_val = PCA_macau_samples(dir_path=latents_path,
                                                       idx_train=train_idx,
                                                       idx_val=val_idx)

        data_train_list += [latent_dataset(latents_train, tags[train_idx])]
        data_val_list += [latent_dataset(latents_val, tags[val_idx])]

    data_train = pin_in_object_store(data_train_list)
    data_val = pin_in_object_store(data_val_list)

    class train_class(Trainable):
        def _setup(self):
            self.device = torch.device("cuda:0")
            mod_opt = {'type': "plain_fact", 'cov': False, 'latents': 20}
            self.nfolds = self.config["nfolds"]
            #data_train=TensorFactDataset(csv_file_serie="complete_tensor_train1.csv",cov_path="complete_covariates")
            self.mod = []
            self.dataloader = []
            self.data_val = get_pinned_object(data_val)
            for fold in range(self.nfolds):
                mod_fold = MLP_class_mod(
                    get_pinned_object(data_train)[fold].get_dim())
                mod_fold.to(self.device)
                self.mod += [mod_fold]
                #self.mod=MLP_class_mod(get_pinned_object(data_train).get_dim())

                self.dataloader += [
                    DataLoader(get_pinned_object(data_train)[fold],
                               batch_size=5000,
                               shuffle=True)
                ]
                #self.dataloader_val += DataLoader(get_pinned_object(data_val),batch_size=1000,shuffle=False)
            #self.dataloader=DataLoader(data_train,batch_size=65000,shuffle=True,num_workers=2)
            self.timestep = 0
            print("SETUUUUP")

        def _train(self):
            self.timestep += 1

            print("Timestep")
            print(self.timestep)

            # Select the learning rate depending on the epoch.
            if self.timestep < 40:
                l_r = 0.005
            elif self.timestep < 60:
                l_r = 0.0015
            else:
                l_r = 0.0005

            auc_mean_folds = 0
            for fold in range(self.nfolds):
                optimizer = torch.optim.Adam(self.mod[fold].parameters(),
                                             lr=l_r,
                                             weight_decay=self.config["L2"])

                criterion = nn.BCEWithLogitsLoss()
                total_loss = 0
                for idx, sampled_batch in enumerate(self.dataloader[fold]):
                    optimizer.zero_grad()
                    target = sampled_batch[1].to(self.device)
                    preds = self.mod[fold].fwd(sampled_batch[0].to(
                        self.device))
                    loss = criterion(preds, target)
                    loss.backward()
                    optimizer.step()

                with torch.no_grad():
                    target = self.data_val[fold].tags.to(self.device)
                    preds = self.mod[fold].fwd(self.data_val[fold].latents.to(
                        self.device))
                    # roc_auc_score expects CPU NumPy inputs.
                    auc_mean = roc_auc_score(target.cpu().numpy(),
                                             preds.cpu().numpy())
                #rmse_val_loss_computed=(np.sqrt(loss_val.detach().cpu().numpy()/(i_val+1)))
                auc_mean_folds += auc_mean

            #return TrainingResult(mean_accuracy=(auc_mean_folds/self.nfolds),timesteps_this_iter=1)
            return {
                "mean_accuracy": (auc_mean_folds / self.nfolds),
                "time_steps_this_iter": 1
            }

        def _save(self, checkpoint_dir):
            print("Saving")
            path = os.path.join(checkpoint_dir, "checkpoint")
            state_dict_list = []
            for fold in range(self.nfolds):
                state_dict_list += [self.mod[fold].state_dict()]
            torch.save(state_dict_list, path)
            print("SAVIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIING")
            #raise Exception()
            #torch.cuda.empty_cache()
            np.save(path + "_timestep.npy", self.timestep)
            return path

        def _restore(self, checkpoint_path):
            print("LOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADING")
            state_dict_list = torch.load(checkpoint_path)
            for fold in range(self.nfolds):
                self.mod[fold].load_state_dict(state_dict_list[fold])
            self.timestep = np.load(checkpoint_path + "_timestep.npy").item()

    tune.register_trainable("my_class", train_class)

    hyperband = HyperBandScheduler(time_attr="timesteps_total",
                                   reward_attr="mean_accuracy",
                                   max_t=100)

    exp = {
        'run': "my_class",
        'num_samples': 50,
        'trial_resources': {
            "gpu": 1
        },
        'stop': {
            "training_iteration": 100
        },
        'config': {
            "L2": lambda spec: 10**(8 * random.random() - 4),
            "nfolds": kf.get_n_splits()
        }
    }

    tune.run_experiments({log_name: exp}, scheduler=hyperband)
Example #5
            h5.attrs['alpha'] = alpha
            h5['counts'] = np.zeros(shape=(resolution, resolution),
                                    dtype=np.uint32)

    h5 = h5py.File(f_save, 'r+')

    if 'zi' not in h5:
        zi = grid_targets(alpha, iterations)
        h5['zi'] = zi
    else:
        zi = h5['zi'][...]

    # Drop the objects into the queue
    object_ids = []

    zi_obj = pin_in_object_store(zi)

    for k in range(parallel_iterations):
        args = (N, alpha, iterations, resolution, extent, zi_obj)
        obj = compute_set.remote(*args, seed=k)
        object_ids.append(obj)

    # Accumulate the results
    counts = np.zeros((resolution, resolution), dtype=np.uint64)

    with tqdm(total=len(object_ids)) as progress:
        while len(object_ids):
            obj, object_ids = ray.wait(object_ids, num_returns=1)
            counts += ray.get(obj[0]).copy()
            progress.update()
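
The `compute_set` remote function is defined outside the snippet. A hedged sketch of what such a consumer could look like, reusing the pin/retrieve pattern from Example #1 and the imports of the surrounding file (the signature and body are assumptions):

@ray.remote
def compute_set(N, alpha, iterations, resolution, extent, zi_obj, seed=0):
    # Hypothetical consumer: resolve the pinned target grid once per task.
    zi = get_pinned_object(zi_obj)
    counts = np.zeros((resolution, resolution), dtype=np.uint32)
    # ... per-seed iteration against `zi` would fill `counts` here ...
    return counts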
Example #6
            'optimizer': self.optimizer.state_dict()
        }
        torch.save(cpd, checkpoint_dir + "/save")

    def _restore(self, path):
        cpd = torch.load(path)
        self.iteration = cpd['iteration']
        self.sc.load_state_dict(cpd['state_dict'])
        self.optimizer.load_state_dict(cpd['optimizer'])


if __name__ == "__main__":
    ray.init()
    dset = TensorDataset(
        torch.randn(100, 64, 1024), torch.randn(100, 1024),
        torch.randint(100, size=(100, )).type(torch.LongTensor))

    dset_id = pin_in_object_store(dset)
    tune.register_trainable('train_sc', Trainer)
    exp = Experiment(name="speaker classification",
                     run='train_sc',
                     stop={"timesteps_total": 1},
                     config={
                         "lr": 1e-3,
                         "dset_id": dset_id,
                         "nspeakers": 100,
                         "batch_size": 1,
                     })

    tune.run_experiments(exp)
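
Here the pinned id travels through the Tune config rather than a closure; the earlier part of the `Trainer` class is not shown, but presumably it resolves the id during setup. A minimal sketch of that retrieval, under that assumption and following the `_setup(self)` style of Example #4:

from ray.tune import Trainable
from torch.utils.data import DataLoader


class Trainer(Trainable):
    def _setup(self):
        # Hypothetical: resolve the pinned dataset id passed via config.
        self.dset = get_pinned_object(self.config["dset_id"])
        self.loader = DataLoader(self.dset,
                                 batch_size=self.config["batch_size"],
                                 shuffle=True)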
Example #7
        return path

    def _restore(self, checkpoint_path):
        print("Restoring from checkpoint")
        self.mod.load_state_dict(torch.load(checkpoint_path))
        self.timestep = np.load(checkpoint_path + "_timestep.npy").item()


if __name__ == "__main__":

    opt = parser.parse_args()
    if opt.unique:
        train(torch.device("cuda:0"), opt.maxepochs, opt.L2)
    else:
        ray.init(num_cpus=10, num_gpus=2)

        data_train = pin_in_object_store(
            LSTMDataset_ByPat(file_path="~/Data/MIMIC/"))
        data_val = pin_in_object_store(
            LSTMDataset_ByPat(csv_file_serie="LSTM_tensor_val.csv",
                              file_path="~/Data/MIMIC/",
                              cov_path="LSTM_covariates_val",
                              tag_path="LSTM_death_tags_val.csv"))
        means_df = pd.Series.from_csv("~/Data/MIMIC/mean_features.csv")
        means_vec = pin_in_object_store(
            torch.tensor(means_df.as_matrix(), dtype=torch.float))

        tune.register_trainable("my_class", train_class)

        hyperband = AsyncHyperBandScheduler(time_attr="training_iteration",
                                            reward_attr="mean_accuracy",
                                            max_t=350,
                                            grace_period=15)

        exp = {
            'run': "my_class",
            'repeat': 30,
            'stop': {"training_iteration": 350},
            'trial_resources': {
                "gpu": 1,
                "cpu": 1
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train-pos-path",
        default='datasets/constrained_classification/k16/pos.train.article.txt',
    )
    parser.add_argument(
        "--train-neg-path",
        default='datasets/constrained_classification/k16/neg.train.article.txt',
    )
    parser.add_argument(
        "--eval-pos-path",
        default='datasets/constrained_classification/k16/pos.valid.article.txt',
    )
    parser.add_argument(
        "--eval-neg-path",
        default='datasets/constrained_classification/k16/neg.valid.article.txt',
    )

    args = parser.parse_args()

    ray.init()

    train_df = load_df(args.train_pos_path, args.train_neg_path, 0.01)
    eval_df = load_df(args.eval_pos_path, args.eval_neg_path, 0.01)

    class_weights = class_weight.compute_class_weight('balanced', [0, 1],
                                                      train_df.label)

    train_df_id = pin_in_object_store(train_df)
    eval_df_id = pin_in_object_store(eval_df)

    model_args = {
        'evaluate_during_training': True,
        'log_tune': True,
        'train_batch_size': 32,
        'gradient_accumulation_steps': 1,
        'eval_batch_size': 32,
        'num_train_epochs': 1,
        'eval_steps': 10000000,
        'save_steps': 10000000,
        'cache_dir': os.path.join(os.getcwd(), 'cache_dir'),
        'overwrite_output_dir': True
    }

    config = {
        'warmup_ratio': 0.04,
        'train_df_id': train_df_id,
        'eval_df_id': eval_df_id,
        'model_args': model_args,
        'class_weights': class_weights
    }

    space = {
        'adam_epsilon': hp.loguniform('adam_epsilon', np.log(1e-8),
                                      np.log(1e-7)),
        'weight_decay': hp.choice('weight_decay', [0, 0.01]),
        'learning_rate': hp.loguniform('learning_rate', np.log(1e-6),
                                       np.log(1e-4)),
        'max_seq_length': hp.quniform('max_seq_length', 96, 160, 1),
    }

    resources_per_trial = {
        "cpu": 8,
        "gpu": 1,
    }

    current_best_params = [{
        "adam_epsilon": 1e-8,
        'weight_decay': 0,
        'learning_rate': 1e-5,
        'max_seq_length': 128
    }]

    algo = HyperOptSearch(space,
                          metric="mcc",
                          max_concurrent=5,
                          mode="max",
                          points_to_evaluate=current_best_params)

    analysis = tune.run(Classifier,
                        config=config,
                        search_alg=algo,
                        resources_per_trial=resources_per_trial,
                        scheduler=tune.schedulers.MedianStoppingRule(
                            time_attr='training_iteration',
                            metric='mcc',
                            mode='max',
                            grace_period=3))

    print("Best config: ", analysis.get_best_config(metric='mcc'))
Example #9
latents_train = latents[:n_train, :]
latents_val = latents[n_train:n_train + n_val, :]
print(latents_train.shape)
print(latents_val.shape)

tags_train = pd.read_csv("~/Data/MIMIC/Clean_data/LSTM_death_tags_train.csv"
                         ).sort_values("UNIQUE_ID")
tag_mat_train = tags_train[["DEATHTAG", "UNIQUE_ID"]].as_matrix()[:, 0]

tags_val = pd.read_csv(
    "~/Data/MIMIC/Clean_data/LSTM_death_tags_val.csv").sort_values("UNIQUE_ID")
tag_mat_val = tags_val[["DEATHTAG", "UNIQUE_ID"]].as_matrix()[:, 0]
print(tag_mat_train.shape)
print(tag_mat_val.shape)

data_train = pin_in_object_store(latent_dataset(latents_train, tag_mat_train))
data_val = pin_in_object_store(latent_dataset(latents_val, tag_mat_val))

tune.register_trainable("my_class", train_class)

hyperband = AsyncHyperBandScheduler(time_attr="training_iteration",
                                    reward_attr="mean_accuracy",
                                    max_t=100)

exp = {
    'run': "my_class",
    'num_samples': 30,
    'stop': {
        "training_iteration": 100
    },
    'config': {
Example #10
        #torch.cuda.empty_cache()
        np.save(path + "_timestep.npy", self.timestep)
        return path

    def _restore(self, checkpoint_path):
        print("LOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADING")
        self.mod.load_state_dict(torch.load(checkpoint_path))
        self.timestep = np.load(checkpoint_path + "_timestep.npy").item()


#shutil.rmtree("/home/edward/ray_results/my_Experiment/")

ray.init(num_gpus=3, num_cpus=10)

data_train = pin_in_object_store(
    TensorFactDataset(csv_file_serie="complete_tensor_train1.csv",
                      cov_path="complete_covariates"))
data_val = pin_in_object_store(
    TensorFactDataset(csv_file_serie="complete_tensor_val1.csv",
                      cov_path="complete_covariates"))

tune.register_trainable("my_class", train_class)

hyperband = HyperBandScheduler(time_attr="timesteps_total",
                               reward_attr="neg_mean_loss",
                               max_t=100)

exp = {
    'run': "my_class",
    'trial_resources': {
        "gpu": 1
Example #11
import ray
import ray.tune as tune
from ray.tune.util import pin_in_object_store, get_pinned_object
import tensorflow as tf
from tf_train_simple.mnist_data_grabber import DataGrab
from tf_train_simple.mnist_model_builder import build_model

ray.init()
data = pin_in_object_store(DataGrab('/tmp/ray/tf/mnist/input_data'))


def train_func(config, reporter):  # add a reporter arg
    my_lr = config["lr"]
    cur_data = get_pinned_object(data)
    with tf.Session() as sess:
        x, y_, keep_prob, train_step, accuracy = build_model(my_lr)
        print('number of global vars:', len(tf.global_variables()))

        sess.run(tf.global_variables_initializer())
        for i in range(2000):
            batch = cur_data.get_next_train(50)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })
                print('step %d, learning rate %f, training accuracy %g' %
                      (i, my_lr, train_accuracy))
                reporter(timesteps_total=i, mean_accuracy=train_accuracy)
            train_step.run(feed_dict={
Example #12
        return TrainingResult(mean_accuracy=auc / n_splits,
                              timesteps_this_iter=1)

    def _save(self, checkpoint_dir):
        return path

    def _restore(self, checkpoint_path):
        return checkpoint_path


# This file should contain a numpy array with the latents and the label
# as the first column.
file_path = sys.argv[1:][0]

ray.init(num_cpus=3)

latents = np.load(file_path)
tags = pd.read_csv("~/Data/MIMIC/complete_death_tags.csv").sort_values(
    "UNIQUE_ID")
tag_mat = tags[["DEATHTAG", "UNIQUE_ID"]].as_matrix()[:, 0]
x = pin_in_object_store(latents.T)
y = pin_in_object_store(tag_mat)

tune.register_trainable("my_class", train_class)

hyperband = HyperBandScheduler(time_attr="timesteps_total",
                               reward_attr="mean_accuracy",
                               max_t=100)

exp = {
    'run': "my_class",
    'repeat': 50,
    'stop': {"training_iteration": 1},
    'config': {
        "C": lambda spec: 10**(8 * random.random() - 4),
        "gamma": lambda spec: 10**(8 * random.random() - 4),
    }
}
Example #13
    def _save(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        torch.save(self.mod.state_dict(), path)
        np.save(path + "_timestep.npy", self.timestep)
        return path

    def _restore(self, checkpoint_path):
        self.mod.load_state_dict(torch.load(checkpoint_path))
        self.timestep = np.load(checkpoint_path + "_timestep.npy").item()


if __name__ == "__main__":
    #train()
    ray.init(num_cpus=10, num_gpus=2)
    data_train = pin_in_object_store(
        GRU_teach_dataset(file_path="~/Data/MIMIC/Clean_data/"))
    data_val = pin_in_object_store(
        GRU_teach_dataset(file_path="~/Data/MIMIC/Clean_data/",
                          csv_file_serie="LSTM_tensor_val.csv",
                          cov_path="LSTM_covariates_val.csv",
                          tag_path="LSTM_death_tags_val.csv"))
    data_test = pin_in_object_store(
        GRU_teach_dataset(file_path="~/Data/MIMIC/Clean_data/",
                          csv_file_serie="LSTM_tensor_test.csv",
                          cov_path="LSTM_covariates_test.csv",
                          tag_path="LSTM_death_tags_test.csv"))

    tune.register_trainable("my_class", train_class)

    hyperband = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        reward_attr="mean_accuracy",
Example #14
def main(args):

    ray.init(num_cpus=args.rayNumCpu, num_gpus=args.rayNumGpu)

    t_loader, v_loader = get_loaders(train_batch_size=16,
                                     num_workers=1,
                                     data_folder=args.dataFolder,
                                     cuda_available=torch.cuda.is_available())
    pinned_obj_dict['data_loader_train'] = pin_in_object_store(t_loader)
    pinned_obj_dict['data_loader_valid'] = pin_in_object_store(v_loader)
    pinned_obj_dict['args'] = pin_in_object_store(args)

    trainable_name = 'hyp_search_train'
    register_trainable(trainable_name, TrainerClass)

    reward_attr = "acc"

    #############################
    # Define hyperband scheduler
    #############################
    hpb = AsyncHyperBandScheduler(time_attr="training_iteration",
                                  reward_attr=reward_attr,
                                  grace_period=40,
                                  max_t=300)

    ##############################
    # Define hyperopt search algo
    ##############################
    space = {
        'lr': hp.uniform('lr', 0.001, 0.1),
        # 'Adadelta' was also tried; it gave the worst results.
        'optimizer': hp.choice("optimizer", ['SGD', 'Adam']),
        'batch_accumulation': hp.choice("batch_accumulation", [4, 8, 16])
    }
    hos = HyperOptSearch(space, max_concurrent=4, reward_attr=reward_attr)

    #####################
    # Define experiments
    #####################
    exp_name = "resnet152_hyp_search_hyperband_hyperopt_{}".format(
        time.strftime("%Y-%m-%d_%H.%M.%S"))
    exp = Experiment(
        name=exp_name,
        run=trainable_name,
        num_samples=args.numSamples,  # the number of experiments
        resources_per_trial={
            "cpu": args.trialNumCpu,
            "gpu": args.trialNumGpu
        },
        checkpoint_freq=args.checkpointFreq,
        checkpoint_at_end=True,
        stop={
            reward_attr: 0.95,
            # how many times a specific config will be trained
            "training_iteration": args.trainingIteration,
        })

    ##################
    # Run tensorboard
    ##################
    if args.runTensorBoard:
        thread = threading.Thread(target=launch_tensorboard, args=[exp_name])
        thread.start()
        launch_tensorboard(exp_name)

    ##################
    # Run experiments
    ##################
    run_experiments(exp, search_alg=hos, scheduler=hpb, verbose=False)
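
`pinned_obj_dict` and `TrainerClass` are defined outside this snippet; presumably the dict is a module-level registry for the pinned handles, e.g. (an assumption, not shown in the original):

# Assumed module-level registry populated inside main() above.
pinned_obj_dict = {}

# Inside the (not shown) TrainerClass, the handles would be resolved back:
#     t_loader = get_pinned_object(pinned_obj_dict['data_loader_train'])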
Example #15
ray.init(num_cpus=10)

latents_pat = np.load(file_path_pat)
latents_feat = np.load(file_path_feat)
latents_time = np.load(file_path_time)

tags = pd.read_csv("~/Data/MIMIC/LSTM_death_tags_train.csv").sort_values(
    "UNIQUE_ID")
tag_mat = tags[["DEATHTAG", "UNIQUE_ID"]].as_matrix()[:, 0]

train_idx, test_idx = train_test_split(np.arange(tag_mat.shape[0]),
                                       test_size=0.2,
                                       random_state=42)

data_train = pin_in_object_store(
    reconstructed_ts(latents_pat[train_idx], latents_feat[train_idx],
                     latents_time[train_idx], tag_mat[train_idx]),
    "~/Data/MIMIC/LSTM_tensor_train.csv", train_idx)
data_test = pin_in_object_store(
    reconstructed_ts(latents_pat[test_idx], latents_feat[test_idx],
                     latents_time[test_idx], tag_mat[test_idx]),
    "~/Data/MIMIC/LSTM_tensor_train.csv", test_idx)

tune.register_trainable("my_class", train_class)

hyperband = HyperBandScheduler(time_attr="timesteps_total",
                               reward_attr="mean_accuracy",
                               max_t=100)

exp = {
    'run': "my_class",
    'repeat': 50,