Exemplo n.º 1
0
                  cache=args.caching_dir,
                  preloading=args.data_preload,
                  features_name=args.features_name,
                  labels_name=args.labels_name)
    # We initialize the Data object with the training data list
    # so that we can use it to count the number of training examples
    data.set_file_names(train_list)
    validate_every = int(data.count_data() / args.batch)

    # Some input arguments may be ignored depending on chosen algorithm
    if args.mode == 'easgd':
        algo = Algo(None,
                    loss=args.loss,
                    validate_every=validate_every,
                    mode='easgd',
                    sync_every=args.sync_every,
                    worker_optimizer=args.worker_optimizer,
                    worker_optimizer_params=args.worker_optimizer_params,
                    elastic_force=args.elastic_force / (comm.Get_size() - 1),
                    elastic_lr=args.elastic_lr,
                    elastic_momentum=args.elastic_momentum)
    elif args.mode == 'gem':
        algo = Algo('gem',
                    loss=args.loss,
                    validate_every=validate_every,
                    mode='gem',
                    sync_every=args.sync_every,
                    worker_optimizer=args.worker_optimizer,
                    worker_optimizer_params=args.worker_optimizer_params,
                    learning_rate=args.gem_lr,
                    momentum=args.gem_momentum,
                    kappa=args.gem_kappa)
Exemplo n.º 2
0
                  cache=args.caching_dir,
                  preloading=args.data_preload,
                  features_name=args.features_name,
                  labels_name=args.labels_name)
    # We initialize the Data object with the training data list
    # so that we can use it to count the number of training examples
    data.set_file_names(train_list)
    validate_every = data.count_data() / args.batch

    # Some input arguments may be ignored depending on chosen algorithm
    if args.easgd:
        algo = Algo(None,
                    loss=args.loss,
                    validate_every=validate_every,
                    mode='easgd',
                    sync_every=args.sync_every,
                    worker_optimizer=args.worker_optimizer,
                    elastic_force=args.elastic_force / (comm.Get_size() - 1),
                    elastic_lr=args.elastic_lr,
                    elastic_momentum=args.elastic_momentum)
    else:
        algo = Algo(args.optimizer,
                    loss=args.loss,
                    validate_every=validate_every,
                    sync_every=args.sync_every,
                    worker_optimizer=args.worker_optimizer)

    # Creating the MPIManager object causes all needed worker and master nodes to be created
    manager = MPIManager(comm=comm,
                         data=data,
                         algo=algo,
Exemplo n.º 3
0
                  cache=args.caching_dir,
                  preloading=args.data_preload,
                  features_name=args.features_name,
                  labels_name=args.labels_name)
    # We initialize the Data object with the training data list
    # so that we can use it to count the number of training examples
    data.set_file_names(train_list)
    validate_every = int(data.count_data() / args.batch)

    # Some input arguments may be ignored depending on chosen algorithm
    if args.mode == 'easgd':
        algo = Algo(None,
                    loss=args.loss,
                    validate_every=validate_every,
                    mode='easgd',
                    sync_every=args.sync_every,
                    worker_optimizer=args.worker_optimizer,
                    elastic_force=args.elastic_force / (comm.Get_size() - 1),
                    elastic_lr=args.elastic_lr,
                    elastic_momentum=args.elastic_momentum)
    elif args.mode == 'gem':
        algo = Algo('gem',
                    loss=args.loss,
                    validate_every=validate_every,
                    mode='gem',
                    sync_every=args.sync_every,
                    worker_optimizer=args.worker_optimizer,
                    learning_rate=args.gem_lr,
                    momentum=args.gem_momentum,
                    kappa=args.gem_kappa)
    else:
Exemplo n.º 4
0
    def _execute_MPI(self,
                    comm=None,
                    archiveTraining=True,
                    archiveValidation=True,
                    verbose=1):
        """Train this trial's model with mpi_learn over an MPI communicator.

        Resolves the training/validation procedures to HDF5 file paths,
        builds the mpi_learn ``H5Data``, ``Algo``, ``ModelFromJson`` and
        ``MPIManager`` objects, and on rank 0 records the sample counts and
        runs ``manager.process.train()``.

        Args:
            comm: MPI communicator to use. When ``None`` a duplicate of
                ``MPI.COMM_WORLD`` is created.
            archiveTraining: not read by the visible part of this method.
            archiveValidation: not read by the visible part of this method.
            verbose: forwarded to ``self._generateCallbacks``.

        Raises:
            ValueError: if ``train_procedure`` or ``val_procedure`` is not a
                list, or if a custom object cannot be imported.
            IOError: if a path entry does not point to an existing file.
        """
        from mpi4py import MPI
        from mpi_learn.mpi.manager import MPIManager, get_device
        from mpi_learn.train.algo import Algo
        from mpi_learn.train.data import H5Data
        from mpi_learn.train.model import ModelFromJson

        # NOTE(review): not read in the visible part of this method; kept in
        # case code outside this view depends on it.
        load_weights = True

        if comm is None:
            comm = MPI.COMM_WORLD.Dup()

        if not isinstance(self.train_procedure, list):
            raise ValueError("Trial attribute train_procedure: expected list of DataProcedures or paths but got type %r" % type(self.train_procedure))
        if not isinstance(self.val_procedure, list):
            raise ValueError("Trial attribute val_procedure: expected list of DataProcedures or paths but got type %r" % type(self.val_procedure))

        # NOTE(review): DataProcedure entries are passed through
        # DataProcedure.from_json unchanged from the original code -- confirm
        # that from_json accepts a DataProcedure as its second argument.
        train = [DataProcedure.from_json(self.archive_dir, x) if isinstance(x, DataProcedure) else str(x)
                 for x in self.train_procedure]
        val = [DataProcedure.from_json(self.archive_dir, x) if isinstance(x, DataProcedure) else str(x)
               for x in self.val_procedure]

        batchAssertArchived(train)
        batchAssertArchived(val)

        def assertStr(x):
            """Map a DataProcedure or an existing file path to a file path."""
            if isinstance(x, DataProcedure):
                # Use the argument itself; the original referenced an
                # undefined name here.
                return x.get_path() + "archive.h5"
            elif os.path.isfile(x):
                return x
            else:
                raise IOError("Cannot find %r" % x)

        train_list = [assertStr(x) for x in train]
        # Pass the loop variable; the original passed a stale name from the
        # previous comprehension.
        val_list = [assertStr(dp) for dp in val]

        # There is an issue when multiple processes import Keras simultaneously --
        # the file .keras/keras.json is sometimes not read correctly.
        # As a workaround, just try several times to import keras.
        # Note: importing keras imports theano --
        # impossible to change GPU choice after this.
        for try_num in range(10):
            try:
                from keras.models import model_from_json
                import keras.callbacks as cbks
                break
            except ValueError:
                print("Unable to import keras. Trying again: %d" % try_num)
                sleep(0.1)

        # Resolve each custom object name to the attribute of that name in
        # the module it is registered under.
        custom_objects = {}
        for name, module in self.custom_objects.items():
            try:
                custom_objects[name] = getattr(importlib.import_module(module), name)
            except Exception:
                raise ValueError("Custom Object %r does not exist in %r. \
                    For best results Custom Objects should be importable and not locally defined." % (str(name), str(module)))

        # We initialize the Data object with the training data list
        # so that we can use it to count the number of training examples
        data = H5Data(batch_size=self.batch_size,
                      features_name=self.features_name,
                      labels_name=self.labels_name)
        data.set_file_names(train_list)
        num_train = data.count_data()

        # Floor division keeps this an integer on Python 3 as well as Python 2.
        validate_every = num_train // self.batch_size

        if self.easgd:
            algo = Algo(None, loss=self.loss, validate_every=validate_every,
                        mode='easgd', elastic_lr=1.0, sync_every=self.sync_every,
                        worker_optimizer='sgd',
                        elastic_force=0.9 / (comm.Get_size() - 1))
        else:
            algo = Algo(self.master_optimizer, loss=self.loss, validate_every=validate_every,
                        sync_every=self.sync_every, worker_optimizer=self.optimizer)

        model_builder = ModelFromJson(comm, json_str=self.model, custom_objects=custom_objects)

        callbacks = self._generateCallbacks(verbose=verbose)

        # Creating the MPIManager object causes all needed worker and master nodes to be created
        manager = MPIManager(comm=comm, data=data,
                             num_epochs=self.epochs if hasattr(self, 'epochs') else self.nb_epoch,
                             algo=algo, model_builder=model_builder,
                             train_list=train_list, val_list=val_list, num_masters=self.masters,
                             synchronous=self.synchronous, callbacks=callbacks, custom_objects=custom_objects)

        # Process 0 records the dataset sizes and runs the training.
        if comm.Get_rank() == 0:
            record = self.read_record()
            if "num_train" not in record:
                self.to_record({"num_train": num_train})
            if "num_val" not in record:
                # Build the validation Data object the same way as the
                # training one above: keyword construction, then file names.
                val_data = H5Data(batch_size=self.batch_size,
                                  features_name=self.features_name,
                                  labels_name=self.labels_name)
                val_data.set_file_names(val_list)
                self.to_record({"num_val": val_data.count_data()})

            print(custom_objects)
            print(algo)

            t_0 = time()
            histories = manager.process.train()
            delta_t = time() - t_0
            manager.free_comms()
            print("Training finished in %.3f seconds" % delta_t)
            print(histories)
Exemplo n.º 5
0
    ]

    # MPI process 0 coordinates the Bayesian optimization procedure
    if block_num == 0:
        model_fn = lambda x, y, z: mpi.test_cnn(x, y, np.exp(-z))
        opt_coordinator = coordinator.Coordinator(comm_world, num_blocks,
                                                  param_ranges, model_fn)
        opt_coordinator.run(num_iterations=30)
    else:
        data = H5Data(batch_size=args.batch,
                      features_name='Images',
                      labels_name='Labels')
        data.set_file_names(train_list)
        validate_every = data.count_data() / args.batch
        algo = Algo(args.optimizer,
                    loss=args.loss,
                    validate_every=validate_every,
                    sync_every=args.sync_every)
        os.environ['KERAS_BACKEND'] = backend
        import_keras()
        import keras.callbacks as cbks
        callbacks = []
        if args.early_stopping is not None:
            callbacks.append(
                cbks.EarlyStopping(patience=args.early_stopping, verbose=1))
        block = process_block.ProcessBlock(comm_world,
                                           comm_block,
                                           algo,
                                           data,
                                           device,
                                           args.epochs,
                                           train_list,