Example #1
0
def main(archive_dir, hashcode, test_hashcode, num_test, useMpi=False):
    """Find and execute the archived trial identified by *hashcode*.

    Parameters
    ----------
    archive_dir : str
        Directory containing the archived trials.
    hashcode : str
        Hash identifying the KerasTrial to execute.
    test_hashcode : str
        Hash of the DataProcedure holding the test set. Currently unused:
        testing is disabled below (see NOTE).
    num_test : int
        Number of test samples. Currently unused for the same reason.
    useMpi : bool or str
        Run through mpi_learn's MPI_KerasTrial when true. May arrive as the
        string "True"/"False" when invoked from a shell script.
    """
    # Shell invocations pass booleans as strings; normalize to bool first.
    if isinstance(useMpi, str):
        useMpi = (useMpi == "True")
    print("STARTING: %s" % hashcode)
    sys.stdout.flush()
    if useMpi:
        # mpi_learn is expected as a sibling checkout; make it importable.
        p = "../../mpi_learn"
        if p not in sys.path:
            sys.path.append(p)
        from CMS_Deep_Learning.storage.MPIArchiving import MPI_KerasTrial
        trial = MPI_KerasTrial.find(archive_dir, hashcode)
    else:
        trial = KerasTrial.find(archive_dir, hashcode)
    print("EXECUTING: %s" % hashcode)
    sys.stdout.flush()
    trial.execute()  # custom_objects={"Lorentz":Lorentz,"Slice": Slice})
    # Mirror the finished trial back to the shared archive.
    rsyncStorable(
        trial.hash(), archive_dir,
        "[email protected]:/bigdata/shared/Delphes/keras_archive_4_1"
    )
    # addCommitPushDir(trial.get_path())

    # NOTE(review): testing is deliberately short-circuited here because
    # evaluate_generator misbehaves on CSCS; the code below is intentionally
    # unreachable and kept for when that issue is resolved.
    return
    raise NotImplementedError(
        "Will not run test, evaluate_generator acts weird on CSCS")
    print("TESTING: %s, num_samples: %r" % (hashcode, num_test))
    sys.stdout.flush()
    test = DataProcedure.find(archive_dir, test_hashcode)
    metrics = trial.test(test_proc=test,
                         test_samples=num_test,
                         custom_objects={
                             "Lorentz": Lorentz,
                             "Slice": Slice
                         })
    print("DONE: %r" % metrics)
Example #2
0
def batchExecuteAndTestTrials(tups, time_str="24:00:00", repo="/scratch/snx3000/dweiteka/CMS_Deep_Learning/", trial_out_dir='/scratch/snx3000/dweiteka/trial_out/',use_mpi=False, verbose=1):
    '''Execute (and optionally test) a batch of trials, in order locally or
        as SLURM batch jobs on CSCS daint.

    :param tups: list of tuples (trial, test, num_test, deps): trial is a
        KerasTrial, test is a DataProcedure (or None to skip testing),
        num_test is the number of test samples, and deps is a list of SLURM
        job ids this trial must wait for (only used on daint).
    :param time_str: SLURM wall-time limit for each batch job.
    :param repo: path to the CMS_Deep_Learning checkout (locates runTrial.sh).
    :param trial_out_dir: directory receiving SLURM stdout/stderr files.
    :param use_mpi: if True, run trials through mpi_learn's MPI_KerasTrial.
    :param verbose: >= 1 prints submission/progress information.
    '''
    # On the CSCS "daint" machine trials are submitted via sbatch; anywhere
    # else they are executed in-process, one after another.
    isdaint = "daint" in socket.gethostname()
    scripts_dir = repo + "scripts/"
    for trial, test, num_test, deps in tups:
        archive_dir = trial.archive_dir
        hashcode = trial.hash()

        test_hashcode = None
        if test is not None:
            test.write()
            test_hashcode = test.hash()
        if isdaint:
            if not os.path.exists(trial_out_dir):
                os.makedirs(trial_out_dir)
            dep_clause = "" if len(deps) == 0 else "--dependency=afterok:" + ":".join(deps)
            # "%j" is expanded by SLURM to the job id in the output file name.
            ofile = trial_out_dir + hashcode[:5] + ".%j"
            sbatch = 'sbatch -C gpu -t %s -o %s -e %s %s ' % (time_str, ofile, ofile, dep_clause)
            sbatch += '%srunTrial.sh %s %s %s %s %s %s\n' % (scripts_dir, repo, archive_dir, hashcode, test_hashcode, num_test, use_mpi)
            if verbose >= 1: print(sbatch)
            out = os.popen(sbatch).read()
            if verbose >= 1: print("THIS IS THE OUTPUT:", out)
        else:
            # BUG FIX: these branches were inverted — use_mpi must select the
            # MPI_KerasTrial implementation (as in main()), not KerasTrial.
            if use_mpi:
                from CMS_Deep_Learning.storage.MPIArchiving import MPI_KerasTrial
                trial = MPI_KerasTrial.find(archive_dir, hashcode)
            else:
                trial = KerasTrial.find(archive_dir, hashcode)
            if verbose >= 1: print("EXECUTE %r" % trial.hash())
            trial.execute()  # custom_objects={"Lorentz":Lorentz,"Slice": Slice})

            if test_hashcode is not None:
                if verbose >= 1: print("TEST %r" % trial.hash())
                test = DataProcedure.find(archive_dir, test_hashcode)
                trial.test(test_proc=test,
                           test_samples=num_test,
                           custom_objects={"Lorentz": Lorentz, "Slice": Slice})
args = parser.parse_args()

archive_dir = args.archive_dir
hashcode = args.hashcode
masters = args.masters
max_gpus = args.max_gpus

print(archive_dir, hashcode, masters, max_gpus)

comm = MPI.COMM_WORLD.Dup()
# We have to assign GPUs to processes before importing Theano.
device = get_device(comm, masters, gpu_limit=max_gpus, gpu_for_master=True)
print("Process", comm.Get_rank(), "using device", device)
os.environ['THEANO_FLAGS'] = "device=%s,floatX=float32" % (device)
# Import deferred until after THEANO_FLAGS is set, so Theano binds to the
# device chosen above.
from CMS_Deep_Learning.storage.MPIArchiving import MPI_KerasTrial

trial = MPI_KerasTrial.find_by_hashcode(archive_dir, hashcode)
if trial is None:
    raise ValueError("hashcode does not exist")
if not isinstance(trial, MPI_KerasTrial):
    raise TypeError("Trial is not MPI_KerasTrial, got type %r" % type(trial))
trial._execute_MPI(comm=comm)
Example #4
0
def build_trial(name,
                model,
                train,
                val,
                archive_dir=None,
                nb_train=None,
                nb_val=None,
                workers=1,
                loss='categorical_crossentropy',
                optimizer='rmsprop',
                metrics=None,
                nb_epoch=10,
                callbacks=None,
                max_q_size=100,
                keys_to_record=None,
                **kargs):
    """Assemble and write a KerasTrial (or MPI_KerasTrial) without running it.

    :param name: name of the trial.
    :param model: a Keras model, or a callable that builds one from **kargs.
    :param train: training data, normalized through assert_dataset.
    :param val: validation data, normalized through assert_dataset.
    :param archive_dir: directory in which the trial is archived.
    :param nb_train: number of training samples (resolved by assert_dataset).
    :param nb_val: number of validation samples (resolved by assert_dataset).
    :param workers: 1 for single-process training; > 1 selects the
        mpi_learn-backed MPI_KerasTrial.
    :param loss: compilation loss.
    :param optimizer: compilation optimizer.
    :param metrics: compilation metrics; defaults to ['accuracy'].
    :param nb_epoch: number of training epochs.
    :param callbacks: Keras callbacks for fit_generator.
    :param max_q_size: maximum generator queue size.
    :param keys_to_record: keys from **kargs copied into the trial record;
        defaults to no keys.
    :returns: the written trial.
    """
    # Avoid the shared-mutable-default-argument pitfall: resolve sentinel
    # defaults to fresh objects per call.
    if metrics is None:
        metrics = ['accuracy']
    if keys_to_record is None:
        keys_to_record = []
    # A callable model is a factory; build the concrete model from kargs.
    if isinstance(model, types.FunctionType):
        model = model(**kargs)
    if workers == 1:
        trial = KerasTrial(archive_dir, name=name, model=model, seed=0)
        val, nb_val = assert_dataset(val,
                                     nb_data=nb_val,
                                     as_generator=True,
                                     archive_dir=archive_dir,
                                     **kargs)
        train, nb_train = assert_dataset(train,
                                         nb_train,
                                         as_generator=True,
                                         archive_dir=archive_dir,
                                         **kargs)
    else:
        print("USING MPI")
        # mpi_learn is expected as a sibling checkout; make it importable.
        p = "../../mpi_learn"
        if p not in sys.path:
            sys.path.append(p)
        from CMS_Deep_Learning.storage.MPIArchiving import MPI_KerasTrial
        trial = MPI_KerasTrial(archive_dir,
                               name=name,
                               model=model,
                               workers=workers,
                               seed=0,
                               features_name="Particles",
                               labels_name="Labels")
        val, nb_val = assert_dataset(val,
                                     nb_data=nb_val,
                                     archive_dir=archive_dir,
                                     **kargs)
        train, nb_train = assert_dataset(train,
                                         nb_train,
                                         archive_dir=archive_dir,
                                         **kargs)

    trial.set_train(
        train_procedure=train,  # train_dps,
        samples_per_epoch=nb_train)
    trial.set_validation(
        val_procedure=val,  # val_dps,
        nb_val_samples=nb_val)

    trial.set_compilation(loss=loss, optimizer=optimizer, metrics=metrics)

    trial.set_fit_generator(nb_epoch=nb_epoch,
                            callbacks=callbacks,
                            max_q_size=max_q_size)
    trial.write()

    trial.to_record({k: kargs[k] for k in keys_to_record})
    return trial