def main():
    """Entry point for the single-process experiment runner.

    Parses CLI arguments, seeds the RNG (generating and recording a fresh
    seed when none is given), runs ``runalldatasets`` ``args.n`` times and
    writes the aggregated results to JSON and CSV under a timestamped
    output directory.
    """
    args = getArgs()
    if args.seed is None:
        # No seed supplied: draw one and store it back on args so the run
        # is still reproducible (the seed is echoed to stdout).
        seed = random.randrange(sys.maxsize)
        args.seed = seed
        print(f"generating new random seed: {seed}")
    else:
        print(f"setting random seed to: {args.seed}")
    random.seed(args.seed)

    datasetlist = args.datasets
    print(f"doing experiment with {datasetlist} in that order")
    runlist = args.methods
    set_keras_growth(args.gpu)

    prefix = "runner" if args.prefix is None else args.prefix
    rootpath = prefix + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    createdir(rootpath)
    # Record the exact command line so the run can be reproduced later.
    writejson(f"{rootpath}/settings.json", sys.argv[1:])

    callbacks = args.callbacks if args.callbacks is not None else list()

    alphalist = [0.8]
    nresults = list()
    for i in range(0, args.n):
        nresults.append(
            runalldatasets(args, callbacks, datasetlist, rootpath, runlist,
                           alphalist=alphalist, n=i,
                           printcvresults=args.cvsummary,
                           printcv=args.printcv,
                           doevaluation=args.doevaluation,
                           learning_rate=args.learning_rate))

    writejson(f"{rootpath}/data.json", nresults)
    resdf = pd.DataFrame(nresults)
    resdf.to_csv(
        f"{rootpath}/results_{args.kfold}kfold_{args.epochs}epochs_{args.onehot}onehot.csv"
    )
def runalldatasetsMPI(args, callbacks, datasetlist, mpicomm, mpirank,
                      rootpath, runlist, alphalist, n,
                      printcvresults=False, printcv=False,
                      doevaluation=True, evalcount=False):
    """Run every method in ``runlist`` over every dataset, one CV fold per MPI rank.

    Each rank evaluates the fold matching its rank index; rank 0 gathers the
    per-fold results from the other ranks, writes an intermediate JSON dump
    after each dataset and prints a summary.

    :param args: parsed CLI arguments (kfold, onehot, ... are read here)
    :param callbacks: Keras callbacks forwarded to ``rundataset``
    :param datasetlist: dataset names to evaluate, in order
    :param mpicomm: MPI communicator used for barrier/send/recv
    :param mpirank: this process's rank; also selects its CV fold
    :param rootpath: output directory for this run
    :param runlist: method names to evaluate on each fold
    :param alphalist: alpha values forwarded to ``rundataset``
    :param n: index of the current repetition (used in output file names)
    :returns: dict of per-dataset results (populated on rank 0 only)
    """
    datasetsdone = list()
    dataset_results = dict()
    for dataset in datasetlist:
        d = Dataset(dataset)
        datasetsdone.append(dataset)
        dsl, colmap, stratified_fold_generator = fromDataSetToSKLearn(
            d, args.onehot, n_splits=args.kfold)
        data = dsl.getFeatures()
        target = dsl.getTargets()

        # Without one-hot encoding, flag datasets that still contain
        # nominal (non-class) columns so downstream code can handle them.
        anynominal = False
        datasetInfo = dsl.dataset.datasetInfo
        if not args.onehot:
            for key, value in colmap.items():
                isclass = False
                for col in datasetInfo["cols"]:
                    if col["name"] == key and col["class"] == True:
                        isclass = True
                # BUG FIX: the original compared with `is "nominal"`, which
                # tests object identity, not string equality, and is not
                # guaranteed to match even for equal strings.
                if value["type"] == "nominal" and not isclass:
                    anynominal = True

        min_retain_losses = list()
        min_losses = list()
        dataset_result = dict()

        # Materialize all folds so each rank can index its own split.
        foldlist = list()
        for train, test in stratified_fold_generator:
            foldlist.append((train, test))
        mpicomm.barrier()
        train, test = foldlist[mpirank]

        if printcv:
            # Hash the index lists so equal folds across ranks/runs can be
            # spotted by eye in the logs.
            trainstr = "-".join(str(x) for x in train)
            trainhash = hashlib.md5(trainstr.encode()).digest()
            teststr = "-".join(str(x) for x in test)
            testhash = hashlib.md5(teststr.encode()).digest()
            print(
                f"summary of this cv-fold, first train: {train[0]} trainhash: {trainhash}"
                f"first test: {test[0]} testhash: {testhash}")

        fold_number = mpirank
        fold_results = dict()
        mpi_rank_rootpath = f"{rootpath}/{mpirank}"
        createdir(mpi_rank_rootpath)

        # Run all the methods on this rank's fold so they can be compared
        # internally later (e.g. within each other's standard deviation).
        fold_results, ranlist = rundataset(
            anynominal, args, callbacks, colmap, data, dataset, dsl,
            fold_results, min_losses, min_retain_losses,
            mpi_rank_rootpath, runlist, target, test, train,
            alphalist=alphalist, printcvresults=printcvresults, n=n,
            doevaluation=doevaluation, fold_number=fold_number)
        mpicomm.barrier()

        if mpirank == 0:
            # Rank 0 keeps its own fold and gathers the rest.
            dataset_result[str(fold_number)] = fold_results
            for i in range(1, args.kfold):
                recv_fold_results = mpicomm.recv(source=i)
                # BUG FIX: the original stored received folds under int keys
                # while rank 0's own fold used a str key; use str keys for
                # every fold so the result dict is uniform.
                dataset_result[str(i)] = recv_fold_results
        else:
            mpicomm.send(fold_results, dest=0)

        if mpirank == 0:
            dataset_results[dataset] = dataset_result
            # Intermediate dump so partial results survive a crash.
            writejson(f"{rootpath}/n{n}-data.json", dataset_results)
            printSummary(dataset, dataset_result, ranlist, n, args)

    return dataset_results
def main(mpirank, mpisize, mpicomm):
    """MPI entry point for the fixed-alpha experiment runner.

    Every rank runs its share of the cross-validation folds via
    ``runalldatasetsMPI``; rank 0 creates the output directory and writes
    the aggregated JSON/CSV results.

    :param mpirank: this process's MPI rank
    :param mpisize: total number of MPI processes (unused here, part of the
        launcher's calling convention)
    :param mpicomm: MPI communicator forwarded to ``runalldatasetsMPI``
    """
    args = getArgs()
    if args.seed is None:
        # Generate and record a seed so the run stays reproducible.
        seed = random.randrange(sys.maxsize)
        args.seed = seed
        print(f"generating new random seed:{seed}")
    random.seed(args.seed)

    datasetlist = args.datasets
    runlist = args.methods

    # When several GPUs are listed ("0,1,..."), spread the ranks across them.
    if "," in args.gpu:
        gpus = args.gpu.split(",")
        # BUG FIX: the original used `mpirank % 2`, which only works for
        # exactly two GPUs; use the actual number of GPUs listed.
        mygpu = gpus[mpirank % len(gpus)]
        set_keras_growth(int(mygpu))
    else:
        set_keras_growth(args.gpu)

    prefix = "runner" if args.prefix is None else args.prefix
    rootpath = prefix + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    if mpirank == 0:
        # Only rank 0 creates shared run artifacts.
        createdir(rootpath)
        writejson(f"{rootpath}/settings.json", sys.argv[1:])

    callbacks = args.callbacks if args.callbacks is not None else list()

    nresults = list()
    alphalist = [0.8]  # this code does not iterate over alpha, see mpi_deesweighting.py
    for i in range(0, args.n):
        dataset_results = runalldatasetsMPI(
            args, callbacks, datasetlist, mpicomm, mpirank, rootpath,
            runlist, alphalist, n=i,
            printcvresults=args.cvsummary,
            printcv=args.printcv,
            doevaluation=args.doevaluation)
        nresults.append(dataset_results)

    if mpirank == 0:
        writejson(f"{rootpath}/data.json", nresults)
        # BUG FIX: the original built the DataFrame from the always-empty
        # `results` dict, producing an empty CSV; use the collected results.
        resdf = pd.DataFrame(nresults)
        resdf.to_csv(
            f"{rootpath}/results_{args.kfold}kfold_{args.epochs}epochs_{args.onehot}onehot.csv"
        )
def main(mpirank, mpisize, mpicomm):
    """MPI entry point for the alpha-sweep experiment runner.

    Builds the list of alpha values (from ``args.alpharange`` or a default
    linspace over (0, 1]) and runs ``runalldatasetsMPI`` over it; rank 0
    creates the output directory and writes the aggregated JSON/CSV results.

    :param mpirank: this process's MPI rank
    :param mpisize: total number of MPI processes (unused here, part of the
        launcher's calling convention)
    :param mpicomm: MPI communicator forwarded to ``runalldatasetsMPI``
    """
    args = getArgs()
    if args.seed is not None:
        random.seed(args.seed)

    datasetlist = args.datasets
    print(f"doing experiment with {datasetlist} in that order")
    runlist = args.methods

    # When several GPUs are listed ("0,1,..."), spread the ranks across them.
    if "," in args.gpu:
        gpus = args.gpu.split(",")
        # BUG FIX: the original used `mpirank % 2`, which only works for
        # exactly two GPUs; use the actual number of GPUs listed.
        mygpu = gpus[mpirank % len(gpus)]
        set_keras_growth(int(mygpu))
    else:
        set_keras_growth(args.gpu)

    prefix = "runner" if args.prefix is None else args.prefix
    rootpath = prefix + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    if mpirank == 0:
        # Only rank 0 creates shared run artifacts.
        createdir(rootpath)
        writejson(f"{rootpath}/settings.json", vars(args))

    # Alpha values to sweep: a user-supplied "start:stop" range, or the
    # default spread just above 0 up to (and slightly past) 1.
    if args.alpharange is not None:
        splits = args.alpharange.split(":")
        alphastart = float(splits[0])
        alphastop = float(splits[1])
        alpharange = np.linspace(alphastart, alphastop, args.alphacount)
    else:
        alpharange = np.linspace(0.000001, 1.00001, args.alphacount)

    callbacks = args.callbacks if args.callbacks is not None else list()

    nresults = list()
    for i in range(0, args.n):
        dataset_results = runalldatasetsMPI(
            args, callbacks, datasetlist, mpicomm, mpirank, rootpath,
            runlist, alpharange, n=i,
            printcvresults=args.cvsummary,
            printcv=args.printcv)
        nresults.append(dataset_results)

    if mpirank == 0:
        # plotNAlphaResults(datasetlist, nresults, rootpath)
        writejson(f"{rootpath}/data.json", nresults)
        # BUG FIX: the original built the DataFrame from the always-empty
        # `results` dict, producing an empty CSV; use the collected results.
        resdf = pd.DataFrame(nresults)
        resdf.to_csv(
            f"{rootpath}/results_{args.kfold}kfold_{args.epochs}epochs_{args.onehot}onehot.csv"
        )