def main(args):
    client, cluster = launch_dask(args.num_gpus, args.min_gpus, args.k8s,
                                  args.adapt, args.spec)

    # generate or load data directly on the GPU
    if args.dataset == 'synthetic':
        data, labels, t_gen = data_utils.generate_dataset(
            coilType='helix', nSamples=args.num_rows)
    elif args.dataset == "higgs":
        data, labels, _ = data_utils.load_higgs_dataset("data/higgs", args.num_rows)
    elif args.dataset == "airline":
        data, labels, _ = data_utils.load_airline_dataset("data/airline", args.num_rows)
    elif args.dataset == "fashion-mnist":
        data, labels, _ = data_utils.load_fashion_mnist_dataset("data/fmnist", args.num_rows)

    # split data into train and test sets
    if args.dataset == 'synthetic':
        trainData, trainLabels, testData, testLabels, _ = data_utils.split_train_test_nfolds(
            data, labels, trainTestOverlap=args.train_test_overlap)
    else:
        trainData, testData, trainLabels, testLabels = cuml.train_test_split(
            data, labels, shuffle=False, train_size=args.train_size)

    # apply standard scaling: fit on train, reuse the train stats on test
    trainMeans, trainSTDevs, t_scaleTrain = data_utils.scale_dataframe_inplace(trainData)
    _, _, t_scaleTest = data_utils.scale_dataframe_inplace(testData, trainMeans, trainSTDevs)

    # launch HPO on Dask
    nEpochs = args.num_epochs
    nParticles = args.num_particles

    # TODO: add ranges to argparser
    paramRanges = {0: ['max_depth', 3, 20, 'int'],
                   1: ['learning_rate', .001, 1, 'float'],
                   2: ['gamma', 0, 2, 'float']}

    mode = {'allowAsyncUpdates': args.async_flag,
            'randomSearch': args.random_search}

    particleHistory, globalBest, elapsedTime = swarm.run_hpo(
        client, mode, paramRanges,
        trainData, trainLabels, testData, testLabels,
        nParticles, nEpochs,
        wMomentum=.05, wIndividual=.25, wBest=.45, wExplore=0,
        plotFlag=False)

    # shut down K8S workers
    close_dask(cluster, args.k8s)
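# A minimal sketch of the argparse wiring that would produce the `args`
# namespace consumed by main() above. The flag names mirror the attributes
# main() actually reads (args.num_gpus, args.dataset, ...); the defaults,
# choices, and the __main__ guard are illustrative assumptions, not taken
# from the original script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Dask particle-swarm HPO demo')
    parser.add_argument('--num_gpus', type=int, default=2)
    parser.add_argument('--min_gpus', type=int, default=1)
    parser.add_argument('--k8s', action='store_true')
    parser.add_argument('--adapt', action='store_true')
    parser.add_argument('--spec', type=str, default=None)
    parser.add_argument('--dataset', type=str, default='synthetic',
                        choices=['synthetic', 'higgs', 'airline', 'fashion-mnist'])
    parser.add_argument('--num_rows', type=int, default=100000)
    parser.add_argument('--train_test_overlap', type=float, default=.05)
    parser.add_argument('--train_size', type=float, default=.8)
    parser.add_argument('--num_epochs', type=int, default=10)
    parser.add_argument('--num_particles', type=int, default=16)
    parser.add_argument('--async_flag', action='store_true')
    parser.add_argument('--random_search', action='store_true')
    main(parser.parse_args())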
def main():
    np.random.seed(1)
    num_iterations = 10000

    ### load the dataset
    X_train, Y_train, X_test, Y_test = generate_dataset()
    # plt.title("Data distribution")
    # plt.scatter(X_train[0, :], X_train[1, :], c=Y_train[0, :], s=20, cmap=plt.cm.RdBu)
    # plt.show()

    # network dimensions
    nInput = X_train.shape[0]
    nHidden = 4
    nOutput = Y_train.shape[0]
    nSample = X_train.shape[1]

    # build the two-layer network
    simpleNN = NeuralNetwork(nInput, nHidden, nOutput, nSample)

    ### prediction before training
    predictions = simpleNN.predict(X_train)
    print('Accuracy: %d%%' % float(
        (np.dot(Y_train, predictions.T) + np.dot(1 - Y_train, 1 - predictions.T))
        / float(Y_train.size) * 100))
    decision_boundary(lambda x: simpleNN.predict(x.T), X_train, Y_train)

    ### training
    for i in range(num_iterations):
        A2 = simpleNN.forward(X_train)             # forward propagation
        cost = simpleNN.compute_cost(A2, Y_train)  # evaluate the cost function
        simpleNN.backward()                        # backpropagation
        simpleNN.update_params()                   # update the network weights
        if i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    ### prediction after training
    predictions = simpleNN.predict(X_train)
    print('Accuracy: %d%%' % float(
        (np.dot(Y_train, predictions.T) + np.dot(1 - Y_train, 1 - predictions.T))
        / float(Y_train.size) * 100))
    decision_boundary(lambda x: simpleNN.predict(x.T), X_train, Y_train)

    predictions = simpleNN.predict(X_test)
    print('Accuracy: %d%%' % float(
        (np.dot(Y_test, predictions.T) + np.dot(1 - Y_test, 1 - predictions.T))
        / float(Y_test.size) * 100))
    decision_boundary(lambda x: simpleNN.predict(x.T), X_test, Y_test)
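# The accuracy expression above counts agreements on both classes:
# Y . P^T sums the correctly predicted 1s and (1 - Y) . (1 - P)^T sums the
# correctly predicted 0s. A hypothetical helper that factors out the
# computation repeated three times above (not part of the original script):
def binary_accuracy(Y, P):
    """Percentage of samples where binary predictions P match labels Y."""
    agreements = np.dot(Y, P.T) + np.dot(1 - Y, 1 - P.T)
    return float(agreements) / float(Y.size) * 100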
def main():
    m = 40000
    dataset, human, machine, inv_machine = generate_dataset(m)
    Tx = 30
    Ty = 10
    n_a = 64
    n_s = 32

    # persist the vocabularies so inference can reuse them later
    dictionary = {'human': human, 'machine': machine, 'inv_machine': inv_machine}
    with open('dictionary.json', 'w') as f:
        f.write(json.dumps(dictionary))

    X, Y, Xoh, Yoh = preprocess_dataset(dataset, human, machine, Tx, Ty)

    print('\nmachine_vocab size: ' + str(len(machine)))
    print('human_vocab size: ' + str(len(human)))
    print('X shape: ' + str(np.shape(X)))      # (m, Tx)
    print('Y shape: ' + str(np.shape(Y)))      # (m, Ty)
    print('Xoh shape: ' + str(np.shape(Xoh)))  # (m, Tx, len(human))
    print('Yoh shape: ' + str(np.shape(Yoh)))  # (m, Ty, len(machine))

    model = create_model(Tx, Ty, n_a, n_s, human, machine)

    # initialize the hidden and cell states to zeros
    h0 = np.zeros((m, n_s))
    c0 = np.zeros((m, n_s))

    # swap Yoh from (m, Ty, len(machine)) to a length-Ty list of
    # (m, len(machine)) arrays, one per output time step
    Yoh = list(np.transpose(Yoh, [1, 0, 2]))
    print('new Yoh shape: ' + str(np.shape(Yoh)))

    optimizer = keras.optimizers.Adam(lr=0.01)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    model.fit([Xoh, h0, c0], Yoh, batch_size=128, epochs=6, validation_split=0.1)

    # model.save('my_model.h5')
    model.save_weights('my_model_weight.h5')
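# A minimal inference-side sketch, assuming the same create_model() signature
# and the two artifacts written above (dictionary.json and my_model_weight.h5).
# The function name and hyperparameter values repeated here are assumptions
# mirroring the training setup, not code from the original project.
def load_for_inference():
    with open('dictionary.json') as f:
        vocabs = json.loads(f.read())
    human, machine = vocabs['human'], vocabs['machine']
    # rebuild the architecture with the same Tx, Ty, n_a, n_s used in training
    model = create_model(30, 10, 64, 32, human, machine)
    model.load_weights('my_model_weight.h5')
    return model, vocabs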
import argparse

import numpy as np
from sklearn.model_selection import train_test_split

import data_utils as du  # assumed name of the project's data-loading module

parser = argparse.ArgumentParser()
parser.add_argument('--Mode', type=str, default="train_val",
                    help='train_val or val')
opt = parser.parse_args()

dir_1 = "Shrec_change_detection_dataset_public/2016/"
dir_2 = "Shrec_change_detection_dataset_public/2020/"
classified_dir = "Shrec_change_detection_dataset_public/labeled_point_lists_train/2016-2020/"

d16, d20, data_col = du.generate_dataset(dir_1, dir_2, classified_dir)
ids = np.arange(len(d16))

tr = []
tes = []
if opt.Mode == "train_val":
    tr, tes = train_test_split(ids, test_size=0.20)
if opt.Mode == "val":
    tr = ids
    tes = []

train16 = []
train20 = []
traincol = []
test16 = []
test20 = []
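# One plausible way the empty lists above get populated: index the loaded
# point sets by the split indices. This loop is a sketch inferred from the
# variable names, not code from the original script.
for i in tr:
    train16.append(d16[i])
    train20.append(d20[i])
    traincol.append(data_col[i])
for i in tes:
    test16.append(d16[i])
    test20.append(d20[i])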
def experiment_harness(args):
    experimentLog = {}
    experimentCount = 0

    # TODO: add ranges to argparser
    paramRanges = {0: ['max_depth', 3, 20, 'int'],
                   1: ['learning_rate', .001, 1, 'float'],
                   2: ['gamma', 0, 2, 'float']}

    client = cluster = None

    # create logfile and write headers
    logFilename = 'local_cluster_experiment_results.csv'
    if not os.path.isfile(logFilename):
        with open(logFilename, mode='w+') as outputCSV:
            outputCSV.write(
                "elapsedTime,nSamples,asyncMode,nGPUs,nParticles,nEpochs,"
                "globalBestAccuracy,globalBest_max_depth,globalBest_learning_rate,"
                "globalBest_gamma,globalBest_nTrees,datasetName\n")

    for iDataSamples in args.num_rows:
        # generate or load data directly on the GPU
        if args.dataset == 'synthetic':
            data, labels, t_gen = data_utils.generate_dataset(
                coilType='helix', nSamples=iDataSamples)
        elif args.dataset == "higgs":
            data, labels, t_gen = data_utils.load_higgs_dataset(
                "data/higgs", iDataSamples)
        elif args.dataset == "airline":
            data, labels, t_gen = data_utils.load_airline_dataset(
                "data/airline", iDataSamples)
        elif args.dataset == "fashion-mnist":
            data, labels, t_gen = data_utils.load_fashion_mnist_dataset(
                "data/fmnist", iDataSamples)

        # split data into train and test sets
        if args.dataset == 'synthetic':
            trainData, trainLabels, testData, testLabels, _ = data_utils.split_train_test_nfolds(
                data, labels, trainTestOverlap=args.train_test_overlap)
        else:
            trainData, testData, trainLabels, testLabels = cuml.train_test_split(
                data, labels, shuffle=False, train_size=args.train_size)

        # apply standard scaling
        trainMeans, trainSTDevs, t_scaleTrain = data_utils.scale_dataframe_inplace(trainData)
        _, _, t_scaleTest = data_utils.scale_dataframe_inplace(testData, trainMeans, trainSTDevs)

        print(f'training data shape : {trainData.shape}, test data shape {testData.shape}')

        for iGPUs in args.num_gpus:
            for iParticles in args.num_particles:
                for iEpochs in args.num_epochs:
                    mode = {'allowAsyncUpdates': args.async_flag,
                            'randomSearch': False}

                    client, cluster = launch_dask(iGPUs)
                    particleHistory, globalBest, elapsedTime = swarm.run_hpo(
                        client, mode, paramRanges,
                        trainData, trainLabels, testData, testLabels,
                        iParticles, iEpochs)

                    stringToOutput = f"{elapsedTime},{iDataSamples},{args.async_flag},{iGPUs},{iParticles},{iEpochs},"
                    stringToOutput += f"{globalBest['accuracy']},{globalBest['params'][0]},{globalBest['params'][1]},{globalBest['params'][2]},"
                    stringToOutput += f"{globalBest['nTrees']},{args.dataset}\n"
                    print(stringToOutput)

                    with open(logFilename, mode='a') as outputCSV:
                        outputCSV.write(stringToOutput)

                    print('closing dask cluster in between experiment runs [ sleeping for 5 seconds ]')
                    client.close()
                    cluster.close()
                    time.sleep(5)
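# The harness appends one CSV row per experiment, so results accumulate
# across runs. A quick way to inspect them afterwards; the pandas analysis
# below is an illustration, not part of the original script, and the column
# names come from the header written above.
import pandas as pd

results = pd.read_csv('local_cluster_experiment_results.csv')

# best configuration found for each dataset size
print(results.loc[results.groupby('nSamples')['globalBestAccuracy'].idxmax()])

# scaling behavior: mean elapsed time versus number of GPUs
print(results.groupby('nGPUs')['elapsedTime'].mean())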