def set_data(filenames, file_count,subb, config, count, cur, img_mean, gpu_data, gpu_data_remote, ctx, icomm,img_batch_empty): load_time = time.time() data=None # aa = config['rank']+count/subb*size # img_list = range(aa*config['file_batch_size'],(aa+1)*config['file_batch_size'],1) #print rank, img_list if config['data_source'] in ['hkl','both']: data_hkl = hkl.load(str(filenames[file_count]))# c01b data = data_hkl if config['data_source'] in ['lmdb', 'both']: data_lmdb = lmdb_load_cur(cur,config,img_batch_empty) data = data_lmdb if config['data_source']=='both': if config['rank']==0: print (rank,(data_hkl-data_lmdb)[1,0:3,1,1].tolist()) load_time = time.time()-load_time #)* sub_time = time.time() #( data = data -img_mean sub_time = time.time()-sub_time crop_time = time.time() #( for minibatch_index in range(subb): count+=1 batch_data = data[:,:,:,minibatch_index*config['batch_size']:(minibatch_index+1)*batch_size] if mode == 'train': rand_arr = get_rand3d(config['random'], count+(rank+1)*n_files*(subb)) else: rand_arr = np.float32([0.5, 0.5, 0]) batch_data = crop_and_mirror(batch_data, rand_arr, flag_batch=config['batch_crop_mirror'],cropsize=config['input_width']) gpu_data[minibatch_index].set(batch_data) crop_time = time.time() - crop_time #) #print 'load_time: %f (load %f, sub %f, crop %f)' % (load_time+crop_time+sub_time, load_time,sub_time, crop_time) # wait for computation on last file to finish msg = icomm.recv(source=MPI.ANY_SOURCE,tag=35) assert msg == "calc_finished" for minibatch_index in range(subb): # copy from preload area drv.memcpy_dtod(gpu_data_remote[minibatch_index].ptr, gpu_data[minibatch_index].ptr, gpu_data[minibatch_index].dtype.itemsize * gpu_data[minibatch_index].size ) ctx.synchronize() icomm.isend("copy_finished",dest=0,tag=55) return count
def train_net(config): # UNPACK CONFIGS (flag_para_load, train_filenames, val_filenames, train_labels, val_labels, img_mean) = unpack_configs(config) # pycuda set up drv.init() dev = drv.Device(int(config['gpu'][-1])) ctx = dev.make_context() if flag_para_load: # zmq set up sock = zmq.Context().socket(zmq.PAIR) sock.connect('tcp://*****:*****@ iter = ', num_iter print 'training cost:', cost_ij if config['print_train_error']: print 'training error rate:', train_error() if flag_para_load and (count < len(minibatch_range)): load_send_queue.put('calc_finished') ############### Test on Validation Set ################## DropoutLayer.SetDropoutOff() this_validation_error, this_validation_loss = get_val_error_loss( rand_arr, shared_x, shared_y, val_filenames, val_labels, flag_para_load, img_mean, batch_size, validate_model, send_queue=load_send_queue, recv_queue=load_recv_queue) print('epoch %i: validation loss %f ' % (epoch, this_validation_loss)) print('epoch %i: validation error %f %%' % (epoch, this_validation_error * 100.)) val_record.append([this_validation_error, this_validation_loss]) np.save(config['weights_dir'] + 'val_record.npy', val_record) DropoutLayer.SetDropoutOn() ############################################ # Adapt Learning Rate step_idx = adjust_learning_rate(config, epoch, step_idx, val_record, learning_rate) # Save weights if epoch % config['snapshot_freq'] == 0: save_weights(layers, config['weights_dir'], epoch) np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy', learning_rate.get_value()) save_momentums(vels, config['weights_dir'], epoch) print('Optimization complete.')
def train_net(config): # UNPACK CONFIGS (flag_para_load, train_filenames, val_filenames, train_labels, val_labels, img_mean) = unpack_configs(config) if flag_para_load: # zmq set up sock = zmq.Context().socket(zmq.PAIR) sock.connect('tcp://*****:*****@ iter = %i" % (num_iter)) logger.info("training cost: %lf" % (cost_ij)) if config['print_train_error']: logger.info('training error rate: %lf' % train_error()) if flag_para_load and (count < len(minibatch_range)): load_send_queue.put('calc_finished') ############### Test on Validation Set ################## #""" DropoutLayer.SetDropoutOff() result_list = get_val_error_loss( rand_arr, shared_x, shared_y, val_filenames, val_labels, flag_para_load, img_mean, batch_size, validate_model, send_queue=load_send_queue, recv_queue=load_recv_queue, flag_top_5=flag_top5) logger.info(('epoch %i: validation loss %f ' % (epoch, result_list[-1]))) if flag_top5: logger.info(('epoch %i: validation error (top 1) %f %%, (top5) %f %%' % (epoch, result_list[0] * 100., result_list[1] * 100.))) else: logger.info(('epoch %i: validation error %f %%' % (epoch, result_list[0] * 100.))) val_record.append(result_list) np.save(config['weights_dir'] + 'val_record.npy', val_record) DropoutLayer.SetDropoutOn() ############################################ # Adapt Learning Rate step_idx = adjust_learning_rate(config, epoch, step_idx, val_record, learning_rate) # Save weights if epoch % config['snapshot_freq'] == 0: save_weights(layers, config['weights_dir'], epoch) np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy', learning_rate.get_value()) save_momentums(vels, config['weights_dir'], epoch) #""" print('Optimization complete.')
def train_net(config, private_config): # UNPACK CONFIGS (flag_para_load, flag_datalayer, train_filenames, val_filenames, train_labels, val_labels, img_mean) = \ unpack_configs(config, ext_data=private_config['ext_data'], ext_label=private_config['ext_label']) gpu_send_queue = private_config['queue_gpu_send'] gpu_recv_queue = private_config['queue_gpu_recv'] # pycuda and zmq set up drv.init() dev = drv.Device(int(private_config['gpu'][-1])) ctx = dev.make_context() sock_gpu = zmq.Context().socket(zmq.PAIR) if private_config['flag_client']: sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter print 'training cost:', cost_ij if config['print_train_error']: error_ij = train_error() gpu_send_queue.put(error_ij) that_error = gpu_recv_queue.get() error_ij = (error_ij + that_error) / 2. if private_config['flag_verbose']: print 'training error rate:', error_ij if flag_para_load and (count < len(minibatch_range)): load_send_queue.put('calc_finished') ############### Test on Validation Set ################## DropoutLayer.SetDropoutOff() this_val_error, this_val_loss = get_val_error_loss( rand_arr, shared_x, shared_y, val_filenames, val_labels, flag_datalayer, flag_para_load, batch_size, validate_model, send_queue=load_send_queue, recv_queue=load_recv_queue) # report validation stats gpu_send_queue.put(this_val_error) that_val_error = gpu_recv_queue.get() this_val_error = (this_val_error + that_val_error) / 2. gpu_send_queue.put(this_val_loss) that_val_loss = gpu_recv_queue.get() this_val_loss = (this_val_loss + that_val_loss) / 2. if private_config['flag_verbose']: print('epoch %i: validation loss %f ' % (epoch, this_val_loss)) print('epoch %i: validation error %f %%' % (epoch, this_val_error * 100.)) val_record.append([this_val_error, this_val_loss]) if private_config['flag_save']: np.save(config['weights_dir'] + 'val_record.npy', val_record) DropoutLayer.SetDropoutOn() ############################################ # Adapt Learning Rate step_idx = adjust_learning_rate(config, epoch, step_idx, val_record, learning_rate) # Save Weights, only one of them will do if private_config['flag_save']: if epoch % config['snapshot_freq'] == 0: save_weights(layers, config['weights_dir'], epoch) np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy', learning_rate.get_value()) save_momentums(vels, config['weights_dir'], epoch) print('Optimization complete.')
def train_net(config): # UNPACK CONFIGS (flag_para_load, train_filenames, val_filenames, train_labels, val_labels, img_mean) = unpack_configs(config) if flag_para_load: # zmq set up sock = zmq.Context().socket(zmq.PAIR) sock.connect('tcp://*****:*****@iter " + str(count) if count == 1: s = time.time() if count == 20: e = time.time() print "time per 20 iter:", (e - s) logger.info("time per 20 iter: %f" % (e - s)) cost_ij = train_model_wrap(train_model, shared_x, shared_y, rand_arr, img_mean, count, minibatch_index, minibatch_range, batch_size, train_filenames, train_labels, flag_para_load, config['batch_crop_mirror'], send_queue=load_send_queue, recv_queue=load_recv_queue) if num_iter % config['print_freq'] == 0: #print 'training @ iter = ', num_iter #print 'training cost:', cost_ij logger.info("training @ iter = %i" % (num_iter)) logger.info("training cost: %lf" % (cost_ij)) if config['print_train_error']: logger.info('training error rate: %lf' % train_error()) #print 'training error rate:', train_error() if flag_para_load and (count < len(minibatch_range)): load_send_queue.put('calc_finished') ############### Test on Validation Set ################## #""" DropoutLayer.SetDropoutOff() # result_list = [ this_validation_error, this_validation_error_top5, this_validation_loss ] # or # result_list = [ this_validation_error, this_validation_loss ] result_list = get_val_error_loss( #this_validation_error, this_validation_loss = get_val_error_loss( rand_arr, shared_x, shared_y, val_filenames, val_labels, flag_para_load, img_mean, batch_size, validate_model, send_queue=load_send_queue, recv_queue=load_recv_queue, flag_top_5=flag_top5) logger.info(('epoch %i: validation loss %f ' % (epoch, result_list[-1]))) #print('epoch %i: validation loss %f ' % # (epoch, this_validation_loss)) if flag_top5: logger.info(('epoch %i: validation error (top 1) %f %%, (top5) %f %%' % (epoch, result_list[0] * 100., result_list[1] * 100.))) else: logger.info(('epoch %i: validation error %f %%' % (epoch, result_list[0] * 100.))) #print('epoch %i: validation error %f %%' % # (epoch, this_validation_error * 100.)) val_record.append(result_list) #val_record.append([this_validation_error, this_validation_loss]) np.save(config['weights_dir'] + 'val_record.npy', val_record) DropoutLayer.SetDropoutOn() ############################################ # Adapt Learning Rate step_idx = adjust_learning_rate(config, epoch, step_idx, val_record, learning_rate) # Save weights if epoch % config['snapshot_freq'] == 0: save_weights(layers, config['weights_dir'], epoch) np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy', learning_rate.get_value()) save_momentums(vels, config['weights_dir'], epoch) #""" print('Optimization complete.')
def train_net(config, private_config): # UNPACK CONFIGS (flag_para_load, train_filenames, val_filenames, train_labels, val_labels, img_mean) = \ unpack_configs(config, ext_data=private_config['ext_data'], ext_label=private_config['ext_label']) gpu_send_queue = private_config['queue_gpu_send'] gpu_recv_queue = private_config['queue_gpu_recv'] # pycuda and zmq set up drv.init() dev = drv.Device(int(private_config['gpu'][-1])) ctx = dev.make_context() sock_gpu = zmq.Context().socket(zmq.PAIR) if private_config['flag_client']: sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter log_iter.write("%d\n" % num_iter) log_iter.flush() print 'training cost:', cost_ij log_err_cost.write("%f\n" % cost_ij) log_err_cost.flush() if config['print_train_error']: error_ij = train_error() gpu_send_queue.put(error_ij) that_error = gpu_recv_queue.get() error_ij = (error_ij + that_error) / 2. if private_config['flag_verbose']: print 'training error rate:', error_ij log_err_rate.write("%f\n" % error_ij) log_err_rate.flush() if flag_para_load and (count < len(minibatch_range)): load_send_queue.put('calc_finished') if count%20 == 0: e = time.time() print "time per 20 iter:", (e - s) ############### Test on Validation Set ################## DropoutLayer.SetDropoutOff() this_val_error, this_val_loss = get_val_error_loss( rand_arr, shared_x, shared_y, val_filenames, val_labels, flag_para_load, img_mean, batch_size, validate_model, send_queue=load_send_queue, recv_queue=load_recv_queue) # report validation stats gpu_send_queue.put(this_val_error) that_val_error = gpu_recv_queue.get() this_val_error = (this_val_error + that_val_error) / 2. gpu_send_queue.put(this_val_loss) that_val_loss = gpu_recv_queue.get() this_val_loss = (this_val_loss + that_val_loss) / 2. if private_config['flag_verbose']: print('epoch %i: validation loss %f ' % (epoch, this_val_loss)) print('epoch %i: validation error %f %%' % (epoch, this_val_error * 100.)) val_record.append([this_val_error, this_val_loss]) if private_config['flag_save']: np.save(config['weights_dir'] + 'val_record.npy', val_record) np.savetxt(config['weights_dir'] + 'val_record_txt.txt', val_record) DropoutLayer.SetDropoutOn() ############################################ # Adapt Learning Rate step_idx = adjust_learning_rate(config, epoch, step_idx, val_record, learning_rate) # Save Weights, only one of them will do if private_config['flag_save']: if epoch % config['snapshot_freq'] == 0: save_weights(layers, config['weights_dir'], epoch) np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy', learning_rate.get_value()) save_momentums(vels, config['weights_dir'], epoch) print('Optimization complete.')