for target_array in target_arrays: noise = np.random.randn(*target_array.shape).astype('float32') * std new_target_arrays.append(np.clip(target_array + noise, 0, 1)) new_target_arrays.append(labels) yield new_target_arrays, chunk_size ### Alternative image loader and processor which does pysex centering # pysex_params_train = load_data.load_gz("data/pysex_params_extra_train.npy.gz") # pysex_params_test = load_data.load_gz("data/pysex_params_extra_test.npy.gz") pysex_params_train = load_data.load_gz("data/pysex_params_gen2_train.npy.gz") pysex_params_test = load_data.load_gz("data/pysex_params_gen2_test.npy.gz") pysexgen1_params_train = load_data.load_gz("data/pysex_params_extra_train.npy.gz") pysexgen1_params_test = load_data.load_gz("data/pysex_params_extra_test.npy.gz") center_x, center_y = (IMAGE_WIDTH - 1) / 2.0, (IMAGE_HEIGHT - 1) / 2.0 # def build_pysex_center_transform(img_index, subset='train'): # if subset == 'train': # x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_train[img_index] # elif subset == 'test': # x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_test[img_index] # return build_augmentation_transform(translation=(x - center_x, y - center_y))
"Creates a gzipped CSV submission file from a gzipped numpy file with testset predictions." ) print("Usage: create_submission_from_npy.py <input.npy.gz>") sys.exit() src_path = sys.argv[1] src_dir = os.path.dirname(src_path) src_filename = os.path.basename(src_path) tgt_filename = src_filename.replace(".npy.gz", ".csv") tgt_path = os.path.join(src_dir, tgt_filename) test_ids = load_data.test_ids print("Loading %s" % src_path) data = load_data.load_gz(src_path) assert data.shape[0] == load_data.num_test print("Saving %s" % tgt_path) with open(tgt_path, 'wb') as csvfile: writer = csv.writer(csvfile) # , delimiter=',', quoting=csv.QUOTE_MINIMAL) # write header writer.writerow([ 'GalaxyID', 'Class1.1', 'Class1.2', 'Class1.3', 'Class2.1', 'Class2.2', 'Class3.1', 'Class3.2', 'Class4.1', 'Class4.2', 'Class5.1', 'Class5.2', 'Class5.3', 'Class5.4', 'Class6.1', 'Class6.2', 'Class7.1', 'Class7.2', 'Class7.3', 'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4', 'Class8.5', 'Class8.6', 'Class8.7', 'Class9.1', 'Class9.2', 'Class9.3', 'Class10.1', 'Class10.2', 'Class10.3', 'Class11.1', 'Class11.2',
(sbcuda.cuda_ndarray.cuda_ndarray.mem_info()[0] / 1024. / 1024.)) def save_exit(): # winsol.save() print "Done!" print ' run for %s' % timedelta(seconds=(time.time() - start_time)) exit() sys.exit(0) if not REPREDICT_EVERYTIME and os.path.isfile( target_path_valid) and os.path.isfile(TRAIN_LOSS_SF_PATH): print 'Loading validation predictions from %s and loss from %s ' % ( target_path_valid, TRAIN_LOSS_SF_PATH) predictions = load_data.load_gz(target_path_valid) else: try: print '' print 'Re-evalulating and predicting' if DO_VALID: evalHist = winsol.evaluate([xs_valid[0], xs_valid[1]], y_valid=y_valid) winsol.save_loss(modelname='model_norm_metrics') evalHist = winsol.load_loss(modelname='model_norm_metrics') print '' predictions = winsol.predict([xs_valid[0], xs_valid[1]]) print "Write predictions to %s" % target_path_valid
valid_ids = train_ids[num_train:] train_ids = train_ids[:num_train] train_indices = np.arange(num_train) valid_indices = np.arange(num_train, num_train + num_valid) test_indices = np.arange(num_test) # paths of all the files to blend. predictions_test_paths = glob.glob(os.path.join(predictions_test_dir, "*.npy.gz")) predictions_valid_paths = [os.path.join(predictions_valid_dir, os.path.basename(path)) for path in predictions_test_paths] print "Loading validation set predictions" predictions_list = [load_data.load_gz(path) for path in predictions_valid_paths] predictions_stack = np.array(predictions_list).astype(theano.config.floatX) # num_sources x num_datapoints x 37 del predictions_list print print "Compute individual prediction errors" individual_prediction_errors = np.sqrt(((predictions_stack - y_valid[None])**2).reshape(predictions_stack.shape[0], -1).mean(1)) print print "Compiling Theano functions" X = theano.shared(predictions_stack) # source predictions t = theano.shared(y_valid) # targets W = T.vector('W')
# Index ranges for each subset.  Validation indices continue directly after
# the training indices; test indices start again from zero.
train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)

# paths of all the files to blend.
# Every test-set prediction file is paired with the validation-set file of
# the same basename in predictions_valid_dir.
predictions_test_paths = glob.glob(
    os.path.join(predictions_test_dir, "*.npy.gz")
)
predictions_valid_paths = [
    os.path.join(predictions_valid_dir, os.path.basename(path))
    for path in predictions_test_paths
]

print("Loading validation set predictions")
predictions_list = [
    load_data.load_gz(path) for path in predictions_valid_paths
]
# num_sources x num_datapoints x 37
predictions_stack = np.array(predictions_list).astype(theano.config.floatX)
# The stacked copy is all that is needed from here on; drop the list.
del predictions_list
print()

print("Compute individual prediction errors")
# Root-mean-square error of each source against y_valid: flatten everything
# but the source axis, then average the squared differences per source.
individual_prediction_errors = np.sqrt(
    np.square(predictions_stack - y_valid[np.newaxis])
    .reshape(len(predictions_stack), -1)
    .mean(axis=1)
)
print()

print("Compiling Theano functions")
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(y_valid)  # targets
import load_data output_names = [ "smooth", "featureOrdisk", "NoGalaxy", "EdgeOnYes", "EdgeOnNo", "BarYes", "BarNo", "SpiralYes", "SpiralNo", "BulgeNo", "BulgeJust", "BulgeObvious", "BulgDominant", "OddYes", "OddNo", "RoundCompletly", "RoundBetween", "RoundCigar", "Ring", "Lense", "Disturbed", "Irregular", "Other", "Merger", "DustLane", "BulgeRound", "BlulgeBoxy", "BulgeNo2", "SpiralTight", "SpiralMedium", "SpiralLoose", "Spiral1Arm", "Spiral2Arm", "Spiral3Arm", "Spiral4Arm", "SpiralMoreArms", "SpiralCantTell" ] #d = pd.read_csv(TRAIN_LABELS_PATH) #targets = d.as_matrix()[1:, 1:].astype('float32') targets = load_data.load_gz( 'predictions/final/augmented/valid/try_convent_continueAt0p02_next.npy.gz') targets = targets.T output_corr = np.zeros((37, 37)) print targets.shape for i in xrange(0, 37): for j in xrange(i, 37): output_corr[i][j] = np.corrcoef(targets[i], targets[j])[0][1] if i != j and np.abs(output_corr[i][j]) > 0.3: if np.abs(output_corr[i][j]) > 0.7: print colored( "%s, %s: %s" % (output_names[i], output_names[j], output_corr[i][j]), 'green') else:
print "Usage: create_submission_from_npy.py <input.npy.gz>" sys.exit() src_path = sys.argv[1] src_dir = os.path.dirname(src_path) src_filename = os.path.basename(src_path) tgt_filename = src_filename.replace(".npy.gz", ".csv") tgt_path = os.path.join(src_dir, tgt_filename) test_ids = load_data.test_ids print "Loading %s" % src_path data = load_data.load_gz(src_path) assert data.shape[0] == load_data.num_test print "Saving %s" % tgt_path with open(tgt_path, "wb") as csvfile: writer = csv.writer(csvfile) # , delimiter=',', quoting=csv.QUOTE_MINIMAL) # write header writer.writerow( [ "GalaxyID", "Class1.1", "Class1.2", "Class1.3", "Class2.1",