def remove_bad_missing_values(data):
    """Replace sentinel values from failed JetFitter / SV1 fits with NaN."""
    category_names = utils.get_category_names()
    sub_jet_names = category_names['subjet1']

    # JetFitter_nVTX == 0 means no secondary vertex was found by JetFitter,
    # so the remaining JetFitter features carry sentinel values, not physics.
    if data[sub_jet_names.index('JetFitter_nVTX')] == 0:
        print('jet failure')
        jetfitter_features = [
            'JetFitter_energyFraction', 'JetFitter_mass',
            'JetFitter_significance3d', 'JetFitter_deltaphi',
            'JetFitter_deltaeta', 'JetFitter_massUncorr',
            'JetFitter_dRFlightDir', 'JetFitter_nSingleTracks',
            'JetFitter_nTracksAtVtx', 'JetFitter_N2Tpair',
        ]
        for name in jetfitter_features:
            data[sub_jet_names.index(name)] = np.nan

    # SV1_masssvx == -1 means no SV1 secondary vertex was reconstructed,
    # so the remaining SV1 features are sentinels as well.
    if data[sub_jet_names.index('SV1_masssvx')] == -1:
        print("mass failure")
        sv1_features = [
            'SV1_efracsvx', 'SV1_significance3d', 'SV1_dstToMatLay',
            'SV1_deltaR', 'SV1_Lxy', 'SV1_L3d',
            'SV1_N2Tpair', 'SV1_NGTinSvx',
        ]
        for name in sv1_features:
            data[sub_jet_names.index(name)] = np.nan

    return data
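# A minimal usage sketch (assumption for illustration: `subjet1_rows` is a 2-D numpy array
# whose columns follow the ordering in utils.get_category_names()['subjet1']).
def clean_subjet_rows(subjet1_rows):
    # Apply the sentinel-to-NaN cleaning to every sub-jet row in place.
    for i in range(subjet1_rows.shape[0]):
        subjet1_rows[i] = remove_bad_missing_values(subjet1_rows[i])
    return subjet1_rows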
def main():
    args = create_arguments()
    # Use the GPU only if it was requested and CUDA is actually available.
    use_cuda = args.gpu and torch.cuda.is_available()

    model = load_model(args.checkpoint)
    category_names = get_category_names(args.category_names)

    if args.filepath is None:
        # No image supplied: pick a random image from a random test class.
        img_num = random.randint(1, 102)
        image = random.choice(os.listdir('./flowers/test/' + str(img_num) + '/'))
        img_path = './flowers/test/' + str(img_num) + '/' + image
    else:
        img_path = args.filepath

    top_p, probable_classes = predict(img_path, model, args.top_k, use_cuda)

    print('Probabilities of {} most probable classes are {}'.format(args.top_k, top_p))
    print('{} most probable class ids are {}'.format(args.top_k, probable_classes))
    print('{} most probable classes are {}'.format(
        args.top_k,
        [category_names[class_id] for class_id in probable_classes]))
def main():
    # Read the training configuration and pass it to the index template.
    maxNumUserCat = config['DEFAULT']['MAX_NUMBER_USER_CATEGORIES']
    numAugm = config['DEFAULT']['NUMBER_AUGMENTATIONS_PER_IMAGE']
    batchSize = config['DEFAULT']['TRAIN_BATCH_SIZE']
    srRate = config['REGRESSION']['LEARNING_RATE']
    srEpochs = config['REGRESSION']['EPOCHS']
    cnnRate = config['CNN']['LEARNING_RATE']
    cnnEpochs = config['CNN']['EPOCHS']
    predefined_categories = config['DEFAULT']['PREDEFINED_CATEGORIES'].split(",")

    data = {
        'image_size': IMAGE_SIZE,
        'numAugm': numAugm,
        'batchSize': batchSize,
        'srRate': srRate,
        'srEpochs': srEpochs,
        'cnnRate': cnnRate,
        'cnnEpochs': cnnEpochs,
        'maxNumUserCat': maxNumUserCat,
        'cats_img_number': utils.get_number_of_images_per_category(),
        # All categories: stored ones plus the predefined ones.
        'categories': list(set().union(utils.get_category_names(), predefined_categories)),
        # Categories created by the user, i.e. not predefined.
        'user_categories': list(set(utils.get_category_names()) - set(predefined_categories)),
    }
    return render_template('index.html', data=data)
def get_position_indexes_from_names(feature_names):
    """Map selected feature names to their column indexes in the original feature groups."""
    original_names = utils.get_category_names()
    sub_selection_indexes = {}
    for feature_group_name, feature_group in feature_names.items():
        # A single feature name may be given instead of a tuple of names.
        if not isinstance(feature_group, tuple):
            feature_group = [feature_group]
        original_group = list(original_names[feature_group_name])
        feature_indexes = [original_group.index(name) for name in feature_group]
        sub_selection_indexes[feature_group_name] = feature_indexes
    return sub_selection_indexes
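# A hypothetical usage sketch: the group name 'subjet1' and the feature names below are
# assumptions for illustration and must exist in utils.get_category_names() for the lookup
# to succeed. A single name or a tuple of names may be given per group, e.g.
#     get_position_indexes_from_names({'subjet1': ('JetFitter_mass', 'SV1_masssvx')})
# returns a dict mapping each group to the column indexes of the requested features.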
def create_validation_set(train_data, train_labels, train_ratio):
    """Split the training data into training and validation sets, stratified by category."""
    train_data_result = []
    train_labels_result = []
    validation_data_result = []
    validation_labels_result = []

    # Count how many examples each category has in total.
    number_per_category = {c: 0.0 for c in range(NUM_LABELS)}
    for i, x in enumerate(train_data):
        category = [z for z in range(len(train_labels[i])) if train_labels[i][z] == 1.0][0]
        number_per_category[category] += 1.0

    # Assign the first train_ratio fraction of each category to training,
    # the remainder to validation.
    number_per_category_in_validation = {c: 0.0 for c in range(NUM_LABELS)}
    number_per_category_in_training = {c: 0.0 for c in range(NUM_LABELS)}
    for i, x in enumerate(train_data):
        category = [z for z in range(len(train_labels[i])) if train_labels[i][z] == 1.0][0]
        if number_per_category_in_training[category] < number_per_category[category] * train_ratio:
            number_per_category_in_training[category] += 1.0
            train_data_result.append(x)
            train_labels_result.append(train_labels[i])
        else:
            number_per_category_in_validation[category] += 1.0
            validation_data_result.append(x)
            validation_labels_result.append(train_labels[i])

    if not number_per_category_in_validation.values():
        raise Exception("Please add at least one category.")
    elif min(number_per_category_in_validation.values()) == 0:
        # At least one category has no items in the validation set
        # (not enough training examples for that category).
        msg = "<b>Error</b> while preparing data:"
        for idx, count in enumerate(number_per_category_in_validation.values()):
            if count == 0:
                img = "images" if number_per_category[idx] > 1 else "image"
                msg += " category '<b>" + utils.get_category_names()[idx] + "</b>' has just <b>" \
                    + str(int(number_per_category[idx])) + "</b> " + img + ","
        exception_msg = msg + " but at least <b>%d</b> images are required for each category." \
            % utils.get_number_of_images_required()
        print(exception_msg)
        raise Exception(exception_msg)
    else:
        return numpy.array(train_data_result), numpy.array(train_labels_result), \
            numpy.array(validation_data_result), numpy.array(validation_labels_result)
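# A minimal usage sketch, assuming one-hot train_labels with NUM_LABELS columns:
# an 80/20 stratified split that keeps each category's proportions in both sets.
#     train_x, train_y, val_x, val_y = create_validation_set(train_data, train_labels, 0.8)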
# Assumed wrapper: the original `def` line is not shown in this snippet; the name and
# signature below are inferred from the call pattern.
def get_or_create_dataset(open_file, feature_name, shape):
    try:
        return open_file.create_dataset(feature_name, shape, dtype=np.float32)
    except RuntimeError:
        # Creation failed (e.g. the dataset already exists), so reuse the stored one.
        return open_file.get(feature_name)


def save_index_conversion(file_name, original_indexes, merged_file_indexes, new_to_original):
    """Record the mapping between per-file indexes and indexes in the merged file."""
    original_to_new = {}
    for original, new in zip(original_indexes, merged_file_indexes):
        original_to_new[original] = new
        new_to_original[new] = (original, file_name)
    # np.save("%s_original_to_new" % file_name.replace('.h5', ''), original_to_new)
    # np.save(path + "new_to_original_%s" % sys.argv[1], new_to_original)


category_names = utils.get_category_names()

# Merges many HDF5 files into one.
data_path = "/baldig/physicsprojects/atlas/hbb/raw_data/v_6/"
tag = sys.argv[1]
assert tag is not None, "please specify a tag (signal, bg, top)"
new_file_dataset_name = "temporary_flattened_data_%s.h5" % tag

# round_down lets you use only a fraction of the samples in each file; default is all (1.0).
if tag == "signal":
    round_down = 1.0
elif tag == 'bg':
    round_down = 1.0
elif tag == 'top':
    round_down = 1.0
def __init__(self, shuffle=False):
    """Load the train and test splits for every known category."""
    self.categories = utils.get_category_names()
    self.num_categories = len(self.categories)
    self.train = Dataset("train", self.num_categories, shuffle)
    self.test = Dataset("test", self.num_categories, shuffle)
    # Number of points (features) per example, taken from the training split.
    self.num_points = self.train._points.shape[1]