def scrape_all_disease_contents_once(disease_db):
    common.get_logger().warning("Scraping all disease contents once more...")
    main_thread = threading.currentThread()
    disease_names = list(disease_db[common.ALL_DISEASES_VIEW].keys())
    n_diseases = len(disease_names)
    i_disease = 0
    is_still_need_to_scrape = False
    # create threads to scrape disease contents
    while (i_disease < n_diseases):
        if (threading.active_count() - 1 < common.MAX_THREADS) and \
                (i_disease < n_diseases):
            disease_name = disease_names[i_disease]
            the_disease = disease_db[common.ALL_DISEASES_VIEW][disease_name]
            if not the_disease.is_already_scraped:
                is_still_need_to_scrape = True
                t = threading.Thread(target=scrape_all_contents_of_a_disease,
                                     args=(the_disease, i_disease, n_diseases))
                t.start()
            i_disease += 1
        if threading.active_count() - 1 >= common.MAX_THREADS:
            time.sleep(common.TIMEOUT_WAIT_THREAD_FINISH)
    # wait for all the threads to finish
    while (threading.active_count() > 1):
        time.sleep(common.TIMEOUT_WAIT_THREAD_FINISH)
    return is_still_need_to_scrape
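# Hedged usage sketch (not part of the original module): since
# scrape_all_disease_contents_once() returns True while any disease in the
# database still needs scraping, a driver can simply keep calling it until it
# returns False. The helper name scrape_until_complete is illustrative only.
def scrape_until_complete(disease_db):
    while scrape_all_disease_contents_once(disease_db):
        pass  # re-run passes until every disease is marked is_already_scraped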
def explode(key, patches, positions, output_dir, patchesPerImage):
    output_name = key + ".hdf5"
    ds = Dataset(output_name, output_dir, 1, patchesPerImage, 'DECAF',
                 patch_dim=patches.shape[1], patch_type='float32',
                 pos_type='uint16')
    ds.append(key, patches, positions)
    get_logger().info("dataset with " + str(ds.keys.shape) +
                      " elements, and patches " + str(patches.shape))
    ds.close()
def scrape_all_contents_of_a_disease(disease_obj, idx, n_diseases):
    common.get_logger().warning(''.join(['Scraping the ', str(idx), 'th/',
                                         str(n_diseases), ' disease "',
                                         disease_obj.name, '"...']))
    for attr in DISEASE_ATTRS:
        setattr(disease_obj, attr[0],
                scrape_disease_attr_contents_from_url(disease_obj.url, attr[1]))
    disease_obj.is_already_scraped = True
def do_nbnn(train_folder, test_folder):
    train = load_patches(train_folder)
    test = load_patches(test_folder)
    if options.relu:
        get_logger().info("Applying RELU")
        for class_data in train:
            class_data.patches = class_data.patches.clip(min=0)
        for class_data in test:
            class_data.patches = class_data.patches.clip(min=0)
    if options.scale:
        get_logger().info("Applying standardization")
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for class_data in train:
            class_data.patches = scaler.transform(class_data.patches)
        for class_data in test:
            class_data.patches = scaler.transform(class_data.patches)
    if options.pca:
        get_logger().info("Calculating PCA")
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        #for class_data in train:
        #    get_logger().info("Fitting class " + class_data.name)
        #    pca.partial_fit(class_data.patches)
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) +
                          " variance (" + str(options.pca) +
                          ") components\nApplying PCA")
        for class_data in train:
            class_data.patches = pca.transform(class_data.patches)
        for class_data in test:
            class_data.patches = pca.transform(class_data.patches)
    nbnn(train, test, NN_Engine())
def load_patches(folder):
    files = glob.glob(folder + "*.hdf5")
    num_classes = len(files)
    get_logger().info("Loading " + str(num_classes) + " classes from " + folder)
    all_features = []
    for pfile in files:
        f = hfile(pfile)
        iid = f["image_index"]
        class_patches = f["patches"][0:iid[:].max(), :]
        if options.use_position:
            class_positions = f["positions"][0:iid[:].max(), :]
            class_patches = np.hstack([class_patches, class_positions])
        all_features.append(ImageClass(class_patches, pfile, iid[:]))
    return all_features
def __init__(self, raw_msg=None, log=False):
    """
    Initializing from a received message entity automatically sets the to/from
    user for the passive reply. No message entity is needed when actively
    sending a customer-service message.
    :param raw_msg: the XML entity of the message
    :param log: whether to write the received XML to the log
    """
    self._receive = {}
    self._reply = {}
    self._items = []  # message entities that are usually visible to the user
    if __debug__:
        self.log = get_logger(Message.__name__, "debug")
    else:
        self.log = get_logger(Message.__name__, "info")
    if raw_msg is not None:
        self.receiveMsg(raw_msg, log)
def translate_db(disease_db_en, language_code='vi'):
    translated_disease_db = dict()
    common.init_disease_db(translated_disease_db)
    all_diseases = list(disease_db_en[common.ALL_DISEASES_VIEW].values())
    for i in range(0, len(all_diseases)):
        original_disease = all_diseases[i]
        common.get_logger().info(''.join(['Translating the ', str(i), 'th/',
                                          str(len(all_diseases)), ' disease "',
                                          original_disease.name, '"...']))
        translated_disease = translate_a_disease(original_disease, language_code)
        translated_disease_db[common.ALL_DISEASES_VIEW][
            translated_disease.name] = translated_disease
    return translated_disease_db
def _test_grid_search(self, dataset, columns):
    dimension = [16, 32, 64]
    batchSize = [32, 64]
    learning_rate = [0.1]
    margin = [1, 0.1]
    regularizer_scale = [0.1]
    epochs = [50, 100, 500]
    count = 0
    max_fscore = 0
    max_prec_at_1 = 0
    model = dataset()
    logger = get_logger('RL.Test.GridSearch.VEER.' + str(model))
    for d, bs, lr, m, reg, e in \
            itertools.product(dimension, batchSize, learning_rate,
                              margin, regularizer_scale, epochs):
        params = {
            'learning_rate': lr,
            'margin': m,
            'dimension': d,
            'epochs': e,
            'batchSize': bs,
            'regularizer_scale': reg
        }
        logger.info("\nTest:%d, PARAMS: %s", count, str(params))
        count = count + 1
        cur_fscore, cur_prec_at_1 = self._test_veer(dataset, columns, params)
        if max_fscore <= cur_fscore:
            max_fscore = cur_fscore
        if max_prec_at_1 <= cur_prec_at_1:
            max_prec_at_1 = cur_prec_at_1
    logger.info("Ran total %d Tests.", count)
    logger.info("Max Fscore: %f", max_fscore)
    logger.info("Max Mean Precision@1: %f", max_prec_at_1)
def archive_prices():
    log = common.get_logger()
    log.info('Archiving Prices..')
    conn = common.get_connection()
    trans = conn.begin()
    try:
        # Insert entities that have no price history yet
        conn.execute("""
            INSERT INTO priceHistory (entity_id, price)
            SELECT entity.id, entity.price FROM entity
            WHERE (SELECT count(price) FROM pricehistory
                   WHERE entity_id=entity.id)=0
              AND price IS NOT NULL
        """)
        # Record entities whose price changed since the last snapshot
        conn.execute("""INSERT INTO priceHistory (entity_id, price)
            SELECT entity.id, entity.price FROM entity
            WHERE entity.price != (SELECT price FROM priceHistory
                                   WHERE entity_id=entity.id
                                   ORDER BY timestamp DESC LIMIT 1)""")
        trans.commit()
    except:
        trans.rollback()
        raise
    conn.close()
    log.info('..done.')
def _test_grid_search(self, model):
    dimension = [64, 256]
    batchSize = [32, 128]
    learning_rate = [0.1, 0.5]
    margin = [1]
    regularizer_scale = [0.1]
    epochs = [1000]
    neg_rel_rate = [1, 4]
    neg_rate = [1, 7]
    logger = get_logger('RL.Test.ear.GridSearch.KR_EAR' + str(model))
    count = 0
    max_fscore = 0
    max_prec_at_1 = 0
    for d, bs, lr, m, reg, e, nr, nrr in \
            itertools.product(dimension, batchSize, learning_rate, margin,
                              regularizer_scale, epochs, neg_rate, neg_rel_rate):
        params = {'learning_rate': lr, 'margin': m, 'dimension': d, 'epochs': e,
                  'batchSize': bs, 'regularizer_scale': reg,
                  'neg_rate': nr, 'neg_rel_rate': nrr}
        logger.info("\nPARAMS: %s", str(params))
        count = count + 1
        cur_fscore, cur_prec_at_1 = self._test_kr_ear(model, params)
        if max_fscore <= cur_fscore:
            max_fscore = cur_fscore
        if max_prec_at_1 <= cur_prec_at_1:
            max_prec_at_1 = cur_prec_at_1
    logger.info("Ran total %d Tests.", count)
    logger.info("Max Fscore: %f", max_fscore)
    logger.info("Max Precision@1: %f", max_prec_at_1)
def test_census(self):
    logger = get_logger('RL.Test.KmeansClustering.CENSUS')
    census = Census()
    compare_cl = census.get_comparision_object()
    features = compare_cl.compute(census.candidate_links, census.trainDataA,
                                  census.trainDataB)
    logger.info("Features %s", str(features.describe()))
    # Train K-Means classifier
    logrg = recordlinkage.KMeansClassifier(algorithm='full', max_iter=1000,
                                           random_state=42)
    logrg.fit(features)
    result = logrg.predict(features)
    log_quality_results(logger, result, census.true_links,
                        len(census.candidate_links))
    # Test the classifier
    compare_cl = census.get_comparision_object()
    features = compare_cl.compute(census.test_links, census.testDataA,
                                  census.testDataB)
    logger.info("Features %s", str(features.describe()))
    result = logrg.predict(features)
    log_quality_results(logger, result, census.true_test_links,
                        len(census.test_links))
def test_febrl(self):
    logger = get_logger('RL.Test.KmeansClustering.FEBRL')
    febrl = FEBRL()
    compare_cl = febrl.get_comparision_object()
    features = compare_cl.compute(febrl.candidate_links, febrl.trainDataA,
                                  febrl.trainDataB)
    logger.info("Features %s", str(features.describe()))
    # Train K-Means classifier
    logrg = recordlinkage.KMeansClassifier()
    logrg.fit(features)
    result = logrg.predict(features)
    log_quality_results(logger, result, febrl.true_links,
                        len(febrl.candidate_links))
    # Test the classifier
    compare_cl = febrl.get_comparision_object()
    features = compare_cl.compute(febrl.test_links, febrl.testDataA,
                                  febrl.testDataB)
    logger.info("Features %s", str(features.describe()))
    result = logrg.predict(features)
    log_quality_results(logger, result, febrl.true_test_links,
                        len(febrl.test_links))
def test_cora(self):
    logger = get_logger('RL.Test.KmeansClustering.CORA')
    # Read train data in datasets A & B
    cora = Cora()
    # Extract features
    compare_cl = cora.get_comparision_object()
    features = compare_cl.compute(cora.candidate_links, cora.trainDataA,
                                  cora.trainDataB)
    logger.info("Features %s", str(features.describe()))
    # Train K-Means classifier
    logrg = recordlinkage.KMeansClassifier()
    logrg.fit(features)
    result = logrg.predict(features)
    log_quality_results(logger, result, cora.true_links,
                        len(cora.candidate_links))
    # Test the classifier
    compare_cl = cora.get_comparision_object()
    features = compare_cl.compute(cora.test_links, cora.testDataA,
                                  cora.testDataB)
    logger.info("Features %s", str(features.describe()))
    result = logrg.predict(features)
    log_quality_results(logger, result, cora.true_test_links,
                        len(cora.test_links))
def __init__(self, request, client_address, server):
    # Read configuration parameters
    self.__isdebug = CONTEXT['debug']
    self.__size = CONTEXT['request_size']
    self.__logger = get_logger("TCPRequestHandler")
    # Compression utility
    self.__compressedcontent = CONTEXT['compressed_content']
    self.__compression = Utility()
    self.__client = client_address
    # Populate the seller list
    self.__sellerObj = server.sellerObject
    # Initialize Ad Exchange
    self.__adExObject = server.adExObject
    # Initialize the DB connection
    self.__dbConnection = server.dbConnection
    if DB_PARAMS['truncate']:
        self.__dbConnection.query("truncate table `GreyFiber`.`IPAllocation`")
    self.__infra_tested = TEST_PARAMS['infra_tested']
    if self.__infra_tested == "MININET":
        self.__mininetConnection = server.mininetConnection
        self.__floodlightConnection = server.floodlightConnection
    # Call the base class constructor
    BaseRequestHandler.__init__(self, request, client_address, server)
def __init__(self, buyer_data=TEST_PARAMS['buyer_file_name'],
             path_to_data=TEST_PARAMS['path'], totalReqs=-1):
    ''' Class constructor '''
    Thread.__init__(self)
    # Remote service bindings
    self.__serverhosts = SERVER_BINDING['address']
    self.__serverport = int(SERVER_BINDING['port'])
    # Buffer settings
    self.__bufferdim = int(CONTEXT['client_socket_buffer'])
    # Compression helper
    self.__compression = Utility()
    self.__resourcepath = TEST_PARAMS['client_path']
    self.__buyerFile = buyer_data
    self.__client_request_type = TEST_PARAMS['client_request_type']
    self.__client_request_code = TEST_PARAMS['client_request_code']
    self.__logger = get_logger("TCPClient")
    self.__conn_timeout = 1
    self.__recv_timeout = 6000
    self.totalReqs = totalReqs
def __init__(self, act_size):
    self.name = 'stochastic'
    self.act_size = act_size
    self.logger = get_logger(self.name)
    self.local_model = None
    self.predictor = None
def __init__(self, request, client_address, server):
    # Read configuration parameters
    self.__isdebug = CONTEXT['debug']
    self.__size = CONTEXT['request_size']
    self.__logger = get_logger("TCPRequestHandler")
    # Compression utility
    self.__compressedcontent = CONTEXT['compressed_content']
    self.__compression = Utility()
    self.__client = client_address
    # Populate the seller list
    self.__sellerObj = server.sellerObject
    # Initialize Ad Exchange
    self.__adExObject = server.adExObject
    # Initialize the DB connection
    self.__dbConnection = DBConnection()
    if DB_PARAMS['truncate']:
        self.__dbConnection.query("truncate table `VirtualFiber`.`IPAllocation`")
    self.__infra_tested = TEST_PARAMS['infra_tested']
    # Call the base class constructor
    BaseRequestHandler.__init__(self, request, client_address, server)
def __init__(self, output_name, output_dir, num_files, patches, feature_type,
             patch_dim=128, patch_type='uint8', pos_type='uint16'):
    self.log = get_logger()
    output_subdir = output_dir
    try:
        makedirs(output_subdir)
    except:
        pass
    output_filename = join(output_subdir, basename(output_name))
    self.log.debug('Saving extracted descriptors to %s', output_filename)
    self.mode = 'creating'
    dt = special_dtype(vlen=bytes)
    patches += 10  # for safety
    self.hfile = HDF5File(output_filename, 'w', compression='gzip',
                          fillvalue=0.0)
    self.patches = self.hfile.create_dataset('patches',
                                             (num_files * patches, patch_dim),
                                             dtype=patch_type, chunks=True)
    self.positions = self.hfile.create_dataset('positions',
                                               (num_files * patches, 2),
                                               dtype=pos_type, chunks=True)
    # Start and end positions of each image's patches
    self.image_index = self.hfile.create_dataset('image_index',
                                                 (num_files, 2), dtype='uint64')
    self.keys = self.hfile.create_dataset('keys', (num_files, ), dtype=dt)
    self.key_set = set()
    self.patches.attrs['cursor'] = 0
    self.patches.attrs['feature_type'] = feature_type
    self.output_filename = output_filename
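# Hedged sketch (illustrative, not from the original code) of reading back the
# HDF5 layout that the Dataset constructor above creates: 'patches',
# 'positions' and the per-image 'image_index' ranges, in the same spirit as
# load_patches(). Uses h5py directly; the file name is an assumption.
import h5py

def example_read_dataset(filename="some_class.hdf5"):
    with h5py.File(filename, 'r') as f:
        iid = f["image_index"][:]                  # [start, end) row range per image
        patches = f["patches"][0:iid.max(), :]     # only the rows actually written
        positions = f["positions"][0:iid.max(), :]
    return patches, positions, iid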
def extract_decaf(input_dir, output_dir, network_data_dir, files, num_patches,
                  patch_size, image_dim, levels, oversample, layer_name,
                  decaf_oversample, extraction_method):
    log = get_logger()
    BATCH_SIZE = 16
    #ex = DecafExtractor.DecafExtractor(layer_name)
    #ex = CaffeExtractorPlus.CaffeExtractorPlus(
    #    network_data_dir + 'hybridCNN_iter_700000_upgraded.caffemodel',
    #    network_data_dir + 'hybridCNN_deploy_no_relu_upgraded.prototxt',
    #    network_data_dir + 'hybrid_mean.npy')
    ex = NewCaffeExtractor.NewCaffeExtractor()
    ex.set_parameters(patch_size, num_patches, levels, image_dim, BATCH_SIZE)
    if oversample:
        log.info('Extracting with mirror combinations (X, Y, X-Y, Y-X)')
        ex.enable_data_augmentation()
    ds = Dataset(input_dir, output_dir, len(files),
                 num_patches * ex.get_number_of_features_per_image(),
                 'CAFFE', patch_dim=ex.get_descriptor_size(),
                 patch_type='float32', pos_type='uint16')
    for f in files:
        if f in ds:
            log.info('Skipping <%s>. Already in the dataset.', basename(f))
            continue
        try:
            features = ex.extract_image(f)
        except:
            features = None
        if features is not None and features.cursor > 0:
            (patches6, patches7, positions) = features.get()
            ds.append(f, patches6, patches7, positions)
def get_arguments():
    log = get_logger()
    parser = ArgumentParser(description='SVM based classification for whole images.')
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory containing HDF5 files.")
    parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                        help="Number of images to use from training set.")
    parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                        help="Number of images to use from the test set.")
    parser.add_argument("--patch_name", dest="patch_name",
                        help="The name of the patches in the HDF5 File.")
    parser.add_argument("--patches-per-image", dest="patches_per_image", type=int,
                        help="Number of patches for each image.")
    parser.add_argument("--cmd", dest="cmd",
                        choices=['whole-image-svm', 'svm-nbnl'],
                        help="Command to execute.")
    args = parser.parse_args()
    patchOptions.patch_name = args.patch_name
    if not args.input_dir:
        log.error('input dir is required, but not present.')
        exit()
    if not args.cmd:
        log.error('cmd is required, but not present.')
        exit()
    if not args.num_train_images:
        log.error('num_train_images is required, but not present.')
        exit()
    if not args.num_test_images:
        log.error('num_test_images is required, but not present.')
        exit()
    return args
def archive_values():
    log = common.get_logger()
    log.info('Archiving values..')
    conn = common.get_connection()
    trans = conn.begin()
    try:
        # Insert new users
        conn.execute("""
            INSERT INTO ValueHistory (user_id, value)
            (select id, value from User
             where (select count(value) from ValueHistory
                    where user_id=User.id)=0
               and value is not NULL)
        """)
        # Update existing users
        conn.execute("""INSERT INTO ValueHistory (user_id, value)
            SELECT User.id, User.value FROM User
            WHERE User.value != (SELECT value FROM ValueHistory
                                 WHERE user_id=User.id
                                 ORDER BY timestamp DESC LIMIT 1)""")
        trans.commit()
    except:
        trans.rollback()
        raise
    conn.close()
    log.info('..done.')
def load_split_whole_image_only(input_folder, nTrain, nTest):
    logger = get_logger()
    files = sorted(glob(join(input_folder, '*.hdf5')), key=basename)
    nClasses = len(files)
    logger.info("Loading " + str(nClasses) + " classes")
    # nClasses*nSamples x nFeatures
    train_patches = np.empty([nClasses * nTrain, patchOptions.size])
    test_patches = np.empty([nClasses * nTest, patchOptions.size])
    train_labels = np.empty([nClasses * nTrain])
    test_labels = np.empty([nClasses * nTest])
    start = time.clock()
    train_patch_count = test_patch_count = 0
    for (classNumber, filename) in enumerate(files):
        hfile = HDF5File(filename, 'r')
        iid = hfile["image_index"][:]
        nImages = iid.shape[0]
        assert nImages >= (nTrain + nTest), "Not enough images!"
        np.random.shuffle(iid)
        trainIdx = iid[0:nTrain]
        testIdx = iid[nTrain:nTrain + nTest]
        patches = hfile[patchOptions.patch_name]
        for iid in trainIdx:
            train_patches[train_patch_count] = patches[iid[0]]
            train_patch_count += 1
        train_labels[classNumber * nTrain:(classNumber + 1) * nTrain] = \
            classNumber * np.ones(nTrain)
        for iid in testIdx:
            test_patches[test_patch_count] = patches[iid[0]]
            test_patch_count += 1
        test_labels[classNumber * nTest:(classNumber + 1) * nTest] = \
            classNumber * np.ones(nTest)
        logger.info("Patch count: " + str(train_patch_count) + " training and " +
                    str(test_patch_count) + " test patches for class " + filename)
        hfile.close()
    end = time.clock()
    logger.info("It took " + str((end - start)) + " seconds")
    LoadedData = namedtuple("LoadedData",
                            "train_patches train_labels test_patches test_labels")
    return LoadedData(train_patches, train_labels, test_patches, test_labels)
def _test_grid_search(self, dataset):
    dimension = [50, 80, 120]
    batchSize = [100]
    learning_rate = [0.1, 0.2]
    margin = [0.5, 1]
    regularizer_scale = [0.1, 0.2]
    epochs = [100, 500]
    neg_rel_rate = [1, 2, 5]
    neg_rate = [1, 5, 10]
    count = 0
    max_fscore = 0
    model = dataset()
    logger = get_logger('RL.Test.GridSearch.TransE.' + str(model))
    for d, bs, lr, m, reg, e, nr, nrr in \
            itertools.product(dimension, batchSize, learning_rate, margin,
                              regularizer_scale, epochs, neg_rate, neg_rel_rate):
        params = {
            'learning_rate': lr,
            'margin': m,
            'dimension': d,
            'epochs': e,
            'batchSize': bs,
            'regularizer_scale': reg,
            'neg_rate': nr,
            'neg_rel_rate': nrr
        }
        logger.info("\nPARAMS: %s", str(params))
        count = count + 1
        cur_fscore = self._test_transe(dataset, params)
        if max_fscore <= cur_fscore:
            max_fscore = cur_fscore
    logger.info("Ran total %d Tests.", count)
    logger.info("Max Fscore: %f", max_fscore)
def select_random_support(train_dir, support_dir, num_train_images,
                          support_size, position_influence):
    log = get_logger()
    train_files = [f for f in glob(join(train_dir, '*'))
                   if splitext(f.lower())[1] == '.hdf5']
    try:
        os.makedirs(support_dir)
    except:
        pass
    for target_file in train_files:
        log.info('Extracting random support from "%s"...', basename(target_file))
        #(patches, _) = get_standardized_patches(target_file, num_train_images, position_influence)
        (patches, _) = get_patches(target_file, num_train_images,
                                   position_influence)
        rand_ix = random.sample(range(patches.shape[0]),
                                min(patches.shape[0], support_size))
        patches = patches[np.array(rand_ix), :]
        fh = HDF5File(join(support_dir, basename(target_file)), 'w')
        ds = fh.create_dataset('support', patches.shape, dtype='float')
        ds[:] = patches
        ds.attrs['cursor'] = patches.shape[0]
        fh.close()
def classify_with_support(engine, test_dir, support_dir, num_train_images,
                          num_test_images, position_influence, support_size=0):
    log = get_logger()
    test_files = sorted(glob(join(test_dir, '*.hdf5')), key=basename)
    num_classes = len(test_files)
    log.info('Testing w.r.t. %d classes.' % num_classes)
    if position_influence > 0:
        log.info('Position influence (alpha) is %.2f.', position_influence)
    # Allocating distances for each test class
    dists = np.ndarray((num_classes, num_classes, num_test_images))
    # Identifying labels
    labels = np.vstack([c * np.ones((1, num_test_images), dtype=np.int)
                        for c in range(num_classes)])
    log.info('Looking for nearest neighbors...')
    for (support_class, f) in enumerate(test_files):
        support_filename = join(support_dir, basename(f))
        if is_selected_support(support_filename):
            support = get_support(support_filename, support_size)
        else:
            support, _ = get_patches(support_filename, num_train_images,
                                     position_influence)
        # Creating index for current class
        log.info('\tBuilding index from support of class "%s"...', basename(f))
        engine.fit(support)
        del support
        # Evaluating test samples for all classes using current index
        for (test_class, test_filename) in enumerate(test_files):
            (test_patches, test_image_index) = get_patches(test_filename,
                                                           num_test_images,
                                                           position_influence)
            log.info('\tLooking for NNs of "%s"...', basename(test_filename))
            im_to_class_dists = engine.dist(test_patches)
            if len(im_to_class_dists.shape) > 1:
                # In case of k-NN, we average
                im_to_class_dists = im_to_class_dists.mean(axis=1)
            dists[support_class, test_class, :] = \
                np.array([sum(im_to_class_dists[ix[0]:ix[1]])
                          for ix in test_image_index])
    predictions = dists.argmin(axis=0)
    acc = (labels == predictions).mean()
    log.info('*** Recognition accuracy is: %.2f%%', acc * 100)
    return acc
def fetch_email(config_file, run_forever):
    """ Start Backup Service """
    # Load config
    config = common.load_config_file(config_file)
    # Load logging config
    common.setup_logging_config("%s/../config/" % __abs_dirpath__)
    # Get logger
    logger = common.get_logger("app")
    logger.info("Starting mail parsing...")
    credentials_dir = os.path.join(__abs_dirpath__, '../.credentials')
    # Enhance configuration
    config['credentials_dir'] = credentials_dir
    # Instantiate services
    mail_reader = GmailReader(config=config, logger=logger)
    job_queuer = RedisJobQueuer(config=config, logger=logger)
    hackpad_processor = HackpadMailProcessor(config=config,
                                             mail_reader=mail_reader,
                                             job_queuer=job_queuer,
                                             logger=logger)
    if run_forever:
        hackpad_processor.run_forever()
    else:
        hackpad_processor.fetch_and_process_emails()
def get_arguments():
    log = get_logger()
    parser = ArgumentParser(description='HD5 Splitter.')
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory with HDF5 images.")
    parser.add_argument("--output-dir", dest="output_dir",
                        help="Directory to put HDF5 files to.")
    parser.add_argument("--patches", dest="patches", type=int, default=100,
                        help="Number of patches to extract per image.")
    args = parser.parse_args()
    if not args.input_dir:
        log.error('input-dir option is required, but not present.')
        exit()
    if not args.output_dir:
        log.error('output-dir option is required, but not present.')
        exit()
    return args
def do_nbnl(args):
    logger = get_logger()
    logger.info("Getting indexes")
    data = get_indexes(args.input_dir, args.num_train_images,
                       args.num_test_images, args.patches_per_image)
    train = data.Train
    num_classes = len(train)
    logger.info("Loading training patches")
    X = np.vstack([t.get_patches() for t in train])
    for t in train:
        t.unload()
    Y = np.vstack([c * np.ones((train[c].get_num_patches(), 1), dtype=np.int)
                   for c in range(num_classes)])
    clf = svm.LinearSVC(dual=False)
    logger.info("Training Linear SVM at patch level")
    logger.info(str(X.shape) + " X, " + str(Y.shape) + " Y")
    clf.fit(X, Y.ravel())
    logger.info("Training completed, freeing training patches")
    del X, Y
    test = data.Test
    testX = np.vstack([t.get_patches() for t in test])
    for t in test:
        t.unload()
    testY = np.vstack([c * np.ones((test[c].get_num_patches(), 1), dtype=np.int)
                       for c in range(num_classes)])
    logger.info(str(testX.shape) + " testX, " + str(testY.shape) + " testY")
    logger.info("Evaluating test patches...")
    confidence = clf.decision_function(testX)
    predicted = np.argmax(confidence, 1)
    correct = (predicted == testY.ravel()).sum()
    score = clf.score(testX, testY)
    logger.info("Accuracy " + str(score) + " at patch level " +
                str((100.0 * correct) / len(predicted)))
    test_indexes = np.empty([num_classes, args.num_test_images, 2])
    for c in range(num_classes):
        test_indexes[c] = test[c].get_new_indexes()
    image_labels = np.vstack([c * np.ones((args.num_test_images, 1))
                              for c in range(num_classes)])
    nbnl(confidence, test_indexes, image_labels)
def __init__(self):
    # Constants describing the training process
    self.moving_average_decay = 0.9999  # The decay to use for the moving average.
    self.num_steps_per_decay = 1000     # Epochs after which learning rate decays.
    self.learning_rate_decay_factor = 0.95  # Learning rate decay factor.
    self.intial_learning_rate = args.learning_rate
    self.batch_size = args.batch_size
    self.eval_batch_size = args.batch_size
    self.num_examples_per_epoch_for_val = 100
    self.val_iter = self.num_examples_per_epoch_for_val / self.batch_size
    self.image_h = args.image_h
    self.image_w = args.image_w
    self.image_c = args.image_c
    self.num_classes = args.num_classes  # cup, disc, other
    self.max_steps = args.total_epoches
    self.image_path = args.train_path
    self.test_path = args.test_path
    self.finetune_ckpt = args.finetune
    self.test_ckpt = args.test
    self.loss_func = args.loss
    self.save_image = args.save_image
    self.log_dir = os.path.join('logs', args.note)
    self.output = get_logger('segnet', folder=self.log_dir)
    self.dataset = Dataset(args)
    self.sess_config = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
    self.sess_config.gpu_options.allow_growth = True
def _test_transh(self, dataset, params):
    graph = Graph_ER(dataset)
    model = dataset()
    logger = get_logger('RL.Test.er.TransH.' + str(model))
    transh = TransH(graph,
                    dimension=params['dimension'],
                    learning_rate=params['learning_rate'],
                    margin=params['margin'],
                    regularizer_scale=params['regularizer_scale'],
                    batchSize=params['batchSize'],
                    neg_rate=params['neg_rate'],
                    neg_rel_rate=params['neg_rel_rate'])
    loss = transh.train(max_epochs=params['epochs'])
    logger.info("Training Complete with loss: %f", loss)
    ent_embeddings = transh.get_ent_embeddings()
    result_prob = []
    for i in range(0, len(graph.entity_pairs)):
        distance = abs(spatial.distance.cosine(
            ent_embeddings[graph.entity_pairs[i][0]],
            ent_embeddings[graph.entity_pairs[i][1]]))
        result_prob.append((graph.entity_pairs[i][0],
                            graph.entity_pairs[i][1], distance))
        #logger.info("i: %d, distance: %f true_pairs: %s", i, distance,
        #            graph.entity_pairs[i] in true_pairs)
    # Write embeddings to file
    export_embeddings('er', str(model), 'TransH', graph.entity, ent_embeddings)
    export_result_prob(dataset, 'er', str(model), 'TransH', graph.entity,
                       result_prob, graph.true_pairs)
    optimal_threshold, max_fscore = get_optimal_threshold(result_prob,
                                                          graph.true_pairs)
    try:
        logger.info("MAX FSCORE: %f AT : %f", max_fscore, optimal_threshold)
        result = pd.MultiIndex.from_tuples([(e1, e2) for (e1, e2, d) in result_prob
                                            if d <= optimal_threshold])
        params['threshold'] = optimal_threshold
        log_quality_results(logger, result, graph.true_pairs,
                            len(graph.entity_pairs), params)
        export_false_negatives(dataset, 'er', str(model), 'TransH', graph.entity,
                               result_prob, graph.true_pairs, result, graph.entity)
        export_false_positives(dataset, 'er', str(model), 'TransH', graph.entity,
                               result_prob, graph.true_pairs, result, graph.entity)
    except:
        logger.info("Zero Results")
    # Log MAP, MRR and Hits@K
    ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
    p_at_1 = ir_metrics.log_metrics(logger, params)
    transh.close_tf_session()
    return (max_fscore, p_at_1)
def get_arguments():
    log = get_logger()
    parser = ArgumentParser(description='Patch/descriptor extraction utility.')
    parser.add_argument("--patches", dest="patches", type=int, default=1000,
                        help="Number of patches to extract per image.")
    parser.add_argument("--patch-size", dest="patch_size", type=int, default=16,
                        help="Size of the patch.")
    parser.add_argument("--image-dim", dest="image_dim", type=int,
                        help="Size of the largest image dimension.")
    parser.add_argument("--levels", dest="levels", type=int, default=3,
                        help="Number of hierarchical levels to extract patches from. "
                             "Procedure starts from <patch-size> and divides it by 2 at each level.")
    parser.add_argument("--descriptor", dest="descriptor", default='DECAF',
                        choices=['DECAF'], help="Type of feature descriptor.")
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory with JPEG images.")
    parser.add_argument("--output-dir", dest="output_dir",
                        help="Directory to put HDF5 files to.")
    parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                        help="Number of train images.")
    parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                        help="Number of test images.")
    parser.add_argument("--split", dest="split", type=int,
                        help="Split to extract.")
    parser.add_argument("--oversample", dest="oversample", action='store_true',
                        help="Add patch flipping.")
    parser.add_argument("--decaf-oversample", dest="decaf_oversample",
                        action='store_true',
                        help="Caffe oversampling. Flip X, Y, etc.")
    parser.add_argument("--layer-name", dest="layer_name",
                        help="Decaf layer name.")
    parser.add_argument("--network-data-dir", dest="network_data_dir",
                        help="Directory holding the network weights.")
    parser.add_argument("--patch-method", dest="patch_method",
                        help="What method to use to extract patches.")
    args = parser.parse_args()
    if not args.input_dir:
        log.error('input-dir option is required, but not present.')
        exit()
    if not args.output_dir:
        log.error('output-dir option is required, but not present.')
        exit()
    if not args.image_dim:
        log.error('image-dim option is required, but not present.')
        exit()
    if not args.num_train_images:
        log.error('num_train_images option is required, but not present.')
        exit()
    if not args.num_test_images:
        log.error('num_test_images option is required, but not present.')
        exit()
    return args
def balanced_extract(self, im, feature_storage, check_patch_coords, transform,
                     filename):
    (w, h) = im.size
    log = get_logger()
    # Extracting features from patches
    preprocessedPatches = np.empty([self.patches_per_image, 3, 227, 227],
                                   dtype="float32")
    positions = np.zeros((self.patches_per_image, 2), dtype="uint16")
    # Calculating patch step
    if self.levels > 0:
        patchesXLevel = self.patches_per_image / len(self.patch_sizes)
        log.info("Patches per level: " + str(patchesXLevel))
    k = 0
    if isinstance(transform, NopTransform):  # Hacky... TODO why only for NopTransform?
        # Extracting features for the whole image
        preprocessedPatches[k, ...] = self.transformer.preprocess(
            'data', self.to_rgb(im))
        positions[k, ...] = np.matrix([0, 0])
        k += 1
    expected = 0
    skipped = 0
    for l in range(self.levels):
        countLevel = 0
        _w = w - self.patch_sizes[l]
        _h = h - self.patch_sizes[l]
        if (_w < 0 or _h < 0):
            continue
        patch_step = int((_w * _h / patchesXLevel) ** 0.52) + 2
        w_steps = np.arange(0, _w + 1, patch_step)
        h_steps = np.arange(0, _h + 1, patch_step)
        print "Image size (" + str(w) + ", " + str(h) + ") - patch size: " + \
            str(self.patch_sizes[l]) + " patch step: " + str(patch_step) + \
            " available pixels: (" + str(_w) + ", " + str(_h) + ") " \
            "\n\twsteps: " + str(w_steps) + " \n\th_steps: " + str(h_steps)
        for i in range(len(w_steps)):
            for j in range(len(h_steps)):
                expected += 1
                x = w_steps[i]
                y = h_steps[j]
                patch_left = x + self.patch_sizes[l]
                patch_bottom = y + self.patch_sizes[l]
                if (check_patch_coords(x, y, patch_left, patch_bottom)
                        and patch_left <= w and patch_bottom <= h):
                    patch = im.crop((x, y, patch_left, patch_bottom))
                    patch.load()
                    countLevel += 1
                    preprocessedPatches[k, ...] = self.transformer.preprocess(
                        'data', self.to_rgb(patch))
                    positions[k, ...] = np.matrix([x, y])
                    k += 1
                else:
                    skipped += 1
        print "got " + str(countLevel) + " for level " + str(l)
    self.load_caffe_patches(preprocessedPatches[0:k], positions[0:k],
                            feature_storage)
    print "Expected " + str(expected) + " skipped: " + str(skipped)
def __init__(self):
    '''
    Add connection object here if exchange is physically located in a
    separate server
    '''
    self.__logger = get_logger("AdExchange")
    self.__logger.info("### Starting Ad Exchange...")
    self.__auction = ADEX['auction']
    self.__reserve = ADEX['reserve']
def extract(input_dir, output_dir, network_data_dir, num_patches, patch_size,
            image_dim, levels, layer_name):
    log = get_logger()
    BATCH_SIZE = 1
    log.info("Walking " + input_dir)
    ex = NewCaffeExtractor.NewCaffeExtractor()
    ex.set_parameters(patch_size, num_patches, levels, image_dim, BATCH_SIZE)
    params = namedtuple("Params", "input_dir output_dir extractor")
    os.path.walk(input_dir, walk, params(input_dir, output_dir, ex))
def __init__(self, act_size, n_servers):
    self.name = 'greedy'
    self.n_servers = n_servers
    self.act_size = act_size
    self.logger = get_logger(self.name)
    self.local_model = None
    self.predictor = None
def __init__(self, act_size):
    self.name = 'round_robin'
    self.act_size = act_size
    self.action = 0
    self.logger = get_logger(self.name)
    self.local_model = None
    self.predictor = None
def get_arguments():
    log = get_logger()
    parser = ArgumentParser(description='NN support selection and classification tool.')
    parser.add_argument("--train-dir", dest="train_dir",
                        help="Directory containing training HDF5 files.")
    parser.add_argument("--test-dir", dest="test_dir",
                        help="Directory containing testing HDF5 files.")
    parser.add_argument("--support", dest="support",
                        help="Directory or file to store/get NN support.")
    parser.add_argument("--result-dir", dest="result_dir",
                        help="Directory to store NN distances.")
    parser.add_argument("--support-size", dest="support_size", type=int,
                        help="Support size to select from each class.")
    parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                        help="Number of images to use from training set.")
    parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                        help="Number of images to use from the test set.")
    parser.add_argument("--gamma", dest="gamma", type=float,
                        help="KDE bandwidth.")
    parser.add_argument("--knn", dest="knn", type=int, default=1,
                        help="Number of nearest neighbors to look for.")
    parser.add_argument("--alpha", dest="alpha", type=float, default=0,
                        help="Patch position influence.")
    parser.add_argument("--cmd", dest="cmd",
                        choices=['select-random', 'classify'],
                        help="Command to execute.")
    parser.add_argument("--alg-type", dest="alg_type", default='nn',
                        choices=['nn', 'kde'],
                        help="Nearest neighbor algorithm type.")
    parser.add_argument("--on_the_fly_splits", dest="on_the_fly_splits",
                        action='store_true',
                        help="Splits are computed on the fly.")
    parser.add_argument("--overwrite", dest="overwrite", action='store_true',
                        help="Overwrite result of command (if any).")
    parser.add_argument("--patch_name", dest="patch_name",
                        help="The name of the patches in the HDF5 File.")
    args = parser.parse_args()
    if not args.cmd:
        log.error('cmd option is required, but not present.')
        exit()
    if args.cmd == 'train' and not args.train_dir:
        log.error('train-dir option is required, but not present.')
        exit()
    if not args.support:
        log.error('support option is required, but not present.')
        exit()
    if args.cmd == 'nn' and not args.result_dir:
        log.error('result-dir option is required, but not present.')
        exit()
    return args
def balanced_extract(self, im, feature_storage, check_patch_coords, transform,
                     filename):
    (w, h) = im.size
    log = get_logger()
    # Extracting features from patches
    preprocessedPatches = np.empty([self.patches_per_image, 3, 227, 227],
                                   dtype="float32")
    positions = np.zeros((self.patches_per_image, 2), dtype="uint16")
    # Calculating patch step
    if self.levels > 0:
        patchesXLevel = self.patches_per_image / len(self.patch_sizes)
        log.info("Patches per level: " + str(patchesXLevel))
    k = 0
    if isinstance(transform, NopTransform):  # Hacky... TODO why only for NopTransform?
        # Extracting features for the whole image
        preprocessedPatches[k, ...] = self.transformer.preprocess(
            'data', self.to_rgb(im))
        positions[k, ...] = np.matrix([0, 0])
        k += 1
    expected = 0
    skipped = 0
    largestSide = max(im.size)
    smallestPatch = int(round(largestSide * float(32) / 200))
    self.patch_sizes = [smallestPatch, smallestPatch * 2, smallestPatch * 4]
    for l in range(self.levels):
        countLevel = 0
        _w = w - self.patch_sizes[l]
        _h = h - self.patch_sizes[l]
        if (_w < 0 or _h < 0):
            continue
        patch_step = int((_w * _h / patchesXLevel) ** 0.52) + 2
        w_steps = np.arange(0, _w + 1, patch_step)
        h_steps = np.arange(0, _h + 1, patch_step)
        print "Image size (" + str(w) + ", " + str(h) + ") - patch size: " + \
            str(self.patch_sizes[l]) + " patch step: " + str(patch_step) + \
            " available pixels: (" + str(_w) + ", " + str(_h) + ") " \
            "\n\twsteps: " + str(w_steps) + " \n\th_steps: " + str(h_steps)
        for i in range(len(w_steps)):
            for j in range(len(h_steps)):
                expected += 1
                x = w_steps[i]
                y = h_steps[j]
                patch_left = x + self.patch_sizes[l]
                patch_bottom = y + self.patch_sizes[l]
                if (check_patch_coords(x, y, patch_left, patch_bottom)
                        and patch_left <= w and patch_bottom <= h
                        and k < self.patches_per_image):
                    patch = im.crop((x, y, patch_left, patch_bottom))
                    patch.load()
                    countLevel += 1
                    preprocessedPatches[k, ...] = self.transformer.preprocess(
                        'data', self.to_rgb(patch))
                    positions[k, ...] = np.matrix([x, y])
                    k += 1
                else:
                    skipped += 1
        print "got " + str(countLevel) + " for level " + str(l)
    self.load_caffe_patches(preprocessedPatches[0:k], positions[0:k],
                            feature_storage)
    print "Expected " + str(expected) + " skipped: " + str(skipped)
def _test_rl_transe(self, dataset, params):
    # Load graph data
    graph = Graph_ER(dataset)
    model = dataset()
    logger = get_logger('RL.Test.er.RLTransE.' + str(model))
    transe = TransE(graph,
                    dimension=params['dimension'],
                    learning_rate=params['learning_rate'],
                    margin=params['margin'],
                    regularizer_scale=params['regularizer_scale'],
                    batchSize=params['batchSize'],
                    neg_rate=params['neg_rate'],
                    neg_rel_rate=params['neg_rel_rate'])
    loss = transe.train(max_epochs=params['epochs'])
    logger.info("Training Complete with loss: %f", loss)
    ent_embeddings = transe.get_ent_embeddings()
    result_prob = []
    for (a, b) in graph.entity_pairs:
        a_triples = [(h, t, r) for (h, t, r) in graph.triples if h == a]
        b_triples = [(h, t, r) for (h, t, r) in graph.triples if h == b]
        distance = abs(spatial.distance.cosine(ent_embeddings[a],
                                               ent_embeddings[b]))
        for (ah, at, ar) in a_triples:
            bt = [t for (h, t, r) in b_triples if r == ar]
            if len(bt):
                distance = distance + abs(spatial.distance.cosine(
                    ent_embeddings[at], ent_embeddings[bt[0]]))
        result_prob.append((a, b, distance))
        #logger.info("a: %d, b: %d distance: %f true_pairs: %s", a, b, distance,
        #            (a, b) in graph.true_pairs)
    # Write embeddings to file
    export_embeddings('er', str(model), 'RLTransE', graph.entity, ent_embeddings)
    export_result_prob(dataset, 'er', str(model), 'RLTransE', graph.entity,
                       result_prob, graph.true_pairs)
    optimal_threshold, max_fscore = get_optimal_threshold(result_prob,
                                                          graph.true_pairs,
                                                          max_threshold=3.0)
    try:
        params['threshold'] = optimal_threshold
        result = pd.MultiIndex.from_tuples([(e1, e2) for (e1, e2, d) in result_prob
                                            if d <= optimal_threshold])
        log_quality_results(logger, result, graph.true_pairs,
                            len(graph.entity_pairs), params)
    except:
        logger.info("Zero Results")
    # Log MAP, MRR and Hits@K
    ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
    precision_at_1 = ir_metrics.log_metrics(logger, params)
    transe.close_tf_session()
    return (max_fscore, precision_at_1)
def generate_splitFiles(available_classes, dir_name, files):
    get_logger().info("Folder " + dir_name + " contains " + str(len(files)) + " files")
    folder_name = os.path.basename(os.path.normpath(dir_name))
    if not hasNumbers(folder_name):
        # we are only interested in instance level folders (they contain numbers)
        return
    basename = nregex.match(folder_name).group(1)
    for f in files:
        #get_logger().info("Parsing " + join(dir_name, f))
        print("Parsing " + join(dir_name, f))
        old_file = join(dir_name, f)
        if os.path.isdir(old_file):
            continue
        elif not is_image(old_file):
            get_logger().info("Skipping " + old_file + ": not an image")
            continue
        if basename in available_classes:
            class_n = available_classes.index(basename)
            print(old_file + " " + str(class_n))
def create_hdf5_dataset(output_filename, patches, positions):
    log = get_logger()
    log.debug('Saving extracted descriptors to %s', output_filename)
    hfile = HDF5File(output_filename, 'w', compression='gzip', fillvalue=0.0)
    hpatches = hfile.create_dataset('patches', patches.shape, dtype="float32",
                                    chunks=True)
    hpositions = hfile.create_dataset('positions', positions.shape,
                                      dtype="uint16", chunks=True)
    hpatches[:] = patches
    hpositions[:] = positions
    hfile.close()
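# Hedged usage sketch for create_hdf5_dataset() above: write a small random
# descriptor matrix and matching patch positions to disk. The shapes and the
# output path are illustrative assumptions, not values from the original code.
import numpy as np

def example_create_hdf5_dataset():
    patches = np.random.rand(100, 128).astype("float32")   # 100 patches, 128-d
    positions = np.random.randint(0, 200, size=(100, 2)).astype("uint16")
    create_hdf5_dataset("/tmp/example_patches.hdf5", patches, positions)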
def load(self):
    get_logger().info("Loading patches for " + self.file_name)
    hfile = HDF5File(self.file_name, 'r')
    patches = hfile[self.patch_name]
    feature_dim = patches.shape[1]
    indexes = self.indexes
    num_patches = (indexes[:, 1] - indexes[:, 0]).sum()
    self.patches = np.empty([num_patches, feature_dim])
    self.new_index = np.empty([indexes.shape[0], 2])
    patch_start = n_image = 0
    for iid in indexes:
        n_patches = iid[1] - iid[0]
        self.patches[patch_start:patch_start + n_patches, :] = \
            patches[iid[0]:iid[1], :]
        self.new_index[n_image] = [patch_start, patch_start + n_patches]
        patch_start += n_patches
        n_image += 1
    hfile.close()
    get_logger().info("Loaded " + str(num_patches) + " patches")
def nbnn(train, test, engine):
    num_classes = len(train)
    num_test_images = len(test[0].iid)
    dists = np.ndarray((num_classes, num_classes, num_test_images))
    # Identifying labels
    labels = np.vstack([c * np.ones((1, num_test_images), dtype=np.int)
                        for c in range(num_classes)])
    for (support_class, class_data) in enumerate(train):
        get_logger().info("Loading class " + class_data.name + " as support - " +
                          str(class_data.patches.shape))
        engine.fit(class_data.patches)
        for test_class, test_data in enumerate(test):
            #get_logger().info("Testing class " + test_data.name)
            im_to_class_dists = engine.dist(test_data.patches)
            dists[support_class, test_class, :] = \
                np.array([sum(im_to_class_dists[ix[0]:ix[1]])
                          for ix in test_data.iid])
    predictions = dists.argmin(axis=0)
    acc = (labels == predictions).mean()
    get_logger().info('*** Recognition accuracy by ' + engine.name + ' is: ' +
                      str(acc * 100))
def __init__(self):
    ''' Initialize seller class '''
    self.__rsp = TEST_PARAMS['path']
    self.__sf = TEST_PARAMS['seller_file_name']
    self.__compressed = CONTEXT['compressed_content']
    self.__sellerGraph = nx.Graph()
    self.__logger = get_logger("Seller")
def get_arguments():
    log = get_logger()
    parser = ArgumentParser(description='Descriptor checker utility.')
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory with HDF5 files.")
    args = parser.parse_args()
    return args
def _test_etranse(self, dataset, params):
    model = dataset()
    graph = Graph_ERER(dataset)
    logger = get_logger("RL.Test.erer.ETransE." + str(model))
    etranse = ETransE(graph,
                      dimension=params['dimension'],
                      batchSize=params['batchSize'],
                      learning_rate=params['learning_rate'],
                      margin=params['margin'],
                      neg_rate=params['neg_rate'],
                      neg_rel_rate=params['neg_rel_rate'],
                      regularizer_scale=params['regularizer_scale'],
                      alpha=params['alpha'],
                      beta=params['beta'])
    etranse.train(max_epochs=params['max_epochs'])
    ent_embeddings_a = etranse.get_ent_embeddings_A()
    ent_embeddings_b = etranse.get_ent_embeddings_B()
    result_prob = []
    for i in range(0, len(graph.entity_pairs)):
        distance = abs(spatial.distance.cosine(
            ent_embeddings_a[int(graph.entity_pairs[i][0])],
            ent_embeddings_b[int(graph.entity_pairs[i][1])]))
        result_prob.append((graph.entity_pairs[i][0],
                            graph.entity_pairs[i][1], distance))
        #logger.info("i: %d, distance: %f true_pairs: %s", i, distance,
        #            graph.entity_pairs[i] in true_pairs)
    # Write embeddings to file
    export_embeddings('erer', str(model), 'ETransE', graph.entityA,
                      ent_embeddings_a)
    export_embeddings('erer', str(model), 'ETransE', graph.entityB,
                      ent_embeddings_b)
    export_result_prob(dataset, 'erer', str(model), 'ETransE', graph.entityA,
                       result_prob, graph.true_pairs, graph.entityB)
    optimal_threshold, max_fscore = get_optimal_threshold(result_prob,
                                                          graph.true_pairs)
    try:
        params['threshold'] = optimal_threshold
        result = pd.MultiIndex.from_tuples([(e1, e2) for (e1, e2, d) in result_prob
                                            if d <= optimal_threshold])
        log_quality_results(logger, result, graph.true_pairs,
                            len(graph.entity_pairs), params)
    except Exception as e:
        logger.info("Zero Results")
        logger.error(e)
    # Log MAP, MRR and Hits@K
    ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
    prec_at_1 = ir_metrics.log_metrics(logger, params)
    etranse.close_tf_session()
    return (max_fscore, prec_at_1)
def extract(input_dir, output_dir, network_data_dir, num_patches, patch_size,
            image_dim, levels, layer_name):
    log = get_logger()
    BATCH_SIZE = 1
    log.info("Walking " + input_dir)
    ex = NewCaffeExtractor.NewCaffeExtractor()
    ex.set_parameters(patch_size, num_patches, levels, image_dim, BATCH_SIZE)
    params = namedtuple("Params", "input_dir output_dir extractor")
    os.path.walk(input_dir, walk, params(input_dir, output_dir, ex))
def _test_erer(self, dataset, er_algo, params):
    model = dataset()
    graph = Graph_ERER(dataset)
    graph_er = graph.get_er_model()
    er_model = er_algo(graph_er,
                       dimension=params['dimension'],
                       learning_rate=params['learning_rate'],
                       margin=params['margin'],
                       regularizer_scale=params['regularizer_scale'],
                       batchSize=params['batchSize'],
                       neg_rate=params['neg_rate'],
                       neg_rel_rate=params['neg_rel_rate'])
    loss = er_model.train(max_epochs=params['epochs'])
    logger = get_logger('RL.Test.erer.ERER.' + str(model) + "." + str(er_model))
    logger.info("Training Complete with loss: %f", loss)
    ent_embeddings = er_model.get_ent_embeddings()
    result_prob = []
    for i in range(0, len(graph_er.entity_pairs)):
        distance = abs(spatial.distance.cosine(
            ent_embeddings[graph_er.entity_pairs[i][0]],
            ent_embeddings[graph_er.entity_pairs[i][1]]))
        result_prob.append((graph_er.entity_pairs[i][0],
                            graph_er.entity_pairs[i][1], distance))
        #logger.info("i: %d, distance: %f true_pairs: %s", i, distance,
        #            graph_er.entity_pairs[i] in graph_er.true_pairs)
    # Write embeddings to file
    export_embeddings("erer", str(model), str(er_model), graph_er.entity,
                      ent_embeddings)
    export_result_prob(dataset, 'erer', str(model), str(er_model),
                       graph_er.entity, result_prob, graph_er.true_pairs)
    optimal_threshold, max_fscore = get_optimal_threshold(result_prob,
                                                          graph_er.true_pairs)
    try:
        params['threshold'] = optimal_threshold
        result = pd.MultiIndex.from_tuples([(e1, e2) for (e1, e2, d) in result_prob
                                            if d <= optimal_threshold])
        log_quality_results(logger, result, graph_er.true_pairs,
                            len(graph_er.entity_pairs), params)
    except:
        logger.info("Zero Results")
    # Log MAP, MRR and Hits@K
    ir_metrics = InformationRetrievalMetrics(result_prob, graph_er.true_pairs)
    ir_metrics.log_metrics(logger)
    er_model.close_tf_session()
    return max_fscore
def _test_seea(self, dataset, params):
    model = dataset()
    graph = Graph_EAR(dataset)
    logger = get_logger('RL.Test.ear.SEEA.' + str(model))
    seea = SEEA(graph,
                dimension=params['dimension'],
                learning_rate=params['learning_rate'],
                batchSize=params['batchSize'],
                margin=params['margin'],
                regularizer_scale=params['regularizer_scale'],
                neg_rate=params['neg_rate'],
                neg_rel_rate=params['neg_rel_rate'])
    # Begin SEEA iterations, passing true pairs only to debug the alignments.
    results = seea.seea_iterate(beta=params['beta'],
                                max_iter=params['max_iter'],
                                max_epochs=params['max_epochs'])
    try:
        result_pairs = pd.MultiIndex.from_tuples(results)
        fscore = log_quality_results(logger, result_pairs, graph.true_pairs,
                                     len(graph.entity_pairs), params)
    except Exception as e:
        logger.error(e)
        logger.info("No Aligned pairs found.")
    ent_embeddings = seea.get_ent_embeddings()
    export_embeddings('ear', str(model), 'SEEA', graph.entity, ent_embeddings)
    result_prob = []
    for (e1, e2) in graph.entity_pairs:
        distance = abs(spatial.distance.cosine(ent_embeddings[e1],
                                               ent_embeddings[e2]))
        result_prob.append((e1, e2, distance))
    export_result_prob(dataset, 'ear', str(model), 'SEEA', graph.entity,
                       result_prob, graph.true_pairs)
    try:
        export_false_negatives(dataset, 'ear', str(model), 'SEEA', graph.entity,
                               result_prob, graph.true_pairs, result_pairs,
                               graph.entity)
        export_false_positives(dataset, 'ear', str(model), 'SEEA', graph.entity,
                               result_prob, graph.true_pairs, result_pairs,
                               graph.entity)
    except Exception as e:
        logger.error(e)
    # Log MAP, MRR and Hits@K
    ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
    prec_at_1 = ir_metrics.log_metrics(logger, params)
    seea.close_tf_session()
    return (fscore, prec_at_1)
def load(self):
    get_logger().info("Loading patches for " + self.file_name)
    hfile = HDF5File(self.file_name, 'r')
    patches = hfile[self.patch_name]
    feature_dim = patches.shape[1]
    indexes = self.indexes
    num_patches = (indexes[:, 1] - indexes[:, 0]).sum()
    self.patches = np.empty([num_patches, feature_dim])
    self.new_index = np.empty([indexes.shape[0], 2])
    patch_start = n_image = 0
    for iid in indexes:
        n_patches = iid[1] - iid[0]
        self.patches[patch_start:patch_start + n_patches, :] = \
            patches[iid[0]:iid[1], :]
        self.new_index[n_image] = [patch_start, patch_start + n_patches]
        patch_start += n_patches
        n_image += 1
    hfile.close()
    get_logger().info("Loaded " + str(num_patches) + " patches")
def scrape_category_metadata(disease_db, view_name, cat_name, cat_url):
    common.get_logger().warning(''.join(['Scraping the category metadata "',
                                         cat_name, '"...']))
    disease_db[view_name][cat_name] = dict()
    tree = get_page_tree(cat_url)
    disease_ids = tree.xpath(XPATH_DISEASE_ID)
    disease_urls_tmp = tree.xpath(XPATH_DISEASE_URL)
    disease_urls = [''.join([DISEASES_DB_BASE_URL, url])
                    for url in disease_urls_tmp]
    disease_names = tree.xpath(XPATH_DISEASE_NAME)
    for i in range(0, len(disease_names)):
        disease_name = disease_names[i]
        disease_id = disease_ids[i]
        disease_url = disease_urls[i]
        disease_db[view_name][cat_name][disease_name] = \
            add_disease(disease_db, disease_name, disease_id, disease_url)
def __init__(self):
    ''' Initialize DB Class '''
    self.__host = DB_PARAMS['address']
    self.__username = DB_PARAMS['username']
    self.__password = DB_PARAMS['password']
    self.__db = DB_PARAMS['database']
    self.__logger = get_logger("DBConnection")
    self.connect()
def __init__(self):
    self.logger = get_logger(__name__)
    self.config = get_config()
    self.bce_access_key_id = self.config['BCE_ACCESS_KEY_ID']
    self.bce_secret_access_key = self.config['BCE_SECRET_ACCESS_KEY']
    self.bce_bos_host = self.config['BCE_BOS_HOST']
    self.bce_sts_host = self.config['BCE_STS_HOST']
    self.bos_src_bucket = self.config['BOS_SRC_BUCKET']
    self.bos_storage_class = self.config['BOS_STORAGE_CLASS']
    self.bos_des_dir = self.config['BOS_DES_DIR']  # assumed key name for the destination directory
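# Hedged sketch of the mapping this constructor expects get_config() to return.
# All key names except BOS_DES_DIR appear verbatim in the assignments above;
# BOS_DES_DIR is an assumed name for the destination-directory entry, and every
# value below is a placeholder.
EXAMPLE_BOS_CONFIG = {
    'BCE_ACCESS_KEY_ID': 'your-access-key-id',
    'BCE_SECRET_ACCESS_KEY': 'your-secret-access-key',
    'BCE_BOS_HOST': 'your-bos-endpoint',
    'BCE_STS_HOST': 'your-sts-endpoint',
    'BOS_SRC_BUCKET': 'your-source-bucket',
    'BOS_STORAGE_CLASS': 'STANDARD',
    'BOS_DES_DIR': 'your/destination/dir',  # assumed key name
}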
def load_settings():
    c = load_configuration()
    ServerState.cluster_nodes = comma_string_to_list(
        c.get('CoreMQ', 'cluster_nodes', ','))
    ServerState.allowed_replicants = comma_string_to_list(
        c.get('CoreMQ', 'allowed_replicants', ''))
    ServerState.allowed_replicants.extend(ServerState.cluster_nodes)
    address = c.get('CoreMQ', 'address', '0.0.0.0')
    port = int(c.get('CoreMQ', 'port', '6747'))
    ServerState.listen_address = (address, port)
    ServerState.logger = get_logger(c, 'CoreMQ')
def __init__(self, selfAddress, partnerAddresses):
    '''
    Add connection object here if exchange is physically located in a
    separate server
    '''
    cfg = SyncObjConf(logCompactionMinEntries=2147483647,
                      logCompactionMinTime=2147483647)
    super(AdExchange, self).__init__(selfAddress, partnerAddresses, cfg)
    self.__logger = get_logger("AdExchange")
    self.__logger.info("### Starting Ad Exchange...")
    self.__auction = ADEX['auction']
    self.__reserve = ADEX['reserve']
def generate_splitFiles(available_classes, dir_name, files):
    get_logger().info("Folder " + dir_name + " contains " + str(len(files)) + " files")
    folder_name = os.path.basename(os.path.normpath(dir_name))
    if not hasNumbers(folder_name):
        # we are only interested in instance level folders (they contain numbers)
        return
    basename = nregex.match(folder_name).group(1)
    for f in files:
        #get_logger().info("Parsing " + join(dir_name, f))
        print("Parsing " + join(dir_name, f))
        old_file = join(dir_name, f)
        if os.path.isdir(old_file):
            continue
        elif not is_image(old_file):
            get_logger().info("Skipping " + old_file + ": not an image")
            continue
        if basename in available_classes:
            class_n = available_classes.index(basename)
            print(old_file + " " + str(class_n))
def translate_a_disease(original_disease, language_code='vi'):
    translated_name = translate_one_paragraph(original_disease.name,
                                              language_code)
    common.get_logger().info(''.join(['Name in "', language_code,
                                      '" language: "', translated_name, '"']))
    translated_summary = translate_one_paragraph(original_disease.summary,
                                                 language_code)
    translated_symptoms = translate_paragraphs(original_disease.symptoms,
                                               language_code)
    translated_phenotypes = translate_paragraphs(original_disease.phenotypes,
                                                 language_code)
    translated_disease = disease.Disease(name=translated_name,
                                         url=original_disease.url,
                                         disease_id=original_disease.disease_id,
                                         summary=translated_summary,
                                         symptoms=translated_symptoms,
                                         phenotypes=translated_phenotypes)
    translated_disease.is_already_scraped = True
    return translated_disease