def load(): """ If docs fail to load, new ones are created and saved. """ prefs_file_path = userfolders.get_config_dir() + PREFS_DOC recents_file_path = userfolders.get_config_dir() + RECENT_DOC global prefs, recent_projects try: prefs = utils.unpickle(prefs_file_path) except: prefs = EditorPreferences() with atomicfile.AtomicFileWriter(prefs_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(prefs, write_file) # Override deprecated preferences to default values. prefs.delta_overlay = True prefs.auto_play_in_clip_monitor = False prefs.empty_click_exits_trims = True prefs.quick_enter_trims = True prefs.remember_monitor_clip_frame = True try: recent_projects = utils.unpickle(recents_file_path) except: recent_projects = utils.EmptyClass() recent_projects.projects = [] with atomicfile.AtomicFileWriter(recents_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(recent_projects, write_file) # Remove non-existing projects from recents list remove_list = [] for proj_path in recent_projects.projects: if os.path.isfile(proj_path) == False: remove_list.append(proj_path) if len(remove_list) > 0: for proj_path in remove_list: recent_projects.projects.remove(proj_path) with atomicfile.AtomicFileWriter(recents_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(recent_projects, write_file) # Versions of program may have different prefs objects and # we may need to to update prefs on disk if user has e.g. # installed later version of Flowblade. current_prefs = EditorPreferences() if len(prefs.__dict__) != len(current_prefs.__dict__): current_prefs.__dict__.update(prefs.__dict__) prefs = current_prefs with atomicfile.AtomicFileWriter(prefs_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(prefs, write_file) print("prefs updated to new version, new param count:", len(prefs.__dict__))
def main():
    # Unpickle the preprocessed data; it is an indexable container with six entries.
    setup(isTraining=True, overwrite=False)
    print("done processing training data")
    data = unpickle("data/training.pickle")

    # A dictionary that maps every word in our vocabulary to an index.
    train_vocab = data[0]
    # A list of the tweets that we will be training on (2914 tweets).
    train_sentences = data[1]
    # print("Sentences", len(sentences))
    # An embedding matrix that maps each word to a 300-dimensional embedding.
    train_embeddings = tf.convert_to_tensor(data[2], tf.float32)
    # A dictionary that maps the index of a word to a list containing the indices of its 4 synonyms.
    train_synonym_indices = tf.convert_to_tensor(data[3], tf.int32)
    # A list of sentiment labels corresponding to tweets; labels can be -1 (negative),
    # 0 (objective), or 1 (positive). Shape (2914, 1).
    train_sentiment_labels = tf.convert_to_tensor(data[4], tf.float32)
    # A list of emotion labels corresponding to tweets; each label has 8 slots, where a 1
    # in a position means that emotion is labelled, so a tweet can be associated with
    # several emotions. Shape (2914, 8).
    train_emotion_labels = tf.convert_to_tensor(data[5], tf.float32)
    data = None

    model = Model("emotion_only_s")
    train(
        model,
        train_sentences,
        train_emotion_labels,
        train_sentiment_labels,
        train_embeddings,
        train_synonym_indices,
    )

    setup(isTraining=False, overwrite=False)
    print("done processing testing data")
    data = unpickle("data/testing.pickle")
    test_vocab = data[0]
    test_sentences = data[1]
    """for i in range(len(train_sentences)):
        train_sentences[i] = tf.convert_to_tensor(train_sentences[i], tf.int32)"""
    test_embeddings = tf.convert_to_tensor(data[2], tf.float32)
    test_synonym_indices = tf.convert_to_tensor(data[3], tf.int32)
    test_sentiment_labels = tf.convert_to_tensor(data[4], tf.float32)
    test_emotion_labels = tf.convert_to_tensor(data[5], tf.float32)
    data = None

    test(
        model,
        test_sentences,
        test_emotion_labels,
        test_sentiment_labels,
        test_embeddings,
        test_synonym_indices,
    )
def main(args):
    mkdir_if_missing(args.output_dir)
    # training data
    data = []
    labels = []
    for i in xrange(1, 6):
        dic = unpickle(osp.join(args.data_root, 'data_batch_{}'.format(i)))
        data.append(dic['data'])
        labels = np.r_[labels, dic['labels']]
    data = np.vstack(data)
    make_data(data, labels, args.output_dir, 'train')
    # test data
    dic = unpickle(osp.join(args.data_root, 'test_batch'))
    make_data(dic['data'], dic['labels'], args.output_dir, 'test')
def run():
    print('* Reading files')
    if not os.path.isdir(config['w2v_root']):
        raise FileNotFoundError('Files directory not found')
    if not os.path.isfile(os.path.join(config['w2v_root'], 'word_ids.pickle')):
        raise FileNotFoundError('File word_ids.pickle not found')
    if not os.path.isfile(os.path.join(config['w2v_root'], 'embedding_matrix.npy')):
        raise FileNotFoundError('File embedding_matrix.npy not found')

    word_id_map = unpickle(os.path.join(config['w2v_root'], 'word_ids.pickle'))
    embeddings = np.load(os.path.join(config['w2v_root'], 'embedding_matrix.npy'))

    print('* Applying TSNE')
    tsne = TSNE(n_components=2, random_state=42)
    embeddings_2d = tsne.fit_transform(embeddings)

    print('* Plotting All embeddings')
    plot_embeddings(word_id_map, embeddings_2d)

    print('* Plotting x-bounds (4.0, 4.2) and y-bounds (-0.5, -0.1)')
    plot_embeddings(word_id_map, embeddings_2d,
                    x_bounds=(4.0, 4.2), y_bounds=(-0.5, -0.1), plot_text=True)

    print('* Plotting x-bounds (14, 17) and y-bounds (4, 7.5)')
    plot_embeddings(word_id_map, embeddings_2d,
                    x_bounds=(14, 17), y_bounds=(4, 7.5), plot_text=True)
def gaussMixture(testX, goodSample, data=None, train=False, plot=False):
    if train:
        n_classes = 3
        covar_type = 'full'
        est = GMM(n_components=n_classes, covariance_type=covar_type)
        est.fit(data)
        utils.pickle(est, 'SrcTeam/capsuleData/capsule_gauss')
    else:
        est = utils.unpickle('SrcTeam/capsuleData/capsule_gauss')

    numMatch = 0.0
    numGood = goodSample.shape[0]
    testData = np.reshape(testX, (1, testX.size))
    predLabel = est.predict(testData)
    for i in range(numGood):
        if est.predict(goodSample[i:i + 1, :]) == predLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        labels = est.predict(data)
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(np.float))
        pl.show()

    return float(numMatch) / numGood
def k_means(testX, goodSample, data=None, train=False, plot=False):
    if train:
        n_clusters = 3
        est = KMeans(n_clusters)
        est.fit(data)
        centers = est.cluster_centers_
        utils.pickle(est, 'SrcTeam/capsuleData/capsule_k_means')
    else:
        est = utils.unpickle('SrcTeam/capsuleData/capsule_k_means')

    numMatch = 0.0
    numGood = goodSample.shape[0]
    #sampleLabel = clusterLabel(centers, sample)
    testLabel = est.predict(testX)
    for i in range(numGood):
        if est.predict(goodSample[i, :]) == testLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        labels = est.labels_
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(np.float))
        pl.show()

    return float(numMatch) / numGood
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i', '--inputfile', action="store", dest='inputfile', required=True,
        help='Text file containing a list of paths to bz2 files to process')
    parser.add_argument(
        '-l', '--lang', action="store", dest='lang', required=True,
        type=check_lang, help='Two-letter language tag to fetch')
    parser.add_argument(
        '-p', '--prefix', action="store", dest='prefix', default='',
        help='Classifier-specific label prefix, e.g. __label__')
    args = parser.parse_args()

    data = unpickle(args.inputfile)
    add = label(data['add'], label='neutral', prefix=args.prefix, separator='\t')
    rem = label(data['rem'], label='biased', prefix=args.prefix, separator='\t')
    split_dataset(add, rem, args.lang)
def read_misc_session_data(session_id, file_name):
    folder = _get_session_folder(session_id)
    data_path = folder + "/" + file_name
    misc_data = utils.unpickle(data_path)  # toolsencoding.ToolsRenderData object
    return misc_data
def main():
    trained_model = './trained_model.pth'
    test_batch_dir = './cifar-10/test_batch'

    classifier = CNNModel()
    classifier.load_state_dict(torch.load(trained_model))
    classifier.cuda()
    classifier.eval()

    test_x, test_y = unpickle(test_batch_dir)
    test_x, test_y = torch.tensor(
        np.reshape(test_x, (len(test_x), 3, 32, 32))).to(
            'cuda', dtype=torch.float), torch.tensor(test_y).cuda()

    classes = [
        'Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog',
        'Horse', 'Ship', 'Truck'
    ]

    # calculating the accuracy of our classifier
    print("Calculating accuracy...")
    correct = 0
    total = len(test_x)
    with torch.no_grad():
        out = classifier(test_x)
        _, predicted = torch.max(out, 1)
        # calculate the total accuracy
        correct += (predicted == test_y).sum().item()
        print('Accuracy: %5d %%' % (correct / total * 100))
def _load_effect_stack_values_dialog_callback(dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        load_path = dialog.get_filenames()[0]
        stack_data = utils.unpickle(load_path)

        for effect_data in stack_data.effects_data:
            filter_info, properties, non_mlt_properties = effect_data

            data = {"clip": _filter_stack.clip,
                    "filter_info": filter_info,
                    "filter_edit_done_func": filter_edit_done_stack_update}
            action = edit.add_filter_action(data)

            set_stack_update_blocked()
            action.do_edit()
            set_stack_update_unblocked()

            filters = _filter_stack.get_filters()
            filter_object = filters[len(filters) - 1]
            filter_object.properties = copy.deepcopy(properties)
            filter_object.non_mlt_properties = copy.deepcopy(non_mlt_properties)
            filter_object.update_mlt_filter_properties_all()

            _filter_stack.reinit_stack_item(filter_object)

    dialog.destroy()
def load_render_data():
    global _render_data
    try:
        render_data_path = _session_folder + "/" + RENDER_DATA_FILE
        _render_data = utils.unpickle(render_data_path)  # toolsencoding.ToolsRenderData object
    except:
        _render_data = None
def get_train_data(batch_no):
    train_set = unpickle("cifar10/data_batch_" + str(batch_no))
    train_data = train_set[b"data"]
    train_images = unserialize(train_data)
    labels = train_set[b"labels"]
    images = prepare_pixels(train_images)
    labels = prepare_labels(labels)
    return images, labels
def get_test_data():
    test_set = unpickle("cifar10/test_batch")
    test_data = test_set[b"data"]
    test_images = unserialize(test_data)
    labels = test_set[b"labels"]
    images = prepare_pixels(test_images)
    labels = prepare_labels(labels)
    return images, labels
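# A hypothetical driver for the two loaders above (get_train_data / get_test_data),
# only to illustrate the expected call pattern; this wrapper is not part of the
# original module, and how the five training batches are combined downstream is an
# assumption.
def load_cifar10():
    train_images, train_labels = [], []
    for batch_no in range(1, 6):  # CIFAR-10 ships five training batches
        images, labels = get_train_data(batch_no)
        train_images.append(images)
        train_labels.append(labels)
    test_images, test_labels = get_test_data()
    return train_images, train_labels, test_images, test_labels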
def __init__(self, datadir, modelname='CNRecognizer.pkl'):
    self._datadir = datadir
    self._modelname = modelname
    self._trained = False
    if os.path.exists(datadir + '/models/' + modelname):
        inst = unpickle(datadir + '/models/' + modelname)
        for att in dir(inst):
            setattr(self, att, getattr(inst, att))
def load_current_colors():
    load_path = _colors_data_path()
    colors = utils.unpickle(load_path)

    sel, bg, button = colors

    global _selected_bg_color, _bg_color, _button_colors
    _selected_bg_color = Gdk.RGBA(*sel)
    _bg_color = Gdk.RGBA(*bg)
    _button_colors = Gdk.RGBA(*button)
def __init__(self, image_size=IMAGE_SIZE, calibration_file=CALIBRATION_PICKLE_FILE):
    # Get camera calibration
    points_object, points_image = (utils.unpickle(calibration_file)
                                   if os.path.exists(calibration_file)
                                   else self._calibrate())
    # Get mtx and dist for undistorting new images
    _, self.mtx, self.dist, _, _ = cv2.calibrateCamera(
        points_object, points_image, image_size, None, None)
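# Once mtx and dist have been computed by cv2.calibrateCamera above, new frames are
# typically undistorted with cv2.undistort. The method below is a hedged sketch of
# such a helper; the original class may expose a different name or API.
def undistort(self, image):
    # Remove lens distortion using the stored camera matrix and distortion coefficients.
    return cv2.undistort(image, self.mtx, self.dist, None, self.mtx)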
def main(args):
    mkdir_if_missing(args.output_dir)
    # training data
    data = []
    labels = []
    for i in xrange(1, 6):
        dic = unpickle(osp.join(args.data_root, 'data_batch_{}'.format(i)))
        data.append(dic['data'])
        labels = np.r_[labels, dic['labels']]
    data = np.vstack(data)
    make_data(data, labels, args.output_dir, 'train')
    # test data
    dic = unpickle(osp.join(args.data_root, 'test_batch'))
    make_data(dic['data'], dic['labels'], args.output_dir, 'test')
    # Identity for confusion initialization
    matrix_I = np.identity(10)
    write_matrix(matrix_I, osp.join(args.output_dir, 'identity.txt'))
    pickle(matrix_I, osp.join(args.output_dir, 'identity.pkl'))
def read_CIFAR_100(cifar_path, train=True):
    """
    :param cifar_path: data path for cifar-100
    :param train: check if it is the train mode
    :return: data and its labels
    Note:
        data (#samples, 32, 32, 3)
        labels (#samples, 100)
    """
    data = []
    labels = []
    if train:
        # If reading train set
        file_name = cifar_path + "train"
        data_dict = utils.unpickle(file_name)
        batch_data = data_dict[b"data"]
        batch_labels = data_dict[b"fine_labels"]
        data.append(batch_data)
        labels.append(batch_labels)
    else:
        # If reading test set
        file_name = cifar_path + "test"
        data_dict = utils.unpickle(file_name)
        batch_data = data_dict[b"data"]
        batch_labels = data_dict[b"fine_labels"]
        data.append(batch_data)
        labels.append(batch_labels)

    data = np.asarray(data)
    data = np.reshape(data, (data.shape[0] * data.shape[1], 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = np.asarray(labels)
    labels = np.reshape(labels, (labels.shape[0] * labels.shape[1], )).tolist()
    labels = utils.index_to_one_hot(labels, 100)
    return data, labels
def __init__(self):
    append = lambda x: '/data2/andrewliao11/cifar-10-batches-py/data_batch_' + x
    self.train_filename = [append(str(i + 1)) for i in range(5)]
    self.test_filename = '/data2/andrewliao11/cifar-10-batches-py/test_batch'
    self.num_train = 50000
    self.num_test = 10000
    self.input_size = 3072
    self.imgs = np.zeros([self.num_train, self.input_size], dtype='float32')
    self.labels = np.zeros([self.num_train], dtype='int32')
    self.current = 0
    for i in range(5):
        data_batch = utils.unpickle(self.train_filename[i])
        self.imgs[i * 10000:(i + 1) * 10000] = data_batch['data'] / 255.
        self.labels[i * 10000:(i + 1) * 10000] = np.asarray(data_batch['labels'])
    data_batch = utils.unpickle(self.test_filename)
    self.test_imgs = data_batch['data'] / 255.
    self.test_labels = np.asarray(data_batch['labels'])
def __init__(self):
    self.trainX = []
    self.trainY = []
    data_dir = './cifar-10/training batches'
    batches = os.listdir(data_dir)
    for batch in batches:
        batch_data, batch_labels = unpickle(os.path.join(data_dir, batch))
        self.trainX.extend(batch_data)
        self.trainY.extend(batch_labels)
def __load_data(self, test_data_path):
    if not os.path.exists(test_data_path):
        raise InvalidTestSetPath('invalid test data path: {}'.format(test_data_path))
    test = unpickle(test_data_path)
    test_data = test[b'data']
    x_test = test_data.reshape(test_data.shape[0], 3, 32, 32)
    x_test = x_test.transpose(0, 2, 3, 1)
    x_test = norm_images(x_test)
    y_test = test[b'fine_labels']
    return x_test, y_test
def read_CIFAR_10(cifar_path, train=True):
    """
    Assumes the raw CIFAR-10 data is located in cifar_path, reads the dataset,
    and returns it as numpy arrays:
        data (#samples, 32, 32, 3)
        labels (#samples, 10)
    The boolean argument train determines whether the train or test set is read.
    """
    data = []
    labels = []
    if train:
        # If reading train set
        for i in range(1, 6):
            file_name = cifar_path + "data_batch_" + str(i)
            data_dict = utils.unpickle(file_name)
            batch_data = data_dict[b"data"]
            batch_labels = data_dict[b"labels"]
            data.append(batch_data)
            labels.append(batch_labels)
    else:
        # If reading test set
        file_name = cifar_path + "test_batch"
        data_dict = utils.unpickle(file_name)
        batch_data = data_dict[b"data"]
        batch_labels = data_dict[b"labels"]
        data.append(batch_data)
        labels.append(batch_labels)

    data = np.asarray(data)
    data = np.reshape(data, (data.shape[0] * data.shape[1], 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = np.asarray(labels)
    labels = np.reshape(labels, (labels.shape[0] * labels.shape[1], )).tolist()
    labels = utils.index_to_one_hot(labels, 10)
    return data, labels
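# Example use of read_CIFAR_10, following its docstring. The directory name and the
# printed shapes are assumptions based on the standard python-version CIFAR-10
# archive, not part of the original code.
train_x, train_y = read_CIFAR_10("cifar-10-batches-py/", train=True)
test_x, test_y = read_CIFAR_10("cifar-10-batches-py/", train=False)
print(train_x.shape)  # expected (50000, 32, 32, 3) per the docstring
print(test_x.shape)   # expected (10000, 32, 32, 3)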
def get_labels(self):
    if not self.is_valid_path:
        print("ALERT: Path is not valid or does not contain all cifar 10 files.")
        return
    if self.label_names == []:
        # Unpickle batches.meta and get label names
        f = unpickle(os.path.join(self.data_dir, 'batches.meta'), quiet=True)
        self.label_names = [lb.decode() for lb in f[b'label_names']]
    return self.label_names
def _load(self, filenames):
    images, labels = None, []
    for i, filename in enumerate(filenames):
        datafile = utils.unpickle(filename)
        if i == 0:
            images = datafile['data']
        else:
            images = np.append(images, datafile['data'], axis=0)
        labels.extend(datafile['labels'])
    print(images.shape, len(labels))
    return images, utils.onehot(np.asarray(labels), label_size=self.labels_size)
def load(self):
    data = []
    if self.training:
        for i in range(1, 6):
            filename = '{}/data_batch_{}'.format(self.path, i)
            batch_data = unpickle(filename)
            if len(data) > 0:
                data = np.vstack((data, batch_data[b'data']))
            else:
                data = batch_data[b'data']
    else:
        filename = '{}/test_batch'.format(self.path)
        batch_data = unpickle(filename)
        data = batch_data[b'data']

    w = 32
    h = 32
    s = w * h
    data = np.array(data)
    data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
    data = data.reshape((-1, w, h, 3))
    return data
def wait_run_end(workers_results, model, timeout=None):
    # TODO: use timeout
    weights = pickle.dumps(model.get_weights())
    for w, res in workers_results.items():
        while not res.ready:
            sleep(1)
        res = utils.unpickle(res.value)
        grads = res["grads"]
        model.add_grads(grads)
        new_res = Pyro4.Future(w.run)(weights)
        workers_results[w] = new_res
def load_model(model_params, contF=True):
    from dcgan import DCGAN
    import os
    model = DCGAN(model_params, ltype=os.environ['LTYPE'])
    if contF:
        # print '...Continuing from last time'
        from utils import unpickle
        _model = unpickle(os.environ['LOAD_PATH'])
        np_gen_params = [param.get_value() for param in _model.gen_network.params]
        np_dis_params = [param.get_value() for param in _model.dis_network.params]
        model.load(np_dis_params, np_gen_params, verbose=False)
    return model
def _load_compositor_values_dialog_callback(dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        load_path = dialog.get_filenames()[0]
        compositor_data = utils.unpickle(load_path)

        if compositor_data.data_applicable(compositor.transition.info):
            compositor_data.set_values(compositor)
            set_compositor(compositor)
        else:
            saved_name_comp_name = mlttransitions.name_for_type[compositor_data.info.name]
            current_comp_name = mlttransitions.name_for_type[compositor.transition.info.name]
            primary_txt = _("Saved Compositor data not applicable for this compositor!")
            secondary_txt = _("Saved data is for ") + saved_name_comp_name + _(" compositor,\n") + _("current compositor is ") + current_comp_name + "."
            dialogutils.warning_message(primary_txt, secondary_txt, gui.editor_window.window)

    dialog.destroy()
def _load_layers_dialog_callback(self, dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        try:
            filenames = dialog.get_filenames()
            load_path = filenames[0]
            new_data = utils.unpickle(load_path)
            global _titler_data
            _titler_data = new_data
            self.load_titler_data()
        except:
            dialog.destroy()
            # INFOWINDOW
            return

        dialog.destroy()
    else:
        dialog.destroy()
def parse_test_data(input_path):
    # Load pickled source file
    try:
        data = utils.unpickle(os.path.join(input_path, TEST_DATA_FILENAME))
    except Exception as ex:
        logging.error('Failed to load input files from: ' + input_path)
        logging.error('Exception: %s', ex)
        exit(1)

    # Prepare features
    features = data[b'data']
    features = features.reshape(features.shape[0], 3, 32, 32)
    features = features.transpose(0, 2, 3, 1).astype('uint8')

    # Prepare labels
    labels = np.asarray(data[b'labels'])

    return features, labels
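# Hypothetical usage of parse_test_data; TEST_DATA_FILENAME is defined elsewhere in
# the original module (for the python-version CIFAR archives it is usually
# "test_batch"), and the directory name below is an assumption.
features, labels = parse_test_data("./cifar-10-batches-py")
print(features.shape, labels.shape)  # expected (10000, 32, 32, 3) and (10000,)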
def _load_images_labels(self, files):
    raw_images = None
    labels = None
    for f in files:
        data = unpickle(f, quiet=True)

        # Get the raw images
        if raw_images is None:
            raw_images = data[b'data'].astype(dtype='float32')
        else:
            raw_images = np.vstack((raw_images, data[b'data'].astype(dtype='float32')))

        # Get the labels
        if labels is None:
            labels = data[b'labels']
        else:
            # Stack labels together
            labels = np.concatenate((labels, data[b'labels']))

    return labels, raw_images
def _param_run(self, param_set: ParamSet) -> Tuple[ExperimentResults, RunnerUUID]:
    log(f'Running param set: {param_set}')
    uuid = hash_dict(param_set)
    if self._experiment_result_exists(uuid):
        log('Loading experiment results from cache')
        log(uuid)
        experiment_results = unpickle(self._file_path_experiment_results(uuid))
    else:
        log(f'Running uuid {uuid}')
        experiment_results = train_kd(**param_set)
        pickle_object(experiment_results, self._file_path_experiment_results(uuid))
    return experiment_results, uuid
def _load_effect_values_dialog_callback(dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        load_path = dialog.get_filenames()[0]
        effect_data = utils.unpickle(load_path)

        filter_object = clip.filters[current_filter_index]

        if effect_data.data_applicable(filter_object.info):
            effect_data.set_effect_values(filter_object)
            effect_selection_changed()
        else:
            # Info window
            saved_effect_name = effect_data.info.name
            current_effect_name = filter_object.info.name
            primary_txt = _("Saved Filter data not applicable for this Filter!")
            secondary_txt = _("Saved data is for ") + saved_effect_name + " Filter,\n" + _("current edited Filter is ") + current_effect_name + "."
            dialogutils.warning_message(primary_txt, secondary_txt, gui.editor_window.window)

    dialog.destroy()
def registerInitialState(self, gameState):
    """
    Do any necessary initialization
    """
    super(ActionBasisAgent, self).registerInitialState(gameState)

    # Here, you may do any necessary initialization, e.g., import some
    # parameters you've learned, as in the following commented out lines
    # learned_params = cPickle.load("myparams.pkl")
    # learned_params = np.load("myparams.npy")

    # get all the allowed actions, encode them for learners
    self.actions = self.actionBasis.allActions[:]
    self.actionCodes = {action: i for (i, action) in enumerate(self.actions)}

    # remember basis function dimensions
    self.basis_dimensions = self.basis.dimensions

    # try to load the learner from a file
    if (self.learn_file and os.path.isfile(self.learn_file)
            and not self.restart_learning):
        self.learner = utils.unpickle(self.learn_file)
        self.learner.reset()
    else:
        self.learner = self.learner_class(self.basis_dimensions,
                                          self.actionCodes.values())
        if self.use_initializer:
            if (self.basis.__name__, self.actionBasis.__name__) in basis.initializers:
                basis.initializers[(self.basis.__name__, self.actionBasis.__name__)](self.learner)
                print "Initialized successfully"
            else:
                print "No initializer found"
                raise Exception("Whoops")

    # initialize score
    self.score = 0

    # count number of actions taken
    self.action_count = 0
def merge_to_final_output(oplog_output_file, profiler_output_file, output_file):
    """
    * Why merge files: we need to merge the docs from two sources into one.
    * Why not merge earlier: it's definitely inefficient to merge the entries
      when we just retrieve these documents from mongodb. However we designed
      this script to be able to pull the docs from different servers, as a
      result it's hard to do the on-time merge since you cannot determine if
      some "old" entries will come later.
    """
    oplog = open(oplog_output_file, "rb")
    profiler = open(profiler_output_file, "rb")
    output = open(output_file, "wb")
    logger = utils.LOG
    logger.info("Starts completing the insert options")

    oplog_doc = utils.unpickle(oplog)
    profiler_doc = utils.unpickle(profiler)
    inserts = 0
    noninserts = 0
    severe_inconsistencies = 0
    mild_inconsistencies = 0

    while oplog_doc and profiler_doc:
        if (noninserts + inserts) % 2500 == 0:
            logger.info("processed %d items", noninserts + inserts)

        if profiler_doc["op"] != "insert":
            dump_op(output, profiler_doc)
            noninserts += 1
            profiler_doc = utils.unpickle(profiler)
        else:
            # Replace the profiler's insert operation doc with oplog's,
            # but keep the canonical form of "ts".
            profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
            oplog_ts = oplog_doc["ts"].time
            # Only care about the second-level precision.
            # This is a lame enforcement of consistency.
            delta = abs(profiler_ts - oplog_ts)
            if delta > 3:
                # TODO strictly speaking, this ain't good since the files are
                # not properly closed.
                logger.error(
                    "oplog and profiler results are inconsistent `ts`\n"
                    "  oplog:    %d\n"
                    "  profiler: %d", oplog_ts, profiler_ts)
                severe_inconsistencies += 1
            elif delta != 0:
                logger.warn("Slightly inconsistent timestamp\n"
                            "  oplog:    %d\n"
                            "  profiler: %d", oplog_ts, profiler_ts)
                mild_inconsistencies += 1

            oplog_doc["ts"] = profiler_doc["ts"]
            # make sure "op" is "insert" instead of "i".
            oplog_doc["op"] = profiler_doc["op"]
            dump_op(output, oplog_doc)
            inserts += 1
            oplog_doc = utils.unpickle(oplog)
            profiler_doc = utils.unpickle(profiler)

    while profiler_doc and profiler_doc["op"] != "insert":
        dump_op(output, profiler_doc)
        noninserts += 1
        profiler_doc = utils.unpickle(profiler)

    logger.info("Finished completing the insert options, %d inserts and"
                " %d noninserts\n"
                "  severe ts inconsistencies: %d\n"
                "  mild ts inconsistencies: %d\n", inserts, noninserts,
                severe_inconsistencies, mild_inconsistencies)

    for f in [oplog, profiler, output]:
        f.close()
    return True
def _load(fname):
    fpath = osp.join(output_dir, fname)
    assert osp.isfile(fpath), "Must have extracted detections and " \
                              "features first before evaluation"
    return unpickle(fpath)
def analyzer_main():
    print 'Loading data...'
    cfda_agency_map = dict((p.program_number, p.agency_id)
                           for p in Program.objects.all())
    awards = unpickle(os.path.join(DATA_DIR, 'cfda_awards.out.bin'))

    print 'Building cube'
    c = Cube()

    for (idx, (award_id, award)) in enumerate(awards.iteritems()):
        # Simple progress ticker
        if idx % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        cfda = award['cfda']
        agency = cfda_agency_map.get(cfda, None)
        if agency:
            fed_amount = award['fed_amount']
            fiscal_year = award['fiscal_year']
            reporting_lag = award['reporting_lag']
            fiscal_year_lag = award['fiscal_year_lag']

            # select only fy 2007-2009 inclusive
            if fiscal_year not in FISCAL_YEARS:
                continue

            # We need to set an upper bound on the fiscal year lag in order to
            # make comparisons between fiscal years useful.
            if fiscal_year_lag > FISCAL_YEAR_LAG_THRESHOLD:
                continue

            # reporting lag of negative days converted to 0
            reporting_lag = reporting_lag if reporting_lag > 0 else 0

            # add record to data cube
            c.add({'fy': fiscal_year, 'cfda': cfda, 'agency': agency},
                  {'days': reporting_lag, 'dollars': fed_amount})

    awards = None

    print 'Querying cfda aggregates...'
    result = c.query(groups=['cfda', 'fy'])

    print 'Loading cfda results into db...'
    ProgramTimeliness.objects.all().delete()
    for (cfda, cfda_results) in result.values.iteritems():
        program = Program.objects.get(program_number=cfda)
        for fy in FISCAL_YEARS:
            cfda_fy_results = cfda_results.values[fy]
            metric = ProgramTimeliness.objects.create(
                program=program,
                agency=program.agency,
                fiscal_year=fy,
                late_dollars=cfda_fy_results.get_data(sum_dollars_45days_late),
                late_rows=cfda_fy_results.get_data(count_records_45days_late),
                total_dollars=cfda_fy_results.get_data(sum_dollars),
                total_rows=cfda_fy_results.get_data(len),
                avg_lag_rows=cfda_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=cfda_fy_results.get_data(avg_days_by_dollars)
            )
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
                metric.save()

    print 'Querying agency aggregates...'
    result = c.query(groups=['agency', 'fy'])

    print 'Loading agency results into db...'
    AgencyTimeliness.objects.all().delete()
    for (agency_id, agency_results) in result.values.iteritems():
        agency = Agency.objects.get(pk=agency_id)
        for fy in FISCAL_YEARS:
            agency_fy_results = agency_results.values[fy]
            metric = AgencyTimeliness.objects.create(
                agency=agency,
                fiscal_year=fy,
                late_dollars=agency_fy_results.get_data(sum_dollars_45days_late),
                late_rows=agency_fy_results.get_data(count_records_45days_late),
                total_dollars=agency_fy_results.get_data(sum_dollars),
                total_rows=agency_fy_results.get_data(len),
                avg_lag_rows=agency_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=agency_fy_results.get_data(avg_days_by_dollars)
            )
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
                metric.save()
def evaluate(protoc, images, result_dir):
    gallery_det = unpickle(osp.join(result_dir, 'gallery_detections.pkl'))
    gallery_feat = unpickle(osp.join(result_dir, 'gallery_features.pkl'))
    gallery_det = gallery_det[1]
    gallery_feat = gallery_feat[1]
    probe_feat = unpickle(osp.join(result_dir, 'probe_features.pkl'))

    assert len(images) == len(gallery_det)
    assert len(images) == len(gallery_feat)
    name_to_det_feat = {}
    for name, det, feat in zip(images, gallery_det, gallery_feat):
        scores = det[:, 4].ravel()
        inds = np.where(scores >= 0.5)[0]
        if len(inds) > 0:
            det = det[inds]
            feat = feat[inds]
            name_to_det_feat[name] = (det, feat)

    num_probe = len(protoc)
    assert len(probe_feat) == num_probe
    aps = []
    top1_acc = []
    for i in xrange(num_probe):
        y_true, y_score = [], []
        feat_p = probe_feat[i][np.newaxis, :]
        count_gt = 0
        count_tp = 0
        for item in protoc['Gallery'][i].squeeze():
            gallery_imname = str(item[0][0])
            gt = item[1][0].astype(np.int32)
            count_gt += (gt.size > 0)
            if gallery_imname not in name_to_det_feat:
                continue
            det, feat_g = name_to_det_feat[gallery_imname]
            dis = np.sum((feat_p - feat_g) ** 2, axis=1)
            label = np.zeros(len(dis), dtype=np.int32)
            if gt.size > 0:
                w, h = gt[2], gt[3]
                gt[2:] += gt[:2]
                thresh = min(0.5, (w * h * 1.0) / ((w + 10) * (h + 10)))
                inds = np.argsort(dis)
                dis = dis[inds]
                # set the label of the first box matched to gt to 1
                for j, roi in enumerate(det[inds, :4]):
                    if _compute_iou(roi, gt) >= thresh:
                        label[j] = 1
                        count_tp += 1
                        break
            y_true.extend(list(label))
            y_score.extend(list(-dis))
        assert count_tp <= count_gt
        recall_rate = count_tp * 1.0 / count_gt
        ap = average_precision_score(y_true, y_score) * recall_rate
        if not np.isnan(ap):
            aps.append(ap)
        else:
            aps.append(0)
        maxind = np.argmax(y_score)
        top1_acc.append(y_true[maxind])

    print 'mAP: {:.2%}'.format(np.mean(aps))
    print 'top-1: {:.2%}'.format(np.mean(top1_acc))
def merge_to_final_output(oplog_output_file, profiler_output_files, output_file):
    """
    * Why merge files: we need to merge the docs from two sources into one.
    * Why not merge earlier: it's definitely inefficient to merge the entries
      when we just retrieve these documents from mongodb. However we designed
      this script to be able to pull the docs from different servers, as a
      result it's hard to do the on-time merge since you cannot determine if
      some "old" entries will come later.
    """
    oplog = open(oplog_output_file, "rb")
    # create a map of profiler file names to files
    profiler_files = {}
    for profiler_file in profiler_output_files:
        profiler_files[profiler_file] = open(profiler_file, "rb")
    output = open(output_file, "wb")
    logger = utils.LOG
    logger.info("Starts completing the insert options")

    oplog_doc = utils.unpickle(oplog)

    # Create a map of tuple(doc's timestamp, profiler file name) to doc for
    # each profiler. This makes it easy to fetch the earliest doc in the group
    # on each iteration.
    profiler_docs = {}
    for file_name in profiler_files:
        doc = utils.unpickle(profiler_files[file_name])
        if doc:
            profiler_docs[(doc["ts"], file_name)] = doc

    inserts = 0
    noninserts = 0
    severe_inconsistencies = 0
    mild_inconsistencies = 0

    # read docs until we exhaust either the oplog or all ops in the profile logs
    while oplog_doc and len(profiler_docs) > 0:
        if (noninserts + inserts) % 2500 == 0:
            logger.info("processed %d items", noninserts + inserts)

        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one from the same file;
        # the second field in the key is the file name
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc

        # If the retrieved operation is not an insert, we can simply dump it
        # to the output file. Otherwise, we need to cross-reference the
        # profiler's insert operation with an oplog entry (because the
        # profiler doesn't contain the inserted object's details).
        if profiler_doc["op"] != "insert":
            dump_op(output, profiler_doc)
            noninserts += 1
        else:
            # Compare the profile doc's ts with the oplog doc's ts. In the
            # ideal scenario, every insert we capture via the profile
            # collection should match a consecutive oplog entry (the oplog
            # tailer only looks at insert ops).
            profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
            oplog_ts = oplog_doc["ts"].time
            delta = abs(profiler_ts - oplog_ts)
            if delta > 3:
                # TODO strictly speaking, this ain't good since the files are
                # not properly closed.
                logger.error(
                    "oplog and profiler results are inconsistent `ts`\n"
                    "  oplog:    %d\n"
                    "  profiler: %d", oplog_ts, profiler_ts)
                severe_inconsistencies += 1
            elif delta != 0:
                logger.warn("Slightly inconsistent timestamp\n"
                            "  oplog:    %d\n"
                            "  profiler: %d", oplog_ts, profiler_ts)
                mild_inconsistencies += 1

            # we still want to keep the canonical form of the ts
            oplog_doc["ts"] = profiler_doc["ts"]
            # make sure "op" is "insert" instead of "i"
            oplog_doc["op"] = profiler_doc["op"]
            dump_op(output, oplog_doc)
            inserts += 1
            # Get the next doc from the oplog
            oplog_doc = utils.unpickle(oplog)

    # finish up any remaining non-insert ops
    while len(profiler_docs) > 0:
        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc
        if profiler_doc["op"] == "insert":
            break
        dump_op(output, profiler_doc)
        noninserts += 1

    logger.info("Finished completing the insert options, %d inserts and"
                " %d noninserts\n"
                "  severe ts inconsistencies: %d\n"
                "  mild ts inconsistencies: %d\n", inserts, noninserts,
                severe_inconsistencies, mild_inconsistencies)

    for f in [oplog, output]:
        f.close()
    for f in profiler_files.values():
        f.close()

    # Clean up temporary files (oplog + profiler files), since everything is
    # already in the main output file
    for f in profiler_output_files:
        os.remove(f)
    os.remove(oplog_output_file)

    return True
import numpy as np
#import SrcTeam.utils as utils
#import SrcTeam.classifyCapsule as classify
import utils
import classifyCapsule as classify

"""from sklearn import datasets
data = datasets.load_iris().data
classify.k_means(data[0,:], goodSample=data[1:3,:],
                 data=data, train=True, plot=True)"""

data = utils.unpickle('SrcTeam/clusterData/capsule_train')

#classify.k_means(data[0,:], goodSample=data[1:3,:],
#                 data=data, train=True, plot=True)

print classify.gaussMixture(data[1,:], goodSample=data[1:10,:],
                            data=data, train=True, plot=False)

#print closest((1,1))
def gt_roidb(self):
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.isfile(cache_file):
        roidb = unpickle(cache_file)
        return roidb

    # Load all images and build a dict from image to boxes
    all_imgs = loadmat(osp.join(self._root_dir, 'annotation', 'Images.mat'))
    all_imgs = all_imgs['Img'].squeeze()
    name_to_boxes = {}
    name_to_pids = {}
    for im_name, __, boxes in all_imgs:
        im_name = str(im_name[0])
        boxes = np.asarray([b[0] for b in boxes[0]])
        boxes = boxes.reshape(boxes.shape[0], 4)
        valid_index = np.where((boxes[:, 2] > 0) & (boxes[:, 3] > 0))[0]
        assert valid_index.size > 0, \
            'Warning: {} has no valid boxes.'.format(im_name)
        boxes = boxes[valid_index]
        name_to_boxes[im_name] = boxes.astype(np.int32)
        name_to_pids[im_name] = -1 * np.ones(boxes.shape[0], dtype=np.int32)

    def _set_box_pid(boxes, box, pids, pid):
        for i in xrange(boxes.shape[0]):
            if np.all(boxes[i] == box):
                pids[i] = pid
                return
        print 'Warning: person {} box {} cannot find in Images'.format(pid, box)

    # Load all the train / test persons and label their pids from 0 to N-1.
    # Assign pid = -1 for unlabeled background people.
    if self._image_set == 'train':
        train = loadmat(osp.join(self._root_dir,
                                 'annotation/test/train_test/Train.mat'))
        train = train['Train'].squeeze()
        for index, item in enumerate(train):
            scenes = item[0, 0][2].squeeze()
            for im_name, box, __ in scenes:
                im_name = str(im_name[0])
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)
    else:
        test = loadmat(osp.join(self._root_dir,
                                'annotation/test/train_test/TestG50.mat'))
        test = test['TestG50'].squeeze()
        for index, item in enumerate(test):
            # query
            im_name = str(item['Query'][0, 0][0][0])
            box = item['Query'][0, 0][1].squeeze().astype(np.int32)
            _set_box_pid(name_to_boxes[im_name], box,
                         name_to_pids[im_name], index)
            # gallery
            gallery = item['Gallery'].squeeze()
            for im_name, box, __ in gallery:
                im_name = str(im_name[0])
                if box.size == 0:
                    break
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)

    # Construct the gt_roidb
    gt_roidb = []
    for im_name in self.image_index:
        boxes = name_to_boxes[im_name]
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        pids = name_to_pids[im_name]
        num_objs = len(boxes)
        gt_classes = np.ones((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        overlaps[:, 1] = 1.0
        overlaps = csr_matrix(overlaps)
        gt_roidb.append({
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'gt_pids': pids,
            'flipped': False})

    pickle(gt_roidb, cache_file)
    print "wrote gt roidb to {}".format(cache_file)
    return gt_roidb
import sys
from argparse import ArgumentParser
from ast import literal_eval

if 'external/caffe/python' not in sys.path:
    sys.path.insert(0, 'external/caffe/python')
import caffe

from utils import unpickle


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('source', help="Pickled data file")
    parser.add_argument('output', help="Output file path")
    parser.add_argument('--shape', help="(num, channels, height, width)")
    args = parser.parse_args()

    data = unpickle(args.source)
    if args.shape is not None:
        data = data.reshape(literal_eval(args.shape))
    else:
        shape = data.shape
        shape = (1,) * (4 - len(shape)) + shape
        data = data.reshape(shape)

    blob = caffe.proto.caffe_pb2.BlobProto()
    blob.num, blob.channels, blob.height, blob.width = data.shape
    blob.data.extend(list(data.ravel().astype(float)))

    with open(args.output, 'wb') as f:
        f.write(blob.SerializeToString())
def merge_to_final_output(oplog_output_file, profiler_output_files, output_file):
    """
    * Why merge files: we need to merge the docs from two sources into one.
    * Why not merge earlier: it's definitely inefficient to merge the entries
      when we just retrieve these documents from mongodb. However we designed
      this script to be able to pull the docs from different servers, as a
      result it's hard to do the on-time merge since you cannot determine if
      some "old" entries will come later.
    """
    oplog = open(oplog_output_file, "rb")
    # create a map of profiler file names to files
    profiler_files = {}
    for profiler_file in profiler_output_files:
        profiler_files[profiler_file] = open(profiler_file, "rb")
    output = open(output_file, "wb")
    logger = utils.LOG
    logger.info("Starts completing the insert options")

    oplog_doc = utils.unpickle(oplog)

    # create a map of (doc ts, profiler file name) to doc
    profiler_docs = {}
    for file_name in profiler_files:
        doc = utils.unpickle(profiler_files[file_name])
        # associate doc with a tuple of the ts and source filename; this makes
        # it easy to fetch the earliest doc in the group on each iteration
        if doc:
            profiler_docs[(doc["ts"], file_name)] = doc

    inserts = 0
    noninserts = 0
    severe_inconsistencies = 0
    mild_inconsistencies = 0

    # read docs until we exhaust either the oplog or all ops in the profile logs
    while oplog_doc and len(profiler_docs) > 0:
        if (noninserts + inserts) % 2500 == 0:
            logger.info("processed %d items", noninserts + inserts)

        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one from the same file;
        # the second field in the key is the file name
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc

        if profiler_doc["op"] != "insert":
            dump_op(output, profiler_doc)
            noninserts += 1
        else:
            # Replace the profiler's insert operation doc with oplog's,
            # but keep the canonical form of "ts".
            profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
            oplog_ts = oplog_doc["ts"].time
            # Only care about the second-level precision.
            # This is a lame enforcement of consistency.
            delta = abs(profiler_ts - oplog_ts)
            if delta > 3:
                # TODO strictly speaking, this ain't good since the files are
                # not properly closed.
                logger.error(
                    "oplog and profiler results are inconsistent `ts`\n"
                    "  oplog:    %d\n"
                    "  profiler: %d",
                    oplog_ts, profiler_ts,
                )
                severe_inconsistencies += 1
            elif delta != 0:
                logger.warn(
                    "Slightly inconsistent timestamp\n"
                    "  oplog:    %d\n"
                    "  profiler: %d",
                    oplog_ts, profiler_ts
                )
                mild_inconsistencies += 1

            oplog_doc["ts"] = profiler_doc["ts"]
            # make sure "op" is "insert" instead of "i"
            oplog_doc["op"] = profiler_doc["op"]
            dump_op(output, oplog_doc)
            inserts += 1
            oplog_doc = utils.unpickle(oplog)

    # finish up any remaining non-insert ops
    while len(profiler_docs) > 0:
        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc
        if profiler_doc["op"] == "insert":
            break
        dump_op(output, profiler_doc)
        noninserts += 1

    logger.info(
        "Finished completing the insert options, %d inserts and"
        " %d noninserts\n"
        "  severe ts inconsistencies: %d\n"
        "  mild ts inconsistencies: %d\n",
        inserts, noninserts, severe_inconsistencies, mild_inconsistencies,
    )

    for f in [oplog, output]:
        f.close()
    for f in profiler_files.values():
        f.close()
    return True