def load(): """ If docs fail to load, new ones are created and saved. """ prefs_file_path = userfolders.get_config_dir() + PREFS_DOC recents_file_path = userfolders.get_config_dir() + RECENT_DOC global prefs, recent_projects try: prefs = utils.unpickle(prefs_file_path) except: prefs = EditorPreferences() with atomicfile.AtomicFileWriter(prefs_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(prefs, write_file) # Override deprecated preferences to default values. prefs.delta_overlay = True prefs.auto_play_in_clip_monitor = False prefs.empty_click_exits_trims = True prefs.quick_enter_trims = True prefs.remember_monitor_clip_frame = True try: recent_projects = utils.unpickle(recents_file_path) except: recent_projects = utils.EmptyClass() recent_projects.projects = [] with atomicfile.AtomicFileWriter(recents_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(recent_projects, write_file) # Remove non-existing projects from recents list remove_list = [] for proj_path in recent_projects.projects: if os.path.isfile(proj_path) == False: remove_list.append(proj_path) if len(remove_list) > 0: for proj_path in remove_list: recent_projects.projects.remove(proj_path) with atomicfile.AtomicFileWriter(recents_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(recent_projects, write_file) # Versions of program may have different prefs objects and # we may need to to update prefs on disk if user has e.g. # installed later version of Flowblade. current_prefs = EditorPreferences() if len(prefs.__dict__) != len(current_prefs.__dict__): current_prefs.__dict__.update(prefs.__dict__) prefs = current_prefs with atomicfile.AtomicFileWriter(prefs_file_path, "wb") as afw: write_file = afw.get_file() pickle.dump(prefs, write_file) print("prefs updated to new version, new param count:", len(prefs.__dict__))
def main():
    # Unpickle the preprocessed data; it is an indexable container with six entries.
    setup(isTraining=True, overwrite=False)
    print("done processing training data")
    data = unpickle("data/training.pickle")

    # A dictionary that maps every word in our vocabulary to an index.
    train_vocab = data[0]
    # A list of the tweets that we will be training on (2914 tweets).
    train_sentences = data[1]
    # print("Sentences", len(sentences))
    # An embedding matrix that maps each word to a 300-dimensional embedding.
    train_embeddings = tf.convert_to_tensor(data[2], tf.float32)
    # A dictionary that maps the index of a word to a list containing the indices of its 4 synonyms.
    train_synonym_indices = tf.convert_to_tensor(data[3], tf.int32)
    # A list of sentiment labels corresponding to tweets; labels can be -1 (negative),
    # 0 (objective), or 1 (positive). Shape (2914, 1).
    train_sentiment_labels = tf.convert_to_tensor(data[4], tf.float32)
    # A list of emotion labels corresponding to tweets; each label has 8 slots, where a 1
    # in a position means that emotion is labelled, so a tweet can be associated with
    # several emotions. Shape (2914, 8).
    train_emotion_labels = tf.convert_to_tensor(data[5], tf.float32)
    data = None

    model = Model("emotion_only_s")
    train(
        model,
        train_sentences,
        train_emotion_labels,
        train_sentiment_labels,
        train_embeddings,
        train_synonym_indices,
    )

    setup(isTraining=False, overwrite=False)
    print("done processing testing data")
    data = unpickle("data/testing.pickle")
    test_vocab = data[0]
    test_sentences = data[1]
    """for i in range(len(train_sentences)):
        train_sentences[i] = tf.convert_to_tensor(train_sentences[i], tf.int32)"""
    test_embeddings = tf.convert_to_tensor(data[2], tf.float32)
    test_synonym_indices = tf.convert_to_tensor(data[3], tf.int32)
    test_sentiment_labels = tf.convert_to_tensor(data[4], tf.float32)
    test_emotion_labels = tf.convert_to_tensor(data[5], tf.float32)
    data = None

    test(
        model,
        test_sentences,
        test_emotion_labels,
        test_sentiment_labels,
        test_embeddings,
        test_synonym_indices,
    )
def main(args):
    mkdir_if_missing(args.output_dir)
    # training data
    data = []
    labels = []
    for i in xrange(1, 6):
        dic = unpickle(osp.join(args.data_root, 'data_batch_{}'.format(i)))
        data.append(dic['data'])
        labels = np.r_[labels, dic['labels']]
    data = np.vstack(data)
    make_data(data, labels, args.output_dir, 'train')
    # test data
    dic = unpickle(osp.join(args.data_root, 'test_batch'))
    make_data(dic['data'], dic['labels'], args.output_dir, 'test')
def run():
    print('* Reading files')
    if not os.path.isdir(config['w2v_root']):
        raise FileNotFoundError('Files directory not found')
    if not os.path.isfile(os.path.join(config['w2v_root'], 'word_ids.pickle')):
        raise FileNotFoundError('File word_ids.pickle not found')
    if not os.path.isfile(os.path.join(config['w2v_root'], 'embedding_matrix.npy')):
        raise FileNotFoundError('File embedding_matrix.npy not found')

    word_id_map = unpickle(os.path.join(config['w2v_root'], 'word_ids.pickle'))
    embeddings = np.load(os.path.join(config['w2v_root'], 'embedding_matrix.npy'))

    print('* Applying TSNE')
    tsne = TSNE(n_components=2, random_state=42)
    embeddings_2d = tsne.fit_transform(embeddings)

    print('* Plotting All embeddings')
    plot_embeddings(word_id_map, embeddings_2d)

    print('* Plotting x-bounds (4.0, 4.2) and y-bounds (-0.5, -0.1)')
    plot_embeddings(word_id_map, embeddings_2d,
                    x_bounds=(4.0, 4.2), y_bounds=(-0.5, -0.1), plot_text=True)

    print('* Plotting x-bounds (14, 17) and y-bounds (4, 7.5)')
    plot_embeddings(word_id_map, embeddings_2d,
                    x_bounds=(14, 17), y_bounds=(4, 7.5), plot_text=True)
def gaussMixture(testX, goodSample, data=None, train=False, plot=False):
    if train:
        n_classes = 3
        covar_type = 'full'
        est = GMM(n_components=n_classes, covariance_type=covar_type)
        est.fit(data)
        utils.pickle(est, 'SrcTeam/capsuleData/capsule_gauss')
    else:
        est = utils.unpickle('SrcTeam/capsuleData/capsule_gauss')

    numMatch = 0.0
    numGood = goodSample.shape[0]
    testData = np.reshape(testX, (1, testX.size))
    predLabel = est.predict(testData)
    for i in range(numGood):
        if est.predict(goodSample[i:i + 1, :]) == predLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        labels = est.predict(data)
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(np.float))
        pl.show()

    return float(numMatch) / numGood
def k_means(testX, goodSample, data=None, train=False, plot=False):
    if train:
        n_clusters = 3
        est = KMeans(n_clusters)
        est.fit(data)
        centers = est.cluster_centers_
        utils.pickle(est, 'SrcTeam/capsuleData/capsule_k_means')
    else:
        est = utils.unpickle('SrcTeam/capsuleData/capsule_k_means')

    numMatch = 0.0
    numGood = goodSample.shape[0]
    #sampleLabel = clusterLabel(centers, sample)
    testLabel = est.predict(testX)
    for i in range(numGood):
        if est.predict(goodSample[i, :]) == testLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        labels = est.labels_
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(np.float))
        pl.show()

    return float(numMatch) / numGood
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i', '--inputfile', action="store", dest='inputfile', required=True,
        help='Text file containing a list of paths to bz2 files to process')
    parser.add_argument(
        '-l', '--lang', action="store", dest='lang', required=True,
        type=check_lang, help='Two-letter language tag to fetch')
    parser.add_argument(
        '-p', '--prefix', action="store", dest='prefix', default='',
        help='Classifier-specific label prefix, e.g. __label__')
    args = parser.parse_args()

    data = unpickle(args.inputfile)
    add = label(data['add'], label='neutral', prefix=args.prefix, separator='\t')
    rem = label(data['rem'], label='biased', prefix=args.prefix, separator='\t')
    split_dataset(add, rem, args.lang)
def read_misc_session_data(session_id, file_name):
    folder = _get_session_folder(session_id)
    data_path = folder + "/" + file_name
    misc_data = utils.unpickle(data_path)  # toolsencoding.ToolsRenderData object
    return misc_data
def main():
    trained_model = './trained_model.pth'
    test_batch_dir = './cifar-10/test_batch'

    classifier = CNNModel()
    classifier.load_state_dict(torch.load(trained_model))
    classifier.cuda()
    classifier.eval()

    test_x, test_y = unpickle(test_batch_dir)
    test_x, test_y = torch.tensor(
        np.reshape(test_x, (len(test_x), 3, 32, 32))).to(
            'cuda', dtype=torch.float), torch.tensor(test_y).cuda()

    classes = [
        'Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog',
        'Horse', 'Ship', 'Truck'
    ]

    # calculating the accuracy of our classifier
    print("Calculating accuracy...")
    correct = 0
    total = len(test_x)
    with torch.no_grad():
        out = classifier(test_x)
        _, predicted = torch.max(out, 1)
        # calculate the total accuracy
        correct += (predicted == test_y).sum().item()
        print('Accuracy: %5d %%' % (correct / total * 100))
def _load_effect_stack_values_dialog_callback(dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        load_path = dialog.get_filenames()[0]
        stack_data = utils.unpickle(load_path)

        for effect_data in stack_data.effects_data:
            filter_info, properties, non_mlt_properties = effect_data

            data = {"clip": _filter_stack.clip,
                    "filter_info": filter_info,
                    "filter_edit_done_func": filter_edit_done_stack_update}
            action = edit.add_filter_action(data)

            set_stack_update_blocked()
            action.do_edit()
            set_stack_update_unblocked()

            filters = _filter_stack.get_filters()
            filter_object = filters[len(filters) - 1]
            filter_object.properties = copy.deepcopy(properties)
            filter_object.non_mlt_properties = copy.deepcopy(non_mlt_properties)
            filter_object.update_mlt_filter_properties_all()

            _filter_stack.reinit_stack_item(filter_object)

    dialog.destroy()
def load_render_data():
    global _render_data
    try:
        render_data_path = _session_folder + "/" + RENDER_DATA_FILE
        _render_data = utils.unpickle(render_data_path)  # toolsencoding.ToolsRenderData object
    except:
        _render_data = None
def get_train_data(batch_no):
    train_set = unpickle("cifar10/data_batch_" + str(batch_no))
    train_data = train_set[b"data"]
    train_images = unserialize(train_data)
    labels = train_set[b"labels"]
    images = prepare_pixels(train_images)
    labels = prepare_labels(labels)
    return images, labels
def get_test_data():
    test_set = unpickle("cifar10/test_batch")
    test_data = test_set[b"data"]
    test_images = unserialize(test_data)
    labels = test_set[b"labels"]
    images = prepare_pixels(test_images)
    labels = prepare_labels(labels)
    return images, labels
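# A hypothetical driver for the two loaders above (get_train_data / get_test_data),
# only to illustrate the expected call pattern; this wrapper is not part of the
# original module, and how the five training batches are combined downstream is an
# assumption.
def load_cifar10():
    train_images, train_labels = [], []
    for batch_no in range(1, 6):  # CIFAR-10 ships five training batches
        images, labels = get_train_data(batch_no)
        train_images.append(images)
        train_labels.append(labels)
    test_images, test_labels = get_test_data()
    return train_images, train_labels, test_images, test_labels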
def __init__(self, datadir, modelname='CNRecognizer.pkl'):
    self._datadir = datadir
    self._modelname = modelname
    self._trained = False
    if os.path.exists(datadir + '/models/' + modelname):
        inst = unpickle(datadir + '/models/' + modelname)
        for att in dir(inst):
            setattr(self, att, getattr(inst, att))
def load_current_colors():
    load_path = _colors_data_path()
    colors = utils.unpickle(load_path)

    sel, bg, button = colors

    global _selected_bg_color, _bg_color, _button_colors
    _selected_bg_color = Gdk.RGBA(*sel)
    _bg_color = Gdk.RGBA(*bg)
    _button_colors = Gdk.RGBA(*button)
def __init__(self, image_size=IMAGE_SIZE, calibration_file=CALIBRATION_PICKLE_FILE):
    # Get camera calibration
    points_object, points_image = (utils.unpickle(calibration_file)
                                   if os.path.exists(calibration_file)
                                   else self._calibrate())
    # Get mtx and dist for undistorting new images
    _, self.mtx, self.dist, _, _ = cv2.calibrateCamera(
        points_object, points_image, image_size, None, None)
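# Once mtx and dist have been computed by cv2.calibrateCamera above, new frames are
# typically undistorted with cv2.undistort. The method below is a hedged sketch of
# such a helper; the original class may expose a different name or API.
def undistort(self, image):
    # Remove lens distortion using the stored camera matrix and distortion coefficients.
    return cv2.undistort(image, self.mtx, self.dist, None, self.mtx)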
def main(args):
    mkdir_if_missing(args.output_dir)
    # training data
    data = []
    labels = []
    for i in xrange(1, 6):
        dic = unpickle(osp.join(args.data_root, 'data_batch_{}'.format(i)))
        data.append(dic['data'])
        labels = np.r_[labels, dic['labels']]
    data = np.vstack(data)
    make_data(data, labels, args.output_dir, 'train')
    # test data
    dic = unpickle(osp.join(args.data_root, 'test_batch'))
    make_data(dic['data'], dic['labels'], args.output_dir, 'test')
    # Identity for confusion initialization
    matrix_I = np.identity(10)
    write_matrix(matrix_I, osp.join(args.output_dir, 'identity.txt'))
    pickle(matrix_I, osp.join(args.output_dir, 'identity.pkl'))
def read_CIFAR_100(cifar_path, train=True):
    """
    :param cifar_path: data path for cifar-100
    :param train: check if it is the train mode
    :return: data and its labels
    Note:
        data (#samples, 32, 32, 3)
        labels (#samples, 100)
    """
    data = []
    labels = []
    if train:
        # If reading train set
        file_name = cifar_path + "train"
        data_dict = utils.unpickle(file_name)
        batch_data = data_dict[b"data"]
        batch_labels = data_dict[b"fine_labels"]
        data.append(batch_data)
        labels.append(batch_labels)
    else:
        # If reading test set
        file_name = cifar_path + "test"
        data_dict = utils.unpickle(file_name)
        batch_data = data_dict[b"data"]
        batch_labels = data_dict[b"fine_labels"]
        data.append(batch_data)
        labels.append(batch_labels)

    data = np.asarray(data)
    data = np.reshape(data, (data.shape[0] * data.shape[1], 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = np.asarray(labels)
    labels = np.reshape(labels, (labels.shape[0] * labels.shape[1], )).tolist()
    labels = utils.index_to_one_hot(labels, 100)
    return data, labels
def __init__(self):
    append = lambda x: '/data2/andrewliao11/cifar-10-batches-py/data_batch_' + x
    self.train_filename = [append(str(i + 1)) for i in range(5)]
    self.test_filename = '/data2/andrewliao11/cifar-10-batches-py/test_batch'
    self.num_train = 50000
    self.num_test = 10000
    self.input_size = 3072
    self.imgs = np.zeros([self.num_train, self.input_size], dtype='float32')
    self.labels = np.zeros([self.num_train], dtype='int32')
    self.current = 0
    for i in range(5):
        data_batch = utils.unpickle(self.train_filename[i])
        self.imgs[i * 10000:(i + 1) * 10000] = data_batch['data'] / 255.
        self.labels[i * 10000:(i + 1) * 10000] = np.asarray(data_batch['labels'])
    data_batch = utils.unpickle(self.test_filename)
    self.test_imgs = data_batch['data'] / 255.
    self.test_labels = np.asarray(data_batch['labels'])
def __init__(self):
    self.trainX = []
    self.trainY = []
    data_dir = './cifar-10/training batches'
    batches = os.listdir(data_dir)
    for batch in batches:
        batch_data, batch_labels = unpickle(os.path.join(data_dir, batch))
        self.trainX.extend(batch_data)
        self.trainY.extend(batch_labels)
def __load_data(self, test_data_path):
    if not os.path.exists(test_data_path):
        raise InvalidTestSetPath('invalid test data path: {}'.format(test_data_path))
    test = unpickle(test_data_path)
    test_data = test[b'data']
    x_test = test_data.reshape(test_data.shape[0], 3, 32, 32)
    x_test = x_test.transpose(0, 2, 3, 1)
    x_test = norm_images(x_test)
    y_test = test[b'fine_labels']
    return x_test, y_test
def read_CIFAR_10(cifar_path, train=True):
    """
    Assumes the raw CIFAR-10 data is located in cifar_path, reads the dataset,
    and returns it as numpy arrays:
        data (#samples, 32, 32, 3)
        labels (#samples, 10)
    The boolean argument train determines whether the train or test set is read.
    """
    data = []
    labels = []
    if train:
        # If reading train set
        for i in range(1, 6):
            file_name = cifar_path + "data_batch_" + str(i)
            data_dict = utils.unpickle(file_name)
            batch_data = data_dict[b"data"]
            batch_labels = data_dict[b"labels"]
            data.append(batch_data)
            labels.append(batch_labels)
    else:
        # If reading test set
        file_name = cifar_path + "test_batch"
        data_dict = utils.unpickle(file_name)
        batch_data = data_dict[b"data"]
        batch_labels = data_dict[b"labels"]
        data.append(batch_data)
        labels.append(batch_labels)

    data = np.asarray(data)
    data = np.reshape(data, (data.shape[0] * data.shape[1], 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = np.asarray(labels)
    labels = np.reshape(labels, (labels.shape[0] * labels.shape[1], )).tolist()
    labels = utils.index_to_one_hot(labels, 10)
    return data, labels
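# Example use of read_CIFAR_10, following its docstring. The directory name and the
# printed shapes are assumptions based on the standard python-version CIFAR-10
# archive, not part of the original code.
train_x, train_y = read_CIFAR_10("cifar-10-batches-py/", train=True)
test_x, test_y = read_CIFAR_10("cifar-10-batches-py/", train=False)
print(train_x.shape)  # expected (50000, 32, 32, 3) per the docstring
print(test_x.shape)   # expected (10000, 32, 32, 3)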
def get_labels(self):
    if not self.is_valid_path:
        print("ALERT: Path is not valid or does not contain all cifar 10 files.")
        return
    if self.label_names == []:
        # Unpickle batches.meta and get label names
        f = unpickle(os.path.join(self.data_dir, 'batches.meta'), quiet=True)
        self.label_names = [lb.decode() for lb in f[b'label_names']]
    return self.label_names
def _load(self, filenames):
    images, labels = None, []
    for i, filename in enumerate(filenames):
        datafile = utils.unpickle(filename)
        if i == 0:
            images = datafile['data']
        else:
            images = np.append(images, datafile['data'], axis=0)
        labels.extend(datafile['labels'])
    print(images.shape, len(labels))
    return images, utils.onehot(np.asarray(labels), label_size=self.labels_size)
def load(self):
    data = []
    if self.training:
        for i in range(1, 6):
            filename = '{}/data_batch_{}'.format(self.path, i)
            batch_data = unpickle(filename)
            if len(data) > 0:
                data = np.vstack((data, batch_data[b'data']))
            else:
                data = batch_data[b'data']
    else:
        filename = '{}/test_batch'.format(self.path)
        batch_data = unpickle(filename)
        data = batch_data[b'data']

    w = 32
    h = 32
    s = w * h
    data = np.array(data)
    data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
    data = data.reshape((-1, w, h, 3))
    return data
def wait_run_end(workers_results, model, timeout=None):
    # TODO: use timeout
    weights = pickle.dumps(model.get_weights())
    for w, res in workers_results.items():
        while not res.ready:
            sleep(1)
        res = utils.unpickle(res.value)
        grads = res["grads"]
        model.add_grads(grads)
        new_res = Pyro4.Future(w.run)(weights)
        workers_results[w] = new_res
def load_model(model_params, contF=True):
    from dcgan import DCGAN
    import os
    model = DCGAN(model_params, ltype=os.environ['LTYPE'])
    if contF:
        # print '...Continuing from last time'
        from utils import unpickle
        _model = unpickle(os.environ['LOAD_PATH'])
        np_gen_params = [param.get_value() for param in _model.gen_network.params]
        np_dis_params = [param.get_value() for param in _model.dis_network.params]
        model.load(np_dis_params, np_gen_params, verbose=False)
    return model
def _load_compositor_values_dialog_callback(dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        load_path = dialog.get_filenames()[0]
        compositor_data = utils.unpickle(load_path)

        if compositor_data.data_applicable(compositor.transition.info):
            compositor_data.set_values(compositor)
            set_compositor(compositor)
        else:
            saved_name_comp_name = mlttransitions.name_for_type[compositor_data.info.name]
            current_comp_name = mlttransitions.name_for_type[compositor.transition.info.name]
            primary_txt = _("Saved Compositor data not applicable for this compositor!")
            secondary_txt = _("Saved data is for ") + saved_name_comp_name + _(" compositor,\n") + _("current compositor is ") + current_comp_name + "."
            dialogutils.warning_message(primary_txt, secondary_txt, gui.editor_window.window)

    dialog.destroy()
def _load_layers_dialog_callback(self, dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        try:
            filenames = dialog.get_filenames()
            load_path = filenames[0]
            new_data = utils.unpickle(load_path)
            global _titler_data
            _titler_data = new_data
            self.load_titler_data()
        except:
            dialog.destroy()
            # INFOWINDOW
            return

        dialog.destroy()
    else:
        dialog.destroy()
def parse_test_data(input_path):
    # Load pickled source file
    try:
        data = utils.unpickle(os.path.join(input_path, TEST_DATA_FILENAME))
    except Exception as ex:
        logging.error('Failed to load input files from: ' + input_path)
        logging.error('Exception: %s', ex)
        exit(1)

    # Prepare features
    features = data[b'data']
    features = features.reshape(features.shape[0], 3, 32, 32)
    features = features.transpose(0, 2, 3, 1).astype('uint8')

    # Prepare labels
    labels = np.asarray(data[b'labels'])

    return features, labels
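# Hypothetical usage of parse_test_data; TEST_DATA_FILENAME is defined elsewhere in
# the original module (for the python-version CIFAR archives it is usually
# "test_batch"), and the directory name below is an assumption.
features, labels = parse_test_data("./cifar-10-batches-py")
print(features.shape, labels.shape)  # expected (10000, 32, 32, 3) and (10000,)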
def _load_images_labels(self, files):
    raw_images = None
    labels = None
    for f in files:
        data = unpickle(f, quiet=True)

        # Get the raw images
        if raw_images is None:
            raw_images = data[b'data'].astype(dtype='float32')
        else:
            raw_images = np.vstack((raw_images, data[b'data'].astype(dtype='float32')))

        # Get the labels
        if labels is None:
            labels = data[b'labels']
        else:
            # Stack labels together
            labels = np.concatenate((labels, data[b'labels']))

    return labels, raw_images
def _param_run(self, param_set: ParamSet) -> Tuple[ExperimentResults, RunnerUUID]:
    log(f'Running param set: {param_set}')
    uuid = hash_dict(param_set)
    if self._experiment_result_exists(uuid):
        log('Loading experiment results from cache')
        log(uuid)
        experiment_results = unpickle(self._file_path_experiment_results(uuid))
    else:
        log(f'Running uuid {uuid}')
        experiment_results = train_kd(**param_set)
        pickle_object(experiment_results, self._file_path_experiment_results(uuid))
    return experiment_results, uuid
def _load_effect_values_dialog_callback(dialog, response_id):
    if response_id == Gtk.ResponseType.ACCEPT:
        load_path = dialog.get_filenames()[0]
        effect_data = utils.unpickle(load_path)

        filter_object = clip.filters[current_filter_index]

        if effect_data.data_applicable(filter_object.info):
            effect_data.set_effect_values(filter_object)
            effect_selection_changed()
        else:
            # Info window
            saved_effect_name = effect_data.info.name
            current_effect_name = filter_object.info.name
            primary_txt = _("Saved Filter data not applicable for this Filter!")
            secondary_txt = _("Saved data is for ") + saved_effect_name + " Filter,\n" + _("current edited Filter is ") + current_effect_name + "."
            dialogutils.warning_message(primary_txt, secondary_txt, gui.editor_window.window)

    dialog.destroy()
def registerInitialState(self, gameState):
    """
    Do any necessary initialization
    """
    super(ActionBasisAgent, self).registerInitialState(gameState)

    # Here, you may do any necessary initialization, e.g., import some
    # parameters you've learned, as in the following commented out lines
    # learned_params = cPickle.load("myparams.pkl")
    # learned_params = np.load("myparams.npy")

    # get all the allowed actions, encode them for learners
    self.actions = self.actionBasis.allActions[:]
    self.actionCodes = {action: i for (i, action) in enumerate(self.actions)}

    # remember basis function dimensions
    self.basis_dimensions = self.basis.dimensions

    # try to load the learner from a file
    if (self.learn_file and os.path.isfile(self.learn_file)
            and not self.restart_learning):
        self.learner = utils.unpickle(self.learn_file)
        self.learner.reset()
    else:
        self.learner = self.learner_class(self.basis_dimensions,
                                          self.actionCodes.values())
        if self.use_initializer:
            if (self.basis.__name__, self.actionBasis.__name__) in basis.initializers:
                basis.initializers[(self.basis.__name__, self.actionBasis.__name__)](self.learner)
                print "Initialized successfully"
            else:
                print "No initializer found"
                raise Exception("Whoops")

    # initialize score
    self.score = 0

    # count number of actions taken
    self.action_count = 0
def merge_to_final_output(oplog_output_file, profiler_output_file, output_file):
    """
    * Why merge files: we need to merge the docs from two sources into one.
    * Why not merge earlier: it's definitely inefficient to merge the entries
      when we just retrieve these documents from mongodb. However we designed
      this script to be able to pull the docs from different servers, as a
      result it's hard to do the on-time merge since you cannot determine if
      some "old" entries will come later.
    """
    oplog = open(oplog_output_file, "rb")
    profiler = open(profiler_output_file, "rb")
    output = open(output_file, "wb")
    logger = utils.LOG
    logger.info("Starts completing the insert options")

    oplog_doc = utils.unpickle(oplog)
    profiler_doc = utils.unpickle(profiler)
    inserts = 0
    noninserts = 0
    severe_inconsistencies = 0
    mild_inconsistencies = 0

    while oplog_doc and profiler_doc:
        if (noninserts + inserts) % 2500 == 0:
            logger.info("processed %d items", noninserts + inserts)

        if profiler_doc["op"] != "insert":
            dump_op(output, profiler_doc)
            noninserts += 1
            profiler_doc = utils.unpickle(profiler)
        else:
            # Replace the profiler's insert operation doc with oplog's,
            # but keep the canonical form of "ts".
            profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
            oplog_ts = oplog_doc["ts"].time
            # Only care about the second-level precision.
            # This is a lame enforcement of consistency.
            delta = abs(profiler_ts - oplog_ts)
            if delta > 3:
                # TODO strictly speaking, this ain't good since the files are
                # not properly closed.
                logger.error(
                    "oplog and profiler results are inconsistent `ts`\n"
                    "  oplog:    %d\n"
                    "  profiler: %d", oplog_ts, profiler_ts)
                severe_inconsistencies += 1
            elif delta != 0:
                logger.warn("Slightly inconsistent timestamp\n"
                            "  oplog:    %d\n"
                            "  profiler: %d", oplog_ts, profiler_ts)
                mild_inconsistencies += 1

            oplog_doc["ts"] = profiler_doc["ts"]
            # make sure "op" is "insert" instead of "i".
            oplog_doc["op"] = profiler_doc["op"]
            dump_op(output, oplog_doc)
            inserts += 1
            oplog_doc = utils.unpickle(oplog)
            profiler_doc = utils.unpickle(profiler)

    while profiler_doc and profiler_doc["op"] != "insert":
        dump_op(output, profiler_doc)
        noninserts += 1
        profiler_doc = utils.unpickle(profiler)

    logger.info("Finished completing the insert options, %d inserts and"
                " %d noninserts\n"
                "  severe ts inconsistencies: %d\n"
                "  mild ts inconsistencies: %d\n", inserts, noninserts,
                severe_inconsistencies, mild_inconsistencies)

    for f in [oplog, profiler, output]:
        f.close()
    return True
def _load(fname):
    fpath = osp.join(output_dir, fname)
    assert osp.isfile(fpath), "Must have extracted detections and " \
                              "features first before evaluation"
    return unpickle(fpath)
def analyzer_main():
    print 'Loading data...'
    cfda_agency_map = dict((p.program_number, p.agency_id)
                           for p in Program.objects.all())
    awards = unpickle(os.path.join(DATA_DIR, 'cfda_awards.out.bin'))

    print 'Building cube'
    c = Cube()

    for (idx, (award_id, award)) in enumerate(awards.iteritems()):
        # Simple progress ticker
        if idx % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        cfda = award['cfda']
        agency = cfda_agency_map.get(cfda, None)
        if agency:
            fed_amount = award['fed_amount']
            fiscal_year = award['fiscal_year']
            reporting_lag = award['reporting_lag']
            fiscal_year_lag = award['fiscal_year_lag']

            # select only fy 2007-2009 inclusive
            if fiscal_year not in FISCAL_YEARS:
                continue

            # We need to set an upper bound on the fiscal year lag in order to
            # make comparisons between fiscal years useful.
            if fiscal_year_lag > FISCAL_YEAR_LAG_THRESHOLD:
                continue

            # reporting lag of negative days converted to 0
            reporting_lag = reporting_lag if reporting_lag > 0 else 0

            # add record to data cube
            c.add({'fy': fiscal_year, 'cfda': cfda, 'agency': agency},
                  {'days': reporting_lag, 'dollars': fed_amount})

    awards = None

    print 'Querying cfda aggregates...'
    result = c.query(groups=['cfda', 'fy'])

    print 'Loading cfda results into db...'
    ProgramTimeliness.objects.all().delete()
    for (cfda, cfda_results) in result.values.iteritems():
        program = Program.objects.get(program_number=cfda)
        for fy in FISCAL_YEARS:
            cfda_fy_results = cfda_results.values[fy]
            metric = ProgramTimeliness.objects.create(
                program=program,
                agency=program.agency,
                fiscal_year=fy,
                late_dollars=cfda_fy_results.get_data(sum_dollars_45days_late),
                late_rows=cfda_fy_results.get_data(count_records_45days_late),
                total_dollars=cfda_fy_results.get_data(sum_dollars),
                total_rows=cfda_fy_results.get_data(len),
                avg_lag_rows=cfda_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=cfda_fy_results.get_data(avg_days_by_dollars)
            )
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
                metric.save()

    print 'Querying agency aggregates...'
    result = c.query(groups=['agency', 'fy'])

    print 'Loading agency results into db...'
    AgencyTimeliness.objects.all().delete()
    for (agency_id, agency_results) in result.values.iteritems():
        agency = Agency.objects.get(pk=agency_id)
        for fy in FISCAL_YEARS:
            agency_fy_results = agency_results.values[fy]
            metric = AgencyTimeliness.objects.create(
                agency=agency,
                fiscal_year=fy,
                late_dollars=agency_fy_results.get_data(sum_dollars_45days_late),
                late_rows=agency_fy_results.get_data(count_records_45days_late),
                total_dollars=agency_fy_results.get_data(sum_dollars),
                total_rows=agency_fy_results.get_data(len),
                avg_lag_rows=agency_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=agency_fy_results.get_data(avg_days_by_dollars)
            )
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
                metric.save()
def evaluate(protoc, images, result_dir):
    gallery_det = unpickle(osp.join(result_dir, 'gallery_detections.pkl'))
    gallery_feat = unpickle(osp.join(result_dir, 'gallery_features.pkl'))
    gallery_det = gallery_det[1]
    gallery_feat = gallery_feat[1]
    probe_feat = unpickle(osp.join(result_dir, 'probe_features.pkl'))

    assert len(images) == len(gallery_det)
    assert len(images) == len(gallery_feat)
    name_to_det_feat = {}
    for name, det, feat in zip(images, gallery_det, gallery_feat):
        scores = det[:, 4].ravel()
        inds = np.where(scores >= 0.5)[0]
        if len(inds) > 0:
            det = det[inds]
            feat = feat[inds]
            name_to_det_feat[name] = (det, feat)

    num_probe = len(protoc)
    assert len(probe_feat) == num_probe
    aps = []
    top1_acc = []
    for i in xrange(num_probe):
        y_true, y_score = [], []
        feat_p = probe_feat[i][np.newaxis, :]
        count_gt = 0
        count_tp = 0
        for item in protoc['Gallery'][i].squeeze():
            gallery_imname = str(item[0][0])
            gt = item[1][0].astype(np.int32)
            count_gt += (gt.size > 0)
            if gallery_imname not in name_to_det_feat:
                continue
            det, feat_g = name_to_det_feat[gallery_imname]
            dis = np.sum((feat_p - feat_g) ** 2, axis=1)
            label = np.zeros(len(dis), dtype=np.int32)
            if gt.size > 0:
                w, h = gt[2], gt[3]
                gt[2:] += gt[:2]
                thresh = min(0.5, (w * h * 1.0) / ((w + 10) * (h + 10)))
                inds = np.argsort(dis)
                dis = dis[inds]
                # set the label of the first box matched to gt to 1
                for j, roi in enumerate(det[inds, :4]):
                    if _compute_iou(roi, gt) >= thresh:
                        label[j] = 1
                        count_tp += 1
                        break
            y_true.extend(list(label))
            y_score.extend(list(-dis))
        assert count_tp <= count_gt
        recall_rate = count_tp * 1.0 / count_gt
        ap = average_precision_score(y_true, y_score) * recall_rate
        if not np.isnan(ap):
            aps.append(ap)
        else:
            aps.append(0)
        maxind = np.argmax(y_score)
        top1_acc.append(y_true[maxind])

    print 'mAP: {:.2%}'.format(np.mean(aps))
    print 'top-1: {:.2%}'.format(np.mean(top1_acc))
def merge_to_final_output(oplog_output_file, profiler_output_files, output_file):
    """
    * Why merge files: we need to merge the docs from two sources into one.
    * Why not merge earlier: it's definitely inefficient to merge the entries
      when we just retrieve these documents from mongodb. However we designed
      this script to be able to pull the docs from different servers, as a
      result it's hard to do the on-time merge since you cannot determine if
      some "old" entries will come later.
    """
    oplog = open(oplog_output_file, "rb")
    # create a map of profiler file names to files
    profiler_files = {}
    for profiler_file in profiler_output_files:
        profiler_files[profiler_file] = open(profiler_file, "rb")
    output = open(output_file, "wb")
    logger = utils.LOG
    logger.info("Starts completing the insert options")

    oplog_doc = utils.unpickle(oplog)

    # Create a map of tuple(doc's timestamp, profiler file name) to doc for
    # each profiler. This makes it easy to fetch the earliest doc in the group
    # on each iteration.
    profiler_docs = {}
    for file_name in profiler_files:
        doc = utils.unpickle(profiler_files[file_name])
        if doc:
            profiler_docs[(doc["ts"], file_name)] = doc

    inserts = 0
    noninserts = 0
    severe_inconsistencies = 0
    mild_inconsistencies = 0

    # read docs until we exhaust either the oplog or all ops in the profile logs
    while oplog_doc and len(profiler_docs) > 0:
        if (noninserts + inserts) % 2500 == 0:
            logger.info("processed %d items", noninserts + inserts)

        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one from the same file;
        # the second field in the key is the file name
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc

        # If the retrieved operation is not an insert, we can simply dump it
        # to the output file. Otherwise, we need to cross-reference the
        # profiler's insert operation with an oplog entry (because the
        # profiler doesn't contain the inserted object's details).
        if profiler_doc["op"] != "insert":
            dump_op(output, profiler_doc)
            noninserts += 1
        else:
            # Compare the profile doc's ts with the oplog doc's ts. In the
            # ideal scenario, every insert we capture via the profile
            # collection should match a consecutive oplog entry (the oplog
            # tailer only looks at insert ops).
            profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
            oplog_ts = oplog_doc["ts"].time
            delta = abs(profiler_ts - oplog_ts)
            if delta > 3:
                # TODO strictly speaking, this ain't good since the files are
                # not properly closed.
                logger.error(
                    "oplog and profiler results are inconsistent `ts`\n"
                    "  oplog:    %d\n"
                    "  profiler: %d", oplog_ts, profiler_ts)
                severe_inconsistencies += 1
            elif delta != 0:
                logger.warn("Slightly inconsistent timestamp\n"
                            "  oplog:    %d\n"
                            "  profiler: %d", oplog_ts, profiler_ts)
                mild_inconsistencies += 1

            # we still want to keep the canonical form of the ts
            oplog_doc["ts"] = profiler_doc["ts"]
            # make sure "op" is "insert" instead of "i"
            oplog_doc["op"] = profiler_doc["op"]
            dump_op(output, oplog_doc)
            inserts += 1
            # Get the next doc from the oplog
            oplog_doc = utils.unpickle(oplog)

    # finish up any remaining non-insert ops
    while len(profiler_docs) > 0:
        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc
        if profiler_doc["op"] == "insert":
            break
        dump_op(output, profiler_doc)
        noninserts += 1

    logger.info("Finished completing the insert options, %d inserts and"
                " %d noninserts\n"
                "  severe ts inconsistencies: %d\n"
                "  mild ts inconsistencies: %d\n", inserts, noninserts,
                severe_inconsistencies, mild_inconsistencies)

    for f in [oplog, output]:
        f.close()
    for f in profiler_files.values():
        f.close()

    # Clean up temporary files (oplog + profiler files), since everything is
    # already in the main output file
    for f in profiler_output_files:
        os.remove(f)
    os.remove(oplog_output_file)

    return True
import numpy as np
#import SrcTeam.utils as utils
#import SrcTeam.classifyCapsule as classify
import utils
import classifyCapsule as classify

"""from sklearn import datasets
data = datasets.load_iris().data
classify.k_means(data[0,:], goodSample=data[1:3,:],
                 data=data, train=True, plot=True)"""

data = utils.unpickle('SrcTeam/clusterData/capsule_train')

#classify.k_means(data[0,:], goodSample=data[1:3,:],
#                 data=data, train=True, plot=True)

print classify.gaussMixture(data[1,:], goodSample=data[1:10,:],
                            data=data, train=True, plot=False)

#print closest((1,1))
def gt_roidb(self):
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.isfile(cache_file):
        roidb = unpickle(cache_file)
        return roidb

    # Load all images and build a dict from image to boxes
    all_imgs = loadmat(osp.join(self._root_dir, 'annotation', 'Images.mat'))
    all_imgs = all_imgs['Img'].squeeze()
    name_to_boxes = {}
    name_to_pids = {}
    for im_name, __, boxes in all_imgs:
        im_name = str(im_name[0])
        boxes = np.asarray([b[0] for b in boxes[0]])
        boxes = boxes.reshape(boxes.shape[0], 4)
        valid_index = np.where((boxes[:, 2] > 0) & (boxes[:, 3] > 0))[0]
        assert valid_index.size > 0, \
            'Warning: {} has no valid boxes.'.format(im_name)
        boxes = boxes[valid_index]
        name_to_boxes[im_name] = boxes.astype(np.int32)
        name_to_pids[im_name] = -1 * np.ones(boxes.shape[0], dtype=np.int32)

    def _set_box_pid(boxes, box, pids, pid):
        for i in xrange(boxes.shape[0]):
            if np.all(boxes[i] == box):
                pids[i] = pid
                return
        print 'Warning: person {} box {} cannot find in Images'.format(pid, box)

    # Load all the train / test persons and label their pids from 0 to N-1.
    # Assign pid = -1 for unlabeled background people.
    if self._image_set == 'train':
        train = loadmat(osp.join(self._root_dir,
                                 'annotation/test/train_test/Train.mat'))
        train = train['Train'].squeeze()
        for index, item in enumerate(train):
            scenes = item[0, 0][2].squeeze()
            for im_name, box, __ in scenes:
                im_name = str(im_name[0])
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)
    else:
        test = loadmat(osp.join(self._root_dir,
                                'annotation/test/train_test/TestG50.mat'))
        test = test['TestG50'].squeeze()
        for index, item in enumerate(test):
            # query
            im_name = str(item['Query'][0, 0][0][0])
            box = item['Query'][0, 0][1].squeeze().astype(np.int32)
            _set_box_pid(name_to_boxes[im_name], box,
                         name_to_pids[im_name], index)
            # gallery
            gallery = item['Gallery'].squeeze()
            for im_name, box, __ in gallery:
                im_name = str(im_name[0])
                if box.size == 0:
                    break
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)

    # Construct the gt_roidb
    gt_roidb = []
    for im_name in self.image_index:
        boxes = name_to_boxes[im_name]
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        pids = name_to_pids[im_name]
        num_objs = len(boxes)
        gt_classes = np.ones((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        overlaps[:, 1] = 1.0
        overlaps = csr_matrix(overlaps)
        gt_roidb.append({
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'gt_pids': pids,
            'flipped': False})

    pickle(gt_roidb, cache_file)
    print "wrote gt roidb to {}".format(cache_file)
    return gt_roidb
import sys
from argparse import ArgumentParser
from ast import literal_eval

if 'external/caffe/python' not in sys.path:
    sys.path.insert(0, 'external/caffe/python')
import caffe

from utils import unpickle


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('source', help="Pickled data file")
    parser.add_argument('output', help="Output file path")
    parser.add_argument('--shape', help="(num, channels, height, width)")
    args = parser.parse_args()

    data = unpickle(args.source)
    if args.shape is not None:
        data = data.reshape(literal_eval(args.shape))
    else:
        shape = data.shape
        shape = (1,) * (4 - len(shape)) + shape
        data = data.reshape(shape)

    blob = caffe.proto.caffe_pb2.BlobProto()
    blob.num, blob.channels, blob.height, blob.width = data.shape
    blob.data.extend(list(data.ravel().astype(float)))

    with open(args.output, 'wb') as f:
        f.write(blob.SerializeToString())
def merge_to_final_output(oplog_output_file, profiler_output_files, output_file):
    """
    * Why merge files: we need to merge the docs from two sources into one.
    * Why not merge earlier: it's definitely inefficient to merge the entries
      when we just retrieve these documents from mongodb. However we designed
      this script to be able to pull the docs from different servers, as a
      result it's hard to do the on-time merge since you cannot determine if
      some "old" entries will come later.
    """
    oplog = open(oplog_output_file, "rb")
    # create a map of profiler file names to files
    profiler_files = {}
    for profiler_file in profiler_output_files:
        profiler_files[profiler_file] = open(profiler_file, "rb")
    output = open(output_file, "wb")
    logger = utils.LOG
    logger.info("Starts completing the insert options")

    oplog_doc = utils.unpickle(oplog)

    # create a map of (doc ts, profiler file name) to doc
    profiler_docs = {}
    for file_name in profiler_files:
        doc = utils.unpickle(profiler_files[file_name])
        # associate doc with a tuple of the ts and source filename; this makes
        # it easy to fetch the earliest doc in the group on each iteration
        if doc:
            profiler_docs[(doc["ts"], file_name)] = doc

    inserts = 0
    noninserts = 0
    severe_inconsistencies = 0
    mild_inconsistencies = 0

    # read docs until we exhaust either the oplog or all ops in the profile logs
    while oplog_doc and len(profiler_docs) > 0:
        if (noninserts + inserts) % 2500 == 0:
            logger.info("processed %d items", noninserts + inserts)

        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one from the same file;
        # the second field in the key is the file name
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc

        if profiler_doc["op"] != "insert":
            dump_op(output, profiler_doc)
            noninserts += 1
        else:
            # Replace the profiler's insert operation doc with oplog's,
            # but keep the canonical form of "ts".
            profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
            oplog_ts = oplog_doc["ts"].time
            # Only care about the second-level precision.
            # This is a lame enforcement of consistency.
            delta = abs(profiler_ts - oplog_ts)
            if delta > 3:
                # TODO strictly speaking, this ain't good since the files are
                # not properly closed.
                logger.error(
                    "oplog and profiler results are inconsistent `ts`\n"
                    "  oplog:    %d\n"
                    "  profiler: %d",
                    oplog_ts, profiler_ts,
                )
                severe_inconsistencies += 1
            elif delta != 0:
                logger.warn(
                    "Slightly inconsistent timestamp\n"
                    "  oplog:    %d\n"
                    "  profiler: %d",
                    oplog_ts, profiler_ts
                )
                mild_inconsistencies += 1

            oplog_doc["ts"] = profiler_doc["ts"]
            # make sure "op" is "insert" instead of "i"
            oplog_doc["op"] = profiler_doc["op"]
            dump_op(output, oplog_doc)
            inserts += 1
            oplog_doc = utils.unpickle(oplog)

    # finish up any remaining non-insert ops
    while len(profiler_docs) > 0:
        # get the earliest profile doc out of all profiler_docs
        key = min(profiler_docs.keys())
        profiler_doc = profiler_docs[key]
        # remove the doc and fetch a new one
        del profiler_docs[key]
        doc = utils.unpickle(profiler_files[key[1]])
        if doc:
            profiler_docs[(doc["ts"], key[1])] = doc
        if profiler_doc["op"] == "insert":
            break
        dump_op(output, profiler_doc)
        noninserts += 1

    logger.info(
        "Finished completing the insert options, %d inserts and"
        " %d noninserts\n"
        "  severe ts inconsistencies: %d\n"
        "  mild ts inconsistencies: %d\n",
        inserts, noninserts, severe_inconsistencies, mild_inconsistencies,
    )

    for f in [oplog, output]:
        f.close()
    for f in profiler_files.values():
        f.close()
    return True