Example #1
    def load_info(self):
        if self.iterInfoPath.is_file():
            self.iterInfo = utils.load_pickle(self.iterInfoPath)
        else:
            self.iterInfo = IterInfo(self.unlabeledFolder,
                                     self.unlabeledIndexPath, self.loopFolder)
            dirs.create_folder(self.loopFolder)

            utils.save_pickle(self.iterInfo, self.iterInfoPath)

        return self.iterInfo
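
Note: the load-or-create pattern above depends on two pickle helpers from the project's utils module, which are not shown on this page. A minimal sketch, assuming they are thin wrappers around the standard pickle module:

import pickle

def load_pickle(path):
    # Assumed behavior: deserialize a single object from a binary pickle file.
    with open(path, 'rb') as f:
        return pickle.load(f)

def save_pickle(obj, path):
    # Assumed behavior: serialize one object to path, overwriting any existing file.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)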
Example #2
def dataset_inference_unlabeled(dataset_path,
                                data_transforms,
                                model_path,
                                save_path,
                                batch_size=64,
                                force=False,
                                seed=None,
                                verbose=True):
    '''
        Perform inference on an unlabeled dataset, using a csv index file as reference.

        force: Boolean
            If False, search for an existing output file and reuse it if it exists.
            If True, or if the output file doesn't exist, compute the dataset outputs
            and save them to file.
    '''
    if os.path.isfile(save_path) and not force:
        outputDf = utils.load_pickle(save_path)
        if len(outputDf) > 0:
            return outputDf

    unlabelIndex = IndexManager(dataset_path)

    # Drop duplicated files
    unlabelIndex.index = dutils.remove_duplicates(unlabelIndex.index,
                                                  "FrameHash")

    # Drop missing or corrupt images
    unlabelIndex.index = dutils.check_df_files(unlabelIndex.index,
                                               utils.check_empty_file,
                                               "FramePath")

    imagePathList = unlabelIndex.index["FramePath"].values
    datasetLen = len(imagePathList)

    if verbose:
        print("\nUnlabeled set inference")
        print("\nDataset information: ")
        print("\t", datasetLen, "images.")

    # Label list for an unlabeled dataset (bit of a hack? is there a better way?)
    labelList = np.zeros(datasetLen)

    outputDf = _model_inference(imagePathList, data_transforms, labelList,
                                model_path, batch_size)

    ## Save output to pickle file
    if verbose:
        print("\nSaving outputs file to ", save_path)
    outputDf.to_pickle(save_path)
    return outputDf
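
A hypothetical call site for this function; the paths and the transform below are placeholders, not values from the original project:

from torchvision import transforms

# Placeholder validation transform; the project passes its own data_transforms.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

outputDf = dataset_inference_unlabeled("data/unlabeled_index.csv",  # csv index with a FramePath column
                                       val_transform,
                                       "models/checkpoint.pt",
                                       "results/unlabeled_outputs.pickle",
                                       batch_size=32,
                                       force=False)  # reuse the cached output file if present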
Example #3
def dataset_inference_val(dataset_path,
                          data_transforms,
                          model_path,
                          save_path,
                          batch_size=64,
                          force=False,
                          seed=None,
                          verbose=True):
    '''
        Perform inference on the validation set and save outputs to file.

        force: Boolean
            If False, search for an existing output file and reuse it if it exists.
            If True, or if the output file doesn't exist, compute the dataset outputs
            and save them to file.
    '''
    if os.path.isfile(save_path) and not force:
        outputDf = utils.load_pickle(save_path)
        if len(outputDf) > 0:
            return outputDf

    # Get list of image paths from dataset folder
    dataset = datasets.ImageFolder(str(dataset_path),
                                   transform=data_transforms,
                                   is_valid_file=utils.check_empty_file)
    imageTupleList = dataset.imgs
    datasetLen = len(imageTupleList)
    labelList = dataset.targets

    imagePathList = np.array(dataset.imgs)[:, 0]

    if verbose:
        print("Validation set inference.")
        print("\nDataset information: ")
        print("\t", datasetLen, "images.")
        print("\nClasses: ")
        for key in dataset.class_to_idx.keys():
            print("\t{}: {}".format(dataset.class_to_idx[key], key))

    outputDf = _model_inference(imagePathList, data_transforms, labelList,
                                model_path, batch_size)

    ## Save output to pickle file
    if verbose:
        print("\nSaving outputs file to ", save_path)
    outputDf.to_pickle(save_path)
    return outputDf
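
ImageFolder receives utils.check_empty_file as its is_valid_file callback, so zero-byte images are skipped when the dataset is listed. The helper isn't shown on this page; a minimal sketch, assuming it simply rejects empty files:

import os

def check_empty_file(path):
    # Assumed behavior: accept a file only if it exists and is non-empty.
    return os.path.isfile(path) and os.path.getsize(path) > 0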
Example #4
def bow_matrix(train_text, test_text, max_features, load_path=None, save_path=None):
    # Identity preprocessor/tokenizer: documents are expected to be pre-tokenized.
    vectorizer = CountVectorizer(max_features=max_features,
                                 preprocessor=lambda x: x,
                                 tokenizer=lambda x: x)

    if load_path:
        vectorizer.vocabulary_ = utils.load_pickle(load_path)
        features_train = vectorizer.transform(train_text).toarray()
    else:
        features_train = vectorizer.fit_transform(train_text).toarray()

    vocabulary    = vectorizer.vocabulary_
    feature_names = vectorizer.get_feature_names()

    features_test = vectorizer.transform(test_text).toarray()

    new_train_df = pd.DataFrame(data=features_train, columns=feature_names)
    new_test_df  = pd.DataFrame(data=features_test, columns=feature_names)

    if save_path:
        utils.save_pickle(vocabulary, save_path)

    return new_train_df, new_test_df, vocabulary
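
Because both preprocessor and tokenizer are identity lambdas, bow_matrix expects already-tokenized documents (lists of token lists) rather than raw strings. A hypothetical call with toy data:

train_text = [["the", "cat", "sat"], ["the", "dog", "ran"]]
test_text  = [["a", "cat", "ran"]]

train_df, test_df, vocab = bow_matrix(train_text, test_text, max_features=1000)
print(train_df.shape)   # one row per document, one column per vocabulary token
print(sorted(vocab))    # the vocabulary tokens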
Example #5
    unlabelNoManualIndex.to_csv(unlabelNoManualPath, index=False)

    # If outputs file already exist, skip inference
    print("\nSTEP: Perform inference on remaining unlabeled set.")
    if not fullOutputPath.is_file():
        mutils.dataset_inference_unlabeled(unlabelNoManualPath, dataTransforms['val'], modelPath,
                            fullOutputPath, batch_size=inferBatchSize, seed=seed, verbose=True)
    else:
        print("Output file already exists: {}\nSkipping inference.".format(fullOutputPath))

    print("\nUsing thresholds:\nUpper: {:.4f}\nLower: {:.4f}".format(upperThresh, lowerThresh))

    ## Perform automatic labeling
    print("\nSTEP: Automatic labeling.")
    unlabeledNoManualIndex = pd.read_csv(unlabelNoManualPath)
    pickleData             = utils.load_pickle(fullOutputPath)

    outputs, imgHashes, _  = dutils.load_outputs_df(fullOutputPath)
    outputs = outputs[:, 0]

    print("\nAutomatic labeling with upper positive ratio {:.1f}%:".format(upperThreshPercent*100))
    autoIndex = dutils.automatic_labeling(outputs, imgHashes, unlabeledNoManualIndex, upperThresh,
                                                     lowerThresh, rede, target_class=target_class)
    autoIndex.to_csv(autoLabelIndexPath, index=False)

    plot_outputs_histogram(outputs, lower_thresh=lowerThresh, upper_thresh=upperThresh,
                        title="Unlabeled Outputs Histogram", save_path=unlabelHistogramPath,
                        log=True, show=False)

    ## Merge labeled sets
    print("\nMerge auto and manual labeled sets.")
Example #6
    / "history_{}_no_finetune_{}_epochs_rede_{}_iteration_{}.pickle".format(datasetName, epochs, rede, iteration)

resultsFolder        = Path(dirs.results) / historyPath.stem
nameEnd  = "history_{}_epochs_rede_{}_iteration_{}.pdf".format(epochs, rede, iteration)
lossName = "loss_"     + nameEnd
accName  = "accuracy_" + nameEnd
f1Name   = "f1_"       + nameEnd

if not historyPath.is_file():
    print("History file does not exist.\nFile:\n", historyPath)
    print("\nExiting program.")
    exit()

dirs.create_folder(resultsFolder)

history = utils.load_pickle(historyPath)

print(history.keys())
valLoss     = history['loss-val']
trainLoss   = history['loss-train']
trainAcc    = history['acc-train']
valAcc      = history['acc-val']
trainF1     = np.array(history['f1-train'])[:, 0]
valF1       = np.array(history['f1-val'])[:, 0]

plot_model_history([trainLoss, valLoss], data_labels=["Train Loss", "Val Loss"], xlabel="Epochs",
                     ylabel="Loss", title="Training loss history", save_path=resultsFolder / lossName,
                     show=False)

plot_model_history([trainAcc, valAcc], data_labels=["Train Acc", "Val Acc"], xlabel="Epochs",
                     ylabel="Acc", title="Training accuracy history", save_path=resultsFolder / accName,
                     show=False)
Example #7
                    fileLen = entryDf.shape[0]

                    entryDf['Class'] = entryDf['FramePath'].apply(get_class)
                    entryDf['Rede'] = [rede]*fileLen
                    entryDf['Validation'] = [val_type]*fileLen
                    entryDf['Dataset'] = [net_type]*fileLen
                    entryDf['Set'] = entryDf['FramePath'].apply(get_set)

                    print(entryDf.groupby('Class').count())
                    if allDatasets is None:
                        allDatasets = entryDf
                    else:
                        allDatasets = pd.concat([allDatasets, entryDf], ignore_index=True)
                    utils.save_pickle(allDatasets, dfPath)
    else:
        allDatasets = utils.load_pickle(dfPath)

    print(allDatasets.groupby('Rede').count())
    print()
    print(allDatasets.groupby('Dataset').count())
    print(allDatasets.groupby('Set').count())
    targetNet = 'reference'
    tablePath = Path(dirs.results) / 'dataset_counts_sets_{}.xlsx'.format(targetNet)

    index = allDatasets['Dataset'] == targetNet
    view = allDatasets.loc[index, :]
    
    index = allDatasets['Rede'] == 1
    view = allDatasets.loc[index, :]
    
    index = allDatasets['Validation'] == 'ref'
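
A side note on the accumulation loop above: calling pd.concat once per entry grows allDatasets quadratically. A common alternative (a sketch, not the original code) collects the per-entry frames in a list and concatenates once:

import pandas as pd

frames = []
for entryDf in entry_dataframes:  # hypothetical iterable of the per-entry DataFrames
    frames.append(entryDf)
allDatasets = pd.concat(frames, ignore_index=True) if frames else None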
Example #8
indexPath    = Path(dirs.iter_folder) / \
                "full_dataset_softmax/iteration_{}/unlabeled_images_iteration_{}.csv".format(iteration-1, iteration-1)
savedModelsFolder = Path(
    dirs.saved_models) / "full_dataset_rede_{}_softmax/iteration_{}".format(
        rede, iteration)
outputPath   = savedModelsFolder / \
                "outputs_full_dataset_{}_epochs_rede_{}_iteration_{}.pickle".format(epochs, rede, iteration)
newIndexPath = Path(dirs.iter_folder) / \
                "full_dataset/iteration_{}/automatic_labeled_images_iteration_{}.csv".format(iteration, iteration)

idealUpperThresh = 0.8923  # Ratio 99%
idealLowerThresh = 0.0904  # Ratio 1%

indexDf = pd.read_csv(indexPath)
pickleData = utils.load_pickle(outputPath)

indexDf = dutils.remove_duplicates(indexDf, "FrameHash")
outputs, imgHashes, _ = dutils.load_outputs_df(outputPath)

outputs = outputs[:, 0]

indexDf.set_index("FrameHash", drop=False, inplace=True)

print("\nAutomatic labeling with upper positive ratio 99%:")
posHashes, negHashes = dutils.automatic_labeling(outputs, imgHashes,
                                                 idealUpperThresh,
                                                 idealLowerThresh)

newLabeledIndex = dutils.get_classified_index(indexDf,
                                              posHashes,
                                              negHashes)  # final argument assumed; the snippet is cut off here
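
Here automatic_labeling returns the hashes of confidently positive and confidently negative images. A minimal sketch of that decision rule, assuming the outputs are positive-class scores (this is not the project's actual implementation):

import numpy as np

def split_by_thresholds(outputs, hashes, upper_thresh, lower_thresh):
    # Confident positives above the upper threshold, confident negatives
    # below the lower one; everything in between stays unlabeled.
    outputs = np.asarray(outputs)
    hashes  = np.asarray(hashes)
    pos_hashes = hashes[outputs >= upper_thresh]
    neg_hashes = hashes[outputs <= lower_thresh]
    return pos_hashes, neg_hashes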