def getBanditData(dataset, logger, frac, count):
    """Build a ``BanditDataset`` from the test split of ``dataset``.

    Steps, in order:
      1. Copy ``dataset`` and overwrite the copy's training features/labels
         with the original's test features/labels.
      2. Generate a stream from that copy using ``subsampleFrac=frac`` and
         ``replayCount=count``; store the stream as the training split of a
         second copy.
      3. Pass the second copy to ``logger.generateLog`` to obtain sampled
         labels, log-propensities and losses.
      4. Wrap the second copy in a ``BanditDataset``, release both temporary
         copies, and register the sampled data on the bandit dataset.

    Args:
        dataset: source dataset; must expose ``testFeatures`` and
            ``testLabels`` and be accepted as ``copy_dataset``.
        logger: object providing ``generateLog(dataset)`` that returns a
            3-tuple.
        frac: forwarded as ``subsampleFrac`` to ``generateStream``.
        count: forwarded as ``replayCount`` to ``generateStream``.

    Returns:
        The ``DatasetReader.BanditDataset`` with the sampled data registered.
    """
    # Copy whose "training" split is really the original test split.
    test_as_train = DatasetReader.DatasetReader(
        copy_dataset=dataset, verbose=False)
    test_as_train.trainFeatures = dataset.testFeatures
    test_as_train.trainLabels = dataset.testLabels

    stream_source = Logger.DataStream(dataset=test_as_train, verbose=False)
    replay_copy = DatasetReader.DatasetReader(
        copy_dataset=test_as_train, verbose=False)

    # Streamed features/labels become the training split of the replay copy.
    replay_copy.trainFeatures, replay_copy.trainLabels = (
        stream_source.generateStream(subsampleFrac=frac, replayCount=count))

    picked_labels, picked_log_propensity, picked_loss = logger.generateLog(
        replay_copy)
    result = DatasetReader.BanditDataset(dataset=replay_copy, verbose=False)

    # Release the temporary copies before registering the logged samples.
    replay_copy.freeAuxiliaryMatrices()
    del replay_copy

    test_as_train.freeAuxiliaryMatrices()
    del test_as_train

    result.registerSampledData(
        picked_labels, picked_log_propensity, picked_loss)
    return result
Пример #2
0
def getLogger(dataset, frac):
    """Build a ``Logger.Logger`` from a subsampled stream of ``dataset``.

    A stream is generated from ``dataset`` with ``subsampleFrac=frac`` and
    ``replayCount=1``. Its features and labels replace the training split of
    a copy of ``dataset``, and the logger is constructed on that copy with
    ``loggerC=-1`` and ``stochasticMultiplier=1``.

    Args:
        dataset: source dataset, used both for streaming and as the
            ``copy_dataset`` template.
        frac: forwarded as ``subsampleFrac`` to ``generateStream``.

    Returns:
        The constructed ``Logger.Logger`` instance.
    """
    stream_source = Logger.DataStream(dataset=dataset, verbose=False)
    stream_features, stream_labels = stream_source.generateStream(
        subsampleFrac=frac, replayCount=1)

    reduced = DatasetReader.DatasetReader(copy_dataset=dataset, verbose=False)
    reduced.trainFeatures = stream_features
    reduced.trainLabels = stream_labels

    built_logger = Logger.Logger(
        reduced, loggerC=-1, stochasticMultiplier=1, verbose=False)

    # The temporary copy is no longer needed once the logger exists.
    reduced.freeAuxiliaryMatrices()
    del reduced
    return built_logger
Пример #3
0
                svm_scores.append(svm.test())

                crf = Skylines.CRF(dataset=supervised_dataset,
                                   tol=1e-6,
                                   minC=-2,
                                   maxC=2,
                                   verbose=True,
                                   parallel=pool)
                crf_time.append(crf.validate())
                crf_scores.append(crf.test())
                crf_expected_scores.append(crf.expectedTestLoss())

                supervised_dataset.freeAuxiliaryMatrices()
                del supervised_dataset

                streamer = Logger.DataStream(dataset=dataset, verbose=True)
                features, labels = streamer.generateStream(subsampleFrac=0.05,
                                                           replayCount=1)

                subsampled_dataset = DatasetReader.DatasetReader(
                    copy_dataset=dataset, verbose=True)
                subsampled_dataset.trainFeatures = features
                subsampled_dataset.trainLabels = labels
                logger = Logger.Logger(subsampled_dataset,
                                       loggerC=-1,
                                       stochasticMultiplier=1,
                                       verbose=True)
                logger_map_scores.append(logger.crf.test())
                logger_scores.append(logger.crf.expectedTestLoss())

                replayed_dataset = DatasetReader.DatasetReader(