def getBanditData(dataset, logger, frac, count):
    """Create a bandit dataset from a replayed stream of ``dataset``'s test split.

    A copy of ``dataset`` is made whose ``trainFeatures``/``trainLabels`` are
    replaced by the original ``testFeatures``/``testLabels``. That copy is
    streamed with ``subsampleFrac=frac`` and ``replayCount=count``, the
    resulting stream is handed to ``logger.generateLog``, and the values it
    returns are registered on a new ``DatasetReader.BanditDataset``.

    Args:
        dataset: Source dataset; its ``testFeatures`` and ``testLabels`` are
            read here. This function assigns attributes only on its copies.
        logger: Object exposing ``generateLog(dataset)`` that returns three
            values (sampled labels, sampled log-propensities, sampled losses).
        frac: Forwarded to ``generateStream`` as ``subsampleFrac``.
        count: Forwarded to ``generateStream`` as ``replayCount``.

    Returns:
        The ``DatasetReader.BanditDataset`` built from the replayed dataset,
        after ``registerSampledData`` has been called on it.
    """
    # Stand-in dataset: the "train" slots carry the caller's test split so the
    # streamer below draws from the test data.
    test_as_train = DatasetReader.DatasetReader(copy_dataset=dataset,
                                                verbose=False)
    test_as_train.trainFeatures = dataset.testFeatures
    test_as_train.trainLabels = dataset.testLabels

    stream = Logger.DataStream(dataset=test_as_train, verbose=False)
    replay = DatasetReader.DatasetReader(copy_dataset=test_as_train,
                                         verbose=False)
    replay.trainFeatures, replay.trainLabels = stream.generateStream(
        subsampleFrac=frac, replayCount=count)

    log_labels, log_propensity, log_loss = logger.generateLog(replay)
    bandit = DatasetReader.BanditDataset(dataset=replay, verbose=False)

    # Release the intermediate datasets before registering the sampled data;
    # the order (replayed copy first, then the stand-in) is kept deliberately.
    replay.freeAuxiliaryMatrices()
    del replay
    test_as_train.freeAuxiliaryMatrices()
    del test_as_train

    bandit.registerSampledData(log_labels, log_propensity, log_loss)
    return bandit
def getLogger(dataset, frac):
    """Construct a ``Logger.Logger`` from a subsampled stream of ``dataset``.

    ``dataset`` is streamed once (``replayCount=1``) with
    ``subsampleFrac=frac``; the streamed features and labels are installed as
    the training data of a copy of ``dataset``, and a ``Logger.Logger`` is
    constructed from that copy with ``loggerC=-1`` and
    ``stochasticMultiplier=1``.

    Args:
        dataset: Dataset to stream and to copy.
        frac: Forwarded to ``generateStream`` as ``subsampleFrac``.

    Returns:
        The constructed ``Logger.Logger`` instance.
    """
    stream = Logger.DataStream(dataset=dataset, verbose=False)
    stream_features, stream_labels = stream.generateStream(subsampleFrac=frac,
                                                           replayCount=1)

    subsample = DatasetReader.DatasetReader(copy_dataset=dataset,
                                            verbose=False)
    subsample.trainFeatures = stream_features
    subsample.trainLabels = stream_labels

    built_logger = Logger.Logger(
        subsample,
        loggerC=-1,
        stochasticMultiplier=1,
        verbose=False,
    )

    # The subsampled copy is only needed to construct the logger; free its
    # auxiliary matrices and drop the local reference before returning.
    subsample.freeAuxiliaryMatrices()
    del subsample
    return built_logger
svm_scores.append(svm.test()) crf = Skylines.CRF(dataset=supervised_dataset, tol=1e-6, minC=-2, maxC=2, verbose=True, parallel=pool) crf_time.append(crf.validate()) crf_scores.append(crf.test()) crf_expected_scores.append(crf.expectedTestLoss()) supervised_dataset.freeAuxiliaryMatrices() del supervised_dataset streamer = Logger.DataStream(dataset=dataset, verbose=True) features, labels = streamer.generateStream(subsampleFrac=0.05, replayCount=1) subsampled_dataset = DatasetReader.DatasetReader( copy_dataset=dataset, verbose=True) subsampled_dataset.trainFeatures = features subsampled_dataset.trainLabels = labels logger = Logger.Logger(subsampled_dataset, loggerC=-1, stochasticMultiplier=1, verbose=True) logger_map_scores.append(logger.crf.test()) logger_scores.append(logger.crf.expectedTestLoss()) replayed_dataset = DatasetReader.DatasetReader(