def getBanditData(dataset, logger, frac, count):
    # Replay the test split of `dataset` as a (subsampled, repeated) stream and let the
    # logging policy generate bandit feedback for it.
    alterDataset = DatasetReader.DatasetReader(copy_dataset=dataset, verbose=False)
    alterDataset.trainFeatures = dataset.testFeatures
    alterDataset.trainLabels = dataset.testLabels

    streamer = Logger.DataStream(dataset=alterDataset, verbose=False)
    replayed_dataset = DatasetReader.DatasetReader(copy_dataset=alterDataset, verbose=False)
    features, labels = streamer.generateStream(subsampleFrac=frac, replayCount=count)
    replayed_dataset.trainFeatures = features
    replayed_dataset.trainLabels = labels

    sampledLabels, sampledLogPropensity, sampledLoss = logger.generateLog(replayed_dataset)

    bandit_dataset = DatasetReader.BanditDataset(dataset=replayed_dataset, verbose=False)
    replayed_dataset.freeAuxiliaryMatrices()
    del replayed_dataset
    alterDataset.freeAuxiliaryMatrices()
    del alterDataset

    bandit_dataset.registerSampledData(sampledLabels, sampledLogPropensity, sampledLoss)
    return bandit_dataset
def ComputeCdf(pathToDatasets, pathToOutput):
    dic = {}
    globalDic = {}
    windowRange = [0.1, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
    for window in windowRange:
        for (dirpath, dirnames, filenames) in os.walk(pathToDatasets):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                print('Start processing of dataset: {}'.format(filepath))
                print('window = {}'.format(window))
                videoId = dirpath.split('/')[-1]
                print('Video id = {}'.format(videoId))
                dr = DatasetReader.DatasetReader(filepath)
                dr.ReadDataset()
                if videoId not in dic:
                    dic[videoId] = {}
                if window not in dic[videoId]:
                    dic[videoId][window] = []
                if window not in globalDic:
                    globalDic[window] = []
                # r = dr.ComputeStatistic(window)
                r = dr.ComputeAllPositions(window)
                dic[videoId][window] += r
                globalDic[window] += r
    # compute the cdf:
    for videoId in dic:
        print("Compute CDF for video {}".format(videoId))
        WritePercentile('{}/{}.csv'.format(pathToOutput, videoId), dic[videoId])
    print("Compute CDF for global")
    WritePercentile('{}/global.csv'.format(pathToOutput), globalDic)
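# Hypothetical invocation sketch for ComputeCdf above. Both paths are placeholders and only
# assume the directory layout the os.walk loop expects (one subdirectory per video id).
if __name__ == '__main__':
    ComputeCdf(pathToDatasets='./datasets', pathToOutput='./results/cdf')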
def getLogger(dataset, frac):
    # Train the logging policy on a `frac` subsample of the training stream.
    streamer = Logger.DataStream(dataset=dataset, verbose=False)
    features, labels = streamer.generateStream(subsampleFrac=frac, replayCount=1)

    subsampled_dataset = DatasetReader.DatasetReader(copy_dataset=dataset, verbose=False)
    subsampled_dataset.trainFeatures = features
    subsampled_dataset.trainLabels = labels

    logger = Logger.Logger(subsampled_dataset, loggerC=-1, stochasticMultiplier=1, verbose=False)
    subsampled_dataset.freeAuxiliaryMatrices()
    del subsampled_dataset
    return logger
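# Hypothetical usage sketch combining getLogger and getBanditData above, assuming both live in
# the same script and that DatasetReader/Logger are imported as in those functions. The corpus
# name and fractions are illustrative (the experiment script below loads corpora the same way).
if __name__ == '__main__':
    dataset = DatasetReader.DatasetReader(copy_dataset=None, verbose=False)
    dataset.loadDataset(corpusName='scene')
    logger = getLogger(dataset, frac=0.05)        # logging policy trained on a 5% subsample
    bandit_dataset = getBanditData(dataset, logger, frac=1.0, count=4)  # replay test split 4x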
def get_SNPOEM_rec(x, t, y, nominal_Q, x_test):
    # Wrap logged bandit feedback (contexts x, actions t, losses y, propensities nominal_Q)
    # in a BanditDataset, fit a Self-Normalized POEM policy, and return the fitted wrapper
    # together with its recommended action for each row of x_test.
    mydata = DatasetReader.BanditDataset(None, False)
    mydata.trainFeatures = np.hstack((x.copy(), np.ones((len(x), 1))))
    mydata.sampledLabels = np.zeros((len(t), max(t) + 1))
    mydata.sampledLabels[range(len(t)), t] = 1.
    mydata.trainLabels = np.empty(mydata.sampledLabels.shape)
    mydata.sampledLoss = y.copy()
    mydata.sampledLoss -= mydata.sampledLoss.min()
    mydata.sampledLoss /= mydata.sampledLoss.max()  # computed on training set
    mydata.sampledLogPropensity = np.log(nominal_Q)
    # ones_like vs ones_line? (original note: testFeatures uses np.ones_like(x_test) here,
    # while the prediction below uses x_test itself)
    mydata.testFeatures = np.hstack((np.ones_like(x_test), np.ones((len(x_test), 1))))
    mydata.testLabels = np.array([])
    mydata.createTrainValidateSplit()

    pool = None
    coef = None
    maj = Skylines.PRMWrapper(mydata, n_iter=1000, tol=1e-6, minC=0, maxC=-1,
                              minV=0, maxV=-1, minClip=0, maxClip=0,
                              estimator_type='Self-Normalized', verbose=True,
                              parallel=pool, smartStart=coef)
    maj.calibrateHyperParams()
    maj.validate()

    Xtest1 = np.hstack((x_test, np.ones((len(x_test), 1))))
    rec = Xtest1.dot(maj.labeler.coef_).argmax(1)
    return [maj, rec]
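# Hypothetical usage sketch for get_SNPOEM_rec above, on synthetic logged-bandit data. The
# sizes and the uniform logging propensities in nominal_Q are made up for illustration; it
# only assumes numpy plus the DatasetReader/Skylines modules the function references.
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(200, 5)                # logged contexts
t = rng.randint(0, 3, size=200)      # logged actions (3 arms)
y = rng.rand(200)                    # observed losses for the logged actions
nominal_Q = np.full(200, 1.0 / 3.0)  # propensity of each logged action under the logger
x_test = rng.randn(50, 5)            # contexts to recommend actions for

maj, rec = get_SNPOEM_rec(x, t, y, nominal_Q, x_test)
print(rec[:10])                      # recommended arm index per test context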
def ComputeExpectedQoE(pathToDatasets, pathToOutput, pathToDistToQoE, pathToQec):
    # windowRange = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
    windowRange = [0.5, 1, 2, 3, 5]
    # windowRange = [1]
    qecReader = DatasetReader.QECReader(pathToQec)
    distToQoEReader = DatasetReader.DistanceToQoEReader(pathToDistToQoE)
    drDict = {}
    for (dirpath, dirnames, filenames) in os.walk(pathToDatasets):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            print('Start processing of dataset: {}'.format(filepath))
            videoId = dirpath.split('/')[-1] + filename[:3]
            print('Video id = {}'.format(videoId))
            drDict[videoId] = DatasetReader.DatasetReader(filepath)
            drDict[videoId].ReadDataset()
    for window in windowRange:
        print('Start processing of window = {}'.format(window))
        with open('{}/qoeForWindow{}s.csv'.format(pathToOutput, window), 'w') as o:
            o.write('nbQec avgQoe minQoe maxQoe medQoe\n')
            qMin = {}
            qMax = {}
            for nbQec in range(1, 33):
                if nbQec == 13:
                    continue
                # randoms = [None] if nbQec > 5 or nbQec == 1 else range(1,11)
                randoms = range(1, 11)
                qoeList = []
                for random in randoms:
                    dic = {}
                    for videoId in drDict:
                        dr = drDict[videoId]
                        if videoId not in dic:
                            dic[videoId] = {}
                        if window not in dic[videoId]:
                            dic[videoId][window] = []
                        # r = dr.ComputeStatistic(window)
                        r = dr.ComputeAllPositionsWithTimestamp(window, qecReader, nbQec, random)
                        dic[videoId][window] = r
                        # globalDic[window] = r
                    layout = 'qualityCubeMapLower'
                    # compute the QoE
                    for videoId in dic:
                        r = ComputeExpectedLiveQoE(
                            '{}/{}nbQec{}window{}RandomId{}.csv'.format(
                                pathToOutput, videoId, nbQec, window,
                                random if random is not None else 0),
                            dic[videoId], distToQoEReader, layout, window)
                        qoeList += r  # [sum(r)/len(r)]
                if nbQec == 1:
                    qMin[videoId] = sum(qoeList) / len(qoeList)
                if nbQec == 7:
                    qMax[videoId] = sum(qoeList) / len(qoeList)
                o.write('{} {} {} {} {}\n'.format(nbQec, sum(qoeList) / len(qoeList),
                                                  min(qoeList), max(qoeList),
                                                  np.percentile(qoeList, 50)))
    best = None
    qBest = None
    for videoId in qMin:
        q = qMax[videoId] - qMin[videoId]
        if qBest is None or qBest < q:
            qBest = q
            best = videoId
    print(best, qBest)
if __name__ == '__main__':
    fracs = [0.02, 0.05, 0.08, 0.11, 0.14, 0.17, 0.20]
    log2_frac = 0.30
    syst_frac = 0.35
    ratios = [0.1, 0.25, 0.5, 1, 3, 5, 7, 9]
    name = sys.argv[1]
    # test_frac = float(sys.argv[2])
    fname = name
    test_frac = 1

    results = open("../results2/" + fname + ".txt", 'w')
    log = open("../results2/" + fname + "-log.txt", 'w')

    dataset = DatasetReader.DatasetReader(copy_dataset=None, verbose=False)
    if name == 'rcv1_topics':
        dataset.loadDataset(corpusName=name, labelSubset=[33, 59, 70, 102])
    else:
        dataset.loadDataset(corpusName=name)

    n = np.shape(dataset.testFeatures)[0]
    dataset.testFeatures = dataset.testFeatures[:int(n * test_frac), :]
    dataset.testLabels = dataset.testLabels[:int(n * test_frac), :]

    loggers = []
    for f in fracs:
        loggers.append(getLogger(dataset, f))
    logger2 = getLogger(dataset, log2_frac)
    syst = getLogger(dataset, syst_frac)
''' JOSE JAVIER JO ESCOBAR 14343 MAIN '''
import numpy as np
from NNet import NNet
import DatasetReader as DR

NN = NNet([784, 60, 10])
Training_data, Verification_data, Testing_data = DR.data_load()
Training_data = list(map(lambda x, y: (x, y), Training_data[0], Training_data[1]))
test = list(map(lambda x, y: (x, y), Testing_data[0], Testing_data[1]))
NN.SGD(Training_data, 5, 40, 3.0, test_data=test)  # 30,10,3.0 best

x = np.array(DR.image_read())
# for i in range(28):
#     print([x[i*28 + j] for j in range(28)])
# print(x)
res = NN.ff(np.array([x]).reshape(784, 1))
# print(res.shape)
res = res * 100
print(' \n 0 ---> CIRCULO\n'
      ' 1 ---> HUEVO\n'
      ' 2 ---> CASA\n'
      ' 3 ---> INTERROGACION\n'
      ' 4 ---> CARA TRISTE\n'
      ' 5 ---> CARA FELIZ\n'
import ImageTools
import DatasetReader

input_directory = "./dataset/"
output_directory = "./dataset-reshaped/"
list_batch_file = \
    ["data_batch_1", "data_batch_2", "data_batch_3",
     "data_batch_4", "data_batch_5", "test_batch"]

# for cifar10_file in list_batch_file:
#     ImageTools.reshape_images_cifar10_file(
#         input_directory + cifar10_file, output_directory + cifar10_file)

reader = DatasetReader.DatasetReader("./dataset-reshaped/data_batch_1", 3)
batch = reader.get_batch()
print(batch["labels"][1])
ImageTools.show_image_from_pixels(batch["data"][1])
import PDTTest
from matplotlib import pyplot as plt

if __name__ == '__main__':
    exptNum = 1
    pool = None
    if len(sys.argv) > 1:
        exptNum = int(sys.argv[1])
    if len(sys.argv) > 2:
        import pathos.multiprocessing as mp
        pool = mp.ProcessingPool(7)

    if exptNum == 1:
        for name in ['scene', 'yeast', 'rcv1_topics', 'tmc2007']:
            dataset = DatasetReader.DatasetReader(copy_dataset=None, verbose=True)
            if name == 'rcv1_topics':
                dataset.loadDataset(corpusName=name, labelSubset=[33, 59, 70, 102])
            else:
                dataset.loadDataset(corpusName=name)

            svm_scores = []
            crf_scores = []
            crf_expected_scores = []
            logger_scores = []
            logger_map_scores = []
            prm_scores = []
            prm_map_scores = []
            erm_scores = []
            erm_map_scores = []
import PDTTest
from matplotlib import pyplot as plt

if __name__ == '__main__':
    exptNum = 1
    pool = None
    if len(sys.argv) > 1:
        exptNum = int(sys.argv[1])
    if len(sys.argv) > 2:
        import pathos.multiprocessing as mp
        pool = mp.ProcessingPool(7)

    if exptNum == 1:
        for name in ['scene', 'yeast', 'rcv1_topics', 'tmc2007']:
            dataset = DatasetReader.DatasetReader(copy_dataset=None, verbose=True)
            if name == 'rcv1_topics':
                dataset.loadDataset(corpusName=name, labelSubset=[33, 59, 70, 102])
            else:
                dataset.loadDataset(corpusName=name)

            svm_scores = []
            crf_scores = []
            crf_expected_scores = []
            logger_scores = []
            logger_map_scores = []
            prm_scores = []
            prm_map_scores = []
            erm_scores = []
            erm_map_scores = []
            poem_scores = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print("Starting training")
    training_start_time = time.time()
    iteration = 0
    learning_rate_manager = LearningRateManager.LearningRate(0.001, 0.6, 80)

    for epoch in range(0, EPOCHS):
        print("Epoch: " + str(epoch))
        # Training with all the cifar10 files for each epoch
        for cifar10_file in LIST_BATCH_FILES:
            print("File " + cifar10_file)
            reader = DatasetReader.DatasetReader(
                DIR_BATCH_FILES + cifar10_file, BATCH_SIZE)
            batch = reader.get_batch()
            while batch != {}:
                _, loss = sess.run(
                    [training_op, error],
                    feed_dict={
                        input_batch: batch["data"],
                        label_batch: batch["labels"],
                        learning_rate: learning_rate_manager.learning_rate
                    })
                OutputAnalyzer.write_error_to_file(ERROR_FILE, iteration, loss)
                learning_rate_manager.add_error(loss)
                iteration = iteration + 1