def getBanditData(dataset, logger, frac, count):
    # Replay the test split as a training stream, then have the logger
    # generate bandit feedback (sampled labels, log-propensities, losses).
    alterDataset = DatasetReader.DatasetReader(copy_dataset=dataset,
                                               verbose=False)
    alterDataset.trainFeatures = dataset.testFeatures
    alterDataset.trainLabels = dataset.testLabels

    streamer = Logger.DataStream(dataset=alterDataset, verbose=False)
    replayed_dataset = DatasetReader.DatasetReader(copy_dataset=alterDataset,
                                                   verbose=False)
    features, labels = streamer.generateStream(subsampleFrac=frac,
                                               replayCount=count)
    replayed_dataset.trainFeatures = features
    replayed_dataset.trainLabels = labels
    sampledLabels, sampledLogPropensity, sampledLoss = logger.generateLog(
        replayed_dataset)
    bandit_dataset = DatasetReader.BanditDataset(dataset=replayed_dataset,
                                                 verbose=False)

    replayed_dataset.freeAuxiliaryMatrices()
    del replayed_dataset

    alterDataset.freeAuxiliaryMatrices()
    del alterDataset

    bandit_dataset.registerSampledData(sampledLabels, sampledLogPropensity,
                                       sampledLoss)
    return bandit_dataset
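For context, a hypothetical driver wiring this helper to the getLogger function from Example #3 below; the corpus name 'scene' and the frac/count values are assumptions, not part of the original snippet:

# Hypothetical usage sketch (getLogger is defined in Example #3)
dataset = DatasetReader.DatasetReader(copy_dataset=None, verbose=False)
dataset.loadDataset(corpusName='scene')

logger = getLogger(dataset, 0.05)
bandit_dataset = getBanditData(dataset, logger, 0.05, 4)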
Example #2
def ComputeCdf(pathToDatasets, pathToOutput):
    # Walk every dataset file, collect samples per video and per window
    # size, then write per-video and global CDFs.
    dic = {}
    globalDic = {}
    windowRange = [0.1, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
    for window in windowRange:
        for (dirpath, dirnames, filenames) in os.walk(pathToDatasets):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                print('Start processing of dataset: {}'.format(filepath))
                print('window = {}'.format(window))
                videoId = dirpath.split('/')[-1]
                print('Video id = {}'.format(videoId))

                dr = DatasetReader.DatasetReader(filepath)
                dr.ReadDataset()
                if videoId not in dic:
                    dic[videoId] = {}
                if window not in dic[videoId]:
                    dic[videoId][window] = []
                if window not in globalDic:
                    globalDic[window] = []
                # r = dr.ComputeStatistic(window)
                r = dr.ComputeAllPositions(window)
                dic[videoId][window] += r
                globalDic[window] += r

    # compute the CDF:
    for videoId in dic:
        print("Compute CDF for video {}".format(videoId))
        WritePercentile('{}/{}.csv'.format(pathToOutput, videoId), dic[videoId])
    print("Compute CDF for global")
    WritePercentile('{}/global.csv'.format(pathToOutput), globalDic)
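WritePercentile is defined elsewhere in this project; the sketch below is one plausible implementation, assuming each entry of dic maps a window size to a list of numeric samples (the CSV layout is an assumption):

import numpy as np

def WritePercentile(outputPath, dic):
    # Hypothetical sketch: one row per percentile, one column per window size
    windows = sorted(dic)
    with open(outputPath, 'w') as o:
        o.write('percentile {}\n'.format(' '.join(str(w) for w in windows)))
        for p in range(101):
            row = [np.percentile(dic[w], p) for w in windows]
            o.write('{} {}\n'.format(p, ' '.join(str(v) for v in row)))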
Example #3
def getLogger(dataset, frac):
    # Fit a logging policy on a subsampled replay of the training stream.
    streamer = Logger.DataStream(dataset=dataset, verbose=False)
    features, labels = streamer.generateStream(subsampleFrac=frac,
                                               replayCount=1)
    subsampled_dataset = DatasetReader.DatasetReader(copy_dataset=dataset,
                                                     verbose=False)
    subsampled_dataset.trainFeatures = features
    subsampled_dataset.trainLabels = labels

    logger = Logger.Logger(subsampled_dataset,
                           loggerC=-1,
                           stochasticMultiplier=1,
                           verbose=False)

    subsampled_dataset.freeAuxiliaryMatrices()
    del subsampled_dataset
    return logger
Example #4
def get_SNPOEM_rec(x, t, y, nominal_Q, x_test):
    # Fit a Self-Normalized POEM policy on logged bandit feedback and
    # return the trained wrapper plus recommended actions for x_test.
    mydata = DatasetReader.BanditDataset(None, False)
    mydata.trainFeatures = np.hstack((x.copy(), np.ones((len(x), 1))))
    mydata.sampledLabels = np.zeros((len(t), max(t) + 1))
    mydata.sampledLabels[range(len(t)), t] = 1.
    mydata.trainLabels = np.empty(mydata.sampledLabels.shape)
    mydata.sampledLoss = y.copy()
    mydata.sampledLoss -= mydata.sampledLoss.min()
    mydata.sampledLoss /= mydata.sampledLoss.max()
    # computed on training set
    mydata.sampledLogPropensity = np.log(nominal_Q)
    # mirror the training features: raw test features plus a bias column
    mydata.testFeatures = np.hstack((x_test, np.ones((len(x_test), 1))))
    mydata.testLabels = np.array([])
    mydata.createTrainValidateSplit()
    pool = None
    coef = None
    maj = Skylines.PRMWrapper(mydata,
                              n_iter=1000,
                              tol=1e-6,
                              minC=0,
                              maxC=-1,
                              minV=0,
                              maxV=-1,
                              minClip=0,
                              maxClip=0,
                              estimator_type='Self-Normalized',
                              verbose=True,
                              parallel=pool,
                              smartStart=coef)
    maj.calibrateHyperParams()
    maj.validate()
    Xtest1 = np.hstack((x_test, np.ones((len(x_test), 1))))
    rec = Xtest1.dot(maj.labeler.coef_).argmax(1)

    return [maj, rec]
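A hypothetical smoke test with synthetic data, to illustrate the expected shapes; none of these values come from the original code (x: contexts, t: logged integer actions, y: observed losses, nominal_Q: per-sample propensities of the logged actions):

import numpy as np

rng = np.random.RandomState(0)
n, d, k = 200, 5, 3
x = rng.randn(n, d)              # contexts
t = rng.randint(k, size=n)       # logged actions
y = rng.rand(n)                  # observed losses
nominal_Q = np.full(n, 1.0 / k)  # uniform logging policy
x_test = rng.randn(50, d)

maj, rec = get_SNPOEM_rec(x, t, y, nominal_Q, x_test)
print(rec[:10])                  # one recommended action per test context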
Example #5
def ComputeExpectedQoE(pathToDatasets, pathToOutput, pathToDistToQoE,
                       pathToQec):
    # For each window size and number of QECs, estimate the expected QoE
    # across all datasets and write summary statistics per window.
    #windowRange = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
    windowRange = [0.5, 1, 2, 3, 5]
    #windowRange = [1]
    qecReader = DatasetReader.QECReader(pathToQec)
    distToQoEReader = DatasetReader.DistanceToQoEReader(pathToDistToQoE)
    drDict = {}
    for (dirpath, dirnames, filenames) in os.walk(pathToDatasets):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            print('Start processing of dataset: {}'.format(filepath))
            videoId = dirpath.split('/')[-1] + filename[:3]
            print('Video id = {}'.format(videoId))

            drDict[videoId] = DatasetReader.DatasetReader(filepath)
            drDict[videoId].ReadDataset()

    for window in windowRange:
        print('Start processing of window = {}'.format(window))
        with open('{}/qoeForWindow{}s.csv'.format(pathToOutput, window),
                  'w') as o:
            o.write('nbQec avgQoe minQoe maxQoe medQoe\n')
            qMin = {}
            qMax = {}
            for nbQec in range(1, 33):
                if nbQec == 13:
                    continue
                #randoms = [None] if nbQec > 5 or nbQec == 1 else range(1,11)
                randoms = range(1, 11)

                qoeList = []
                for random in randoms:
                    dic = {}
                    for videoId in drDict:
                        dr = drDict[videoId]
                        if videoId not in dic:
                            dic[videoId] = {}
                        if window not in dic[videoId]:
                            dic[videoId][window] = []
                        # r = dr.ComputeStatistic(window)
                        r = dr.ComputeAllPositionsWithTimestamp(
                            window, qecReader, nbQec, random)
                        dic[videoId][window] = r
                        #globalDic[window] = r

                    layout = 'qualityCubeMapLower'

                    #compute the QoE
                    for videoId in dic:
                        r = ComputeExpectedLiveQoE(
                            '{}/{}nbQec{}window{}RandomId{}.csv'.format(
                                pathToOutput, videoId, nbQec, window,
                                random if random is not None else 0),
                            dic[videoId], distToQoEReader, layout, window)
                        qoeList += r  #[sum(r)/len(r)]
                        if nbQec == 1:
                            qMin[videoId] = sum(qoeList) / len(qoeList)
                        if nbQec == 7:
                            qMax[videoId] = sum(qoeList) / len(qoeList)
                o.write('{} {} {} {} {}\n'.format(nbQec,
                                                  sum(qoeList) / len(qoeList),
                                                  min(qoeList), max(qoeList),
                                                  np.percentile(qoeList, 50)))
        best = None
        qBest = None
        for videoId in qMin:
            q = qMax[videoId] - qMin[videoId]
            if qBest is None or qBest < q:
                qBest = q
                best = videoId
        print(best, qBest)
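Both entry points take directory paths; a hypothetical command-line wrapper (the argument layout is an assumption):

import sys

if __name__ == '__main__':
    # Hypothetical CLI: 2 args -> ComputeCdf, 4 args -> ComputeExpectedQoE
    if len(sys.argv) == 3:
        ComputeCdf(sys.argv[1], sys.argv[2])
    elif len(sys.argv) == 5:
        ComputeExpectedQoE(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        sys.exit('usage: pathToDatasets pathToOutput [pathToDistToQoE pathToQec]')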
Example #6
import sys

import numpy as np
import DatasetReader

if __name__ == '__main__':
    fracs = [0.02, 0.05, 0.08, 0.11, 0.14, 0.17, 0.20]
    log2_frac = 0.30
    syst_frac = 0.35

    ratios = [0.1, 0.25, 0.5, 1, 3, 5, 7, 9]

    name = sys.argv[1]
    #  test_frac = float(sys.argv[2])
    fname = name
    test_frac = 1

    results = open("../results2/" + fname + ".txt", 'w')
    log = open("../results2/" + fname + "-log.txt", 'w')
    dataset = DatasetReader.DatasetReader(copy_dataset=None, verbose=False)
    if name == 'rcv1_topics':
        dataset.loadDataset(corpusName=name, labelSubset=[33, 59, 70, 102])
    else:
        dataset.loadDataset(corpusName=name)

    n = np.shape(dataset.testFeatures)[0]
    dataset.testFeatures = dataset.testFeatures[:int(n * test_frac), :]
    dataset.testLabels = dataset.testLabels[:int(n * test_frac), :]

    loggers = []
    for f in fracs:
        loggers.append(getLogger(dataset, f))
    logger2 = getLogger(dataset, log2_frac)
    syst = getLogger(dataset, syst_frac)
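The snippet stops here; a hypothetical continuation that feeds each trained logger into the getBanditData helper shown at the top of this page (the count value is an assumption):

    # Hypothetical continuation: generate bandit feedback from each logger
    for f, lg in zip(fracs, loggers):
        bandit = getBanditData(dataset, lg, f, 4)
        log.write('frac={} bandit samples={}\n'.format(
            f, np.shape(bandit.sampledLabels)[0]))
    log.close()
    results.close()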
Example #7
'''
    JOSE JAVIER JO ESCOBAR
    14343
    MAIN
'''
import numpy as np
from NNet import NNet
import DatasetReader as DR

NN = NNet([784, 60, 10])
Training_data, Verification_data, Testing_data = DR.data_load()
Training_data = list(zip(Training_data[0], Training_data[1]))
test = list(zip(Testing_data[0], Testing_data[1]))

NN.SGD(Training_data, 5, 40, 3.0, test_data=test) # 30,10,3.0 best

x = np.array(DR.image_read())
# for i in range(28):
#     print([x[i*28 + j] for j in range(28)])
# print(x)

res = NN.ff(np.array([x]).reshape(784,1))
#print(res.shape)
res = res * 100

# Spanish class labels: circle, egg, house, question mark, sad face, happy face
print(' \n 0 ---> CIRCULO\n'
      ' 1 ---> HUEVO\n'
      ' 2 ---> CASA\n'
      ' 3 ---> INTERROGACION\n'
      ' 4 ---> CARA TRISTE\n'
      ' 5 ---> CARA FELIZ\n')
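The scores in res are per-class percentages; a small hypothetical post-processing step to report the winning label (assumes res has shape (10, 1), as produced by NN.ff above):

# Hypothetical: pick the highest-scoring class and report its confidence
pred = int(np.argmax(res))
print('Prediction: {} ({:.1f}%)'.format(pred, float(res[pred])))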
Example #8
import ImageTools
import DatasetReader

input_directory = "./dataset/"

output_directory = "./dataset-reshaped/"

list_batch_file = \
  ["data_batch_1", "data_batch_2", "data_batch_3", "data_batch_4", "data_batch_5", "test_batch"]

# for cifar10_file in list_batch_file:
#     ImageTools.reshape_images_cifar10_file(
#         input_directory + cifar10_file, output_directory + cifar10_file)


reader = DatasetReader.DatasetReader("./dataset-reshaped/data_batch_1", 3)
batch = reader.get_batch()
print(batch["labels"][1])
ImageTools.show_image_from_pixels(batch["data"][1])
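The project-local DatasetReader module is not shown on this page; below is a minimal sketch, written as an assumption, of the interface these CIFAR-10 snippets rely on: construction from a batch-file path plus a batch size, and get_batch() returning {"data": ..., "labels": ...} until an empty dict signals exhaustion (see Example #11):

import pickle

class DatasetReader(object):
    # Hypothetical sketch of the reader interface used above and in Example #11
    def __init__(self, path, batch_size):
        with open(path, 'rb') as f:
            raw = pickle.load(f, encoding='bytes')  # CIFAR-10 files are pickled dicts
        self.data = raw[b'data']
        self.labels = raw[b'labels']
        self.batch_size = batch_size
        self.cursor = 0

    def get_batch(self):
        # Serve fixed-size slices; an empty dict marks end of data
        if self.cursor >= len(self.labels):
            return {}
        lo, hi = self.cursor, self.cursor + self.batch_size
        self.cursor = hi
        return {"data": self.data[lo:hi], "labels": self.labels[lo:hi]}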

Example #9
import sys

import DatasetReader
import PDTTest
from matplotlib import pyplot as plt

if __name__ == '__main__':
    exptNum = 1
    pool = None
    if len(sys.argv) > 1:
        exptNum = int(sys.argv[1])

    if len(sys.argv) > 2:
        import pathos.multiprocessing as mp
        pool = mp.ProcessingPool(7)

    if exptNum == 1:
        for name in ['scene', 'yeast', 'rcv1_topics', 'tmc2007']:
            dataset = DatasetReader.DatasetReader(copy_dataset=None,
                                                  verbose=True)
            if name == 'rcv1_topics':
                dataset.loadDataset(corpusName=name,
                                    labelSubset=[33, 59, 70, 102])
            else:
                dataset.loadDataset(corpusName=name)

            svm_scores = []
            crf_scores = []
            crf_expected_scores = []
            logger_scores = []
            logger_map_scores = []
            prm_scores = []
            prm_map_scores = []
            erm_scores = []
            erm_map_scores = []
            poem_scores = []
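The pool built above is presumably handed to wrappers such as Example #4's parallel=pool argument; for reference, pathos' ProcessingPool exposes a map that mirrors the builtin (the toy workload below is purely illustrative):

import pathos.multiprocessing as mp

pool = mp.ProcessingPool(7)
# ProcessingPool.map fans the function out across worker processes;
# pathos serializes with dill, so lambdas are allowed
squares = pool.map(lambda v: v * v, range(10))
print(squares)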
Example #11
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print "Starting training"
    training_start_time = time.time()

    iteration = 0
    learning_rate_manager = LearningRateManager.LearningRate(0.001, 0.6, 80)

    for epoch in range(0, EPOCHS):
        print "Epoch: " + str(epoch)
        # Training with all the cifar10 files for each epoch
        for cifar10_file in LIST_BATCH_FILES:
            print "File " + cifar10_file

            reader = DatasetReader.DatasetReader(
                DIR_BATCH_FILES + cifar10_file, BATCH_SIZE)
            batch = reader.get_batch()

            while batch != {}:
                _, loss = sess.run(
                    [training_op, error],
                    feed_dict={
                        input_batch: batch["data"],
                        label_batch: batch["labels"],
                        learning_rate: learning_rate_manager.learning_rate
                    })

                OutputAnalyzer.write_error_to_file(ERROR_FILE, iteration, loss)
                learning_rate_manager.add_error(loss)

                iteration += 1

                # advance to the next batch; an empty dict marks end of file
                batch = reader.get_batch()
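LearningRateManager is project-local and not shown; one plausible reading of LearningRate(0.001, 0.6, 80) is (initial rate, decay factor, error-window size). The sketch below is an assumption about its behavior, not the actual implementation:

class LearningRate(object):
    # Hypothetical sketch: decay the rate when the recent error stops improving
    def __init__(self, initial_rate, decay, window):
        self.learning_rate = initial_rate
        self.decay = decay
        self.window = window
        self.errors = []

    def add_error(self, error):
        self.errors.append(error)
        if len(self.errors) >= 2 * self.window:
            recent = sum(self.errors[-self.window:]) / self.window
            older = sum(self.errors[-2 * self.window:-self.window]) / self.window
            if recent >= older:  # no improvement over the last window
                self.learning_rate *= self.decay
            self.errors = self.errors[-self.window:]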