Example #1
def main():
    # Read the arguments
    dataPath = sys.argv[1] # not in use
    net = sys.argv[2]
    name = sys.argv[3]  # noisy23 / noisy/ curated
    dataset = sys.argv[4]

    import os
    dir_path = os.path.dirname(os.path.realpath(__file__))

    test = ""
    # dataPath = '/media/mlagunas/a0148b08-dc3a-4a39-aee5-d77ee690f196/TFG/test'
    # net = "trainf"
    # name = "curated"  # noisy23 / noisy/ curated
    # dataset = "curated"
    # clf = joblib.load(dataPath + "/SVM/SVM.pkl")

    # Load the pre-trained SVM
    print "========> Loading SVM pickle"
    if net == "vgg19":
        clf = joblib.load(os.path.join(dir_path, "../models/SVM/SVMft_curated.pkl"))
    elif net == "trainf":
        clf = joblib.load(os.path.join(dir_path, "../models/SVM/SVMft_train.pkl"))
    else:
        print "ERROR loading pickle: unknown net '%s'" % net
        sys.exit(1)

    features = "models/h5/curated/features/trainf/curated_trainf_"

    if len(sys.argv) > 5:
        features = sys.argv[5]
        out = sys.argv[6]
        folder = sys.argv[7]
        feat = utils.getFeatures(features, "features")
        predToH5_1(feat, out, clf, folder)
    else:
        features = "../models/h5/" + dataset + "/features/" + \
            net + "/" + name + "_" + net + "_"
        cl = "../../data/paths/" + dataset + "/" + name + "_paths"

        # Get features
        feat = utils.getFeatures(features + "42.h5", "features")
        feat_train = utils.getFeatures(features + "train_42.h5", "features")
        feat_crossv = feat_train[:int(len(feat_train) * 0.2)]
        feat_test = utils.getFeatures(features + "test_42.h5", "features")

        # Load classes
        classes, path = utils.getClasses(cl + ".txt")
        classes_train, path_train = utils.getClasses(cl + "_train.txt")
        classes_crossv, path_crossv = classes_train[
            :int(len(classes_train) * 0.2)], path_train[:int(len(path_train) * 0.2)]
        classes_test, path_test = utils.getClasses(cl + "_test.txt")

        predToH5(feat_test, classes_test, clf, "../models/h5/" + dataset + "/svm/" +
                 net + "/" + name + test + "_" + net + "_probabilities_test.h5")
        predToH5(feat_train, classes_train, clf, "../models/h5/" + dataset +
                 "/svm/" + net + "/" + name + test + "_" + net + "_probabilities_train.h5")
        predToH5(feat, classes, clf, "../models/h5/" + dataset + "/svm/" +
                 net + "/" + name + test + "_" + net + "_probabilities.h5")
Example #2
    def generate_data(self, count, offset):
        """
        Generates training data in the CRF++ format for the ingredient
        tagging task
        """
        df = pd.read_csv(self.opts.data_path)
        df = df.fillna("")

        start = int(offset)
        end = int(offset) + int(count)

        df_slice = df.iloc[start: end]

        for index, row in df_slice.iterrows():
            try:
                # extract the display name
                display_input = utils.cleanUnicodeFractions(row["input"])
                tokens = utils.tokenize(display_input)
                del(row["input"])

                rowData = self.addPrefixes([(t, self.matchUp(t, row)) for t in tokens])

                for i, (token, tags) in enumerate(rowData):
                    features = utils.getFeatures(token, i+1, tokens)
                    print utils.joinLine([token] + features + [self.bestTag(tags)])

            # ToDo: deal with this
            except UnicodeDecodeError:
                pass

            print
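The trailing bare `print` emits the blank line that CRF++ uses to separate token sequences. `utils.joinLine` is not listed; a plausible sketch, assuming it simply tab-joins the columns (the name matches the call above, but the body is an assumption):

def joinLine(columns):
    # CRF++ training data: one token per line, columns separated by tabs,
    # with the gold tag as the last column; sequences end with a blank line.
    return "\t".join(columns)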
Example #3
    def generate_data(self, count, offset):
        """
        Generates training data in the CRF++ format for the ingredient
        tagging task
        """
        df = pd.read_csv("nyt-ingredients-snapshot-2015.csv")
        df = df.fillna("")

        start = int(offset)
        end = int(offset) + int(count)

        df_slice = df.iloc[start:end]
        s = ""
        for index, row in df_slice.iterrows():
            try:
                # extract the display name
                display_input = utils.cleanUnicodeFractions(row["input"])

                tokens = utils.tokenize(display_input)
                del (row["input"])
                rowData = self.addPrefixes([(t, self.matchUp(t, row))
                                            for t in tokens])

                for i, (token, tags) in enumerate(rowData):
                    features = utils.getFeatures(token, i + 1, tokens)
                    s = s + utils.joinLine([token] + features +
                                           [self.bestTag(tags)]) + '\n'

            # ToDo: deal with this
            except UnicodeDecodeError:
                pass

            print
        self.writeTempFile(s)
Example #4
def translate_row(row):
    """Translates a row of labeled data into CRF++-compatible tag strings.

    Args:
        row: A row of data from the input CSV of labeled ingredient data.

    Returns:
        The row of input converted to CRF++-compatible tags, e.g.

            2\tI1\tL4\tNoCAP\tNoPAREN\tB-QTY
            cups\tI2\tL4\tNoCAP\tNoPAREN\tB-UNIT
            flour\tI3\tL4\tNoCAP\tNoPAREN\tB-NAME
    """
    # extract the display name
    display_input = utils.cleanUnicodeFractions(row['input'])
    tokens = tokenizer.tokenize(display_input)

    labels = _row_to_labels(row)
    label_data = _addPrefixes([(t, _matchUp(t, labels)) for t in tokens])

    translated = ''
    for i, (token, tags) in enumerate(label_data):
        features = utils.getFeatures(token, i + 1, tokens)
        translated += utils.joinLine([token] + features +
                                     [_bestTag(tags)]) + '\n'
    return translated
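A possible driver for `translate_row`, assuming the labeled CSV has the same `input` column used above (the file name is taken from Example #3; the driver itself is illustrative, not part of the original module):

import pandas as pd

# Hypothetical driver: convert every labeled row and write one CRF++ training file.
df = pd.read_csv("nyt-ingredients-snapshot-2015.csv").fillna("")
with open("crf_train.txt", "w") as out:
    for _, row in df.iterrows():
        out.write(translate_row(row))
        out.write("\n")  # blank line separates ingredient phrases for CRF++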
Example #5
    def _generate_data_worker(self, args):
        index, row = args
        out = []
        try:
            # extract the display name
            display_input = utils.cleanUnicodeFractions(row["input"])
            tokens = utils.tokenize(display_input)
            del(row["input"])

            rowData = self.addPrefixes([(t, self.matchUp(t, row)) for t in tokens])

            for i, (token, tags) in enumerate(rowData):
                features = utils.getFeatures(token, i+1, tokens)
                out.append(utils.joinLine([token] + features + [self.bestTag(tags)]))

        # ToDo: deal with this
        except UnicodeDecodeError:
            pass

        if out:
            self.output_queue.put('\n'.join(out))
Example #6
    def generate_data(self, count, offset):
        """
        Generates training data in the CRF++ format for the ingredient
        tagging task
        """
        df = pd.read_csv(self.opts.data_path)
        df = df.fillna("")

        start = int(offset)
        end = int(offset) + int(count)

        df_slice = df.iloc[start:end]

        for index, row in df_slice.iterrows():

            prev_tag = None
            try:
                # extract the display name
                display_input = utils.cleanUnicodeFractions(row["input"])
                tokens = utils.tokenize(display_input)
                del (row["input"])

                taggedTokens = [(t, self.matchUp(t, row)) for t in tokens]
                rowData = self.addPrefixes(taggedTokens)

                for i, (token, tags) in enumerate(rowData):
                    features = utils.getFeatures(token, i + 1, tokens)
                    best_tag = self.bestTag(tags)
                    # An I- tag is only valid right after a tag of the same class;
                    # otherwise (including the first token, where prev_tag is still
                    # None) promote it to a B- tag.
                    if best_tag.startswith("I-") and (
                            prev_tag is None
                            or best_tag.split("-")[-1] != prev_tag.split("-")[-1]):
                        best_tag = best_tag.replace("I-", "B-", 1)

                    print utils.joinLine([token] + features + [best_tag])
                    prev_tag = best_tag

            # ToDo: deal with this
            except UnicodeDecodeError:
                pass

            print
Example #7
    def generate_data(self, count, offset):
        """
        Generates training data in the CRF++ format for the ingredient
        tagging task
        """
        data = []
        with open(self.opts.data_path, "r") as csvfile:
            reader = csv.DictReader(csvfile)
            for line in reader:
                data.append(line)

        start = int(offset)
        end = int(offset) + int(count)

        data_slice = data[start:end]

        for row in data_slice:
            try:
                # extract the display name
                display_input = utils.cleanUnicodeFractions(row["input"])
                tokens = utils.tokenize(display_input)
                del (row["input"])

                rowData = self.addPrefixes([(t, self.matchUp(t, row))
                                            for t in tokens])

                for i, (token, tags) in enumerate(rowData):
                    features = utils.getFeatures(token, i + 1, tokens)
                    print utils.joinLine([token] + features +
                                         [self.bestTag(tags)])

            # ToDo: deal with this
            except UnicodeDecodeError:
                pass

            print
Example #8
def sliding_window(imageinput):
    posx = 0
    posy = 0
    cont = 0

    classes = next(os.walk('1_Dataset/train'))[1]
    train, train_labels = loadData('1_Dataset/train', 224, 224, classes)
    test, test_labels = loadData('1_Dataset/test', 224, 224, classes)
    model_features = resnet152_model(224, 224, 3, len(classes))
    features = getFeatures(model_features, train, 8)
    test_features = getFeatures(model_features, test, 8)
    prediction_model = Sequential()

    prediction_model.add(
        Dense(256, input_shape=features.shape[1:], activation='relu'))
    prediction_model.add(Dense(len(classes), activation='softmax'))

    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    prediction_model.compile(optimizer=sgd,
                             loss='categorical_crossentropy',
                             metrics=['accuracy'])
    prediction_model.fit(features, train_labels, epochs=10, batch_size=8)

    predictions = prediction_model.predict(test_features,
                                           batch_size=8,
                                           verbose=1)

    acc = getAccuracy(test_labels, predictions)
    print('############### current model accuracy ################')
    print(acc)
    print('#######################################################')

    if os.path.isfile(imageinput.file_input):
        image = Image.open(imageinput.file_input)
        size = image.size
        width = size[0]
        height = size[1]
        csv_path = os.path.splitext(imageinput.file_input)[0] + '.csv'
        csv_file = open(csv_path, 'w')
        csv_file.write(
            'cont,label,score,divisor,posx,posy,posx,new_width,new_width,new_height,new_height,posy\n'
        )

        for divisor in range(40, 41, 1):
            posx = 0
            posy = 0
            new_width = width / divisor
            new_height = height / divisor
            overlap_x = new_width * imageinput.overlapx
            overlap_y = new_height * imageinput.overlapy

            new_path = 'solution/divisor_' + str(divisor)
            if not os.path.isdir(new_path):
                os.makedirs(new_path)

            while new_height <= height:
                posx = 0
                new_width = width / divisor
                while new_width <= width:
                    cont += 1
                    box = (posx, posy, new_width, new_height)
                    region = image.crop(box)
                    new_name = new_path + '/' + str(cont) + 'image_' + str(
                        posx) + '_' + str(posy) + '.jpg'
                    region.save(new_name)

                    img = np.array([
                        cv2.resize((cv2.imread(new_name)).astype(np.float32),
                                   (224, 224))
                    ])

                    img_features = getFeatures(model_features, img, 8)

                    predictions = prediction_model.predict(img_features,
                                                           batch_size=8,
                                                           verbose=1)

                    csv_file.write(
                        '%i,Z,%f,%i,%i,%i,%i,%i,%i,%i,%i,%i\n' %
                        (cont, predictions[0][0], divisor, posx, posy, posx,
                         new_width, new_width, new_height, new_height, posy))
                    csv_file.write(
                        '%i,S,%f,%i,%i,%i,%i,%i,%i,%i,%i,%i\n' %
                        (cont, predictions[0][1], divisor, posx, posy, posx,
                         new_width, new_width, new_height, new_height, posy))
                    posx += overlap_x
                    new_width += overlap_x
                posy += overlap_y
                new_height += overlap_y
        image.close()
    else:
        print("image not found")
        sys.exit(1)
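The nested while-loops above implement a classic sliding window; the same pattern isolated as a small generator (integer crop boxes for PIL, step sizes derived from the overlap fractions) is sketched below as an illustration, not a drop-in replacement:

def iter_windows(width, height, divisor, overlap_x_frac, overlap_y_frac):
    # Yield (left, upper, right, lower) boxes that tile the image with overlap.
    win_w, win_h = width // divisor, height // divisor
    step_x = max(1, int(win_w * overlap_x_frac))
    step_y = max(1, int(win_h * overlap_y_frac))
    upper = 0
    while upper + win_h <= height:
        left = 0
        while left + win_w <= width:
            yield (left, upper, left + win_w, upper + win_h)
            left += step_x
        upper += step_y

# e.g.  for box in iter_windows(width, height, 40, imageinput.overlapx, imageinput.overlapy):
#           region = image.crop(box)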
Example #9
                  (mean_score, scores.std() * 2, params))
        print()

    # Note the problem is too easy: the hyperparameter plateau is too flat and the
    # output model is the same for precision and recall with ties in quality.


dataPath = '/media/mlagunas/a0148b08-dc3a-4a39-aee5-d77ee690f196/TFG'
net = "vgg19"
name = "curated"  # noisy23 / noisy/ curated
typ = "curated"
features = dataPath + "/h5/" + typ + "/features/" + \
    net + "/" + name + "_" + net + "_"
cl = "../../data/paths/" + typ + "/" + name + "_paths"

feat = utils.getFeatures(features + "42.h5", "features")
feat_train = utils.getFeatures(features + "train_42.h5", "features")
feat_crossv = feat_train[:int(len(feat_train) * 0.2)]
feat_test = utils.getFeatures(features + "test_42.h5", "features")
# Load classes
classes, path = utils.getClasses(cl + ".txt")
classes_train, path_train = utils.getClasses(cl + "_train.txt")
classes_crossv, path_crossv = classes_train[:int(
    len(classes_train) * 0.2)], path_train[:int(len(path_train) * 0.2)]
classes_test, path_test = utils.getClasses(cl + "_test.txt")

# Get the best parameters for the dataset
fineTunning(feat_crossv, classes_crossv)

# Create the SVM with the given parameters
clf = svm.SVC(kernel='rbf',
Example #10
    return zip(curated_synset, acc_class)

curated_synset_path = "/home/mlagunas/Bproject/DLart/data/data_utils/synset_curated.txt"
test = ""
dataPath = '/media/mlagunas/a0148b08-dc3a-4a39-aee5-d77ee690f196/TFG/test'
net = "vgg19"
name = "curated"  # noisy23 / noisy/ curated
dataset = "curated"
clf = joblib.load(dataPath + "/SVM/SVM_vgg19.pkl")

features = dataPath + "/h5/" + dataset + "/features/" + \
    net + "/" + name + "_" + net + "_"
cl = "../../data/paths/" + dataset + "/" + name + "_paths"

# Get features
feat_test = utils.getFeatures(features + "test_42.h5", "features")
classes_test, path_test = utils.getClasses(cl + "_test.txt")

with open(curated_synset_path) as f:
    curated_synset = f.read().splitlines()

######################################################
## Getting the accuracy
probs = clf.predict_proba(feat_test)
probs = probs.tolist()
accuracy_top_n(5, curated_synset, probs, classes_test)
accuracy_top_n(1, curated_synset, probs, classes_test)

y_true, y_pred = classes_test, clf.predict(feat_test)

Example #11
%matplotlib
ratio = 4  # to define length of line structure
C = []  # binary crack image
#%% 3.4 Multi-scale crack map
for w in range(Smin + 2, Smax + 10, 3):
    S = getStr(w, w * ratio)  # get line shape structure

    # 3.1.1 Morphological operation
    T = morphLineEnhence(I,
                         S)  # Eq (1): T = max[close(opening(I, S), S), I] - I

    # Thresholding with Otsu
    _, bn = cv2.threshold(T.astype(np.uint8), 0, 255, cv2.THRESH_OTSU)

    #%% 3.2 Feature Extraction
    _, contour, _ = cv2.findContours(bn, 3, 1)
    x_data, object_idx = getFeatures(contour, bn)

    #%% 3.3 Classification(NN)
    # Classify
    predict = model.predict(
        x_data)  # Neural Network using Keras with Tensorflow
    object_idx = np.asarray(object_idx)
    cracks = object_idx[(predict > 0.5)[:, 0]]

    result = np.zeros_like(I)
    for dd in cracks:
        cv2.drawContours(result, [contour[dd]], -1, (255), -1)

    C.append(result)

# 3.4 Multi-scale crack map
Example #12
    classes = next(os.walk(ARGS.image_dir + '/train'))[1]

    # Load train
    train, train_labels = loadData(ARGS.image_dir + '/train', ARGS.imgs_cols,
                                   ARGS.imgs_rows, classes)

    # Load validation data
    test, test_labels = loadData(ARGS.image_dir + '/test', ARGS.imgs_cols,
                                 ARGS.imgs_rows, classes)
    # Load our model
    model = resnet152_model(ARGS.imgs_rows, ARGS.imgs_cols, channel, len(classes))

    # We save the result of passing the images through the model trained with imagenet
    # without the last layer
    features = getFeatures(model, train, ARGS.batch_size)    
    test_features = getFeatures(model, test, ARGS.batch_size)

    # We create a new model, so that we train only the last layer
    new_model = Sequential()
    new_model.add(Dense(256, input_shape = features.shape[1:], activation='relu'))
    new_model.add(Dense(len(classes), activation='softmax'))

    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    new_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    new_model.fit(features, train_labels, epochs = ARGS.epochs, batch_size = ARGS.batch_size)
    
    # Make predictions
    predictions = new_model.predict(test_features, batch_size = ARGS.batch_size, verbose=1)
    print(predictions)
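`getFeatures(model, data, batch_size)` in Examples #8 and #12 is not listed; given that its output feeds a fresh Dense head, it presumably just runs the truncated ResNet-152 forward. A one-line sketch under that assumption:

def getFeatures(model, images, batch_size):
    # Bottleneck activations of the pre-trained model for a batch of images.
    return model.predict(images, batch_size=batch_size, verbose=1)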
Example #13
File: matcher.py  Project: sigstj/fmap
def MatchTraces(d, m):
    # Compute global action frequency
    dFreqMap = utils.computeActionFreq(d)
    mFreqMap = utils.computeActionFreq(m)

    dFeatures = utils.getFeatures(d, dFreqMap)
    mFeatures = utils.getFeatures(m, mFreqMap)

    x,y = len(dFeatures), len(mFeatures)
    data = numpy.zeros(shape=(x,y))
    mcs = numpy.chararray(shape=(x,y), unicode=True, itemsize=50)
    mcs[:] = ''
    avgTraceLen = numpy.zeros(shape=(x,y))
    tsm = utils.getTraceStringMap(d, m)
    #print tsm

    for i, df in enumerate(dFeatures):
        dFile = sorted(list(df))[0]
        tr1 = d.traces[dFile]
        for j, mf in enumerate(mFeatures):
            mFile = sorted(list(mf))[0]
            tr2 = m.traces[mFile]
            mcs[i, j], data[i, j] = utils.getWeightedMCSLength(tr1, tr2, d, m, dFreqMap, mFreqMap)
            avgTraceLen[i, j] = float(len(tr1) + len(tr2)) / 2

    if utils.verbose:
        print "MCS", mcs

    # Merge match-equivalent traces
    rLen = len(data)
    cLen = len(data[0])

    iRows = getIdenticalRows(mcs, rLen)
    if utils.verbose:
        print "Merging Match-Equivalent Trace Rows:", iRows
    rowsToDel = getRowsToDel(dFeatures, iRows)
    data, mcs = deleteRows(data, mcs, dFeatures, rowsToDel)

    iCols = getIdenticalRows(mcs.T, cLen)
    if utils.verbose:
        print "Merging Match-Equivalent Trace Cols:", iCols
    colsToDel = getRowsToDel(mFeatures, iCols)
    data, mcs = deleteRows(data.T, mcs.T, mFeatures, colsToDel)

    data = data.T
    mcs = mcs.T

    if utils.verbose:
        print data

    # Normalize the data (scale within [0, 1])
    maxim = 0.0
    for r in range(len(data)):
        for c in range(len(data[r])):
            data[r][c] = data[r][c] / avgTraceLen[r][c]
            if data[r][c] > maxim:
                maxim = data[r][c]
    for r in range(len(data)):
        for c in range(len(data[r])):
            if maxim > 0:
                data[r][c] = data[r][c] / maxim

#     if DEBUG:
    utils.printCSV(data, tsm, dFeatures, mFeatures)

    # Maximum Weight Bipartite Matching
    mwbgm = bipartitematching.Munkres()
    cost_matrix = bipartitematching.make_cost_matrix(data, lambda x : 100000000 - (x*10000))
    indexes = mwbgm.compute(cost_matrix)

    mapping = {}

    print "%"*120, "\n\t START RQ2: (MATCH DETAILS) \n", "%"*120

    for row, column in indexes:
        value = data[row][column]
        if value > 0.1: #Be conservative with matching
            #print '(%d, %d) -> %f' % (row, column, value)
            df = ",".join(dFeatures[row])
            mf = ",".join(mFeatures[column])
            print '[%d,%d] (%s <-> %s) -> %f' % (row, column, df, mf, value)
            mapping[row] = column

    print "%"*120, "\n\t END RQ2: \n", "%"*120

    print "#"*120, "\n  LOAD STATS\n", "#"*120
    d.printLoadStats()
    m.printLoadStats()

    utils.printMatchStats(d, m, dFeatures, mFeatures, mapping)

    return mapping
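The `bipartitematching` module is used exactly like the standard `munkres` package (`Munkres`, `make_cost_matrix`, `compute`). A self-contained sketch of the same maximum-weight assignment step, assuming that package; similarities are inverted into costs because Munkres minimizes:

from munkres import Munkres, make_cost_matrix

similarity = [[0.9, 0.1],
              [0.2, 0.8]]
# Invert similarities into costs (same transform as above); Munkres minimizes cost.
cost_matrix = make_cost_matrix(similarity, lambda x: 100000000 - (x * 10000))
assignment = Munkres().compute(cost_matrix)   # [(0, 0), (1, 1)]
mapping = {row: column for row, column in assignment}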
Example #14
def train(epoch):
    # tf.logging.set_verbosity(tf.logging.INFO)
    trainAnchorData = tf.placeholder(tf.float32,
                                     shape=(batchSize, 64, None, 3),
                                     name="anchor")
    trainAnchorLabels = tf.placeholder(tf.int32,
                                       shape=(batchSize),
                                       name="ancLabel")

    model = convGruNet()
    # a = model.net(xInput=xInput)
    ancOut = model.net(trainAnchorData)

    # loss, pos, neg = computeTripletLoss(anchor_feature=ancOut, positive_feature=posOut, negative_feature=negOut, margin=margin)
    # loss = batch_hard_triplet_loss(labels, emb, margin, squared=False)
    loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
        labels=trainAnchorLabels, embeddings=ancOut)
    # loss = batch_hard_triplet_loss(labels=trainAnchorLabels, embeddings=ancOut, margin=margin)
    globalStep = tf.Variable(tf.constant(0))
    # lr = tf.train.exponential_decay(
    #     tf.convert_to_tensor(0.0005),  # initial learning rate
    #     globalStep,
    #     3000,  # decay steps
    #     0.95   # decay rate
    # )
    lr = tf.placeholder(dtype=tf.float32)

    optimizer = tf.train.AdamOptimizer(lr).minimize(loss,
                                                    global_step=globalStep)
    saver = model.saver()
    with tf.Session() as sess:
        trainWriter = tf.summary.FileWriter('./logsTriplet/train', sess.graph)

        devWriter = tf.summary.FileWriter('./logsTriplet/test', sess.graph)

        tf.summary.scalar('loss', loss)
        # tf.summary.scalar('positives', pos)
        # tf.summary.scalar('negatives', neg)
        tf.summary.scalar('lr', lr)
        merged = tf.summary.merge_all()

        sess.run(tf.global_variables_initializer())

        for e in range(epoch):
            ''' Training '''
            count = 0
            for i in range(100):
                count += 1
                globalStep.assign(globalStep + 1)
                ancBatch, ancBatchLabel = getFeatures(filePath,
                                                      classNum=classNum,
                                                      perClassNum=perClassNum)
                # ancBatch, ancBatchLabel = tF(filePath, classNum=classNum, perClassNum=perClassNum)

                feedDict = {
                    trainAnchorData: ancBatch,
                    trainAnchorLabels: ancBatchLabel,
                    lr: 0.001
                }
                _, losses, learningrate, summary = sess.run(
                    [optimizer, loss, lr, merged], feed_dict=feedDict)
                trainWriter.add_summary(summary, sess.run(globalStep))
                print("trainLoss: ", losses)
            ''' Validation '''
            count = 0
            for i in range(5):
                count += 1
                ancBatchDev, ancBatchLabelDev = getFeatures(
                    devFilePath, classNum=classNum, perClassNum=perClassNum)
                # ancBatchDev, ancBatchLabelDev = tF(filePath, classNum=classNum, perClassNum=perClassNum)
                feedDict = {
                    trainAnchorData: ancBatchDev,
                    trainAnchorLabels: ancBatchLabelDev,
                    lr: 0.001
                }
                losses, summary = sess.run([loss, merged], feed_dict=feedDict)
                devWriter.add_summary(summary, sess.run(globalStep))
                print("testLoss: ", losses)

            saver.save(sess,
                       "triplet/modelCheckpoint",
                       global_step=sess.run(globalStep))
        trainWriter.close()
        devWriter.close()
Example #15
def evaluate(thresholds_file, cpu_testset, iperf_testset, trainset,
             time_window_threshold):
    plt.clf()

    stats_json = data_prefix + 'normalization_stats.json'

    model = load_model('model/5g_autoencoder.h5')
    normalization_stats = loadDictJson(stats_json)

    cols_to_normalize = getFeatures()
    cols = [c + '_normalized' for c in cols_to_normalize]

    time_window_threshold = 30
    refresh_time_interval = 15

    n_steps = 4
    n_features = len(cols)

    logging.info('Loading evaluation datasets')
    val_df = loadDataset(trainset)
    cpu_df = loadDataset(cpu_testset)
    iperf_df = loadDataset(iperf_testset)

    cpu_df.fillna(method='backfill', inplace=True)
    cpu_df.replace([np.inf, -np.inf], 0.0, inplace=True)

    iperf_df.fillna(method='backfill', inplace=True)
    iperf_df.replace([np.inf, -np.inf], 0.0, inplace=True)

    val_df.fillna(method='backfill', inplace=True)
    val_df.replace([np.inf, -np.inf], 0.0, inplace=True)

    logging.info('Normalizing evaluation data')
    for col in cols_to_normalize:
        cpu_df[col + '_normalized'] = normalizeFeature(
            cpu_df, col, normalization_stats[col + '_min'],
            normalization_stats[col + '_max'])
        iperf_df[col + '_normalized'] = normalizeFeature(
            iperf_df, col, normalization_stats[col + '_min'],
            normalization_stats[col + '_max'])
        val_df[col + '_normalized'] = normalizeFeature(
            val_df, col, normalization_stats[col + '_min'],
            normalization_stats[col + '_max'])

    logging.info('Evaluating for CPU and memory metrics')

    cpu_xs = []
    cpu_ys = []

    net_up_xs = []
    net_up_ys = []

    net_down_xs = []
    net_down_ys = []

    mem_xs_a1 = []
    mem_ys_a1 = []
    for sample_start in range(0, len(cpu_df) - time_window_threshold):
        sample_end = sample_start + time_window_threshold
        cpu_df_sample = cpu_df.iloc[sample_start:sample_end]

        # Select required columns for evaluation data batch
        cpu_dataset = cpu_df_sample[cols].to_numpy()

        # Prepare evaluation dataset batch
        X_test_cpu, y_test_cpu = split_sequences(cpu_dataset, n_steps)
        X_test_cpu = X_test_cpu.reshape((len(X_test_cpu), n_steps, n_features))

        # Predict for evaluation dataset batch
        yhat_cpu = model.predict(X_test_cpu, verbose=0)

        cpu_rmse_dict = printPredictionErrors(y_test_cpu, yhat_cpu)

        net_up_xs.append(len(net_up_xs))
        net_up_ys.append(cpu_rmse_dict['net_up_rmse'])

        net_down_xs.append(len(net_down_xs))
        net_down_ys.append(cpu_rmse_dict['net_down_rmse'])

        cpu_xs.append(len(cpu_xs))
        cpu_ys.append(cpu_rmse_dict['cpu_rmse'])

        mem_xs_a1.append(len(mem_xs_a1))
        mem_ys_a1.append(cpu_rmse_dict['mem_rmse'])

    plt.plot(cpu_xs,
             cpu_ys,
             color='blue',
             label='CPU Percentage Rate (mode=user)')
    #plt.plot(mem_xs_a1, mem_ys_a1, color='red', label='Memory Percentage Rate')
    plt.title('CPU Attack Dataset')
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig('plots/evaluate_cpu.png')
    plt.clf()

    logging.info('Evaluating for network and 5G metrics')

    net_up_xs = []
    net_up_ys = []

    net_down_xs = []
    net_down_ys = []

    net_5g_up_xs = []
    net_5g_up_ys = []

    net_5g_down_xs = []
    net_5g_down_ys = []

    mem_xs_a2 = []
    mem_ys_a2 = []
    for sample_start in range(0, len(iperf_df) - time_window_threshold):
        sample_end = sample_start + time_window_threshold
        iperf_df_sample = iperf_df.iloc[sample_start:sample_end]

        # Select required columns for evaluation data batch
        iperf_dataset = iperf_df_sample[cols].to_numpy()

        # Prepare evaluation dataset batch
        X_test_iperf, y_test_iperf = split_sequences(iperf_dataset, n_steps)
        X_test_iperf = X_test_iperf.reshape(
            (len(X_test_iperf), n_steps, n_features))

        # Predict for evaluation dataset batch
        yhat_iperf = model.predict(X_test_iperf, verbose=0)

        iperf_rmse_dict = printPredictionErrors(y_test_iperf, yhat_iperf)

        net_up_xs.append(len(net_up_xs))
        net_up_ys.append(iperf_rmse_dict['net_up_rmse'])

        net_down_xs.append(len(net_down_xs))
        net_down_ys.append(iperf_rmse_dict['net_down_rmse'])

        net_5g_up_xs.append(len(net_5g_up_xs))
        net_5g_up_ys.append(iperf_rmse_dict['net_up_5g_rmse'])

        net_5g_down_xs.append(len(net_5g_down_xs))
        net_5g_down_ys.append(iperf_rmse_dict['net_down_5g_rmse'])

        mem_xs_a2.append(len(mem_xs_a2))
        mem_ys_a2.append(iperf_rmse_dict['mem_rmse'])

    plt.plot(net_up_xs, net_up_ys, color='green', label='Network Up Rate')
    plt.plot(net_down_xs,
             net_down_ys,
             color='purple',
             label='Network Down Rate')
    #plt.plot(mem_xs_a2, mem_ys_a2, color='red', label='Memory Percentage Rate')
    plt.title('iperf Attack Dataset')
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig('plots/evaluate_iperf_net.png')
    plt.clf()

    plt.plot(net_5g_up_xs,
             net_5g_up_ys,
             color='green',
             label='5G Network Up Rate')
    plt.plot(net_5g_down_xs,
             net_5g_down_ys,
             color='blue',
             label='5G Network Down Rate')
    plt.title('iperf Attack Dataset')
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig('plots/evaluate_iperf_5g.png')
    plt.clf()

    logging.info('Evaluating with training data')

    cpu_xs = []
    cpu_ys = []

    net_up_xs = []
    net_up_ys = []

    net_down_xs = []
    net_down_ys = []

    net_5g_up_xs = []
    net_5g_up_ys = []

    net_5g_down_xs = []
    net_5g_down_ys = []

    mem_xs_n = []
    mem_ys_n = []
    for sample_start in range(0, len(val_df) - time_window_threshold):
        sample_end = sample_start + time_window_threshold
        val_df_sample = val_df.iloc[sample_start:sample_end]

        # Select required columns for evaluation data batch
        val_dataset = val_df_sample[cols].to_numpy()

        # Prepare evaluation dataset batch
        X_test_val, y_test_val = split_sequences(val_dataset, n_steps)
        X_test_val = X_test_val.reshape((len(X_test_val), n_steps, n_features))

        # Predict for evaluation dataset batch
        yhat_val = model.predict(X_test_val, verbose=0)

        val_rmse_dict = printPredictionErrors(y_test_val, yhat_val)

        cpu_xs.append(len(cpu_xs))
        cpu_ys.append(val_rmse_dict['cpu_rmse'])

        mem_xs_n.append(len(mem_xs_n))
        mem_ys_n.append(val_rmse_dict['mem_rmse'])

        net_up_xs.append(len(net_up_xs))
        net_up_ys.append(val_rmse_dict['net_up_rmse'])

        net_down_xs.append(len(net_down_xs))
        net_down_ys.append(val_rmse_dict['net_down_rmse'])

        net_5g_up_xs.append(len(net_5g_up_xs))
        net_5g_up_ys.append(val_rmse_dict['net_up_5g_rmse'])

        net_5g_down_xs.append(len(net_5g_down_xs))
        net_5g_down_ys.append(val_rmse_dict['net_down_5g_rmse'])

    plt.plot(cpu_xs,
             cpu_ys,
             color='blue',
             label='CPU Percentage Rate (mode=user)')
    plt.plot(mem_xs_n, mem_ys_n, color='red', label='Memory Percentage Rate')
    plt.plot(net_up_xs, net_up_ys, color='green', label='Network Up Rate')
    plt.plot(net_down_xs,
             net_down_ys,
             color='purple',
             label='Network Down Rate')
    plt.title('Training Dataset (Edge Metrics)')
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig('plots/evaluate_val_1.png')
    plt.clf()

    plt.plot(net_5g_up_xs,
             net_5g_up_ys,
             color='orange',
             label='5G Network Up Rate')
    plt.plot(net_5g_down_xs,
             net_5g_down_ys,
             color='cyan',
             label='5G Network Down Rate')
    plt.title('Training Dataset (5G Metrics)')
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig('plots/evaluate_val_2.png')
    plt.clf()
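`split_sequences` is not shown; a common sliding-window splitter consistent with how it is used above (windows of `n_steps` rows as inputs, the following row as the target) would look like the sketch below. This is an assumption about the helper, not its actual source:

import numpy as np

def split_sequences(sequences, n_steps):
    # sequences: 2-D array of shape (time, features).
    X, y = [], []
    for i in range(len(sequences) - n_steps):
        X.append(sequences[i:i + n_steps, :])  # window of n_steps rows
        y.append(sequences[i + n_steps, :])    # row that follows the window
    return np.array(X), np.array(y)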
Example #16
    parser.add_argument('--influx_user',
                        required=False,
                        help='InfluxDB Username')
    parser.add_argument('--influx_pass',
                        required=False,
                        help='InfluxDB Password')
    parser.add_argument('--influx_db',
                        required=False,
                        help='InfluxDB Database')
    parser.add_argument('--influx_measurement',
                        required=False,
                        help='InfluxDB Measurement')

    args = parser.parse_args()

    cols_to_normalize = getFeatures()
    cols = [c + '_normalized' for c in cols_to_normalize]

    # Number of time steps
    n_steps = 4
    # Number of features, that will be used
    n_features = len(cols)

    # Maximum number of historical records to keep for predicting
    time_window_threshold = 30

    refresh_time_interval = 15

    if args.mode == 'train':
        logging.info('Mode: Training')
        logging.info('Evaluation: ' +
Example #17
## creating the bounding box for cropping our rasters
dir_shp = "data/bdtopobati_frejus"
BuildingsGDF = gpd.read_file(os.path.join(dir_shp, 'bdtopo_bati_1954.shp'))
bbox = BuildingsGDF.total_bounds

# setting a bounding box for cropping
glob_minx, glob_miny = bbox[0], bbox[1]
glob_maxx, glob_maxy = bbox[2], bbox[3]
# we change the miny to avoid the sea
glob_miny = 6265642

# getting global boundaries in the correct format
bbox = box(glob_minx, glob_miny, glob_maxx, glob_maxy)
geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=from_epsg(2154))
coords = fun.getFeatures(geo)
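# fun.getFeatures is not shown in this excerpt; the sketch below (an assumption,
# not necessarily this project's code) shows the usual way to turn a GeoDataFrame
# geometry into the GeoJSON-like dicts that rasterio.mask.mask expects.
import json

def getFeatures_sketch(gdf):
    # Return the first feature's geometry in the format rasterio's mask() accepts.
    return [json.loads(gdf.to_json())['features'][0]['geometry']]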

print("""

We load the rasters
Then we sample them into a grid with similar resolution

""")

# list rasters
dir_tifs = "data/tifs"
list_files = fun.get_files(dir_tifs)
list_tifs = [name for name in list_files if name[-3:] == "tif"]
list_tifs.sort(reverse=True)

# storing our rasters per year in a dictionary