Пример #1
0
def get_max_connections_hostname():
    """Log the source host that has the most records in the time window.

    The window is bounded by the module-level ``one_hour_before_ts``
    (inclusive) and ``current_timestamp`` (exclusive). Records are grouped
    by their ``src`` field, counted, sorted by count in descending order and
    limited to the first group, which is then written to the log.
    """
    time_window = {'$gte': one_hour_before_ts, '$lt': current_timestamp}
    pipeline = [
        {'$match': {'timestamp': time_window}},
        {'$group': {'_id': '$src', 'total': {'$sum': 1}}},
        {'$sort': {'total': -1}},
        {'$limit': 1},
    ]
    for top_host in collection.aggregate(pipeline):
        message = 'Host generating the most connections: {}'.format(
            top_host['_id'])
        utils.logger('INFO', message)
Пример #2
0
def get_hosts_receiving_from(src_host):
    """Log the ``dst`` of every record in the time window sent by ``src_host``.

    Args:
        src_host: value matched against the ``src`` field of each record.

    The window is bounded by the module-level ``one_hour_before_ts``
    (inclusive) and ``current_timestamp`` (exclusive). Destinations are
    listed once per matching record, so duplicates are kept.
    """
    query = {
        'timestamp': {
            '$gte': one_hour_before_ts,
            '$lt': current_timestamp,
        },
        'src': src_host,
    }
    destinations = [record['dst'] for record in collection.find(query)]
    message = 'List of hosts connected from {}: {}'.format(
        src_host, destinations)
    utils.logger('INFO', message)
Пример #3
0
def get_hosts_connected_to(dst_host):
    """Log the ``src`` of every record in the time window sent to ``dst_host``.

    Args:
        dst_host: value matched against the ``dst`` field of each record.

    The window is bounded by the module-level ``one_hour_before_ts``
    (inclusive) and ``current_timestamp`` (exclusive). Sources are listed
    once per matching record, so duplicates are kept.
    """
    results = collection.find({
        'timestamp': {
            '$gte': one_hour_before_ts,
            '$lt': current_timestamp
        },
        'dst': dst_host
    })
    hostnames = [result['src'] for result in results]
    # Report the host that was actually queried: the parameter, not the
    # unrelated module-level ``destination_host`` name.
    utils.logger(
        'INFO',
        'List of hosts connected to {}: {}'.format(dst_host, hostnames))
def performsFullAnalysis(url):
    """Run every analysis on ``url`` and collect the results in one dict.

    The text is taken from the classification result and stored once under
    the top-level ``'text'`` key; the ``'text'`` entry is then removed from
    each individual analysis result.

    Args:
        url: value handed unchanged to each ``perform*`` analysis helper.

    Returns:
        dict with the keys ``'classification'``, ``'text'``, ``'sentiment'``,
        ``'entity'``, ``'concepts'`` and ``'summary'``. If any step fails,
        the error is logged and whatever was collected so far is returned.
    """
    result = {}
    try:
        result['classification'] = performClassificationAnalysis(url)
        result['text'] = result['classification']['text']
        result['sentiment'] = performSentimentAnalysis(url)
        result['entity'] = performElsaAnalysis(url)
        result['concepts'] = performConceptAnayalisis(url)
        result['summary'] = performSummayAnalysis(url)
        for key in result:
            # Compare by value: identity (`is not`) against a string literal
            # relies on interning and is not guaranteed to work.
            if key != 'text':
                print(key)
                del result[key]['text']
    except Exception:
        # Best effort: report the failure and return the partial result,
        # but let KeyboardInterrupt / SystemExit propagate.
        utils.logger("error perfom the full anayalusis",
                     "performsFullAnalysis")
    return result
Пример #5
0
def getUserByEmail(username, password):
    """Look up a person by e-mail or username together with a password.

    Args:
        username: matched against either ``email`` or
            ``user_info.username``.
        password: matched against ``user_info.password``.

    Returns:
        The matching document from ``watchfulowl.persons``, or ``None`` when
        nothing matches or the lookup fails (the failure is logged).
    """
    user = None
    try:
        client = GetDbClient()
        db = client.watchfulowl
        # NOTE(review): the password is compared verbatim against the stored
        # value, so it looks like passwords are stored unhashed - confirm and
        # consider switching to a salted hash.
        user = db.persons.find_one({
            "$or": [{
                "email": username
            }, {
                "user_info.username": username
            }],
            "user_info.password":
            password
        })
    except Exception:
        # Best effort: log and fall through to return None, but do not
        # swallow KeyboardInterrupt / SystemExit as a bare `except:` would.
        utils.logger("error getting the data", "getUserByEmail")

    return user
Пример #6
0
# config = read_config('config.yaml')

# set variable from config file
# SMTP_SERVER = config['SMTP_SERVER']
# PORT = config['PORT']
# FROM = config['FROM']
# TO = config['TO']
# PASSWORD = config['PASSWORD']
# HEALTHCHECK_URL = config['HEALTHCHECK_URL']
# LOG_FILENAME_SERVER = config['LOG_FILENAME_SERVER']
# LOG_FILENAME_EMAIL = config['LOG_FILENAME_EMAIL']

# # initialise logger
# log = logger(LOG_FILENAME_EMAIL)

# Module-level logger, created by the project's `logger` factory with this
# module's name.
log = logger(__name__)


def send_email(error_msd, filename, config):
    """
    send email with error message from log file

    Args:
        error_msg:``list``
            Error message, each sentence as list
        filename:``str``
            filename of log file
        config:``dict``
            dict of config file

            .. code-block:: json
Пример #7
0
import matplotlib.pyplot as plt

from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn import tree
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

from sklearn import metrics
from sklearn import model_selection

if __name__ == '__main__':
    # Load the training set.
    utils.logger("DATA LOADING...")
    train_data = utils.load_data("../files/train.csv")

    # Show a box plot of the 'full_sq' column as an outlier example.
    plt.boxplot(train_data['full_sq'])
    plt.show()

    # Remember the training column order so the concatenated dataset can be
    # put back into the same order.
    right_order = list(train_data.columns)

    test_data = utils.load_data("../files/test.csv")

    # Collect both datasets for merging.
    frames = [train_data, test_data]
Пример #8
0
                '$lt': current_timestamp
            }
        }
    }, {
        '$group': {
            '_id': '$src',
            'total': {
                '$sum': 1
            }
        }
    }, {
        '$sort': {
            'total': -1
        }
    }, {
        '$limit': 1
    }])
    for result in results:
        utils.logger(
            'INFO',
            'Host generating the most connections: {}'.format(result['_id']))


# Run the three reports in turn, logging an operation banner before each one.
# `destination_host` and `source_host` are defined outside this excerpt.

# Hosts that connected to `destination_host`.
utils.logger('INFO',
             '[OPERATION] [CONNECTED TO] [{}]'.format(destination_host))
get_hosts_connected_to(destination_host)
# Hosts that `source_host` connected to.
utils.logger('INFO', '[OPERATION] [CONNECTED FROM] [{}]'.format(source_host))
get_hosts_receiving_from(source_host)
# Host with the highest number of records in the window.
utils.logger('INFO', '[OPERATION] [GENERATOR] []')
get_max_connections_hostname()
Пример #9
0
def apricot5(model,
             model_weights_dir,
             dataset,
             adjustment_strategy,
             activation='binary'):
    """
    Comparison variant of the Apricot / Apricot lite fixing procedure.

    The submodel correctness matrix is computed (or loaded from disk) only for
    its shape and is then replaced by an all ``-1`` matrix. Weights are
    adjusted batch by batch, and an adjustment is kept only when it improves
    the validation accuracy; otherwise the last saved weights are restored.

    input:
        * model: model to fix; it is modified in place. Its ``evaluate`` is
          expected to return a ``(loss, accuracy)`` pair.
        * model_weights_dir: directory holding ``trained.h5`` and the
          ``submodels`` folder; the fixed weights, the correctness matrix and
          the log file are written here as well.
        * dataset: passed to ``load_dataset``, which returns
          ``x_train, x_test, y_train, y_test``.
        * adjustment_strategy: number selecting the strategy. Values ``<= 3``
          trigger further training after each improvement; values ``> 3``
          run one final training pass instead.
        * activation: forwarded to ``batch_adjust_weights_func`` and used in
          the fixed-weights file name.

    Returns nothing; accuracies are reported through ``logger`` to the log
    file inside ``model_weights_dir``.
    """
    # package the dataset
    x_train, x_test, y_train, y_test = load_dataset(dataset)
    x_train_val, x_val, y_train_val, y_val = split_validation_dataset(
        x_train, y_train)

    fixed_model = model

    submodel_dir = os.path.join(model_weights_dir, 'submodels')
    trained_weights_path = os.path.join(model_weights_dir, 'trained.h5')
    fixed_weights_path = os.path.join(
        model_weights_dir,
        'compare_fixed_{}_{}.h5'.format(adjustment_strategy, activation))
    log_path = os.path.join(model_weights_dir,
                            'compare_log_{}.txt'.format(adjustment_strategy))

    # Make sure a "best so far" weights file exists before the loop starts,
    # because the loop reloads it whenever an adjustment does not help.
    if not os.path.exists(fixed_weights_path):
        fixed_model.save_weights(fixed_weights_path)

    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=0.125,
                                 height_shift_range=0.125,
                                 fill_mode='constant',
                                 cval=0.)

    datagen.fit(x_train)

    logger('----------original model----------', log_path)

    # baseline accuracies of the unmodified model
    _, base_train_acc = fixed_model.evaluate(x_train_val, y_train_val)
    logger('The train accuracy: {:.4f}'.format(base_train_acc), log_path)
    _, base_val_acc = fixed_model.evaluate(x_val, y_val)
    logger('The validation accuracy: {:.4f}'.format(base_val_acc), log_path)
    _, base_test_acc = fixed_model.evaluate(x_test, y_test)
    logger('The test accuracy: {:.4f}'.format(base_test_acc), log_path)

    best_acc = base_val_acc

    # find all inputs the original model fails on (whole training dataset).
    fail_xs, fail_ys, fail_ys_label, fail_num = get_failing_cases(
        fixed_model, x_train, y_train)

    # The file name depends only on the seed and the number of submodels.
    sub_correct_matrix_path = os.path.join(
        model_weights_dir,
        'corr_matrix_{}_{}.npy'.format(settings.RANDOM_SEED,
                                       settings.NUM_SUBMODELS))
    print('obtaining sub correct matrix...')

    if not os.path.exists(sub_correct_matrix_path):
        # obtain submodel correctness matrix
        # NOTE(review): num_submodels is hard-coded to 20 although the file
        # name above uses settings.NUM_SUBMODELS - confirm this is intended.
        sub_correct_matrix = cal_sub_corr_matrix(fixed_model,
                                                 sub_correct_matrix_path,
                                                 submodel_dir,
                                                 fail_xs,
                                                 fail_ys,
                                                 fail_ys_label,
                                                 fail_num,
                                                 num_submodels=20)
    else:
        sub_correct_matrix = np.load(sub_correct_matrix_path)

    # Comparison baseline: keep only the shape of the real matrix and fill it
    # with -1 everywhere.
    sub_correct_matrix = np.ones(sub_correct_matrix.shape) * -1

    sub_weights_list = get_submodels_weights(fixed_model, submodel_dir)
    print('collected.')
    fixed_model.load_weights(trained_weights_path)

    logger('----------start fixing process----------', log_path)
    logger(
        'number of cases to be adjusted: {}'.format(
            sub_correct_matrix.shape[0]), log_path)
    for _ in range(settings.LOOP_COUNT):
        np.random.shuffle(sub_correct_matrix)

        # load batches rather than single input; round the count up so the
        # last, possibly smaller, batch is processed too.
        iter_num, rest = divmod(sub_correct_matrix.shape[0],
                                settings.FIX_BATCH_SIZE)
        if rest != 0:
            iter_num += 1

        print('iter num: {}'.format(iter_num))
        # batch version
        for i in range(iter_num):
            curr_weights = fixed_model.get_weights()
            batch_start = settings.FIX_BATCH_SIZE * i
            batch_end = settings.FIX_BATCH_SIZE * (i + 1)
            batch_corr_matrix = sub_correct_matrix[batch_start:batch_end, :]
            corr_w, incorr_w = batch_get_adjustment_weights(
                batch_corr_matrix, sub_weights_list, adjustment_strategy,
                curr_weights)
            print('calculating batch adjust weights...')
            adjust_w = batch_adjust_weights_func(curr_weights,
                                                 corr_w,
                                                 incorr_w,
                                                 adjustment_strategy,
                                                 activation=activation)
            fixed_model.set_weights(adjust_w)

            _, curr_acc = fixed_model.evaluate(x_val, y_val)
            print('After adjustment, the validation accuracy: {:.4f}'.format(
                curr_acc))

            if curr_acc > best_acc:
                best_acc = curr_acc
                fixed_model.save_weights(fixed_weights_path)

                if adjustment_strategy <= 3:
                    # further training epochs.
                    checkpoint = ModelCheckpoint(fixed_weights_path,
                                                 monitor='val_accuracy',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 mode='max')
                    # Only overwrite the saved weights when training beats
                    # the accuracy reached by the adjustment itself.
                    checkpoint.best = best_acc
                    # steps_per_epoch uses the same batch size as the
                    # generator so one epoch covers the data once.
                    fixed_model.fit_generator(
                        datagen.flow(x_train_val,
                                     y_train_val,
                                     batch_size=settings.BATCH_SIZE),
                        steps_per_epoch=(len(x_train_val) //
                                         settings.BATCH_SIZE + 1),
                        validation_data=(x_val, y_val),
                        epochs=settings.FURTHER_ADJUSTMENT_EPOCHS,
                        callbacks=[checkpoint])

                    fixed_model.load_weights(fixed_weights_path)

                    # eval the model
                    _, val_acc = fixed_model.evaluate(x_val, y_val, verbose=0)
                    best_acc = val_acc

                    logger(
                        'validation accuracy improved, after further training: {:.4f}'
                        .format(val_acc), log_path)
                else:
                    logger(
                        'validation accuracy improved: {:.4f}'.format(
                            best_acc), log_path)
            else:
                # The adjustment did not help: roll back to the best weights.
                fixed_model.load_weights(fixed_weights_path)

    fixed_model.load_weights(fixed_weights_path)
    if adjustment_strategy > 3:
        # final training process.
        _, val_acc = fixed_model.evaluate(x_val, y_val)
        checkpoint = ModelCheckpoint(fixed_weights_path,
                                     monitor='val_accuracy',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        checkpoint.best = val_acc

        fixed_model.fit_generator(
            datagen.flow(x_train_val,
                         y_train_val,
                         batch_size=settings.BATCH_SIZE),
            steps_per_epoch=len(x_train_val) // settings.BATCH_SIZE + 1,
            validation_data=(x_val, y_val),
            epochs=20,
            callbacks=[checkpoint])
        fixed_model.load_weights(fixed_weights_path)

    # final evaluation.
    _, test_acc = fixed_model.evaluate(x_test, y_test, verbose=0)
    logger('----------final evaluation----------', log_path)
    logger('test accuracy: {:.4f}'.format(test_acc), log_path)
Пример #10
0
def apricot2(model,
             model_weights_dir,
             dataset,
             adjustment_strategy,
             activation='binary'):
    """
    Apricot / Apricot lite fixing procedure, adjusting one failing case at a
    time.

    For every row of the submodel correctness matrix the model weights are
    adjusted, and the adjustment is kept only when it improves the validation
    accuracy; otherwise the last saved weights are restored.

    input:
        * model: model to fix; it is modified in place. Its ``evaluate`` is
          expected to return a ``(loss, accuracy)`` pair.
        * model_weights_dir: directory holding ``trained.h5`` and the
          ``submodels`` folder; the fixed weights, the correctness matrix and
          the log file are written here as well.
        * dataset: passed to ``load_dataset``, which returns
          ``x_train, x_test, y_train, y_test``.
        * adjustment_strategy: number selecting the strategy. Values ``<= 3``
          use ``adjust_weights_func`` and train further after each
          improvement; values ``> 3`` use the lite adjustment and run one
          final training pass instead.
        * activation: forwarded to ``adjust_weights_func`` and used in the
          fixed-weights file name.

    Returns nothing; accuracies are reported through ``logger`` to the log
    file inside ``model_weights_dir``.
    """
    # package the dataset
    x_train, x_test, y_train, y_test = load_dataset(dataset)
    x_train_val, x_val, y_train_val, y_val = split_validation_dataset(
        x_train, y_train)

    fixed_model = model

    submodel_dir = os.path.join(model_weights_dir, 'submodels')
    trained_weights_path = os.path.join(model_weights_dir, 'trained.h5')
    fixed_weights_path = os.path.join(
        model_weights_dir, 'fixed_{}_{}.h5'.format(adjustment_strategy,
                                                   activation))
    log_path = os.path.join(model_weights_dir,
                            'log_{}.txt'.format(adjustment_strategy))

    # Make sure a "best so far" weights file exists before the loop starts,
    # because the loop reloads it whenever an adjustment does not help.
    if not os.path.exists(fixed_weights_path):
        fixed_model.save_weights(fixed_weights_path)

    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=0.125,
                                 height_shift_range=0.125,
                                 fill_mode='constant',
                                 cval=0.)

    datagen.fit(x_train_val)

    logger('----------original model----------', log_path)

    # baseline accuracies of the unmodified model
    _, base_val_acc = fixed_model.evaluate(x_val, y_val)
    logger('The validation accuracy: {:.4f}'.format(base_val_acc), log_path)
    _, base_test_acc = fixed_model.evaluate(x_test, y_test)
    logger('The test accuracy: {:.4f}'.format(base_test_acc), log_path)

    best_acc = base_val_acc

    # find all inputs the original model fails on.
    fail_xs, fail_ys, fail_ys_label, fail_num = get_failing_cases(
        fixed_model, x_train_val, y_train_val)

    # The file name depends only on the seed and the number of submodels.
    sub_correct_matrix_path = os.path.join(
        model_weights_dir,
        'corr_matrix_{}_{}.npy'.format(settings.RANDOM_SEED,
                                       settings.NUM_SUBMODELS))
    print('obtaining sub correct matrix...')

    if not os.path.exists(sub_correct_matrix_path):
        # obtain submodel correctness matrix
        sub_correct_matrix = cal_sub_corr_matrix(fixed_model,
                                                 sub_correct_matrix_path,
                                                 submodel_dir, fail_xs,
                                                 fail_ys, fail_ys_label,
                                                 fail_num)
    else:
        sub_correct_matrix = np.load(sub_correct_matrix_path)

    sub_weights_list = get_submodels_weights(fixed_model, submodel_dir)
    print('collected.')
    fixed_model.load_weights(trained_weights_path)

    logger('----------start fixing process----------', log_path)
    for _ in range(settings.LOOP_COUNT):
        np.random.shuffle(sub_correct_matrix)

        for index in range(sub_correct_matrix.shape[0]):
            curr_weights = fixed_model.get_weights()
            corr_mat = sub_correct_matrix[index, :]

            print('obtaining correct and incorrect weights...')
            if adjustment_strategy <= 3:
                corr_w, incorr_w = get_adjustment_weights(
                    corr_mat, sub_weights_list, adjustment_strategy)
                print('calculating adjust weights...')
                adjust_w = adjust_weights_func(curr_weights,
                                               corr_w,
                                               incorr_w,
                                               adjustment_strategy,
                                               activation=activation)
            else:  # lite version
                print('calculating adjust weights...')
                adjust_w = adjust_weights_func_lite(corr_mat, sub_weights_list,
                                                    curr_weights)

            # A scalar -1 means "skip this case". Check for a scalar first so
            # an array-valued result is never compared element-wise here.
            if np.isscalar(adjust_w) and adjust_w == -1:
                continue
            fixed_model.set_weights(adjust_w)

            _, curr_acc = fixed_model.evaluate(x_val, y_val)
            print('After adjustment, the validation accuracy: {:.4f}'.format(
                curr_acc))

            if curr_acc > best_acc:
                best_acc = curr_acc
                fixed_model.save_weights(fixed_weights_path)

                if adjustment_strategy <= 3:
                    # further training epochs.
                    checkpoint = ModelCheckpoint(fixed_weights_path,
                                                 monitor='val_acc',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 mode='max')
                    # Only overwrite the saved weights when training beats
                    # the accuracy reached by the adjustment itself.
                    checkpoint.best = best_acc
                    # steps_per_epoch uses the same batch size as the
                    # generator so one epoch covers the data once.
                    fixed_model.fit_generator(
                        datagen.flow(x_train_val,
                                     y_train_val,
                                     batch_size=settings.BATCH_SIZE),
                        steps_per_epoch=(len(x_train_val) //
                                         settings.BATCH_SIZE + 1),
                        validation_data=(x_val, y_val),
                        epochs=settings.FURTHER_ADJUSTMENT_EPOCHS,
                        callbacks=[checkpoint])

                    fixed_model.load_weights(fixed_weights_path)

                    # eval the model
                    _, val_acc = fixed_model.evaluate(x_val, y_val, verbose=0)
                    best_acc = val_acc

                    logger(
                        'validation accuracy improved, after further training: {:.4f}'
                        .format(val_acc), log_path)
                else:
                    logger(
                        'validation accuracy improved: {:.4f}'.format(
                            best_acc), log_path)
            else:
                # The adjustment did not help: roll back to the best weights.
                fixed_model.load_weights(fixed_weights_path)

    fixed_model.load_weights(fixed_weights_path)
    if adjustment_strategy > 3:
        # final training process.
        _, val_acc = fixed_model.evaluate(x_val, y_val)
        checkpoint = ModelCheckpoint(fixed_weights_path,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        checkpoint.best = val_acc

        fixed_model.fit_generator(
            datagen.flow(x_train_val,
                         y_train_val,
                         batch_size=settings.BATCH_SIZE),
            steps_per_epoch=len(x_train_val) // settings.BATCH_SIZE + 1,
            validation_data=(x_val, y_val),
            epochs=settings.FURTHER_ADJUSTMENT_EPOCHS,
            callbacks=[checkpoint])
        fixed_model.load_weights(fixed_weights_path)

    # final evaluation.
    _, test_acc = fixed_model.evaluate(x_test, y_test, verbose=0)
    logger('----------final evaluation----------', log_path)
    logger('test accuracy: {:.4f}'.format(test_acc), log_path)
Пример #11
0
# Output locations, all relative to `basedir` (defined outside this excerpt).
outdir = os.path.join(basedir, "Physio CSV data_preprocessed")
outfile = os.path.join(basedir, "Summary.csv")
infosheetfile = os.path.join(basedir, "Physio AMP_Subject_Info_Sheet (1).xlsx")

# Create the folder for preprocessed data on first run.
if not os.path.exists(outdir):
    os.mkdir(outdir)

# List every entry in the physio folder, skipping names that start with ".DS".
folders = [x for x in os.listdir(datadir) if not x.startswith(".DS")]
# Map the first 10 characters of each folder name to the number of folders
# sharing that prefix (presumably the prefix is the subject ID - TODO confirm).
subjects = dict(Counter([x[:10] for x in folders]))

##############
# PREPROCESS #
##############

utils.logger("\nAligning timestamps for all subjects...\n", level=0)

# Loop over all physio folders and preprocess timestamps to datetimes for all
# measurements.
for folder in folders:

    # Rebuild the input folder path and create the matching output folder if
    # it does not exist yet.
    subdir = os.path.join(datadir, folder)
    suboutdir = os.path.join(outdir, folder)
    if not os.path.exists(suboutdir):
        os.mkdir(suboutdir)

    # Loop over ACC, EDA, BVP, TEMP, HR and IBI, which share the same
    # processing stream.
    for metric in ['ACC', 'EDA', 'BVP', 'TEMP', 'HR', 'IBI']:
        measurements = utils.extract_measurements(metric, subdir)
        # An int result is skipped; presumably extract_measurements returns an
        # int when the metric could not be read - TODO confirm.
        if isinstance(measurements, int):
            continue
Пример #12
0
# Read space-separated "<epoch-milliseconds> <src> <dst>" records from stdin
# and insert them into `collection` in batches. A batch is flushed once it
# holds 3000 documents or more than 3 seconds have passed since the last
# successful flush.
documents = []
start = time.time()
for line in sys.stdin:
    parsed_line = line.strip().split(" ")
    try:
        documents.append({
            'timestamp':
            datetime.datetime.fromtimestamp(int(parsed_line[0]) / 1e3),
            'src':
            parsed_line[1],
            'dst':
            parsed_line[2]
        })
    except IndexError:
        utils.logger('ERROR', 'Line index out of range {}'.format(parsed_line))
    except (ValueError, OverflowError, OSError) as e:
        # Blank lines and non-numeric or out-of-range timestamps are skipped
        # instead of aborting the whole import.
        utils.logger(
            'ERROR', 'Invalid timestamp in line {}: {}'.format(parsed_line, e))
    # Never flush an empty batch: insert_many rejects an empty list.
    if documents and (len(documents) >= 3000 or
                      (time.time() - start) > 3.0):
        try:
            collection.insert_many(documents)
            utils.logger('INFO', '[DB] [INSERT] [{}]'.format(len(documents)))
            documents = []
            start = time.time()
        except Exception as e:
            # The batch is kept, so the insert is retried on the next line.
            utils.logger('ERROR', '[DB] [INSERTION] [{}]'.format(e))

# Flush whatever is left once stdin is exhausted.
if documents:
    try:
        collection.insert_many(documents)
        utils.logger('INFO', '[DB] [INSERT] [{}]'.format(len(documents)))
    except Exception as e:
        utils.logger('ERROR', '[DB] [INSERTION] [{}]'.format(e))