Exemplo n.º 1
0
def run_ds_pipeline(config):
    """Run the data science pipeline stages that are enabled in ``config``.

    The database interface is always initialised first. Each stage then runs
    only when its flag under ``config['action']`` compares equal to ``True``;
    otherwise a "skipping" message is written to the package logger:

    * ``['ingest']['make_list']``    -> ``ingest.ingest_data(config)``
    * ``['wrangle']['transform']``   -> ``wrangle.wrangle_data(config)``
    * ``['analyze']['analyze']``     -> ``analyze.analyze_data(config)``
    * ``['visualize']['visualize']`` -> ``visualize_and_analyze.visualize_data(config)``

    Args:
        config: nested mapping holding the flags listed above. The same
            object is handed unchanged to every stage that runs.

    Raises:
        KeyError: if ``config`` is a dict and one of the flag keys is
            missing (the lookups are not guarded).
    """
    # Get the logger object; it has probably already been created elsewhere.
    logger = sa_logger.init(globals.PACKAGE_NAME)
    logger.info('Data science pipeline begining...')

    # Initialize the db before any stage runs.
    dbif.db_init()

    # NOTE: the explicit `== True` comparisons are kept on purpose so that
    # truthy non-boolean flag values (e.g. a non-empty string) still skip
    # the stage exactly as before.
    actions = config['action']

    if actions['ingest']['make_list'] == True:
        # Begin with data ingestion.
        ingest.ingest_data(config)
    else:
        # Reported through the logger (not a bare print) so that every
        # skipped stage is recorded the same way.
        logger.info('skipping ingestion...')

    if actions['wrangle']['transform'] == True:
        # Time for data wrangling.
        wrangle.wrangle_data(config)
    else:
        logger.info('skipping wrangling and insertion of data into db ...')

    if actions['analyze']['analyze'] == True:
        # Time for data analysis.
        analyze.analyze_data(config)
    else:
        logger.info('skipping analysis ...')

    if actions['visualize']['visualize'] == True:
        # Time for data visualization.
        visualize_and_analyze.visualize_data(config)
    else:
        logger.info('skipping visualization ...')
Exemplo n.º 2
0
import va_utils

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from scipy import stats
import matplotlib.pyplot as plt

import statsmodels.api as sm
from sklearn import linear_model

# Global variables for this file.
# Module-level logger used by the functions in this file.
# NOTE(review): sa_logger and globals are not imported in the visible part of
# this file -- confirm they are in scope before this line executes.
logger = sa_logger.init(globals.PACKAGE_NAME)
# Name of the output directory; presumably where results are written --
# confirm against the code that uses it.
OUTPUT_DIR_NAME = "output"

def do_linear_regression(file_name):
    try:
        logger.info('-------------------------------------')
        logger.info('TCP Vs UDP linear regression.........')
        logger.info('-------------------------------------')

        df = pd.read_csv(file_name)
        #plot TCP Vs UDP, so UDP goes on x axis
        x = np.array(df['UDP'])
        X = x[:, np.newaxis]
        y = np.array(df['TCP'])

        #create a linear regressor