def test_avg_runup(self):
    """Check the ``YHOO_Avg_Runup`` and ``MSFT_Avg_Runup`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and one value per
    symbol is compared with its expected figure to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    yhoo = ETL(self.d, 'YHOO').df_temp
    assert np.abs(yhoo.loc[datetime(2016, 5, 19), 'YHOO_Avg_Runup'] - (-0.022897)) < 0.0001

    msft = ETL(self.d, 'MSFT').df_temp
    assert np.abs(msft.loc[datetime(2016, 6, 8), 'MSFT_Avg_Runup'] - 0.030256) < 0.0001
def test_daily_return(self):
    """Check the ``A_return`` and ``AA_return`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and one value per
    symbol is compared with its expected figure to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    a_frame = ETL(self.d, 'A').df_temp
    assert np.abs(a_frame.loc[datetime(2009, 2, 13), 'A_return'] - (-0.008588)) < 0.0001

    aa_frame = ETL(self.d, 'AA').df_temp
    assert np.abs(aa_frame.loc[datetime(2016, 6, 28), 'AA_return'] - 0.025275) < 0.0001
def main():
    """Construct an ``ETL`` with no arguments and run it.

    Any exception raised while building or running the ETL is caught and
    reported on stdout; nothing is re-raised and nothing is returned.
    """
    try:
        ETL().run()
    except Exception as err:
        print('main(), error: {}'.format(err))
def test_cov63d(self):
    """Check the ``BF-B_Cov63d`` and ``CCL_Cov63d`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    bfb = ETL(self.d, 'BF-B').df_temp
    assert np.abs(bfb.loc[datetime(2013, 1, 24), 'BF-B_Cov63d'] - 0.000048) < 0.0001
    assert np.abs(bfb.loc[datetime(2013, 2, 5), 'BF-B_Cov63d'] - 0.000041) < 0.0001

    ccl = ETL(self.d, 'CCL').df_temp
    assert np.abs(ccl.loc[datetime(2014, 7, 25), 'CCL_Cov63d'] - 0.000027) < 0.0001
    assert np.abs(ccl.loc[datetime(2014, 8, 25), 'CCL_Cov63d'] - 0.000026) < 0.0001
def test_ema(self):
    """Check the ``GOOGL_EMA`` and ``IBM_EMA`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    googl = ETL(self.d, 'GOOGL').df_temp
    assert np.abs(googl.loc[datetime(2010, 8, 26), 'GOOGL_EMA'] - 247.943286) < 0.0001
    assert np.abs(googl.loc[datetime(2012, 11, 12), 'GOOGL_EMA'] - 338.467381) < 0.0001

    ibm = ETL(self.d, 'IBM').df_temp
    assert np.abs(ibm.loc[datetime(2010, 8, 4), 'IBM_EMA'] - 110.986086) < 0.0001
    assert np.abs(ibm.loc[datetime(2010, 9, 7), 'IBM_EMA'] - 111.070117) < 0.0001
def test_sma_momentum(self):
    """Check the ``GWW_SMA_Momentum`` and ``HAL_SMA_Momentum`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    gww = ETL(self.d, 'GWW').df_temp
    assert np.abs(gww.loc[datetime(2010, 8, 26), 'GWW_SMA_Momentum'] - (-4.818233)) < 0.0001
    assert np.abs(gww.loc[datetime(2012, 11, 12), 'GWW_SMA_Momentum'] - 12.634321) < 0.0001

    hal = ETL(self.d, 'HAL').df_temp
    assert np.abs(hal.loc[datetime(2010, 8, 2), 'HAL_SMA_Momentum'] - (-0.067255)) < 0.0001
    assert np.abs(hal.loc[datetime(2010, 8, 11), 'HAL_SMA_Momentum'] - (-2.495234)) < 0.0001
def test_mma(self):
    """Check the ``JEC_MMA`` and ``MSFT_MMA`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    jec = ETL(self.d, 'JEC').df_temp
    assert np.abs(jec.loc[datetime(2010, 8, 26), 'JEC_MMA'] - 40.437522) < 0.0001
    assert np.abs(jec.loc[datetime(2012, 11, 12), 'JEC_MMA'] - 40.214954) < 0.0001

    msft = ETL(self.d, 'MSFT').df_temp
    assert np.abs(msft.loc[datetime(2010, 8, 2), 'MSFT_MMA'] - 22.728518) < 0.0001
    assert np.abs(msft.loc[datetime(2010, 8, 11), 'MSFT_MMA'] - 22.654056) < 0.0001
def test_sma(self):
    """Check the ``SCG_SMA`` and ``YHOO_SMA`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    scg = ETL(self.d, 'SCG').df_temp
    assert np.abs(scg.loc[datetime(2010, 8, 26), 'SCG_SMA'] - 29.149889) < 0.0001
    assert np.abs(scg.loc[datetime(2012, 11, 12), 'SCG_SMA'] - 41.288754) < 0.0001

    yhoo = ETL(self.d, 'YHOO').df_temp
    assert np.abs(yhoo.loc[datetime(2010, 8, 2), 'YHOO_SMA'] - 15.814554) < 0.0001
    assert np.abs(yhoo.loc[datetime(2010, 8, 11), 'YHOO_SMA'] - 15.653366) < 0.0001
def test_vol_momentum_r1(self):
    """Check the ``HBAN_p_real1`` and ``HCP_p_real1`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d``; each checked value
    is expected to be 0 or 1, compared to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    hban = ETL(self.d, 'HBAN').df_temp
    assert np.abs(hban.loc[datetime(2010, 9, 10), 'HBAN_p_real1'] - 0) < 0.0001
    assert np.abs(hban.loc[datetime(2010, 9, 13), 'HBAN_p_real1'] - 1) < 0.0001

    hcp = ETL(self.d, 'HCP').df_temp
    assert np.abs(hcp.loc[datetime(2010, 8, 2), 'HCP_p_real1'] - 1) < 0.0001
    assert np.abs(hcp.loc[datetime(2010, 8, 11), 'HCP_p_real1'] - 1) < 0.0001
def test_vol_momentum_r2(self):
    """Check the ``HD_p_real2`` and ``HES_p_real2`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d``; each checked value
    is expected to be 0 or 1, compared to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    hd = ETL(self.d, 'HD').df_temp
    assert np.abs(hd.loc[datetime(2011, 1, 10), 'HD_p_real2'] - 0) < 0.0001
    assert np.abs(hd.loc[datetime(2011, 1, 13), 'HD_p_real2'] - 0) < 0.0001

    hes = ETL(self.d, 'HES').df_temp
    assert np.abs(hes.loc[datetime(2012, 11, 13), 'HES_p_real2'] - 0) < 0.0001
    assert np.abs(hes.loc[datetime(2012, 11, 15), 'HES_p_real2'] - 1) < 0.0001
def test_stock_mean63d(self):
    """Check the ``_Mean63d`` and ``_Std63d`` columns for AAPL and AKAM.

    Despite the test name, one ``_Mean63d`` value and one ``_Std63d`` value
    are checked per symbol, each to within 1e-4 of its expected figure.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    aapl = ETL(self.d, 'AAPL').df_temp
    assert np.abs(aapl.loc[datetime(2016, 5, 25), 'AAPL_Mean63d'] - 0.000676) < 0.0001
    assert np.abs(aapl.loc[datetime(2016, 6, 1), 'AAPL_Std63d'] - 0.014677) < 0.0001

    akam = ETL(self.d, 'AKAM').df_temp
    assert np.abs(akam.loc[datetime(2014, 7, 25), 'AKAM_Mean63d'] - 0.002280) < 0.0001
    assert np.abs(akam.loc[datetime(2013, 1, 7), 'AKAM_Std63d'] - 0.022000) < 0.0001
def test_beta63d(self):
    """Check the ``DOW_Beta`` and ``GOOG_Beta`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    dow = ETL(self.d, 'DOW').df_temp
    assert np.abs(dow.loc[datetime(2013, 1, 9), 'DOW_Beta'] - 0.343213) < 0.0001
    assert np.abs(dow.loc[datetime(2013, 1, 16), 'DOW_Beta'] - 0.332140) < 0.0001

    goog = ETL(self.d, 'GOOG').df_temp
    assert np.abs(goog.loc[datetime(2013, 1, 22), 'GOOG_Beta'] - 0.280741) < 0.0001
    assert np.abs(goog.loc[datetime(2014, 9, 2), 'GOOG_Beta'] - 0.367537) < 0.0001
def test_vol_momentum(self):
    """Check the ``HAR_Vol_Momentum`` and ``HAS_Vol_Momentum`` columns at fixed dates.

    A fresh ``ETL`` is built per symbol from ``self.d`` and two values per
    symbol are compared with their expected figures to within 1e-4.
    """
    # ``.loc`` is used instead of ``.ix``: ``.ix`` was removed in pandas 1.0,
    # and both keys here are labels, so the lookup is the same.
    har = ETL(self.d, 'HAR').df_temp
    assert np.abs(har.loc[datetime(2010, 9, 10), 'HAR_Vol_Momentum'] - (-61882700)) < 0.0001
    assert np.abs(har.loc[datetime(2010, 9, 13), 'HAR_Vol_Momentum'] - (-18180000)) < 0.0001

    has = ETL(self.d, 'HAS').df_temp
    assert np.abs(has.loc[datetime(2010, 8, 2), 'HAS_Vol_Momentum'] - (-78002300.0)) < 0.0001
    assert np.abs(has.loc[datetime(2010, 8, 11), 'HAS_Vol_Momentum'] - (-63306800.0)) < 0.0001
def __init__(self, dataloader, symbol):
    """Start with empty label/data state and build the ETL for ``symbol``.

    Args:
        dataloader: Forwarded unchanged as the first argument to ``ETL``.
        symbol: Stored on the instance and forwarded as the second
            argument to ``ETL``.
    """
    # No label has been chosen yet.
    self.label_ready = False
    self.label_name = None

    # No X/y data has been prepared yet.
    self.data_ready = False
    self.data_X = None
    self.data_y = None

    self.symbol = symbol
    etl = ETL(dataloader, symbol)
    self.e = etl
    self.x_days = None

    # df_main is the very same object as the ETL's df_temp (no copy is made).
    self.df_main = etl.df_temp
def extract():
    """Search Spotify for the submitted query and harvest every artist found.

    Reads ``query`` from ``request.form``, runs ``sp.search`` on it, collects
    a URI -> name mapping of all artists on the returned tracks, dumps raw
    data for each artist via ``SpotifyDataHarvester``, then rebuilds the final
    table through ``ETL(engine)``.

    Returns:
        The rendered ``index.html`` template with ``msg='Harvest completed'``
        on success. On any exception the error is printed and ``None`` is
        returned.
    """
    # NOTE(review): the failure path returns None; if this is registered as a
    # web view the framework may reject that -- confirm the intended behaviour.
    try:
        input_query = request.form['query']
        spotify_data = sp.search(input_query)

        # Later occurrences of the same URI simply overwrite the stored name.
        artist_list = {}
        for track in spotify_data['tracks']['items']:
            for artist in track['artists']:
                artist_list[artist['uri']] = artist['name']
        print('{} uris found for the {} genre'.format(len(artist_list),
                                                      input_query))

        for uri, name in artist_list.items():
            SpotifyDataHarvester(sp, uri, name, engine).dump_raw_data()
        print('Data extraction completed for the genre {}.'.format(input_query))

        ETL(engine).build_final_table()
        print('Final table ready for analysis')
        return render_template('index.html', msg='Harvest completed')
    except Exception as e:
        print(e)
import os
import sys

from ETL import ETL

# Run "proc_statis_month_inoutpatient_charge_avg" through an ETL configured
# with pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_month_inoutpatient_charge_avg")
def setup_class(cls):
    """Prepare the shared fixtures stored on the class.

    Announces the class being set up, builds a ``DataLoader`` for
    ``dev.csv`` with the bounds '2009-01-01' and '2016-06-30', calls its
    ``load_stock_data`` method, and builds an ``ETL`` for 'GOOGL' from it.
    The loader is stored as ``cls.d`` and the ETL as ``cls.e``.
    """
    print("Setting up CLASS {0}".format(cls.__name__))

    loader = DataLoader('dev.csv', '2009-01-01', '2016-06-30')
    cls.d = loader
    loader.load_stock_data()

    cls.e = ETL(loader, 'GOOGL')
import os
import sys

import pandas as pd

from ETL import ETL
from Classifier import LogisticRegressor
from TrafficProcessor import TrafficProcessor as processor

# The two input paths come from the command line: first the file that is
# loaded with label=1, then the file that is loaded with label=0.
pos_file, neg_file = sys.argv[1], sys.argv[2]

# Each load_data call loads one file, processes it and saves the result to
# file (as described by the original author's notes).
print("Loading Data...")
ETL().load_data(file_path=pos_file, label=1)
ETL().load_data(file_path=neg_file, label=0)
print("Done Loading data")

# NOTE(review): presumably the two load_data calls above write this file -- confirm.
df = pd.read_csv('pos_neg_output.txt')

# y is the last column; X is every other column.
X = df.drop([df.columns[-1]], axis=1)
y = df[df.columns[-1]]

print("Preparing data for model...")
# NOTE(review): train_test_split is not imported anywhere in the visible part
# of this script -- confirm it is brought into scope, otherwise this raises
# NameError.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y)
import os
import sys

from ETL import ETL

# Run "proc_audit_item_analysis" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_audit_item_analysis")
import os
import sys

from ETL import ETL

# Two runs for pmonth = "2018-12", each on its own ETL instance:
# first the department analysis, then the department violation detail.
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_audit_dept_analysis")

etl2 = ETL({"pmonth": "2018-12"})
etl2.run("proc_audit_dept_violation_detail")
import os
import sys

from ETL import ETL

# Two runs for pmonth = "2018-11", each on its own ETL instance:
# first the diagnosis analysis, then the diagnosis violation detail.
etl1 = ETL({"pmonth": "2018-11"})
etl1.run("proc_audit_diagnosis_analysis")

etl2 = ETL({"pmonth": "2018-11"})
etl2.run("proc_audit_diagnosis_violation_detail")
import os
import sys

from ETL import ETL

# Run "proc_statis_service_project_frequency" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_service_project_frequency")
import os
import sys

from ETL import ETL

# Run "proc_statis_project_development_item" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_project_development_item")
import logging
import os
import sys

import yaml

from ETL import ETL

if __name__ == '__main__':
    # Send INFO-and-above log records to stdout with a timestamped format.
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        datefmt='%d/%m/%Y %I:%M:%S%p')
    logger = logging.getLogger(__name__)

    logger.info('Loading configuration file')
    config_path = os.path.join("config_file", "ETL_params.yaml")
    with open(config_path, 'r') as stream:
        # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
        # and raises TypeError on PyYAML >= 6. safe_load parses standard YAML
        # without constructing arbitrary Python objects.
        setting = yaml.safe_load(stream)

    # Build the ETL from the parsed settings, then download and parse.
    my_etl = ETL(setting)
    my_etl.download()
    my_etl.parse(verbose=False)
def single_proc(actionName, month):
    """Run one named action through a fresh ``ETL`` for the given month.

    Args:
        actionName: Passed to ``ETL.run`` as the action to execute.
        month: Passed to ``ETL`` as the value of the ``"pmonth"`` setting.

    Prints a completion message once ``run`` has returned.
    """
    job = ETL({"pmonth": month})
    job.run(actionName)
    print("%s %s process completed!" % (month, actionName))
import os
import sys

from ETL import ETL

# Run "proc_statis_service_project_violation" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_service_project_violation")
import os
import sys

from ETL import ETL

# Run "proc_statis_section_work" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_section_work")
import os
import sys

from ETL import ETL

# Run "proc_statis_rules_violation_amount" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_rules_violation_amount")
import os
import sys

from ETL import ETL

# Run "proc_audit_service_record" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_audit_service_record")
import os
import sys

from ETL import ETL

# Run "proc_statis_month_violation_detail" through an ETL configured with
# pmonth = "2018-12".
etl1 = ETL({"pmonth": "2018-12"})
etl1.run("proc_statis_month_violation_detail")