def insert(date): start = time.time() datafilepath = settings.data_dir + "UnderlyingOptionsEODQuotes_" + str(date) + ".zip" unzippedpath = util.unzip(datafilepath) db = psycopg2.connect("host=localhost dbname=optiondata user="******"DROP TABLE") cur.execute("DROP TABLE IF EXISTS fullday") print("CREATE TABLE") cur.execute("CREATE TABLE fullday(id SERIAL PRIMARY KEY NOT NULL, underlying_symbol VARCHAR, quote_date date, root VARCHAR, expiration date, strike decimal, option_type VARCHAR, open decimal, high decimal, low decimal, close decimal, trade_volume int, bid_size_1545 VARCHAR, bid_1545 decimal, ask_size_1545 VARCHAR, ask_1545 decimal, underlying_bid_1545 decimal, underlying_ask_1545 decimal, underlying_mid_1545 decimal, bid_size_eod VARCHAR, bid_eod VARCHAR, ask_size_eod VARCHAR, ask_eod VARCHAR, mid_1545 decimal, underlying_bid_eod VARCHAR, underlying_ask_eod VARCHAR, vwap decimal, open_interest VARCHAR, delivery_code VARCHAR, rf decimal, rtiy decimal, iv decimal, bs_price_bid_ask decimal, delta decimal, theta decimal, vega decimal)") print("CREATE INDEX...") cur.execute("CREATE INDEX idx_underlying_symbol_fullday ON fullday USING btree (underlying_symbol)") cur.execute("CREATE INDEX idx_strike_fullday ON fullday USING btree (strike)") cur.execute("CREATE INDEX idx_expiration_fullday ON fullday USING btree (expiration)") cur.execute("CREATE INDEX idx_type_fullday ON fullday USING btree (option_type)") cur.execute("CREATE INDEX idx_bid_1545_fullday ON fullday USING btree (bid_1545)") cur.execute("CREATE INDEX idx_ask_1545_fullday ON fullday USING btree (ask_1545)") print ("copy_from and commit") with open(unzippedpath, 'r') as f: next(f) # Skip the header row. cur.copy_from(f, 'fullday', sep=',', columns=('underlying_symbol', 'quote_date', 'root', 'expiration', 'strike', 'option_type', 'open', 'high', 'low', 'close', 'trade_volume', 'bid_size_1545', 'bid_1545', 'ask_size_1545', 'ask_1545', 'underlying_bid_1545', 'underlying_ask_1545', 'bid_size_eod', 'bid_eod', 'ask_size_eod', 'ask_eod', 'underlying_bid_eod', 'underlying_ask_eod', 'vwap', 'open_interest', 'delivery_code')) db.commit() print("Update: LOWER(option_type)") cur.execute("UPDATE fullday SET option_type = LOWER(option_type)") db.commit() print("Update: underlying_mid_1545") cur.execute("UPDATE fullday SET underlying_mid_1545 = (underlying_bid_1545 + underlying_ask_1545) / 2") db.commit() print("Update: mid_1545") cur.execute("UPDATE fullday SET mid_1545 = (bid_1545 + ask_1545) / 2") db.commit() print() print ("Done!") if unzippedpath != "": os.remove(unzippedpath) end = time.time() print (end - start) db.close()
def insert(underlyings): print("insert: " + str(underlyings)) print() engine = create_engine("postgresql://" + settings.db_username + ":" + settings.db_password + "@127.0.0.1/optiondata") table = 'optiondata' db = pgdb.connect(host="localhost", user=settings.db_username, password=settings.db_password, database="optiondata") cur2 = db.cursor() counter = 0 onlyfiles = [ f for f in listdir(settings.path_to_data_directory) if isfile(join(settings.path_to_data_directory, f)) ] for datafile in onlyfiles: if datafile.endswith(".zip"): index = datafile.index('_') + 1 datestring = datafile[index:(index + 10)] unzippedpath = "" for underlying in underlyings: query = "SELECT EXISTS(SELECT 1 FROM optiondata WHERE quote_date = '" + datestring + "' AND underlying_symbol = '" + underlying + "')" cur2.execute(query) row = cur2.fetchone() if (row[0] == 0): counter += 1 print( str(counter) + "\t" + datestring + "\t" + underlying + "\t" + str(datafile)) if unzippedpath == "": datafilepath = settings.path_to_data_directory + datafile unzippedpath = util.unzip(datafilepath) csv = pd.read_csv(unzippedpath, header=0, dtype={ "underlying_symbol": object, "quote_date": object, "root": object, "expiration": object, "strike": np.float64, "option_type": object, "open": np.float64, "high": np.float64, "low": np.float64, "close": np.float64, "trade_volume": np.int64, "bid_size_1545": np.int64, "bid_1545": np.float64, "ask_size_1545": np.int64, "ask_1545": np.float64, "underlying_bid_1545": np.float64, "underlying_ask_1545": np.float64, "bid_size_eod": np.int64, "bid_eod": np.float64, "ask_size_eod": np.int64, "ask_eod": np.float64, "underlying_bid_eod": np.float64, "underlying_ask_eod": np.float64, "vwap": object, "open_interest": np.float64, "ask_eod": np.float64, "delivery_code": object }) filtered = csv[(csv['underlying_symbol'] == underlying)] if underlying == "^SPX": filtered = filtered[(filtered.root != 'BSZ') & (filtered.root != 'BSK' )] # filter out binary options filtered['option_type'] = filtered.option_type.str.lower() filtered['mid_1545'] = (filtered['bid_1545'] + filtered['ask_1545']) / 2 filtered['underlying_mid_1545'] = ( filtered['underlying_bid_1545'] + filtered['underlying_ask_1545']) / 2 filtered.to_sql(table, engine, if_exists='append', index=False) db.commit() if unzippedpath != "": os.remove(unzippedpath) print() print("Done inserting data ") print() db.close()
def insert(underlyings, dir, precompute): print("insert: " + str(underlyings)) print() engine = create_engine("postgresql://" + settings.db_username + ":" + settings.db_password + "@127.0.0.1/optiondata") table = 'optiondata' db = psycopg2.connect(host="localhost", user=settings.db_username, password=settings.db_password, database="optiondata") cur2 = db.cursor() counter = 0 dates = set() # traverse root directory, and list directories as dirs and files as files for root, dirs, files in os.walk(dir): for file in files: if file.endswith(".zip"): index = file.index('_') + 1 datestring = file[index:(index + 10)] date = datetime.strptime(datestring, '%Y-%m-%d').date() print(date) unzippedpath = "" for underlying in underlyings: if ((underlying in startdates) and (date > startdates[underlying])) or ( underlying not in startdates): query = "SELECT EXISTS(SELECT 1 FROM optiondata WHERE quote_date = '" + datestring + "' AND underlying_symbol = '" + underlying + "')" cur2.execute(query) row = cur2.fetchone() if (row[0] == False): counter += 1 print( str(counter) + "\t" + datestring + "\t" + underlying + "\t" + str(file)) if unzippedpath == "": datafilepath = root + "/" + file unzippedpath = util.unzip(datafilepath) if unzippedpath is not None: df = pd.read_csv( unzippedpath, header=0, dtype={ "underlying_symbol": object, "quote_date": object, "root": object, "expiration": object, "strike": np.float64, "option_type": object, "open": np.float64, "high": np.float64, "low": np.float64, "close": np.float64, "trade_volume": np.int64, "bid_size_1545": np.int64, "bid_1545": np.float64, "ask_size_1545": np.int64, "ask_1545": np.float64, "underlying_bid_1545": np.float64, "underlying_ask_1545": np.float64, "bid_size_eod": np.int64, "bid_eod": np.float64, "ask_size_eod": np.int64, "ask_eod": np.float64, "underlying_bid_eod": np.float64, "underlying_ask_eod": np.float64, "vwap": object, "open_interest": np.float64, "delivery_code": object }) filtered = df[( df['underlying_symbol'] == underlying)] if underlying == "^SPX": filtered = filtered[ (filtered.root != 'BSZ') & (filtered.root != 'BSK')] # filter out binary options filtered[ 'option_type'] = filtered.option_type.str.lower( ) filtered['mid_1545'] = ( filtered['bid_1545'] + filtered['ask_1545']) / 2 filtered['underlying_mid_1545'] = ( filtered['underlying_bid_1545'] + filtered['underlying_ask_1545']) / 2 if len(filtered.index) > 0: print(str(len(filtered.index))) dates.add(datestring) filtered.to_sql(table, engine, if_exists='append', index=False, chunksize=1000) db.commit() # only if data hast been inserted, for all underlyings together if precompute: precompute_bs_price.precompute("optiondata", date, "*", True) precompute_greeks.precompute("optiondata", date, "*", True) if ((unzippedpath != "") and (unzippedpath is not None)): os.remove(unzippedpath) print("Done inserting data \n") db.close() return dates
def load(underlyings, dir, precompute_all): print("load: " + str(underlyings)) print() engine = create_engine("postgresql://" + settings.db_username + ":" + settings.db_password + "@127.0.0.1/optiondata") table = "optiondata" db = psycopg2.connect( host="localhost", user=settings.db_username, password=settings.db_password, database="optiondata", ) cur2 = db.cursor() counter = 0 dates_from_dir = set() dates_in_db = set() dates_to_load = set() # traverse root directory, and list directories as dirs and files as files # get all dates that need to be processed for root, dirs, files in os.walk(dir): for file in files: if file.endswith(".zip"): index = file.index("_") + 1 file_datestring = file[index:(index + 10)] dates_from_dir.add(file_datestring) for underlying in underlyings: if len(dates_from_dir) > 0: query = ( "SELECT DISTINCT quote_date FROM optiondata WHERE underlying_symbol = '" + underlying + "' AND quote_date IN " + str(dates_from_dir).replace("{", "(").replace("}", ")") + "") cur2.execute(query) rows = cur2.fetchall() for row in rows: dates_in_db.add(str(row[0])) dates_to_load = dates_from_dir - dates_in_db print("dates_from_dir: " + str(len(dates_from_dir))) print("dates_in_db: " + str(len(dates_in_db))) print("dates_to_load: " + str(len(dates_to_load))) print() for date in sorted(dates_to_load): if ((underlying in util.startdates) and (datetime.strptime(date, '%Y-%m-%d').date() > util.startdates[underlying])) or (underlying not in util.startdates): unzippedpath = "" counter += 1 file = "UnderlyingOptionsEODQuotes_" + str(date) + ".zip" print( str(counter) + "\t" + date + "\t" + underlying + "\t" + file) currentdir = dir if currentdir.endswith("/optiondata/"): currentdir = currentdir + date[0:4] + "/" datafilepath = currentdir + file unzippedpath = util.unzip(datafilepath) if unzippedpath is not None: df = pd.read_csv( unzippedpath, header=0, dtype={ "underlying_symbol": object, "quote_date": object, "root": object, "expiration": object, "strike": np.float64, "option_type": object, "open": np.float64, "high": np.float64, "low": np.float64, "close": np.float64, "trade_volume": np.int64, "bid_size_1545": np.int64, "bid_1545": np.float64, "ask_size_1545": np.int64, "ask_1545": np.float64, "underlying_bid_1545": np.float64, "underlying_ask_1545": np.float64, "bid_size_eod": np.int64, "bid_eod": np.float64, "ask_size_eod": np.int64, "ask_eod": np.float64, "underlying_bid_eod": np.float64, "underlying_ask_eod": np.float64, "vwap": object, "open_interest": np.float64, "delivery_code": object, }, ) filtered = df[(df["underlying_symbol"] == underlying)] if underlying == "^SPX": filtered = filtered[(filtered.root != "BSZ") & ( filtered.root != "BSK")] # filter out binary options filtered[ "option_type"] = filtered.option_type.str.lower() filtered["mid_1545"] = (filtered["bid_1545"] + filtered["ask_1545"]) / 2 filtered["underlying_mid_1545"] = ( filtered["underlying_bid_1545"] + filtered["underlying_ask_1545"]) / 2 if len(filtered.index) > 0: print(str(len(filtered.index))) filtered.to_sql( table, engine, if_exists="append", index=False, chunksize=1000, ) db.commit() if (unzippedpath != "") and (unzippedpath is not None): os.remove(unzippedpath) if precompute_all: precompute_counter = 0 for date_to_load in sorted(dates_to_load): counter += 1 print(str(counter) + ": " + date_to_load) precompute_greeks.precompute("optiondata", date_to_load, underlying, True) print("Done loading data \n") db.close() return dates_to_load