def __init__(self):
    super().__init__(__file__)
    Log.d('construct')
    retry_delay_seconds = int(AppConfig.setting('DATAFETCH_API_RETRY_DELAY_SECONDS'))
    data_response_dirpath = AppConfig.setting('DATA_RESPONSE_DIRPATH')
    Log.d('data response dirpath is: {}', data_response_dirpath)
    self.retry_delay_seconds = retry_delay_seconds
    self.data_response_dirpath = data_response_dirpath
    OsExpert.ensure_abs_dirpath_exists(data_response_dirpath)
    self.subscribers = subscribe.all()
def __init__(self):
    super().__init__(__file__)
    Log.d('construct')
    self.dir_path = AppConfig.setting('DATA_RESPONSE_DIRPATH')
    self.store = Store()
    self.subscribers = subscribe.all()
    self.parse_util = ParseUtil(self.subscribers, self.store)
def __init__(self, h5_filepath, version):
    warnings.simplefilter('ignore', NaturalNameWarning)
    h5_inputfile = Path(h5_filepath)
    output_dirpath = AppConfig.setting('PREDICTOR_DATA_DIRPATH')
    self.h5_out_filepath = os.path.join(output_dirpath, h5_inputfile.name)
    h5_out_file = Path(self.h5_out_filepath)
    if h5_out_file.exists():
        Log.i('overwrite file?: {}', h5_out_file)
        if not OsExpert.prompt_confirm('File already exists, overwrite? {}'.format(h5_out_file)):
            Log.d('user aborted, exiting')
            exit()
        Log.w('removing file: {}', h5_out_file)
        os.remove(self.h5_out_filepath)
    self.predictors_map = {}
    base_filepath = output_dirpath
    with pd.HDFStore(h5_filepath, mode='r') as h5:
        keys = h5.keys()
        Log.i('h5 input keys: {}', keys)
        assert len(keys) == 1, 'hardcoded restriction on single key was violated'
        for key in keys:
            Log.i('row count for {}: {}', key, h5.get_storer(key).nrows)
            self.predictors_map[key] = [
                EnsemblePredictor(min_predict_generator_size=2000, max_train_size=5000)
            ]
    self.h5_watcher = H5FileWatcher(h5_filepath, self.handle_job_epoch, {'is_simulated': 0})
def frame_info(filename):
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    filepath = os.path.join(dirpath, filename)
    result = {}
    with pd.HDFStore(filepath, mode='r') as h5:
        key = h5.keys()[0]  # TODO: always select first?
        storer = h5.get_storer(key)
        time_column_names = [
            attr for attr in storer.attrs.data_columns if attr.endswith(')_time')
        ]
        frame = pd.read_hdf(h5, key)  # , columns=column_names)
        active_cols = [
            attr for attr in storer.attrs.data_columns if attr.endswith('_active')
        ]
        row_count = len(frame)
        result[key] = {
            'row count': row_count,
            'time cols sum': json2html.convert(frame[time_column_names].sum().to_json()),
            'active_ratios': json2html.convert((frame[active_cols].sum() / row_count).to_json()),
            'latest_row': json2html.convert(frame.iloc[-1].to_json()),
            'isnull-sum': json2html.convert(frame.isnull().sum().to_json()),
            'describe': json2html.convert(frame.describe().to_json()),
        }
    return render_template('frame-info.html', style=style, frame_info_map=result)
def retrieve(db, url, datasource_id, exchange_id, currency_id):
    temp_dirpath = AppConfig.setting('TEMP_DIRPATH')
    filepath = os.path.join(temp_dirpath, url.split('/')[-1])
    downloadFile(url, filepath)
    duplicateCount = 0
    insertCount = 0
    with gzip.open(filepath, 'rt') as f:
        Log.d('Processing csv file..')
        spamreader = csv.reader(f, delimiter=',', quotechar='|')
        for row in spamreader:
            timeStr = row[0]
            epochTime = int(timeStr)
            priceStr = row[1]
            price = float(priceStr)
            amountStr = row[2]
            amount = float(amountStr)
            transaction = {
                'datasource_id': datasource_id,
                'exchange_id': exchange_id,
                'amount': amount,
                'price': price,
                'currency_id': currency_id,
                'epoch_time': epochTime,
            }
            try:
                db.create_transaction(transaction)
                insertCount += 1
            except DuplicateInsertException:
                duplicateCount += 1
    os.remove(filepath)
    Log.i('Done processing, insert count: {}, duplicate count: {}', insertCount, duplicateCount)
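# Hypothetical invocation sketch (assumption): the URL, ids and gateway wiring below are
# illustrative only; the original caller is not shown. The gzipped csv is expected to hold
# rows of (epoch_time, price, amount), matching the parsing above.
db = DatabaseGateway()
retrieve(db,
         'https://example.com/dumps/btcusd.csv.gz',  # placeholder URL, not a real endpoint
         datasource_id=1, exchange_id=1, currency_id=1)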
def __init__(self):
    super().__init__(__file__, isToNotifyStartup=False)
    self.maxEmailReccurenceMinutes = float(
        AppConfig.setting('LOGWATCH_EMAIL_MAX_RECCURENCE_MINUTES'))
    self.triggerLines = ['ERROR', 'WARNING']
    Log.d('construct: {}', self.__dict__)
    self.matchCountSinceLastEmail = 0
    self.lastEmailDatetime = None
def get_or_create_hdf5(self):
    filepath = AppConfig.setting('DATASTORE_HDF5_FILEPATH')
    file = Path(filepath)
    exists = file.exists()
    if exists:
        assert file.is_file(), 'hdf5 filepath exists but is not a file'
    pd.set_option('io.hdf.default_format', 'table')
    hdf5 = pd.HDFStore(filepath, append=True)
    hdf5.swmr_mode = True  # may or may not have an effect
    return hdf5
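# Minimal usage sketch (assumption): appending a batch of rows to the store returned by
# get_or_create_hdf5. The 'transactions' key and the 'datastore' instance are hypothetical
# names used only for illustration; 'table' format stores support repeated append calls.
import pandas as pd

hdf5 = datastore.get_or_create_hdf5()
batch = pd.DataFrame({'price': [100.0], 'amount': [0.5]}, index=[1514764800])
hdf5.append('transactions', batch)
hdf5.close()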
async def __socket_subscribe(self):
    try:
        publicKey = AppConfig.setting('BITCOINAVERAGE_PUBLIC_KEY')
        secretKey = AppConfig.setting('BITCOINAVERAGE_SECRET_KEY')
        timestamp = int(time.time())
        payload = '{}.{}'.format(timestamp, publicKey)
        hex_hash = hmac.new(secretKey.encode(), msg=payload.encode(), digestmod=hashlib.sha256).hexdigest()
        signature = '{}.{}'.format(payload, hex_hash)
        ticket_url = 'https://apiv2.bitcoinaverage.com/websocket/get_ticket'
        ticket_header = {'X-signature': signature}
        async with aiohttp.ClientSession() as session:
            async with session.get(ticket_url, headers=ticket_header) as resp:
                response_text = await resp.text()
                Log.d('received ticket response: {}', response_text)
                if response_text == 'Client limit reached for api key apikey':
                    raise Exception(response_text)
                response_json = json.loads(response_text)
                ticket = response_json['ticket']
                Log.d('ticket received: {}', ticket)
        url = 'wss://apiv2.bitcoinaverage.com/websocket/ticker?public_key={}&ticket={}'.format(publicKey, ticket)
        subscribe_message = json.dumps({
            'event': 'message',
            'data': {
                'operation': 'subscribe',
                'options': {
                    'currency': '{}{}'.format(self.from_currency_code, self.to_currency_code),
                    'market': self.market_name()
                }
            }
        })
        Log.d('sending subscribe message: {}', subscribe_message)
        # use a context-managed session so the websocket connection is closed on exit
        async with aiohttp.ClientSession() as session:
            async with session.ws_connect(url) as ws:
                await ws.send_str(subscribe_message)
                async for msg in ws:
                    if msg.type == aiohttp.WSMsgType.CLOSED:
                        raise Exception('Socket presumed invalidated as received message was of aiohttp type "closed"')
                    if msg.type == aiohttp.WSMsgType.ERROR:
                        raise Exception('Socket presumed invalidated as received message was of aiohttp type "error"')
                    result = msg.data
                    yield result
    except Exception as e:
        raise Exception('Failed to subscribe via socket') from e
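# Minimal consumer sketch (assumption): draining the async generator above from an asyncio
# entry point. 'handler.socket_subscribe()' stands in for a public wrapper around the
# name-mangled __socket_subscribe; the real driver code is not part of this snippet.
import asyncio

async def consume(handler):
    async for message in handler.socket_subscribe():
        Log.d('ticker message: {}', message)

# asyncio.get_event_loop().run_until_complete(consume(handler))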
def __init__(self, version):
    super().__init__(__file__)
    self.window_size = 15
    self.interval_seconds = [15 * 60]  # 15 minutes
    self.contruct_time = time.time()
    self.version = version
    self.sleep_seconds = 1  # must be low enough that the poll eventually returns an empty result set, i.e. catches up to realtime
    self.transaction_min_timestamp = int(
        AppConfig.setting('GENERATOR_TRANSACTION_MIN_TIMESTAMP'))
    self.data_dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    Log.d('construct: {}', self.__dict__)
    self.db = DatabaseGateway()
    max_history_minutes = 10 * 24 * 60  # max(self.minute_intervals)
    self.from_currency_ids = []
    self.to_currency_ids = []
    self.run_config = self.read_run_config()
    self.jobs = list(
        self.__jobs_iterate(max_history_minutes, self.run_config))
    Log.i('count of generator jobs: {}', len(self.jobs))
def create_predictor_from_csv(self):
    Log.i('initiating sagemaker model creation')
    role = AppConfig.setting('AWS_PREDICTOR_ROLE')
    bucket = 'cryptrade-sagemaker'
    custom_code_upload_location = 's3://{}/customcode/tensorflow_iris'.format(bucket)
    model_artifacts_location = 's3://{}/artifacts'.format(bucket)
    Log.d('training data will be uploaded to: {}', custom_code_upload_location)
    Log.d('training artifacts will be uploaded to: {}', model_artifacts_location)
    sess = sagemaker.Session()

    def upload_to_s3(channel, filepath, skip_if_name_and_size_matches=False):
        """From the SageMaker examples, e.g.:
        https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning.ipynb"""
        file = Path(filepath)
        s3 = boto3.resource('s3')
        key = channel + '/' + file.name
        bucket_ref = s3.Bucket(bucket)
        objs = list(bucket_ref.objects.filter(Prefix=key))
        is_file_already_existing = len(objs) > 0 and objs[0].key == key
        if is_file_already_existing:
            if skip_if_name_and_size_matches:
                s3_client = boto3.client('s3')
                response = s3_client.head_object(Bucket=bucket, Key=key)
                local_size = file.stat().st_size
                remote_size = response['ContentLength']
                if remote_size == local_size:
                    Log.w('skipping upload as s3 key of same size ({:.2f}kb) already exists: {}', local_size / 1000, key)
                    return
            Log.w('overwriting existing s3 key: {}', key)
        with open(filepath, "rb") as data:
            s3.Bucket(bucket).put_object(Key=key, Body=data)

    s3_data_folder = 'data'
    upload_to_s3(s3_data_folder, self.train_filepath, True)
    upload_to_s3(s3_data_folder, self.test_filepath, True)
    upload_to_s3(s3_data_folder, self.meta_filepath)
    estimator = TensorFlow(
        entry_point='aws_dnn_predictor_entry.py',
        role=role,
        output_path=model_artifacts_location,
        code_location=custom_code_upload_location,
        train_instance_count=1,
        train_instance_type='ml.c5.xlarge',
        training_steps=1000,
        evaluation_steps=100)
    train_data_location = 's3://{}/{}'.format(bucket, s3_data_folder)
    Log.i('fitting train data: {}', train_data_location)
    estimator.fit(train_data_location)
    Log.i('deploying model')
    deploy_start = datetime.now()
    predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.t2.medium')
    deploy_end = datetime.now()
    Log.i('deployed predictor in {}s, endpoint is:\n{}', deploy_end - deploy_start, predictor.endpoint)
    self.predictor = predictor
def test_file_contents_md5hash(self):
    tempFilepath = os.path.join(AppConfig.setting('TEMP_DIRPATH'), 'file.txt')
    with open(tempFilepath, 'w') as f:
        f.write('sample text')
    self.assertEqual(
        OsExpert.md5(tempFilepath),
        '70ee1738b6b21e2c8a43f3a5ab0eee71')
    os.remove(tempFilepath)
def frame(mode, filename, from_epoch, to_epoch, filterInNth, agents, format_as_image):
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    filepath = os.path.join(dirpath, filename)
    if from_epoch is None:
        from_epoch = to_epoch - 60 * 60 * 24 * 7
    with pd.HDFStore(filepath, mode='r') as h5:
        key = h5.keys()[0]  # TODO: always select first?
        storer = h5.get_storer(key)
        row_count = storer.nrows
        Log.d(row_count)
        first_epoch = pd.read_hdf(h5, key, start=0, stop=1, columns=[]).index.values[0]
        last_epoch = pd.read_hdf(h5, key, start=row_count - 1, stop=row_count, columns=[]).index.values[0]
        column_names = [attr for attr in storer.attrs.data_columns]
        plot_html = h5_to_plot(h5, from_epoch, to_epoch, filterInNth, agents, format_as_image)
        if mode == 'plot_only':
            return plot_html
        feature_columns = set([
            a.split('_')[1] for a in column_names if a.startswith('feature_')
        ])
        feature_names = [c.split('(')[0] for c in feature_columns]
        agent_map = {
            fn: [c for c in feature_columns if c.startswith(fn)]
            for fn in feature_names
        }
        return render_template(
            'frame.html',
            style=style,
            plothtml=plot_html,
            filename=filename,
            from_epoch=from_epoch,
            to_epoch=to_epoch,
            first_epoch=first_epoch,
            last_epoch=last_epoch,
            min_epoch=1514764800,  # min epoch is 2018
            max_epoch=int(time.time()),
            agent_map=sorted(agent_map.items()),
            job_uid=key,
            frame_info_html=json2html.convert(json={
                'row count': row_count,
                'columns': column_names
            }))
def datafetch_api_id_by_handler_filepath(self, handler_filepath, datafetch_api_ids=None, create_if_nonexisting=False):
    table_name = 'datafetch_api'
    col_name = 'handler_filepath'
    scalar_col_name = 'id'
    if create_if_nonexisting:
        result = self.__scalar_by_unique_col_value(
            table_name, col_name, handler_filepath, scalar_col_name,
            frame=datafetch_api_ids, nonexisting_is_error=False)
        if result is not None:
            return result
        handler_filename = os.path.basename(handler_filepath)
        result_endpoint_prefix = AppConfig.setting('RESULT_ENDPOINT_PREFIX')
        result_endpoint = '{}{}'.format(result_endpoint_prefix, handler_filename)
        new_datafetch_api_id = self.create_datafetch_api({
            'handler_filepath': handler_filepath,
            'result_endpoint': result_endpoint,
            'result_frequency_seconds': 30
        })
        Log.d('created datafetch api id {} for handler filepath "{}"', new_datafetch_api_id, handler_filepath)
    return self.__scalar_by_unique_col_value(
        table_name, col_name, handler_filepath, scalar_col_name, frame=datafetch_api_ids)
def h5_to_plot(h5, from_epoch, to_epoch, filterInNth, agents, format_as_image):
    Log.d('============')
    Log.d(agents)
    agent_keys = [a for a in agents.split(',') if a]
    if len(agent_keys) == 0:
        return 'No agent selected'
    filterInNth = int(filterInNth)
    df_info = ''
    pd.options.display.float_format = '{:.2f}'.format
    df_info += 'No agent selected\n\n{}\n\n'.format(h5.info())
    for key in h5:
        where = 'index >= {} and index <= {}'.format(from_epoch, to_epoch)
        Log.d('where: {}', where)
        frame = pd.read_hdf(h5, key, where=where)
        if frame.empty:
            return 'Empty frame'
        df_info += '{}\n\n'.format(frame.describe())
    background_color = '#272822'
    minute_intervals = [
        12 * 60,  # 12 hours
    ]
    x = range(100)
    y = [a * 2 + random.randint(-20, 20) for a in x]
    fig, ax = plt.subplots(figsize=(23, 12))  # figsize=(28,21))
    fig.patch.set_facecolor(background_color)
    Log.t('building plot')
    is_image_format = int(format_as_image) == 1

    def label_connect(path_collection, labels, color=None):
        tooltip = mpld3.plugins.PointHTMLTooltip(path_collection, [
            '<span class="point-tooltip" style="color: {}">{} <span class="point-tooltip-key">{}<span><span>'
            .format(color, l, key) for l in labels
        ], voffset=100, hoffset=0)
        mpld3.plugins.connect(fig, tooltip)

    for agent_key in agent_keys:
        try:
            agent_name = agent_key.split('(')[0]
            Log.d('plotting agent: {} -> {}', agent_key, agent_name)
            agent = agent_map[agent_name]
            plot_title = ''
            col_prefix = 'feature_{}_'.format(agent_key)
            agent_plot = agent.plot(plot_title, None, frame, ax, is_image_format,
                                    label_connect=label_connect,
                                    filter_in_nth=filterInNth, cp=col_prefix)
            pe.style_plot(ax, plot_title)
        except KeyError as ke:
            Log.w('Valid keys are: {}', frame.keys())
            raise ke
    plot_dirpath = AppConfig.setting('PLOT_DIRPATH')
    plot_filepath = os.path.join(plot_dirpath, '{}.png'.format('some plot'))
    fig.patch.set_facecolor(style.backgroundColor)
    fig.tight_layout()
    if is_image_format:
        sio = BytesIO()
        fig.savefig(sio, facecolor=fig.get_facecolor(), edgecolor='none', format='png')
        html = '<img src="data:image/png;base64,{}"/>'.format(
            base64.encodebytes(sio.getvalue()).decode())
        return html
    mpld3.plugins.connect(fig, ZoomSizePlugin())
    return mpld3.fig_to_html(fig)
@version(1, 0, 0)
class ExecuteApp(App):
    def __init__(self, version):
        super().__init__(__file__)

    def run(self, h5_filepath):
        self.executor = Executor(h5_filepath=h5_filepath,
                                 initial_capital=1000,
                                 initial_coins=10).run_async().join()


if __name__ == '__main__':
    try:
        app = ExecuteApp()
        assert len(sys.argv) == 2, 'expected exactly one argument (the h5 filename)'
        h5_filename = sys.argv[1]
        h5_filepath = os.path.join(AppConfig.setting('PREDICTOR_DATA_DIRPATH'), h5_filename)
        assert Path(h5_filepath).is_file(), 'is not a file: {}'.format(h5_filepath)
        app.run(h5_filepath)
    except KeyboardInterrupt:
        print('\n\nKeyboardInterrupt\n')
    except Exception as e:
        Log.c('app failed: {}', e)
        stacktrace = OsExpert.stacktrace()
        Log.d('stacktrace:\n{}', stacktrace)
def initialize(appconfig_filepath, logconfig_filepath):
    OsExpert.ensure_abs_filepath_exists(appconfig_filepath)
    Log.initialize(logconfig_filepath)
    AppConfig.initialize(appconfig_filepath)
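# Minimal usage sketch (assumption): the config paths are illustrative placeholders;
# initialize must run before any AppConfig.setting(...) or Log call elsewhere in the codebase.
initialize('/etc/cryptrade/app.config', '/etc/cryptrade/log.config')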
def predictor_from_config_maybe(self):
    endpoint = AppConfig.setting('AWS_DNN_PREDICTOR_ENDPOINT')
    return TensorFlowPredictor(endpoint) if endpoint not in (None, '') else None
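# Minimal caller sketch (assumption): inside the owning class, fall back to training and
# deploying a new model when no endpoint is configured. The control flow is illustrative
# and not the original driver code.
predictor = self.predictor_from_config_maybe()
if predictor is None:
    Log.i('no AWS_DNN_PREDICTOR_ENDPOINT configured, creating a new predictor')
    self.create_predictor_from_csv()
else:
    self.predictor = predictor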
def __init__(self):
    self.host = AppConfig.setting('DB_HOST')
    self.user = AppConfig.setting('DB_USER')
    self.password = AppConfig.setting('DB_PASSWORD')
    self.db_name = AppConfig.setting('DB_NAME')
import sys; sys.path.append('..')
import os
import asyncio
from applogging import Log
from core import AppConfig, OsExpert, Timeout
import traceback
import time

AppConfig.initialize_in_file_dir(OsExpert.path_backstep(__file__))


class Parser():
    def __init__(self):
        wfPath = "/tmp/my_fifo2"
        wp = None
        try:
            if not os.path.exists(wfPath):
                os.mkfifo(wfPath)
            while True:
                is_sent = False
                try:
                    with Timeout(1):
                        with open(wfPath, 'w') as wp:
                            print('sending..')
                            wp.write("a write!\n")
                            print('sent')
                            is_sent = True
                    time.sleep(1)
                except TimeoutError:
                    if not is_sent:
def list():
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    files = [f for f in os.scandir(dirpath) if f.name.endswith('.h5')]
    return render_template('files.html', style=style, files=files)
import sys; sys.path.append('..')  # must run before the project-local imports below
import json
import time
import configparser
import io
import os
import hashlib
import requests
import hmac
import websocket
import simplejson as json
from applogging import Log
from core import AppConfig, OsExpert
from db import DatabaseGateway

AppConfig.initialize_in_file_dir(OsExpert.path_backstep(__file__))
file_path = os.path.realpath(__file__)
print(file_path)
db = DatabaseGateway()
datafetch_api_id = db.datafetch_api_id_by_handler_filepath(file_path)
print(datafetch_api_id)
exit()


def prettyJson(jsonData):
    return json.dumps(jsonData, indent=4, sort_keys=True)


publicKey = AppConfig.setting("BITCOINAVERAGE_PUBLIC_KEY")
secretKey = AppConfig.setting("BITCOINAVERAGE_SECRET_KEY")
url = "https://apiv2.bitcoinaverage.com/websocket/get_ticket"
class PredictApp(App):
    def __init__(self, version):
        super().__init__(__file__)

    def handle_change(self):
        Log.d('modified')

    def run(self, h5_filepath):
        predictor = Predictor(h5_filepath)
        thread = predictor.run_async()
        thread.join()


if __name__ == '__main__':
    try:
        app = PredictApp()
        assert len(sys.argv) == 2, 'expected exactly one argument (the h5 filename)'
        h5_filename = sys.argv[1]
        h5_filepath = os.path.join(AppConfig.setting('GENERATOR_DATA_DIRPATH'), h5_filename)
        assert Path(h5_filepath).is_file(), 'is not a file: {}'.format(h5_filepath)
        app.run(h5_filepath)
    except KeyboardInterrupt:
        print('\n\nKeyboardInterrupt\n')
    except Exception as e:
        Log.c('app failed: {}', e)
        stacktrace = OsExpert.stacktrace()
        Log.d('stacktrace:\n{}', stacktrace)