Пример #1
0
	async def check_for_alert_match(self):
		"""Scan a fixed list of web pages for cryptocurrency-related keywords.

		Every page is fetched, stripped of tags, HTML-unescaped and cleaned of
		the strings listed in 'ignore-lines.json' before the keyword patterns
		are applied.

		Returns:
			True as soon as any pattern matches on any page (the match is
			logged as a warning), otherwise False.
		"""
		urls = [
			'https://twitter.com/CFTC',
			'https://twitter.com/sec_enforcement?lang=en',
			'https://twitter.com/ushouserep?lang=en',
		]
		# JSON is UTF-8 by specification; do not depend on the locale default.
		with open('ignore-lines.json', 'r', encoding='utf-8') as f:
			strip_texts = json.load(f)
		Log.d('checking {} sources, ignoring {} lines..', len(urls), len(strip_texts))
		# '.' does not match newlines, so each match stays within one line and
		# carries at most 200 characters of context on either side.
		patterns = [
			r'.{,200}bitcoin.{,200}',
			r'.{,200}crypto.{,200}',
			r'.{,200}virtual currency.{,200}',
		]
		# One session serves all requests instead of opening a new one per url.
		async with aiohttp.ClientSession() as session:
			for url in urls:
				html_text = await self.__fetch(session, url)
				text = html.unescape(StringExpert.strip_tags(html_text))
				for strip_text in strip_texts:
					text = text.replace(strip_text, '')
				for pattern in patterns:
					match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
					if match is None:
						continue
					warning = 'Found pattern "{}" at url "{}" in line: {}'.format(pattern, url, match.group())
					Log.w(warning)
					return True
		return False
Пример #2
0
	def __init__(self, h5_filepath, version):
		"""Prepare the output file path, the predictors per input key and the input file watcher.

		When the output file already exists the user is asked whether it may
		be overwritten; declining terminates the process. The input file must
		contain exactly one key.
		"""
		warnings.simplefilter('ignore', NaturalNameWarning)
		input_filename = Path(h5_filepath).name
		output_dirpath = AppConfig.setting('PREDICTOR_DATA_DIRPATH')
		self.h5_out_filepath = os.path.join(output_dirpath, input_filename)
		h5_out_file = Path(self.h5_out_filepath)
		if h5_out_file.exists():
			Log.i('overwrite file?: {}', h5_out_file)
			is_confirmed = OsExpert.prompt_confirm('File already exists, overwrite? {}'.format(h5_out_file))
			if not is_confirmed:
				Log.d('user aborted, exiting')
				exit()
			Log.w('removing file: {}', h5_out_file)
			os.remove(self.h5_out_filepath)
		self.predictors_map = {}
		with pd.HDFStore(h5_filepath, mode='r') as h5:
			keys = h5.keys()
			Log.i('h5 input keys: {}', keys)
			assert len(keys) == 1, 'harcoded restriction on single key was violated'
			for key in keys:
				row_count = h5.get_storer(key).nrows
				Log.i('row count for {}: {}', key, row_count)
				predictors = [EnsemblePredictor(min_predict_generator_size=2000, max_train_size=5000)]
				self.predictors_map[key] = predictors
		# Only rows with is_simulated = 0 are requested from the watcher.
		self.h5_watcher = H5FileWatcher(h5_filepath, self.handle_job_epoch, {'is_simulated': 0})
Пример #3
0
def retrieve(db, url, datasource_id, exchange_id, currency_id):
	"""Download a gzipped csv file and store every row as a transaction.

	Each row is read as (epoch time, price, amount). Rows the database
	rejects with DuplicateInsertException are counted and skipped. The
	downloaded temp file is removed even when downloading or processing
	fails.

	Args:
		db: gateway exposing create_transaction(dict).
		url: location of the gzipped csv; its last path segment is used as
			the temp file name.
		datasource_id: stored on every transaction.
		exchange_id: stored on every transaction.
		currency_id: stored on every transaction.
	"""
	temp_dirpath = AppConfig.setting('TEMP_DIRPATH')
	filepath = os.path.join(temp_dirpath, url.split('/')[-1])
	duplicateCount = 0
	insertCount = 0
	try:
		downloadFile(url, filepath)
		with gzip.open(filepath, 'rt') as f:
			Log.d('Processing csv file..')
			spamreader = csv.reader(f, delimiter=',', quotechar='|')
			for row in spamreader:
				transaction = {
					'datasource_id': datasource_id,
					'exchange_id': exchange_id,
					'amount': float(row[2]),
					'price': float(row[1]),
					'currency_id': currency_id,
					'epoch_time': int(row[0]),
				}
				try:
					db.create_transaction(transaction)
				except DuplicateInsertException:
					duplicateCount += 1
				else:
					insertCount += 1
	finally:
		# Never leave the (possibly partial) download behind in the temp dir.
		if os.path.exists(filepath):
			os.remove(filepath)
	Log.i('Done processing, insert count: {}, duplicate count: {}', insertCount, duplicateCount)
Пример #4
0
	async def __process_subscriber(self, index, subscriber):
		"""Consume a subscriber's responses forever, writing each one to a file.

		Every response is written to
		'<handler_filename>.<epoch seconds>.<RESPONSE_EXTENSION>' inside
		self.data_response_dirpath. A failed write is logged and the next
		response is still processed. When the subscription itself raises, the
		failure is logged and the subscriber is invoked again after
		self.retry_delay_seconds.

		Args:
			index: not used by this method.
			subscriber: object exposing handler_filename and an async
				iterable subscribe().
		"""
		fail_count = 0
		response_file_prefix = subscriber.handler_filename
		while True:
			try:
				Log.i('invoking subscriber {}', subscriber.handler_filename)
				async for response_text in subscriber.subscribe():
					response_text_md5hash = StringExpert.md5hash(response_text)
					try:
						epoch = int(time.time())
						filepath = os.path.join(
							self.data_response_dirpath,
							'{}.{}.{}'.format(response_file_prefix, epoch, FetchApp.RESPONSE_EXTENSION)
							)
						with open(filepath, 'w') as file:
							file.write(response_text)
					except Exception as e:
						Log.e('Failed to save response to file, message: {}', e)
					else:
						# Report success only when the write really succeeded.
						Log.d('stored api response for subcriber {} (hash {})', subscriber.handler_filename, response_text_md5hash)
			except Exception:
				fail_count += 1
				Log.e('failed to invoke subscriber {} ({} failures so far)', subscriber.handler_filename, fail_count)
				stacktrace = traceback.format_exc()
				Log.d('exception stack:\n{}', stacktrace)
				Log.i('retrying in {} seconds..', self.retry_delay_seconds)
				await asyncio.sleep(self.retry_delay_seconds)
Пример #5
0
 def __init__(self):
     """Set up the response directory path, the store, the subscribers and the parse helper."""
     super().__init__(__file__)
     Log.d('construct')
     self.dir_path = AppConfig.setting('DATA_RESPONSE_DIRPATH')
     store = Store()
     self.store = store
     subscribers = subscribe.all()
     self.subscribers = subscribers
     # The parse helper works on the same subscribers and store as this object.
     self.parse_util = ParseUtil(subscribers, store)
Пример #6
0
	def __init__(self, min_predict_generator_size, max_train_size):
		"""Store the size limits and start without a predictor.

		Args:
			min_predict_generator_size: must be smaller than max_train_size.
			max_train_size: must be larger than min_predict_generator_size.
		"""
		super().__init__(predict_col='feature_rtrspc()_next_trend_pricefeature')
		is_size_order_valid = max_train_size > min_predict_generator_size
		assert is_size_order_valid
		self.min_predict_generator_size = min_predict_generator_size
		self.max_train_size = max_train_size
		# No predictor exists until one is assigned later.
		self.predictor = None
		Log.d('core count: {}', core_count)
Пример #7
0
	def filter_simulated_observations(self, df):
		"""Return the rows of df whose 'is_simulated' value is not 1.

		Logs the number of rows whose index label is absent from the result.
		"""
		is_kept = df['is_simulated'] != 1
		kept_df = df[is_kept]
		removed_df = df[~df.index.isin(kept_df.index)]
		removed_count = len(removed_df)
		if removed_count > 0:
			Log.w('filtered out {} simulated frames', removed_count)
		else:
			Log.d('no simulated frames found to filter out')
		return kept_df
Пример #8
0
 def __init__(self):
     """Read the email recurrence window from the settings and reset the email bookkeeping."""
     super().__init__(__file__, isToNotifyStartup=False)
     reccurence_setting = AppConfig.setting(
         'LOGWATCH_EMAIL_MAX_RECCURENCE_MINUTES')
     self.maxEmailReccurenceMinutes = float(reccurence_setting)
     self.triggerLines = ['ERROR', 'WARNING']
     # Logged before the two bookkeeping attributes below exist.
     Log.d('construct: {}', self.__dict__)
     self.matchCountSinceLastEmail = 0
     self.lastEmailDatetime = None
Пример #9
0
 def tryAppNotifyByEmail(serviceName, messsage):
     """Email a notification to the configured alert address unless disabled.

     Returns False without sending when the 'IS_EMAIL_NOTIFICATION_ENABLED'
     setting is not '1'; otherwise returns whatever
     NetworkExpert.emailMaybe returns.
     """
     is_enabled = AppConfig.setting('IS_EMAIL_NOTIFICATION_ENABLED') == '1'
     if not is_enabled:
         Log.d('ignoring email request per configuration')
         return False
     alertEmail = AppConfig.setting('ALERT_EMAIL')
     hostName = socket.gethostname()
     heading = '*** {}: {} ***'.format(hostName, serviceName)
     # The alert address is passed as both the first and the second argument.
     return NetworkExpert.emailMaybe(alertEmail, alertEmail, heading, messsage)
Пример #10
0
	def __init__(self):
		"""Read the retry delay and response directory from the settings and load the subscribers."""
		super().__init__(__file__)
		Log.d('construct')
		self.retry_delay_seconds = int(AppConfig.setting('DATAFETCH_API_RETRY_DELAY_SECONDS'))
		dirpath = AppConfig.setting('DATA_RESPONSE_DIRPATH')
		Log.d('data response dirpath is: {}', dirpath)
		self.data_response_dirpath = dirpath
		# Make sure the response directory exists before subscribers are loaded.
		OsExpert.ensure_abs_dirpath_exists(dirpath)
		self.subscribers = subscribe.all()
Пример #11
0
	def create_predictor_from_csv(self):
		"""Upload the train, test and meta files to S3, fit a TensorFlow estimator and deploy it.

		The deployed predictor is stored in self.predictor.
		"""
		Log.i('initiating sagemaker model creation')
		role = AppConfig.setting('AWS_PREDICTOR_ROLE')
		bucket = 'cryptrade-sagemaker'
		custom_code_upload_location = 's3://{}/customcode/tensorflow_iris'.format(bucket)
		model_artifacts_location = 's3://{}/artifacts'.format(bucket)
		Log.d('training data will be uploaded to: {}', custom_code_upload_location)
		Log.d('training artifacts will be uploaded to: {}', model_artifacts_location)
		sess = sagemaker.Session()

		def upload_to_s3(channel, filepath, skip_if_name_and_size_matches=False):
			# From SM examples. Like here: https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning.ipynb
			local_file = Path(filepath)
			s3 = boto3.resource('s3')
			key = channel + '/' + local_file.name
			# A prefix filter can return longer keys too, so compare the first hit exactly.
			candidates = list(s3.Bucket(bucket).objects.filter(Prefix=key))
			is_file_already_existing = len(candidates) > 0 and candidates[0].key == key
			if is_file_already_existing:
				if skip_if_name_and_size_matches is True:
					head = boto3.client('s3').head_object(Bucket=bucket, Key=key)
					local_size = local_file.stat().st_size
					if head['ContentLength'] == local_size:
						Log.w('skipping upload as s3 key of same size ({:.2f}kb) already exists: {}', local_size/1000, key)
						return
				Log.w('overwriting existing s3 key: {}', key)
			with open(filepath, "rb") as data:
				s3.Bucket(bucket).put_object(Key=key, Body=data)

		s3_data_folder = 'data'
		# Train and test uploads are skipped when an object of equal size exists; meta is always uploaded.
		upload_to_s3(s3_data_folder, self.train_filepath, True)
		upload_to_s3(s3_data_folder, self.test_filepath, True)
		upload_to_s3(s3_data_folder, self.meta_filepath)
		estimator = TensorFlow(
			entry_point='aws_dnn_predictor_entry.py',
			role=role,
			output_path=model_artifacts_location,
			code_location=custom_code_upload_location,
			train_instance_count=1,
			train_instance_type='ml.c5.xlarge',
			training_steps=1000,
			evaluation_steps=100,
		)
		train_data_location = 's3://{}/{}'.format(bucket, s3_data_folder)
		Log.i('fitting train data: {}', train_data_location)
		estimator.fit(train_data_location)
		Log.i('deploying model')
		deploy_start = datetime.now()
		predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.t2.medium')
		deploy_duration = datetime.now() - deploy_start
		Log.i('deployed predictor in {}s, endpoint is:\n{}', deploy_duration, predictor.endpoint)
		self.predictor = predictor
Пример #12
0
 def __init__(self, h5_filepath, row_handler, contraints_dict=None):
     """Initialise the watcher state and build the constraint clause.

     Args:
         h5_filepath: path of the file to watch.
         row_handler: required; asserted to be truthy.
         contraints_dict: optional mapping; every item becomes an
             'and <key>=<value>' fragment of the clause.
     """
     self.handle_event = Event()
     self.h5_filepath = h5_filepath
     self.handle_count = 0
     self.job_frames = {}
     self.last_handle_count = None
     self.row_handler = row_handler
     if contraints_dict is None:
         self.contraints_clause = ''
     else:
         fragments = [
             'and {}={}'.format(k, v) for k, v in contraints_dict.items()
         ]
         self.contraints_clause = ' '.join(fragments)
     Log.d('cc: {}', self.contraints_clause)
     assert row_handler
Пример #13
0
def downloadFile(url, filepath):
	"""Stream the content found at url into the file at filepath.

	Args:
		url: address to download from.
		filepath: destination file, opened for binary writing.

	Raises:
		ValueError: if url or filepath is None.
		requests.HTTPError: if the server answers with an error status, so
			that an error page is never saved as if it were the download.
	"""
	if url is None:
		raise ValueError('parameter "url" not specified')
	if filepath is None:
		raise ValueError('parameter "filepath" not specified')
	Log.d('Downloading to path {}: {}', filepath, url)
	# stream=True lets the body be written chunk by chunk instead of being held in memory.
	r = requests.get(url, stream=True)
	try:
		r.raise_for_status()
		with open(filepath, 'wb') as f:
			for chunk in r.iter_content(chunk_size=1024):
				if chunk:  # filter out keep-alive new chunks
					f.write(chunk)
	finally:
		# A streamed response keeps its connection until it is closed.
		r.close()
Пример #14
0
	def print_acc(self, df):
		"""Log the accuracy of the ensemble prediction column against the true trend column.

		Rows where either of the two columns is missing are dropped before
		the score is computed.
		"""
		Log.d('begin acc calc ======')
		# previously also used here: 'prediction_awsdnn_next_trend'
		y_predict_colname = 'prediction_ensmbl_next_trend_feature'
		y_true_colname = 'feature_rtrspc()_next_trend_pricefeature'
		pair_df = df[[y_predict_colname, y_true_colname]]
		complete_df = pair_df.dropna(how='any')
		Log.d('acc source frame:\n{}', complete_df)
		total_count = len(pair_df)
		dropped_count = total_count - len(complete_df)
		Log.d('dropped {}/{} rows where either the predictor or the true value was unspecified', dropped_count, total_count)
		score = accuracy_score(complete_df[y_true_colname], complete_df[y_predict_colname], normalize=True)
		Log.d('accuracy: {}', score)
		Log.d('===== end acc calc ')
Пример #15
0
	def __predict(self, df):
		"""Return the predicted label for the single row in df.

		The process exits once 100 predictions have been made. df must
		contain exactly one row.
		"""
		max_prediction_count = 100
		if self.predict_count >= max_prediction_count:
			Log.w('too many predictions {} reached, exiting', self.predict_count)
			exit()
		assert len(df) == 1
		# Only the inputs are needed; the targets are discarded.
		X_all, _ = self.frame_to_ml_inputs(df)
		predict_row = X_all.iloc[0]
		Log.d('predicting based on {} values:\n{}', len(predict_row.values), predict_row.squeeze().sort_index())
		response = self.predictor.predict(predict_row.values)
		label = self.sagemaker_response_highest_score_label(response)
		self.predict_count += 1
		return label
Пример #16
0
    def unparsed_datafetch_api_responses_frame(self, min_id=0, limit=100):
        sql = """
			SELECT {0}.* FROM {0}
			LEFT OUTER JOIN {1} ON 
							{1}.source_md5hash = {0}.response_md5hash
			WHERE 
				{1}.source_md5hash IS NULL 
			AND 
				{0}.id >= {2}
            ORDER BY {0}.id
            LIMIT {3}
			""".format('datafetch_api_response', 'transaction', min_id, limit)
        Log.d('executing:\n{}', sql)
        sys.stdout.flush()
        return self.__query_frame(sql)
Пример #17
0
 def watch_continuously(self, watch_interval_seconds):
     """Run the datafetch write-frequency check in an endless loop.

     A failing check is logged together with the number of consecutive
     failures and does not stop the loop; a successful check resets that
     number. The loop sleeps watch_interval_seconds after every check.
     """
     Log.i('continuous watching activated with interval of {} seconds',
           watch_interval_seconds)
     consecutive_error_count = 0
     while True:
         try:
             self.__verify_datafetch_apis_write_frequency()
         except Exception:
             consecutive_error_count += 1
             Log.e('fail during watcher check ({} consecutive errors)',
                   consecutive_error_count)
             Log.d('stacktrace:\n{}', OsExpert.stacktrace())
         else:
             consecutive_error_count = 0
         time.sleep(watch_interval_seconds)
Пример #18
0
 def email_maybe(self, header, message):
     """Send a notification email unless one was sent within the recurrence window.

     When the previous email is more recent than
     self.maxEmailReccurenceMinutes, nothing is sent and no state changes.
     Otherwise the send time is recorded, the match counter is reset and
     the email is handed to NetworkExpert.tryAppNotifyByEmail.
     """
     now = datetime.now()
     previous = self.lastEmailDatetime
     if previous is not None:
         elapsed_minutes = (now - previous).total_seconds() / 60.0
         window_minutes = self.maxEmailReccurenceMinutes
         if elapsed_minutes < window_minutes:
             Log.d(
                 'Aborting email notification ({}+ minutes left in window)',
                 int(window_minutes - elapsed_minutes))
             return
     self.lastEmailDatetime = now
     self.matchCountSinceLastEmail = 0
     NetworkExpert.tryAppNotifyByEmail(header, message)
Пример #19
0
def frame(mode, filename, from_epoch, to_epoch, filterInNth, agents,
          format_as_image):
    """Build the plot, and optionally the whole page, for one generator data file.

    Args:
        mode: 'plot_only' returns just the plot html; any other value
            returns the rendered 'frame.html' template.
        filename: file name inside the 'GENERATOR_DATA_DIRPATH' directory.
        from_epoch: start of the period; None means one week before
            to_epoch.
        to_epoch: end of the period.
        filterInNth: passed through to h5_to_plot.
        agents: passed through to h5_to_plot.
        format_as_image: passed through to h5_to_plot.
    """
    filepath = os.path.join(AppConfig.setting('GENERATOR_DATA_DIRPATH'),
                            filename)
    if from_epoch is None:
        seconds_per_week = 60 * 60 * 24 * 7
        from_epoch = to_epoch - seconds_per_week
    with pd.HDFStore(filepath, mode='r') as h5:
        key = h5.keys()[0]  # TODO: always select first?
        storer = h5.get_storer(key)
        row_count = storer.nrows
        Log.d(row_count)

        def index_value_at(position):
            # Read one row without any columns, just to get its index value.
            single_row = pd.read_hdf(h5,
                                     key,
                                     start=position,
                                     stop=position + 1,
                                     columns=[])
            return single_row.index.values[0]

        first_epoch = index_value_at(0)
        last_epoch = index_value_at(row_count - 1)
        column_names = list(storer.attrs.data_columns)
        plot_html = h5_to_plot(h5, from_epoch, to_epoch, filterInNth, agents,
                               format_as_image)
        if mode == 'plot_only':
            return plot_html
        # Second '_'-separated segment of every 'feature_' column name.
        feature_columns = {
            name.split('_')[1]
            for name in column_names if name.startswith('feature_')
        }
        # Part before the first '(' of each of those segments.
        feature_names = [column.split('(')[0] for column in feature_columns]
        agent_map = {}
        for feature_name in feature_names:
            agent_map[feature_name] = [
                column for column in feature_columns
                if column.startswith(feature_name)
            ]
        frame_info = {'row count': row_count, 'columns': column_names}
        return render_template(
            'frame.html',
            style=style,
            plothtml=plot_html,
            filename=filename,
            from_epoch=from_epoch,
            to_epoch=to_epoch,
            first_epoch=first_epoch,
            last_epoch=last_epoch,
            min_epoch=1514764800,  # min epoch is 2018
            max_epoch=int(time.time()),
            agent_map=sorted(agent_map.items()),
            job_uid=key,
            frame_info_html=json2html.convert(json=frame_info))
Пример #20
0
 def process_nonparsed_api_responses_full(self, sleep_seconds=0):
     """Parse unparsed api responses subset by subset until the minimum id stops advancing.

     Args:
         sleep_seconds: pause after every processed subset.

     Raises:
         Exception: wrapping any error raised while a subset is processed.
     """
     Log.i(
         'initiating continuous parsing of api responses with subset sleep interval: {} seconds',
         sleep_seconds)
     try:
         min_id = 0
         while True:
             next_min_id = self.process_nonparsed_api_responses_subset(
                 next_min_id=min_id)
             time.sleep(sleep_seconds)
             # Stop as soon as a subset no longer moves the minimum id forward.
             if not next_min_id > min_id:
                 break
             min_id = next_min_id
     except Exception as e:
         raise Exception('Failed to process nonparsed api responses') from e
     Log.d('no more api responses to parse, transaction count is now {}',
           self.store.transaction_count())
Пример #21
0
 def __init__(self, version):
     """Read the generator settings, open the database gateway and build the job list."""
     super().__init__(__file__)
     self.window_size = 15
     self.interval_seconds = [15 * 60]  # 15 minutes
     self.contruct_time = time.time()
     self.version = version
     # NOTE(review): the original note says this must be low enough to
     # eventually produce an empty result set - confirm the intent.
     self.sleep_seconds = 1
     min_timestamp_setting = AppConfig.setting(
         'GENERATOR_TRANSACTION_MIN_TIMESTAMP')
     self.transaction_min_timestamp = int(min_timestamp_setting)
     self.data_dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
     # Logged before the gateway, currency lists, run config and jobs are set.
     Log.d('construct: {}', self.__dict__)
     self.db = DatabaseGateway()
     self.from_currency_ids = []
     self.to_currency_ids = []
     self.run_config = self.read_run_config()
     max_history_minutes = 10 * 24 * 60  # ten days, in minutes
     job_iterator = self.__jobs_iterate(max_history_minutes, self.run_config)
     self.jobs = list(job_iterator)
     Log.i('count of generator jobs: {}', len(self.jobs))
Пример #22
0
	def handle_job_epoch(self, jobuid, df, start_index):
		"""Replay the predicted actions in df from start_index onward against the simulated account.

		For every row with a non-NaN action the method buys or sells coins at
		the row's 'close' price, updating self.capital and self.coins and
		calling self.pay_fee. Progress is printed to stdout.

		Args:
			jobuid: must equal '/bitcoinaverage_multiple_global_ETH_USD_900'.
			df: frame with the PREDICT_ACTION and 'close' columns.
			start_index: position of the first row in df to process.

		Raises:
			Exception: wrapping any error (including failed assertions)
				raised while the rows are processed.
		"""
		# fee rate of 0.25%, expressed as a fraction
		trade_fee = float64(.25 / 100) 
		# amount of capital a buy leaves untouched: ten times the fee on the initial capital
		min_capital = self.initial_capital * trade_fee * 10
		print(start_index)
		print(len(df))
		try:
			assert jobuid == '/bitcoinaverage_multiple_global_ETH_USD_900', 'unexpected job id'
			new_df = df[start_index:]
			for epoch, row in new_df.iterrows():
				action = row[PREDICT_ACTION]
				coin_price = row['close']
				# the first processed price fixes the starting value of the account
				if self.start_value is None:
					self.start_value = self.current_value(coin_price)
				if not isnan(action):
					print('coin price ', coin_price, ', capital ', self.capital)
					if action == FeatureValue.BUY:					
						# spend the capital above min_capital, reduced by the fee rate
						coin_transaction_count = (1 - trade_fee) * (self.capital - min_capital) / coin_price 
						if coin_transaction_count > 0:
							print('BUYING coins: ', coin_transaction_count)
							cost = coin_transaction_count * coin_price
							fee  = cost * trade_fee
							assert self.capital >= cost + fee, '{} >= {} + {} = {}'.format(self.capital, cost, fee, cost + fee)
							self.capital -= cost
							self.coins += coin_transaction_count
							# NOTE(review): pay_fee receives the traded amount, not the fee - presumably it derives the fee itself; confirm
							self.pay_fee(cost)
					elif action == FeatureValue.SELL:
						# the fee for selling all coins, capped at the available capital
						fee = min(self.coins * coin_price * trade_fee, self.capital)					
						# the number of coins whose sale produces exactly that fee
						coin_transaction_count = fee / (coin_price * trade_fee)
						if coin_transaction_count > 0 and self.coins >= coin_transaction_count:
							print('SELLING coins: {}'.format(coin_transaction_count))
							gain = coin_transaction_count * coin_price
							self.capital += gain
							self.coins -= coin_transaction_count
							self.pay_fee(gain)	
						else:
							Log.d('NOT ENOUGH COINS TO SELL! {} at {}', coin_transaction_count, fee)		
					# NOTE(review): net_worth is computed but never used in this method
					net_worth = self.current_value(coin_price)
		except Exception as e:
			raise Exception('Failed to execute on new job epoch') from e
		print(len(df))
		print(df[PREDICT_ACTION].value_counts())
		print('done')
		sys.stdout.flush()
Пример #23
0
 def __run(self):
     """Watch the h5 file and process it every time the handle event is set.

     A warning is logged when the handle count advanced by more than one
     since the previous processing. The watcher thread is stopped and
     joined when the loop ends, also when it ends through an exception.
     """
     Log.d('Watching file: {}', self.h5_filepath)
     watcher = FileWatcher(self.h5_filepath, modified=self.handle_change)
     thread = watcher.run_async()
     try:
         while self.handle_event.wait():
             previous_count = self.last_handle_count
             if previous_count is not None:
                 jump_count = self.handle_count - previous_count
                 if jump_count > 1:
                     Log.w(
                         'handle count has jumped {} times than once since the last processing',
                         jump_count)
             self.last_handle_count = self.handle_count
             self.process_h5()
             # Cleared only after processing, so the next wait blocks until a new signal.
             self.handle_event.clear()
     finally:
         Log.w('run loop broken, unwatching file: {}', self.h5_filepath)
         thread.stop()
         thread.join()
Пример #24
0
	def process(self, epoch, df):
		"""Predict for the row at epoch, or prepare the data a predictor is built from.

		With a predictor available the row at epoch is predicted. Without one,
		nothing happens until at least self.min_predict_generator_size rows
		exist up to and including epoch; then an existing predictor is loaded
		from the configuration if there is one, otherwise the rows so far are
		written as csv.

		Args:
			epoch: index label of the row to process.
			df: frame whose index contains epoch.

		Returns:
			The prediction when a predictor was already available, otherwise
			None.
		"""
		if df.empty:
			Log.d('skipping processing of empty dataset')
			return None
		r_index = df.index.get_loc(epoch)
		if self.predictor is not None:
			# Hand over a one-row frame, not a series.
			return self.__predict(df[r_index:r_index + 1])
		row_count = r_index + 1
		if row_count < self.min_predict_generator_size:
			# Not enough rows yet to build a predictor from.
			return None
		Log.d('initiating predictor contruction at index {}, frame length {}', r_index, len(df))
		predictor = self.predictor_from_config_maybe()
		if predictor is not None:
			self.predictor = predictor
			Log.i('existing predictor endpoint loaded: {}', predictor.endpoint)
			return None
		train_df = df[:row_count]
		# The format string previously had no placeholder for the csv path.
		Log.i('at index {}, detected data of adequate length {} writing csv: {}', r_index, len(train_df), self.csv_filepath)
		self.write_csv(train_df)
		return None
Пример #25
0
 def plot(title, second_count, frame, ax, is_image, label_connect,
          filter_in_nth, cp):
     """Draw the k and slow-d lines, the reference levels and the buy/sell markers on ax.

     The three oscillator columns are scaled by the value range of the
     'latest' column and shifted so that they sit below that column's
     minimum. Returns None; nothing is drawn when no plot values can be
     derived.

     Args:
         title, second_count, frame: passed to
             FeatureBase.derive_plot_values.
         ax: axes object to draw on.
         is_image: not used by this function.
         label_connect: not used by this function.
         filter_in_nth: only rows whose index is a multiple of this value
             are used for the lines.
         cp: column name prefix.
     """
     values = FeatureBase.derive_plot_values(title, second_count, frame)
     if values is None:
         return None
     latest_min = values['latest'].min()
     mult_factor = values['latest'].max() - latest_min
     offset_y = latest_min - mult_factor
     for suffix in ('d_slow', 'd_fast', 'k'):
         column = cp + suffix
         values[column] = values[column] * mult_factor + offset_y
     filtered = values[values.index % filter_in_nth == 0]
     assert len(filtered) > 0
     Log.d(frame[cp + action_cf].value_counts())
     indices = filtered['index'].tolist()
     ax.plot(indices, filtered[cp + 'k'], color='orange', alpha=0.9, zorder=-5)
     ax.plot(indices, filtered[cp + 'd_slow'], color='lightblue', alpha=0.9)
     # Horizontal reference lines, scaled the same way as the oscillator columns.
     for level in [0, lower_k, upper_k, 1]:
         scaled_level = level * mult_factor + offset_y
         ax.plot([indices[0], indices[-1]],
                 [scaled_level] * 2,
                 color='white',
                 dashes=[6, 2],
                 alpha=0.5,
                 zorder=-10)
     # Markers come from the unfiltered values, so no action is skipped.
     markers = ((ActionFeature.BUY, 'green'), (ActionFeature.SELL, 'red'))
     for action, marker_color in markers:
         action_rows = values[values[cp + action_cf] == action]
         ax.scatter(action_rows['index'],
                    action_rows[cp + 'd_slow'],
                    color=marker_color,
                    s=70,
                    zorder=10,
                    alpha=0.7)
Пример #26
0
 def __init__(self, datasource, exchange, from_currency, to_currency,
              interval_second, features, uid):
     """Validate the argument types and store the job configuration.

     Args:
         datasource: a Datasource.
         exchange: an Exchange.
         from_currency: a Currency.
         to_currency: a Currency.
         interval_second: an int.
         features: a list of features; each must expose col_prefix.
         uid: identifier of the job.
     """
     type_checks = (
         (datasource, Datasource),
         (exchange, Exchange),
         (from_currency, Currency),
         (to_currency, Currency),
         (features, list),
         (interval_second, int),
     )
     for value, expected_type in type_checks:
         assert isinstance(value, expected_type)
     self.datasource = datasource
     self.exchange = exchange
     self.from_currency = from_currency
     self.to_currency = to_currency
     self.features = features  # trend agent must be first!
     self.interval_second = interval_second
     self.uid = uid
     self.frame = None
     self.interval_stat = None
     self.reserved_cols = [
         'time', 'volume', 'age', 'is_simulated', 'is_realtime'
     ]
     self.feature_reserved_cols = ['time']
     feature_prefixes = sorted([f.col_prefix for f in self.features])
     Log.d('generator job created with features: {}', feature_prefixes)
Пример #27
0
 def process_api_response_file(self,
                               filepath,
                               subscriber,
                               datafetch_api_response=None):
     """Store the api response saved in a file and parse it into a transaction.

     Args:
         filepath: path of the saved response; the file name must start
             with the subscriber's handler filename.
         subscriber: the subscriber the response belongs to.
         datafetch_api_response: optional base record; when None it is
             created through ParseUtil.partial_datafetch_api_response.

     Returns:
         False when filepath is not a file, when the file name does not
         start with the subscriber's handler filename, or when the store
         rejects the response as a duplicate; True otherwise.

     Raises:
         Exception: any other error raised while storing the response is
             logged and re-raised unchanged.
     """
     db = self.store
     filename = os.path.basename(filepath)
     if not os.path.isfile(filepath) or not filename.startswith(
             subscriber.handler_filename):
         return False
     receiveTimestamp = int(ParseUtil.extractTimestampText(filename))
     # The handle is only needed for the read; release it before the database work.
     with open(filepath, 'r') as disk_file:
         response_text = disk_file.read()
     response_text_md5hash = StringExpert.md5hash(response_text)
     if datafetch_api_response is None:
         datafetch_api_response = ParseUtil.partial_datafetch_api_response(
             subscriber, db)
     datafetch_api_response = {
         **datafetch_api_response, 'response': response_text,
         'response_md5hash': response_text_md5hash,
         'epoch_receive_time': receiveTimestamp,
         'response_filename': filename
     }
     try:
         db.create_datafetch_api_response(datafetch_api_response)
     except DuplicateInsertException:
         Log.d('db rejected api_response_id as a duplicate: {}',
               response_text_md5hash)
         return False
     except Exception:
         Log.e('Failed to store api_response ({})',
               response_text_md5hash)
         raise
     ParseUtil.parse_and_persist_as_transaction_maybe(
         datafetch_api_response, subscriber, db)
     return True
Пример #28
0
 def datafetch_api_id_by_handler_filepath(self,
                                          handler_filepath,
                                          datafetch_api_ids=None,
                                          create_if_nonexisting=False):
     """Look up the datafetch api id for a handler filepath, optionally creating the row first.

     Args:
         handler_filepath: value of the 'handler_filepath' column.
         datafetch_api_ids: optional frame handed to the lookup helper.
         create_if_nonexisting: when equal to True and no row exists, a
             row is created before the final lookup.

     Returns:
         The value of the 'id' column for the handler filepath.
     """
     table_name = 'datafetch_api'
     col_name = 'handler_filepath'
     scalar_col_name = 'id'
     # Equality with True, not plain truthiness, decides whether creation is attempted.
     if create_if_nonexisting == True:
         existing_id = self.__scalar_by_unique_col_value(
             table_name,
             col_name,
             handler_filepath,
             scalar_col_name,
             frame=datafetch_api_ids,
             nonexisting_is_error=False)
         if existing_id is not None:
             return existing_id
         handler_filename = os.path.basename(handler_filepath)
         endpoint_prefix = AppConfig.setting('RESULT_ENDPOINT_PREFIX')
         new_row = {
             'handler_filepath': handler_filepath,
             'result_endpoint': '{}{}'.format(endpoint_prefix,
                                              handler_filename),
             'result_frequency_seconds': 30
         }
         new_datafetch_api_id = self.create_datafetch_api(new_row)
         Log.d('created datafetch api id {} for handler filepath "{}"',
               new_datafetch_api_id, handler_filepath)
     # This lookup uses the helper's default handling of a missing row.
     return self.__scalar_by_unique_col_value(table_name,
                                              col_name,
                                              handler_filepath,
                                              scalar_col_name,
                                              frame=datafetch_api_ids)
Пример #29
0
 def feed_jobs_forever(self, job_changed_handler):
     """Process transactions in subsets, in an endless loop.

     Subsets of up to 1000 transactions are handed to
     self.process_transaction_subset, starting at
     self.transaction_min_timestamp. When a call reports nothing to
     process, the loop switches to realtime mode and sleeps
     self.sleep_seconds before it polls again. This method only ends by
     raising.

     Args:
         job_changed_handler: required; passed on to
             process_transaction_subset.

     Raises:
         AssertionError: if job_changed_handler is None.
         Exception: wrapping any error raised while a subset is processed.
     """
     assert job_changed_handler is not None
     sleep_seconds = self.sleep_seconds
     transaction_min_timestamp = self.transaction_min_timestamp
     start_transaction_min_timestamp = transaction_min_timestamp
     data_dirpath = self.data_dirpath
     start_time = time.time()
     Log.i(
         'processing transactions, sleep interval {}s, starting from epoch {} ({})',
         sleep_seconds, transaction_min_timestamp,
         StringExpert.format_timestamp(transaction_min_timestamp))
     to_fetch_count = self.db.transaction_count(transaction_min_timestamp)
     Log.d('transaction count since {} ({}): {}', transaction_min_timestamp,
           StringExpert.format_timestamp(transaction_min_timestamp),
           to_fetch_count)
     pd.set_option('io.hdf.default_format', 'table')
     hdf5_filename = '{}_{}_{}.h5'.format(
         self.version.major, self.version.minor,
         datetime.fromtimestamp(start_time).strftime('%Y%m%d_%H%M%S'))
     hdf5_filepath = path.join(data_dirpath, hdf5_filename)
     Log.i('hdf5 output filepath is: \n{}', hdf5_filepath)
     set_size = 1000
     fetch_count = 0
     is_realtime = False
     while True:
         try:
             next_transaction_min_timestamp = self.process_transaction_subset(
                 transaction_min_timestamp, set_size, hdf5_filepath,
                 job_changed_handler, is_realtime)
             if next_transaction_min_timestamp is None:
                 Log.d('nothing to process, waiting..')
                 is_realtime = True  # TODO: empty polling perhaps not the best indicator of switch to realtime
                 time.sleep(sleep_seconds)
             else:
                 assert next_transaction_min_timestamp > transaction_min_timestamp, 'next minimum timestamp was not greater than the current timestamp'
                 transaction_min_timestamp = next_transaction_min_timestamp
                 # Counts whole subsets, so this can exceed the number of rows actually read.
                 fetch_count += set_size
                 # The total was counted once at start; when it was zero and
                 # rows arrived later there is nothing to divide by.
                 if to_fetch_count:
                     percentage = 100 * fetch_count / to_fetch_count
                 else:
                     percentage = 100
                 current_time = time.time()
                 Log.d(
                     'processed {}/{}, {}%, spent {} on the period {} ({}) to {} ({})',
                     fetch_count, to_fetch_count, int(percentage),
                     Timespan.from_seconds(int(current_time -
                                               start_time)).as_string(),
                     StringExpert.format_timestamp(
                         start_transaction_min_timestamp),
                     start_transaction_min_timestamp,
                     StringExpert.format_timestamp(
                         transaction_min_timestamp),
                     transaction_min_timestamp)
         except Exception as e:
             raise Exception(
                 'Failed to process nonparsed api responses') from e
Пример #30
0
	def sagemaker_response_highest_score_label(self, prediction_response):
		"""Return the label with the highest score, as an int, from a prediction response.

		The response must hold exactly one classification under
		['result']['classifications'], and at least one of its classes must
		carry a 'score'.
		"""
		Log.d('parsing response: {}', prediction_response)
		classifications = prediction_response['result']['classifications']
		assert len(classifications) == 1
		classes = classifications[0]['classes']
		Log.d('parsing classes: {}', classes)
		# Keyed by score: classes without a score are skipped, and of several
		# classes with the same score the last one is kept.
		label_scores = {}
		for c in classes:
			if 'score' in c:
				label_scores[c['score']] = c['label']
		assert len(label_scores) > 0
		scores = sorted(label_scores.keys(), reverse=True)
		assert all(higher >= lower for higher, lower in zip(scores, scores[1:]))
		win_score = scores[0]
		win_label = int(label_scores[win_score])
		Log.d("winner is '{}' in score: {}", win_label, label_scores)
		return win_label