def do_inference(self, model_dir, parameters, context):
    """Demo inference: fetch the configured series, scale every point by the
    instance's ``amplifier`` parameter and save the scaled points back.

    Returns (STATUS_SUCCESS, '') on success, or (STATUS_FAIL, message) if
    anything raises.
    """
    log.info('Start to inference {}'.format('Demo'))
    try:
        amplifier = parameters['instance']['params']['amplifier']
        end_time = str_to_dt(parameters['endTime'])
        # A missing startTime means a single-point inference at endTime.
        start_time = str_to_dt(parameters['startTime']) if 'startTime' in parameters else end_time
        series = self.tsanaclient.get_timeseries(parameters['apiEndpoint'],
                                                 parameters['apiKey'],
                                                 parameters['seriesSets'],
                                                 start_time,
                                                 end_time)
        # Flatten every (series, point) pair into one result record,
        # amplifying the value on the way.
        amplified = [
            {
                'dim': data.dim,
                'metric_id': data.metric_id,
                'series_id': data.series_id,
                'value': point['value'] * amplifier,
                'timestamp': point['timestamp']
            }
            for data in series or []
            for point in data.value or []
        ]
        self.tsanaclient.save_inference_result(parameters, amplified)
        return STATUS_SUCCESS, ''
    except Exception as e:
        log.error('Exception thrown by inference: ' + repr(e))
        return STATUS_FAIL, 'Exception thrown by inference: ' + repr(e)
def do_inference(self, subscription, model_id, model_dir, parameters):
    """Demo inference: fetch the configured series, scale every point by the
    instance's ``amplifier`` parameter and save the scaled points back.

    BUGFIX: the previous implementation did ``data.value = data.value * amplifier``,
    which multiplies the *list* of points (replicating it for integer amplifiers,
    raising TypeError for floats) instead of scaling each point's numeric value.
    It also handed Series objects to save_inference_result, which indexes each
    result item with ``item['timestamp']`` and therefore expects plain dicts.
    The fix builds one dict per point, matching the sibling demo implementation.

    Returns (STATUS_SUCCESS, '') on success, or (STATUS_FAIL, message) if
    anything raises.
    """
    log.info('Start to inference {}'.format('Demo'))
    try:
        amplifier = parameters['instance']['params']['amplifier']
        end_time = str_to_dt(parameters['endTime'])
        # A missing startTime means a single-point inference at endTime.
        if 'startTime' in parameters:
            start_time = str_to_dt(parameters['startTime'])
        else:
            start_time = end_time
        series = self.tsanaclient.get_timeseries(parameters['apiKey'],
                                                 parameters['seriesSets'],
                                                 start_time,
                                                 end_time)
        res = []
        for data in series or []:
            for point in data.value or []:
                # Scale the numeric value of each point, not the list itself.
                res.append({
                    'dim': data.dim,
                    'metric_id': data.metric_id,
                    'series_id': data.series_id,
                    'value': point['value'] * amplifier,
                    'timestamp': point['timestamp']
                })
        self.tsanaclient.save_inference_result(parameters, res)
        return STATUS_SUCCESS, ''
    except Exception as e:
        log.error('Exception thrown by inference: ' + repr(e))
        return STATUS_FAIL, 'Exception thrown by inference: ' + repr(e)
def prepare_training_data(self, parameters):
    """Export the configured series to per-series CSV files, zip them, upload
    the archive to Azure Blob storage, and return the training-request payload.

    The payload maps each series id to its CSV file name and carries the
    fill-up mode, traceback window and the time range.

    IDIOM FIX: the per-point loop variable was named ``tuple``, shadowing the
    builtin — renamed to ``point``.
    """
    end_time = str_to_dt(parameters['endTime'])
    # A missing startTime means a single-point window at endTime.
    if 'startTime' in parameters:
        start_time = str_to_dt(parameters['startTime'])
    else:
        start_time = end_time
    factor_def = parameters['seriesSets']
    factors_data = self.tsanaclient.get_timeseries(factor_def, start_time, end_time)

    # Stage the CSVs in a fresh, uniquely named directory.
    time_key = dt_to_str_file_name(end_time)
    data_dir = os.path.join(self.config.model_data_dir, time_key, str(uuid.uuid1()))
    shutil.rmtree(data_dir, ignore_errors=True)
    os.makedirs(data_dir, exist_ok=True)

    variable = {}
    for factor in factors_data:
        csv_file = factor.series_id + '.csv'
        csv_data = [('timestamp', 'value')]
        csv_data.extend((point['timestamp'], point['value']) for point in factor.value)
        save_to_csv(csv_data, os.path.join(data_dir, csv_file))
        variable[factor.series_id] = csv_file

    # Zip the staging directory (archive placed next to it, in its parent).
    zip_dir = os.path.abspath(os.path.join(data_dir, os.pardir))
    zip_file_base = os.path.join(zip_dir, 'training_data')
    zip_file = zip_file_base + '.zip'
    if os.path.exists(zip_file):
        os.remove(zip_file)
    shutil.make_archive(zip_file_base, 'zip', data_dir)

    # Upload the archive and mint a SAS URL for it.
    azure_blob = AzureBlob(self.config.az_tsana_model_blob_connection)
    container_name = self.config.tsana_app_name
    azure_blob.create_container(container_name)
    blob_name = 'training_data_' + time_key
    with open(zip_file, "rb") as data:
        azure_blob.upload_blob(container_name, blob_name, data)
    os.remove(zip_file)
    blob_url = AzureBlob.generate_blob_sas(
        self.config.az_storage_account,
        self.config.az_storage_account_key,
        container_name,
        blob_name)

    result = {}
    result['variable'] = variable
    result['fillUpMode'] = parameters['instance']['params']['fillUpMode']
    result['tracebackWindow'] = parameters['instance']['params']['tracebackWindow']
    # NOTE(review): the SAS url is computed but deliberately unused — the
    # source is pinned to a local path. Presumably a debugging override;
    # confirm before shipping.
    #result['source'] = blob_url
    result['source'] = '/data/training_data.zip'
    result['startTime'] = dt_to_str(start_time)
    result['endTime'] = dt_to_str(end_time)
    return result
def do_verify(self, subscription, parameters):
    """Validate a forecast instance configuration.

    Checks that (1) the target metric exists, (2) every factor shares the
    target's granularity (including the custom-granularity second count),
    and (3) each factor resolves to exactly one series with the total series
    count under ``self.config.series_limit``.

    Returns (STATUS_SUCCESS, '') when valid, else (STATUS_FAIL, reason).

    BUGFIX: corrected the error-message typo 'Read series rank filed.'
    to 'Read series rank failed.' (matching the sibling do_verify).
    """
    # ------TO BE REPLACED: Other application just replace below part-------
    # For forecast, check the factors and target has same granularity, and each factor could only contain one series
    meta = self.tsanaclient.get_metric_meta(parameters['apiKey'],
                                            parameters['instance']['params']['target']['metricId'])
    if meta is None:
        return STATUS_FAIL, 'Target is not found. '
    target_gran = meta['granularityName']
    # Only for custom, the granularity amount is meaningful which is the number of seconds
    target_gran_amount = meta['granularityAmount']
    for data in parameters['seriesSets']:
        if target_gran != data['metricMeta']['granularityName'] or (
                target_gran == 'Custom'
                and target_gran_amount != data['metricMeta']['granularityAmount']):
            return STATUS_FAIL, 'Granularity must be identical between target and factors. '

    # Check series count, and each factor should only contain 1 series
    seriesCount = 0
    for data in parameters['seriesSets']:
        dim = {}
        for dimkey in data['dimensionFilter']:
            dim[dimkey] = [data['dimensionFilter'][dimkey]]
        dt = dt_to_str(str_to_dt(meta['dataStartFrom']))
        # count=2 so an ambiguous filter (>1 matching series) is detectable.
        para = dict(metricId=data['metricId'], dimensions=dim, count=2, startTime=dt)
        ret = self.tsanaclient.post(parameters['apiKey'],
                                    '/metrics/' + data['metricId'] + '/rank-series',
                                    data=para)
        if ret is None or 'value' not in ret:
            return STATUS_FAIL, 'Read series rank failed. '
        seriesCount += len(ret['value'])
        if len(ret['value']) != 1 or seriesCount > self.config.series_limit:
            return STATUS_FAIL, ('Cannot accept ambiguous factors or too many series in the group, limit is '
                                 + str(self.config.series_limit) + '.')
    return STATUS_SUCCESS, ''
def do_verify(self, subscription, parameters):
    """Validate an instance's series sets.

    For every configured series set: the metric must exist, its dimension
    filter must resolve to exactly one series with data, and the running
    series total must stay within ``self.config.series_limit``.

    Returns (STATUS_SUCCESS, '') when valid, else (STATUS_FAIL, reason).
    """
    # Check series count, and each factor should only contain 1 series
    total_series = 0
    for series_set in parameters['seriesSets']:
        metric_id = series_set['metricId']
        dim = {key: [value] for key, value in series_set['dimensionFilter'].items()}
        meta = self.tsanaclient.get_metric_meta(parameters['apiKey'], metric_id)
        if meta is None:
            return STATUS_FAIL, 'Metric {} is not found.'.format(metric_id)
        dt = dt_to_str(str_to_dt(meta['dataStartFrom']))
        # count=2 so an ambiguous filter (>1 matching series) is detectable.
        para = dict(metricId=metric_id, dimensions=dim, count=2, startTime=dt)
        ret = self.tsanaclient.post(parameters['apiKey'],
                                    '/metrics/' + metric_id + '/rank-series',
                                    data=para)
        if ret is None or 'value' not in ret:
            return STATUS_FAIL, 'Read series rank failed.'
        matched = ret['value']
        if not matched:
            return STATUS_FAIL, "Data not found for {}".format(para)
        total_series += len(matched)
        if len(matched) != 1 or total_series > self.config.series_limit:
            return STATUS_FAIL, ('Cannot accept ambiguous factors or too many series in the group, limit is '
                                 + str(self.config.series_limit) + '.')
    return STATUS_SUCCESS, ''
def save_inference_result(self, parameters, result):
    """Post inference results back to the group's app-instance saveResult endpoint.

    Each item's timestamp is normalized via str_to_dt/dt_to_str (mutating the
    item in place), then wrapped with the instance params and a Ready status.
    An empty result list is a successful no-op.

    Returns (STATUS_SUCCESS, '') or (STATUS_FAIL, error message).
    """
    try:
        if not result:
            return STATUS_SUCCESS, ''
        wrapped = []
        for item in result:
            # Round-trip the timestamp to normalize its string format.
            item['timestamp'] = dt_to_str(str_to_dt(item['timestamp']))
            wrapped.append({
                'params': parameters['instance']['params'],
                'timestamp': item['timestamp'],
                'result': item,
                'status': InferenceState.Ready.name
            })
        body = {
            'groupId': parameters['groupId'],
            'instanceId': parameters['instance']['instanceId'],
            'results': wrapped
        }
        self.post(parameters['apiKey'],
                  '/timeSeriesGroups/' + parameters['groupId']
                  + '/appInstances/' + parameters['instance']['instanceId']
                  + '/saveResult',
                  body)
        return STATUS_SUCCESS, ''
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        return STATUS_FAIL, str(e)
def get_inference_time_range(self, parameters):
    """Resolve the inference window and the granularity to use.

    Returns (start_time, end_time, gran) where gran is the
    (granularityName, granularityAmount) pair of the series set whose
    one-step-back start time is the latest.
    """
    end_time = str_to_dt(parameters['endTime'])
    # A missing startTime means a single-point inference at endTime.
    start_time = str_to_dt(parameters['startTime']) if 'startTime' in parameters else end_time
    candidates = []
    for series_set in parameters['seriesSets']:
        meta = series_set['metricMeta']
        gran = (meta['granularityName'], meta['granularityAmount'])
        candidates.append((get_time_offset(start_time, gran, -1), gran))
    # Pick the granularity attached to the latest shifted start.
    _, chosen_gran = max(candidates, key=lambda candidate: candidate[0])
    return start_time, end_time, chosen_gran
def get_timeseries(self, api_key, series_sets, start_time, end_time, offset=0, granularityName=None, granularityAmount=0, top=1):
    """Resolve each series set to concrete series via rank-series, then fetch
    their data and wrap it as a list of Series objects.

    :param api_key: API key forwarded to every post call.
    :param series_sets: list of series-set dicts; each needs 'metricId',
        'seriesSetId' and either 'dimensionFilter' or 'filters'.
    :param start_time: inclusive window start (datetime).
    :param end_time: inclusive window end (datetime).
    :param offset: timestamp shift (in granularity steps) applied to each
        point; only honored when granularityName is given.
    :param granularityName: granularity used for the offset shift; None
        disables shifting.
    :param granularityAmount: seconds per step for custom granularity.
    :param top: max series ranked per series set.
    :return: list of Series, or None when no series matched.
    """
    # Offset is meaningless without a granularity to step by.
    if offset != 0 and granularityName is None:
        offset = 0
    end_str = dt_to_str(end_time)
    start_str = dt_to_str(start_time)
    dedup = {}
    series = []
    # Query each series's tag
    for data in series_sets:
        dim = {}
        # NOTE(review): mutates the caller's dict — 'filters' is copied into
        # 'dimensionFilter' in place; presumably a legacy-key shim.
        if 'dimensionFilter' not in data:
            data['dimensionFilter'] = data['filters']
        # Rank-series expects each dimension value wrapped in a list.
        for dimkey in data['dimensionFilter']:
            dim[dimkey] = [data['dimensionFilter'][dimkey]]
        para = dict(metricId=data['metricId'], dimensions=dim, count=top, startTime=start_str)
        ret = self.post(api_key, '/metrics/' + data['metricId'] + '/rank-series', data=para)
        for s in ret['value']:
            # Keep each physical series once, even if matched by several sets.
            if s['seriesId'] not in dedup:
                s['seriesSetId'] = data['seriesSetId']
                s['startTime'] = start_str
                s['endTime'] = end_str
                # The data endpoint wants the key 'dimension' (singular).
                s['dimension'] = s['dimensions']
                del s['dimensions']
                series.append(s)
                dedup[s['seriesId']] = True
    # Query the data
    multi_series_data = None
    if len(series) > 0:
        ret = self.post(api_key, '/metrics/series/data', data=dict(value=series))
        # Each factor's values arrive as [timestamp, value] pairs; ret['value']
        # is assumed to be ordered like the request so series[idx] lines up.
        if granularityName is not None:
            # Shift every timestamp by `offset` granularity steps.
            multi_series_data = [
                Series(factor['id']['metricId'],
                       series[idx]['seriesSetId'],
                       factor['id']['dimension'],
                       [dict(timestamp=get_time_offset(str_to_dt(y[0]),
                                                       (granularityName, granularityAmount),
                                                       offset),
                             value=y[1])
                        for y in factor['values']])
                for idx, factor in enumerate(ret['value'])
            ]
        else:
            # No shifting: keep raw timestamp strings.
            multi_series_data = [
                Series(factor['id']['metricId'],
                       series[idx]['seriesSetId'],
                       factor['id']['dimension'],
                       value=[dict(timestamp=y[0], value=y[1])
                              for y in factor['values']])
                for idx, factor in enumerate(ret['value'])
            ]
    else:
        log.info("Series is empty")
    return multi_series_data
def prepare_inference_data(self, parameters):
    """Fetch the configured series and shape them into an inference request.

    Returns a dict with 'data' (series_id -> list of points) plus the
    string-formatted 'startTime' and 'endTime'.
    """
    end_time = str_to_dt(parameters['endTime'])
    # A missing startTime means a single-point window at endTime.
    start_time = str_to_dt(parameters['startTime']) if 'startTime' in parameters else end_time
    factors_data = self.tsanaclient.get_timeseries(parameters['seriesSets'],
                                                   start_time,
                                                   end_time)
    variable = {factor.series_id: factor.value for factor in factors_data}
    return {
        'data': variable,
        'startTime': dt_to_str(start_time),
        'endTime': dt_to_str(end_time)
    }
def get_data_time_range(self, parameters):
    """Compute the widest data window needed across all series sets.

    For each set's granularity the end is pushed one step forward and the
    start pulled back by 3x the traceback window; the overall (min start,
    max end) pair is returned.
    """
    end_time = str_to_dt(parameters['endTime'])
    # A missing startTime means a single-point window at endTime.
    start_time = str_to_dt(parameters['startTime']) if 'startTime' in parameters else end_time
    traceback_window = parameters['instance']['params']['tracebackWindow']
    min_start_time = start_time
    max_end_time = end_time
    for series_set in parameters['seriesSets']:
        meta = series_set['metricMeta']
        gran = (meta['granularityName'], meta['granularityAmount'])
        # One step of headroom after the end; 3x traceback before the start.
        max_end_time = max(max_end_time, get_time_offset(end_time, gran, +1))
        min_start_time = min(min_start_time,
                             get_time_offset(start_time, gran, -traceback_window * 3))
    return min_start_time, max_end_time
def do_train(self, subscription, model_id, model_dir, parameters):
    """Train the LSTM forecast model for one instance.

    Pulls factor and (offset-shifted) target series over
    [end - window - backwindow, end + 1 step] at the target metric's
    granularity, then calls ``train``.

    Raises Exception when the target metric or any series cannot be fetched.
    Returns (STATUS_SUCCESS, '').

    IDIOM FIX: the nine repeated
    ``params[k] if k in params else self.config.lstm[k]`` expressions are
    collapsed into a local helper that preserves the original lazy config
    lookup (config is only read when the instance param is absent).
    Also removed the unused local ``start_time``.
    """
    params = parameters['instance']['params']
    lstm_cfg = self.config.lstm

    def lstm_param(key):
        # Instance override if present, else the lstm config value — looked
        # up lazily, exactly like the original conditional expressions.
        return params[key] if key in params else lstm_cfg[key]

    inference_window = params['windowSize']
    meta = self.tsanaclient.get_metric_meta(parameters['apiKey'],
                                            params['target']['metricId'])
    if meta is None:
        raise Exception('Metric is not found.')
    gran = (meta['granularityName'], meta['granularityAmount'])

    end_time = str_to_dt(parameters['endTime'])
    # One step of headroom past the configured end.
    data_end_time = get_time_offset(end_time, gran, + 1)
    if 'max_train_history_steps' in params:
        backwindow = params['max_train_history_steps']
    else:
        backwindow = lstm_cfg['train_history_step']
    start_time_train = get_time_offset(data_end_time, gran,
                                       - inference_window - backwindow)

    factors_data = self.tsanaclient.get_timeseries(parameters['apiKey'],
                                                   parameters['seriesSets'],
                                                   start_time_train,
                                                   data_end_time)
    target_def = [params['target']]
    # The target may be shifted by target_offset granularity steps.
    offset = int(params['target_offset']) if 'target_offset' in params else 0
    target_data = self.tsanaclient.get_timeseries(parameters['apiKey'],
                                                  target_def,
                                                  start_time_train,
                                                  data_end_time,
                                                  offset,
                                                  meta['granularityName'],
                                                  meta['granularityAmount'])
    if factors_data is None or target_data is None:
        raise Exception('Error to get series.')

    inference_window, best_model = train(
        factor_series=factors_data,
        target_series=target_data[0],
        model_dir=model_dir,  # the model used to inference
        window=inference_window,
        timestamp=end_time,
        future_target_size=params['step'],
        gran=Gran[meta['granularityName']],
        custom_in_seconds=meta['granularityAmount'],
        max_cores=2,
        metric_sender=MetricSender(self.config, subscription, model_id),
        epoc=lstm_param('epoc'),
        batch_size=lstm_param('batch_size'),
        steps_per_epoc=lstm_param('steps_per_epoc'),
        validation_freq=lstm_param('validation_freq'),
        validation_ratio=lstm_param('validation_ratio'),
        num_hidden=lstm_param('num_hidden'),
        fill_type=Fill[params['fill']] if 'fill' in params else Fill.Previous,
        fill_value=params['fillValue'] if 'fillValue' in params else 0)
    # Need to call callback
    return STATUS_SUCCESS, ''
def do_inference(self, subscription, model_id, model_dir, parameters):
    """Run LSTM inference for every timestamp in [startTime, endTime], saving
    each step's result via the TSANA client.

    Loads the trained model from model_dir, builds the input frame from the
    factor and target series, then steps cur_time forward one granularity
    unit per iteration until end_time.

    Returns (STATUS_FAIL, message) when the target metric is missing,
    otherwise (STATUS_SUCCESS, '') — per-step failures are logged and skipped.
    """
    log.info("Start to inference %s", model_dir)
    inference_window = parameters['instance']['params']['windowSize']
    meta = self.tsanaclient.get_metric_meta(parameters['apiKey'], parameters['instance']['params']['target']['metricId'])
    if meta is None:
        return STATUS_FAIL, 'Metric is not found. '
    end_time = str_to_dt(parameters['endTime'])
    # A missing startTime means a single-point inference at endTime.
    if 'startTime' in parameters:
        start_time = str_to_dt(parameters['startTime'])
    else:
        start_time = end_time
    cur_time = start_time
    # Fetch one step past the end and 2x the window before the start so the
    # model has full history for the first step.
    data_end_time = get_time_offset(end_time, (meta['granularityName'], meta['granularityAmount']), + 1)
    data_start_time = get_time_offset(start_time, (meta['granularityName'], meta['granularityAmount']), - inference_window * 2)
    factor_def = parameters['seriesSets']
    factors_data = self.tsanaclient.get_timeseries(parameters['apiKey'], factor_def, data_start_time, data_end_time)
    target_def = [parameters['instance']['params']['target']]
    target_data = self.tsanaclient.get_timeseries(parameters['apiKey'], target_def, data_start_time, data_end_time)
    # Instance params override the lstm config defaults where present.
    model, window = load_inference_model(model_dir=model_dir,
                                         target_size=parameters['instance']['params']['step'],
                                         window=inference_window,
                                         metric_sender=MetricSender(self.config, subscription, model_id),
                                         epoc=parameters['instance']['params']['epoc'] if 'epoc' in parameters['instance']['params'] else self.config.lstm['epoc'],
                                         validation_freq=parameters['instance']['params']['validation_freq'] if 'validation_freq' in parameters['instance']['params'] else self.config.lstm['validation_freq'],
                                         validation_ratio=parameters['instance']['params']['validation_ratio'] if 'validation_ratio' in parameters['instance']['params'] else self.config.lstm['validation_ratio'])
    input_data = load_inference_input_data(target_series=target_data[0], factor_series=factors_data, model=model,
                                           gran=Gran[meta['granularityName']],
                                           custom_in_seconds=meta['granularityAmount'],
                                           fill_type=Fill[parameters['instance']['params']['fill']] if 'fill' in parameters['instance']['params'] else Fill.Previous,
                                           fill_value=parameters['instance']['params']['fillValue'] if 'fillValue' in parameters['instance']['params'] else 0)
    while cur_time <= end_time:
        try:
            result = inference(input_data=input_data, window=window, timestamp=cur_time, target_size=parameters['instance']['params']['step'], model=model)
            if len(result) > 0:
                # offset back
                # NOTE(review): nesting reconstructed from collapsed source —
                # the timestamp-rewrite loop is placed inside the
                # target_offset guard because `offset` is only bound there;
                # confirm against the original formatting.
                if 'target_offset' in parameters['instance']['params']:
                    offset = int(parameters['instance']['params']['target_offset'])
                    for idx in range(len(result)):
                        result[idx]['timestamp'] = dt_to_str(get_time_offset(cur_time, (meta['granularityName'], meta['granularityAmount']), - offset + idx))
                        # print(result[idx]['timestamp'])
                self.tsanaclient.save_inference_result(parameters, result)
            else:
                log.error("No result for this inference %s, key %s" % (dt_to_str(cur_time), model_dir))
            # process = psutil.Process(os.getpid())
            # print(process.memory_info().rss)
        except Exception as e:
            # NOTE(review): the exception detail `e` is swallowed here —
            # consider logging repr(e) for diagnosability.
            log.error("-------Inference exception-------")
        # Advance one granularity step regardless of success.
        cur_time = get_time_offset(cur_time, (meta['granularityName'], meta['granularityAmount']), + 1)
    return STATUS_SUCCESS, ''
{"seriesSetId":"0d4cce4d-f4d4-4cef-be87-dbd28062abfc","metricId":"3274f7e6-683b-4d92-b134-0c1186e416a1","dimensionFilter":{"ts_code":"600030.SH"},"seriesSetName":"Stock price_change","metricMeta":{"granularityName":"Daily","granularityAmount":0,"datafeedId":"29595b1c-531f-445c-adcf-b75b2ab93c34","metricName":"change","datafeedName":"Stock price","dataStartFrom":1105315200000}} \ ], \ "gran":{"granularityString":"Daily","customInSeconds":0}, \ "instance":{ \ "instanceName":"Forecast_Instance_1586447708033","instanceId":"528cbe52-cb6a-44c0-b388-580aba57f2f7","status":"Active","appId":"173276d9-a7ed-494b-9300-6dd1aa09f2c3","appName":"Forecast","appDisplayName":"Forecast","appType":"Internal","remoteModelKey":"", \ "params":{"missingRatio":0.5,"target":{"seriesSetId":"b643e346-6883-4764-84a5-e63a3788eec9","filters":{"ts_code":"600030.SH"},"metricId":"dc5b66cf-6dd0-4c83-bb8f-d849e68a7660","name":"Stock price_high"},"waitInSeconds":60,"windowSize":28, "step":2},"hookIds":[] \ }, \ "startTime":"2020-03-18T00:00:00Z","endTime":"2020-04-18T00:00:00Z","apiKey":"3517cf61-065d-40e9-8ed4-eda58147982d","apiEndpoint":"https://stock-exp2-api.azurewebsites.net/","fieldsFilter":["IsAnomaly"]}' #response = client.post('/dummy/models/train', data=request_json) #time.sleep(10) #response = client.post('/dummy/models/train', data=request_json) response = client.post('/dummy/models/7cbb3a50-dc7a-11ea-a0bb-000d3af88183/inference', data=request_json) #response = client.get('/dummy/models/b06f99c6-d186-11ea-a12e-000d3af88183') #response = client.get('/dummy/models') time.sleep(1000) alert_request_json = 
'{"seriesSets":[{"seriesSetName":"yongw/5min_value","seriesSetId":"33973891-f2a0-4e7d-9221-c7bce5de50f7","metricId":"f7a8325a-ba58-4c6f-8572-56f5efeb1beb","dimensionFilter":{"seriesId":"3"},"enrichmentConfigs":[{"enrichmentName":"AnomalyDetection","enrichmentConfigId":"8d0c1aab-54d1-4237-a003-9d1a0f88c195"}],"metricMeta":{"granularityName":"Custom","granularityAmount":300,"datafeedId":"603c065b-0cfe-46f4-98ce-f8b72e400fdd","metricName":"value","datafeedName":"yongw/5min","dataStartFrom":"2020-07-01T00:00:00Z"}},{"seriesSetName":"yongw/5min_value","seriesSetId":"700fe222-4d9e-403a-b459-c275f97bd0da","metricId":"f7a8325a-ba58-4c6f-8572-56f5efeb1beb","dimensionFilter":{"seriesId":"2"},"enrichmentConfigs":[{"enrichmentName":"AnomalyDetection","enrichmentConfigId":"8d0c1aab-54d1-4237-a003-9d1a0f88c195"}],"metricMeta":{"granularityName":"Custom","granularityAmount":300,"datafeedId":"603c065b-0cfe-46f4-98ce-f8b72e400fdd","metricName":"value","datafeedName":"yongw/5min","dataStartFrom":"2020-07-01T00:00:00Z"}},{"seriesSetName":"yongw/5min_value","seriesSetId":"10864d2b-c078-45bd-adda-f55ec0e945d0","metricId":"f7a8325a-ba58-4c6f-8572-56f5efeb1beb","dimensionFilter":{"seriesId":"1"},"enrichmentConfigs":[{"enrichmentName":"AnomalyDetection","enrichmentConfigId":"8d0c1aab-54d1-4237-a003-9d1a0f88c195"}],"metricMeta":{"granularityName":"Custom","granularityAmount":300,"datafeedId":"603c065b-0cfe-46f4-98ce-f8b72e400fdd","metricName":"value","datafeedName":"yongw/5min","dataStartFrom":"2020-07-01T00:00:00Z"}},{"seriesSetName":"yongw/5min_value","seriesSetId":"b21522dc-f785-426f-a3d6-65e77b884f66","metricId":"f7a8325a-ba58-4c6f-8572-56f5efeb1beb","dimensionFilter":{"seriesId":"0"},"enrichmentConfigs":[{"enrichmentName":"AnomalyDetection","enrichmentConfigId":"8d0c1aab-54d1-4237-a003-9d1a0f88c195"}],"metricMeta":{"granularityName":"Custom","granularityAmount":300,"datafeedId":"603c065b-0cfe-46f4-98ce-f8b72e400fdd","metricName":"value","datafeedName":"yongw/5min","dataStartFrom":"2020
-07-01T00:00:00Z"}}],"instance":{"instanceName":"MAGA-TEST_Instance_1596081953900","instanceId":"47e08c86-088e-4ff2-b801-10f1abcfa97d","status":"Active","appId":"c96fbe27-b5b2-4a22-a27e-881259745bb7","appName":"MAGAplugin","appDisplayName":"MAGA-TEST","appType":"External","remoteModelKey":"e35aec16-d9f8-11ea-9943-e2140f8a2855","remoteCandidateModelKey":"","params":{"alertRatio":-1,"alertWindow":28,"fillMissingMethod":"Linear","fillMissingValue":1,"mergeMode":"Outer","metricDeficiency":0,"sensitivity":92,"snooze":3,"tracebackWindow":388},"hookIds":["e78723ef-3c12-4830-9f79-e9e7073d728a"]},"groupId":"6b733629-465a-4f9b-aeb5-2faa56aeda53","startTime":"2020-08-10T09:20:00Z","endTime":"2020-08-10T09:20:00Z","apiKey":"525f9a7e-d59b-4f6a-bf26-fcb647d097a1","apiEndpoint":"https://stock-exp3-api.azurewebsites.net/","manually":false}' alert_result_json = '[{"contributors":[{"variable":"166518f482792966f6bcc1600853f5cf","probability":1.1350116729736328},{"variable":"f1a6b15f82caed2d80b73f91d0d26a33","probability":1.2112369537353516},{"variable":"cd12c5722becb50f88961ef0d766f493","probability":1.220422625541687},{"variable":"8a70009191d7d71315626906fe38fffb","probability":1.223111629486084}],"isAnomaly":true,"score":-0.9746478796005249,"severity":0.0,"timestamp":"2020-07-03T09:20:00Z"},{"contributors":[],"isAnomaly":false,"score":-0.9746478796005249,"severity":0.0,"timestamp":"2020-08-04T09:30:00Z"},{"timestamp":"2020-08-04T09:35:00Z"}]' parameters = json.loads(alert_request_json) start_time = str_to_dt("2020-08-07T01:40:00Z") end_time = str_to_dt("2020-08-07T01:50:00Z") gran = ("Custom", 300) result = json.loads(alert_result_json) dummy.tsanaclient.trigger_alert(parameters, start_time, end_time, gran, result)