def prepare_training_data(self, parameters):
        end_time = str_to_dt(parameters['endTime'])
        if 'startTime' in parameters:
            start_time = str_to_dt(parameters['startTime'])
        else:
            start_time = end_time

        factor_def = parameters['seriesSets']
        factors_data = self.tsanaclient.get_timeseries(factor_def, start_time,
                                                       end_time)

        time_key = dt_to_str_file_name(end_time)
        data_dir = os.path.join(self.config.model_data_dir, time_key,
                                str(uuid.uuid1()))
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir, exist_ok=True)

        variable = {}
        for factor in factors_data:
            csv_file = factor.series_id + '.csv'
            csv_data = [('timestamp', 'value')]
            csv_data.extend([(point['timestamp'], point['value'])
                             for point in factor.value])
            save_to_csv(csv_data, os.path.join(data_dir, csv_file))
            variable[factor.series_id] = csv_file

        zip_dir = os.path.abspath(os.path.join(data_dir, os.pardir))
        zip_file_base = os.path.join(zip_dir, 'training_data')
        zip_file = zip_file_base + '.zip'
        if os.path.exists(zip_file):
            os.remove(zip_file)
        shutil.make_archive(zip_file_base, 'zip', data_dir)

        azure_blob = AzureBlob(self.config.az_tsana_model_blob_connection)
        container_name = self.config.tsana_app_name
        azure_blob.create_container(container_name)

        blob_name = 'training_data_' + time_key
        with open(zip_file, "rb") as data:
            azure_blob.upload_blob(container_name, blob_name, data)

        os.remove(zip_file)
        blob_url = AzureBlob.generate_blob_sas(
            self.config.az_storage_account, self.config.az_storage_account_key,
            container_name, blob_name)

        result = {}
        result['variable'] = variable
        result['fillUpMode'] = parameters['instance']['params']['fillUpMode']
        result['tracebackWindow'] = parameters['instance']['params'][
            'tracebackWindow']
        # NOTE: ship a fixed local path for now; the SAS URL is kept for reference
        # result['source'] = blob_url
        result['source'] = '/data/training_data.zip'
        result['startTime'] = dt_to_str(start_time)
        result['endTime'] = dt_to_str(end_time)

        return result
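
# A hypothetical invocation sketch (not from the original source): the plugin
# object and every value below are placeholders, shaped after the keys this
# method actually reads ('endTime', 'seriesSets', 'instance'/'params').
parameters = {
    'endTime': '2020-01-10T00:00:00Z',
    'startTime': '2020-01-01T00:00:00Z',
    'seriesSets': [{'metricId': 'metric-a', 'seriesSetId': 'set-1',
                    'dimensionFilter': {'region': 'eu'}}],
    'instance': {'params': {'fillUpMode': 'Previous', 'tracebackWindow': 28}},
}
prep = plugin.prepare_training_data(parameters)
print(prep['source'], prep['startTime'], prep['endTime'])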
Example #2
    def get_timeseries(self, api_key, series_sets, start_time, end_time, offset=0, granularityName=None, granularityAmount=0,
                       top=1):
        # an offset is only meaningful together with an explicit granularity
        if offset != 0 and granularityName is None:
            offset = 0

        end_str = dt_to_str(end_time)
        start_str = dt_to_str(start_time)
        dedup = {}
        series = []

        # Resolve each series set into concrete series via rank-series (dedup by seriesId)
        for data in series_sets:
            dim = {}
            # accept 'filters' as an alternate key for 'dimensionFilter'
            if 'dimensionFilter' not in data:
                data['dimensionFilter'] = data['filters']

            for dimkey in data['dimensionFilter']:
                dim[dimkey] = [data['dimensionFilter'][dimkey]]

            para = dict(metricId=data['metricId'], dimensions=dim, count=top, startTime=start_str)
            ret = self.post(api_key, '/metrics/' + data['metricId'] + '/rank-series', data=para)
            for s in ret['value']:
                if s['seriesId'] not in dedup:
                    s['seriesSetId'] = data['seriesSetId']
                    s['startTime'] = start_str
                    s['endTime'] = end_str
                    s['dimension'] = s['dimensions']
                    del s['dimensions']
                    series.append(s)
                    dedup[s['seriesId']] = True

        # Query the data
        multi_series_data = None
        if len(series) > 0:
            ret = self.post(api_key, '/metrics/series/data', data=dict(value=series))
            if granularityName is not None:
                multi_series_data = [
                    Series(factor['id']['metricId'], series[idx]['seriesSetId'],
                           factor['id']['dimension'],
                           [dict(timestamp=get_time_offset(str_to_dt(y[0]),
                                                           (granularityName, granularityAmount),
                                                           offset),
                                 value=y[1])
                            for y in factor['values']])
                    for idx, factor in enumerate(ret['value'])
                ]
            else:
                multi_series_data = [
                    Series(factor['id']['metricId'], series[idx]['seriesSetId'],
                           factor['id']['dimension'],
                           [dict(timestamp=y[0], value=y[1])
                            for y in factor['values']])
                    for idx, factor in enumerate(ret['value'])
                ]
        else:
            log.info("Series is empty")

        return multi_series_data
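
# For reference, a sketch of the series-set shape this method consumes,
# inferred from the field accesses above; client, api_key and all values
# are illustrative placeholders.
series_sets = [{
    'metricId': 'metric-a',
    'seriesSetId': 'set-1',
    'dimensionFilter': {'city': 'Seattle', 'channel': 'web'},
}]
series = client.get_timeseries(api_key, series_sets,
                               str_to_dt('2020-01-01T00:00:00Z'),
                               str_to_dt('2020-01-08T00:00:00Z'))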
Example #3
    def prepare_inference_data(self, parameters):
        start_time, end_time = self.get_data_time_range(parameters)

        factor_def = parameters['seriesSets']
        factors_data = self.tsanaclient.get_timeseries(parameters['apiKey'],
                                                       factor_def, start_time,
                                                       end_time)

        time_key = dt_to_str_file_name(end_time)
        data_dir = os.path.join(self.config.model_data_dir, time_key,
                                str(uuid.uuid1()))
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir, exist_ok=True)

        try:
            for factor in factors_data:
                csv_file = factor.series_id + '.csv'
                csv_data = [('timestamp', 'value')]
                csv_data.extend([(point['timestamp'], point['value'])
                                 for point in factor.value])
                save_to_csv(csv_data, os.path.join(data_dir, csv_file))

            zip_dir = os.path.abspath(os.path.join(data_dir, os.pardir))
            zip_file_base = os.path.join(zip_dir, 'inference_data')
            zip_file = zip_file_base + '.zip'
            if os.path.exists(zip_file):
                os.remove(zip_file)
            shutil.make_archive(zip_file_base, 'zip', data_dir)

            azure_blob = AzureBlob(self.config.az_tsana_model_blob_connection)
            container_name = self.config.tsana_app_name
            azure_blob.create_container(container_name)

            blob_name = 'inference_data_' + time_key
            with open(zip_file, "rb") as data:
                azure_blob.upload_blob(container_name, blob_name, data)

            os.remove(zip_file)
            blob_url = AzureBlob.generate_blob_sas(
                self.config.az_storage_account,
                self.config.az_storage_account_key, container_name, blob_name)

            result = {}
            result['source'] = blob_url
            result['startTime'] = dt_to_str(start_time)
            result['endTime'] = dt_to_str(end_time)
            return result
        finally:
            shutil.rmtree(data_dir, ignore_errors=True)
Example #4
    def get_inference_result(self, parameters, start_time, end_time):
        try:
            ret = self.get(parameters['apiKey'], '/timeSeriesGroups/'
                           + parameters['groupId']
                           + '/appInstances/'
                           + parameters['instance']['instanceId']
                           + '/history?startTime='
                           + dt_to_str(start_time)
                           + '&endTime='
                           + dt_to_str(end_time))

            return STATUS_SUCCESS, '', ret
        except Exception as e:
            traceback.print_exc(file=sys.stdout)
            return STATUS_FAIL, str(e), None
    def do_verify(self, subscription, parameters):
        # ------TO BE REPLACED: Other application just replace below part-------
        # For forecast, check that the factors and the target share the same granularity, and that each factor matches exactly one series
        meta = self.tsanaclient.get_metric_meta(parameters['apiKey'], parameters['instance']['params']['target']['metricId'])
        if meta is None:
            return STATUS_FAIL, 'Target is not found.'
        target_gran = meta['granularityName']
        # granularityAmount is only meaningful for 'Custom' granularity: it is the number of seconds
        target_gran_amount = meta['granularityAmount']

        for data in parameters['seriesSets']:
            if target_gran != data['metricMeta']['granularityName'] or (target_gran == 'Custom' and target_gran_amount != data['metricMeta']['granularityAmount']):
                return STATUS_FAIL, 'Granularity must be identical between target and factors.'

        # Check the series count; each factor must match exactly one series
        seriesCount = 0
        for data in parameters['seriesSets']:
            dim = {}
            for dimkey in data['dimensionFilter']:
                dim[dimkey] = [data['dimensionFilter'][dimkey]]

            dt = dt_to_str(str_to_dt(meta['dataStartFrom']))
            # count=2 is enough to detect an ambiguous factor (more than one matching series)
            para = dict(metricId=data['metricId'], dimensions=dim, count=2, startTime=dt)
            ret = self.tsanaclient.post(parameters['apiKey'], '/metrics/' + data['metricId'] + '/rank-series', data=para)
            if ret is None or 'value' not in ret:
                return STATUS_FAIL, 'Read series rank failed.'
            seriesCount += len(ret['value'])
            if len(ret['value']) != 1 or seriesCount > self.config.series_limit:
                return STATUS_FAIL, 'Cannot accept ambiguous factors or too many series in the group, limit is ' + str(self.config.series_limit) + '.'

        return STATUS_SUCCESS, ''
Example #6
    def do_verify(self, subscription, parameters):
        # Check the series count; each factor must match exactly one series
        seriesCount = 0
        for data in parameters['seriesSets']:
            dim = {}
            for dimkey in data['dimensionFilter']:
                dim[dimkey] = [data['dimensionFilter'][dimkey]]

            meta = self.tsanaclient.get_metric_meta(parameters['apiKey'],
                                                    data['metricId'])
            if meta is None:
                return STATUS_FAIL, 'Metric {} is not found.'.format(
                    data['metricId'])
            dt = dt_to_str(str_to_dt(meta['dataStartFrom']))
            # count=2 is enough to detect an ambiguous factor (more than one matching series)
            para = dict(metricId=data['metricId'],
                        dimensions=dim,
                        count=2,
                        startTime=dt)
            ret = self.tsanaclient.post(parameters['apiKey'],
                                        '/metrics/' + data['metricId'] +
                                        '/rank-series',
                                        data=para)
            if ret is None or 'value' not in ret:
                return STATUS_FAIL, 'Read series rank failed.'
            if len(ret['value']) == 0:
                return STATUS_FAIL, "Data not found for {}".format(para)
            seriesCount += len(ret['value'])
            if len(ret['value']) != 1 or seriesCount > self.config.series_limit:
                return STATUS_FAIL, ('Cannot accept ambiguous factors or too many series '
                                     'in the group, limit is ' + str(self.config.series_limit) + '.')

        return STATUS_SUCCESS, ''
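
# A hedged caller-side sketch (not from the original source): both do_verify
# variants return a (status, message) pair using the STATUS_* constants above.
status, message = plugin.do_verify(subscription, parameters)
if status != STATUS_SUCCESS:
    log.error("Verification failed: %s", message)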
Example #7
    def save_inference_result(self, parameters, result):
        try:
            if len(result) <= 0:
                return STATUS_SUCCESS, ''

            body = {
                'groupId': parameters['groupId'], 
                'instanceId': parameters['instance']['instanceId'], 
                'results': []
            }

            for item in result:
                item['timestamp'] = dt_to_str(str_to_dt(item['timestamp']))
                body['results'].append({
                    'params': parameters['instance']['params'],
                    'timestamp': item['timestamp'],
                    'result': item,
                    'status': InferenceState.Ready.name
                })

            self.post(parameters['apiKey'],
                      '/timeSeriesGroups/' + parameters['groupId']
                      + '/appInstances/' + parameters['instance']['instanceId']
                      + '/saveResult', body)
            return STATUS_SUCCESS, ''
        except Exception as e: 
            traceback.print_exc(file=sys.stdout)
            return STATUS_FAIL, str(e)
    def prepare_inference_data(self, parameters):
        end_time = str_to_dt(parameters['endTime'])
        if 'startTime' in parameters:
            start_time = str_to_dt(parameters['startTime'])
        else:
            start_time = end_time

        factor_def = parameters['seriesSets']
        factors_data = self.tsanaclient.get_timeseries(factor_def, start_time,
                                                       end_time)

        variable = {}
        for factor in factors_data:
            variable[factor.series_id] = factor.value

        result = {}
        result['data'] = variable
        result['startTime'] = dt_to_str(start_time)
        result['endTime'] = dt_to_str(end_time)
        return result
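
# A sketch (placeholders only) of the result items save_inference_result
# above expects: each item needs a parseable 'timestamp'; all other fields
# ride along inside the saved 'result' payload unchanged.
result = [
    {'timestamp': '2020-01-01T00:00:00Z', 'forecast': 12.5},
    {'timestamp': '2020-01-02T00:00:00Z', 'forecast': 13.1},
]
status, message = client.save_inference_result(parameters, result)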
Example #9
    def do_inference(self, model_dir, parameters, context):
        results = []
        factors_data = self.prepare_inference_data(parameters)
        start_time, end_time, gran = self.get_inference_time_range(parameters)

        traceback_window = parameters['instance']['params']['tracebackWindow']
        for timestamp in get_time_list(start_time, end_time, gran):
            single_point = []
            for factor in factors_data:
                x = np.array([
                    point['timestamp'].timestamp() for point in factor.value
                    if point['timestamp'] < timestamp
                ])[-traceback_window:].reshape(-1, 1)
                y = np.array([
                    point['value'] for point in factor.value
                    if point['timestamp'] < timestamp
                ])[-traceback_window:]

                model = linear_model.LinearRegression().fit(x, y)
                y_new = model.predict(x)

                single_point.append(
                    dict(seriesId=factor.series_id,
                         value=model.predict(
                             np.array([timestamp.timestamp()]).reshape(-1,
                                                                       1))[0],
                         mse=mean_squared_error(y, y_new),
                         r2score=r2_score(y, y_new)))
            results.append(
                dict(timestamp=dt_to_str(timestamp),
                     status=InferenceState.Ready.name,
                     result=single_point))

        status, message = self.tsanaclient.save_inference_result(
            parameters, results)
        if status != STATUS_SUCCESS:
            raise Exception(message)

        return STATUS_SUCCESS, ''
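
# The per-point regression above, exercised in isolation: a self-contained
# sketch with synthetic data that mirrors the fit-on-window / predict-next
# pattern (window size and data are made up).
import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

ts = np.arange(0, 10, dtype=float)                     # synthetic timestamps
values = 2.0 * ts + 1.0 + np.random.normal(scale=0.1, size=ts.shape)

window = 5
x = ts[-window:].reshape(-1, 1)                        # last `window` points
y = values[-window:]

model = linear_model.LinearRegression().fit(x, y)
y_fit = model.predict(x)
next_value = model.predict(np.array([[10.0]]))[0]      # forecast one step ahead
print(next_value, mean_squared_error(y, y_fit), r2_score(y, y_fit))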
    def do_inference(self, subscription, model_id, model_dir, parameters):
        log.info("Start to inference %s", model_dir)
        inference_window = parameters['instance']['params']['windowSize']
        meta = self.tsanaclient.get_metric_meta(parameters['apiKey'], parameters['instance']['params']['target']['metricId'])
        if meta is None:
            return STATUS_FAIL, 'Metric is not found.'
        end_time = str_to_dt(parameters['endTime'])
        if 'startTime' in parameters:
            start_time = str_to_dt(parameters['startTime'])
        else:
            start_time = end_time
        cur_time = start_time

        data_end_time = get_time_offset(end_time,
                                        (meta['granularityName'], meta['granularityAmount']),
                                        +1)
        data_start_time = get_time_offset(start_time,
                                          (meta['granularityName'], meta['granularityAmount']),
                                          -inference_window * 2)

        factor_def = parameters['seriesSets']
        factors_data = self.tsanaclient.get_timeseries(parameters['apiKey'], factor_def, data_start_time, data_end_time)

        target_def = [parameters['instance']['params']['target']]
        target_data = self.tsanaclient.get_timeseries(parameters['apiKey'], target_def, data_start_time, data_end_time)

        params = parameters['instance']['params']
        model, window = load_inference_model(
            model_dir=model_dir,
            target_size=params['step'],
            window=inference_window,
            metric_sender=MetricSender(self.config, subscription, model_id),
            epoc=params.get('epoc', self.config.lstm['epoc']),
            validation_freq=params.get('validation_freq', self.config.lstm['validation_freq']),
            validation_ratio=params.get('validation_ratio', self.config.lstm['validation_ratio']))

        input_data = load_inference_input_data(
            target_series=target_data[0],
            factor_series=factors_data,
            model=model,
            gran=Gran[meta['granularityName']],
            custom_in_seconds=meta['granularityAmount'],
            fill_type=Fill[params['fill']] if 'fill' in params else Fill.Previous,
            fill_value=params.get('fillValue', 0))
        while cur_time <= end_time:
            try:
                result = inference(input_data=input_data, window=window, timestamp=cur_time,
                                   target_size=params['step'], model=model)

                if len(result) > 0:
                    # Shift the predicted timestamps back by the configured target offset
                    if 'target_offset' in params:
                        offset = int(params['target_offset'])
                        for idx in range(len(result)):
                            result[idx]['timestamp'] = dt_to_str(
                                get_time_offset(cur_time,
                                                (meta['granularityName'], meta['granularityAmount']),
                                                -offset + idx))
                    self.tsanaclient.save_inference_result(parameters, result)
                else:
                    log.error("No result for this inference %s, key %s" % (dt_to_str(cur_time), model_dir))
            except Exception as e:
                log.error("-------Inference exception------- %s", str(e))

            cur_time = get_time_offset(cur_time,
                                       (meta['granularityName'], meta['granularityAmount']),
                                       +1)
        return STATUS_SUCCESS, ''
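
# The while-loop above walks timestamps one granularity step at a time, the
# same traversal get_time_list performs in the earlier do_inference. A minimal
# hand-rolled equivalent; get_time_offset semantics (shift a datetime by a
# signed number of steps) are inferred from its uses above, not its definition.
def iter_timestamps(start_time, end_time, gran_name, gran_amount):
    cur = start_time
    while cur <= end_time:
        yield cur
        cur = get_time_offset(cur, (gran_name, gran_amount), +1)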