def extract_features_with_param(time_series, window):
    # type: (object, object) -> object
    """
    Extracts the parameterized statistical features from the time series.

    :param time_series: the time series to extract the feature of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    # The is_standard_time_series guard is intentionally skipped here
    # (mirrors the original commented-out check); re-enable it via
    # tsd_common.is_standard_time_series if inputs are untrusted.
    split_time_series = tsd_common.split_time_series(time_series, window)
    # Only the max-min normalization feeds the parameterized features;
    # the unused plain normalization from the original was dropped.
    max_min_normalized_time_series = tsd_common.normalize_time_series_by_max_min(
        split_time_series)
    return statistical_features.get_parameters_features(
        max_min_normalized_time_series)
def extract_features(time_series, window):
    """
    Extracts three types of features from the time series.

    :param time_series: the time series to extract the feature of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    if not tsd_common.is_standard_time_series(time_series, window):
        # add your report of this error here...
        return []

    # Split into windowed sub-series, then normalize two ways for the
    # different feature families.
    parts = tsd_common.split_time_series(time_series, window)
    normalized_parts = tsd_common.normalize_time_series(parts)
    max_min_parts = tsd_common.normalize_time_series_by_max_min(parts)

    statistical = statistical_features.get_statistical_features(normalized_parts[4])
    fitting = fitting_features.get_fitting_features(normalized_parts)
    classification = classification_features.get_classification_features(max_min_parts)

    # combine features with types
    return statistical + fitting + classification
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into five parts. Given a threshold = 0.01,
    return the percentage of elements of time series which are less than
    threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 5 values of this feature
    :return type: list
    """
    threshold = 0.01
    parts = split_time_series(x, DEFAULT_WINDOW)

    # Per-part count of points falling below the threshold.
    below_counts = []
    for part in parts:
        marked = np.array(part)
        marked[marked < threshold] = -1
        below_counts.append((marked == -1).sum())

    if sum(below_counts) == 0:
        return [0, 0, 0, 0, 0]
    # Each part holds DEFAULT_WINDOW + 1 points, so this is a per-part ratio.
    return list(np.array(below_counts) / float(DEFAULT_WINDOW + 1))
def extract_features(time_series, window):
    """
    Extracts three types of features from the time series.

    :param time_series: the time series to extract the feature of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    if not tsd_common.is_standard_time_series(time_series, window):
        # add your report of this error here...
        return []
    # Split the series, then normalize two ways for the different extractors.
    sub_series = tsd_common.split_time_series(time_series, window)
    normalized = tsd_common.normalize_time_series(sub_series)
    max_min_normalized = tsd_common.normalize_time_series_by_max_min(sub_series)
    # combine features with types
    return (statistical_features.get_statistical_features(normalized[4])
            + fitting_features.get_fitting_features(normalized)
            + classification_features.get_classification_features(max_min_normalized))
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into five parts. Given a threshold = 0.01,
    return the percentage of elements of time series which are less than
    threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 5 values of this feature
    :return type: list
    """
    threshold = 0.01
    # For each of the five parts, count the points below the threshold.
    # (Marking them -1 and counting -1s, as the sibling implementation does,
    # is equivalent to counting values < threshold directly.)
    counts = [int((np.array(chunk) < threshold).sum())
              for chunk in split_time_series(x, DEFAULT_WINDOW)]
    if sum(counts) == 0:
        return [0, 0, 0, 0, 0]
    return list(np.array(counts) / float(DEFAULT_WINDOW + 1))
def calculate_all_features(time_series, window):
    """
    Extracts three types of features from the time series.

    :param time_series: the time series to extract the feature of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    # NOTE(review): unlike extract_features, there is no
    # is_standard_time_series guard here -- confirm callers validate the
    # input length themselves.
    sub_series = tsd_common.split_time_series(time_series, window)
    normalized = tsd_common.normalize_time_series(sub_series)
    max_min_normalized = tsd_common.normalize_time_series_by_max_min(sub_series)
    # Local anomaly features from the plain normalization; pattern and
    # statistical features from the max-min normalization.
    anom_feature = feature_calculate.get_classification_features_test(normalized)
    pattern_feature = feature_calculate.get_classification_feature_pattern(
        max_min_normalized)
    stat_feature = feature_calculate.get_classification_feature_stat(
        max_min_normalized)
    return stat_feature + anom_feature + pattern_feature
def time_series_daily_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into three parts: c, b, a. Given a
    threshold = 0.01, return the percentage of elements of time series
    which are less than threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 6 values of this feature
    :return type: list
    """
    threshold = 0.01
    split_value_list = split_time_series(x, DEFAULT_WINDOW)
    # Build the three daily parts; dropping the first point of the second
    # list in each pair presumably avoids double-counting a shared boundary
    # point -- TODO confirm against split_time_series.
    data_c = split_value_list[0] + split_value_list[1][1:]
    data_b = split_value_list[2] + split_value_list[3][1:]
    data_a = split_value_list[4]
    # the number of elements in time series which is less than threshold:
    # (values below the threshold are marked -1, then the -1s are counted)
    nparray_data_c_threshold = np.array(data_c)
    nparray_data_c_threshold[nparray_data_c_threshold < threshold] = -1
    nparray_data_b_threshold = np.array(data_b)
    nparray_data_b_threshold[nparray_data_b_threshold < threshold] = -1
    nparray_data_a_threshold = np.array(data_a)
    nparray_data_a_threshold[nparray_data_a_threshold < threshold] = -1
    # the total number of elements in time series which is less than threshold:
    nparray_threshold_count = (nparray_data_c_threshold == -1).sum() + (
        nparray_data_b_threshold == -1).sum() + (nparray_data_a_threshold == -1).sum()
    if nparray_threshold_count == 0:
        # No sub-threshold points anywhere: the three share ratios are 0.
        features = [0, 0, 0]
    else:
        # Share of all sub-threshold points contributed by each part.
        features = [(nparray_data_c_threshold == -1).sum() / float(nparray_threshold_count),
                    (nparray_data_b_threshold == -1).sum() / float(nparray_threshold_count),
                    (nparray_data_a_threshold == -1).sum() / float(nparray_threshold_count)]
    # Ratio of sub-threshold points within each part's own length; appended
    # in both branches so the result always has 6 values.
    features.extend([
        (nparray_data_c_threshold == -1).sum() / float(len(data_c)),
        (nparray_data_b_threshold == -1).sum() / float(len(data_b)),
        (nparray_data_a_threshold == -1).sum() / float(len(data_a))
    ])
    return features
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into five parts. Given a threshold = 0.01,
    return the percentage of elements of time series which are less than
    threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 5 values of this feature
    :return type: list
    """
    threshold = 0.01
    split_value_list = split_time_series(x, DEFAULT_WINDOW)
    count_list = []
    for value_list in split_value_list:
        # Mark points below the threshold as -1, then count the marks.
        nparray_threshold = np.array(value_list)
        nparray_threshold[nparray_threshold < threshold] = -1
        count_list.append((nparray_threshold == -1).sum())
    if sum(count_list) == 0:
        features = [0, 0, 0, 0, 0]
    else:
        # Each part holds DEFAULT_WINDOW + 1 points, so this is a per-part
        # ratio of sub-threshold points.
        features = list(np.array(count_list) / float(DEFAULT_WINDOW + 1))
    return features
def time_series_daily_parts_value_distribution(x):
    """
    Given buckets, calculate the percentage of elements in three
    subsequences of the whole time series in different buckets

    :param x: normalized time series
    :type x: pandas.Series
    :return: the values of this feature
    :return type: list
    """
    # The duplicated final 1.0 edge creates a [1.0, 1.0] bucket that
    # captures exactly-1.0 values separately.
    bucket_edges = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
                    0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    parts = split_time_series(x, DEFAULT_WINDOW)
    segments = [
        parts[0] + parts[1][1:],   # c
        parts[2] + parts[3][1:],   # b
        parts[4],                  # a
    ]
    features = []
    for segment in segments:
        histogram = np.histogram(segment, bins=bucket_edges)[0]
        features.extend(histogram / float(len(segment)))
    return features
def time_series_daily_parts_value_distribution(x):
    """
    Given buckets, calculate the percentage of elements in three
    subsequences of the whole time series in different buckets

    :param x: normalized time series
    :type x: pandas.Series
    :return: the values of this feature
    :return type: list
    """
    thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
                  0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    windows = split_time_series(x, DEFAULT_WINDOW)
    data_c = windows[0] + windows[1][1:]
    data_b = windows[2] + windows[3][1:]
    data_a = windows[4]

    def bucket_ratios(values):
        # Fraction of this subsequence's points per bucket.
        counts = np.histogram(values, bins=thresholds)[0]
        return list(counts / float(len(values)))

    return bucket_ratios(data_c) + bucket_ratios(data_b) + bucket_ratios(data_a)
def time_series_daily_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into three parts: c, b, a. Given a
    threshold = 0.01, return the percentage of elements of time series
    which are less than threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 6 values of this feature
    :return type: list
    """
    threshold = 0.01
    windows = split_time_series(x, DEFAULT_WINDOW)
    segments = [windows[0] + windows[1][1:],  # c
                windows[2] + windows[3][1:],  # b
                windows[4]]                   # a

    # Per-segment number of points falling below the threshold (marking as
    # -1 and counting -1s, exactly like the sibling implementation).
    below_counts = []
    for segment in segments:
        marked = np.array(segment)
        marked[marked < threshold] = -1
        below_counts.append((marked == -1).sum())

    total_below = sum(below_counts)
    if total_below == 0:
        features = [0, 0, 0]
    else:
        # Share of all sub-threshold points contributed by each segment.
        features = [count / float(total_below) for count in below_counts]
    # Ratio of sub-threshold points within each segment's own length;
    # always appended, so the result has 6 values in both branches.
    features.extend(count / float(len(segment))
                    for segment, count in zip(segments, below_counts))
    return features
def extract_features_without_param(time_series, window):
    # type: (object, object) -> object
    """
    Extracts three types of features from the time series.

    :param time_series: the time series to extract the feature of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    # The is_standard_time_series guard is intentionally skipped here
    # (mirrors the original commented-out check); re-enable it via
    # tsd_common.is_standard_time_series if inputs are untrusted.
    # The original also computed tsd_common.split_time_series2 into an
    # unused local; that dead call was removed.
    sub_series = tsd_common.split_time_series(time_series, window)
    normalized = tsd_common.normalize_time_series(sub_series)
    max_min_normalized = tsd_common.normalize_time_series_by_max_min(sub_series)
    s_features = statistical_features.get_statistical_features(normalized[4])
    f_features = fitting_features.get_fitting_features(normalized)
    c_features = classification_features.get_classification_features(
        max_min_normalized)
    # combine features with types (statistical, classification, fitting)
    return s_features + c_features + f_features
def _f():
    # NOTE(review): work-in-progress generator. It reads `x`,
    # `split_time_series`, `DEFAULT_WINDOW` and `np` as free/module-level
    # names (no parameters), and looks like a per-part, named-feature
    # variant of time_series_window_parts_value_distribution_with_threshold
    # -- confirm intent before use.
    threshold = 0.01
    split_value_list = split_time_series(x, DEFAULT_WINDOW)
    count_list = []
    a = 0  # running part index used to build each feature's name
    for value_list in split_value_list:
        # Mark points below the threshold as -1, then count the marks.
        nparray_threshold = np.array(value_list)
        nparray_threshold[nparray_threshold < threshold] = -1
        temp = (nparray_threshold == -1).sum()
        count_list.append((nparray_threshold == -1).sum())
        name = ("time_series_window_parts_value_distribution_with_threshold_{}".format(a))
        a = a + 1
        if sum(count_list) == 0:
            # NOTE(review): in this branch `features` is a list of five
            # dicts, while the else-branch yields a scalar -- the yielded
            # value's shape is inconsistent between branches.
            # features = [0, 0, 0, 0, 0]
            features = [{'time_series_window_parts_value_distribution_with_threshold_Ais0': 0},
                        {'time_series_window_parts_value_distribution_with_threshold_bis0': 0},
                        {'time_series_window_parts_value_distribution_with_threshold_cis0': 0},
                        {'time_series_window_parts_value_distribution_with_threshold_Dis0': 0},
                        {'time_series_window_parts_value_distribution_with_threshold_Eis0': 0}]
        else:
            # Ratio of sub-threshold points for the current part only.
            features = temp / float((DEFAULT_WINDOW + 1))
            # list(np.array(count_list) / float((DEFAULT_WINDOW + 1)))
        yield {'{}'.format(name): features}
def time_series_daily_parts_value_distribution(x):
    """
    Bucket three daily subsequences of the series and return, per bucket,
    the fraction of each subsequence's points that fall into it.

    NOTE(review): translated from the original Chinese comment -- the
    concern was that the final return value ends up as one list per row;
    check the data returned by the combine step.

    :param x: normalized time series
    :type x: pandas.Series
    :return: the values of this feature
    :return type: list
    """
    edges = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
             0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    pieces = split_time_series(x, DEFAULT_WINDOW)
    day_c = pieces[0] + pieces[1][1:]
    day_b = pieces[2] + pieces[3][1:]
    day_a = pieces[4]
    result = []
    for data in (day_c, day_b, day_a):
        counts = np.histogram(data, bins=edges)[0]
        result += list(counts / float(len(data)))
    return result