Example #1
def getdata(data, samplerate=44100):
    # Convert to a NumPy array so .shape works below (the original kept a list).
    data = np.asarray([float(i) for i in data])
    sound_list["samplerate"] = samplerate
    sound_list["wavedata"] = data
    sound_list["number_of_samples"] = sound_list["wavedata"].shape[0]
    sound_list["song_length"] = int(sound_list["number_of_samples"] / samplerate)
    # Seven summary statistics per descriptor: min, max, mean, std, median, skew, kurtosis.
    zcr, ts = zero_crossing_rate(data, 1024, sound_list["samplerate"])
    ans = [np.min(zcr), np.max(zcr), np.mean(zcr), np.std(zcr), np.median(zcr), st.skew(zcr), st.kurtosis(zcr)]
    rms, ts = root_mean_square(data, 1024, sound_list["samplerate"])
    ans += [np.min(rms), np.max(rms), np.mean(rms), np.std(rms), np.median(rms), st.skew(rms), st.kurtosis(rms)]
    sc, ts = spectral_centroid(data, 1024, sound_list["samplerate"])
    ans += [np.min(sc), np.max(sc), np.mean(sc), np.std(sc), np.median(sc), st.skew(sc), st.kurtosis(sc)]
    sr, ts = spectral_rolloff(data, 1024, sound_list["samplerate"])
    ans += [np.min(sr), np.max(sr), np.mean(sr), np.std(sr), np.median(sr), st.skew(sr), st.kurtosis(sr)]
    sf, ts = spectral_flux(data, 1024, sound_list["samplerate"])
    ans += [np.min(sf), np.max(sf), np.mean(sf), np.std(sf), np.median(sf), st.skew(sf), st.kurtosis(sf)]
    # Flatten the per-frame MFCC matrix into the feature vector.
    x.set_input_data(data)
    for frame in x.MFCCs:
        for coeff in frame:
            ans.append(coeff)
    return ans
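Every descriptor above is summarized by the same seven statistics. A small helper keeps the pattern in one place (a sketch, assuming numpy as np and scipy.stats.stats as st, as in the snippet; this helper is not part of the original):

def seven_stats(x):
    # min, max, mean, std, median, skewness, kurtosis
    return [np.min(x), np.max(x), np.mean(x), np.std(x),
            np.median(x), st.skew(x), st.kurtosis(x)]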
Example #2
def get_transform_funcs(train, cols):
    transform_funcs = []
    for col in cols:
        vector = [row[col] for row in train]
        transforms = [(skew(vector, bias=False), "none"),
                      (skew(log_transform(vector), bias=False), "log"),
                      (skew(sqrt_transform(vector), bias=False), "sqrt")]
        # Note: sorting on the raw skew picks the most negative value,
        # not the skew closest to zero.
        best_transform = sorted(transforms)[0][1]
        transform_funcs.append(best_transform)
    return transform_funcs
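If the intent is the transform that leaves the distribution least skewed, sorting on the raw skew favors the most negative value; a sketch using absolute skewness instead, with hypothetical log_transform/sqrt_transform definitions (neither is shown in the snippet):

import numpy as np
from scipy.stats import skew

def log_transform(v):
    return np.log1p(v)   # assumes non-negative values

def sqrt_transform(v):
    return np.sqrt(v)    # assumes non-negative values

def best_transform_name(vector):
    candidates = [(skew(vector, bias=False), "none"),
                  (skew(log_transform(vector), bias=False), "log"),
                  (skew(sqrt_transform(vector), bias=False), "sqrt")]
    # Pick the transform whose skewness is closest to zero.
    return min(candidates, key=lambda t: abs(t[0]))[1]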
Example #3
def get_mfcc_features(filename):
    feature_dict = {}
    (rate, sig) = wav.read(filename)

    if sig.ndim == 2:
        # wav is stereo so average over both channels
        try:
            mfcc_feat_chan0 = mfcc(sig[:, 0],
                                   rate,
                                   numcep=15,
                                   appendEnergy=True)
            mfcc_feat_chan1 = mfcc(sig[:, 1],
                                   rate,
                                   numcep=15,
                                   appendEnergy=True)
            mfcc_feat = (mfcc_feat_chan0 + mfcc_feat_chan1) / 2
        except IndexError:
            print('Index error')
            mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    else:
        mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    # "Velocity": half the backward difference between consecutive frames
    vel = (mfcc_feat[:-1, :] - mfcc_feat[1:, :]) / 2.0
    # "Acceleration": half the backward difference of the velocity
    acc = (vel[:-1, :] - vel[1:, :]) / 2.0
    mfcc_means = []
    for i in range(0, 14):
        key = "energy" if i == 0 else "mfcc" + str(i)
        # mfcc
        feature_dict[key + "_mean"] = mfcc_feat[:, i].mean()
        feature_dict[key + "_var"] = mfcc_feat[:, i].var()
        feature_dict[key + "_skewness"] = st.skew(mfcc_feat[:, i])
        feature_dict[key + "_kurtosis"] = st.kurtosis(mfcc_feat[:, i])
        # Vel
        feature_dict[key + "_vel_mean"] = vel[:, i].mean()
        feature_dict[key + "_vel_var"] = vel[:, i].var()
        feature_dict[key + "_vel_skewness"] = st.skew(vel[:, i])
        feature_dict[key + "_vel_kurtosis"] = st.kurtosis(vel[:, i])
        # Accel
        feature_dict[key + "_accel_mean"] = acc[:, i].mean()
        feature_dict[key + "_accel_var"] = acc[:, i].var()
        feature_dict[key + "_accel_skewness"] = st.skew(acc[:, i])
        feature_dict[key + "_accel_kurtosis"] = st.kurtosis(acc[:, i])

        # Need the skewness and kurtosis of all mfcc means
        if i > 0:
            mfcc_means.append(feature_dict[key + "_mean"])

    feature_dict["mfcc_skewness"] = st.skew(mfcc_means)
    feature_dict["mfcc_kurtosis"] = st.kurtosis(mfcc_means)
    return feature_dict
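For reference, the delta ("velocity") features can also be written with np.diff; note the snippet's sign convention is frame t minus frame t+1. A quick equivalence check on dummy data:

import numpy as np

mfcc_feat = np.random.randn(100, 15)       # stand-in for the MFCC matrix
vel = -np.diff(mfcc_feat, axis=0) / 2.0    # == (mfcc_feat[:-1] - mfcc_feat[1:]) / 2
acc = -np.diff(vel, axis=0) / 2.0
assert np.allclose(vel, (mfcc_feat[:-1] - mfcc_feat[1:]) / 2.0)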
Example #5
def AAcal(seqcont):
    v = []
    for vtar in seqcont:
        vtarv = []
        # Four plain statistical terms.
        vtar3 = np.mean(vtar)
        vtar4 = st.kurtosis(vtar)
        vtar5 = np.var(vtar)
        vtar6 = st.skew(vtar)
        # (Three further dimensions inspired by the PAFIG algorithm were
        # commented out in the original source and are omitted here.)
        # Sequence-order-correlation terms: lag-1 and lag-2 differences.
        vcf1 = [vtar[j] - vtar[j + 1] for j in range(len(vtar) - 1)]
        vcf2 = [vtar[k] - vtar[k + 2] for k in range(len(vtar) - 2)]
        vtarv.extend([vtar3, vtar4, vtar5, vtar6])
        vtarv.extend([np.mean(vcf1), np.var(vcf1), st.kurtosis(vcf1), st.skew(vcf1)])
        vtarv.extend([np.mean(vcf2), np.var(vcf2), st.kurtosis(vcf2), st.skew(vcf2)])
        v.append(vtarv)
    return v
Example #6
    def __extract_features(self, mfcc_data: dict) -> dict:
        """
        Extracts the features from the MFCC data
        :param mfcc_data: MFCC data for an audio chunk
        :return: the extracted features from the input MFCC data
        """
        features, mfcc_means = {}, []

        for i in range(0, 14):
            key = "energy" if i == 0 else "mfcc_" + str(i)

            features.update(
                self.__get_summary_stats(key, mfcc_data["mfcc_features"], i))
            features.update(
                self.__get_summary_stats(key + "_velocity",
                                         mfcc_data["velocity"], i))
            features.update(
                self.__get_summary_stats(key + "_acceleration",
                                         mfcc_data["acceleration"], i))

            if i > 0:
                mfcc_means.append(features[key + "_mean"])

        features["mfcc_skewness"] = st.skew(np.array(mfcc_means))
        features["mfcc_kurtosis"] = st.kurtosis(mfcc_means)

        return features
Example #7
def get_data(column, np_values, alpha):

    mvs = bayes_mvs(np_values, alpha)

    #report these metrics
    output = [
        present("Column", column),
        present("Length", len(np_values)),
        present("Unique", len(np.unique(np_values))),
        present("Min", np_values.min()),
        present("Max", np_values.max()),
        present("Mid-Range", (np_values.max() - np_values.min())/2),
        present("Range", np_values.max() - np_values.min()),
        present("Mean", np_values.mean()),
        present("Mean-%s-CI" % alpha, tupleToString(mvs[0][1])),
        present("Variance", mvs[1][0]),
        present("Var-%s-CI" % alpha, tupleToString(mvs[1][1])),
        present("StdDev", mvs[2][0]),
        present("Std-%s-CI" % alpha, tupleToString(mvs[2][1])),
        present("Mode", stats.mode(np_values)[0][0]),
        present("Q1", stats.scoreatpercentile(np_values, 25)),
        present("Q2", stats.scoreatpercentile(np_values, 50)),
        present("Q3", stats.scoreatpercentile(np_values, 75)),
        present("Trimean", trimean(np_values)),
        present("Minhinge", midhinge(np_values)),
        present("Skewness", stats.skew(np_values)),
        present("Kurtosis", stats.kurtosis(np_values)),
        present("StdErr", sem(np_values)),
        present("Normal-P-value", normaltest(np_values)[1])
        ]
    return output
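trimean and midhinge are not defined in the snippet; the standard definitions would be (a sketch):

from scipy import stats

def trimean(x):
    # Tukey's trimean: (Q1 + 2*Q2 + Q3) / 4
    q1, q2, q3 = (stats.scoreatpercentile(x, p) for p in (25, 50, 75))
    return (q1 + 2 * q2 + q3) / 4

def midhinge(x):
    # Midpoint of the first and third quartiles: (Q1 + Q3) / 2
    q1, q3 = (stats.scoreatpercentile(x, p) for p in (25, 75))
    return (q1 + q3) / 2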
Example #9
def get_stats_numpy(data, zero):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25, 50, 75, 90]
    percentiles = np.array(np.percentile(data, pc))
    silences = np.count_nonzero(np.asarray(data) == zero)
    # Guard against an empty run list, and compare against `zero` consistently
    # (the original mixed `k == zero` and `k == 0`).
    silence_mean = np.mean(
        [sum(1 for _ in g) for k, g in groupby(data) if k == zero]
    ) if silences > 0 else 0
    longest_silence = max(
        sum(1 for _ in g) for k, g in groupby(data)
        if k == zero) if silences > 0 else 0
    shortest_silence = min(
        sum(1 for _ in g) for k, g in groupby(data)
        if k == zero) if silences > 0 else 0

    # print("Mean: " + str(mean))
    # print("Media: " + str(median))
    # print("StdDev: " + str(std))
    # print("Variance: " + str(var))
    # print("Skewness: " + str(skew))
    # print("Kurtosis: " + str(kurt))
    # print("Pc25: " + str(percentiles[0]))
    # print("Pc50: " + str(percentiles[1]))
    # print("Pc75: " + str(percentiles[2]))

    features = np.hstack(
        (mean, median, std, var, skew, kurt, percentiles, silences,
         silence_mean, longest_silence, shortest_silence))

    return features
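The groupby expressions above compute run lengths of consecutive equal samples; a minimal demonstration of the trick:

from itertools import groupby

data = [0, 0, 1, 0, 0, 0, 2, 0]
runs = [sum(1 for _ in g) for k, g in groupby(data) if k == 0]
print(runs)                   # [2, 3, 1] -- lengths of the zero runs
print(max(runs), min(runs))   # 3 1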
Example #10
def get_stats_json(data):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25,50,75]
    percentiles = np.array(np.percentile(data, pc))
    silences = np.count_nonzero(np.asarray(data)==0.0)
    longest_silence = max(sum(1 for _ in g) for k, g in groupby(data) if k==0) if silences > 0 else 0
    shortest_silence = min(sum(1 for _ in g) for k, g in groupby(data) if k==0) if silences > 0 else 0
    #print("Mean: " + str(mean))
    #print("Media: " + str(median))
    #print("StdDev: " + str(std))
    #print("Variance: " + str(var))
    #print("Skewness: " + str(skew))
    #print("Kurtosis: " + str(kurt))
    #print("Pc25: " + str(percentiles[0]))
    #print("Pc50: " + str(percentiles[1]))
    #print("Pc75: " + str(percentiles[2]))
    
    statistiscs = {
        'mean': mean,
        'median': median,
        'std': std,
        'var': var,
        'skew': skew,
        'kurt': kurt,
        'pc25': percentiles[0],
        'pc50': percentiles[1],
        'pc75': percentiles[2],
    }
    
    return statistiscs
Example #11
def get_launch_feature(row):
    feature = pd.Series()
    feature['user_id'] = list(row['user_id'])[0]
    # feature['launch_count'] = len(row)
    diff_day = np.diff(row['day'])
    if len(diff_day) != 0:
        feature['launch_day_diff_mean'] = np.mean(diff_day)
        feature['launch_day_diff_std'] = np.std(diff_day)
        feature['launch_day_diff_max'] = np.max(diff_day)
        feature['launch_day_diff_min'] = np.min(diff_day)
        feature['launch_day_diff_kur'] = stats.kurtosis(diff_day)
        feature['launch_day_diff_ske'] = stats.skew(diff_day)
        feature['launch_day_diff_last'] = diff_day[-1]
    else:
        for suffix in ('mean', 'std', 'max', 'min', 'kur', 'ske', 'last'):
            feature['launch_day_diff_' + suffix] = 0
    # feature['launch_day_cut_max_day'] = day_cut_max_day(row['day'])
    # Identical in both branches of the original, so hoisted out.
    feature['launch_sub_register'] = np.max(row['max_day']) - np.max(row['day'])

    launch_day_count = np.bincount(row['day'])[np.nonzero(
        np.bincount(row['day']))[0]]
    feature['launch_day_count_mean'] = np.mean(launch_day_count)
    feature['launch_day_count_max'] = np.max(launch_day_count)
    feature['launch_day_count_std'] = np.std(launch_day_count)
    return feature
Example #12
def ICAFilter(signal=None):
    # EEG filtering based on Independent Component Analysis

    # ICA decomposition (scikit-learn FastICA; get_mixing_matrix() was renamed
    # to the mixing_ attribute, and boolean whiten is no longer accepted)
    ica = FastICA(whiten="arbitrary-variance")
    IC = ica.fit_transform(signal)
    A = ica.mixing_  # signal = np.dot(IC, A.T)

    # noise metrics
    sigma2 = IC.std(ddof=1, axis=0)**2
    f1 = np.abs(IC).max(axis=0) / sigma2
    f2 = np.abs(stats.skew(IC, bias=False, axis=0))
    f = np.hstack((f1.reshape((len(f1), 1)), f2.reshape((len(f2), 1))))
    fr = f.copy()
    f /= f.max(axis=0)
    norm = np.linalg.norm(f, axis=1)  # row-wise norm of the scaled metrics

    # remove noisy IC
    ind = norm.argmax()
    IC_ = IC.copy()
    IC_[:, ind] = 0

    # recompute signal
    signalF = np.dot(IC_, A.T)

    return signalF, IC, fr
Example #13
def usr_moments(coords):
    """
    Calculates the USR moments for a set of input coordinates as well as the four
    USR reference atoms.

    :param coords: numpy.ndarray
    """
    # centroid of the input coordinates
    ctd = coords.mean(axis=0)

    # get the distances to the centroid
    dist_ctd = distance_to_point(coords, ctd)

    # get the closest and furthest coordinate to/from the centroid
    cst, fct = coords[dist_ctd.argmin()], coords[dist_ctd.argmax()]

    # get the distance distributions for the points that are closest/furthest
    # to/from the centroid
    dist_cst = distance_to_point(coords, cst)
    dist_fct = distance_to_point(coords, fct)

    # get the point that is the furthest from the point that is furthest from
    # the centroid
    ftf = coords[dist_fct.argmax()]
    dist_ftf = distance_to_point(coords, ftf)

    # calculate the first three moments for each of the four distance distributions
    moments = concatenate([(ar.mean(), ar.std(), cbrt(skew(ar)))
                           for ar in (dist_ctd, dist_cst, dist_fct, dist_ftf)])

    # return the USR moments as well as the four points for later re-use
    return (ctd, cst, fct, ftf), moments
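distance_to_point is not shown in the snippet; a plausible definition, assuming cbrt/concatenate/skew come from numpy and scipy.stats:

import numpy as np

def distance_to_point(coords, point):
    # Euclidean distance from every coordinate row to a single point.
    return np.linalg.norm(coords - point, axis=1)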
Example #14
 def _get_reward(self, real_values: dict, i: int):
     """
     Get the reward returned after previous action
     """
     df = pd.read_csv('output.csv', skiprows=[0], sep=';')
     last_return = df['price'].values[-1] / self.init_price - 1
     reward = {'return': last_return}
     if i < 100:  # + 1
         return reward
     # Append the latest return to the trailing window; the original added a
     # Python list to a NumPy array, which broadcasts instead of concatenating.
     returns = np.append(self.sim_df.tail(99)['return'].dropna().values,
                         last_return)
     mu, sigma = norm.fit(returns)
     skew, kurtosis = st.skew(returns), st.kurtosis(returns)
     # autocorr = f_autocorr(np.abs(returns))[0, 1]
     reward.update({
         'mu': mu,
         'sigma': sigma,
         'skew': skew,
         'kurtosis': kurtosis,
         # 'autocorr': autocorr,
     })
     # error = {
     #     k: np.abs((reward[k] - real_values[k])**2 / real_values[k])
     #     for k, v in reward.items() if k != 'return'
     # }
     sub_df = self.df.iloc[i - 100:i]
     error = {
         k: ((reward[k] - sub_df[k].mean()) / sub_df[k].std())**2
         for k, v in reward.items() if k != 'return'
     }
     reward['error'] = -sum(error.values())
     os.remove('output.csv')
     return reward
Example #15
def get_mean_var_skew_kurt(np_array):
    return {
        "mean": np_array.mean(),
        "var": np_array.var(),
        "skewness": st.skew(np_array),
        "kurtosis": st.kurtosis(np_array),
    }
Example #16
 def __get_summary_stats(key: str, data: np.ndarray,
                         coefficient: int) -> dict:
     return {
         key + "_mean": data[:, coefficient].mean(),
         key + "_variance": data[:, coefficient].var(),
         key + "_skewness": st.skew(data[:, coefficient]),
         key + "_kurtosis": st.kurtosis(data[:, coefficient])
     }
Example #17
def base_stats(data_1):
    # Four summary statistics per row: skew, kurtosis, max, std
    stats_dict = np.zeros((data_1.shape[0], 4))
    for i in range(data_1.shape[0]):
        stats_dict[i, 0] = st.skew(data_1[i], bias=False)
        stats_dict[i, 1] = st.kurtosis(data_1[i], bias=False)
        stats_dict[i, 2] = np.max(data_1[i])
        stats_dict[i, 3] = np.std(data_1[i])
    return stats_dict
Example #18
 def extract(self, sourcepc, neighborhood, targetpc, targetindex,
             volume_description):
     if neighborhood:
         # `point` is a module-level key constant in the source project
         # (apparently laserchicken's keys module), not an undefined name.
         source_data = sourcepc[point][self.data_key]['data'][neighborhood]
         skew = stat.skew(source_data)
     else:
         skew = np.nan
     return skew
Example #19
 def _extract_one(self, point_cloud, neighborhood):
     if neighborhood:
         source_data = point_cloud[point][
             self.data_key]['data'][neighborhood]
         skew = stat.skew(source_data)
     else:
         skew = np.nan
     return skew
Example #20
def skewness_features(img, pcloud):
    feats = []
    points = pcloud.get_numpy()
    dim = pcloud.dims
    for x_i in range(dim):
        # Skewness of the point distribution along dimension x_i
        # (the original variable name corr_xy was a misnomer).
        skew_x = st.skew(points[:, x_i])
        feats.append(skew_x)
    return feats
Example #21
def skewness_normal_distribution(df, features, crypto_name, output_path):
    res = {'feature': [], 'skewness_of_n_distrib': []}
    for feature in features:
        #df = df.dropna(subset=[feature])
        # stats.skew returns a single float, so the original two-name
        # unpacking (stat, p = ...) would raise a TypeError.
        stat = stats.skew(df[feature])
        res['feature'].append(feature)
        res['skewness_of_n_distrib'].append(stat)
    pd.DataFrame(data=res).to_csv(output_path + crypto_name + ".csv",
                                  sep=",",
                                  index=False)
Example #22
def main():
    """main function"""

    json_string = input()  # Python 3; the original used raw_input()
    #json_string = data1

    # load json data
    parsed_json = json.loads(json_string)

    # histogram record length
    rec_len = len(parsed_json[0]['histogram'])

    # variables declaration
    date_lst = []
    rmse_lst = []
    skewnessList = []

    # loop through the record
    for record in parsed_json:
        # extract date part
        date = record['date']

        # extract histogram data part
        histogram = record['histogram']

        # compute rmse (root mean squared error for the histogram)
        rmse = compute_rmse(histogram)

        # add the computed rmse to a list. This gives us a date-wise rmse
        # values for histogram
        rmse_lst.append(rmse)

        # add date to a date list
        date_lst.append(date)

        # compute skewness of the histogram.
        # Skewness is the measure of symmetry.
        # If there is a spurious rise in the skewness value, there is a
        # clear deviation of the distribution on a certain day from how it
        # appeared on the previous one
        skewnessList.append(st.skew(histogram))

    # compute standard deviation for the rmse list
    stddev = compute_stddev(rmse_lst)

    # compute two standard deviation of skewness list
    skew_2stdDev = compute_2stddev(skewnessList)

    # check for regression
    regression_date = check_regression(rmse_lst, stddev, skewnessList,
                                       skew_2stdDev, date_lst)

    # print the regression date. If regression is found then print the date
    # else print an empty string
    print(regression_date)
Example #23
def get_mfcc_features(filename):
    feature_dict = {}
    (rate, sig) = wav.read(filename)

    if sig.ndim == 2:
        # wav is stereo so average over both channels 
        mfcc_feat_chan0 = mfcc(sig[:,0], rate, numcep=15, appendEnergy=True)
        mfcc_feat_chan1 = mfcc(sig[:,1], rate, numcep=15, appendEnergy=True)
        mfcc_feat = (mfcc_feat_chan0 + mfcc_feat_chan1) / 2
    else:
        mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    # "Velocity": half the backward difference between consecutive frames
    vel = (mfcc_feat[:-1,:] - mfcc_feat[1:,:]) / 2.0
    # "Acceleration": half the backward difference of the velocity
    acc = (vel[:-1,:] - vel[1:,:]) / 2.0
    mfcc_means = []
    for i in range(0, 14):
        key = "energy" if i == 0 else "mfcc" + str(i)
        # mfcc
        feature_dict[key + "_mean"]     = mfcc_feat[:, i].mean()
        feature_dict[key + "_var"]      = mfcc_feat[:, i].var()
        feature_dict[key + "_skewness"] = st.skew(mfcc_feat[:, i])
        feature_dict[key + "_kurtosis"] = st.kurtosis(mfcc_feat[:, i])
        # Vel
        feature_dict[key + "_vel_mean"]     = vel[:, i].mean()
        feature_dict[key + "_vel_var"]      = vel[:, i].var()
        feature_dict[key + "_vel_skewness"] = st.skew(vel[:, i])
        feature_dict[key + "_vel_kurtosis"] = st.kurtosis(vel[:, i])
        # Accel
        feature_dict[key + "_accel_mean"]     = acc[:, i].mean()
        feature_dict[key + "_accel_var"]      = acc[:, i].var()
        feature_dict[key + "_accel_skewness"] = st.skew(acc[:, i])
        feature_dict[key + "_accel_kurtosis"] = st.kurtosis(acc[:, i])

        # Need the skewness and kurtosis of all mfcc means 
        if i > 0:
            mfcc_means.append(feature_dict[key + "_mean"])
    
    feature_dict["mfcc_skewness"] = st.skew(mfcc_means)
    feature_dict["mfcc_kurtostis"] = st.kurtosis(mfcc_means)
    return feature_dict
Example #24
File: pdf.py  Project: toejamhoney/nabu
 def aggregate_ftr_matrix(self, ftr_matrix):
     sig = []
     for ftr in ftr_matrix:
         # scipy.stats' nanmedian/nanmean/nanstd were removed; use NumPy's
         median = np.nanmedian(ftr)
         mean = np.nanmean(ftr)
         std = np.nanstd(ftr)
         # Invalid double scalars warning appears here
         skew = stats.skew(ftr) if any(ftr) else 0.0
         kurtosis = stats.kurtosis(ftr)
         sig.extend([median, mean, std, skew, kurtosis])
     return sig
Example #26
def startApplication(images):
    trainData = []
    for image in images:
        img = Image.open(image)

        # Grayscale conversion
        imgBW = convertGrayscale(img)

        # Scaling to 100x100
        imgResize = scaleImage(imgBW)

        # Denoising using median filtering
        imgDenoised = denoiseImage(imgResize)

        # Background elimination
        imgBackgroundEliminated = backgroundEliminate(imgDenoised)

        # Signature Normalization
        imgNormal = normalizeImage(imgBackgroundEliminated)

        # Thinning Image
        imgThin = thinImage(imgNormal, 300)

        # Feature extraction
        #
        # Global Feature

        # Density feature
        density = getDensityOfImage(imgThin)
        print('Density ', density)

        # Width to height ratio
        widthHeightRatio = getWidthToHeightRatio(imgThin)
        print('Width to height ratio', widthHeightRatio)

        # Slope feature
        slope = getSlope(imgThin)
        print('Slope', slope)

        # Skew feature
        skew = stats.skew(imgThin)
        print('Skew', skew)

        # Constructing train data
        pattern = []
        pattern.append(density)
        pattern.append(widthHeightRatio)
        pattern.append(slope)
        pattern.extend(skew)

        trainData.append(pattern)

    # Training
    train(trainData)
Example #27
def get_feature(region_props, n_region, feature_name):
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = format_2f(np.max(feature_values))
        feature[MEAN] = format_2f(np.mean(feature_values))
        feature[VARIANCE] = format_2f(np.var(feature_values))
        feature[SKEWNESS] = format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = format_2f(st.kurtosis(np.array(feature_values)))

    return feature
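The index constants (MAX, MEAN, VARIANCE, SKEWNESS, KURTOSIS) and format_2f are defined elsewhere in these projects; plausible definitions, as a sketch:

MAX, MEAN, VARIANCE, SKEWNESS, KURTOSIS = range(5)

def format_2f(value):
    # Format a statistic to two decimal places.
    return float("{0:.2f}".format(value))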
Example #28
def get_feature(region_props, n_region, feature_name):
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = utils.format_2f(np.max(feature_values))
        feature[MEAN] = utils.format_2f(np.mean(feature_values))
        feature[VARIANCE] = utils.format_2f(np.var(feature_values))
        feature[SKEWNESS] = utils.format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = utils.format_2f(st.kurtosis(np.array(feature_values)))

    return feature
Example #29
def extract_features_for_pqrst(row, pqrsts):
    features = []

    p = [x[0] for x in pqrsts]
    q = [x[1] for x in pqrsts]
    r = [x[2] for x in pqrsts]
    s = [x[3] for x in pqrsts]
    t = [x[4] for x in pqrsts]

    pqrsts = pqrsts[:min(NB_RR, len(pqrsts))]
    row = low_pass_filtering(row)
    row = high_pass_filtering(row)
    for i in range(len(pqrsts)):
        pq = row[p[i]:q[i]]
        st = row[s[i]:t[i]]
        pt = row[p[i]:t[i]]
        pmax = np.amax(pq)
        pmin = np.amin(pq)  # the original used np.amax here (copy-paste slip)
        tmax = np.amax(st)
        tmin = np.amin(st)  # likewise

        p_mean = np.mean(pq)
        t_mean = np.mean(st)

        features += [
            # features for PQ interval
            pmax,
            pmax / row[r[i]],
            pmin / pmax,
            p_mean,
            p_mean / pmax,
            np.std(pq),
            common.mode(pq),

            # feature for ST interval
            tmax,
            tmax / row[r[i]],
            tmin / tmax,
            t_mean,
            t_mean / tmax,
            np.std(st),
            common.mode(st),
            p_mean / t_mean,

            # features for whole PQRST interval
            stats.skew(pt),
            stats.kurtosis(pt)
        ]

    # Zero-pad so every record yields NB_RR * 17 feature values.
    for i in range(NB_RR - len(pqrsts)):
        features += [0] * 17

    return features
Example #30
def get_feature(region_props, n_region, feature_name):
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature = feature_tuple(
            MAX=format_2f(np.max(feature_values)),
            MEAN=format_2f(np.mean(feature_values)),
            VARIANCE=format_2f(np.var(feature_values)),
            SKEWNESS=format_2f(st.skew(np.array(feature_values))),
            KURTOSIS=format_2f(st.kurtosis(np.array(feature_values))))
    else:
        feature = feature_tuple(*([0] * 5))
    return feature
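feature_tuple is not defined in the snippet; a namedtuple along these lines would fit its keyword and positional uses (an assumption, not the project's actual code):

from collections import namedtuple

feature_tuple = namedtuple(
    "feature_tuple", ["MAX", "MEAN", "VARIANCE", "SKEWNESS", "KURTOSIS"])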
Example #32
 def _calculateStatistics(self, img, haralick=False, zernike=False):
     result = []
     # Three quantiles (mquantiles defaults to the quartiles)
     result.extend(mquantiles(img))
     # First four moments
     result.extend([img.mean(), img.var(), skew(img, axis=None), kurtosis(img, axis=None)])
     # Haralick features
     if haralick:
         integerImage = dtype.img_as_ubyte(img)
         result.extend(texture.haralick(integerImage).flatten())
     # Zernike moments
     if zernike:
         result.extend(zernike_moments(img, int(self.rows) // 2 + 1))
     return result
Example #33
def usr_moments_with_existing(coords, ref_points):
    """
    Calculates the USR moments for a set of coordinates and an already existing
    set of four USR reference points.
    """
    ctd, cst, fct, ftf = ref_points
    dist_ctd = distance_to_point(coords, ctd)
    dist_cst = distance_to_point(coords, cst)
    dist_fct = distance_to_point(coords, fct)
    dist_ftf = distance_to_point(coords, ftf)

    moments = concatenate([(ar.mean(), ar.std(), cbrt(skew(ar)))
                           for ar in (dist_ctd, dist_cst, dist_fct, dist_ftf)])

    return moments
Example #34
def get_feature(region_props, n_region, feature_name):
    """
    Returns:
        feature: list of [max, mean, variance, skewness, kurtosis]
    """
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = utils.format_2f(np.max(feature_values))
        feature[MEAN] = utils.format_2f(np.mean(feature_values))
        feature[VARIANCE] = utils.format_2f(np.var(feature_values))
        feature[SKEWNESS] = utils.format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = utils.format_2f(st.kurtosis(np.array(feature_values)))

    return feature
Example #35
def generate_moment(dataset, NO_OF_PROPERTIES, NO_MOMENTS):
    element_count = len(dataset)
    moments = np.zeros((element_count, NO_OF_PROPERTIES, NO_MOMENTS))
    for row in range(element_count):
        # First four moments per property: mean, std, skewness, kurtosis
        # (scipy.mean/scipy.std were removed from SciPy; use NumPy instead).
        moments[row, :, :] = np.array([
            np.mean(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            np.std(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            stats.skew(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            stats.kurtosis(dataset[row][0:NO_OF_PROPERTIES, :], axis=1)
        ]).transpose()
    return moments
Example #36
def getFourMoments(sequence, ax=1):
    finalArray = [
        np.mean(sequence, axis=ax),
        np.var(sequence, axis=ax),
        skew(sequence, axis=ax),
        kurtosis(sequence, axis=ax),
        sem(sequence, axis=ax)
    ]
    if ax is not None:
        finalArray = np.array(finalArray)
        finalArray = finalArray.T
        return np.concatenate(
            (finalArray, np.array(mquantiles(sequence, axis=ax))), axis=ax)
    finalArray.extend(mquantiles(sequence, axis=ax))
    return np.array(finalArray)
Example #37
def compute_features(dataframe, columns, bins, model, model_type="KMeans"):
    """
    Compute the features of the specified columns from a Pandas dataframe using the given model.

    :param dataframe: Pandas dataframe.
    :param columns: List of the columns name.
    :param bins: Number of bins.
    :param model: Model.
    :param model_type: Type of the model.
    :return: Features.
    """
    import numpy as np
    import scipy.stats.stats as st
    row = []
    for j, column in enumerate(columns):
        column_df = dataframe[column]
        X = column_df.values

        if model is not None:
            if model_type == "KMeans":
                r = model[column].predict(X.reshape(-1, 1))

            if model_type == "PolynomialFeatures":
                r = model[column].transform(X.reshape(-1, 1)).tolist()
        else:
            r = X

        # compute feature histogram
        # counts, bin_edges = np.histogram(result, bins=bins[j], density=False)
        # column_hist = counts

        # compute normalized feature histogram
        counts, bin_edges = np.histogram(r, bins=bins[j], density=True)
        column_hist = counts * np.diff(bin_edges)

        row.extend(column_hist)

        # add extra features
        kurtosis = st.kurtosis(X.reshape(-1, 1))[0]
        skew = st.skew(X.reshape(-1, 1))[0]
        min_value = column_df.min()
        max_value = column_df.max()
        mean_value = column_df.mean()
        median_value = column_df.median()
        row.extend(
            [kurtosis, skew, min_value, max_value, mean_value, median_value])
    return row
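Multiplying the density histogram by the bin widths turns it into per-bin probability mass, so the appended histogram features sum to one; a quick check:

import numpy as np

x = np.random.randn(1000)
counts, bin_edges = np.histogram(x, bins=10, density=True)
mass = counts * np.diff(bin_edges)
print(mass.sum())   # -> 1.0 (up to floating-point error)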
Example #38
def extract_features(data, y, window_len, task2=False):
    i = 0
    if task2:
        num_windows = len(data) - window_len + 1
    else:
        # Half-overlapping windows; integer division for Python 3
        num_windows = len(data) // (window_len // 2)
    features = []
    targets = []
    for n in range(num_windows):
        win = data[i:i + window_len]
        if task2:
            target = y.iloc[i]
        else:
            try:
                target = int(y[i:i + window_len].mode())
            except Exception:
                target = int(y[i:i + window_len])
        targets.append(target)
        for c in data.columns:
            s = np.array(win[c])
            rms_val = rms(s)
            (min_max, peak, peaknum) = min_max_mean(s)
            mean = s.mean()
            std = s.std()
            skew = st.skew(s)
            kurtosis = st.kurtosis(s)
            coefficients = std / mean
            logpower = np.log10((s**2)).sum()
            new_features = [
                rms_val, min_max, mean, std, skew, kurtosis, peak, peaknum,
                coefficients, logpower
            ]
            #new_features = [rms_val, min_max, mean, std]
            features.append(new_features)
        if task2:
            i += 1
        else:
            i += window_len // 2
    features = np.array(features)
    features.shape = num_windows, 120  #48#72
    targets = np.array(targets)
    return features, targets
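rms and min_max_mean come from the surrounding project; a plausible rms, as a sketch:

import numpy as np

def rms(s):
    # Root mean square of a 1-D signal.
    return np.sqrt(np.mean(np.square(s)))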
Example #39
def usr_moments_with_existing(coords, ref_points, number_of_moments=3, mean=0):
    """
    :param coords: coordinate matrix, one point per row
    :param ref_points: pivot points (pairs of symmetric pivots)
    :param number_of_moments: number of moments per distance distribution
    :param mean: index in [np.mean, geometrical_mean, harmonical_mean]
    :return: flattened UFSR feature vector
    """
    n_dimension = coords.shape[1]
    center = np.mean(coords, axis=0)  # per-axis mean, i.e. the centroid
    # get distance matrix where rows are pivot points and columns are data points
    dist_to_centroid = np.array(
        [[np.linalg.norm(coords[j] - center) for j in range(coords.shape[0])]])
    dist_matrix = scipy.spatial.distance_matrix(ref_points, coords)  # np.spatial does not exist

    # aggregate the symmetric pivots
    if mean not in [0, 1, 2]:
        mean = 0
    mean_options = [np.mean, geometrical_mean, harmonical_mean]
    mean = mean_options[mean]
    dist_ufsr = np.array([
        mean(dist_matrix[i], dist_matrix[n_dimension + i])
        for i in range(n_dimension)
    ])

    # add the distance to center of mass, dist_ufsr is now a matrix with distribution of distances wrt
    # (000),mean(100,-100), mean(010,0-10), mean(001,00-1) as rows
    dist_ufsr = np.concatenate((dist_to_centroid, dist_ufsr))

    # get the features
    means = np.array([[np.mean(dist_ufsr[i]) for i in range(n_dimension + 1)]])
    means = np.transpose(means)
    # moments = np.array(
    #     [[np.stats.moment(dist_ufsr[i], j) for j in range(2, number_of_moments + 1)] for i in range(n_dimension + 1)])

    # FIXME : VARIANCE VS STANDARD DEVIATION include also other moments to use number_of_moments
    moments = np.array([[dist_ufsr[i].std(),
                         cbrt(skew(dist_ufsr[i]))]
                        for i in range(n_dimension + 1)])
    ufsr_feature = np.concatenate((means, moments), axis=1)

    # mean, moment2, moment3..., moment6, mean, moment1... for each pivot
    ufsr_feature = ufsr_feature.ravel()
    return ufsr_feature
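geometrical_mean and harmonical_mean are likewise not shown; for two distance vectors from symmetric pivots they would plausibly be the element-wise means:

import numpy as np

def geometrical_mean(a, b):
    # Element-wise geometric mean of two distance vectors.
    return np.sqrt(a * b)

def harmonical_mean(a, b):
    # Element-wise harmonic mean of two distance vectors.
    return 2 * a * b / (a + b)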
Example #40
def get_mean_var_skew_kurt(np_array):
    return {
        "mean": np_array.mean(),
        "var": np_array.var(),
        "skewness": st.skew(np_array),
        "kurtosis": st.kurtosis(np_array),
    }
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("segments_filename")
    args = parser.parse_args()

    segment_boundaries = np.loadtxt(args.segments_filename, usecols=(2, 3))
    segment_lengths = segment_boundaries[:, 1] - segment_boundaries[:, 0]
    count = len(segment_lengths)
    mean = np.mean(segment_lengths)
    median = np.median(segment_lengths)

    print("num segments read: {:d}".format(count))
    print("total time (h): {:.2f}".format(np.sum(segment_lengths) / 3600))
    print("mean (s): {:.2f}".format(mean))
    print("median (s): {:.2f}".format(median))
    print("skew: {:.2f}".format(st.skew(segment_lengths, bias=True)))
    print("skew [corrected]: {:.2f}".format(st.skew(segment_lengths, bias=False)))
    print("skewtest: {}".format(st.skewtest(segment_lengths)))
    print("kurtosis: {:.2f}".format(st.kurtosis(segment_lengths)))

    # Figure out how many segments would fill the desired number of hours,
    # then round to the nearest 10k.
    possible_num_hours_segmentations = (100, 300, 500, 1000, 1500, 3000)
    print("=== from mean ===")
    for num_hours in possible_num_hours_segmentations:
        num_segments = int(num_hours * 3600 / mean)
        print("{:d} h: {:d} ({:d}) segments".format(num_hours, round(num_segments, -4), num_segments))
    print("=== from median ===")
    for num_hours in possible_num_hours_segmentations:
        num_segments = int(num_hours * 3600 / median)
        print("{:d} h: {:d} ({:d}) segments".format(num_hours, round(num_segments, -4), num_segments))
Example #42
import numpy as np
import matplotlib.pyplot as plt
from criticality import *
import scipy.stats.stats as st

xt = np.genfromtxt('xt.csv', delimiter=';')
phit = np.tanh(xt)

print(xt.shape)

s = fr2spike(phit,0.1)
sau = SimActiveUnits(s)
av = avalancheSize(sau)
print(np.mean(sau))
print(np.std(sau))
print(st.skew(sau, bias=False))

plt.hist(sau,bins=50)
plt.show()


Example #43
tempc, templ = temp.shape


for n in range(0, 600 - 1):
    for m in range(0, 2):
        WAVEFORMLENGTH_a[m] = WAVEFORMLENGTH_a[m] + (-temp[n][m] + temp[n + 1][m])


temp = list(temp.values)

# Zero crossings, slope changes, skewness and Hjorth parameters per channel;
# len(find(x)) from pylab is written as np.count_nonzero(x) here.
for o in range(0, 2):
    ZEROCROSSINGS_a[o] = np.count_nonzero(np.diff(np.sign(temp[o][0:599])))
    SLOPECHANGES_a[o] = np.count_nonzero(np.diff(np.sign(np.diff(temp[o][0:599]))))
    SKEWNESS_a[o] = st.skew(temp[o][0:599])
    HJORTHPARAM_activity_a[o] = np.var(temp[o][0:599])
    HJORTHPARAM_mobility_a[o] = np.sqrt(np.var(np.diff(temp[o][0:599])) / np.var(temp[o][0:599]))
    HJORTHPARAM_complexity_a[o] = (np.sqrt(np.var(np.diff(np.diff(temp[o][0:599])))) / np.var(np.diff(temp[o][0:599]))) / np.sqrt(np.var(np.diff(temp[o][0:599])) / np.var(temp[o][0:599]))

ZEROCROSSINGS_a = ZEROCROSSINGS_a.transpose()
SLOPECHANGES_a = SLOPECHANGES_a.transpose()
SKEWNESS_a = SKEWNESS_a.transpose()
HJORTHPARAM_activity_a = HJORTHPARAM_activity_a.transpose()
HJORTHPARAM_mobility_a = HJORTHPARAM_mobility_a.transpose()
HJORTHPARAM_complexity_a = HJORTHPARAM_complexity_a.transpose()
WAVEFORMLENGTH_a = WAVEFORMLENGTH_a.transpose()
# Concatenating the attributes to form the data matrix
data = []

for i in range(0, 2):
Example #44
def signal_stats(signal=None):
    """Compute various metrics describing the signal.

    Parameters
    ----------
    signal : array
        Input signal.

    Returns
    -------
    mean : float
        Mean of the signal.
    median : float
        Median of the signal.
    max : float
        Maximum signal amplitude.
    var : float
        Signal variance (unbiased).
    std_dev : float
        Standard signal deviation (unbiased).
    abs_dev : float
        Absolute signal deviation.
    kurtosis : float
        Signal kurtosis (unbiased).
    skewness : float
        Signal skewness (unbiased).

    """

    # check inputs
    if signal is None:
        raise TypeError("Please specify an input signal.")

    # ensure numpy
    signal = np.array(signal)

    # mean
    mean = np.mean(signal)

    # median
    median = np.median(signal)

    # maximum amplitude
    maxAmp = np.abs(signal - mean).max()

    # variance
    sigma2 = signal.var(ddof=1)

    # standard deviation
    sigma = signal.std(ddof=1)

    # absolute deviation
    ad = np.sum(np.abs(signal - median))

    # kurtosis
    kurt = stats.kurtosis(signal, bias=False)

    # skewness
    skew = stats.skew(signal, bias=False)

    # output
    args = (mean, median, maxAmp, sigma2, sigma, ad, kurt, skew)
    names = ('mean', 'median', 'max', 'var', 'std_dev', 'abs_dev', 'kurtosis',
             'skewness')

    return utils.ReturnTuple(args, names)
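A minimal usage example; it assumes utils.ReturnTuple unpacks like a plain tuple, which the (args, names) pairing suggests:

stats_out = signal_stats([0.0, 1.0, 2.0, 4.0, 8.0])
mean, median, max_amp, var, std_dev, abs_dev, kurtosis, skewness = stats_out
print(mean, skewness)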
Example #45
    def evaluate(self, t):
        """
        Return the short-term skewness at time t.
        """
        d = self.asset.getPreviousData(t, self.__length)
        return stats.skew(d)
Example #46
    if maxInterim > maxValue:
        maxValue = maxInterim
    minInterim = min(my_data[:, x])
    if minInterim < minValue:
        minValue = minInterim


binWidth = (maxValue - minValue) / numBins
newBins = np.arange(minValue, maxValue, binWidth)

# TODO process array only once for speedup?
for x in range(0, numModels):
    # matplotlib's `normed` argument was replaced by `density`
    frequency = plt.hist(my_data[:, x], bins=newBins, histtype='step',
                         density=True, label=labels[x])
    b[x, 0] = mean(my_data[:, x])
    b[x, 1] = var(my_data[:, x])
    b[x, 2] = skew(my_data[:, x])
    b[x, 3] = kurtosis(my_data[:, x])
    b[x, 4] = entropy(frequency[0])

plt.title(csvString + " Frequency")
plt.legend()
deg = u'\N{DEGREE SIGN}'

plt.xlabel("Airflow Rate (cfm)")
plt.ylabel("Frequency")

for i in range(0, 5):
    print(b[:, i])

plt.show()