Example #1
    def test_2D(self):
        a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)), mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.hmean(a)
        desired = ma.array((1, 2, 3, 4))
        assert_array_almost_equal(actual, desired, decimal=14)

        actual1 = mstats.hmean(a, axis=-1)
        desired = (4.0 / (1 / 1.0 + 1 / 2.0 + 1 / 3.0 + 1 / 4.0), 2.0 / (1 / 2.0 + 1 / 3.0), 2.0 / (1 / 1.0 + 1 / 4.0))
        assert_array_almost_equal(actual1, desired, decimal=14)
Example #2
    def test_1D(self):
        a = (1, 2, 3, 4)
        actual = mstats.hmean(a)
        desired = 4. / (1./1 + 1./2 + 1./3 + 1./4)
        assert_almost_equal(actual, desired, decimal=14)
        desired1 = mstats.hmean(ma.array(a), axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

        a = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.hmean(a)
        desired = 3. / (1./1 + 1./2 + 1./3)
        assert_almost_equal(actual, desired, decimal=14)
        desired1 = mstats.hmean(a, axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
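A minimal standalone sketch (not part of the original test suite) of the masking behaviour these tests rely on: masked entries are simply excluded, so the result equals the plain harmonic mean of the unmasked values.

import numpy as np
import numpy.ma as ma
from scipy.stats import mstats

a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1])
# the masked 4 is ignored, so this is hmean([1, 2, 3])
assert np.isclose(mstats.hmean(a), 3. / (1./1 + 1./2 + 1./3))
assert np.isclose(mstats.hmean(a), mstats.hmean(a.compressed()))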
Example #3
    def test_1D_float96(self):
        a = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual_dt = mstats.hmean(a, dtype=np.float96)
        desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3),
                                dtype=np.float96)
        assert_almost_equal(actual_dt, desired_dt, decimal=14)
        assert_(actual_dt.dtype == desired_dt.dtype)
Example #4
def main():
    X_train, X_val, y_train, y_val = common.load_train_dummies()
    slr = make_pipeline(MinMaxScaler(), LogisticRegression())
    plr = make_pipeline(PCA(), LogisticRegression())
    nb_bag = BaggingClassifier(base_estimator=GaussianNB())
    clfs = (
            GaussianNB(),
            #GridSearchCV(slr, dict(logisticregression__C=[1.0, 0.8])),
            make_pipeline(PCA(), GaussianNB()),
            GridSearchCV(plr, dict(pca__n_components=[None, 3, 8], logisticregression__C=[1.0, 0.7]), scoring='roc_auc'),
            GridSearchCV(nb_bag, dict(max_samples=[0.2, 0.4, 0.6], max_features=[0.3, 0.7]), scoring='roc_auc'),
            xgb.XGBClassifier(n_estimators=20, max_depth=3, colsample_bytree=0.7, subsample=0.6, learning_rate=0.1),
            #make_pipeline(KMeans(), GaussianNB()),
            #GridSearchCV(
            #    BaggingClassifier(),
            #    dict(base_estimator=[None, GaussianNB(), LogisticRegression()],
            #        n_estimators=[7, 10, 14],
            #        max_samples=[0.3, 0.6])),
            #GridSearchCV(xgb.XGBClassifier(), dict(n_estimators=[2, 3, 4], learning_rate=[0.01, 0.1], subsample=[0.5, 0.9])),
            #BaggingClassifier(base_estimator=SVC(), max_features=0.8, max_samples=2500, n_estimators=5),
    )
    preds = []
    for clf in clfs:
        print(clf)
        clf.fit(X_train, y_train)
        val_pred = clf.predict(X_val)
        print(roc_auc_score(y_val, val_pred))
        clf.fit(X_val, y_val)
        train_pred = clf.predict(X_train)
        preds.append(np.concatenate((train_pred, val_pred)))
        print(roc_auc_score(y_train, train_pred))
        print()

    y_all = np.concatenate((y_train, y_val))
    preds = np.column_stack(preds)
    gm = gmean(preds, axis=1)
    hm = hmean(preds+1, axis=1)
    preds = np.column_stack((preds, gm, hm))
    print('GM', roc_auc_score(y_all, gm))
    print('HM', roc_auc_score(y_all, hm))
    meta = GaussianNB()
    meta = GridSearchCV(xgb.XGBClassifier(), dict(max_depth=[2, 3, 4], learning_rate=[0.01, 0.05, 0.1], n_estimators=[20, 40, 60]), scoring='roc_auc')
    meta.fit(preds, y_all)
    scores = cross_val_score(meta, preds, y_all, scoring='roc_auc', cv=5)
    print(scores)
    print(scores.mean())
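Note that scipy's hmean is only defined for strictly positive values, which is presumably why the script shifts the stacked 0/1 predictions by 1 before averaging. A minimal illustration with synthetic predictions (not from the original script):

import numpy as np
from scipy.stats.mstats import hmean

preds = np.array([[0, 1, 1], [1, 1, 0]])
# a raw 0/1 matrix contains zeros, so shift by 1 first
print(hmean(preds + 1, axis=1))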
Example #5
x = cent[:, 0]
z = cent[:, 2]
# generate the random field
cov_model = Gaussian(dim=dim,
                     var=var,
                     len_scale=len_scale)
srf = SRF(cov_model, mean=mean, seed=seed)
# use unstructured for a 2D vertical mesh
field = srf((x, z),
            mesh_type='unstructured',
            force_moments=True)  #, mode_no=100)
# conductivities as log-normal distributed from the field data
cond = np.exp(field)
from scipy.stats.mstats import gmean, hmean
arimean = np.mean(cond)
harmean = hmean(cond)
geomean = gmean(cond)
print("The geometric mean is: " + str(geomean))
#plt.hist(field)
# show the heterogeneous field
plt.figure(figsize=(20, thickness / length * 20))
cond_log = np.log10(cond)
plt.tricontourf(x, z, cond_log.T)
plt.colorbar(ticks=[
    np.min(cond_log),
    np.mean(cond_log),
    np.max(cond_log)
])
plt.title("log-normal K field [log10 K]")
plt.savefig(dire + '/' + name + '.png',
            dpi=300)
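For log-normally distributed conductivities the geometric mean equals the exponential of the arithmetic mean of the underlying log-field. A quick standalone check with a synthetic field (illustrative only; the parameter values are assumptions):

import numpy as np
from scipy.stats.mstats import gmean

rng = np.random.default_rng(0)
field = rng.normal(loc=-10.0, scale=1.0, size=1000)
cond = np.exp(field)
assert np.isclose(gmean(cond), np.exp(field.mean()))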
Example #6
def multiresolution_analysis_reconstruction(x, y, error_x, error_y,
                                            final_scale):
    '''

    Parameters
    ----------
    x : x values of the coordinates.
    y : y values of the coordinates.
    error_x : x positions at which known data values are available.
    error_y : y values to reinject at the positions given by error_x.
    final_scale : Final scale to reach.

    Raises
    ------
    Exception
        final_scale should be greater than initial_scale.

    Returns
    -------
    ma_reconstructed : Data frame with the coordinates of the
    reconstructed signal at the requested scale.

    '''
    x = np.array(x, float)
    y = np.array(y, float)
    error_x = np.array(error_x, float)
    error_y = np.array(error_y, float)

    final_scale = int(final_scale)
    initial_scale = int(np.log2(len(x)))

    if final_scale <= initial_scale:
        raise Exception(
            'final_scale should be greater than initial_scale. The value of initial_scale is: {}'
            .format(initial_scale))

    else:

        for scale in range(initial_scale, final_scale):
            xplt = np.array([], float)
            yplt = np.array([], float)
            yp = sign = 0
            nfinal = (2**(scale + 1) + 1)

            for i in range(len(y)):
                if i == 0:
                    yp = (5 / 16) * y[int(i)] + (15 / 16) * y[int(i + 1)] - (
                        5 / 16) * y[int(i + 2)] + (1 / 16) * y[int(i + 3)]
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                elif 0 < i < len(y) - 2:
                    sign = (
                        (pph_interpolation_sign.sign_pph(
                            y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]) +
                         pph_interpolation_sign.sign_pph(
                             y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])) /
                        2)
                    yp = (y[int(i)] + y[int(i + 1)]) / 2 - (
                        1 / 8
                    ) * sign * hmean(
                        np.array([
                            abs(y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]),
                            abs(y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])
                        ], float))
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                elif i == len(y) - 2:
                    yp = (1 /
                          16) * y[int(i - 2)] - (5 / 16) * y[int(i - 1)] + (
                              15 / 16) * y[int(i)] + (5 / 16) * y[int(i + 1)]
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                else:
                    yplt = np.append(yplt, y[i])

            # refine the grid: insert midpoints between consecutive x values
            x = pd.DataFrame(x)
            xm = x.rolling(2, center=True).mean()
            xplt = pd.concat([x, xm], axis=1).stack().sort_values(
                ascending=True).reset_index(drop=True).to_numpy().reshape(
                    nfinal, 1)
            x = xplt
            yplt = np.array([yplt], float)
            y = yplt.reshape(nfinal, 1)

            for i, item in enumerate(x):

                for e in range(len(error_x)):
                    if item == error_x[e]:
                        y[i] = error_y[e]

            ma_reconstructed = pd.DataFrame(np.column_stack((x, y)),
                                            columns=['x', 'y'])
        return ma_reconstructed
Example #7
def pph_interpolation(x, y, final_scale):
    """

    Parameters
    ----------
    x : x values of the coordenates.
    y : y values of the coordenates.
    final_scale : Final scale to reach.

    Raises
    ------
    Exception
        final_scale should be greater than initial_scale..

    Returns
    -------
    pph_interpolation : Coordenates of the PPH 4 points interpolatory subdivision scheme at the level of discretization indicated over a regular grid.
    To build the values x=1 and x=n-1, Lagrange interpolatory subdivision scheme S(1,3) and S(3,1) respectively are applied.
    Data Frame type.

    """
    x = np.array(x, float)
    y = np.array(y, float)

    final_scale = int(final_scale)
    initial_scale = int(np.log2(len(x)))

    if final_scale <= initial_scale:
        raise Exception(
            'final_scale should be greater than initial_scale. The value of initial_scale is: {}'
            .format(initial_scale))

    else:

        for scale in range(initial_scale, final_scale):
            xplt = np.array([], float)
            yplt = np.array([], float)
            yp = sign = 0
            nfinal = (2**(scale + 1) + 1)

            for i in range(len(y)):
                if i == 0:
                    yp = (5 / 16) * y[int(i)] + (15 / 16) * y[int(i + 1)] - (
                        5 / 16) * y[int(i + 2)] + (1 / 16) * y[int(i + 3)]
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                elif 0 < i < len(y) - 2:
                    sign = (
                        (pph_interpolation_sign.sign_pph(
                            y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]) +
                         pph_interpolation_sign.sign_pph(
                             y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])) /
                        2)
                    yp = (y[int(i)] + y[int(i + 1)]) / 2 - (
                        1 / 8
                    ) * sign * hmean(
                        np.array([
                            abs(y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]),
                            abs(y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])
                        ], float))
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                elif i == len(y) - 2:
                    yp = (1 /
                          16) * y[int(i - 2)] - (5 / 16) * y[int(i - 1)] + (
                              15 / 16) * y[int(i)] + (5 / 16) * y[int(i + 1)]
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                else:
                    yplt = np.append(yplt, y[i])

            # refine the grid: insert midpoints between consecutive x values
            x = pd.DataFrame(x)
            xm = x.rolling(2, center=True).mean()
            xplt = pd.concat([x, xm], axis=1).stack().sort_values(
                ascending=True).reset_index(drop=True).to_numpy().reshape(
                    nfinal, 1)
            x = xplt
            yplt = np.array([yplt], float)
            y = yplt.reshape(nfinal, 1)
            pph_interpolation = pd.DataFrame(np.column_stack((x, y)),
                                             columns=['x', 'y'])
        return pph_interpolation
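The scheme above replaces the arithmetic mean of the two second differences with their harmonic mean. For positive a and b, hmean(a, b) = 2ab/(a+b) <= 2*min(a, b), so a single large difference (e.g. near a discontinuity) cannot dominate the average. A small sketch of this bound:

import numpy as np
from scipy.stats.mstats import hmean

a, b = 0.01, 100.0
print(np.mean([a, b]))   # ~50.0, dominated by the large value
print(hmean([a, b]))     # ~0.02, bounded by 2 * min(a, b)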
Example #8
def main():

    N = 5

    ind = np.arange(N)  # the x locations for the groups
    width = 0.35  # the width of the bars: can also be len(x) sequence

    lines = []

    name = "res1_p4.txt"

    stations_means = []
    number_packets = []

    stations = ('10.0.0.1', '10.0.0.2', '10.0.0.3',
                '10.0.0.4', '10.0.0.5', '10.0.0.6')

    with open(name, "r") as file:
        for line in file:
            line = line.strip()  # preprocess line
            for station in stations:
                if station in line:
                    line = line.replace(station + ' : ', '')
                    # RTT values in ms, one float per packet
                    rtts = list(map(float, re.findall(r'\d+\.\d+', line)))
                    number_packets.append(len(rtts))
                    stations_means.append(hmean(rtts))
                    break
            else:
                print("Nothing!")

    #p1 = plt.bar(ind, stations_means, width)
    #p2 = plt.bar(ind, number_packets, width, bottom=stations_means)
    p2 = plt.bar(ind, number_packets, width, color=('orange'))

    plt.ylabel('Number of packets')
    #plt.ylabel('Harmonic mean of RTT in ms ')
    plt.title('Number of packets sent from STA6 to all stations')
    #plt.title('RTT of traffic from STA1 to all stations')
    plt.xticks(ind, ('STA2', 'STA3', 'STA4', 'STA5', 'STA6'))
    plt.yticks(np.arange(0, 2001, 500))  # from 0 to 2000 in steps of 500
    #plt.yticks(np.arange(0, 61, 10))
    #plt.legend((p1[0], p2[0]), ('RTT Mean', 'Num of sent packets'))

    plt.show()
Example #9
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from media_geometrica_harmonica_quadratica import quadratic_mean
from scipy.stats.mstats import gmean
from scipy.stats.mstats import hmean
import statistics
import math

dataset = pd.read_csv('census.csv')

dados = dataset['age']

media = sum(dados) / len(dados)
mediana = dados.median()
moda = statistics.mode(dados)

media_harmonica = hmean(dados)
media_geometrica = gmean(dados)
media_quadratica = quadratic_mean(dados)

print(media)
print(mediana)
print(moda)
print(media_harmonica)
print(media_geometrica)
print(media_quadratica)
Example #10
from scipy.stats.mstats import gmean
from scipy.stats.mstats import hmean
import numpy as np
import statistics
import math

dados = np.array([150, 151, 152, 152, 153, 154, 155, 155, 155, 155, 156, 156, 156,
                  157, 158, 158, 160, 160, 160, 160, 160, 161, 161, 161, 161, 162,
                  163, 163, 164, 164, 164, 165, 166, 167, 168, 168, 169, 170, 172,
                  173])


# geometric mean

print(gmean(dados))

# harmonic mean

print(hmean(dados))

# quadratic mean


def quadratic_mean(dados):
    return math.sqrt(sum(n * n for n in dados) / len(dados))


print(quadratic_mean(dados))
Example #11
import pandas as pd
from scipy.stats.mstats import gmean, hmean
import matplotlib.pyplot as plt

my_dataset = pd.read_excel('Smith_glass_post_NYT_data.xlsx',
                           sheet_name='Supp_traces')

a_mean = my_dataset.Zr.mean()
g_mean = gmean(my_dataset['Zr'])
h_mean = hmean(my_dataset['Zr'])

print('-------')
print('arithmetic mean')
print("{0:.1f} [ppm]".format(a_mean))
print('-------')

print('geometric mean')
print("{0:.1f} [ppm]".format(g_mean))
print('-------')

print('harmonic mean')
print("{0:.1f} [ppm]".format(h_mean))
print('-------')

fig, ax = plt.subplots()
ax.hist(my_dataset.Zr,
        bins='auto',
        density=True,
        edgecolor='k',
        label='Measurements Hist',
        alpha=0.8)
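The snippet stops before the figure is finished; a possible continuation (the axvline styling and labels here are assumptions, not part of the original) would mark the three means on the histogram:

ax.axvline(a_mean, color='C0', label='arithmetic mean')
ax.axvline(g_mean, color='C1', label='geometric mean')
ax.axvline(h_mean, color='C2', label='harmonic mean')
ax.set_xlabel('Zr [ppm]')
ax.legend()
plt.show()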
Example #12
             D_out_wn_00001,
             color="orange",
             linestyle="",
             marker="*",
             markersize="10",
             label="D out : white noise, Ss = 0.0001")
plt.legend()
plt.ylabel("Diffusivity [m^2/s]")
plt.xlabel("model")
plt.savefig("/Users/houben/Desktop/baseflow_sa/in_vs_out.png", dpi=300)

# calculate the geomean, harmean, arimean from the derived values:
from scipy.stats.mstats import gmean, hmean

geomean_D_in_001 = gmean(D_in_001)
harmean_D_in_001 = hmean(D_in_001)
arimean_D_in_001 = np.mean(D_in_001)
geomean_D_in_00001 = gmean(D_in_00001)
harmean_D_in_00001 = hmean(D_in_00001)
arimean_D_in_00001 = np.mean(D_in_00001)
geomean_D_out_mhm_001 = gmean(D_out_mhm_001)
harmean_D_out_mhm_001 = hmean(D_out_mhm_001)
arimean_D_out_mhm_001 = np.mean(D_out_mhm_001)
geomean_D_out_wn_001 = gmean(D_out_wn_001)
harmean_D_out_wn_001 = hmean(D_out_wn_001)
arimean_D_out_wn_001 = np.mean(D_out_wn_001)
geomean_D_out_mhm_00001 = gmean(D_out_mhm_00001)
harmean_D_out_mhm_00001 = hmean(D_out_mhm_00001)
arimean_D_out_mhm_00001 = np.mean(D_out_mhm_00001)
geomean_D_out_wn_00001 = gmean(D_out_wn_00001)
harmean_D_out_wn_00001 = hmean(D_out_wn_00001)
Example #13
peso = df_mamiferos['bodywt']
peso = np.ceil(peso)

mp = (sono * peso).sum() / peso.sum()
mp

# geometric mean
from scipy.stats.mstats import gmean

mg = gmean(sono)
mg

# note that the geometric mean is always less than or equal to the arithmetic mean

# harmonic mean

from scipy.stats.mstats import hmean

mh = hmean(sono)
mh

# note that the harmonic mean is always less than or equal to the geometric mean
# the harmonic mean is the smallest of the three and the arithmetic mean the largest
# the harmonic mean does not allow zero values, since that would require division by zero

# another way to compute the harmonic mean, using a different library

import statistics as sss

sss.harmonic_mean(sono)
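A minimal check of the ordering noted above, harmonic <= geometric <= arithmetic, on synthetic positive data (illustrative only):

import numpy as np
from scipy.stats.mstats import gmean, hmean

amostra = np.array([2.0, 4.0, 8.0])
am, gm, hm = amostra.mean(), gmean(amostra), hmean(amostra)
assert hm <= gm <= am
print(am, gm, hm)  # ~4.67, 4.0, ~3.43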
Example #14
def engine(args):
    check_condition(args)

    if not args.remove:
        create_file.engine(args)
    
    sizes = list(range(args.start_size, args.end_size + 1, args.step_size))
    sizes_in_str = ["{}MB".format(size) for size in sizes]
    
    TIMER = Timer()
    
    NAME_OF_FUNCTION = "hash" if args.function == "h" else "encryption" if args.function == "e" else "decryption"
    elapsed_time = []
    for size, size_str in zip(sizes, sizes_in_str):
        tmp_elapsed_time = []
        file_name = FILE_NAME_FORMAT.format(size_str)
        
        if args.remove:
            if not os.path.isfile(file_name):
                create_file.create_a_file(file_name, size * 1024 ** 2, 3 * 1024 ** 2)
        
        for _ in range(args.ntrial):
            TIMER.start(size_str)
            if args.function == "e":
                __try_encrypting_file__(file_name, ".tmp")
            elif args.function == "d":
                __try_decrypting_file__(file_name, ".tmp")
            elif args.function == "h":
                __hash_a_file__(file_name)
            TIMER.end(size_str)

            if args.function in ["e", "d"]:
                if os.path.isfile(".tmp"):
                    os.remove(".tmp")

            tmp_elapsed_time.append(TIMER.get(size_str))
        
        if args.remove:
            if os.path.isfile(file_name):
                os.remove(file_name)

        if args.mean == "AM":
            elapsed_time.append(mean(tmp_elapsed_time))
        elif args.mean == "GM":
            elapsed_time.append(gmean(tmp_elapsed_time))
        else:
            elapsed_time.append(hmean(tmp_elapsed_time))
        print("Elapsed time for {} of {} is {:.2f}s".format(NAME_OF_FUNCTION, file_name, elapsed_time[-1]))
        print("Variation of elapsed time is {:.2f}".format(variation(tmp_elapsed_time)))
        print("-----------------------------------------------------------")

    print("Variation of all elapsed time is {:.2f}".format(variation(elapsed_time)))

    if args.display:
        plt.plot(sizes, elapsed_time)
        plt.xticks(sizes[::4], sizes_in_str[::4])
        plt.ylabel("{} time (s)".format(NAME_OF_FUNCTION))
        plt.xlabel("Size of data (MB)")

        nticks = 5
        mintick = args.start_size
        maxtick = max(range(args.start_size, args.end_size + 1, args.step_size))
        steptick = __round_int__((maxtick - mintick) // nticks, 1)
        ticks = range(mintick, maxtick + 1, steptick)
        labelticks = ["{}".format(tick) for tick in ticks]

        plt.xticks(ticks, labelticks)
        plt.show()
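The if/elif chain that selects the mean could also be written as a dictionary dispatch; a sketch under the same assumptions (mean, gmean and hmean as imported by the original module):

from statistics import mean
from scipy.stats.mstats import gmean, hmean

MEANS = {"AM": mean, "GM": gmean}  # anything else falls back to hmean

def pick_mean(kind, values):
    return MEANS.get(kind, hmean)(values)

print(pick_mean("GM", [1.0, 2.0, 4.0]))  # geometric mean = 2.0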
Example #15
import numpy as np
from scipy.stats.mstats import gmean, hmean

x = gmean([1, 3, 9])
y = hmean([1, 3, 9])

#Lambda functions
lm1 = lambda a: a + 10
lm2 = lambda a, b: a * b

print(lm1(5))
print(lm2(2, 3))


def lm3(n):
    return lambda a: a * n


lm4 = lm3(4)
print(lm4(11))

from functools import reduce

li = [5, 7, 22, 97, 54, 62, 77, 23, 73, 61, 73]
final_list = list(map(lambda x: x * 2, li))
sum1 = reduce((lambda x, y: x + y), li)

odd_time = reduce((lambda a, b: a ^ b), li)
print(odd_time)

list2 = ["geeks", "geeg", "keek", "practice", "aa"]
Example #16
    plt.tick_params(rotation=20)
    plt.title("histogram of kf values")
    plt.savefig(CWD + "/kf_values" + "/hist_kf.png", dpi=300)
    plt.close()
    plt.semilogy(sorted(kf_list))
    plt.title("kf values")
    plt.savefig(CWD + "/kf_values" + "/plot_kf.png", dpi=300)
    plt.close()
    sns.distplot(kf_list, hist=False, rug=True)
    plt.title("kerne density of kf values")
    plt.tick_params(rotation=45)
    plt.savefig(CWD + "/kf_values" + "/kde_kf.png", dpi=300)
    kf_list_file = open(CWD + "/kf_values" + "/kf_list_file.txt", "w")
    kf_list_file.write("geomean, harmean, arimean\n")
    kf_list_file.write(
        str(gmean(kf_list)) + ", " + str(hmean(kf_list)) + ", " +
        str(np.mean(kf_list)) + "\n")
    kf_list_file.write("list of kf values\n")
    kf_list_file.write("\n".join([str(i) for i in kf_list]))
    kf_list_file.close()

# Set a start value for "overall_count", which is the index. I recommend
# using as many digits as the number of new ogs models you will generate,
# to end up with consistent naming, i.e. if more than 100, start at 1001.
start = 1001
overall_count = start
# -------------------- model configurations
# Specify the PROCESS and PRIMARY_VARIABLE
pcs_type_flow = "GROUNDWATER_FLOW"
var_name_flow = "HEAD"
# Give it a name.
Example #17
import numpy as np
from scipy.stats.mstats import gmean, hmean

arimean = []
harmean = []
geomean = []

realizations = 10

# start at 1 so the first sample is non-empty
for i in range(1, realizations + 1):
    #np.random.seed(1337)
    mean = -10
    sigma = 1
    size = 100
    kf_list = np.random.lognormal(mean=mean, sigma=sigma, size=i * size)

    arimean.append(np.mean(kf_list))
    harmean.append(hmean(kf_list))
    geomean.append(gmean(kf_list))

import matplotlib.pyplot as plt

x = [i * size for i in range(1, realizations + 1)]

#plt.plot(x, arimean, label="arimean")
#plt.plot(x, harmean, label="harmean")
#plt.plot(x, geomean, label="geomean")
plt.semilogy(x, arimean, label="arimean")
plt.semilogy(x, harmean, label="harmean")
plt.semilogy(x, geomean, label="geomean")
plt.semilogy(sorted(kf_list), label="kf values")
#plt.ylim(0.00004,0.00005)
plt.ylabel("mean of samples")
Example #18
import pandas as pd
import scipy.stats.mstats as sc

data = pd.read_csv('data/CARS.csv')

print("SPEED:\nArithmetic Mean = " + str(data['speed'].mean()))
print("Geometric Mean = " + str(sc.gmean(data['speed'])))
print("Harmonic Mean = " + str(sc.hmean(data['speed'])) + "\n")

print("Distance:\nArithmetic Mean = " + str(data['dist'].mean()))
print("Geometric Mean = " + str(sc.gmean(data['dist'])))
print("Harmonic Mean = " + str(sc.hmean(data['dist'])))
Example #19
def main():
    """"""

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--species",
                        type=argparse.FileType("r"),
                        required=True)
    parser.add_argument("--out",
                        type=argparse.FileType("w"),
                        default=sys.stdout)
    parser.add_argument("--format",
                        choices=tabulate._table_formats.keys(),
                        default="latex")
    parser.add_argument("--level",
                        default="transcript",
                        choices=line_correspondence.keys())
    args = parser.parse_args()

    species = yaml.safe_load(args.species)

    # Names are the SPECIES
    names = species.pop("names")
    # Categories are Real vs Simulated data
    categories = species.pop("categories")

    # We want to create a table of the form

    # Species | Method   || Category              || Category              || ..
    # species | Name     | Prec | Rec | F1        ||Prec | Rec | F1        || ..

    name_ar = []
    for category in categories:
        for name in names:
            # print(category, name)
            key = [
                _ for _ in species.keys()
                if isinstance(species[_], dict) and species[_]["name"] == name
                and species[_]["category"] == category
            ]
            # print(key)
            assert len(key) == 1
            key = key.pop()
            name_ar.append(key)
    name_ar = np.array(list(grouper(name_ar, ceil(len(species) / 2), None)))

    header = [""] + list(categories)

    header.append(["Species", "Aligner", "Method"] +
                  ["Precision", "Recall", "F1"] * 2)

    rows = []
    # print(rows)

    key = None
    methods = None
    divisions = None

    for yrow, name in enumerate(names):

        new_rows = OrderedDict()

        for xrow, category in enumerate(categories):
            try:
                key = name_ar[xrow, yrow]
            except IndexError:
                raise IndexError(name_ar, xrow, yrow)
            if key is None:
                raise IndexError(name_ar, xrow, category, yrow, name)
                # continue

            with open(species[key]["configuration"]) as configuration:
                options = parse_configuration(configuration,
                                              prefix=species[key]["folder"])
                # Assembler
                if methods is None:
                    methods = list(options["methods"])
                    divisions = list(options["divisions"])

                for method in options["methods"]:
                    # Aligner
                    for division in options["divisions"]:
                        meth_key = (method, division)
                        if meth_key not in new_rows:
                            new_rows[meth_key] = OrderedDict()
                        try:
                            orig, filtered = options["methods"][method][
                                division]
                        except TypeError:
                            warnings.warn(
                                "Something went wrong for {}, {}; continuing".
                                format(method, division))

                            new_rows[meth_key][category] = (-10, -10, -10)
                            continue
                        orig_lines = [line.rstrip() for line in open(orig)]
                        filtered_lines = [
                            line.rstrip() for line in open(filtered)
                        ]
                        for index, line_index in enumerate(
                            [line_correspondence[args.level]]):
                            precision = float(orig_lines[line_index].split(":")
                                              [1].split()[1])
                            recall = float(filtered_lines[line_index].split(
                                ":")[1].split()[0])
                            try:
                                f1 = hmean(np.array([precision, recall]))
                            except TypeError as exc:
                                raise TypeError("\n".join([
                                    str(_) for _ in [(
                                        precision,
                                        type(precision)), (recall,
                                                           type(recall)), exc]
                                ]))
                            # print(level, method, division, (precision, recall, f1))
                            new_rows[meth_key][category] = (precision, recall,
                                                            f1)

        begun = False
        for division in divisions:
            division_done = False
            for method in methods:
                meth_key = (method, division)
                if not begun:
                    row = [name]
                    begun = True
                else:
                    row = [""]

                if not division_done:
                    row.append(division)
                    division_done = True
                else:
                    row.append("")

                row.append(method)
                # row.append(meth_key)
                # print(new_rows[meth_key].keys())
                for category in new_rows[meth_key]:
                    row.extend(new_rows[meth_key][category])

                rows.append(row)

    print(tabulate.tabulate(rows, headers=header, tablefmt=args.format))
    # print(categories)
    return
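The F1 computed above is the harmonic mean of precision and recall; a quick standalone check of that identity (the values are illustrative):

import numpy as np
from scipy.stats.mstats import hmean

precision, recall = 0.8, 0.5
f1_direct = 2 * precision * recall / (precision + recall)
assert np.isclose(f1_direct, hmean(np.array([precision, recall])))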