Example #1
def waveletScaleZero(order, nr_ploy, x):
    from getfilter import getfilter
    from scaling import scaling
    Sum = 0
    G0 = getfilter("G", 0, nr_ploy)
    G1 = getfilter("G", 1, nr_ploy)
    for i in range(nr_ploy):
        # print("G0[", order ,",",i,"]" ,"=" ,G0[order,i])
        # print("scaling(",i,",",1,",",0,",",x,")","=",sc.scaling(i, 1, 0, x) )
        # print("restult",G0[order, i]*sc.scaling(i, 1, 0, x)/np.sqrt(2) )
        # Check if this should be a pluss or minus
        wavelet = (G0[order, i]*scaling(i, 1, 0, x)) + \
            (G1[order, i]*scaling(i, 1, 1, x))
        Sum += wavelet
    return Sum
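
A quick way to settle the plus-or-minus question in the comment is to plot the wavelet; a minimal sketch, assuming the getfilter and scaling modules from the same repository are importable (the order and nr_ploy values are illustrative):

import numpy as np
import matplotlib.pyplot as plt

xs = np.linspace(0.0, 1.0, 200)
ys = [waveletScaleZero(0, 4, x) for x in xs]   # order=0, nr_ploy=4 (assumed)
plt.plot(xs, ys)
plt.title('wavelet at scale 0, order 0')
plt.show()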
Example #2
    def shell(self, r=100, dr=1, dust2gas=0.01, verbose=0):
        ''' evaluate rate of particles in a shell '''
        n = 0
        rate = 0.0
        t0 = time()
        if r == 0:
            r2min = 0.0
            r2max = 1e30
        else:
            r2min = (r - dr)**2
            r2max = (r + dr)**2
        for part in self.particles.values():
            p = part['p']
            p2 = sum(p**2)
            if r2min < p2 < r2max:
                v = part['v']
                w = part['w']
                rate -= sum(p * v) / sqrt(p2) * w
                n += 1
        if not hasattr(self, 'gas_mass'):
            self.get_gas_mass(verbose=verbose)
        rate = rate / (2. * dr) * self.gas_mass
        self.rate = rate
        if verbose:
            units = scaling()
            rate1 = dust2gas * rate * cgs.yr / units.t * units.m / cgs.m_earth
            s = 'rate:{:9.2e} ={:9.2e} M_E/yr, based on {} particles.'.format(
                rate, rate1, n)
            s = s + ' Time used: {:.1f} sec.'.format(time() - t0)
            print(s)
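
The core of shell() is an inward radial mass flux: each particle in the shell contributes -(p.v/|p|)*w, and the sum is divided by the shell thickness 2*dr and scaled by the gas mass. A standalone sketch of the same sum on synthetic numpy arrays (all names and values here are illustrative):

import numpy as np

rng = np.random.default_rng(1)
pos = 100.0 * rng.standard_normal((1000, 3))   # particle positions
vel = rng.standard_normal((1000, 3))           # particle velocities
w = np.ones(1000)                              # statistical weights

r, dr = 100.0, 1.0
p2 = np.sum(pos**2, axis=1)
inshell = (p2 > (r - dr)**2) & (p2 < (r + dr)**2)
radial_v = np.sum(pos * vel, axis=1) / np.sqrt(p2)
rate = -np.sum((radial_v * w)[inshell]) / (2.0 * dr)   # per unit gas mass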
Example #3
def umax_rt(p, courant, cdtd):
    '''
    Return the umax value. Input is a patch, the Courant number, and cdtd.

        p.umax_rt = du.umax_rt(p, courant, cdtd)
    '''
    import numpy as np
    from scaling import scaling, cgs
    sc = scaling(cgs)
    u_max = sc.stefan * np.pi * 16. / 3. * p.fmax_rt.max() * courant / cdtd
    return u_max
Example #4
def fmax_rt(p, kappa):
    '''
    Return a data block containing the f_max(RT) values. Input is a patch
    and kappa in cgs units.

        p.fmax_rt = du.fmax_rt(p,kappa)
    '''
    from scaling import scaling, cgs
    sc = scaling(cgs)
    k = kappa * sc.m / sc.l**2
    fmax = (p.pg / p.d)**4 / p.pg * (k * p.d * p.dx.min())
    fmax = fmax / (1. + (k * p.d * p.dx.min())**2)
    return fmax
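
Both helpers read only a few attributes from the patch, so a stub object is enough to exercise them; a minimal sketch with made-up values, assuming the scaling module resolves:

import numpy as np

class StubPatch:
    pass

p = StubPatch()
p.pg = np.full((8, 8, 8), 2.0)    # gas pressure (code units, assumed)
p.d = np.full((8, 8, 8), 1.0)     # gas density (assumed)
p.dx = np.array([0.1, 0.1, 0.1])  # cell sizes (assumed)
p.fmax_rt = fmax_rt(p, kappa=0.1)
p.umax_rt = umax_rt(p, courant=0.3, cdtd=1.0)

Example #5

train = pd.read_csv(os.path.join(data_dir, 'train_features.csv'))
targets_scored = pd.read_csv(
    os.path.join(data_dir, 'train_targets_scored.csv'))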
test = pd.read_csv(os.path.join(data_dir, 'test_features.csv'))
train_drug = pd.read_csv(os.path.join(data_dir, 'train_drug.csv'))
submission = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

x_train = train.copy()
x_test = test.copy()
y_train = targets_scored.copy()

cp_features = ['cp_type', 'cp_time', 'cp_dose']
genes_features = [column for column in train.columns if 'g-' in column]
cells_features = [column for column in train.columns if 'c-' in column]

# scale the data, like RankGauss
x_train, x_test = scaling(x_train,
                          x_test,
                          scale=cfg_fe.scale,
                          n_quantiles=cfg_fe.scale_n_quantiles,
                          seed=cfg_fe.seed)

# fe_stats
x_train, x_test = fe_stats(x_train, x_test, genes_features, cells_features)
x_train.head()

# group the drug using kmeans
if runty == 'traineval':
    x_train, x_test = fe_cluster(x_train,
                                 x_test,
                                 genes_features,
                                 cells_features,
                                 n_cluster_g=cfg_fe.n_clusters_g,
                                 n_cluster_c=cfg_fe.n_clusters_c,
                                 seed=cfg_fe.seed,
                                 runty=runty,
                                 path=save_path)
Example #6
from regression import regression
from test_train import test_train
from scaling import scaling
from data_preprocess import data_preprocess
from predict_plot import predict_plot
import pandas as pd

df = pd.read_csv("a.us.txt")
df.set_index('Date', inplace=True)

df = data_preprocess(df)
print(df.head())

features, labels = scaling(df)
features_train, labels_train, features_test, labels_test = test_train(features, labels)
reg = regression(features_train, features_test, labels_train, labels_test)
predict_plot(reg, features, df)
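
The helper names suggest a scikit-learn style pipeline; a plausible sketch of the scaling step (an assumption for illustration, not the repo's actual code; it scales every column to [0, 1] and treats the last column as the label):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

def scaling(df):
    values = MinMaxScaler().fit_transform(df.values)
    # features: all but the last column; labels: the last column (assumed)
    return values[:, :-1], values[:, -1]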
Example #7
import requests
from io import BytesIO
from PIL import Image

# assumed endpoint; the fragment uses geocoder_api_server before defining it
geocoder_api_server = "http://geocode-maps.yandex.ru/1.x/"

geocoder_params = {
    "apikey": "40d1649f-0493-4b70-98ba-98533de7710b",
    "geocode": toponym_to_find,
    "format": "json"}

response = requests.get(geocoder_api_server, params=geocoder_params)

# abort on a failed request instead of silently continuing
if not response:
    raise RuntimeError("Geocoder request failed: {}".format(response.status_code))

json_response = response.json()
toponym = json_response["response"]["GeoObjectCollection"][
    "featureMember"][0]["GeoObject"]

toponym_coodrinates = toponym["Point"]["pos"]
toponym_longitude, toponym_lattitude = toponym_coodrinates.split(" ")
envelope = toponym["boundedBy"]["Envelope"]
l_c, u_c = envelope["lowerCorner"], envelope["upperCorner"]

map_params = {
    "ll": ",".join([toponym_longitude, toponym_lattitude]),
    "spn": scaling(u_c, l_c),
    "l": "map",
    "pt": ",".join([toponym_longitude, toponym_lattitude]) + "," + "pm2rdm"
}

map_api_server = "http://static-maps.yandex.ru/1.x/"

response = requests.get(map_api_server, params=map_params)

Image.open(BytesIO(
    response.content)).show()
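
The "spn" parameter is the map span in degrees, and scaling presumably derives it from the two envelope corners; a sketch of such a helper (an assumption about the local scaling module, not part of Yandex's API):

def scaling(upper_corner, lower_corner):
    ux, uy = map(float, upper_corner.split(" "))
    lx, ly = map(float, lower_corner.split(" "))
    # half the envelope extent along each axis, formatted as "dx,dy"
    return "{},{}".format(abs(ux - lx) / 2, abs(uy - ly) / 2)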
Example #8
import pandas as pd
from scaling import scaling
data = pd.read_csv('Seals.csv')

data = scaling(data, a=(0, 8), b=(10, 12), columnIndices=(0, 1))
data.to_csv('SealsScaled.csv')
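
The keyword names suggest a linear remap of the value range a onto b for the selected columns; a sketch of such a helper (assumed, not the repo's code):

def scaling(data, a, b, columnIndices):
    (lo_a, hi_a), (lo_b, hi_b) = a, b
    for i in columnIndices:
        col = data.iloc[:, i]
        data.iloc[:, i] = lo_b + (col - lo_a) * (hi_b - lo_b) / (hi_a - lo_a)
    return data

Example #9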
import argparse
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# params, colors, scaling, fit_with_cost, accuracy and visualizeRegression
# are local helpers from the source repository


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a",
                        "--alpha",
                        type=float,
                        default=params.ALPHA,
                        help="Set alpha")
    parser.add_argument("-i",
                        "--nbItr",
                        type=int,
                        default=params.NB_ITERATIONS,
                        help="Set number of iterations")
    parser.add_argument("-f",
                        "--feature",
                        type=str,
                        default=params.FEATURES,
                        help="Set feature name")
    parser.add_argument("-l",
                        "--label",
                        type=str,
                        default=params.LABELS,
                        help="Set label name")
    parser.add_argument("-c", "--cost", action='store_true', help="Cost visu")
    parser.add_argument("-s",
                        "--accuracyScore",
                        action='store_true',
                        help="Accuracy score")
    parser.add_argument("-p",
                        "--dataPath",
                        type=str,
                        default=(params.DIR_PATH + params.DATA_PATH +
                                 params.DATA_NAME),
                        help="Data absolute path")
    args = parser.parse_args()

    alpha = args.alpha
    nbItr = args.nbItr
    featureName = args.feature
    labelName = args.label
    dataPath = args.dataPath
    accuracyScore = args.accuracyScore

    # get data
    try:
        data = pd.read_csv(dataPath)
    except FileNotFoundError:
        print(colors.FAIL + 'Data not found.' + colors.ENDC)
        exit(1)

    # get features and labels
    try:
        x = np.array(data[featureName])
        y = np.array(data[labelName])
    except KeyError:
        print(
            colors.FAIL +
            'Wrong features or labels\nPlease use [-f/--feature] FEATURE and/or [-l/--label] LABEL'
            + colors.ENDC)
        exit(1)

    print('Parameters :')
    print('  alpha :', alpha)
    print('  iterations :', nbItr)
    print('  accuracyScore :', accuracyScore, '\n')

    # create thetas
    theta = np.array([[0], [0]], float)

    # scale input
    scaledX = scaling(x)

    # train
    theta, cost_history = fit_with_cost(scaledX, y, theta, alpha, nbItr)

    if args.cost:
        fig = plt.figure()
        ax = plt.axes()
        ax.plot(np.arange(len(cost_history)), cost_history)
        ax.set(xlabel='number of iterations', ylabel='cost', title='Cost')
        plt.show()

    # undo the feature scaling on the slope
    theta[1] = theta[1] / (np.amax(x) - np.amin(x))

    print('Thetas :', theta, '\n')

    try:
        with open(params.DIR_PATH + params.JSON_PATH + params.JSON_NAME,
                  'w') as f:
            json.dump(theta.tolist(), f)
    except FileNotFoundError:
        print(colors.FAIL + "'" + params.DIR_PATH + params.JSON_PATH + "'" +
              ' directory not found.' + colors.ENDC)
        exit(1)

    if accuracyScore:
        accuracy(data, featureName, labelName, theta)

    visualizeRegression(theta, x, y, featureName, labelName)

    print(colors.OKGREEN + 'Thetas written in :',
          params.DIR_PATH + params.JSON_PATH + params.JSON_NAME + colors.ENDC)
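
Note that only theta[1] is divided by (max - min) after training, which is consistent with a scaling helper that divides the feature by its range; a minimal sketch of such a helper (an assumption, not the repo's actual code):

import numpy as np

def scaling(x):
    # shrink the feature by its range so gradient descent converges with a
    # reasonable alpha; only the slope then needs rescaling, matching
    # theta[1] /= (amax - amin) above
    return x / (np.amax(x) - np.amin(x))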
Example #10
def ORRT_D1_lambdas(train, test, S=1000, gamma=512):

    # Scale the training subset
    (train, Mtree, mtree) = scaling(train)
    n_train = len(train)
    p = int(len(train[0]) - 1)

    # Rescale the test subset
    test = rescaling(test, Mtree, mtree)
    n_test = len(test)

    # Set the grid of values of lambda^L and lambda^G
    lambdasL = np.append(0, np.exp2(list(range(-6, 6)))) / (3 * p)
    nlambdasL = len(lambdasL)
    lambdasG = np.append(0, np.exp2(list(range(-6, 6)))) / p
    nlambdasG = len(lambdasG)

    # Definition of the objective function
    def f(x, train, gamma, p, lambdaL, lambdaG):
        calc = -(np.dot(train[:, :p], x[0:p] - x[
            (4 * p + 3):(5 * p + 3)]) / p - x[p]) * gamma
        P2 = np.zeros(len(calc))
        P2[calc < 600] = 1 / (1 + np.exp(calc[calc < 600]))
        P3 = 1 - P2
        Q2 = np.dot(train[:, :p], x[(p + 1):(2 * p + 1)] -
                    x[(5 * p + 3):(6 * p + 3)]) - x[(3 * p + 1)]
        Q3 = np.dot(train[:, :p], x[(2 * p + 1):(3 * p + 1)] -
                    x[(6 * p + 3):(7 * p + 3)]) - x[(3 * p + 2)]
        errorp = np.square(P2 * Q2 + P3 * Q3 - train[:, p])
        meanerrorp = np.mean(errorp)
        meanerrorplasso = meanerrorp + lambdaG * np.sum(x[
            (3 * p + 3):(4 * p + 3)]) + lambdaL * (np.sum(x[0:p]) + np.sum(x[
                (p + 1):(3 * p + 1)]) + np.sum(x[(4 * p + 3):(7 * p + 3)]))
        if np.isnan(meanerrorplasso):
            exit()
        return meanerrorplasso

    # Definition of bounds
    def lulb(p):
        lu = np.concatenate(
            (np.zeros(p), -np.ones(1), np.repeat(0, 2 * p), np.repeat(-50, 2),
             np.repeat(0, p), np.repeat(0, 3 * p)))
        lb = np.concatenate(
            (np.ones(p + 1), np.repeat(50, 2 * p + 2), np.repeat(np.inf, p),
             np.repeat(1, p), np.repeat(50, 2 * p)))
        return (lu, lb)

    d = lulb(p)
    bounds = Bounds(d[0], d[1])

    # Definition of gradient
    def gradient(x, train, gamma, p, lambdaL, lambdaG):
        calc = -(np.dot(train[:, :p], x[0:p] - x[
            (4 * p + 3):(5 * p + 3)]) / p - x[p]) * gamma
        p1 = np.zeros(len(calc))
        p1[calc < 600] = 1 / (1 + np.exp(calc[calc < 600]))
        exponencial1 = np.exp(600) * np.ones(len(calc))
        exponencial1[calc < 600] = np.exp(calc[calc < 600])
        P2 = p1
        P3 = 1 - p1
        Q2 = np.dot(train[:, :p], x[(p + 1):(2 * p + 1)] -
                    x[(5 * p + 3):(6 * p + 3)]) - x[(3 * p + 1)]
        Q3 = np.dot(train[:, :p], x[(2 * p + 1):(3 * p + 1)] -
                    x[(6 * p + 3):(7 * p + 3)]) - x[(3 * p + 2)]
        g = P2 * Q2 + P3 * Q3 - train[:, p]
        der = np.zeros_like(x)
        m1 = 2 * g * exponencial1 * np.square(p1) * (Q2 - Q3)
        der[0:p] = gamma / p * np.mean(np.transpose(train[:, :p]) * m1,
                                       axis=1) + np.repeat(lambdaL, p)
        der[p] = -gamma * np.mean(m1)
        der[(p + 1):(2 * p +
                     1)] = np.mean(2 * g * np.transpose(train[:, :p]) * P2,
                                   axis=1) + np.repeat(lambdaL, p)
        der[(2 * p + 1):(3 * p +
                         1)] = np.mean(2 * g * np.transpose(train[:, :p]) * P3,
                                       axis=1) + np.repeat(lambdaL, p)
        der[(3 * p + 1)] = -np.mean(2 * g * P2)
        der[(3 * p + 2)] = -np.mean(2 * g * P3)
        der[(3 * p + 3):(4 * p + 3)] = np.repeat(lambdaG, p)
        der[(4 * p + 3):(5 * p + 3)] = -gamma / p * np.mean(
            np.transpose(train[:, :p]) * m1, axis=1) + np.repeat(lambdaL, p)
        der[(5 * p +
             3):(6 * p +
                 3)] = -np.mean(2 * g * np.transpose(train[:, :p]) * P2,
                                axis=1) + np.repeat(lambdaL, p)
        der[(6 * p +
             3):(7 * p +
                 3)] = -np.mean(2 * g * np.transpose(train[:, :p]) * P3,
                                axis=1) + np.repeat(lambdaL, p)
        return der

    # Definition of constraints and jacobian
    jacons = np.zeros((3 * p, 7 * p + 3))
    jacons[0:p, 0:p] = -np.eye(p)
    jacons[0:p, (4 * p + 3):(5 * p + 3)] = -np.eye(p)
    jacons[0:p, (3 * p + 3):(4 * p + 3)] = np.eye(p)
    jacons[p:2 * p, (p + 1):(2 * p + 1)] = -np.eye(p)
    jacons[p:2 * p, (5 * p + 3):(6 * p + 3)] = -np.eye(p)
    jacons[p:2 * p, (3 * p + 3):(4 * p + 3)] = np.eye(p)
    jacons[2 * p:3 * p, (2 * p + 1):(3 * p + 1)] = -np.eye(p)
    jacons[2 * p:3 * p, (6 * p + 3):(7 * p + 3)] = -np.eye(p)
    jacons[2 * p:3 * p, (3 * p + 3):(4 * p + 3)] = np.eye(p)
    lambdaL = 0
    lambdaG = 0
    ineq_cons = {
        'type': 'ineq',
        'fun': lambda x: np.concatenate(
            (x[(3 * p + 3):(4 * p + 3)] - x[0:p] - x[(4 * p + 3):(5 * p + 3)],
             x[(3 * p + 3):(4 * p + 3)] - x[(p + 1):(2 * p + 1)] -
             x[(5 * p + 3):(6 * p + 3)],
             x[(3 * p + 3):(4 * p + 3)] - x[(2 * p + 1):(3 * p + 1)] -
             x[(6 * p + 3):(7 * p + 3)])),
        'jac': lambda x: jacons,
        # the original listing had a spurious 'arg' key here; the constraint
        # lambdas take only x, so no extra args entry is needed
    }

    # Set the grid of S random initial solutions
    np.random.seed(1)
    x0 = np.zeros((S, 7 * p + 3))
    x0[:, p] = 2 * np.random.random(S) - 1
    a1iaux = -700 / gamma + x0[:, p]
    a1iaux2 = np.maximum(a1iaux, -1)
    a1iaux3 = np.transpose(np.tile(a1iaux2, (p, 1)))
    a1iaux4 = (1 - a1iaux3) * np.random.random((S, p)) + a1iaux3
    x0[:, 0:p] = np.maximum(a1iaux4, 0)
    x0[:, (p + 1):(2 * p + 1)] = np.random.random((S, p))
    x0[:, (2 * p + 1):(3 * p + 1)] = np.random.random((S, p))
    x0[:, (3 * p + 1)] = np.random.random(S)
    x0[:, (3 * p + 2)] = np.random.random(S)
    x0[:, (3 * p + 3):(4 * p + 3)] = np.random.random((S, p))
    x0[:, (4 * p + 3):(5 * p + 3)] = np.maximum(-a1iaux4, 0)
    x0[:, (5 * p + 3):(6 * p + 3)] = np.random.random((S, p))
    x0[:, (6 * p + 3):(7 * p + 3)] = np.random.random((S, p))

    # Define the function to be parallelized
    def funcion(valores):
        # unpack in the same order as `values` is built below; the original
        # swapped lambdasG and lambdasL here, silently exchanging their roles
        [
            f, x0nn, nn, train, gamma, p, lambdasL, lambdasG, gradient, bounds,
            ineq_cons
        ] = valores
        nlambdasG = len(lambdasG)
        nlambdasL = len(lambdasL)
        objetivo = 1000000 * np.ones((nlambdasL, nlambdasG))
        sol = np.zeros((nlambdasL, nlambdasG, 7 * p + 3))
        for ll in range(nlambdasL):
            for gg in range(nlambdasG):
                try:
                    print('start {}: local {}, global {}'.format(nn, ll, gg))
                    res = minimize(f,
                                   x0nn,
                                   args=(train, gamma, p, lambdasL[ll],
                                         lambdasG[gg]),
                                   method='SLSQP',
                                   jac=gradient,
                                   options={
                                       'ftol': 1e-5,
                                       'disp': False,
                                       'maxiter': 300
                                   },
                                   bounds=bounds,
                                   constraints=ineq_cons)
                    objetivo[ll, gg] = res.fun
                    sol[ll, gg, :] = res.x
                    # warm-start the next (lambdaL, lambdaG) pair: the
                    # original piecewise segment copies together span the
                    # whole vector, so a full copy is equivalent
                    x0nn[:] = res.x
                except Exception:
                    # keep the 1e6 sentinel objective for failed solves
                    pass
        return (objetivo, sol)

    values = [([
        f, x0[nn], nn, train, gamma, p, lambdasL, lambdasG, gradient, bounds,
        ineq_cons
    ]) for nn in range(S)]

    # Solve Problem (1) for a grid of lambda^L and lambda^G
    results = Parallel(n_jobs=8)(delayed(funcion)(value) for value in values)
    objetivos = [results[i][0] for i in range(S)]
    xs = [results[i][1] for i in range(S)]

    # Obtain the parameters of the SORRT with depth D = 1 for the grid of
    # values of lambda^L and lambda^G, as well as the performance over
    # the training and test subsets.
    objetivopt = np.zeros((nlambdasL, nlambdasG))
    indexopt = np.zeros((nlambdasL, nlambdasG), dtype=int)
    xopt = np.zeros((nlambdasL, nlambdasG, 7 * p + 3))
    a1opt = np.zeros((p, nlambdasL, nlambdasG))
    a2opt = np.zeros((p, nlambdasL, nlambdasG))
    a3opt = np.zeros((p, nlambdasL, nlambdasG))
    betaopt = np.zeros((p, nlambdasL, nlambdasG))
    mu1opt = np.zeros((nlambdasL, nlambdasG))
    mu2opt = np.zeros((nlambdasL, nlambdasG))
    mu3opt = np.zeros((nlambdasL, nlambdasG))
    predtrain = np.zeros((n_train, nlambdasL, nlambdasG))
    errortrain = np.zeros((n_train, nlambdasL, nlambdasG))
    msetrain = np.zeros((nlambdasL, nlambdasG))
    R2train = np.zeros((nlambdasL, nlambdasG))
    predtest = np.zeros((n_test, nlambdasL, nlambdasG))
    errortest = np.zeros((n_test, nlambdasL, nlambdasG))
    msetest = np.zeros((nlambdasL, nlambdasG))
    R2test = np.zeros((nlambdasL, nlambdasG))
    coefsnonulos = np.zeros((nlambdasL, nlambdasG))
    numberofeatures = np.zeros((nlambdasL, nlambdasG))
    localsparsity = np.zeros((nlambdasL, nlambdasG))
    globalsparsity = np.zeros((nlambdasL, nlambdasG))
    for ll in range(nlambdasL):
        for gg in range(nlambdasG):
            obj = [objetivos[i][ll, gg] for i in range(S)]
            objetivopt[ll, gg] = np.min(obj)
            indexopt[ll, gg] = np.nanargmin(obj)
            xopt[ll, gg, :] = xs[indexopt[ll, gg]][ll, gg]
            a1opt[:, ll, gg] = (xopt[ll, gg, 0:p] -
                                xopt[ll, gg, (4 * p + 3):(5 * p + 3)])
            a2opt[:, ll, gg] = (xopt[ll, gg, (p + 1):(2 * p + 1)] -
                                xopt[ll, gg, (5 * p + 3):(6 * p + 3)])
            a3opt[:, ll, gg] = (xopt[ll, gg, (2 * p + 1):(3 * p + 1)] -
                                xopt[ll, gg, (6 * p + 3):(7 * p + 3)])
            betaopt[:, ll, gg] = xopt[ll, gg, (3 * p + 3):(4 * p + 3)]
            mu1opt[ll, gg] = xopt[ll, gg, p]
            mu2opt[ll, gg] = xopt[ll, gg, (3 * p + 1)]
            mu3opt[ll, gg] = xopt[ll, gg, (3 * p + 2)]
            (predtrain[:, ll, gg], errortrain[:, ll, gg], msetrain[ll, gg],
             R2train[ll,
                     gg]) = predict(train, a1opt[:, ll, gg], mu1opt[ll, gg],
                                    a2opt[:, ll, gg], mu2opt[ll, gg],
                                    a3opt[:, ll, gg], mu3opt[ll, gg], gamma)
            (predtest[:, ll, gg], errortest[:, ll, gg], msetest[ll, gg],
             R2test[ll, gg]) = predict(test, a1opt[:, ll, gg], mu1opt[ll, gg],
                                       a2opt[:, ll, gg], mu2opt[ll, gg],
                                       a3opt[:, ll, gg], mu3opt[ll, gg], gamma)
            # count non-zero coefficients (rounded to 3 decimals) per node
            nz1 = np.absolute(np.around(a1opt[:, ll, gg], decimals=3)) >= 0.001
            nz2 = np.absolute(np.around(a2opt[:, ll, gg], decimals=3)) >= 0.001
            nz3 = np.absolute(np.around(a3opt[:, ll, gg], decimals=3)) >= 0.001
            coefsnonulos[ll, gg] = np.sum(nz1) + np.sum(nz2) + np.sum(nz3)
            numberofeatures[ll, gg] = np.sum(nz1 | nz2 | nz3)
            localsparsity[ll, gg] = (100 * (3 * p - coefsnonulos[ll, gg]) /
                                     (3 * p))
            globalsparsity[ll, gg] = 100 * (p - numberofeatures[ll, gg]) / p

    return (a1opt, mu1opt, a2opt, mu2opt, a3opt, mu3opt, betaopt, gamma,
            predtrain, errortrain, msetrain, R2train, predtest, errortest,
            msetest, R2test, localsparsity, globalsparsity)
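
A hedged usage sketch on a tiny synthetic regression problem, with S reduced so the multistart finishes quickly; it assumes the repo's scaling, rescaling and predict helpers plus scipy.optimize (minimize, Bounds) and joblib (Parallel, delayed) are importable:

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((120, 3))
y = X @ np.array([0.5, -0.2, 0.1]) + 0.05 * rng.standard_normal(120)
data = np.column_stack([X, y])          # last column is the response
out = ORRT_D1_lambdas(data[:80], data[80:], S=4, gamma=512)
msetest = out[14]                       # position 14 in the return tuple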
Example #11
    def read_shell(self, r=100, dr=1, dust2gas=0.01, save=True, verbose=0):
        ''' Read particles in a shell, by default saving them '''
        dict = {}
        n = 0
        npatch = 0
        rate = 0.0
        start = time()
        t0 = start
        if r == 0:
            r2min = 0.0
            r2max = 1e30
        else:
            r2min = (r - dr)**2
            r2max = (r + dr)**2

        # open each patch file and check if relevant
        for file in self.files:
            p = Patch(file)
            rc = p.corner_radii()

            # If relevant, open the .peb file, and read the data
            if (rc.min() < (r + dr) and rc.max() > (r - dr)) or r == 0:
                npatch += 1
                idx, dd = self.read_file(file)

                # If in the shell, sum up rate contribution, and add particle to dict
                if (r > 0):
                    for i in range(size(idx)):
                        id = idx[i]
                        d = dd[id]
                        p = d['p']
                        p2 = sum(p**2)
                        if p2 > r2min and p2 < r2max:
                            v = d['v']
                            w = d['w']
                            rate -= sum(p * v) / sqrt(p2) * w
                            n = n + 1
                            if save:
                                dict[id] = d
                else:
                    for i in range(size(idx)):
                        id = idx[i]
                        dict[id] = dd[id]
                if verbose > 1:
                    now = time()
                    print('{:.3f} sec'.format(now - start))
                    start = now
        if r == 0:
            self.particles = dict
            print('{:.3f} sec'.format(time() - start))
            return
        if not hasattr(self, 'gas_mass'):
            self.get_gas_mass(verbose=verbose)
        rate = rate / (2.0 * dr) * self.gas_mass
        self.rate = rate
        if save:
            self.particles = dict
            if verbose > 1:
                print('{} self.particles saved'.format(size(dict.keys())))
        if verbose:
            units = scaling()
            rate1 = dust2gas * rate * cgs.yr / units.t * units.m / cgs.m_earth
            s = 'rate:{:9.2e} ={:9.2e} M_E/yr'.format(rate, rate1)
            s = s + ', based on {} particles from {} patches'.format(n, npatch)
            print(s + ' ({:.1f} sec)'.format(time() - t0))
Example #12
import numpy as np
import matplotlib.pyplot as pl

import EOS
from scaling import scaling, cgs


#%% Void object
class void():
    pass


evol = void()

#%% Soft gravity
sc = scaling()
m_planet = 5.0
a_planet = 1.0


def force(r, rsm):
    if r > rsm:
        f = cgs.grav * cgs.m_earth * m_planet / r**2
    else:
        f = cgs.grav * cgs.m_earth * m_planet / rsm**2 * (4. * (r / rsm) - 3. *
                                                          (r / rsm)**2)
    return f


#%% plot force
pl.figure(4)
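
The listing is cut off right after pl.figure(4); a minimal continuation that plots the softened force profile (the rsm value is assumed for illustration):

rsm = 0.1
rr = np.linspace(0.01, 1.0, 200)
pl.plot(rr, [force(r, rsm) for r in rr])
pl.xlabel('r')
pl.ylabel('force')
pl.show()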
Example #13
def ORRT_D1(train, test, S=1000, gamma=512):

    # Scale the training subset
    (train, Mtree, mtree) = scaling(train)
    N = len(train)
    p = int(len(train[0]) - 1)

    # Rescale the test subset
    test = rescaling(test, Mtree, mtree)

    # Definition of the objective function
    def f(x, train, gamma, p):
        calc = -(np.dot(train[:, :p], x[0:p]) / p - x[p]) * gamma
        P2 = np.zeros(len(calc))
        P2[calc < 600] = 1 / (1 + np.exp(calc[calc < 600]))
        P3 = 1 - P2
        Q2 = np.dot(train[:, :p], x[(p + 1):(2 * p + 1)]) - x[(3 * p + 1)]
        Q3 = np.dot(train[:, :p], x[(2 * p + 1):(3 * p + 1)]) - x[(3 * p + 2)]
        errorp = np.square(P2 * Q2 + P3 * Q3 - train[:, p])
        meanerrorp = np.mean(errorp)
        if np.isnan(meanerrorp):
            exit()
        return meanerrorp

    # Definition of bounds
    def lulb(p):
        lu = np.append(-np.ones(p + 1), np.repeat(-50, 2 * p + 2))
        lb = np.append(np.ones(p + 1), np.repeat(50, 2 * p + 2))
        return (lu, lb)

    d = lulb(p)
    bounds = Bounds(d[0], d[1])

    # Definition of gradient
    def gradient(x, train, gamma, p):
        calc = -(np.dot(train[:, :p], x[0:p]) / p - x[p]) * gamma
        p1 = np.zeros(len(calc))
        p1[calc < 600] = 1 / (1 + np.exp(calc[calc < 600]))
        exponencial1 = np.exp(600) * np.ones(len(calc))
        exponencial1[calc < 600] = np.exp(calc[calc < 600])
        P2 = p1
        P3 = 1 - p1
        Q2 = np.dot(train[:, :p], x[(p + 1):(2 * p + 1)]) - x[(3 * p + 1)]
        Q3 = np.dot(train[:, :p], x[(2 * p + 1):(3 * p + 1)]) - x[(3 * p + 2)]
        g = P2 * Q2 + P3 * Q3 - train[:, p]
        der = np.zeros_like(x)
        m1 = 2 * g * exponencial1 * np.square(p1) * (Q2 - Q3)
        der[0:p] = gamma / p * np.mean(np.transpose(train[:, :p]) * m1, axis=1)
        der[p] = -gamma * np.mean(m1)
        der[(p + 1):(2 * p + 1)] = np.mean(2 * g * np.transpose(train[:, :p]) *
                                           P2,
                                           axis=1)
        der[(2 * p + 1):(3 * p + 1)] = np.mean(2 * g *
                                               np.transpose(train[:, :p]) * P3,
                                               axis=1)
        der[(3 * p + 1)] = -np.mean(2 * g * P2)
        der[(3 * p + 2)] = -np.mean(2 * g * P3)
        return der

    # Set the grid of S random initial solutions
    np.random.seed(1)
    a1i = np.zeros((S, p))
    mu1i = np.zeros(S)
    nn = 0
    while (nn < S):
        a1i[nn, :] = 2 * np.random.random(p) - 1
        mu1i[nn] = 2 * np.random.random(1) - 1
        vale = True
        ii = 0
        while (vale and ii < N):
            if (np.isinf(
                    np.exp(
                        -(np.sum(a1i[nn, :] * train[ii, :p]) / p - mu1i[nn]) *
                        gamma))):
                vale = False
            else:
                ii = ii + 1
        if vale:
            nn = nn + 1
    x0 = np.zeros((S, 3 * p + 3))
    x0[:, 0:p] = a1i
    x0[:, p] = mu1i
    x0[:, (p + 1):(2 * p + 1)] = np.random.random((S, p))
    x0[:, (3 * p + 1)] = np.random.random(S)
    x0[:, (2 * p + 1):(3 * p + 1)] = np.random.random((S, p))
    x0[:, (3 * p + 2)] = np.random.random(S)

    # Define the function to be parallelized
    def funcion(valores):
        [f, x0nn, train, gamma, p, gradient, bounds] = valores
        try:
            res = minimize(f,
                           x0nn,
                           args=(train, gamma, p),
                           method='SLSQP',
                           jac=gradient,
                           options={
                               'ftol': 1e-5,
                               'disp': False,
                               'maxiter': 300
                           },
                           bounds=bounds)
            objetivo = res.fun
            sol = res.x
        except Exception:
            # sentinel objective so nanargmin never selects failed runs;
            # match the full solution-vector length (the original returned
            # np.zeros(p), which has the wrong shape)
            objetivo = 1e+300
            sol = np.zeros(3 * p + 3)
        return (objetivo, sol)

    values = [([f, x0[nn], train, gamma, p, gradient, bounds])
              for nn in range(S)]

    # Solve Problem (1) for the S initial solutions
    results = Parallel(n_jobs=8)(delayed(funcion)(value) for value in values)

    # Obtain the best solution
    objetivo = [results[i][0] for i in range(S)]
    indexopt = np.nanargmin(objetivo)
    xopt = results[indexopt][1]

    # Obtain the parameters of the SORRT with depth D = 1
    a1opt = xopt[0:p]
    mu1opt = xopt[p]
    a2opt = xopt[(p + 1):(2 * p + 1)]
    a3opt = xopt[(2 * p + 1):(3 * p + 1)]
    mu2opt = xopt[(3 * p + 1)]
    mu3opt = xopt[(3 * p + 2)]

    # Performance over the training and test subsets
    (predtrain, errortrain, msetrain,
     R2train) = predict(train, a1opt, mu1opt, a2opt, mu2opt, a3opt, mu3opt,
                        gamma)
    (predtest, errortest, msetest, R2test) = predict(test, a1opt, mu1opt,
                                                     a2opt, mu2opt, a3opt,
                                                     mu3opt, gamma)

    return (a1opt, mu1opt, a2opt, mu2opt, a3opt, mu3opt, gamma, predtrain,
            errortrain, msetrain, R2train, predtest, errortest, msetest,
            R2test)
Example #14
def main():

    cfg_fe = Config_FeatureEngineer()
    seed_everything(seed_value=cfg_fe.seed)

    data_dir = '/kaggle/input/lish-moa/'
    save_path = './'
    load_path = '/kaggle/input/moatabnetmultimodekfold/'
    runty = 'eval'

    train = pd.read_csv(os.path.join(data_dir, 'train_features.csv'))
    targets_scored = pd.read_csv(
        os.path.join(data_dir, 'train_targets_scored.csv'))
    test = pd.read_csv(os.path.join(data_dir, 'test_features.csv'))
    train_drug = pd.read_csv(os.path.join(data_dir, 'train_drug.csv'))
    submission = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

    x_train = train.copy()
    x_test = test.copy()
    y_train = targets_scored.copy()

    genes_features = [column for column in x_train.columns if 'g-' in column]
    cells_features = [column for column in x_train.columns if 'c-' in column]

    # scale the data, like RankGauss
    x_train, x_test = scaling(x_train,
                              x_test,
                              scale=cfg_fe.scale,
                              n_quantiles=cfg_fe.scale_n_quantiles,
                              seed=cfg_fe.seed)

    # decompose data, like PCA
    if runty == 'traineval':
        x_train, x_test = decompo_process(x_train,
                                          x_test,
                                          decompo=cfg_fe.decompo,
                                          genes_variance=cfg_fe.genes_variance,
                                          cells_variance=cfg_fe.cells_variance,
                                          seed=cfg_fe.seed,
                                          pca_drop_orig=cfg_fe.pca_drop_orig,
                                          runty=runty,
                                          path=save_path)
    elif runty == 'eval':
        x_train, x_test = decompo_process(x_train,
                                          x_test,
                                          decompo=cfg_fe.decompo,
                                          genes_variance=cfg_fe.genes_variance,
                                          cells_variance=cfg_fe.cells_variance,
                                          seed=cfg_fe.seed,
                                          pca_drop_orig=cfg_fe.pca_drop_orig,
                                          runty=runty,
                                          path=load_path)

    # select feature, VarianceThreshold
    x_train, x_test = feature_selection(
        x_train,
        x_test,
        feature_select=cfg_fe.feature_select,
        variancethreshold_for_FS=cfg_fe.variancethreshold_for_FS)

    # fe_stats
    x_train, x_test = fe_stats(x_train, x_test, genes_features, cells_features)

    # group the drug using kmeans
    if runty == 'traineval':
        x_train, x_test = fe_cluster(x_train,
                                     x_test,
                                     genes_features,
                                     cells_features,
                                     n_cluster_g=cfg_fe.n_clusters_g,
                                     n_cluster_c=cfg_fe.n_clusters_c,
                                     seed=cfg_fe.seed,
                                     runty=runty,
                                     path=save_path)
    elif runty == 'eval':
        x_train, x_test = fe_cluster(x_train,
                                     x_test,
                                     genes_features,
                                     cells_features,
                                     n_cluster_g=cfg_fe.n_clusters_g,
                                     n_cluster_c=cfg_fe.n_clusters_c,
                                     seed=cfg_fe.seed,
                                     runty=runty,
                                     path=load_path)

    # one-hot encoding
    x_train = onehot_encoding(x_train)
    x_test = onehot_encoding(x_test)

    feature_cols = [
        c for c in x_train.columns
        if (str(c)[0:5] != 'kfold' and c not in
            ['sig_id', 'drug_id', 'cp_type', 'cp_time', 'cp_dose'])
    ]
    target_cols = [x for x in y_train.columns if x != 'sig_id']

    # label smoothing
    if cfg_fe.regularization_ls:
        y_train = ls_manual(y_train, ls_rate=cfg_fe.ls_rate)

    # merge drug_id and labels
    x_train = x_train.merge(y_train, on='sig_id')
    x_train = x_train.merge(train_drug, on='sig_id')

    # remove sig_id
    # x_train, x_test, y_train = remove_ctl(x_train, x_test, y_train)

    # make CVs
    target_cols = [x for x in targets_scored.columns if x != 'sig_id']
    x_train = make_cv_folds(x_train, cfg_fe.seeds, cfg_fe.nfolds,
                            cfg_fe.drug_thresh, target_cols)

    begin_time = datetime.datetime.now()

    if (runty == 'traineval'):
        test_preds_all = train_tabnet(x_train, y_train, x_test, submission,
                                      feature_cols, target_cols, cfg_fe.seeds,
                                      cfg_fe.nfolds, save_path)
        y_train = targets_scored[
            train['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)
        test_pred_final = pred_tabnet(x_train,
                                      y_train,
                                      x_test,
                                      submission,
                                      feature_cols,
                                      target_cols,
                                      cfg_fe.seeds,
                                      cfg_fe.nfolds,
                                      load_path='./',
                                      stacking=False)
    elif (runty == 'eval'):
        y_train = targets_scored[
            train['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)
        test_pred_final = pred_tabnet(x_train,
                                      y_train,
                                      x_test,
                                      submission,
                                      feature_cols,
                                      target_cols,
                                      cfg_fe.seeds,
                                      cfg_fe.nfolds,
                                      load_path,
                                      stacking=False)

    time_diff = datetime.datetime.now() - begin_time
    print(f'Total time is {time_diff}')

    # make submission
    all_feat = [col for col in submission.columns if col not in ["sig_id"]]
    # To obtain the same length of test_preds_all and submission
    # sig_id = test[test["cp_type"] != "ctl_vehicle"].sig_id.reset_index(drop=True)
    sig_id = test.sig_id
    tmp = pd.DataFrame(test_pred_final, columns=all_feat)
    tmp["sig_id"] = sig_id

    submission = pd.merge(test[["sig_id"]], tmp, on="sig_id", how="left")
    submission.fillna(0, inplace=True)
    submission[test["cp_type"] == "ctl_vehicle"] = 0.

    submission.to_csv("submission_tabbet.csv", index=None)