def train(x, y, l, max_iter=20):
    """Train a linear classifier with Pegasos-style sub-gradient steps.

    For every example the weights are first shrunk by ``1 - nt * l`` (with
    step size ``nt = 1 / (t * l)``); if the example violates the margin
    (``y * <w, x> < 1``) the scaled example ``nt * y * x`` is added to the
    weights.

    Args:
        x: Sequence of feature mappings; the keys of ``x[0]`` seed the
            weight vector.
        y: Sequence of labels, index-aligned with ``x``.
        l: Regularisation strength (lambda); must be non-zero because the
            step size divides by it.
        max_iter: Number of passes over the data (default 20, the value
            that used to be hard-coded).

    Returns:
        The weight mapping after the last pass.
    """
    w = Counter({name: 0 for name in x[0]})

    # Single pre-formatted string: prints the same text on Python 2 and 3.
    print("Lambda =  %s" % (l,))

    for epoch in range(max_iter):
        print("Passing through data, iter # %s" % (epoch + 1,))

        # The step counter restarts on every pass.
        t = 0.0
        for i, features in enumerate(x):
            t += 1.0
            nt = 1 / (t * l)

            # Regularisation shrink: w <- (1 - nt * l) * w
            w = multiply(1 - nt * l, w)

            if dot_product(multiply(y[i], w), features) < 1:
                # Margin violated: apply the hinge-loss step, which was
                # previously computed but never added to the weights.
                # assumes multiply() returns a dict-like mapping - TODO confirm
                step = multiply(nt * y[i], features)
                for name, value in step.items():
                    w[name] = w.get(name, 0) + value

    return w
def train(x, y, max_iter):
    """Train a perceptron on the examples in ``x`` with labels ``y``.

    The data is passed through repeatedly until a full pass produces no
    mistake or until ``max_iter`` passes have been made. An example counts
    as a mistake when ``<y_i * x_i, w> <= 0``; the weights are then moved
    by ``y_i * x_i``.

    Args:
        x: Sequence of feature mappings; the keys of ``x[0]`` seed the
            weight vector.
        y: Sequence of labels, index-aligned with ``x``.
        max_iter: Maximum number of passes. A value below 1 never matches
            the pass counter, so training then only stops on convergence.

    Returns:
        Tuple ``(w, mistakes, iterations)``: the weights, the total number
        of mistakes and the number of passes made.
    """
    mistakes = iterations = 0
    w = Counter({name: 0 for name in x[0]})

    while True:
        all_correct = True
        iterations += 1

        # Single pre-formatted string: prints the same text on Python 2 and 3.
        print("Passing through data, iter # %s" % (iterations,))
        for i, features in enumerate(x):
            # Examples whose features are all zero are skipped.
            if all(value == 0 for value in features.values()):
                continue

            op = multiply(y[i], features)

            if dot_product(op, w) <= 0:
                all_correct = False
                mistakes += 1
                # Perceptron update w <- w + y_i * x_i; without it the
                # weights never change and no pass can ever be clean.
                # assumes multiply() returns a dict-like mapping - TODO confirm
                for name, value in op.items():
                    w[name] += value

        if all_correct or max_iter == iterations:
            break

    return w, mistakes, iterations
def VarCircle(X, Y, ParIn):
    """Return the sample variance of the distances from points to a circle.

    Helper function for calculating the curvature.

    Args:
        X: x values of the data points.
        Y: y values of the data points (at least as many as ``X``).
        ParIn: Circle parameters ``(a, b, R)``: centre coordinates and
            radius.

    Returns:
        The sum of squared radial residuals (distance to the centre minus
        ``R``) divided by ``n - 3``, where ``n = len(X)``.

    Raises:
        ZeroDivisionError: If exactly three points are given.
    """
    centre_x, centre_y, radius = ParIn[0], ParIn[1], ParIn[2]
    n = len(X)

    # Radial residual of every point with respect to the circle.
    residuals = []
    for i in range(n):
        dx = X[i] - centre_x
        dy = Y[i] - centre_y
        residuals.append(math.sqrt(dx * dx + dy * dy) - radius)

    # Three circle parameters are fitted, hence n - 3 in the denominator.
    return sum(d * d for d in residuals) / (n - 3)
    def boostrap(Q_str, c_str, site, selection, alpha, NumCycles, Confidence):
        """Bootstrap a confidence envelope for the linear fit of Q against lab field.

        Every cycle draws as many random specimens (with replacement) as there
        are type-0 measurements in ``selection``, scales their moments by a
        random factor derived from the stored error, and fits a straight line
        through the resulting (lab field, Q) points. The line is evaluated at
        11 lab fields; after all cycles the values per lab field are sorted and
        cut off according to ``Confidence``.

        Args:
            Q_str: ``"DB"`` or ``"DSC"``; selects the ratio that is fitted.
            c_str: ``"_corr"`` to use the corrected moments in the DSC ratio.
            site: Measurement dicts of the whole site; only the type-1 entries
                are used, to determine the lab-field range.
            selection: Measurement dicts of the selected specimens. Entries of
                type 0..3 are read; types 0 and 1 (and 2, 3 for DSC) must be
                index-aligned per specimen.
            alpha: Weight of the type-3 measurement in the DSC ratio.
            NumCycles: Number of accepted cycles to aim for; at most
                ``5 * NumCycles`` cycles are attempted.
            Confidence: Confidence level in percent.

        Returns:
            ``[PI_min, PI_max, Boot_int_min, Boot_int_max, Boot_avg]``. The two
            intercepts are ``None`` when no cycle was accepted or the envelope
            does not cross zero; the three lists hold ``[lab field, value]``
            pairs and are empty when no cycle was accepted.

        Raises:
            ValueError: If ``Q_str`` is neither ``"DB"`` nor ``"DSC"`` (or, from
                ``min``/``max``, if ``site`` holds no type-1 measurement).
        """

        def _perturbed(moment, error):
            # Scale the moment by a random factor in 1 +/- 0.01 * error
            # (assumes helpers.rand_num() is uniform on [0, 1) - TODO confirm).
            frac = helpers.rand_num() * (0.02 * error) + 1 - 0.01 * error
            return frac * moment

        def _x_intercept(values):
            # Lab field where ``values`` goes from negative to positive, by
            # linear interpolation between the two bracketing steps. The last
            # such crossing is used; None when there is none.
            crossing = None
            for k in range(len(values) - 1):
                if values[k] < 0 and values[k + 1] > 0:
                    crossing = k
            if crossing is None:
                return None
            slope = (values[crossing + 1] - values[crossing]) / (
                Hlist[crossing + 1] - Hlist[crossing])
            return -1 * (values[crossing] - Hlist[crossing] * slope) / slope

        # split in measurements m0 m1 m2 m3 with multiple specimens per list
        m0 = [meas for meas in selection if meas['type'] == 0]
        m1 = [meas for meas in selection if meas['type'] == 1]
        m2 = [meas for meas in selection if meas['type'] == 2]
        m3 = [meas for meas in selection if meas['type'] == 3]

        # The lab-field steps are based on all the data of the site, not only
        # on the selection.
        fields = [meas["lab_field"] for meas in site if meas['type'] == 1]
        minField = min(fields)
        maxField = max(fields)
        numsteps = 11  # Moster et al., 2015 shows that 11 lab steps give the best results
        step = (minField + maxField) / (numsteps - 1.)  # This is (Hmin+Hmax)/10
        Hlist = [k * step for k in range(numsteps)]

        # set minimum standard deviation for Hlab
        stdevH_min = 10

        # One column of fitted values per lab-field step.
        Q_Hlist = [[] for _ in range(numsteps)]

        m = 0
        killCounter = 0
        while m < NumCycles and killCounter < NumCycles * 5:

            Hlab_DB = []
            Hlab_DSC = []
            Q_DB_error = []
            Q_DSC_error = []

            num_specimens = len(m0)

            for _ in range(num_specimens):  # get N times a random specimen
                i = int(helpers.rand_num() * num_specimens)

                m_m0 = m0[i]["total_m"]
                m_m1 = m1[i]["total_m"]
                e_m0 = m0[i]["error"]
                e_m1 = m1[i]["error"]

                # NOTE(review): the statements filling Hlab_DB / Hlab_DSC were
                # missing; the lab field is taken from the type-1 measurement
                # because that is where the field range above comes from -
                # confirm against the data model.
                lab_field = m1[i]["lab_field"]

                m0_err = _perturbed(m_m0, e_m0)
                m1_err = _perturbed(m_m1, e_m1)

                Q_DB_error.append((m1_err - m0_err) / m0_err)
                Hlab_DB.append(lab_field)

                if Q_str == "DSC" and m2[i]["total_m"] is not None:
                    m_m2 = m2[i]["total_m"]
                    m_m3 = m3[i]["total_m"]
                    e_m2 = m2[i]["error"]
                    e_m3 = m3[i]["error"]

                    # for the corrected version, replace the moments
                    if c_str == "_corr":
                        m1M = [m1[i]["x"], m1[i]["y"], m1[i]["z"]]
                        m2M = [m2[i]["x"], m2[i]["y"], m2[i]["z"]]
                        m3M = [m3[i]["x"], m3[i]["y"], m3[i]["z"]]

                        NRMrem = helpers.list_mult_num(
                            helpers.list_plus_list(m1M, m2M), 0.5)
                        NRMrem_norm = helpers.norm(NRMrem)

                        m_m1 = NRMrem_norm + helpers.norm(
                            helpers.list_min_list(m1M, NRMrem))
                        # exception to the rule: the second term is subtracted
                        m_m2 = NRMrem_norm - helpers.norm(
                            helpers.list_min_list(m2M, NRMrem))
                        m_m3 = NRMrem_norm + helpers.norm(
                            helpers.list_min_list(m3M, NRMrem))

                    # Fresh random factors for the DSC ratio (m0 and m1 are
                    # redrawn as well).
                    m0_err = _perturbed(m_m0, e_m0)
                    m1_err = _perturbed(m_m1, e_m1)
                    m2_err = _perturbed(m_m2, e_m2)
                    m3_err = _perturbed(m_m3, e_m3)

                    Q_DSC_error.append(
                        2 *
                        ((1 + alpha) * m1_err - m0_err - alpha * m3_err) /
                        (2 * m0_err - m1_err - m2_err))
                    Hlab_DSC.append(lab_field)

            if Q_str == "DB":
                Q_error = Q_DB_error
                Hlab = Hlab_DB
            elif Q_str == "DSC":
                Q_error = Q_DSC_error
                Hlab = Hlab_DSC
            else:
                raise ValueError("Q_str must be 'DB' or 'DSC', got %r" % (Q_str,))

            N = len(Q_error)

            if N > 1:
                avgH = sum(Hlab) / N
                stdevH = math.sqrt(
                    sum((field - avgH)**2 for field in Hlab) / (N - 1))

                # only fit a line if the points are not all at the same Hlab
                if stdevH > stdevH_min:
                    # NOTE(review): x / y were never filled in the damaged
                    # source; lab field on x and Q on y matches the way the
                    # fit is evaluated at Hlist below - confirm.
                    x = Hlab
                    y = Q_error

                    Sx = sum(x)
                    Sy = sum(y)
                    Sxy = helpers.dot_product(x, y)
                    Sxx = helpers.dot_product(x, x)

                    b = (N * Sxy - Sx * Sy) / (N * Sxx - Sx**2)
                    a = Sy / N - b * Sx / N

                    for column, field in zip(Q_Hlist, Hlist):
                        column.append(a + b * field)

                    # only accepted cycles count towards NumCycles
                    m += 1
            killCounter += 1

        # sort columns and apply cut-off
        cutOffValue = 0.01 * (100 - Confidence) / 2
        cutOff = int(NumCycles * cutOffValue)
        for column in Q_Hlist:
            column.sort()

        Boot_int_min = []
        Boot_int_max = []
        Boot_avg = []
        PI_min = None
        PI_max = None

        if Q_Hlist[0]:
            # average of the bootstrap at each of the 11 lab fields
            Boot_avg = [[field, sum(column) / len(column)]
                        for field, column in zip(Hlist, Q_Hlist)]

            # Indices of the lower and upper cut-off in the sorted columns,
            # clamped so that a run with few accepted cycles cannot index out
            # of range.
            F = min(cutOff, m - 1)
            L = max(m - cutOff - 1, 0)

            y_min = [column[F] for column in Q_Hlist]
            y_max = [column[L] for column in Q_Hlist]

            Boot_int_min = [[field, value] for field, value in zip(Hlist, y_min)]
            Boot_int_max = [[field, value] for field, value in zip(Hlist, y_max)]

            # x axis intercepts of the lower and upper bound
            ictLow = _x_intercept(y_min)
            ictHigh = _x_intercept(y_max)

            # the intercepts of the bootstrap intervals are the PI bounds
            PI_min = ictHigh
            PI_max = ictLow

        return [PI_min, PI_max, Boot_int_min, Boot_int_max, Boot_avg]
    def direction_stat_dir_type(x, y, z, type_dir):
        """Return the principal direction and MAD of a set of vectors.

        The orientation tensor of the (optionally centred) coordinates is
        decomposed into eigenvalues and eigenvectors; the eigenvector of the
        largest eigenvalue is the principal direction. It is flipped when it
        points away from the reference vector (first point minus last point).

        Args:
            x, y, z: Coordinate lists of equal length.
            type_dir: ``1`` for a free fit (coordinates are centred on their
                mean) or ``2`` for a forced / anchored fit (coordinates are
                used as they are).

        Returns:
            ``[Mdec, Minc, MAD, CMvec]``: the first two values returned by
            ``helpers.cart2dir`` for the principal direction, the MAD in
            degrees, and the centre of mass ``[mean_x, mean_y, mean_z]``.

        Raises:
            ValueError: If ``type_dir`` is neither 1 nor 2.
        """
        # center of mass
        mean_x = sum(x) / len(x)
        mean_y = sum(y) / len(y)
        mean_z = sum(z) / len(z)

        # transform the NRM vector
        if type_dir == 1:  # free
            x_prime = helpers.list_min_num(x, mean_x)
            y_prime = helpers.list_min_num(y, mean_y)
            z_prime = helpers.list_min_num(z, mean_z)
        elif type_dir == 2:  # forced / anchored
            x_prime = x
            y_prime = y
            z_prime = z
        else:
            raise ValueError(
                "type_dir must be 1 (free) or 2 (anchored), got %r" % (type_dir,))

        # The orientation tensor is symmetric, so six products suffice.
        xx = helpers.dot_product(x_prime, x_prime)
        yy = helpers.dot_product(y_prime, y_prime)
        zz = helpers.dot_product(z_prime, z_prime)
        xy = helpers.dot_product(x_prime, y_prime)
        xz = helpers.dot_product(x_prime, z_prime)
        yz = helpers.dot_product(y_prime, z_prime)

        # this numpy statement stays here :-)
        orient_tensor = numpy.array([[xx, xy, xz],
                                     [xy, yy, yz],
                                     [xz, yz, zz]])

        # get the eigenvalues (tau), and eigenvectors (V), as plain lists
        tau, V = numpy.linalg.eig(orient_tensor)
        tau = tau.tolist()
        V = V.tolist()

        # rescale tau to sum-to-one
        tau = helpers.list_div_num(tau, sum(tau))

        # index of the largest eigenvalue (first one on ties)
        ind_tau_max = max(range(len(tau)), key=tau.__getitem__)
        tau_max = tau[ind_tau_max]

        # numpy stores eigenvectors as columns; transpose to pick one as a row
        v1 = helpers.transpose_list(V)[ind_tau_max]
        e1 = tau_max

        # sum of the other two eigenvalues; clamped because rounding can push
        # it marginally below zero, which math.sqrt would reject
        sum_e23 = max(sum(tau) - tau_max, 0.0)

        # reference vector: first point minus last point
        last = len(x_prime) - 1
        R = [x_prime[0] - x_prime[last],
             y_prime[0] - y_prime[last],
             z_prime[0] - z_prime[last]]

        # clamp to the domain of acos
        dot = min(1, max(-1, helpers.dot_product(v1, R)))

        # Flip the eigenvector when it points away from the reference vector.
        # Previously the flipped vector was always overwritten by v1.
        if math.acos(dot) > (math.pi / 2.):
            PD = helpers.list_mult_num(v1, -1)
        else:
            PD = v1

        [Mdec, Minc, R] = helpers.cart2dir(PD[0], PD[1], PD[2])

        CMvec = [mean_x, mean_y, mean_z]

        MAD = math.atan(math.sqrt(sum_e23 / e1)) * 180 / math.pi
        return [Mdec, Minc, MAD, CMvec]
    def results(Q_str, c_str, Eps_alt):
        """Fit a line through the Q data and store the results in ``sc``.

        Reads ``sc["MSP_Q_calc" + c_str]["Q_" + Q_str]``, performs a linear
        regression on the entries whose second element is not ``None`` and, if
        more than one such entry exists, writes ``PI``, ``avg_eps_alt``,
        ``delta_b``, ``r_sq``, ``chi_sq`` and ``Line_fig`` to
        ``sc["MSP_results_Q_" + Q_str + c_str]``. Nothing is written when
        there are fewer than two usable entries.

        Args:
            Q_str: ``"DB"`` or another ratio name (``"DSC"``); for ``"DB"``
                ``delta_b`` and ``avg_eps_alt`` are stored as ``None``.
            c_str: Suffix selecting the (un)corrected data set in ``sc``.
            Eps_alt: Per-specimen epsilon values, only used when ``Q_str`` is
                not ``"DB"``.
        """
        Q = sc["MSP_Q_calc" + c_str]["Q_" + Q_str]

        x = []
        y = []
        EpsAlt = []

        # Keep only the specimens that have a value.
        # NOTE(review): the loop body was missing from the source. This
        # assumes every entry of Q is [lab field, Q value] and that Eps_alt
        # holds one number per entry of Q - confirm against the code that
        # fills sc["MSP_Q_calc..."].
        for i, entry in enumerate(Q):
            if entry[1] is not None:
                x.append(entry[0])
                y.append(entry[1])
                if Q_str != "DB":
                    EpsAlt.append(Eps_alt[i])

        N = len(x)  # number of specimens

        if N > 1:  # enough specimens to calculate the linear regression
            Sx = sum(x)
            Sy = sum(y)
            Sxy = helpers.dot_product(x, y)
            Sxx = helpers.dot_product(x, x)

            # linear regression coefficients: y = LRa + LRb * x
            LRb = (N * Sxy - Sx * Sy) / (N * Sxx - Sx**2)
            LRa = Sy / N - LRb * Sx / N

            # PI is the x axis intercept of the regression line
            PI = -1 * LRa / LRb

            # two points for drawing the regression line
            x1 = -1
            x2 = 500
            Line_fig = [[x1, LRa + LRb * x1], [x2, LRa + LRb * x2]]

            # r squared from the deviations of x and y from their averages
            xDiff = helpers.list_min_num(x, Sx / N)
            yDiff = helpers.list_min_num(y, Sy / N)

            x2Sum = helpers.dot_product(xDiff, xDiff)
            y2Sum = helpers.dot_product(yDiff, yDiff)
            xySum = helpers.dot_product(xDiff, yDiff)

            r_sq = (xySum / math.sqrt(x2Sum * y2Sum))**2

            # expected y per specimen: x * LRb - (-LRa)
            Yexp = helpers.list_min_num(helpers.list_mult_num(x, LRb),
                                        -1 * LRa)
            yminYexp = helpers.list_min_list(y, Yexp)
            chi_sq = helpers.dot_product(yminYexp, yminYexp) / N

            # delta_b and the average epsilon exist for DSC only; without the
            # else branch the DB values were overwritten as well
            if Q_str == "DB":
                delta_b = None
                avg_eps_alt = None
            else:
                delta_b = LRa + 1
                avg_eps_alt = sum(EpsAlt) / N

            out = sc["MSP_results_Q_" + Q_str + c_str]
            out["PI"] = PI
            out["avg_eps_alt"] = avg_eps_alt
            out["delta_b"] = delta_b
            out["r_sq"] = r_sq
            out["chi_sq"] = chi_sq
            out["Line_fig"] = Line_fig  # line through point 1 and 2, [[x1,y1], [x2,y2]]
def s_tensor_calc(sc):
    """Calculate the s-tensor for the anisotropy correction.

    Follows the calculations in the standard paleointensity definitions by
    Paterson et al., 2014. The six TRM measurements are put in measurement
    order, a design matrix A is built from the six field directions, and the
    least-squares solution ``s = (At A)^-1 At TRM`` is stored.

    Args:
        sc: Result container. Reads ``sc["preprocessed"]["aniso_trm"]`` with
            keys ``x+``, ``x-``, ``y+``, ``y-``, ``z+``, ``z-`` and
            ``orderd``.

    Returns:
        ``sc``; when anisotropy data is present (``x+`` is not ``None``)
        ``sc["anisotropy_statistics"]["Aniso_tensor"]["s_tensor"]`` holds the
        six tensor elements, otherwise ``sc`` is returned unchanged.
    """
    aniso_trm = sc["preprocessed"]["aniso_trm"]
    Xp_list = aniso_trm["x+"]
    Xm_list = aniso_trm["x-"]
    Yp_list = aniso_trm["y+"]
    Ym_list = aniso_trm["y-"]
    Zp_list = aniso_trm["z+"]
    Zm_list = aniso_trm["z-"]
    orderd = aniso_trm["orderd"]

    if Xp_list is None:  # no anisotropy data, nothing to calculate
        return sc

    master_list = [Xp_list, Yp_list, Zp_list, Xm_list, Ym_list, Zm_list]

    # Order of the aniso measurements as 1-based positions in master_list
    # (type 81..86 -> 1..6); the check (type 87) is left out.
    meas_ord = [int(meas["type"]) - 80 for meas in orderd
                if meas["type"] != 87]

    # TRM vector: x, y, z of the 6 measurements in the order they were made.
    # NOTE(review): the body of the coordinate helper was missing from the
    # source. This assumes every direction list holds a single measurement
    # dict with keys "x", "y" and "z", giving the 18 values the product with
    # x below needs - confirm against the preprocessing step.
    TRM = []
    for i in range(len(master_list)):
        for meas in master_list[meas_ord[i] - 1]:
            TRM.extend([meas["x"], meas["y"], meas["z"]])

    # the 6 field directions, in the same order as master_list
    master_dir = [
        [1, 0, 0],   # x+
        [0, 1, 0],   # y+
        [0, 0, 1],   # z+
        [-1, 0, 0],  # x-
        [0, -1, 0],  # y-
        [0, 0, -1],  # z-
    ]

    # P matrix: field directions in measurement order
    P = [master_dir[position - 1] for position in meas_ord]

    # Design matrix A: three rows per field direction, one for each component
    # of the product of the symmetric tensor [[s1, s4, s6], [s4, s2, s5],
    # [s6, s5, s3]] with that direction.
    A = []
    for px, py, pz in P:
        A.append([px, 0, 0, py, 0, pz])
        A.append([0, py, 0, px, pz, 0])
        A.append([0, 0, pz, 0, py, px])

    At = helpers.transpose_list(A)

    AtA = [[helpers.dot_product(row_i, row_j) for row_j in At]
           for row_i in At]

    # numpy is only used for the matrix inversion; back to lists afterwards
    AtA_inv = numpy.linalg.inv(numpy.array(AtA)).tolist()

    # x = (At A)^-1 At; element (i, j) is the dot product of row i of the
    # inverse with row j of A
    x = [[helpers.dot_product(inv_row, a_row) for a_row in A]
         for inv_row in AtA_inv]

    # s (6 x 1) = x (6 x 18) * TRM (18 x 1)
    s = [helpers.dot_product(x_row, TRM) for x_row in x]

    sc["anisotropy_statistics"]["Aniso_tensor"]["s_tensor"] = s
    return sc
def anisotropy_calc(sc):
    """Calculate the anisotropy correction of the paleointensity estimate.

    Uses the previously calculated s-tensor. The mean direction of the
    selected data gives the unit vector Mhat_ChRM, from which the direction
    of the ancient field is solved. The correction factor is the ratio of the
    magnetisation acquired along the lab field to that acquired along the
    ancient field; it is multiplied with B_anc.

    Args:
        sc: Result container. Reads
            ``sc["preprocessed"]["aniso_trm"]["x+"]``,
            ``sc["directional_statistics"]["mean_dir_stat"]`` (``Mdec_free``,
            ``Minc_free``),
            ``sc["preprocessed"]["field_basics"]["field_dir_vec"]``,
            ``sc["arai_statistics"]["PI_Banc_est"]["B_anc"]`` and
            ``sc["anisotropy_statistics"]["Aniso_tensor"]["s_tensor"]``.

    Returns:
        ``sc``; when anisotropy data is present, ``aniso_c`` and
        ``Banc_aniso_corr`` are stored under
        ``sc["anisotropy_statistics"]["Anisotropy_Correction"]``. Without
        anisotropy data ``sc`` is returned unchanged.
    """
    # No anisotropy data: do nothing. Checked first so that the s-tensor,
    # which only exists when there is data, is not looked up needlessly.
    if sc["preprocessed"]["aniso_trm"]["x+"] is None:
        return sc

    mean_dir = sc["directional_statistics"]["mean_dir_stat"]
    Mdec_free = mean_dir["Mdec_free"]
    Minc_free = mean_dir["Minc_free"]

    field_dir_vec = sc["preprocessed"]["field_basics"]["field_dir_vec"]
    B_anc = sc["arai_statistics"]["PI_Banc_est"]["B_anc"]
    s_tensor = sc["anisotropy_statistics"]["Aniso_tensor"]["s_tensor"]

    # unit vector of the mean direction of the selected data
    Mhat_ChRM = helpers.dir2cart(Mdec_free, Minc_free, 1)

    # symmetric anisotropy tensor from the six s-tensor elements
    A = [[s_tensor[0], s_tensor[3], s_tensor[5]],
         [s_tensor[3], s_tensor[1], s_tensor[4]],
         [s_tensor[5], s_tensor[4], s_tensor[2]]]

    # numpy is only used to solve A * Hanc = Mhat_ChRM; back to a list after
    Hanc = numpy.linalg.solve(numpy.array(A), numpy.array(Mhat_ChRM)).tolist()

    # unit vector in the direction of the ancient field
    Hanc_hat = helpers.list_div_num(Hanc, helpers.norm(Hanc))

    # The lab field orientation is field_dir_vec; using the mean direction
    # here was wrong (13 feb 2020).
    Manc = [helpers.dot_product(row, Hanc_hat) for row in A]
    Mlab = [helpers.dot_product(row, field_dir_vec) for row in A]

    aniso_c = helpers.norm(Mlab) / helpers.norm(Manc)
    Banc_aniso_corr = aniso_c * B_anc

    # NOTE(review): the left-hand side of these assignments was truncated in
    # the source; the container name follows the "output" note of the
    # original header comment and is expected to exist already - confirm.
    correction = sc["anisotropy_statistics"]["Anisotropy_Correction"]
    correction["aniso_c"] = aniso_c
    correction["Banc_aniso_corr"] = Banc_aniso_corr

    return sc