def train(x, y, l):
    """Train a linear SVM with the Pegasos sub-gradient algorithm.

    x: list of Counter feature vectors, y: list of labels (+1/-1), l: regularization lambda.
    """
    j = 0
    max_iter = 20
    w = Counter({feature: 0 for feature in x[0]})  # initialise all weights to zero
    print("Lambda = ", l)
    while j < max_iter:
        t = 0.0
        print("Passing through data, iter #", j + 1)
        for i, features in enumerate(x):
            t += 1.0
            nt = 1 / (t * l)             # learning rate eta_t = 1 / (lambda * t)
            p1 = 1 - nt * l              # shrink factor (1 - eta_t * lambda)
            w = multiply(p1, w)
            op = multiply(y[i], w)
            if dot_product(op, features) < 1:      # margin violated: take a gradient step
                p2 = multiply(nt * y[i], features)
                w.update(p2)             # w += eta_t * y_i * x_i
        j += 1
    return w
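# --- Example (not part of the original module) -------------------------------
# train() above relies on two Counter-based helpers, multiply() and dot_product(),
# that are not shown here. A minimal sketch of what they are assumed to do, plus
# a commented-out toy call, is given below; the originals may differ in detail.
from collections import Counter

def multiply(scalar, vec):
    # scale every feature value of a Counter by a scalar
    return Counter({k: scalar * v for k, v in vec.items()})

def dot_product(vec_a, vec_b):
    # inner product over the shared features of two Counters
    return sum(v * vec_b.get(k, 0) for k, v in vec_a.items())

# toy usage: two separable examples with labels +1 / -1
# x_toy = [Counter({"f1": 1.0, "f2": 0.0}), Counter({"f1": 0.0, "f2": 1.0})]
# y_toy = [1, -1]
# w = train(x_toy, y_toy, 0.1)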
def train(x, y, max_iter):
    """Train a perceptron: pass over the data until no mistakes are made or
    max_iter passes are reached.

    x: list of Counter feature vectors, y: list of labels (+1/-1).
    Returns the weight vector, the number of mistakes and the number of passes.
    """
    mistakes = iterations = 0
    w = Counter({feature: 0 for feature in x[0]})  # initialise all weights to zero
    while True:
        all_correct = True
        iterations += 1
        print("Passing through data, iter #", iterations)
        for i, features in enumerate(x):
            op = multiply(y[i], features)
            # skip all-zero feature vectors; they can never be classified
            if all(v == 0 for v in features.values()):
                continue
            if dot_product(op, w) <= 0:   # misclassified: w += y_i * x_i
                w.update(op)
                all_correct = False
                mistakes += 1
        if all_correct or iterations == max_iter:
            break
    return w, mistakes, iterations
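# --- Example (not part of the original module) -------------------------------
# A small hypothetical run of the perceptron trainer above, reusing the
# Counter-based multiply()/dot_product() helpers sketched earlier. With two
# linearly separable points the loop converges within a couple of passes and
# returns the weights together with the mistake and pass counts.
#
# x_toy = [Counter({"bias": 1.0, "f1": 1.0}), Counter({"bias": 1.0, "f1": -1.0})]
# y_toy = [1, -1]
# w, mistakes, iterations = train(x_toy, y_toy, max_iter=10)
# print(w, mistakes, iterations)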
def VarCircle(X, Y, ParIn):
    """Compute the sample variance of the distances from the data points (X, Y)
    to the circle Par = [a, b, R]; helper function for the curvature calculation.

    Input: lists of x and y values, and a tuple with the circle parameters (a, b, r).
    """
    # Handle inputs
    n = len(X)
    Dx = []
    Dy = []
    D = []
    for i in range(n):
        Dx.append(X[i] - ParIn[0])
        Dy.append(Y[i] - ParIn[1])
        D.append(math.sqrt(Dx[i] * Dx[i] + Dy[i] * Dy[i]) - ParIn[2])

    result = helpers.dot_product(D, D) / (n - 3)
    return result
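# --- Example (not part of the original module) -------------------------------
# Quick sanity check for VarCircle(): points sampled exactly on a circle of
# radius 2 centred at (1, 1) should give a variance of (nearly) zero. This
# assumes helpers.dot_product() is the plain inner product of two lists.
#
# import math
# angles = [k * 2 * math.pi / 8 for k in range(8)]
# X = [1 + 2 * math.cos(a) for a in angles]
# Y = [1 + 2 * math.sin(a) for a in angles]
# print(VarCircle(X, Y, (1, 1, 2)))   # expected: ~0.0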
def boostrap(Q_str, c_str, site, selection, alpha, NumCycles, Confidence):
    # split into measurements m0 m1 m2 m3 m4 with multiple specimens per list
    m0 = list(filter(lambda m: m['type'] == 0, selection))
    m1 = list(filter(lambda m: m['type'] == 1, selection))
    m2 = list(filter(lambda m: m['type'] == 2, selection))
    m3 = list(filter(lambda m: m['type'] == 3, selection))
    m4 = list(filter(lambda m: m['type'] == 4, selection))

    m1_all = list(filter(lambda m: m['type'] == 1, site))

    # get the steps for the labfield array; this is done by looking at all the
    # data from one site and finding the minimum and maximum labfields used
    fields = []
    num_specimens = len(m1_all)  # all the data, not only the selection
    for j in range(num_specimens):
        fields.append(m1_all[j]["lab_field"])  # append all used labfields

    # find min and max labfield used and determine the step
    minField = min(fields)
    maxField = max(fields)
    numsteps = 11  # Monster et al., 2015 show that 11 lab steps give the best results
    step = (minField + maxField) / (numsteps - 1.)  # this is (Hmin + Hmax) / 10

    # append the steps to a list of labfields -> Hlist
    Hlist = []
    for i in range(numsteps):
        Hlist.append(i * step)

    # set minimum standard deviation for Hlab
    stdevH_min = 10

    N2 = []
    stdevHl = []
    aa = []
    bb = []
    intercept = []
    H0 = []
    H1 = []
    H2 = []
    H3 = []
    H4 = []
    H5 = []
    H6 = []
    H7 = []
    H8 = []
    H9 = []
    H10 = []

    m = 0
    killCounter = 0
    while m < NumCycles and killCounter < NumCycles * 5:
        Hlab_DB = []
        Hlab_DSC = []
        Q_DB_error = []
        Q_DSC_error = []

        num_specimens = len(m0)
        for j in range(num_specimens):  # draw N random specimens (with replacement)
            # get the index of a random specimen
            i = int(helpers.rand_num() * num_specimens)  # random number between 0 & N

            # get moment per random specimen
            m_m0 = m0[i]["total_m"]
            m_m1 = m1[i]["total_m"]

            # get corresponding error for that specimen, for Q_DB only m0 & m1
            e_m0 = m0[i]["error"]
            e_m1 = m1[i]["error"]

            # calculate new m0_err and m1_err to calculate new Q_DB_error
            frac_m0 = helpers.rand_num() * (0.02 * e_m0) + 1 - 0.01 * e_m0
            m0_err = frac_m0 * m_m0

            frac_m1 = helpers.rand_num() * (0.02 * e_m1) + 1 - 0.01 * e_m1
            m1_err = frac_m1 * m_m1

            Q_DB_error.append((m1_err - m0_err) / m0_err)
            Hlab_DB.append(m1[i]["lab_field"])

            if Q_str == "DSC":
                if m2[i]["total_m"] is not None:
                    m_m2 = m2[i]["total_m"]
                    m_m3 = m3[i]["total_m"]
                    m_m4 = m4[i]["total_m"]

                    e_m2 = m2[i]["error"]
                    e_m3 = m3[i]["error"]
                    e_m4 = m4[i]["error"]

                    # check for the corrected version; if so, replace the moments
                    if c_str == "_corr":
                        m0M = [m0[i]["x"], m0[i]["y"], m0[i]["z"]]
                        m1M = [m1[i]["x"], m1[i]["y"], m1[i]["z"]]
                        m2M = [m2[i]["x"], m2[i]["y"], m2[i]["z"]]
                        m3M = [m3[i]["x"], m3[i]["y"], m3[i]["z"]]
                        m4M = [m4[i]["x"], m4[i]["y"], m4[i]["z"]]

                        NRMrem = helpers.list_mult_num(helpers.list_plus_list(m1M, m2M), 0.5)

                        m1pTRM = helpers.list_min_list(m1M, NRMrem)
                        m2pTRM = helpers.list_min_list(m2M, NRMrem)
                        m3pTRM = helpers.list_min_list(m3M, NRMrem)
                        m4pTRM = helpers.list_min_list(m4M, NRMrem)

                        m_m0 = m0[i]["total_m"]                              # m_m0_corr
                        m_m1 = helpers.norm(NRMrem) + helpers.norm(m1pTRM)   # m_m1_corr
                        m_m2 = helpers.norm(NRMrem) - helpers.norm(m2pTRM)   # exception to the rule
                        m_m3 = helpers.norm(NRMrem) + helpers.norm(m3pTRM)
                        m_m4 = helpers.norm(NRMrem) + helpers.norm(m4pTRM)

                    frac_m0 = helpers.rand_num() * (0.02 * e_m0) + 1 - 0.01 * e_m0
                    m0_err = frac_m0 * m_m0

                    frac_m1 = helpers.rand_num() * (0.02 * e_m1) + 1 - 0.01 * e_m1
                    m1_err = frac_m1 * m_m1

                    frac_m2 = helpers.rand_num() * (0.02 * e_m2) + 1 - 0.01 * e_m2
                    m2_err = frac_m2 * m_m2

                    frac_m3 = helpers.rand_num() * (0.02 * e_m3) + 1 - 0.01 * e_m3
                    m3_err = frac_m3 * m_m3

                    Q_DSC_error.append(
                        2 * ((1 + alpha) * m1_err - m0_err - alpha * m3_err) /
                        (2 * m0_err - m1_err - m2_err))
                    Hlab_DSC.append(m2[i]["lab_field"])

        if Q_str == "DB":
            Q_error = Q_DB_error
            Hlab = Hlab_DB
        elif Q_str == "DSC":
            Q_error = Q_DSC_error
            Hlab = Hlab_DSC

        N = len(Q_error)
        if N > 1:
            avgH = sum(Hlab) / N

            # calculate standard deviation on Hlab, and determine x and y
            stdevH1 = []
            x = []
            y = []
            for k in range(N):
                stdevH1.append((Hlab[k] - avgH)**2)
                x.append(Hlab[k])
                y.append(Q_error[k])
            stdevH = math.sqrt(sum(stdevH1) / (N - 1))

            # calculate Sx, Sy, Sxx, Syy, Sxy
            Sx = sum(x)
            Sy = sum(y)
            Sxy = helpers.dot_product(x, y)
            Sxx = helpers.dot_product(x, x)

            # calculate the linear fit if not all points are at the same Hlab
            if stdevH > stdevH_min:
                b = (N * Sxy - Sx * Sy) / (N * Sxx - Sx**2)
                a = Sy / N - b * Sx / N
                PI = -1 * a / b

                N2.append(N)
                stdevHl.append(stdevH)
                aa.append(a)
                bb.append(b)
                intercept.append(PI)

                H0.append(a + b * Hlist[0])
                H1.append(a + b * Hlist[1])
                H2.append(a + b * Hlist[2])
                H3.append(a + b * Hlist[3])
                H4.append(a + b * Hlist[4])
                H5.append(a + b * Hlist[5])
                H6.append(a + b * Hlist[6])
                H7.append(a + b * Hlist[7])
                H8.append(a + b * Hlist[8])
                H9.append(a + b * Hlist[9])
                H10.append(a + b * Hlist[10])

                # add one to m only when a fit was made (m += 1 stays within the if statement)
                m += 1

        # end of the big while loop: always count the cycle attempt
        killCounter += 1

    # sort columns and apply cut-off
    cutOffValue = 0.01 * (100 - Confidence) / 2
    cutOff = int(NumCycles * cutOffValue)

    H0.sort()
    H1.sort()
    H2.sort()
    H3.sort()
    H4.sort()
    H5.sort()
    H6.sort()
    H7.sort()
    H8.sort()
    H9.sort()
    H10.sort()

    Q_Hlist = [H0, H1, H2, H3, H4, H5, H6, H7, H8, H9, H10]

    # determine the average of the bootstrap over the 11 labfields:
    # take the average at each of the labfields specified in Q_Hlist
    Boot_int_min = []
    Boot_int_max = []
    Boot_avg = []
    if len(Q_Hlist[0]) != 0:
        h = 0
        for el in Q_Hlist:
            Boot_avg.append([Hlist[h], sum(el) / len(el)])
            h += 1

        F = cutOff           # the minimum value F, first
        L = m - cutOff - 1   # the maximum value L, last (-1 because python counts from 0)

        y_min = []
        y_max = []
        for w in range(len(Q_Hlist)):
            y_min.append(Q_Hlist[w][F])
            y_max.append(Q_Hlist[w][L])

        for w in range(len(Hlist)):
            Boot_int_min.append([Hlist[w], y_min[w]])
            Boot_int_max.append([Hlist[w], y_max[w]])

        # determine the x-axis intercept for the lower bound
        ind_min = 999
        for i in range(len(y_min) - 1):
            if (y_min[i] < 0) and (y_min[i + 1] > 0):
                ind_min = i

        if ind_min == 999:
            ictLow = None
        else:
            slope_min = (y_min[ind_min + 1] - y_min[ind_min]) / (Hlist[ind_min + 1] - Hlist[ind_min])
            ictLow = -1 * (y_min[ind_min] - Hlist[ind_min] * slope_min) / slope_min

        # determine the x-axis intercept for the upper bound
        ind_max = 999
        for j in range(len(y_max) - 1):
            if (y_max[j] < 0) and (y_max[j + 1] > 0):
                ind_max = j

        if ind_max == 999:
            ictHigh = None
        else:
            slope_max = (y_max[ind_max + 1] - y_max[ind_max]) / (Hlist[ind_max + 1] - Hlist[ind_max])
            ictHigh = -1 * (y_max[ind_max] - Hlist[ind_max] * slope_max) / slope_max

        # write the corresponding PI min and max values; these are the intercepts of the bootstrap intervals
        PI_min = ictHigh
        PI_max = ictLow
    else:
        PI_min = None
        PI_max = None

    return [PI_min, PI_max, Boot_int_min, Boot_int_max, Boot_avg]
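# --- Example (not part of the original module) -------------------------------
# The core of each bootstrap cycle above is an ordinary least-squares line
# through (Hlab, Q_error) whose x-axis intercept -a/b is taken as the
# paleointensity estimate. A minimal standalone version of that fit (same
# Sx/Sy/Sxy/Sxx formulas, plain Python, no helpers module) for illustration:

def _ols_intercept_example(x, y):
    # slope b, offset a and x-axis intercept -a/b of the best-fit line
    N = len(x)
    Sx, Sy = sum(x), sum(y)
    Sxy = sum(xi * yi for xi, yi in zip(x, y))
    Sxx = sum(xi * xi for xi in x)
    b = (N * Sxy - Sx * Sy) / (N * Sxx - Sx**2)
    a = Sy / N - b * Sx / N
    return a, b, -a / b

# points on y = 0.02 * H - 1 cross zero at H = 50
# print(_ols_intercept_example([10, 30, 60, 90], [-0.8, -0.4, 0.2, 0.8]))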
def direction_stat_dir_type(x, y, z, type_dir):
    """Principal component direction (Mdec, Minc), MAD angle and center of mass
    for a set of demagnetization steps; type_dir 1 = free fit, 2 = anchored fit."""
    # center of mass
    mean_x = sum(x) / len(x)
    mean_y = sum(y) / len(y)
    mean_z = sum(z) / len(z)

    # transform the NRM vector
    if type_dir == 1:  # free
        x_prime = helpers.list_min_num(x, mean_x)
        y_prime = helpers.list_min_num(y, mean_y)
        z_prime = helpers.list_min_num(z, mean_z)
    if type_dir == 2:  # forced / anchored
        x_prime = x
        y_prime = y
        z_prime = z

    orient_tensor = [[helpers.dot_product(x_prime, x_prime), helpers.dot_product(x_prime, y_prime), helpers.dot_product(x_prime, z_prime)],
                     [helpers.dot_product(x_prime, y_prime), helpers.dot_product(y_prime, y_prime), helpers.dot_product(y_prime, z_prime)],
                     [helpers.dot_product(x_prime, z_prime), helpers.dot_product(z_prime, y_prime), helpers.dot_product(z_prime, z_prime)]]

    # this numpy statement stays here :-)
    orient_tensor = numpy.array(orient_tensor)

    # get the eigenvalues (tau) and eigenvectors (V)
    tau, V = numpy.linalg.eig(orient_tensor)

    # tau & V to lists, instead of numpy arrays
    tau = tau.tolist()
    V = V.tolist()

    # rescale tau to sum to one
    tau = helpers.list_div_num(tau, sum(tau))

    # find the index of the maximum tau (eigenvalue 1)
    ind_tau_max = -1
    tau_max = -1
    for idx in range(len(tau)):
        if tau[idx] > tau_max:
            tau_max = tau[idx]
            ind_tau_max = idx

    # transpose V
    TV = helpers.transpose_list(V)

    # find eigenvector and eigenvalue 1
    v1 = TV[ind_tau_max]
    e1 = tau[ind_tau_max]

    # the sum of the other two eigenvalues
    sum_e23 = sum(tau) - tau_max

    # define the reference vector (first minus last step)
    r1 = x_prime[0] - x_prime[len(x_prime) - 1]
    r2 = y_prime[0] - y_prime[len(y_prime) - 1]
    r3 = z_prime[0] - z_prime[len(z_prime) - 1]
    R = [r1, r2, r3]

    dot = helpers.dot_product(v1, R)
    if dot < -1:
        dot = -1
    elif dot > 1:
        dot = 1

    # flip the principal direction if it points away from the reference vector
    if math.acos(dot) > (math.pi / 2.):
        PD = helpers.list_mult_num(v1, -1)
    else:
        PD = v1

    [Mdec, Minc, R] = helpers.cart2dir(PD[0], PD[1], PD[2])

    CMvec = [mean_x, mean_y, mean_z]

    MAD = math.atan(math.sqrt(sum_e23 / e1)) * 180 / math.pi

    return [Mdec, Minc, MAD, CMvec]
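# --- Example (not part of the original module) -------------------------------
# Sketch of the same principal-component idea with plain numpy, as a cross-check
# for direction_stat_dir_type(): for demagnetization steps that lie exactly on a
# straight line, the largest eigenvector of the orientation tensor points along
# that line and the MAD angle is ~0 degrees. Names here are local to this sketch,
# not part of the original helpers module.

import math
import numpy

def _pca_mad_example(x, y, z):
    pts = numpy.column_stack([x, y, z]).astype(float)
    pts -= pts.mean(axis=0)                      # "free" fit: centre on the mean
    T = pts.T @ pts                              # orientation tensor
    tau, V = numpy.linalg.eigh(T)                # symmetric eigendecomposition, ascending order
    tau = tau / tau.sum()
    # MAD from the two smaller eigenvalues relative to the largest (clamped for rounding)
    mad = math.degrees(math.atan(math.sqrt(max(tau[0] + tau[1], 0.0) / tau[2])))
    return V[:, 2], mad                          # principal direction, MAD

# steps along (1, 1, 0): expect MAD ~ 0
# print(_pca_mad_example([0, 1, 2, 3], [0, 1, 2, 3], [0, 0, 0, 0]))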
def results(Q_str, c_str, Eps_alt):
    Q = sc["MSP_Q_calc" + c_str]["Q_" + Q_str]

    x = []
    y = []
    EpsAlt = []

    # check for None in the specimen list
    for i in range(len(Q)):
        if Q[i][1] is not None:
            x.append(Q[i][1])
            y.append(Q[i][2])
            EpsAlt.append(Eps_alt[i][1])

    N = len(x)  # number of specimens
    if N > 1:  # more than one specimen is needed to calculate the linear regression
        # determine x and y coordinates, and Eps
        # calculate Sx, Sy, Sxx, Syy, Sxy
        Sx = sum(x)
        Sy = sum(y)
        Sxy = helpers.dot_product(x, y)
        Sxx = helpers.dot_product(x, x)
        Syy = helpers.dot_product(y, y)

        # calculate linear regression coefficients
        LRb = (N * Sxy - Sx * Sy) / (N * Sxx - Sx**2)
        LRa = Sy / N - LRb * Sx / N

        # determine PI (the x-axis intercept of the regression line)
        PI = -1 * LRa / LRb

        # get two points for the linear regression line
        x1 = -1
        x2 = 500
        y1 = LRa + LRb * x1
        y2 = LRa + LRb * x2
        Line_fig = [[x1, y1], [x2, y2]]

        # calculate the averages of x and y for r squared
        avg_x = Sx / N
        avg_y = Sy / N

        xDiff = helpers.list_min_num(x, avg_x)
        yDiff = helpers.list_min_num(y, avg_y)

        x2Sum = helpers.dot_product(xDiff, xDiff)
        y2Sum = helpers.dot_product(yDiff, yDiff)
        xySum = helpers.dot_product(xDiff, yDiff)

        r_sq = (xySum / math.sqrt(x2Sum * y2Sum))**2

        # expected y values on the regression line: yexp[i] = LRa + LRb * x[i]
        # (written as x * LRb - (-LRa))
        Yexp = helpers.list_min_num(helpers.list_mult_num(x, LRb), -1 * LRa)
        yminYexp = helpers.list_min_list(y, Yexp)
        ChiSum = helpers.dot_product(yminYexp, yminYexp)
        chi_sq = ChiSum / N

        # calculate the average epsilon for DSC only
        if Q_str == "DB":
            delta_b = None
            avg_eps_alt = None
        else:
            delta_b = LRa + 1
            avg_eps_alt = sum(EpsAlt) / N

        sc["MSP_results_Q_" + Q_str + c_str]["PI"] = PI
        sc["MSP_results_Q_" + Q_str + c_str]["avg_eps_alt"] = avg_eps_alt
        sc["MSP_results_Q_" + Q_str + c_str]["delta_b"] = delta_b
        sc["MSP_results_Q_" + Q_str + c_str]["r_sq"] = r_sq
        sc["MSP_results_Q_" + Q_str + c_str]["chi_sq"] = chi_sq
        sc["MSP_results_Q_" + Q_str + c_str]["Line_fig"] = Line_fig  # line through points 1 and 2, [[x1, y1], [x2, y2]]
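# --- Example (not part of the original module) -------------------------------
# Standalone sketch of the goodness-of-fit numbers computed above: r_sq is the
# squared Pearson correlation between x and y, chi_sq the mean squared residual
# around the regression line. Plain-list arithmetic, no helpers module needed;
# names are local to this sketch.

import math

def _fit_quality_example(x, y, LRa, LRb):
    N = len(x)
    avg_x, avg_y = sum(x) / N, sum(y) / N
    xd = [xi - avg_x for xi in x]
    yd = [yi - avg_y for yi in y]
    xy_sum = sum(a * b for a, b in zip(xd, yd))
    r_sq = (xy_sum / math.sqrt(sum(a * a for a in xd) * sum(b * b for b in yd)))**2
    resid = [yi - (LRa + LRb * xi) for xi, yi in zip(x, y)]
    chi_sq = sum(r * r for r in resid) / N
    return r_sq, chi_sq

# for points exactly on y = 0.02 * x - 1: r_sq == 1.0 and chi_sq == 0.0
# print(_fit_quality_example([10, 30, 60, 90], [-0.8, -0.4, 0.2, 0.8], -1.0, 0.02))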
def s_tensor_calc(sc):
    """
    Function to calculate the s-tensor for the anisotropy correction, following
    the calculations in the standard paleointensity definitions by Paterson et al., 2014.

    input: anisotropy measurements
    output: the s-tensor
    """
    # input: preprocessed/ aniso_trm [x+, x-, y+, y-, z+, z-, check, orderd]
    # output: anisotropy_statistics/ s_tensor [s]
    Xp_list = sc["preprocessed"]["aniso_trm"]["x+"]
    Xm_list = sc["preprocessed"]["aniso_trm"]["x-"]
    Yp_list = sc["preprocessed"]["aniso_trm"]["y+"]
    Ym_list = sc["preprocessed"]["aniso_trm"]["y-"]
    Zp_list = sc["preprocessed"]["aniso_trm"]["z+"]
    Zm_list = sc["preprocessed"]["aniso_trm"]["z-"]
    check_list = sc["preprocessed"]["aniso_trm"]["check"]
    orderd = sc["preprocessed"]["aniso_trm"]["orderd"]

    if Xp_list is not None:  # then do the calculations for the anisotropy tensor
        master_list = [Xp_list, Yp_list, Zp_list, Xm_list, Ym_list, Zm_list]

        # get the order of the aniso measurements and make a TRM vector of all measurements
        meas_ord = []
        for meas in orderd:
            if meas["type"] != 87:  # do not append the check; type 80 is not in orderd
                meas_ord.append(int(meas["type"]) - 80)

        # small helper function to add coordinates to the TRM vector
        def construct_TRM(input_list):
            TRM.append(input_list[0]["x"])
            TRM.append(input_list[0]["y"])
            TRM.append(input_list[0]["z"])
            return TRM

        # add all TRM x, y, z coordinates of the 6 measurements in the correct order
        TRM = []
        for i in range(len(master_list)):
            TRM = construct_TRM(master_list[int(meas_ord[i]) - 1])

        # make the P matrix with the 6 directions in which the TRM was measured
        xp_dir = [1, 0, 0]    # x+
        yp_dir = [0, 1, 0]    # y+
        zp_dir = [0, 0, 1]    # z+
        xm_dir = [-1, 0, 0]   # x-
        ym_dir = [0, -1, 0]   # y-
        zm_dir = [0, 0, -1]   # z-

        # add all directions to a master list
        master_dir = [xp_dir, yp_dir, zp_dir, xm_dir, ym_dir, zm_dir]

        # make the P matrix with the correct directions according to the measurement order
        P = []
        for i in range(len(meas_ord)):
            P.append(master_dir[int(meas_ord[i]) - 1])

        # generate the design matrix A
        A = []
        for i in range(len(P)):
            Ai1 = [P[i][0], 0, 0, P[i][1], 0, P[i][2]]
            Ai2 = [0, P[i][1], 0, P[i][0], P[i][2], 0]
            Ai3 = [0, 0, P[i][2], 0, P[i][1], P[i][0]]
            A.append(Ai1)
            A.append(Ai2)
            A.append(Ai3)

        At = helpers.transpose_list(A)

        # AtA = At * A
        AtA = []
        AtA_j = []
        for i in range(len(At)):
            for j in range(len(At)):
                AtA_j.append(helpers.dot_product(At[i], At[j]))
            AtA.append(AtA_j)
            AtA_j = []

        # make numpy arrays and do numpy linalg.inv
        # make numpy array (1)
        AtA = numpy.array(AtA)
        # do numpy calculation (2)
        AtA_inv = numpy.linalg.inv(AtA)
        # back to python lists (3)
        AtA_inv = AtA_inv.tolist()

        # make x, that is the inverse of AtA times At
        x = []
        x_j = []
        for i in range(len(AtA_inv)):
            for j in range(len(A)):
                x_j.append(helpers.dot_product(AtA_inv[i], A[j]))
            x.append(x_j)
            x_j = []

        # the next step is to multiply x with the TRM vector to get s
        # s (6x1) = x (6 x 18) * TRM (18 x 1)
        s = []
        for i in range(len(x)):
            s.append(helpers.dot_product(x[i], TRM))

        sc["anisotropy_statistics"]["Aniso_tensor"]["s_tensor"] = s
    return sc
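# --- Example (not part of the original module) -------------------------------
# The loops above implement the least-squares solution s = (A^T A)^-1 A^T TRM.
# A compact numpy cross-check: for a perfectly isotropic specimen measured in
# the six axis directions, the recovered tensor should be s = [1, 1, 1, 0, 0, 0].
# The names below are local to this sketch.

import numpy

def _s_tensor_example(P, TRM):
    # P: list of the six unit field directions, TRM: 18 stacked x/y/z moments
    A = []
    for p in P:
        A.append([p[0], 0, 0, p[1], 0, p[2]])
        A.append([0, p[1], 0, p[0], p[2], 0])
        A.append([0, 0, p[2], 0, p[1], p[0]])
    A = numpy.array(A, dtype=float)
    s, *_ = numpy.linalg.lstsq(A, numpy.array(TRM, dtype=float), rcond=None)
    return s

# P6 = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [-1, 0, 0], [0, -1, 0], [0, 0, -1]]
# TRM6 = [c for p in P6 for c in p]        # isotropic response: moment equals field direction
# print(_s_tensor_example(P6, TRM6))       # ~ [1. 1. 1. 0. 0. 0.]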
def anisotropy_calc(sc):
    """
    Function that calculates the paleointensity correction factor when anisotropy
    data is available. It uses the previously calculated s-tensor.

    First the direction of the NRM for the selected part of the Arai plot is
    determined to give Mhat_ChRM, which is used to get the anisotropy correction c.
    The anisotropy correction is multiplied with Banc to get Banc_aniso_corr.

    input: s-tensor, the direction of the applied labfield, direction of the data
           selection, the original Banc
    output: the anisotropy correction aniso_c, the anisotropy corrected
            paleointensity estimate Banc_aniso_corr
    """
    # input: preprocessed/ field_basics [field_dir_vec]
    #        s_tensor [s1_list, s2_list, s3_list, s4_list, s5_list, s6_list, scheck_list]
    #        arai_statistics/ PI_Banc_est [B_anc]
    #        directional_statistics/ mean_dir_stat [Mdec_free, Minc_free]
    # output: anisotropy_statistics/ Anisotropy_Correction [aniso_c, Banc_aniso_corr]
    Mdec_free = sc["directional_statistics"]["mean_dir_stat"]["Mdec_free"]
    Minc_free = sc["directional_statistics"]["mean_dir_stat"]["Minc_free"]
    field_dir_vec = sc["preprocessed"]["field_basics"]["field_dir_vec"]
    B_anc = sc["arai_statistics"]["PI_Banc_est"]["B_anc"]
    Xp_list = sc["preprocessed"]["aniso_trm"]["x+"]
    s_tensor = sc["anisotropy_statistics"]["Aniso_tensor"]["s_tensor"]

    if Xp_list is not None:  # there is anisotropy data, do the calculations; if not, do nothing
        # the direction of the NRM should be determined from the selected part of the
        # Arai plot; this is Mdec_free & Minc_free, whose unit vector gives Mhat_ChRM

        # get mean unit vector
        Mhat_ChRM = helpers.dir2cart(Mdec_free, Minc_free, 1)

        Blab_orient = field_dir_vec  # it was: helpers.dir2cart(Mdec_free, Minc_free, 1), that is wrong!! (13 feb 2020)

        s1 = s_tensor[0]
        s2 = s_tensor[1]
        s3 = s_tensor[2]
        s4 = s_tensor[3]
        s5 = s_tensor[4]
        s6 = s_tensor[5]

        A1 = [s1, s4, s6]
        A2 = [s4, s2, s5]
        A3 = [s6, s5, s3]
        A = [A1, A2, A3]

        # make A and Mhat_ChRM into numpy arrays (1)
        A = numpy.array(A)
        Mhat_ChRM = numpy.array(Mhat_ChRM)
        # do numpy calculation (2)
        Hanc = numpy.linalg.solve(A, Mhat_ChRM)
        # back to python lists (3)
        A = A.tolist()
        Mhat_ChRM = Mhat_ChRM.tolist()
        Hanc = Hanc.tolist()

        # unit vector in the direction of the ancient field
        Hanc_hat = helpers.list_div_num(Hanc, helpers.norm(Hanc))

        Manc = []
        Mlab = []
        for i in range(len(A)):
            Manc.append(helpers.dot_product(A[i], Hanc_hat))
            Mlab.append(helpers.dot_product(A[i], field_dir_vec))

        aniso_c = helpers.norm(Mlab) / helpers.norm(Manc)
        Banc_aniso_corr = aniso_c * B_anc

        sc["anisotropy_statistics"]["Anisotropy_Correction"]["aniso_c"] = aniso_c
        sc["anisotropy_statistics"]["Anisotropy_Correction"]["Banc_aniso_corr"] = Banc_aniso_corr
    return sc
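# --- Example (not part of the original module) -------------------------------
# Compact numpy restatement of the correction above: build the symmetric tensor
# from the six s values, solve S * Hanc = Mhat_ChRM, and compare how strongly the
# tensor magnetizes the lab-field direction versus the ancient-field direction.
# For an isotropic tensor the correction is exactly 1. Names are local to this sketch.

import numpy

def _aniso_correction_example(s_tensor, Mhat_ChRM, field_dir_vec):
    s1, s2, s3, s4, s5, s6 = s_tensor
    S = numpy.array([[s1, s4, s6],
                     [s4, s2, s5],
                     [s6, s5, s3]], dtype=float)
    Hanc = numpy.linalg.solve(S, numpy.array(Mhat_ChRM, dtype=float))
    Hanc_hat = Hanc / numpy.linalg.norm(Hanc)
    Manc = S @ Hanc_hat
    Mlab = S @ numpy.array(field_dir_vec, dtype=float)
    return numpy.linalg.norm(Mlab) / numpy.linalg.norm(Manc)

# isotropic tensor, arbitrary unit directions -> correction factor 1.0
# print(_aniso_correction_example([1, 1, 1, 0, 0, 0], [0, 0, 1], [1, 0, 0]))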