def hessian(dim, posterior, alpha):
    """Compute three per-cluster terms built from the trigamma of ``alpha``.

    The name suggests these are the pieces of a Hessian (or its inverse) used
    by a Newton-style update -- NOTE(review): confirm against the caller.

    Args:
        dim: Each per-cluster row vector is reshaped to ``(1, dim + 1)``.
        posterior: 2-D array indexed as ``posterior[:, k]``; column ``k`` is
            summed to obtain the weight of cluster ``k``.
        alpha: Iterable of per-cluster parameter vectors.

    Returns:
        A 3-tuple:
          * array stacking ``DIAGONAL(1 / (-weight * trigamma(alpha_k)))``,
          * plain Python list with one scalar constant per cluster,
          * array stacking the ``(1, dim + 1)`` row vectors
            ``(-1 / weight) * (1 / trigamma(alpha_k))``.
    """

    def _cluster_terms(k, cluster_alpha):
        # Column sum of the posterior for this cluster.
        weight = SUM(posterior[:, k])
        # polygamma of order 1 (trigamma), element-wise and of the total.
        trigamma = POLYGAMMA(1, cluster_alpha)
        trigamma_of_total = POLYGAMMA(1, SUM(cluster_alpha))

        diagonal = DIAGONAL(1 / (-1 * weight * trigamma))
        constant = ((trigamma_of_total * SUM(1 / trigamma)) - 1) * trigamma_of_total * weight
        row = ((-1 / weight) * (1 / trigamma)).reshape(1, dim + 1)
        return diagonal, constant, row

    terms = [_cluster_terms(k, cluster_alpha) for k, cluster_alpha in enumerate(alpha)]

    diagonals = [t[0] for t in terms]
    constants = [t[1] for t in terms]
    rows = [t[2] for t in terms]
    return ASARRAY(diagonals), constants, ASARRAY(rows)
def g_estimation(data_set, size, alpha, posterior, dim, K):
    """Stack one ``g_matrix_generator`` result per cluster.

    Args:
        data_set: 2-D data; a column of ones is appended before normalising.
        size: Number of rows used to build the appended ones column.
        alpha: Iterable of per-cluster parameter vectors.
        posterior: 2-D array; column ``k`` (kept 2-D via ``[:, [k]]``) is
            passed along with cluster ``k``.
        dim: Forwarded unchanged to ``g_matrix_generator``.
        K: Accepted for interface compatibility; not used in this function.

    Returns:
        ``ASARRAY`` of the per-cluster results, in the order of ``alpha``.
    """
    # Append a constant 1 to every sample, then take the log of each row
    # divided by its own total.
    ones_column = FULL((size, 1), 1)
    augmented = CONCAT((data_set, ones_column), axis=1)

    log_rows = []
    for row in augmented:
        log_rows.append(LOG(row / SUM(row)))
    pixel_log = ASARRAY(log_rows)

    return ASARRAY([
        g_matrix_generator(cluster_alpha, posterior[:, [k]], pixel_log, dim)
        for k, cluster_alpha in enumerate(alpha)
    ])
def clusterDropTest(mix, alpha, dropingCriteria, K, DIM, pixelSize):
    """Drop clusters whose total mixing weight does not exceed a threshold.

    Args:
        mix: 2-D array; column ``j`` holds the weights of cluster ``j``.
        alpha: Per-cluster parameters, indexed as ``alpha[j]``.
        dropingCriteria: A cluster is kept only if the sum of its ``mix``
            column is strictly greater than this value.
        K: Number of clusters to examine (columns ``0 .. K-1``).
        DIM: Width the surviving ``alpha`` rows are reshaped to.
        pixelSize: Number of rows the surviving ``mix`` is reshaped to.

    Returns:
        ``(mix_kept, alpha_kept, count)`` where ``mix_kept`` has shape
        ``(pixelSize, count)``, ``alpha_kept`` has shape ``(count, DIM)`` and
        ``count`` is the number of surviving clusters.
    """
    survivors = []
    for j in range(K):
        if SUM(mix[:, j: j + 1]) > dropingCriteria:
            survivors.append(j)
            continue
        # Report every cluster that is discarded.
        print("Cluster having alpha :", alpha[j], " & Mix :", j, " is removed!")

    kept_columns = [mix[:, j: j + 1] for j in survivors]
    kept_alpha = [alpha[j] for j in survivors]
    count = len(survivors)

    # The stacked columns are (count, pixelSize, 1); transpose + reshape puts
    # them back in (pixelSize, count) layout.
    new_mix = ASARRAY(kept_columns).T.reshape(pixelSize, count)
    new_alpha = ASARRAY(kept_alpha).reshape(count, DIM)
    return new_mix, new_alpha, count
def pdf(p_v, a_v):
    """Evaluate a density at ``p_v`` for parameter vector ``a_v``.

    The value is ``exp`` of
    ``gammaln(sum(a)) - sum(gammaln(a)) + sum((a[:-1] - 1) * log(p))
    - sum(a) * log(1 + sum(p))``.

    Args:
        p_v: Data vector; must broadcast against ``a_v[:-1]``.
        a_v: Parameter vector; its last entry is excluded from the
            ``(a - 1) * log(p)`` term.

    Returns:
        The density value.

    Note:
        ``RuntimeWarning`` is only catchable when warnings have been turned
        into exceptions elsewhere; in that case the inputs and intermediate
        terms are printed and the process exits with status 0.
    """
    try:
        alpha_total = SUM(a_v)
        log_normaliser = GAMMALN(alpha_total) - SUM(GAMMALN(a_v))
        log_kernel = SUM(SUBS(a_v[:-1], 1) * LOG(p_v))
        log_tail = alpha_total * LOG(1 + SUM(p_v))
        return EXP(log_normaliser + log_kernel - log_tail)
    except RuntimeWarning:
        # Dump every ingredient of the formula to help locate the bad value.
        print("pVector :>", p_v)
        print("aVector :>", a_v)
        print("GAMMALN(SUM(a_v)) :>", GAMMALN(SUM(a_v)))
        print("GAMMALN(a_v) :>", GAMMALN(a_v))
        print("LOG(p_v) :>", LOG(p_v))
        print("SUM(a_v) :>", SUM(a_v))
        print("LOG(1 + SUM(p_v) :>", LOG(1 + SUM(p_v)))
        exit(0)
def g_estimation(self, data_set, alpha, beta, posterior, dim, K):
    """Accumulate posterior-weighted first/second-order terms per cluster.

    For every cluster ``k`` with parameters ``(a, b)`` the posterior column
    ``k`` weights the following quantities, which are then summed over the
    samples (axis 0):

      * ``q_alpha``: ``digamma(a + b) - digamma(a) + log(x / (1 + x))``
      * ``q_beta``: ``digamma(a + b) - digamma(b) + log(1 / (1 + x))``
      * ``q_alpha_square``: ``trigamma(a + b) - trigamma(a)``
      * ``q_beta_square``: ``trigamma(a + b) - trigamma(b)``
      * ``q_alpha_beta_square``: ``trigamma(a + b)``

    Args:
        self: Unused; present because the function is written as a method.
        data_set: Iterable of samples; each sample must support element-wise
            arithmetic (e.g. a NumPy row) and hold ``dim`` values.
        alpha: Iterable of per-cluster ``a`` vectors (``dim`` values each).
        beta: Iterable of per-cluster ``b`` vectors, paired with ``alpha``.
        posterior: 2-D array; column ``k`` weights cluster ``k``.
        dim: Number of values per sample / parameter vector.
        K: Number of clusters; every result is reshaped to ``(K, dim)``.

    Returns:
        Tuple ``(q_alpha, q_beta, q_alpha_square, q_beta_square,
        q_alpha_beta_square)``, each an array of shape ``(K, dim)``.
    """
    # The log-transformed data depends only on ``data_set``, so compute it
    # once instead of rebuilding it for every cluster.
    n_samples = len(data_set)
    a_data = ASARRAY([LOG(data / (1 + data)) for data in data_set]).reshape(n_samples, dim)
    b_data = ASARRAY([LOG(1 / (1 + data)) for data in data_set]).reshape(n_samples, dim)

    q_alpha = []
    q_beta = []
    q_alpha_square = []
    q_beta_square = []
    q_alpha_beta_square = []

    for index, (a_vector, b_vector) in enumerate(zip(alpha, beta)):
        # polygamma order 0 is digamma, order 1 is trigamma.
        a_d_gamma = POLYGAMMA(0, a_vector).reshape(1, dim)
        b_d_gamma = POLYGAMMA(0, b_vector).reshape(1, dim)
        ab_d_gamma = POLYGAMMA(0, a_vector + b_vector).reshape(1, dim)
        a_t_gamma = POLYGAMMA(1, a_vector).reshape(1, dim)
        b_t_gamma = POLYGAMMA(1, b_vector).reshape(1, dim)
        ab_t_gamma = POLYGAMMA(1, a_vector + b_vector).reshape(1, dim)

        # Keep the column 2-D so it broadcasts across the ``dim`` axis.
        weights = posterior[:, [index]]

        q_alpha.append(SUM(weights * (ab_d_gamma - a_d_gamma + a_data), axis=0).reshape(1, dim))
        q_beta.append(SUM(weights * (ab_d_gamma - b_d_gamma + b_data), axis=0).reshape(1, dim))
        q_alpha_square.append(SUM(weights * (ab_t_gamma - a_t_gamma), axis=0).reshape(1, dim))
        q_beta_square.append(SUM(weights * (ab_t_gamma - b_t_gamma), axis=0).reshape(1, dim))
        q_alpha_beta_square.append(SUM(weights * ab_t_gamma, axis=0).reshape(1, dim))

    return (
        ASARRAY(q_alpha).reshape(K, dim),
        ASARRAY(q_beta).reshape(K, dim),
        ASARRAY(q_alpha_square).reshape(K, dim),
        ASARRAY(q_beta_square).reshape(K, dim),
        ASARRAY(q_alpha_beta_square).reshape(K, dim),
    )
def split_stamp(input_file):
    """Aggregate a tab-separated STAMP profile by taxonomic level.

    The level is read from the module-level ``parameters["-l"]``
    (case-insensitive).  For each selected level column ``p`` the rows of
    ``input_file`` are grouped by the value in column ``p`` and the numeric
    columns from index 8 onwards are summed per group.  One output file is
    written per level, next to the input file, named
    ``<level header>__<input name with ".spf" replaced by ".xls">``.

    Args:
        input_file: Path of the tab-separated input.  The first line is the
            header; columns 0-7 are taxonomy levels and columns 8+ are
            numeric values.

    Raises:
        KeyError: If ``parameters["-l"]`` is neither ``"all"`` nor a known
            level name.
        ValueError: If a value in columns 8+ cannot be parsed as a float.
    """
    import os  # local import keeps this block self-contained

    choice = parameters["-l"].lower()
    if choice == "all":
        # NOTE(review): this covers columns 0-6 only, so the "strain" column
        # (index 7) is not exported for "all" -- confirm that is intended.
        columns = range(7)
    else:
        level_columns = {
            'kingdom': 0,
            'phylum': 1,
            'class': 2,
            'order': 3,
            'family': 4,
            'genus': 5,
            'species': 6,
            'strain': 7,
        }
        columns = [level_columns[choice]]

    # Prefix only the file name, never the directory part of the path.
    directory, base_name = os.path.split(input_file)
    out_base = base_name.replace(".spf", ".xls")

    for p in columns:
        grouped = {}
        with open(input_file) as f:
            # Split the header on tabs, exactly like the data rows, so that
            # header cells containing spaces stay aligned with their columns.
            header = f.readline().rstrip("\r\n").split("\t")
            head = header[p]
            info = header[8:]
            for line in f:
                fields = line.split("\t")
                grouped.setdefault(fields[p], []).append(
                    [float(x) for x in fields[8:]])

        out_path = os.path.join(directory, head + "__" + out_base)
        with open(out_path, "w+") as o:
            o.write(head + "\t" + "\t".join(info) + "\n")
            for name in grouped:
                totals = list(SUM(grouped[name], axis=0))
                o.write(name + "\t" + "\t".join([str(x) for x in totals]) + "\n")

    # Single-argument call form prints the same text on Python 2 and 3.
    print("It's done with the STAMP file splitting :)")
def HELLINGER_loss(a, y):
    """Return ``(1 / sqrt(2)) * sum((sqrt(a) - sqrt(y)) ** 2)``.

    Args:
        a: Array of non-negative values (first operand).
        y: Array of non-negative values, broadcastable against ``a``.

    Returns:
        The scaled sum of squared differences of the element-wise roots.
    """
    root_gap = sqrt(a) - sqrt(y)
    scale = 1 / sqrt(2)
    return scale * SUM(root_gap ** 2)
def KULLBACKLEIBLER_loss(a, y):
    """Return ``sum(y * log(y / a)) - sum(y) + sum(a)``.

    Elements where ``a == 0`` contribute 0 to the first sum, and non-finite
    logarithms are replaced via ``nan_to_num`` before weighting by ``y``.

    Args:
        a: Array of reference values.
        y: Array of observed values, broadcastable against ``a``.

    Returns:
        The scalar loss.
    """
    log_ratio = nan_to_num(log(y / a))
    # Positions with a == 0 are masked out of the divergence term.
    divergence = SUM(where(a != 0, y * log_ratio, 0))
    return divergence - SUM(y) + SUM(a)
def KULLBACK_loss(a, y):
    """Return ``sum(y * log(y / a))`` with ``a == 0`` positions counted as 0.

    Non-finite logarithms are replaced via ``nan_to_num`` before weighting
    by ``y``.

    Args:
        a: Array of reference values.
        y: Array of observed values, broadcastable against ``a``.

    Returns:
        The scalar loss.
    """
    log_ratio = nan_to_num(log(y / a))
    weighted = y * log_ratio
    # Mask out the positions where the reference value is exactly zero.
    return SUM(where(a != 0, weighted, 0))
def CROSS_ENTROPY_loss(a, y):
    """Return ``sum(-y * log(a) - (1 - y) * log(1 - a))``.

    Non-finite element-wise terms are replaced via ``nan_to_num`` before
    summing.

    Args:
        a: Array of predictions.
        y: Array of targets, broadcastable against ``a``.

    Returns:
        The scalar loss.
    """
    target_term = -y * log(a)
    complement_term = (1 - y) * log(1 - a)
    return SUM(nan_to_num(target_term - complement_term))
def normalise(result):
    """Divide ``result`` by its total so that the entries sum to one.

    Args:
        result: Array-like supporting division by a scalar.

    Returns:
        ``result`` scaled by ``1 / SUM(result)``.
    """
    # Multiplying by 1. forces a float divisor (matters under Python 2).
    total = SUM(result) * 1.
    return result / total
def mix_updater(posterior, size, cluster):
    """Return the column sums of ``posterior`` divided by ``size``.

    Args:
        posterior: 2-D array summed along axis 0.
        size: Divisor applied to every column sum.
        cluster: Number of columns; the result is reshaped to
            ``(1, cluster)``.

    Returns:
        Array of shape ``(1, cluster)``.
    """
    column_totals = SUM(posterior, axis=0)
    averaged = column_totals / size
    return averaged.reshape(1, cluster)
def posterior_estimator(pdf, mix):
    """Weight each row of ``pdf`` by ``mix`` and normalise it to sum to one.

    Args:
        pdf: Sequence of per-sample vectors, one entry per component.
        mix: Array of mixing weights; ``mix.size`` fixes the number of
            columns of the result.

    Returns:
        Array of shape ``(len(pdf), mix.size)``.
    """
    normalised_rows = []
    for sample_pdf in pdf:
        weighted = mix * sample_pdf
        normalised_rows.append(weighted / SUM(weighted))
    return ASARRAY(normalised_rows).reshape(len(pdf), mix.size)
def geo_transformation(pixels, DIM, pixelSize):
    """Divide each column by one plus the row-wise sum of the columns before it.

    For column ``d`` the result is
    ``pixels[:, d] / (1 + sum(pixels[:, 0:d], axis=1))``; column 0 is
    therefore left unchanged.

    Args:
        pixels: 2-D array with at least ``DIM`` columns.
        DIM: Number of columns to transform.
        pixelSize: Number of rows of ``pixels``.

    Returns:
        Array of shape ``(pixelSize, DIM)``.
    """
    transformed_columns = []
    for d in range(DIM):
        preceding_total = SUM(pixels[:, 0:d], axis=1).reshape(pixelSize, 1)
        transformed_columns.append(pixels[:, d:d + 1] / (1 + preceding_total))

    # Stacked shape is (DIM, pixelSize, 1); transpose + reshape restores the
    # (pixelSize, DIM) layout.
    stacked = ASARRAY(transformed_columns)
    return stacked.T.reshape(pixelSize, DIM)
* @param {Integer} a. * @return {String} which contains the Sum of Array. */ """ if __name__ == '__main__': K = CONST['K'] print("K :>", K) cluster_drop_val = CONST['cluster_drop_val'] alpha, data, dim, size, mix = initial_algorithm(K) counter = 0 obj = {'alpha': []} while True: pdf, posterior = estimation_step(K, mix, alpha, data) mix, alpha = maximization_step(K, alpha, data, dim, posterior, size) obj['alpha'].append(alpha) # mix, alpha, K = cluster_drop_test(mix, alpha, cluster_drop_val, K, dim) converge = convergence_test(obj['alpha'], CONST["algConverge"]) labels = predict_labels(posterior) counter = counter + 1 print("Converge :>", converge) if converge: print("predictLabels :>", labels) print("################### Final Parameters ###################") print("K : ", K) print("Mix : ", mix, SUM(mix)) print("Alpha : ", alpha) print("Counter : ", counter) exit()
def g_matrix_generator(alpha, posterior, log_pixels, dim):
    """Return the posterior-weighted sum of a digamma/log-data expression.

    For every sample the term
    ``digamma(sum(alpha)) - digamma(alpha) + log_pixels`` is multiplied by
    ``posterior`` and the products are summed over the samples (axis 0).

    Args:
        alpha: Parameter vector with ``dim + 1`` entries.
        posterior: Per-sample weights, broadcastable against an
            ``(n_samples, dim + 1)`` array.
        log_pixels: Array reshaped to ``(len(log_pixels), dim + 1)``.
        dim: The working width is ``dim + 1``.

    Returns:
        Column vector of shape ``(dim + 1, 1)``.
    """
    width = dim + 1
    # polygamma of order 0 is the digamma function.
    digamma_of_total = POLYGAMMA(0, SUM(alpha))
    digamma_each = POLYGAMMA(0, alpha).reshape(1, width)
    log_rows = log_pixels.reshape(len(log_pixels), width)

    weighted = posterior * (digamma_of_total - digamma_each + log_rows)
    return SUM(weighted, axis=0).reshape(width, 1)