# Assumes: import numpy as np, plus the project-local modules
# outlier, spearman, pearson, regressaoLinear, levenbergMarquart.
def avaliacao(metrica, arq):
    arquivo = open('arquivo.txt', 'r')        # open the objective-metrics file
    subjectscores = open(arq, 'r')            # open the subjective-scores file
    conteudo_texto = arquivo.read()
    conteudo_texto1 = subjectscores.read()
    a = conteudo_texto.split('\n')            # split on line breaks
    c = a[:-1]                                # drop the last (empty) element
    aa = conteudo_texto1.split('\n')          # split on line breaks
    cc = aa[:-1]
    arquivo.close()                           # close the files
    subjectscores.close()

    x = np.array([])
    y = np.array([])
    l = np.array([])
    total_pontos = 0

    # build the metric-vs-MOS data (and optionally plot it)
    for i in range(0, len(cc)):
        w = cc[i].split('\t')                 # fields of the subjective-score line
        q = len(w)                            # number of fields in the line
        ww = w[2:q]                           # drop the leading identifier fields
        total_pontos = len(ww) + total_pontos
        b = c[i].split(';')                   # split the metric line on ';'
        psnr = b[metrica]                     # pick the requested metric (e.g. PSNR)
        h = np.asarray(ww[0:13])              # vector of subjective scores (MOS)
        t = h.astype(float)
        v = np.mean(t)                        # mean MOS for this line
        if psnr != 'inf':
            x = np.insert(x, 0, psnr)         # store metric value
            y = np.insert(y, 0, v)            # store mean MOS
            r = float(psnr)                   # convert to float
            p = q - 1.0                       # line length minus the first item
            d = np.zeros(p)                   # vector of zeros
            dd = d + r                        # metric repeated to the size of the MOS vector
            # plt.plot(dd, ww, 'o', color='r')  # plot metric x MOS
        out = outlier.outlier(t)              # count outliers in this MOS column
        l = np.append(l, out)                 # number of outliers per MOS column

    razao_outliers = np.sum(l) / total_pontos
    coeficiente_spearman = spearman.spearman(x, y)   # spearman/s = stats.spearmanr(x,y)
    coeficiente_pearson = pearson.pearson(x, y)      # pearson/p = stats.pearsonr(x,y)
    a, b = regressaoLinear.regressaoLinear(x, y)     # linear regression
    x, t, y, v = levenbergMarquart.levenberg(x, y)   # logistic fit (Levenberg-Marquardt)
    linear = '(' + str(a) + ')*x + (' + str(b) + ')'
    logistica = ('(' + str(v[0]) + ') * (' + str(0.5) + '-(' + str(v[1]) + ')/(exp(' +
                 str(v[1]) + '*(x-(' + str(v[2]) + '))))) + (' + str(v[3]) + ') * x+(' +
                 str(v[4]) + ')')
    return razao_outliers, coeficiente_spearman, coeficiente_pearson, logistica, linear
def build_correlations(sets):
    # Loop through all acquired lists (by looping through data-sets, attributes and
    # their possible values) and run the pearson function against all acquired
    # lists (including itself).
    correlations = {}
    for set_i, set_x in enumerate(sets):
        set_total_x = sets[set_x]["TOTAAL"]["TOTAAL"]
        for att_i, attribute_x in enumerate(sets[set_x]):
            if attribute_x == "TOTAAL":
                continue
            for val_i, value_x in enumerate(sets[set_x][attribute_x]):
                for set_j, set_y in enumerate(sets):
                    set_total_y = sets[set_y]["TOTAAL"]["TOTAAL"]
                    set_total = [sum(pair) for pair in zip(set_total_x, set_total_y)]
                    count_x = [val for enum, val in enumerate(sets[set_x][attribute_x][value_x])
                               if set_total[enum] > 0]
                    if len(count_x) <= 1:
                        continue
                    for att_j, attribute_y in enumerate(sets[set_y]):
                        if attribute_y == "TOTAAL":
                            continue
                        # list(...) so the slice also works on Python 3 dict views
                        for val_j, value_y in enumerate(list(sets[set_y][attribute_y].keys())[:val_i + 1]):
                            if set_x == set_y and attribute_x == attribute_y and value_x == value_y:
                                continue
                            count_y = [val for enum, val in enumerate(sets[set_y][attribute_y][value_y])
                                       if set_total[enum] > 0]
                            if len(count_y) <= 1:
                                continue
                            my_pearson = pearson(count_x, count_y)
                            if my_pearson == 2:   # pearson() apparently returns 2 when the correlation is undefined
                                continue
                            key = (str(set_x) + "-" + str(attribute_x) + "-" + str(value_x) + "_" +
                                   str(set_y) + "-" + str(attribute_y) + "-" + str(value_y))
                            correlations[key] = {
                                "set_a": {"set": set_x, "attribute": attribute_x, "value": value_x,
                                          "amount-value": sum(count_x), "amount-total": sum(set_total_x)},
                                "set_b": {"set": set_y, "attribute": attribute_y, "value": value_y,
                                          "amount-value": sum(count_y), "amount-total": sum(set_total_y)},
                                "pearsons": my_pearson,
                            }
    return correlations
def process_pearsons_job():
    try:
        if skr_config.LOGIN_ENABLED and session.get('logged_in') != True:
            return redirect('/login')
        parameters = build_seekr_parameters(request)
        application.logger.debug('CURRENT METHOD: process_pearsons_job')
        t1 = time.perf_counter()
        counts, names, comparison_counts, comparison_names, counter = _run_seekr_algorithm(
            parameters=parameters)
        t2 = time.perf_counter()
        application.logger.debug('Running the algorithm took %.3f seconds' % (t2 - t1))
        fixup_counts_warnings = fixup_counts(counts, counter)
        if comparison_counts is None:
            comparison_counts = counts
            comparison_names = names
        else:
            fixup_comparision_warnings = fixup_counts(comparison_counts, counter)
        pearsons = pearson(counts, comparison_counts)
        application.logger.debug("Finished Pearson's. Converting to .csv.")
        csv_string = get_pearsons_csv(names, pearsons, comparison_names)
        last_modified = email.utils.formatdate(time.time(), usegmt=True)
        headers = {
            'Content-Type': 'application/csv',
            'Content-Disposition': 'attachment;filename = pearsons.csv',
            'Content-Length': str(len(csv_string)),
            'Last-Modified': last_modified
        }
        return (csv_string, headers)
    except Exception as e:
        application.logger.exception('Error in /files/pearsons')
        # TODO change error from json
        return jsonify({'error': "Server_Error: 500"})
# Fragment: line_arr holds the GPA line read earlier in the script, and gpas was
# initialized above it.
for line in line_arr:
    gpas.append(float(line))
# print(gpas)

# Read each line in.
scores = []
# for x in range(nt):
for x in range(0, 5):
    test = []
    line = input()
    line_arr = re.split(r' ', line)
    for line in line_arr:
        test.append(float(line))
    scores.append(test)

# For each set of test scores, determine the correlation to the GPA scores.
best = 0
best_index = 0
it = 0
for test in scores:
    it = it + 1
    coeff = pearson(gpas, test)
    # print("Predictor: %i , correlation coefficient: %5.3f" % (it, coeff))
    if coeff > best:
        best = coeff
        best_index = it
print(best_index)
def get_data_for_pearsons(counts, comparison_counts, col1_names, col2_names):
    similarity = pearson(counts, comparison_counts)
    df = pandas.DataFrame(data=similarity, index=col1_names, columns=col2_names)
    return df
def run_seekr_algorithm(parameters):
    """
    Launch the SEEKR algorithm using the parameters from the Web Service and return the results.

    The logic in this method tries to avoid recalculating k-mers and normalization for performance
    reasons. The logic otherwise would be simpler:
    1) Calculate the normalization (mean and std dev)
    2) Calculate the frequencies of the user set and then apply the normalization from step 1
    3) Calculate the frequencies of the comparison set, if it exists, and apply the normalization
       from step 1
    4) Calculate the Pearson's R correlations between sequences in the user set and the comparison
       set. If no comparison set exists, calculate the correlations between the user set sequences

    In any of these steps, if we already have a precomputed value, we will load that instead of
    performing the computation.

    Notes
    -----
    numpy's corrcoef is an efficient way to calculate Pearson's correlations, but since its
    implementation computes a covariance matrix, the output is always a square matrix. So if we had
    10 sequences in a user set and compare against 10,000 sequences in a comparison set,
    numpy.corrcoef will calculate a matrix that is 10,010x10,010. The pearson function called here
    supports non-square matrices and is thus used for comparing against the comparison set,
    e.g. its matrix would be 10x10,000.
    """
    outfile = 'test1.csv'
    mean_std_loaded = False
    names = None
    comparison_names = None
    normalization_path = get_precomputed_normalization_path(parameters)
    if normalization_path is not None:
        mean = np.load(normalization_path[0])
        std = np.load(normalization_path[1])
        mean_std_loaded = True
    normal_set = parameters['normal_set']
    if normal_set is None:
        raise SeekrServerError('No normalization set Provided')
    comparison_set = None
    if 'comparison_set' in parameters:
        comparison_set = parameters['comparison_set']

    if 'comparison_set_files' in parameters:
        if normal_set == skr_config.SETTING_USER_SET:
            (mean, std, counts, names) = compute_normalization_and_frequency(
                infasta=TextIOWrapper(parameters['user_set_files']),
                kmer_length=parameters['kmer_length'],
                outfile=outfile)
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['comparison_set_files']),
                                               outfile=None, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False,
                                               mean=mean, std=std)
            comparison_counts = counter.make_count_file()
            comparison_names = get_names_from_counter(counter)
        elif normal_set == skr_config.SETTING_COMPARISION_SET:
            (mean, std, comparison_counts, comparison_names) = compute_normalization_and_frequency(
                infasta=TextIOWrapper(parameters['comparison_set_files']),
                kmer_length=parameters['kmer_length'])
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['user_set_files']),
                                               outfile=outfile, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False,
                                               mean=mean, std=std)
            counts = counter.make_count_file()
            names = get_names_from_counter(counter)
        elif mean_std_loaded:
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['user_set_files']),
                                               outfile=outfile, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False,
                                               mean=mean, std=std)
            counts = counter.make_count_file()
            comparision_counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['comparison_set_files']),
                                                           outfile=None, k=parameters['kmer_length'],
                                                           label=True, silent=True, binary=False,
                                                           mean=mean, std=std)
            comparison_counts = comparision_counter.make_count_file()
            names = get_names_from_counter(counter)
            comparison_names = get_names_from_counter(comparision_counter)
        else:
            raise SeekrServerError('Normalization for Comparision Set File is not valid')
        similarity = pearson(counts, comparison_counts)
    elif comparison_set is not None and len(comparison_set) > 0 and comparison_set != 'user_set':
        unnormalized_frequency_path, names_path = get_precomputed_frequency_path(comparison_set,
                                                                                 parameters['kmer_length'])
        assert unnormalized_frequency_path is not None and names_path is not None
        if normal_set == skr_config.SETTING_USER_SET:
            (mean, std, counts, names) = compute_normalization_and_frequency(
                infasta=TextIOWrapper(parameters['user_set_files']),
                kmer_length=parameters['kmer_length'],
                outfile=outfile)
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['comparison_set_files']),
                                               outfile=None, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False,
                                               mean=mean, std=std)
            comparison_counts = _unnormalized_frequency_to_normalized(unnormalized_frequency_path, mean, std)
            comparison_names = load_names_from_path(names_path)
        elif normal_set == skr_config.SETTING_COMPARISION_SET:
            raise SeekrServerError('')
        elif mean_std_loaded:
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['user_set_files']),
                                               outfile=outfile, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False,
                                               mean=mean, std=std)
            counts = counter.make_count_file()
            comparison_counts = _unnormalized_frequency_to_normalized(unnormalized_frequency_path, mean, std)
            names = get_names_from_counter(counter)
            comparison_names = load_names_from_path(names_path)
        else:
            raise SeekrServerError('No normalization set Provided')
        similarity = pearson(counts, comparison_counts)
    else:
        if mean_std_loaded:
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['user_set_files']),
                                               outfile=outfile, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False,
                                               mean=mean, std=std)
            counts = counter.make_count_file()
        elif normal_set == skr_config.SETTING_USER_SET:
            counter = kmer_counts.BasicCounter(infasta=TextIOWrapper(parameters['user_set_files']),
                                               outfile=outfile, k=parameters['kmer_length'],
                                               label=True, silent=True, binary=False)
            counts = counter.make_count_file()
        else:
            raise SeekrServerError('Normalization type is not valid')
        names = get_names_from_counter(counter)
        similarity = np.corrcoef(counts)

    # TODO refactor - original code saved to csv on disk - move this to a separate operation
    with open(outfile) as csvFile:
        counts_text = csvFile.read()
    bytes_io = BytesIO()
    np.save(bytes_io, similarity)
    bytes_io.seek(0)
    pearsons_file_in_memory = bytes_io.read()
    return counts_text, pearsons_file_in_memory
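# The Notes in the run_seekr_algorithm docstring above contrast numpy.corrcoef
# (always square) with the project's non-square pearson helper. The following is a
# minimal, self-contained sketch of that idea, not the actual SEEKR function:
# the name pearson_nonsquare, the matrix sizes and the random data are illustrative.
import numpy as np

def pearson_nonsquare(a, b):
    """Pearson's r between every row of `a` (n_a x k) and every row of `b` (n_b x k).

    Returns an n_a x n_b matrix instead of corrcoef's (n_a + n_b) x (n_a + n_b) matrix.
    """
    a_c = a - a.mean(axis=1, keepdims=True)
    b_c = b - b.mean(axis=1, keepdims=True)
    num = a_c @ b_c.T
    den = np.outer(np.linalg.norm(a_c, axis=1), np.linalg.norm(b_c, axis=1))
    return num / den

# 10 user sequences vs 10,000 comparison sequences, 16 k-mer bins each:
user = np.random.randn(10, 16)
comparison = np.random.randn(10000, 16)
print(pearson_nonsquare(user, comparison).shape)   # (10, 10000)
# numpy.corrcoef(np.vstack([user, comparison])) would instead be 10010 x 10010.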
# check for dimension errors
if round(p.sum(), 15) != 1.:
    print("Error: probability array p does not sum to 1:", p.sum())
    sys.exit(0)
if x.shape != y.shape:
    print("Dimension mismatch!")
    print("X", x.shape)
    print("Y", y.shape)
    sys.exit(0)
# end of dimension checks

# x - array of values of the random variable (the range is shifted)
# y - array of hit frequencies
# p - array of probabilities
pears = pearson.pearson(x, p)

# x_  = _x[i] + self.c - self.Xa           - random variable shifted relative to the center
# x__ = (x[i] + self.c - self.Xa) / self.c - shifted and normalized random variable
with open('data/data_calc.csv', 'w') as csvfile:
    for i in range(0, x.shape[0]):
        test = [x[i], y[i], p[i], pears.x_[i], pears.x__[i]]
        csv.writer(csvfile).writerow(test)
with open('data/report.txt', 'w') as reportfile:
    reportfile.write(pears.__str__())

# Calc function points
# lB = np.min(pears.x__) - 2   # left boundary: minimum of the shifted and centered array
# rB = np.max(pears.x__)       # right boundary: its maximum
lB = pears.f.l[1]
def process_jobs():
    try:
        if skr_config.LOGIN_ENABLED and session.get('logged_in') != True:
            return redirect('/login')
        parameters = build_seekr_parameters(request)
        application.logger.debug(parameters)
        t1 = time.perf_counter()
        counts, names, comparison_counts, comparison_names, counter = _run_seekr_algorithm(
            parameters=parameters)
        t2 = time.perf_counter()
        application.logger.debug('Running the algorithm took %.3f seconds' % (t2 - t1))
        if (len(names) <= skr_config.MAX_VISUAL_SEQ_LENGTH
                and len(comparison_names) <= skr_config.MAX_VISUAL_SEQ_LENGTH
                and parameters['kmer_length'] <= skr_config.MAX_VISUAL_KMER_LENGTH):
            fixup_counts_warnings = fixup_counts(counts, counter)
            if comparison_counts is None:
                comparison_counts = counts
                comparison_names = names
                # the comparison set defaults to the user set, so reuse its warnings
                fixup_comparision_warnings = fixup_counts_warnings
            else:
                fixup_comparision_warnings = fixup_counts(comparison_counts, counter)

            # reorder according to hierarchical cluster example
            if len(counts) > 1:
                Z = cluster_vis.cluster_kmers(counts)
                ordering = cluster_vis.get_ordering(Z)
                ordered_counts = counts[ordering, :]
                ordering_int_list = ordering.astype(int).tolist()
                ordered_names = [names[i] for i in ordering_int_list]
            else:
                ordered_counts = counts
                ordering_int_list = [0]
                ordered_names = names
            if len(comparison_counts) > 1:
                comparison_Z = cluster_vis.cluster_kmers(comparison_counts)
                comparison_ordering = cluster_vis.get_ordering(comparison_Z)
                comparison_ordered_counts = comparison_counts[comparison_ordering, :]
                comparison_ordering_int_list = comparison_ordering.astype(int).tolist()
                comparison_ordered_names = [comparison_names[i] for i in comparison_ordering_int_list]
            else:
                comparison_ordered_counts = comparison_counts
                comparison_ordering_int_list = [0]
                comparison_ordered_names = comparison_names

            pearsons = pearson(counts, comparison_counts)

            # shorten the returned names down to SEQUENCE_NAME_DISPLAY_LENGTH characters
            new_names = []
            for s in names:
                if len(s) > skr_config.SEQUENCE_NAME_DISPLAY_LENGTH:
                    new_names.append(s[:skr_config.SEQUENCE_NAME_DISPLAY_LENGTH])
                else:
                    new_names.append(s)
            names = new_names
            new_names = []
            for s in comparison_names:
                if len(s) > skr_config.SEQUENCE_NAME_DISPLAY_LENGTH:
                    new_names.append(s[:skr_config.SEQUENCE_NAME_DISPLAY_LENGTH])
                else:
                    new_names.append(s)
            comparison_names = new_names

            kmers = [''.join(i) for i in product('AGTC', repeat=parameters['kmer_length'])]

            # clip the z-scored counts into [-1, 2] for display
            norm_npm = counts
            scale_npm = norm_npm.flatten()
            mean = np.mean(scale_npm)
            z_npm = stats.zscore(scale_npm)
            count = 0
            for i in z_npm:
                if i >= 2:
                    scale_npm[count] = 2
                elif i < -1:
                    scale_npm[count] = -1
                count = count + 1
            clean_counts = np.reshape(scale_npm, np.shape(norm_npm))

            pearsons = pearsons.round(3)
            counts = counts.round(3)
            clean_counts = clean_counts.round(3)
            pearsons = str(pearsons.tolist())
            counts = str(counts.tolist())
            clean_counts = str(clean_counts.tolist())
            return jsonify({
                'user_names': names,
                'comparison_names': comparison_names,
                'kmer_bins': kmers,
                'pearson_matrix': pearsons,
                'kmer_matrix': counts,
                'kmer_matrix_clean': clean_counts,
                'user_cluster': ordering_int_list,
                'comparison_cluster': comparison_ordering_int_list,
                'user_warnings': fixup_counts_warnings,
                'comparison_warnings': fixup_comparision_warnings
            })
        else:
            return jsonify({'visual_flag': True})
    except SeekrServerError as e:
        application.logger.exception(e)
        return jsonify({'error': str(e)})
    except Exception as e:
        application.logger.exception(e)
        return jsonify({'error': '500'})
# Fragment: the indented lines below continue the body of PNN(x, y); input_layer,
# letters, output_layer, w1, pattern_layer and sigma are defined earlier in that function.
    w2 = np.zeros((len(input_layer), len(letters)))
    for i, yi in enumerate(letters):
        index = np.where(output_layer == yi)
        w2[index, i] = 1 / len(output_layer[index])

    def result(input_data):
        for i, learn_data in enumerate(w1):
            pattern_layer[i] = np.sum(np.exp(-(learn_data - input_data) ** 2 / sigma ** 2))
        summation_layer = np.dot(pattern_layer, w2)
        output_layer = letters[np.argmax(summation_layer)]
        return output_layer

    return result


# Build a random data set: two features per sample.
x = [[] for _ in range(10000)]
y = []
for i in range(len(x)):
    x[i] = [uniform(18, 100), uniform(0, 10)]

# Label each sample by its first feature.
length = len(x)
for i in range(length):
    if x[i][0] < 22:
        y.append(uniform(300, 600))
    elif x[i][0] < 28:
        y.append(uniform(500, 700))
    else:
        y.append(uniform(700, 2000))

output = PNN(x, y)
a = [25, 5]
print(output(a))
pearson_res = math.fabs(pearson(x[0], y) * 100)
def avaliacao(metrica, arq):
    # open the objective-scores (metrics) file
    arquivo = open('metricas.txt', 'r')
    conteudo_texto = arquivo.read()
    a = conteudo_texto.split('\n')
    c = a[:-1]
    arquivo.close()

    # open the subjective-scores file
    subjectscores = open(arq, 'r')
    conteudo_texto1 = subjectscores.read()
    aa = conteudo_texto1.split('\n')
    cc = aa[:-1]
    subjectscores.close()

    # open the JPEG info file
    arquivo2 = open('jpeginfo.txt', 'r')
    conteudo_texto2 = arquivo2.read()
    aaa = conteudo_texto2.split('\n')
    ccc = aaa[:-1]
    arquivo2.close()

    # variable initialization
    x = np.array([])
    y = np.array([])
    l = np.array([])
    total_pontos = 0
    ref = ''
    ab = ''

    # build the METRIC x DMOS data
    for i in range(0, len(cc)):
        w = aa[i].split('\t')
        for line in ccc:
            if w[0] in line:
                # print(line)
                cond = line.split()
                # print(cond[2])
                if cond[2] == '0':
                    ab = cond[1]
        # q = len(w)
        for line in cc:
            if ab in line:
                w0 = line
        w1 = w0.split('\t')
        ww = w[2:(len(w))]
        ww1 = w1[2:(len(w))]
        # print(ww1)
        total_pontos = len(ww) + total_pontos
        b = c[i].split(';')
        psnr = b[metrica]
        h = np.asarray(ww[0:12])   # note: replace the fixed index range by dropping null values
        h1 = np.asarray(ww1[0:12])
        # t = h.astype(float)
        v2 = np.mean(h1.astype(float))
        v1 = np.mean(h.astype(float))
        v = v2 - v1
        if psnr != 'inf':
            x = np.insert(x, 0, psnr)
            y = np.insert(y, 0, v1)
            r = float(psnr)
            # p = (len(w)) - 1.0
        # outliers
        out = outlier.outlier(h.astype(float))
        l = np.append(l, out)

    razao_outliers = np.sum(l) / total_pontos
    coeficiente_spearman = spearman.spearman(x, y)   # spearman/s = stats.spearmanr(x,y)
    coeficiente_pearson = pearson.pearson(x, y)      # pearson/p = stats.pearsonr(x,y)
    coeficiente_anova_F, coeficiente_anova_p, relacao_anova = anova.anova(x, y)
    a, b = regressaoLinear.regressaoLinear(x, y)     # linear regression
    x, t, y, v = levenbergMarquart.levenberg(x, y)   # logistic fit
    linear = '(' + str(a) + ')*x + (' + str(b) + ')'
    logistica = ('(' + str(v[0]) + ') * (' + str(0.5) + '-(' + str(v[1]) + ')/(exp(' +
                 str(v[1]) + '*(x-(' + str(v[2]) + '))))) + (' + str(v[3]) + ') * x+(' +
                 str(v[4]) + ')')
    return (razao_outliers, coeficiente_spearman, coeficiente_pearson, coeficiente_anova_F,
            coeficiente_anova_p, relacao_anova, logistica, linear)
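# The inline comments in avaliacao() above equate the project-local helpers with their
# SciPy counterparts (pearson.pearson ~ stats.pearsonr, spearman.spearman ~ stats.spearmanr).
# A minimal cross-check along those lines; the sample data below is made up for illustration.
import numpy as np
from scipy import stats

metric = np.array([31.2, 28.4, 25.7, 22.1])   # e.g. PSNR values
mos = np.array([4.1, 3.6, 2.9, 2.2])          # e.g. mean opinion scores
r, p_value = stats.pearsonr(metric, mos)
rho, _ = stats.spearmanr(metric, mos)
print(r, rho)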
def cc(l, k):
    cc = pearson.pearson(l, k)
    return cc