Exemplo n.º 1
0
 def plotZeroCount(self,figurename,**kwargs):
     """Plot a histogram of the number of zero entries per matrix row.

     Rows whose zero count equals the matrix dimension (i.e. rows that
     are entirely zero) are excluded from the histogram.  Extra keyword
     arguments are forwarded to histogram().
     """
     size = len(self.matrix)
     # zero count per row, dropping fully-zero rows
     zero_counts = [c
                    for c in (len(np.flatnonzero(row == 0)) for row in self.matrix)
                    if c != size]
     histogram(figurename,
               zero_counts,
               int(size/100),
               xlab = '# of Zeros', ylab = 'Frequency',
               **kwargs)
Exemplo n.º 2
0
def CalculateMeanCoverage(Contigs, Information, param):
    """Estimate the mean coverage and its standard deviation.

    Uses the (up to) 1000 longest contigs, then iteratively removes
    extreme observations with RemoveOutliers() until none remain.
    Progress is written to the file-like object Information, and a
    coverage histogram is plotted when param.plots is set.

    Returns (mean_cov, std_dev).
    """
    # tuples like (cont length, contig name)
    list_of_cont_tuples = [(Contigs[contig].length, contig) for contig in Contigs]
    # sorted as longest first
    list_of_cont_tuples = sorted(list_of_cont_tuples, key=lambda tuple: tuple[0], reverse=True)
    # coverages of longest contigs
    longest_contigs = list_of_cont_tuples[:1000]
    cov_of_longest_contigs = [Contigs[contig[1]].coverage for contig in longest_contigs]
    # Calculate mean coverage from the 1000 longest contigs
    n = float(len(cov_of_longest_contigs))
    mean_cov = sum(cov_of_longest_contigs) / n
    # If there is only one contig above the size threshold, n can be 1;
    # bump it so the sample-variance denominator (n - 1) is non-zero.
    if n == 1:
        n += 1

    # sample std dev; (x - mean)**2 is the expanded x**2 - 2*x*mean + mean**2
    std_dev = (
        sum((x - mean_cov) ** 2 for x in cov_of_longest_contigs) / (n - 1)
    ) ** 0.5
    extreme_obs_occur = True
    print >> Information, "Mean coverage before filtering out extreme observations = ", mean_cov
    print >> Information, "Std dev of coverage before filtering out extreme observations= ", std_dev

    ## SMOOTH OUT THE MEAN HERE by removing extreme observations ##
    while extreme_obs_occur:
        extreme_obs_occur, filtered_list = RemoveOutliers(mean_cov, std_dev, cov_of_longest_contigs)
        n = float(len(filtered_list))
        try:
            mean_cov = sum(filtered_list) / n
        except ZeroDivisionError:
            break
        if n == 1:
            # A single surviving observation has no defined sample std dev;
            # the original code divided by (n - 1) == 0 here and crashed
            # with an uncaught ZeroDivisionError.  Keep the current
            # estimates and stop filtering instead.
            cov_of_longest_contigs = filtered_list
            break
        std_dev = (
            sum((x - mean_cov) ** 2 for x in filtered_list) / (n - 1)
        ) ** 0.5
        cov_of_longest_contigs = filtered_list

    print >> Information, "Mean coverage after filtering = ", mean_cov
    print >> Information, "Std coverage after filtering = ", std_dev
    print >> Information, "Length of longest contig in calc of coverage: ", longest_contigs[0][0]
    print >> Information, "Length of shortest contig in calc of coverage: ", longest_contigs[-1][0]

    if param.plots:
        plots.histogram(
            cov_of_longest_contigs,
            param,
            bins=50,
            x_label="coverage",
            y_label="frequency",
            title="BESST_cov_1000_longest_cont" + param.bamfile.split("/")[-1],
        )

    return (mean_cov, std_dev)
Exemplo n.º 3
0
def compareMatrix(m1,m2,figurename = 'comparison.png',**kwargs):
    """compare 2 matrixes, output correlation coefficient

    Prints the Pearson and Spearman correlations over the entries that
    are non-zero in both matrices, then (unless figurename is None)
    plots a histogram of the row-wise Pearson coefficients.

    Parameters
    ----------
        m1,m2 : contactmatrix instances
        must be the same dimensions
        figurename : str
        filename for columnwise pearson corr histogram, set None to escape this step
    """
    # validate argument types and matching dimensions up front
    if not (isinstance(m1,contactmatrix) and isinstance(m2,contactmatrix)):
        raise TypeError, "Invalid argument type, 2 contactmatrixes are required"
    if len(m1) != len(m2):
        raise TypeError, "Invalid argument, dimensions of matrixes must meet"
    from scipy.stats import spearmanr,pearsonr
    
    flat1 = m1.matrix.flatten()
    flat2 = m2.matrix.flatten()
    # mask of entries that are positive in BOTH matrices
    # (elementwise AND expressed as a product of boolean masks)
    nonzeros = (flat1 > 0) * (flat2 > 0)
    flat1 = flat1[nonzeros]
    flat2 = flat2[nonzeros]
    print 'pearsonr:'
    print pearsonr(flat1,flat2)
    print 'spearmanr:'
    print spearmanr(flat1,flat2)
    # release the large flattened copies before the per-row pass
    del flat1
    del flat2
    if not (figurename is None):
        corr = []
        for i in range(len(m1)):
            # Pearson correlation of the corresponding rows
            r = pearsonr(m1.matrix[i],m2.matrix[i])
            #print r
            # skip rows whose coefficient is undefined (NaN, e.g. constant rows)
            if not np.isnan(r[0]):
                corr.append(r[0])
      
        histogram(figurename,
                corr,
                100,
                xlab = 'Correlation Coefficient', ylab = 'Frequency',
                **kwargs)
Exemplo n.º 4
0
 def plotSum(self,figurename,outlier=False,line=None,**kwargs):
     """
     Print the rowsum frequency histogram (only rows with positive sum).
     
     Parameters:
     -----------
     figurename: string
     Name of the plot
     outlier: bool
     option to select plotting the outlier line, only functioning if 'line' parameter is set to None
     line: float/array/list
     draw vertical lines at a list of positions 
     """
     rowsum = self.rowsum()
     if line is None and outlier:
         # Tukey upper outlier fence: Q3 + 1.5 * IQR.
         # Percentiles are computed once each (the original evaluated
         # the 75th percentile twice).
         q1 = np.percentile(rowsum, 25)
         q3 = np.percentile(rowsum, 75)
         line = (q3 - q1) * 1.5 + q3
     
     histogram(figurename,
             rowsum[rowsum > 0],
             int(len(self.matrix)/100),
             xlab = 'Row sums', ylab = 'Frequency',
             line = line,
             **kwargs)
Exemplo n.º 5
0
def GiveScoreOnEdges(G, Scaffolds, small_scaffolds, Contigs, param, Information, plot):
    """Score every linked edge of the scaffold graph G in place.

    For each edge carrying mate-pair links, estimates the gap between the
    two scaffold ends (stored as the 'gap' edge attribute) and combines
    two sub-scores into the 'score' edge attribute:

      * std_dev_score -- agreement between the observed std dev of the
        link observations and the theoretically expected one.
      * span_score    -- 1 - KS statistic between the (centred) link
        position distributions on the two sides.

    The edge score is their sum only when both sub-scores exceed 0.5,
    otherwise 0.  Diagnostics are written to Information and, when
    param.plots is set, plotted.  Returns an empty tuple.
    """

    span_score_obs = []
    std_dev_score_obs = []
    gap_obs = []
    nr_link_obs = []
    # NOTE(review): cnt_sign is never incremented in this function, so the
    # final report below always prints 0
    cnt_sign = 0

    for edge in G.edges():
        mean_ = 0
        std_dev = 0
        if G[edge[0]][edge[1]]['nr_links'] != None:
            # number of links plus first two moments of the observations
            n = G[edge[0]][edge[1]]['nr_links']
            obs_squ = G[edge[0]][edge[1]]['obs_sq']
            mean_ = G[edge[0]][edge[1]]['obs'] / float(n)
            data_observation = (n * param.mean_ins_size - G[edge[0]][edge[1]]['obs']) / float(n)
            # nodes look like (scaffold_id, side); a scaffold lives either
            # in Scaffolds or in small_scaffolds
            try:
                len1 = Scaffolds[ edge[0][0] ].s_length
            except KeyError:
                len1 = small_scaffolds[ edge[0][0] ].s_length
            try:
                len2 = Scaffolds[ edge[1][0] ].s_length
            except KeyError:
                len2 = small_scaffolds[ edge[1][0] ].s_length
            # use the model-based gap estimator only when both scaffolds are
            # long relative to the insert-size spread; otherwise fall back to
            # the naive estimate derived directly from the observations
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                gap = param_est.GapEstimator(param.mean_ins_size, param.std_dev_ins_size, param.read_len, mean_, len1, len2)
            else:
                gap = data_observation

            G[edge[0]][edge[1]]['gap'] = int(gap)
            # an implied overlap larger than either scaffold is implausible:
            # reject the edge outright
            if -gap > len1 or -gap > len2:
                G[edge[0]][edge[1]]['score'] = 0
                continue

            #std_dev_d_eq_0 = param_est.tr_sk_std_dev(param.mean_ins_size, param.std_dev_ins_size, param.read_len, len1, len2, gap)

            # expected std dev for this gap (huge sentinel value when the
            # scaffolds are too short for the model to apply)
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                std_dev_d_eq_0 = param_est.tr_sk_std_dev(param.mean_ins_size, param.std_dev_ins_size, param.read_len, len1, len2, gap)
            else:
                std_dev_d_eq_0 = 2 ** 32

            # observed sample std dev from sum and sum of squares;
            # n == 1 makes (n - 1) zero and takes the fallback branch
            try:
                std_dev = ((obs_squ - n * mean_ ** 2) / (n - 1)) ** 0.5
                #chi_sq = (n - 1) * (std_dev ** 2 / std_dev_d_eq_0 ** 2)
            except ZeroDivisionError:
                std_dev = 2 ** 32
                #chi_sq = 0


            # per-end link position lists, stored under the scaffold's name
            try:
                l1 = G[edge[0]][edge[1]][Scaffolds[edge[0][0]].name]
            except KeyError:
                l1 = G[edge[0]][edge[1]][small_scaffolds[edge[0][0]].name]
            try:
                l2 = G[edge[0]][edge[1]][Scaffolds[edge[1][0]].name]
            except KeyError:
                l2 = G[edge[0]][edge[1]][small_scaffolds[edge[1][0]].name]

            # centre l1 around its mean; mirror l2 around its maximum and
            # centre it too, so the two sides become comparable distributions
            l1.sort()
            n_obs = len(l1)
            l1_mean = sum(l1) / float(n_obs)
            #l1_median = l1[len(l1) / 2]
            l1 = map(lambda x: x - l1_mean, l1)
            #l1 = map(lambda x: x - l1_median, l1)
            max_obs2 = max(l2)
            l2.sort(reverse=True)
            l2 = map(lambda x: abs(x - max_obs2), l2)
            l2_mean = sum(l2) / float(n_obs)
            #l2_median = l2[len(l2) / 2]
            l2 = map(lambda x: x - l2_mean, l2)
            #l2 = map(lambda x: x - l2_median, l2)
            # two-sample Kolmogorov-Smirnov test between the two sides
            KS_statistic, p_value = ks_2samp(l1, l2)


            #M_W_statistic, p_val = mannwhitneyu(l1, l2)

            #diff = map(lambda x: abs(abs(x[1]) - abs(x[0])), zip(l1, l2))
            #sc = sum(diff) / len(diff)

            # KS is unreliable for tiny samples; require at least 3 links
            if len(l1) < 3:
                span_score = 0
            else:
                span_score = 1 - KS_statistic

            # ratio of observed to expected std dev, oriented so the score
            # is always <= 1
            try:
                std_dev_score = min(std_dev / std_dev_d_eq_0, std_dev_d_eq_0 / std_dev) #+ span_score #+ min(n/E_links, E_links/float(n))
            except ZeroDivisionError:
                std_dev_score = 0
                sys.stderr.write(str(std_dev) + ' ' + str(std_dev_d_eq_0) + ' ' + str(span_score) + '\n')

            # both sub-scores must clear 0.5 for the edge to count at all
            G[edge[0]][edge[1]]['score'] = std_dev_score + span_score if std_dev_score > 0.5 and span_score > 0.5 else 0
            if param.plots:
                span_score_obs.append(span_score)
                std_dev_score_obs.append(std_dev_score)
                gap_obs.append(gap)
                nr_link_obs.append(n_obs)


    # diagnostic plots of the collected score/gap/link statistics
    if param.plots:
        plots.histogram(span_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Dispersity_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.histogram(std_dev_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Standard_deviation_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, span_score_obs, param, x_label='std_dev_score_obs', y_label='span_score_obs', title='Score_correlation' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, gap_obs, param, x_label='std_dev_score_obs', y_label='estimated gap size', title='Gap_to_sigma' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, gap_obs, param, x_label='span_score_obs', y_label='estimated gap size', title='Gap_to_span' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, nr_link_obs, param, x_label='span_score_obs', y_label='Number links', title='Obs_to_span' + plot + '.' + param.bamfile.split('/')[-1])

    # sanity check: every linked edge should have received a 'score'
    for edge in G.edges():
        if G[edge[0]][edge[1]]['nr_links'] != None:
            try:
                G[edge[0]][edge[1]]['score']
            except KeyError:
                sys.stderr.write(str(G[edge[0]][edge[1]]) + ' ' + str(Scaffolds[edge[0][0]].s_length) + ' ' + str(Scaffolds[edge[1][0]].s_length))
    print >> Information, 'Number of significantly spurious edges:', cnt_sign

    return()
Exemplo n.º 6
0
def GiveScoreOnEdges(G, Scaffolds, small_scaffolds, Contigs, param, Information, plot):
    """Score every linked edge of the scaffold graph G in place.

    Near-duplicate of the other GiveScoreOnEdges variant in this file.
    For each edge carrying mate-pair links, estimates the gap between the
    two scaffold ends ('gap' edge attribute) and combines two sub-scores
    into the 'score' edge attribute:

      * std_dev_score -- agreement between the observed std dev of the
        link observations and the theoretically expected one.
      * span_score    -- 1 - KS statistic between the (centred) link
        position distributions on the two sides.

    The edge score is their sum only when both sub-scores exceed 0.5,
    otherwise 0.  Diagnostics are written to Information and, when
    param.plots is set, plotted.  Returns an empty tuple.
    """

    span_score_obs = []
    std_dev_score_obs = []
    gap_obs = []
    nr_link_obs = []
    # NOTE(review): cnt_sign is never incremented in this function, so the
    # final report below always prints 0
    cnt_sign = 0

    for edge in G.edges():
        mean_ = 0
        std_dev = 0
        if G[edge[0]][edge[1]]['nr_links'] != None:
            # number of links plus first two moments of the observations
            n = G[edge[0]][edge[1]]['nr_links']
            obs_squ = G[edge[0]][edge[1]]['obs_sq']
            mean_ = G[edge[0]][edge[1]]['obs'] / float(n)
            data_observation = (n * param.mean_ins_size - G[edge[0]][edge[1]]['obs']) / float(n)
            # nodes look like (scaffold_id, side); a scaffold lives either
            # in Scaffolds or in small_scaffolds
            try:
                len1 = Scaffolds[ edge[0][0] ].s_length
            except KeyError:
                len1 = small_scaffolds[ edge[0][0] ].s_length
            try:
                len2 = Scaffolds[ edge[1][0] ].s_length
            except KeyError:
                len2 = small_scaffolds[ edge[1][0] ].s_length
            # use the model-based gap estimator only when both scaffolds are
            # long relative to the insert-size spread; otherwise fall back to
            # the naive estimate derived directly from the observations
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                gap = param_est.GapEstimator(param.mean_ins_size, param.std_dev_ins_size, param.read_len, mean_, len1, len2)
            else:
                gap = data_observation

            G[edge[0]][edge[1]]['gap'] = int(gap)
            # an implied overlap larger than either scaffold is implausible:
            # reject the edge outright
            if -gap > len1 or -gap > len2:
                G[edge[0]][edge[1]]['score'] = 0
                continue

            #std_dev_d_eq_0 = param_est.tr_sk_std_dev(param.mean_ins_size, param.std_dev_ins_size, param.read_len, len1, len2, gap)

            # expected std dev for this gap (huge sentinel value when the
            # scaffolds are too short for the model to apply)
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                std_dev_d_eq_0 = param_est.tr_sk_std_dev(param.mean_ins_size, param.std_dev_ins_size, param.read_len, len1, len2, gap)
            else:
                std_dev_d_eq_0 = 2 ** 32

            # observed sample std dev from sum and sum of squares;
            # n == 1 makes (n - 1) zero and takes the fallback branch
            try:
                std_dev = ((obs_squ - n * mean_ ** 2) / (n - 1)) ** 0.5
                #chi_sq = (n - 1) * (std_dev ** 2 / std_dev_d_eq_0 ** 2)
            except ZeroDivisionError:
                std_dev = 2 ** 32
                #chi_sq = 0


            # per-end link position lists, stored under the scaffold's name
            try:
                l1 = G[edge[0]][edge[1]][Scaffolds[edge[0][0]].name]
            except KeyError:
                l1 = G[edge[0]][edge[1]][small_scaffolds[edge[0][0]].name]
            try:
                l2 = G[edge[0]][edge[1]][Scaffolds[edge[1][0]].name]
            except KeyError:
                l2 = G[edge[0]][edge[1]][small_scaffolds[edge[1][0]].name]

            #max_obs1 = max(l1)
            #min_obs1 = min(l1)
            # centre l1 around its mean; mirror l2 around its maximum and
            # centre it too, so the two sides become comparable distributions
            l1.sort()
            n_obs = len(l1)
            l1_mean = sum(l1) / float(n_obs)
            l1 = map(lambda x: x - l1_mean, l1)
            max_obs2 = max(l2)
            #min_obs2 = min(l2)
            l2.sort(reverse=True)
            l2 = map(lambda x: abs(x - max_obs2), l2)
            l2_mean = sum(l2) / float(n_obs)
            l2 = map(lambda x: x - l2_mean, l2)
            # two-sample Kolmogorov-Smirnov test between the two sides
            KS_statistic, p_value = ks_2samp(l1, l2)
            #M_W_statistic, p_val = mannwhitneyu(l1, l2)

            #diff = map(lambda x: abs(abs(x[1]) - abs(x[0])), zip(l1, l2))
            #sc = sum(diff) / len(diff)

            # KS is unreliable for tiny samples; require at least 3 links
            if len(l1) < 3:
                span_score = 0
            else:
                span_score = 1 - KS_statistic


#            try:
#                span_score = 1 - sc / float(min((max_obs1 - min_obs1), (max_obs2 - min_obs2)))
#            except ZeroDivisionError:
#                span_score = 0
#            if span_score < 0:
#                span_score = 0
#                print  'ZEEERO', max_obs1 - min_obs1, max_obs2 - min_obs2, gap, sc, Scaffolds[edge[0][0]].contigs[0].name, Scaffolds[edge[1][0]].contigs[0].name

# if len(l1) > 3:
#     print >> Information , 'avg_diff: ', sc, 'span1: ', (max_obs1 - min_obs1), 'span2: ', (max_obs2 - min_obs2), 'Span score: ', span_score, 'pval: ', p_value, 'Est gap: ', gap, 'Nr_links: ', len(l1) #, Scaffolds[edge[0][0]].contigs[0].name, Scaffolds[edge[1][0]].contigs[0].name, len(diff)


                #print >> Information , l1
                #print >> Information , l2
                #print >> Information , diff
            #print  span_score

#
#            k = normal.MaxObsDistr(n, 0.95)
#            if 2 * param.read_len < len1 and 2 * param.read_len < len2:
#                #span_max1 = min(param.mean_ins_size + k * param.std_dev_ins_size - 2 * param.read_len, len1 - param.read_len + max(0, gap))
#                #span_max2 = min(param.mean_ins_size + k * param.std_dev_ins_size - 2 * param.read_len, len2 - param.read_len + max(0, gap))
#
#                span_max1 = min(param.mean_ins_size + k * param.std_dev_ins_size - param.read_len, len1 + max(0, gap))
#                span_max2 = min(param.mean_ins_size + k * param.std_dev_ins_size - param.read_len, len2 + max(0, gap))
#
#                try:
#                    span_obs1 = Scaffolds[ edge[0][0] ].upper_right_nbrs_obs[edge[1]] - Scaffolds[ edge[0][0] ].lower_right_nbrs_obs[edge[1]] if edge[0][1] == 'R' else Scaffolds[ edge[0][0] ].upper_left_nbrs_obs[edge[1]] - Scaffolds[ edge[0][0] ].lower_left_nbrs_obs[edge[1]]
#                except KeyError:
#                    span_obs1 = small_scaffolds[ edge[0][0] ].upper_right_nbrs_obs[edge[1]] - small_scaffolds[ edge[0][0] ].lower_right_nbrs_obs[edge[1]] if edge[0][1] == 'R' else small_scaffolds[ edge[0][0] ].upper_left_nbrs_obs[edge[1]] - small_scaffolds[ edge[0][0] ].lower_left_nbrs_obs[edge[1]]
#                try:
#                    span_obs2 = Scaffolds[ edge[1][0] ].upper_right_nbrs_obs[edge[0]] - Scaffolds[ edge[1][0] ].lower_right_nbrs_obs[edge[0]] if edge[1][1] == 'R' else Scaffolds[ edge[1][0] ].upper_left_nbrs_obs[edge[0]] - Scaffolds[ edge[1][0] ].lower_left_nbrs_obs[edge[0]]
#                except KeyError:
#                    span_obs2 = small_scaffolds[ edge[1][0] ].upper_right_nbrs_obs[edge[0]] - small_scaffolds[ edge[1][0] ].lower_right_nbrs_obs[edge[0]] if edge[1][1] == 'R' else small_scaffolds[ edge[1][0] ].upper_left_nbrs_obs[edge[0]] - small_scaffolds[ edge[1][0] ].lower_left_nbrs_obs[edge[0]]
#
#
#                #span_score1 = min((max(0, gap) + 2 * param.read_len + span_obs1) / float(span_max1) , float(span_max1) / (max(0, gap) + 2 * param.read_len + span_obs1)) if span_obs1 > 0 else 0
#                #span_score2 = min((max(0, gap) + 2 * param.read_len + span_obs2) / float(span_max2) , float(span_max2) / (max(0, gap) + 2 * param.read_len + span_obs2)) if span_obs2 > 0 else 0
#
#                span_score1 = min((max(0, gap) + param.read_len + span_obs1) / float(span_max1) , float(span_max1) / (max(0, gap) + param.read_len + span_obs1)) if span_obs1 > 0 else 0
#                span_score2 = min((max(0, gap) + param.read_len + span_obs2) / float(span_max2) , float(span_max2) / (max(0, gap) + param.read_len + span_obs2)) if span_obs2 > 0 else 0
#
#                span_score = min(span_score1, span_score2)
#
#                #span_score = (max(0, gap) + param.read_len + span_obs1) / float(span_max1)
#            else:
#                span_score = 0


            # ratio of observed to expected std dev, oriented so the score
            # is always <= 1
            try:
                std_dev_score = min(std_dev / std_dev_d_eq_0, std_dev_d_eq_0 / std_dev) #+ span_score #+ min(n/E_links, E_links/float(n))
            except ZeroDivisionError:
                std_dev_score = 0
                sys.stderr.write(str(std_dev) + ' ' + str(std_dev_d_eq_0) + ' ' + str(span_score) + '\n')

            # both sub-scores must clear 0.5 for the edge to count at all
            G[edge[0]][edge[1]]['score'] = std_dev_score + span_score if std_dev_score > 0.5 and span_score > 0.5 else 0
            if param.plots:
                span_score_obs.append(span_score)
                std_dev_score_obs.append(std_dev_score)
                gap_obs.append(gap)
                nr_link_obs.append(n_obs)


    # diagnostic plots of the collected score/gap/link statistics
    if param.plots:
        plots.histogram(span_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Dispersity_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.histogram(std_dev_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Standard_deviation_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, span_score_obs, param, x_label='std_dev_score_obs', y_label='span_score_obs', title='Score_correlation' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, gap_obs, param, x_label='std_dev_score_obs', y_label='estimated gap size', title='Gap_to_sigma' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, gap_obs, param, x_label='span_score_obs', y_label='estimated gap size', title='Gap_to_span' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, nr_link_obs, param, x_label='span_score_obs', y_label='Number links', title='Obs_to_span' + plot + '.' + param.bamfile.split('/')[-1])

    # sanity check: every linked edge should have received a 'score'
    for edge in G.edges():
        if G[edge[0]][edge[1]]['nr_links'] != None:
            try:
                G[edge[0]][edge[1]]['score']
            except KeyError:
                sys.stderr.write(str(G[edge[0]][edge[1]]) + ' ' + str(Scaffolds[edge[0][0]].s_length) + ' ' + str(Scaffolds[edge[1][0]].s_length))
    print >> Information, 'Number of significantly spurious edges:', cnt_sign

    return()
Exemplo n.º 7
0
    print 'start hyperparameter selection', time.time()
    best_regressor = regressors[hyperparameter_selection(
        regressors, train_x, train_y, 5)]
    print best_regressor.C, best_regressor.epsilon, best_regressor.kernel, best_regressor.degree, best_regressor.gamma

else:
    best_regressor = SVR(C=0.25, epsilon=0.25, kernel='linear')

# fit the selected regressor on the training set and predict on the test set
best_regressor.fit(train_x, train_y)
prediction = best_regressor.predict(test_x)

# persist the raw predictions for later inspection
np.save('prediction.npy', prediction)

# report root-mean-squared, mean-absolute and mean-relative error over all samples
print 'RMSE, MAE, MRE (all):', mean_squared_error(test_y, prediction)**0.5, \
    mean_absolute_error(test_y, prediction), \
    mean_relative_error(test_y, prediction)

# determine error if only best 24 players are selected
# (test[index, 0] presumably holds a player id -- confirm against the
#  construction of test_players elsewhere in this script)
indices = []
for index in range(len(test_x)):
    if test[index, 0] in test_players.keys():
        indices.append(index)
print 'RMSE, MAE, MRE (24 best):', mean_squared_error(test_y[indices], prediction[indices])**0.5, \
    mean_absolute_error(test_y[indices], prediction[indices]), \
    mean_relative_error(test_y[indices], prediction[indices])
print zip(test_y[indices], prediction[indices])

# optionally plot predicted vs. actual values
if HISTOGRAM:
    histogram(test_y, prediction)
Exemplo n.º 8
0
def levels(E, ket, epsilon=1e-8, colors=''):
    """Return the degenerate subspace index and optionally the colormap.

    Each eigenvalue in E is assigned an irreducible-representation index:
      0 - unidimensional symmetric representation (reuns)
      1 - unidimensional anti-symmetric representation (reuna)
      2 - bidimensional representation (rebde)

    Parameters
    ----------
    E : 1-D array of eigenvalues (assumed sorted -- np.diff is used for
        spacings; confirm with caller)
    ket : array of quantum numbers per eigenvalue; ket[i][1] is used as
        the n2 quantum number (inferred from usage below)
    epsilon : relative-spacing threshold separating two distinct levels
    colors : sequence of color codes; when non-empty a per-eigenvalue
        colormap list is also returned

    Side effects: writes 'hist_delta.pdf', 'hist_relsp.pdf',
    'bar_delta.pdf' and 'relsp.pdf' figures and appends diagnostics to
    'log.txt'.

    Raises RuntimeError when a group of more than 3 quasi-degenerate
    levels cannot be split unambiguously.
    """
    ir_reps = np.zeros([E.size], dtype=np.uint8)
    return_colors = len(colors)
    if return_colors:
        colormap = [''] * E.size  # colors used

    # Group energy levels such that a level contains all the eigenvalues with
    # the same value
    delta = np.diff(E)
    avgSpacing = (E[-1] - E[0]) / E.size
    relsp = delta / avgSpacing
    print('levels epsilon:', epsilon)
    print('avgSpacing:', avgSpacing)

    # Split E and ket at the same indices: a new degenerate group starts
    # wherever the relative spacing exceeds epsilon (computed once for both)
    split_at = np.where(relsp > epsilon)[0] + 1
    levels = np.split(E, split_at)
    states = np.split(ket, split_at)

    # Energy difference (between two consecutive levels) histogram
    histogram(delta,
              xlabel=r'$\Delta E$',
              xscale='log',
              bins=np.pad(np.logspace(-15, 1, 17), (1, 0), mode='constant'),
              ylabel='No. of levels',
              fname='hist_delta.pdf',
              figsize=(5.8, 3))
    # Relative spacing histogram
    histogram(relsp,
              xscale='log',
              ylabel='No. of levels',
              bins=np.pad(np.logspace(-13, 1, 15), (1, 0), mode='constant'),
              fname='hist_relsp.pdf',
              xlabel='$s$',
              figsize=(2.8, 3))
    # Energy difference bar plot
    bar_plot(delta,
             figsize=(5.8, 3),
             ylabel=r'$\Delta E$',
             yscale='log',
             xlabel='index',
             fname='bar_delta.pdf',
             dpi=720)
    # Relative spacing bar plot
    bar_plot(relsp,
             figsize=(5.8, 3),
             yscale='log',
             fname='relsp.pdf',
             dpi=720,
             label=r'$\varepsilon=' + latex_float(epsilon) + '$',
             axhline_y=epsilon,
             ylabel='$s$',
             xlabel='index')

    # Check for bidimensional representation selection problems
    levels_cp = list(levels)
    states_cp = list(states)
    log = open('log.txt', 'a')
    log.write('\n\nlevels epsilon: ' + str(epsilon))
    for i in range(len(levels_cp)):
        if levels_cp[i].size > 2:
            local_relsp = np.diff(levels_cp[i]) / avgSpacing
            log.write('\nInfo: Found ' + str(levels_cp[i].size) + ' levels ' +
                      'in the bidimensional representation with: \nenergy: ' +
                      str(levels_cp[i]) + '\ndelta: ' +
                      str(np.diff(levels_cp[i])) + '\nrelsp: ' +
                      str(local_relsp))
            # Try to fix the problem
            if levels_cp[i].size > 3:
                log.write('\nError: Cannot choose where to split!')
                # close (and flush) the log before aborting so the error
                # message above is not lost; the handle previously leaked
                # on this path
                log.close()
                raise RuntimeError('Cannot choose where to split!')
            elif local_relsp[0] == local_relsp[1]:
                log.write('\nWarning: 3 consecutive levels with identical ' +
                          'relative spacings')
                # log.write('\nket: ' + str(states_cp[i]))
                n2 = np.array([states_cp[i][j][1] for j in range(3)])
                log.write('\nn2: ' + str(n2))
                # Find the dominant parity
                unique, counts = np.unique(n2 % 2, return_counts=True)
                log.write('\nDominant parity: ' +
                          ('odd' if unique[np.argmax(counts)] else 'even'))
                # Find the current position
                j = [np.array_equal(levels_cp[i], k)
                     for k in levels].index(True)
                # Select the levels with different parity for the bidimensional
                # representation
                dominant = n2 % 2 == unique[np.argmax(counts)]
                different = n2 % 2 != unique[np.argmax(counts)]
                # Bidimensional representation levels
                bd_l = [levels_cp[i][dominant][0]]
                # Bidimensional representation states
                bd_st = [states_cp[i][dominant][0]]
                if counts[0] < 3 and counts[1] < 3:
                    bd_l.append(levels_cp[i][different][0])
                    bd_st.append(states_cp[i][different][0])
                else:
                    logging.warning('3 consecutive quantum numbers with ' +
                                    'the same parity!')
                    bd_l.append(levels_cp[i][dominant][2])
                    bd_st.append(states_cp[i][dominant][2])
                # Unidimensional representation levels
                u_l = [levels_cp[i][dominant][1]]
                # Unidimensional representation states
                u_st = [states_cp[i][dominant][1]]

                # replace the 3-level group with a 2-level + 1-level pair
                levels[j:j] = [np.array(bd_l), np.array(u_l)]
                states[j:j] = [np.array(bd_st), np.array(u_st)]
                del levels[j + 2]
                del states[j + 2]

                log.write('\nresult: ' + str(levels[j]) + str(levels[j + 1]) +
                          '\nwith: ' + str(states[j]) + str(states[j + 1]))
            else:
                # Find the current position
                j = [np.array_equal(levels_cp[i], k)
                     for k in levels].index(True)
                # Split at the maximum relative spacing
                levels[j:j] = np.split(
                    levels_cp[i],
                    np.where(local_relsp == local_relsp.max())[0] + 1)
                states[j:j] = np.split(
                    states_cp[i],
                    np.where(local_relsp == local_relsp.max())[0] + 1)
                del levels[j + 2]
                del states[j + 2]
                log.write('\nresult: ' + str(levels[j]) + str(levels[j + 1]))

    # Flatten the grouped levels back onto per-eigenvalue arrays;
    # k accumulates the extra offset contributed by multi-level groups
    k = 0
    for i in range(len(levels)):
        for j in range(levels[i].size):
            if return_colors:
                colormap[i + j + k] = colors[i % len(colors)]
            if levels[i].size > 1:  # degenerate subspace -> rebde
                ir_reps[i + j + k] = 2
            else:
                if states[i][0][1] % 2:  # n2 odd -> reuna
                    ir_reps[i + j + k] = 1
                else:  # n2 even -> reuns
                    ir_reps[i + j + k] = 0
        k += levels[i].size - 1

    log.close()
    if return_colors:
        return ir_reps, colormap
    return ir_reps
Exemplo n.º 9
0
    numBins = 10  # number of bins in histograms
    figWidth = 14.4  # width of figure in inches
    nrow = 2  # number of subplot rows
    ncol = 3  # number of subplot columns
    nplot = 1  # current plot number

    # create single figure with subplots for all plots
    fig = plt.figure()
    fig.set_size_inches(figWidth, figWidth / 1.6)

    # histogram of population
    ax = fig.add_subplot(nrow, ncol, nplot)
    plots.histogram(dfPop,
                    fig=fig,
                    ax=ax,
                    numBins=numBins,
                    title='Population Distribution',
                    xlabel=['height'],
                    ylabel=['count'])

    # create array of x values for calculating pdf values
    # (500 evenly spaced points spanning the observed 'height' range)
    xmin = dfPop.loc[:, 'height'].min()
    xmax = dfPop.loc[:, 'height'].max()
    x = np.linspace(xmin, xmax, 500)

    # plot normal probability density function with population mean and variance
    # NOTE(review): mu and sigma are defined outside this excerpt --
    # presumably the population mean and std dev of 'height'; confirm
    pdf = pdfnorm(x, mu, sigma)
    # secondary y-axis sharing the same x-axis, to overlay the pdf
    ax = ax.twinx()
    plots.scatter(x,
                  pdf,
                  fig=fig,
Exemplo n.º 10
0
else:
    best_regressor = SVR(C=0.25, epsilon=0.25, kernel='linear')

# fit the selected regressor on the training set and predict on the test set
best_regressor.fit(train_x, train_y)
prediction = best_regressor.predict(test_x)

# persist the raw predictions for later inspection
np.save('prediction.npy', prediction)

# report root-mean-squared, mean-absolute and mean-relative error over all samples
print 'RMSE, MAE, MRE (all):', mean_squared_error(test_y, prediction)**0.5, \
    mean_absolute_error(test_y, prediction), \
    mean_relative_error(test_y, prediction)

# determine error if only best 24 players are selected
# (test[index, 0] presumably holds a player id -- confirm against the
#  construction of test_players elsewhere in this script)
indices = []
for index in range(len(test_x)):
    if test[index, 0] in test_players.keys():
        indices.append(index)
print 'RMSE, MAE, MRE (24 best):', mean_squared_error(test_y[indices], prediction[indices])**0.5, \
    mean_absolute_error(test_y[indices], prediction[indices]), \
    mean_relative_error(test_y[indices], prediction[indices])
print zip(test_y[indices], prediction[indices])

# optionally plot predicted vs. actual values
if HISTOGRAM:
    histogram(test_y, prediction)