Exemplo n.º 1
0
    def mann_whitney(self):
        """Run pairwise Mann-Whitney tests and save the p-values to a file.

        Every pair of selected genotypes is compared for each structure
        listed in ``Global.struct``.  The p-values, rounded to four decimals,
        are arranged as one row per genotype pair and one column per
        structure, then written to a ``.txt`` file chosen through a save
        dialog.  Nothing is written when the dialog is dismissed without a
        file name.
        """
        # Genotypes whose selection variable is currently set to 1.
        geno_select = [
            key for key, value in self.mBar.samples.items()
            if value.get() == 1
        ]

        # Keep only the selected genotypes, in the order of Global.geno.
        geno_test = [geno for geno in Global.geno if geno in geno_select]

        # All unordered genotype pairs.
        comp = tuple(combinations(geno_test, 2))

        # One p-value per (pair, structure); index [1] of the test result is
        # the p-value.
        p_val = []
        for first, second in comp:
            first_rows = Global.col.index == first
            second_rows = Global.col.index == second
            for struct in Global.struct:
                p_val.append(
                    round(mw(Global.col[struct][first_rows],
                             Global.col[struct][second_rows])[1], 4))

        # Rows follow the pair order, columns follow Global.struct.
        p_arr = np.array(p_val).reshape(len(comp), len(Global.struct))
        df = pd.DataFrame(p_arr, columns=Global.struct)
        df.insert(loc=0,
                  column='Genotypes',
                  value=[[first, second] for first, second in comp])
        df = df.set_index('Genotypes')

        # A cancelled dialog yields an empty name; without this guard the
        # report would be written to a file literally called ".txt".
        chosen_name = filedialog.asksaveasfilename()
        if not chosen_name:
            return
        stat_fname = chosen_name + '.txt'

        with open(stat_fname, 'w') as outfile:
            outfile.write(
                'Mann-Whitney two-sided test for genotype '
                'combinations indicated in the \n"Genotypes" columns and '
                'fungal structures '
                'indicated as column headers.\nValues indicate p-value.\n\n')
            df.to_string(outfile)
Exemplo n.º 2
0
def example3():
    """Score regions with several constraint metrics and evaluate them.

    The regions read from ``args.input`` are grouped by the windowing
    strategy selected with ``args.regions``.  For every window the baseline,
    Upton, constraint and RVIS values are computed; baseline and constraint
    rows are written to two ``.bed`` files.  Each metric is then evaluated
    against the truth set selected by ``args.truetype``: a two-panel
    histogram is saved, and the output of ``metrics`` and of a Mann-Whitney
    test is printed.

    Relies on module-level names that are not defined in this block
    (``args``, ``patt``, ``JimFile``, ``windower``, ``baseline``, ``upton``,
    ``constraint``, ``RVIS``, ``rtz``, ``evaldoms``, ``metrics``, ...).

    Raises:
        ValueError: if ``args.regions`` names no known windowing strategy, or
            if there are results to evaluate but ``args.truetype`` names no
            known truth source.
    """
    import toolshed as ts
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    # NOTE(review): seaborn is not referenced below; the import is kept in
    # case it is relied on for import-time styling -- confirm before removing.
    import seaborn as sns  # noqa: F401
    from scipy.stats import mannwhitneyu as mw
    import numpy as np

    iterator = JimFile(args.input, args.regions)

    results = defaultdict(lambda: defaultdict(list))
    ms = defaultdict(list)
    maf_cutoff = float(args.maf) if args.maf else 1e-05
    # A single CpG-fraction bin covering the whole range is in use.
    cpg_cutoff = {'0-1': (0, 1)}

    base = []
    cons = []
    genes = None
    if args.regions == "chunks":
        regioner = smallchunk
        chunksize = args.regionsize
    elif args.regions in ("domains", "nodoms", "all"):
        regioner = byregiondist
        chunksize = ""
    elif args.regions == "genes":
        regioner = bytranscriptdist
        chunksize = ""
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `regioner`.
        raise ValueError("unsupported regions option: %r" % (args.regions,))
    y = list(windower(iterator, regioner, chunksize))
    comparison = args.comparison
    if args.exclude:
        exclude = args.exclude
        ex = "ex" + args.exclude + "."
    else:
        exclude = None
        ex = ""

    # NOTE(review): the conservation values are collected but not used
    # further down in this function.
    cv = []
    if args.conservation:
        cv = [get_conservation(rec) for rec in ts.reader(args.conservation)]

    # The CpG fraction is fixed at 1 for every window.
    cpg = 1
    for iv in y:
        b = baseline(iv, maf_cutoff=maf_cutoff, exclude=exclude,
                     comparison=comparison, patt=patt)
        ms['baseline'].append((iv, b[3] / b[4], cpg))
        base.append(b)

    # Overall rate: summed field 3 over summed field 4 of the baseline rows.
    # It is only needed (and only defined) when there is at least one window.
    if base:
        count = sum((b[3] for b in base), 0.0)
        totlen = sum((b[4] for b in base), 0.0)
        baserate = count / totlen

    for iv, b in zip(y, base):
        u = upton(b, baserate)
        c = constraint(iv, maf_cutoff=maf_cutoff, genes=genes, upton=u)
        r = RVIS(iv, maf_cutoff=1e-3, patt=patt)
        if c != 0:
            ms['nzconstraint'].append((iv, c, cpg))
        ms['constraint'].append((iv, c, cpg))
        ms['upton'].append((iv, u[3], cpg))
        ms['rvis'].append((iv, r, cpg))
        cons.append((u[0], u[1], u[2], c))
        # TODO: jim add a lot more metrics here...

    bedname = ("." + rtz(maf_cutoff) + "." + comparison + "." + args.regions
               + str(chunksize) + "." + ex)
    # `with` guarantees both files are closed even if a write fails.
    with open("constraint" + bedname + ".bed", "w") as f1, \
            open("baseline" + bedname + ".bed", "w") as f2:
        for b, c in zip(base, cons):
            f1.write("\t".join(map(str, c)) + "\n")
            f2.write("\t".join(map(str, b)) + "\n")

    # Bin every metric entry by the CpG-fraction range it falls into.
    cutoffs = set()
    for cutoff in cpg_cutoff:
        low, high = cpg_cutoff[cutoff]
        co = str(low) + '-' + str(high)
        cutoffs.add(co)
        for metric in ms:
            for ct in ms[metric]:
                if low <= ct[2] <= high:
                    results[metric][co].append(ct)

    option = args.truetype
    trusrc = ""
    func = None
    if option in ("clinvar", "c"):
        func = clinvar
        trusrc = "clinvar"
    elif option in ("pli", "p"):
        func = pli
        trusrc = "pli"
    if results and func is None:
        # Previously this surfaced as an UnboundLocalError on `func` inside
        # the loop below.
        raise ValueError("unsupported truetype option: %r" % (option,))

    for metric in results:
        for cutoff in cutoffs:
            imgname = (metric + "." + trusrc + "." + comparison + "."
                       + args.regions + str(chunksize) + "." + ex + cutoff
                       + "." + rtz(maf_cutoff))
            # Formatted so the output is the same on Python 2 and Python 3.
            print("%s %s" % (metric, cutoff))
            fig, axes = plt.subplots(2)
            fig.tight_layout()
            # args.pathogenic: e.g. the ExAC pLI table or a ClinVar VCF.
            counts = evaldoms(results[metric][cutoff], args.pathogenic, func)
            imin, imax = np.percentile(counts[True] + counts[False],
                                       [0.01, 99.99])
            axes[0].hist(counts[True], bins=80)
            axes[0].set_xlabel("pathogenic")
            axes[0].set_xlim(imin, imax)
            props = dict(boxstyle='round', facecolor='whitesmoke', alpha=0.5)
            axes[0].text(.875, .8, "CpG frac:\n" + cutoff.replace("-", " - "),
                         transform=axes[0].transAxes, bbox=props)
            axes[1].hist(counts[False], bins=80)
            axes[1].set_xlabel("not-pathogenic")
            axes[1].set_xlim(imin, imax)
            # Save before show(): with an interactive backend show() can
            # leave an empty figure behind for savefig().
            plt.savefig(imgname + ".dist.png", bbox_inches='tight')
            plt.show()
            print(metrics(counts[True], counts[False], imgname + ".auc.png",
                          cutoff=cutoff))
            print(mw(counts[True], counts[False]))
            del fig
            plt.close()
Exemplo n.º 3
0
def _arousal_vote_fraction(level_state, hr, hrv, tonic_mean, phasic_mean):
    """Return the share of the four signals that is higher-arousal now.

    ``level_state`` is one entry of ``levelStatesList``.  A level with no
    recorded ``HR_mean`` yields 0.0 instead of comparing against ``None``,
    which raises ``TypeError`` on Python 3.
    NOTE(review): treating an unrecorded level as "no evidence" is a
    fallback choice -- confirm it matches the intended game behaviour.
    """
    if level_state['HR_mean'] is None:
        return 0.0
    signals = (
        level_state['HR_mean'] < hr,
        level_state['HRV'] > hrv,
        level_state['Tonic_mean'] < tonic_mean,
        level_state['Phasic_mean'] < phasic_mean,
    )
    # Count explicitly: adding numpy booleans would OR them, not sum them.
    return sum(1 for raised in signals if raised) / 4.0


def _eda_rate(current, baseline, previous):
    """Rate one EDA component as 1, 0 or -1 using one-sided Mann-Whitney tests.

    ``current`` is first tested for being greater than ``baseline``.  Without
    a ``previous`` window that test alone decides between 1 and 0.  With one,
    a non-significant baseline test gives 1; otherwise ``current`` is tested
    against ``previous`` (greater -> 1, less -> -1, neither -> 0).
    """
    baseline_stats = mw(current, baseline, alternative='greater')
    if previous is None:
        return 1 if baseline_stats.pvalue <= p_value else 0
    if baseline_stats.pvalue > p_value:
        return 1  # original note: "must upgrade"
    if mw(current, previous, alternative='greater').pvalue <= p_value:
        return 1  # stay or increase
    if mw(current, previous, alternative='less').pvalue <= p_value:
        return -1  # revert back to the previous
    return 0


def EvaluateAdaptiveBiostate(BVP_array=None, EDA_array=None):
    """Decide how the level should change from the latest BVP and EDA windows.

    Heart rate (``bpm``) and HRV (``rmssd``) are taken from ``hb.process``;
    the tonic and phasic EDA components from ``nk.eda_process``.  Each signal
    is rated 1 (higher arousal), 0 (no notable change) or -1 (lower arousal)
    against the stored baseline and the previous window.  The mean of the
    phasic, tonic and HR ratings, together with the last action and the
    states recorded for the neighbouring levels, selects the result.

    Side effects: updates the module-level ``m_*`` state, writes the current
    measurements into ``levelStatesList`` and ``progressionStateList``, and
    prints a short summary.

    Args:
        BVP_array: BVP samples, processed at a sample rate of 64.0.
        EDA_array: EDA samples, processed with ``sampling_rate=4``.

    Returns:
        -1, 0 or 1: the change applied to ``m_curLevel`` before it is clamped
        to the range 1..5.

    Raises:
        ValueError: if either input is missing.
    """
    global m_previousHR
    global m_previousHRV
    global m_previousTonic
    global m_previousPhasic
    global m_prevLevel
    global m_curLevel
    global m_lastAction
    global m_Iteration

    # Both signals are used unconditionally below; fail clearly instead of
    # with an UnboundLocalError further down.
    if BVP_array is None or EDA_array is None:
        raise ValueError("both BVP_array and EDA_array are required")

    _, hb_measure = hb.process(BVP_array, 64.0, report_time=True)
    #TODO: Test the difference between resting and playing against the easiest possible character
    current_EDA = nk.eda_process(EDA_array, freq=1.9, sampling_rate=4)

    current_HR = hb_measure['bpm']
    current_HRV = hb_measure['rmssd']

    current_Tonic = current_EDA['df']['EDA_Tonic']
    current_Phasic = current_EDA['df']['EDA_Phasic']

    current_Tonic_mean = np.mean(current_Tonic)
    current_Phasic_mean = np.mean(current_Phasic)

    # HR: raised versus baseline first, then versus the previous window.
    hr_rate = 0
    hr_raised = m_baseHR * percentage_change < current_HR
    if m_previousHR is None:
        hr_rate = 1 if hr_raised else 0
    elif hr_raised:
        if m_previousHR * percentage_change < current_HR:
            hr_rate = 1
        elif current_HR * percentage_change < m_previousHR:
            hr_rate = -1

    # HRV: a lower value than the reference counts as higher arousal.
    hrv_lowered = m_baseHRV > current_HRV * percentage_change
    if m_previousHRV is None:
        hrv_rate = 1 if hrv_lowered else 0
    elif hrv_lowered:
        if m_previousHRV > current_HRV * percentage_change:
            hrv_rate = 1
        elif current_HRV > m_previousHRV * percentage_change:
            hrv_rate = -1
        else:
            hrv_rate = 0
    else:
        if current_HRV > m_previousHRV * percentage_change:
            hrv_rate = -1
        elif current_HRV * percentage_change < m_previousHRV:
            hrv_rate = 1
        else:
            hrv_rate = -1 if current_HRV > m_baseHRV * percentage_change else 0

    tonic_rate = _eda_rate(current_Tonic, m_baseTonic, m_previousTonic)
    phasic_rate = _eda_rate(current_Phasic, m_basePhasic, m_previousPhasic)

    def votes_against(level_state):
        # Share of signals that are higher-arousal now than in level_state.
        return _arousal_vote_fraction(level_state, current_HR, current_HRV,
                                      current_Tonic_mean, current_Phasic_mean)

    arousal = (phasic_rate + tonic_rate + hr_rate) / 3.0
    result = 0

    if arousal > 0.5:
        # Higher arousal: the decision depends on the last action.
        if m_lastAction == m_DECREASE:
            if m_curLevel == 1:
                result = 0
                m_lastAction = m_STAY
            elif levelStatesList[m_curLevel - 2]['HR_mean'] is None:
                # Nothing recorded for the level below yet.
                result = -1
                m_lastAction = m_DECREASE
            elif votes_against(levelStatesList[m_curLevel - 2]) > 0.5:
                result = -1
                m_lastAction = m_DECREASE
            else:
                result = 0
                m_lastAction = m_STAY
        elif m_lastAction == m_INCREASE:
            if m_curLevel == 5:
                result = 0
                m_lastAction = m_STAY
            elif levelStatesList[m_curLevel]['HR_mean'] is None:
                # Nothing recorded for the level above yet.
                result = 1
                m_lastAction = m_INCREASE
            elif votes_against(levelStatesList[m_curLevel]) > 0.5:
                if hrv_rate == -1:
                    result = -1
                    m_lastAction = m_DECREASE
                else:
                    result = 1
                    m_lastAction = m_INCREASE
            else:
                result = 0
                m_lastAction = m_STAY
        else:
            if hrv_rate == -1:
                result = -1
                m_lastAction = m_DECREASE
            elif hrv_rate == 0:
                result = 0
                m_lastAction = m_STAY
            else:
                result = 1
                m_lastAction = m_INCREASE
    elif arousal < 0.0:
        # Significantly lower arousal: never stay, revert the last action.
        if m_curLevel == 1:
            result = 1
            m_lastAction = m_INCREASE
        elif m_curLevel == 5:
            result = -1
            m_lastAction = m_DECREASE
        elif m_lastAction == m_DECREASE:
            result = 1
            m_lastAction = m_INCREASE
        elif m_lastAction == m_INCREASE:
            result = -1
            m_lastAction = m_DECREASE
        else:
            upperAffectiveState = votes_against(levelStatesList[m_curLevel])
            lowerAffectiveState = votes_against(
                levelStatesList[m_curLevel - 2])
            if upperAffectiveState > 0.5:
                m_lastAction = m_DECREASE
                result = -1
            elif lowerAffectiveState > 0.5:
                m_lastAction = m_INCREASE
                result = 1
            else:
                m_lastAction = m_DECREASE
                result = -1
    else:
        # No real change: repeat the last action where possible.
        if m_previousPhasic is None:
            result = 1
            m_lastAction = m_INCREASE
        elif m_curLevel == 1:
            result = 1
            m_lastAction = m_INCREASE
        elif m_curLevel == 5:
            result = -1
            m_lastAction = m_DECREASE
        elif m_lastAction == m_DECREASE:
            result = -1
        elif m_lastAction == m_INCREASE:
            result = 1
        else:
            result = 0
            m_lastAction = m_STAY

    # Apply the change and keep the level within 1..5.
    m_prevLevel = m_curLevel
    m_curLevel = min(5, max(1, m_curLevel + result))

    m_previousHR = current_HR
    m_previousHRV = current_HRV

    # Record the measurements for the level they were taken at.
    levelStatesList[m_prevLevel - 1]['HR_mean'] = m_previousHR
    levelStatesList[m_prevLevel - 1]['HRV'] = m_previousHRV

    progressionStateList[m_Iteration]['HR_mean'] = m_previousHR
    progressionStateList[m_Iteration]['HRV'] = m_previousHRV

    m_previousTonic = current_Tonic
    m_previousPhasic = current_Phasic

    levelStatesList[m_prevLevel - 1]['Tonic_mean'] = current_Tonic_mean
    levelStatesList[m_prevLevel - 1]['Phasic_mean'] = current_Phasic_mean

    progressionStateList[m_Iteration]['Tonic_mean'] = current_Tonic_mean
    progressionStateList[m_Iteration]['Phasic_mean'] = current_Phasic_mean

    progressionStateList[m_Iteration]['Level'] = m_prevLevel

    m_Iteration += 1
    print("Baseline HR: {0}    HRV: {1}".format(m_baseHR, m_baseHRV))
    print(
        "Phasic:{0}, tonic: {1}, hr: {2}(BPM){3:.3f}, hrv: {4}(RMSSD){5:.3f}".
        format(phasic_rate, tonic_rate, hr_rate, current_HR, hrv_rate,
               current_HRV))
    print("Done with result: {0}".format(result))
    return result
Exemplo n.º 4
0
        'NodeJS/CSV/Participant{0}BVPReg{1}.csv'.format(
            args.participantID, dataType),
        delimiter=';',
        skip_header=1,
        unpack=True)
    working_non, non_m = hb.process(BVP_Non, 64.0, calc_freq=True)

    EDA_df_Non = pd.read_csv('NodeJS/CSV/Participant{0}EDAReg{1}.csv'.format(
        args.participantID, dataType),
                             delimiter=';')
    processed_eda_Non = nk.eda_process(EDA_df_Non["EDA"],
                                       freq=1.9,
                                       sampling_rate=4)

    MWTonic_stats_greater = mw(processed_eda_Affect['df']['EDA_Tonic'],
                               processed_eda_Non['df']['EDA_Tonic'],
                               alternative='greater')
    MWTonic_stats_less = mw(processed_eda_Affect['df']['EDA_Tonic'],
                            processed_eda_Non['df']['EDA_Tonic'],
                            alternative='less')
    MWPhasic_stats_greater = mw(processed_eda_Affect['df']['EDA_Phasic'],
                                processed_eda_Non['df']['EDA_Phasic'],
                                alternative='greater')
    MWPhasic_stats_less = mw(processed_eda_Affect['df']['EDA_Phasic'],
                             processed_eda_Non['df']['EDA_Phasic'],
                             alternative='less')

    print("Baseline HR analysis")
    times, baseline = genfromtxt(
        'NodeJS/CSV/Participant{0}BVPBaseline5.csv'.format(
            args.participantID, dataType),
Exemplo n.º 5
0
def EvaluateAdaptiveBiostate(BVP_array=None, EDA_array=None):
    """Decide how the level should change from the latest BVP and EDA windows.

    Heart rate (``bpm``) and HRV (``rmssd``) are taken from ``hb.process``;
    the tonic and phasic EDA components from ``nk.eda_process``.  Each signal
    is rated 1 (higher arousal), 0 (no notable change) or -1 (lower arousal)
    against the stored baseline and the previous window.  The mean of the
    phasic, tonic and HR ratings then selects the result.

    Side effects: updates the module-level ``m_*`` state and prints the
    baseline HR and ``percentage_change``.

    Args:
        BVP_array: BVP samples, processed at a sample rate of 64.0.
        EDA_array: EDA samples, processed with ``sampling_rate=4``.

    Returns:
        The change added to ``m_curLevel``: 1, -1, 0, or, on a revert,
        ``m_prevLevel - m_curLevel``.

    Raises:
        ValueError: if either input is missing.
    """
    # These names are rebound below.  Without the declarations they are
    # locals and the first read raises UnboundLocalError.
    global m_previousHR
    global m_previousHRV
    global m_previousTonic
    global m_previousPhasic
    global m_prevLevel
    global m_curLevel
    global m_lastAction

    # Both signals are used unconditionally below.
    if BVP_array is None or EDA_array is None:
        raise ValueError("both BVP_array and EDA_array are required")

    _, hb_measure = hb.process(BVP_array, 64.0, report_time=True)
    #TODO: Test the difference between resting and playing against the easiest possible character
    current_EDA = nk.eda_process(EDA_array, freq=1.9, sampling_rate=4)

    current_HR = hb_measure['bpm']
    current_HRV = hb_measure['rmssd']

    # HR: raised versus baseline first, then versus the previous window.
    print(m_baseHR)
    print(percentage_change)
    hr_rate = 0
    hr_raised = m_baseHR * percentage_change < current_HR
    if m_previousHR is None:
        hr_rate = 1 if hr_raised else 0
    elif hr_raised:
        if m_previousHR * percentage_change < current_HR:
            hr_rate = 1
        elif current_HR * percentage_change < m_previousHR:
            hr_rate = -1

    # HRV: a lower value than the reference counts as higher arousal.
    hrv_lowered = m_baseHRV > current_HRV * percentage_change
    if m_previousHRV is None:
        hrv_rate = 1 if hrv_lowered else 0
    elif hrv_lowered:
        # The original compared against the HR helper variable and assigned
        # hr_rate here (copy-paste slip), and its relaxation test pointed the
        # wrong way compared with the HR branch above.
        if m_previousHRV > current_HRV * percentage_change:
            hrv_rate = 1
        elif current_HRV > m_previousHRV * percentage_change:
            hrv_rate = -1
        else:
            hrv_rate = 0
    else:
        hrv_rate = -1 if current_HRV > m_baseHRV * percentage_change else 0

    # EDA tonic.  The test result exposes the p-value as `.pvalue`.
    current_Tonic = current_EDA['df']['EDA_Tonic']
    tonic_stats = mw(current_Tonic, m_baseTonic, alternative='greater')

    if m_previousTonic is None:
        # No previous window to compare with: the baseline test decides.
        tonic_rate = 1 if tonic_stats.pvalue <= p_value else 0
    elif tonic_stats.pvalue > p_value:
        tonic_rate = 1  #must upgrade
    else:
        tonic_greater_stats = mw(current_Tonic,
                                 m_previousTonic,
                                 alternative='greater')
        tonic_less_stats = mw(current_Tonic,
                              m_previousTonic,
                              alternative='less')
        if tonic_greater_stats.pvalue <= p_value:
            # This is good. Stay or increase
            tonic_rate = 1
        elif tonic_less_stats.pvalue <= p_value:
            tonic_rate = -1  #Revert back to the previous!
        else:
            tonic_rate = 0

    # EDA phasic, rated the same way as the tonic component.
    current_Phasic = current_EDA['df']['EDA_Phasic']
    phasic_stats = mw(current_Phasic, m_basePhasic, alternative='greater')

    if m_previousPhasic is None:
        phasic_rate = 1 if phasic_stats.pvalue <= p_value else 0
    elif phasic_stats.pvalue > p_value:
        phasic_rate = 1
    else:
        phasic_greater_stats = mw(current_Phasic,
                                  m_previousPhasic,
                                  alternative='greater')
        phasic_less_stats = mw(current_Phasic,
                               m_previousPhasic,
                               alternative='less')
        if phasic_greater_stats.pvalue <= p_value:
            # Stay or increase
            phasic_rate = 1
        elif phasic_less_stats.pvalue <= p_value:
            # Revert back
            phasic_rate = -1
        else:
            phasic_rate = 0

    # The decision is based on the ratings, not on the raw signals (the raw
    # EDA components are whole series and cannot be compared to a scalar).
    arousal = (phasic_rate + tonic_rate + hr_rate) / 3.0
    result = 0

    if arousal > 0.5:
        # Higher arousal
        if hrv_rate == -1:
            result = -1
            m_lastAction = m_DECREASE
        else:
            result = 1
            m_lastAction = m_INCREASE
    elif arousal < 0.0:
        # Significantly lower arousal: revert to the previous level.
        result = m_prevLevel - m_curLevel
        m_lastAction = m_REVERT
    else:
        # No real change: repeat the last action.
        if m_lastAction == m_DECREASE:
            result = -1
        elif m_lastAction == m_INCREASE:
            result = 1

    # Finish off: apply the change and remember this window.
    m_prevLevel = m_curLevel
    m_curLevel += result

    m_previousHR = current_HR
    m_previousHRV = current_HRV
    m_previousTonic = current_Tonic
    m_previousPhasic = current_Phasic
    return result
Exemplo n.º 6
0
                                                 '_mean_score',
                                                 axis='columns')
    original_sort_list.append(original_sort_cols)
# Stack the frames collected in best_sort_list and tag every row with the
# population label that the violin plot below uses as its hue.
best_sort = pd.concat(best_sort_list)
best_sort['Sequence Population'] = ['$Dev^+$'] * len(best_sort)

# Same for the frames collected in original_sort_list.
original_sort = pd.concat(original_sort_list)
original_sort['Sequence Population'] = ['All Predicted'] * len(original_sort)

# Single frame holding both populations.
comb_sort = pd.concat([best_sort, original_sort])

# One-sided tests on the scores of the assay named sorts[9]: is the '$Dev^+$'
# population shifted above ('greater') or below ('less') 'All Predicted'?
# NOTE(review): mw is presumably scipy.stats.mannwhitneyu -- confirm import.
b = best_sort[best_sort['Assay Name'] == sorts[9]]['Assay Score']
o = original_sort[original_sort['Assay Name'] == sorts[9]]['Assay Score']
print(b)
print(mw(b, o, alternative='greater'))
print(mw(b, o, alternative='less'))

# Two-colour palette, set globally before plotting.
a_palette = ['#1f77b4', '#FF0000']  #match colors
sns.set_palette(a_palette)
ax = sns.violinplot(data=comb_sort,
                    x='Assay Name',
                    y='Assay Score',
                    hue='Sequence Population',
                    hue_order=['All Predicted', '$Dev^+$'],
                    cut=0,
                    linewidth=.75,
                    split=True,
                    gridsize=20,
                    scale='width',
                    inner="quart",