示例#1
0
    def get_node_overlap_image():
        """Return a PNG UpSet plot of the node overlap between networks.

        The network identifiers are read from the comma-separated
        ``networks`` query argument. Each identifier is converted to ``int``
        and resolved through ``api.get_network``. The request is aborted with
        status 500 when the argument is missing or empty.

        :return: a response whose body is the PNG image and whose
            ``Content-type`` header is ``image/png``
        """
        import pyupset as pyu
        import matplotlib.pyplot as plt
        import pandas as pd
        from six import BytesIO

        network_ids = request.args.get('networks')
        if not network_ids:
            return flask.abort(500)

        networks = [
            api.get_network(int(network_id.strip()))
            for network_id in network_ids.split(',')
        ]

        # One single-column frame of nodes per network, keyed by a
        # human-readable network name.
        data_dict = {
            network.name.replace('_', ' '): pd.DataFrame(network.nodes())
            for network in networks
        }

        buf = BytesIO()
        try:
            pyu.plot(data_dict)
            plt.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this each request would leak one figure.
            plt.close()

        output = make_response(buf.getvalue())
        output.headers["Content-type"] = "image/png"
        return output
示例#2
0
def upset(sets=None, resultdir='../../results/validation/EGFR'):
    '''
    Draw UpSet overlap plots for a series of result tables and save them as PDFs.

    sets= list of [file name, label] pairs, e.g.
          [['EGFRSetsp.csv', 'MeTeOR'], ['EGFRSetsp2.csv', 'MeTeORSimple']].
          Each file is read from ``resultdir`` as a tab-separated table; the
          columns from the third one onwards are treated as set membership
          flags (value 1 means the row's 'Genes' entry is in the set).
          ``None`` (the default) or an empty list plots nothing.
    resultdir= directory holding the input tables; the PDFs are written there too.

    For every table four PDFs are written: sorted by size and by degree,
    once with all sets and once without the set whose column name
    contains 'None'. An IndexError is raised when no such column exists.
    '''
    if sets is None:
        # The former default ``[[]]`` was a mutable default that could never
        # be unpacked into (file, label); treat "no argument" as "no work".
        sets = []

    for fl, test in sets:
        df = pd.read_csv('{}/{}'.format(resultdir, fl), sep='\t')
        title = fl
        colNames = list(df.columns[2:])
        sNone = [x for x in colNames if 'None' in x][0]
        mydata = {
            col: pd.DataFrame(df['Genes'][df[col] == 1]) for col in colNames
        }

        # First pass: every set, including the 'None' one.
        for arg in ['size', 'degree']:
            pyu.plot(mydata, colNames, title, resultdir, sort_by=arg)
            pl.savefig('{}/UpsetOverlap_None_{}_{}.pdf'.format(resultdir, arg, test))

        # Second pass: same plots with the 'None' set left out.
        mydata.pop(sNone, None)
        colNames.remove(sNone)
        for arg in ['size', 'degree']:
            pyu.plot(mydata, colNames, title, resultdir, sort_by=arg)
            pl.savefig('{}/UpsetOverlap_{}_{}.pdf'.format(resultdir, arg, test))
示例#3
0
    def plot_element_by_source(self, element, filter_func=lambda x: bool(x), min_bound=1, max_bound=1000000000):
        """Plot how the values of ``element`` overlap across sources.

        The per-source values come from ``self.get_element_by_source``; only
        values for which ``filter_func`` is truthy are kept. The intersection
        sizes shown are limited to ``(min_bound, max_bound)``.

        Returns the result of ``pyu.plot`` with the unfiltered per-source
        values stored under the extra key ``'input_data'``.
        """
        element_by_source = self.get_element_by_source(element)
        column_name = ['attribute']

        # One single-column frame per source, holding the surviving values.
        df_dict = {
            source: pd.DataFrame(
                [item for item in element_by_source[source] if filter_func(item)],
                columns=column_name,
            )
            for source in element_by_source
        }

        x = pyu.plot(
            df_dict,
            unique_keys=column_name,
            inters_size_bounds=(min_bound, max_bound),
        )
        x['input_data'] = element_by_source
        return x
示例#4
0
    # Add the information of the dataset to the dictionary
    data_dict[row["TypeTerm"]] = pd.DataFrame({'Property': subjTermList})

    print(row["TypeTerm"])
    print(subjTermList)
    print()

# Report how long the preceding data collection took.
tock = datetime.now()
diff = tock - tick  # the result is a datetime.timedelta object
print(str(diff.total_seconds()) + " seconds")
print("Plot")
tick = datetime.now()

# Create the UpSet Plot using the given dictionary
pyu.plot(data_dict,
         unique_keys=['Property'],
         sort_by='degree',
         inters_size_bounds=(10, 20))
# Display the plot. pyplot.show() works on the current figures and accepts
# only the keyword argument ``block``; passing the pyupset module
# positionally raises TypeError on current matplotlib releases.
plt.show()

# Report how long plotting (including the time the window stayed open) took.
tock = datetime.now()
diff = tock - tick  # the result is a datetime.timedelta object
print(str(diff.total_seconds()) + " seconds")
"""
PLOT THE VOCABULARY AS A GRAPH
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
#from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
import networkx as nx
示例#5
0
def _ccode_selected(code, plot_type):
    """Return True when class code *code* belongs in a plot of *plot_type*."""
    if plot_type == "missing":
        return code.lower() in ("na", "x", "p", "i", "ri")
    if plot_type == "full":
        return code in ("=", "_")
    if plot_type == "fusion":
        # startswith() instead of code[0] so an empty class code is simply
        # not selected rather than raising IndexError.
        return code.startswith("f")
    return False


def main():
    """Command-line entry point: plot the overlap of genes/transcripts across methods.

    For each aligner ("STAR", "TopHat") and each method in the parsed
    configuration, the refmap file that sits next to the method's stats
    file is read as a tab-separated table. The reference identifiers whose
    class code matches the requested ``--type`` are collected, and the
    resulting sets are drawn with ``pyu.plot`` and saved to ``--out``
    (format ``--format``, "svg" when not given).
    """
    parser = argparse.ArgumentParser("Script to create the Venn Plots")
    parser.add_argument("-t",
                        "--type",
                        choices=["missing", "full", "fusion"],
                        required=True)
    parser.add_argument("-c",
                        "--configuration",
                        required=True,
                        type=argparse.FileType("r"))
    parser.add_argument("-em",
                        "--exclude-mikado",
                        dest="exclude",
                        action="store_true",
                        default=False,
                        help="Flag. If set, Mikado results will be excluded")
    parser.add_argument("-o",
                        "--out",
                        type=str,
                        help="Output file",
                        required=True)
    parser.add_argument("--format",
                        choices=["svg", "tiff", "png"],
                        default=None)
    parser.add_argument(
        "--transcripts",
        action="store_true",
        default=False,
        help="Flag. If set, Venn plotted against transcripts, not genes.")
    parser.add_argument("--title", default="Venn Diagram")
    args = parser.parse_args()

    options = parse_configuration(args.configuration,
                                  exclude_mikado=args.exclude)

    # Column holding the identifier and column holding its class code.
    if args.transcripts:
        colname, ccode = "ref_id", "ccode"
    else:
        colname, ccode = "ref_gene", "best_ccode"

    sets = OrderedDict()
    for aligner in ["STAR", "TopHat"]:
        for method in options["methods"]:
            stats_path = options["methods"][method][aligner][0]
            # Escape the dot so only a literal ".stats" suffix is stripped.
            refmap = "{}.refmap".format(re.sub(r"\.stats$", "", stats_path))
            with open(refmap) as ref:
                selected = {
                    row[colname]
                    for row in csv.DictReader(ref, delimiter="\t")
                    if _ccode_selected(row[ccode], args.type)
                }
            # pyupset expects one DataFrame per set.
            sets["{} ({})".format(method, aligner)] = pd.DataFrame(
                list(selected), columns=["TID"])

    pyu.plot(
        sets,
        inters_size_bounds=(100, 20000),
    )
    out_format = "svg" if args.format is None else args.format
    plt.savefig(args.out, format=out_format)
#! /usr/bin/env python
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

import pyupset as pyu
import matplotlib as mpl
from pickle import load
import pandas as pd
import glob

# Load every file matched by '*csv' in the working directory, keyed by the
# part of its name before the first dot.
genus_dict = {}
for file in glob.glob('*csv'):
    x = file.split('.')[0]
    df = pd.read_csv(file, delimiter=",")
    genus_dict[x] = df
    print(x)

# Give the two data sets short labels for the plot.
for new_key, old_key in (
        ('reads', 'ERR1719497_paired_gather_all'),
        ('assembly', 'tara_f135_full_megahit'),
):
    genus_dict[new_key] = genus_dict.pop(old_key)

# Rows are compared on their 'name' column; the figure is written as a PNG.
pplot = pyu.plot(genus_dict, unique_keys=['name'])
pplot['figure'].savefig('plot-gather.png')
import matplotlib.pyplot as plt
# Make wide 20 x 5 figures the default for figures created after this point.
matplotlib.rcParams['figure.figsize'] = (20.0, 5.0)


def filter_model_name(model_name):
    """Return ``model_name`` with the long model-family names abbreviated.

    The replacements are applied in order, so 'ConsensusDocking' is
    shortened to 'CD' before a remaining 'Docking' becomes 'Dock'.
    """
    abbreviations = (
        ('SingleClassification', 'STNN-C'),
        ('SingleRegression', 'STNN-R'),
        ('MultiClassification', 'MTNN-C'),
        ('RandomForest', 'RF'),
        ('ConsensusDocking', 'CD'),
        ('Docking', 'Dock'),
    )
    for long_form, short_form in abbreviations:
        model_name = model_name.replace(long_form, short_form)
    return model_name

# Models whose rankings of the positive items are compared in the plot.
selected_names = [
    'Baseline',
    'ConsensusDocking_efr1_opt',
    'IRV_d',
    'RandomForest_h',
    'SingleClassification_a',
    'SingleRegression_b',
    'MultiClassification_b',
    'LSTM_b',
]
plot_dict = {}

for model_name in selected_names:
    # Ranks this model assigns to the rows with a positive label ...
    positive_rank = rank_df[rank_df['label'] > 0][model_name]
    # ... keeping only the rows ranked below 250.
    positive_rank = positive_rank.where(positive_rank < 250).dropna()
    filtered_index = positive_rank.index
    filtered_df = pd.DataFrame(data=filtered_index, columns=['Items'])
    plot_dict[filter_model_name(model_name)] = filtered_df


matplotlib.rcParams.update({'font.size': 15})
fig = pyu.plot(plot_dict, inters_size_bounds=(1, 50))
fig[0]['figure'].savefig(
    './plottings/prospective_screening_venn/venn_diagram',
    bbox_inches='tight',
)
示例#8
0
    '../out/Generic-production/feature_selection/boruta/Confirmed.boruta_features.csv',
    header=None)
generic_confirmed_df.columns = ['Features']
generic_confirmed_df = homogenise_tissue_specific_features(
    generic_confirmed_df, reset_index=False)

# One frame of confirmed features per classifier, in plotting order.
confirmed_features_dict = {
    'CKD': ckd_confirmed_df,
    'Epilepsy': epilepsy_confirmed_df,
    'ALS': als_confirmed_df,
    'Generic': generic_confirmed_df,
}

# Show every non-empty intersection, ordered by degree.
min_inters_size = 1
pyu.plot(
    confirmed_features_dict,
    sort_by='degree',
    inters_size_bounds=(min_inters_size, np.inf),
)

cur_fig = matplotlib.pyplot.gcf()
cur_fig.savefig(
    'Confirmed_features_intersection_between_classifiers.pdf',
    bbox_inches='tight',
)

# === Print intersection / union sets ===
# Features confirmed by all three disease-specific classifiers.
intersection_disease_features = list(
    set(ckd_confirmed_df['Features'].tolist())
    & set(epilepsy_confirmed_df['Features'].tolist())
    & set(als_confirmed_df['Features'].tolist())
)
disease_generic_intersection = list(
    set(intersection_disease_features)
示例#9
0
    def upset_plots_gen(self):
        """Build the spike data and save the five spike-coincidence UpSet plots.

        The plots cover pairwise coincidences (with and without highlighted
        queries), 3-way, 4-way, and 5-to-7-way coincidences. Each one is
        written as a PNG to a fixed path.
        """
        # Takes about 3.5 hours to process in total.
        self.spikes_dataframe_gen(n_sample_groups=300)
        ups = self.pyupset_format()

        # (extra pyu.plot keyword arguments, figure title, output path)
        plot_specs = [
            (
                dict(inters_degree_bounds=(2, 2), sort_by='size'),
                'Pairwise Spike Coincidences',
                '/Users/mskirk/Documents/Conferences/AGU 2017/2-way.png',
            ),
            (
                dict(inters_degree_bounds=(2, 2),
                     sort_by='size',
                     query=[('304', '94'), ('211', '193'), ('335', '131')]),
                'Pairwise Spike Coincidences',
                '/Users/mskirk/Documents/Conferences/AGU 2017/2-way_c.png',
            ),
            (
                dict(inters_degree_bounds=(3, 3), sort_by='size'),
                '3-Way Spike Coincidences',
                '/Users/mskirk/Documents/Conferences/AGU 2017/3-way.png',
            ),
            (
                dict(inters_degree_bounds=(4, 4), sort_by='size'),
                '4-way Spike Coincidences',
                '/Users/mskirk/Documents/Conferences/AGU 2017/4-way.png',
            ),
            (
                dict(inters_degree_bounds=(5, 7),
                     sort_by='degree',
                     query=[('304', '94', '211', '193', '335', '131', '171')]),
                '5, 6, and 7-way Spike Coincidences',
                '/Users/mskirk/Documents/Conferences/AGU 2017/567-way.png',
            ),
        ]

        for plot_kwargs, title, out_path in plot_specs:
            plt.rc('font', size=12)
            pyu.plot(ups,
                     unique_keys=['SpaceGroup', 'TimeGroup'],
                     **plot_kwargs)
            plt.title(title, {'fontsize': 18, 'fontweight': 'bold'})
            plt.savefig(out_path)
示例#10
0
# Header line: every key of ``tp`` followed by a comma.
a_str = ''.join(a_key + "," for a_key in tp)

# One comma-separated line per key: the fraction of each other entry's
# items that this entry also contains, rounded to two decimals.
rows = []
for a_key, a_value in tp.items():
    cells = []
    for b_key, b_value in tp.items():
        ratio = len(a_value.intersection(b_value)) / float(len(b_value))
        print(a_key + " tp of " + b_key + " tp: " + str(ratio))
        cells.append("{:0.2f}".format(round(ratio, 2)) + ",")
    rows.append(''.join(cells) + '\n')
c_str = ''.join(rows)

print(a_str)
print(c_str)
'''
# Store the true positives of every detector under its display name.
for detector_name, detections in (
        ('ED2', ed2),
        ('NADEEF', nadeef),
        ('KATARA', katara),
        ('Gaussian', gaussian),
        ('Histogram', histogram),
        ('Mixture', mixture),
        ('ActiveClean', active_clean),
        ('BoostClean', boost_clean),
):
    true_positives_dict[detector_name] = generate_truepositives(
        ground_truth, detections)

# Only intersections with at least 2500 members are drawn.
pyu.plot(
    true_positives_dict,
    sort_by='degree',
    inters_size_bounds=(2500, np.inf),
)
# Write one sheet per 'Count' threshold. sheet_name is passed by keyword
# because positional use is deprecated in recent pandas releases.
df3 = df2[df2['Count'] > 2]
df3.to_excel(writer, sheet_name="More than 2")
df3 = df2[df2['Count'] > 3]
df3.to_excel(writer, sheet_name="More than 3")
# ExcelWriter.save() was removed in pandas 2.0; close() writes the workbook
# to disk and is also available on older versions.
# NOTE(review): assumes ``writer`` is a pandas ExcelWriter - confirm where it is created.
writer.close()


modes = ["APCI", "APPI", "ESI", "LDI"]

######## Negative ##########
# One frame per ionisation mode, restricted to negative polarity.
df_dict_neg = {
    mode: df2[(df2['Polarity'] == 'Neg') & (df2['Mode'] == mode)]
    for mode in ('APPI', 'APCI', 'ESI', 'LDI')
}

# Rows are compared on their 'Formula' column.
upset = pyu.plot(df_dict_neg, unique_keys=['Formula'], sort_by='degree')

plt.savefig(outputdata + "UpSetNeg.png", dpi=300)

# Keep the intersection sets and an indexable list of their keys.
intsets = upset['intersection_sets']
intsetkeys = list(intsets)

def plotcommontoall():
    df_common = intsets[intsetkeys[-1]]
    df_common = df_common.sort_values('Mass')
    glocmap = cm.viridis_r

    sns.set_style("white")
    sns.set_context("paper",font_scale=2)