示例#1
0
def main():
    """Parse the command line and launch the padmet comparison.

    The options ``--output``, ``-v``, ``--padmetRef`` and ``--padmet`` are read
    from the docopt-parsed usage string of the module and forwarded to
    ``compare_padmet.compare_padmet``.
    """
    cli = docopt.docopt(__doc__)
    out_dir, is_verbose = cli["--output"], cli["-v"]
    ref_arg = cli["--padmetRef"]
    # The reference database is optional: only load it when a value was given.
    reference = PadmetRef(ref_arg) if ref_arg else None
    compare_padmet.compare_padmet(cli["--padmet"], out_dir, reference,
                                  is_verbose)
示例#2
0
文件: analysis.py 项目: AuReMe/aureme
def analysis_on_group(group_name, groups, config_data, verbose):
    """Create reaction dendrogram and extract specific reactions using metabolic networks.

    The analysis is only launched when the output folder of the group
    (``<analysis_path>/<group_name>``) does not exist yet; otherwise a message
    is printed and nothing is recomputed.

    Args:
        group_name (str): Name of the group from group_template.tsv.
        groups (list): All the species inside the group.
        config_data (dict): Dictionary with all configuration paths. The keys
            'database_path', 'padmet_from_networks_path' and 'analysis_path'
            are read.
        verbose (bool): Verbose.

    Raises:
        SystemExit: If the group has fewer than two members, or if the padmet
            file of one of its members is missing.
    """

    database_path = config_data['database_path']

    padmet_from_networks_path = config_data['padmet_from_networks_path']
    analysis_path = config_data['analysis_path']

    all_padmet_path = [
        os.path.join(padmet_from_networks_path, name + ".padmet")
        for name in groups
    ]
    group_analysis_path = analysis_path + '/' + group_name

    if os.path.isdir(group_analysis_path):
        print(
            group_analysis_path +
            ' already exists. Delete it if you want to relaunch the analysis.')
        return

    # An empty group is as meaningless as a single-member one: reject both
    # before handing an empty path list to the comparison.
    if len(groups) < 2:
        sys.exit('A group must contain more than one member.')

    # Fail early, naming the organism, rather than deep inside the comparison.
    for padmet_path in all_padmet_path:
        if not os.path.exists(padmet_path):
            org_name = os.path.splitext(os.path.basename(padmet_path))[0]
            sys.exit(
                "Padmet file of organism %s from group %s not found in %s"
                % (org_name, group_name, padmet_from_networks_path))

    # Compare the padmet to create the reactions.csv file needed to create the reaction dendrogram.
    compare_padmet.compare_padmet(padmet_path=",".join(all_padmet_path),
                                  output=group_analysis_path,
                                  padmetRef=database_path,
                                  verbose=verbose)

    dendrogram_reactions_distance.reaction_figure_creation(
        reaction_file=group_analysis_path + '/reactions.csv',
        output=group_analysis_path + '/dendrogram_output',
        padmetRef=database_path,
        verbose=verbose)
示例#3
0
文件: compare.py 项目: AuReMe/aucome
def run_compare(run_id, nb_cpu_to_use, verbose):
    """Compare the groups specified by the user.

    Reads the group file of the run, compares the padmet files of every
    species belonging to a group other than 'all', draws a supervenn figure of
    the reactions of each group and creates the reaction dendrogram.

    Args:
        run_id (str): ID of the run
        nb_cpu_to_use (int): number of CPU for multiprocessing (not used in
            the body of this function)
        verbose (boolean): verbose
    """
    if verbose:
        print('--- Running compare step ---')
    compare_start_time = time.time()
    config_data = parse_config_file(run_id)

    analysis_path = config_data['analysis_path']
    analysis_group_file_path = config_data['analysis_group_file_path']
    compare_output_path = analysis_path + '/compare_group'

    database_path = config_data['database_path']
    padmet_from_networks_path = config_data['padmet_from_networks_path']

    # Create a dictionary containing the group name and the species inside the group.
    group_data = {}
    padmets = set()
    with open(analysis_group_file_path, 'r') as group_file:
        group_reader = csv.reader(group_file, delimiter='\t')
        for row in group_reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing on row[0].
            if not row:
                continue
            group_name = row[0]
            groups = [species for species in row[1:] if species != '']
            group_data[group_name] = groups
            if group_name != 'all':
                padmets.update(
                    padmet_from_networks_path + '/' + species + '.padmet'
                    for species in groups)

    # A species can belong to several groups: keep each file once, in a
    # sorted order so that the comparison input is the same from run to run.
    padmets = sorted(padmets)

    # exist_ok avoids the check-then-create race and missing parents are created.
    os.makedirs(compare_output_path, exist_ok=True)

    padmetref = PadmetRef(database_path)
    # Create the reactions.tsv file needed to create dendrogram.
    padmet_path = ','.join(padmets)
    compare_padmet.compare_padmet(padmet_path=padmet_path,
                                  output=compare_output_path,
                                  padmetRef=padmetref,
                                  verbose=verbose)

    # Read the reactions.tsv file and remove the column unused.
    reactions_file = compare_output_path + '/' + 'reactions.tsv'
    reactions_dataframe = pa.read_csv(reactions_file, sep='\t')
    columns = [
        column for column in reactions_dataframe.columns
        if '(sep=;)' not in column and '_formula' not in column
    ]
    reactions_dataframe = reactions_dataframe[columns].copy()
    reactions_dataframe.set_index('reaction', inplace=True)

    # For each group, extract the reactions present in its species to create supervenn sets.
    supervenn_sets = []
    supervenn_labels = []
    for group_name, groups in group_data.items():
        if group_name == 'all':
            continue
        group_reactions = set()
        for species in groups:
            # Rows whose value is 1 in the species column are kept.
            species_reactions_dataframe = reactions_dataframe[
                reactions_dataframe[species] == 1]
            group_reactions.update(species_reactions_dataframe.index.tolist())
        supervenn_sets.append(group_reactions)
        supervenn_labels.append(group_name)

    supervenn(supervenn_sets,
              supervenn_labels,
              chunks_ordering='occurence',
              sets_ordering='minimize gaps')
    plt.savefig(compare_output_path + '/compare_group.png',
                bbox_inches='tight')
    plt.clf()

    dendrogram_reactions_distance.reaction_figure_creation(
        reactions_file,
        os.path.join(compare_output_path, "dendrogram_output"),
        padmetRef_file=database_path,
        verbose=verbose)

    # Keep only three decimals of the elapsed time (truncated, not rounded).
    compare_end_time = (time.time() - compare_start_time)
    integer_part, decimal_part = str(compare_end_time).split('.')
    compare_time = ".".join([integer_part, decimal_part[:3]])

    if verbose:
        print("--- compare step done in: %ss ---" % compare_time)
示例#4
0
def _read_presence_table(table_path):
    """Return the identifiers flagged '1' in columns 1 and 2 of a tab-separated table.

    The first column holds the identifier; the header row is ignored
    implicitly because its cells are never equal to '1'.
    """
    first_ids = []
    second_ids = []
    with open(table_path, 'r') as table_file:
        for row in csv.reader(table_file, delimiter='\t'):
            if row[1] == '1':
                first_ids.append(row[0])
            if row[2] == '1':
                second_ids.append(row[0])
    return first_ids, second_ids


def test_compare_padmet():
    """Check the genes, reactions, pathways and metabolites tables written by compare_padmet.

    Two padmet files are built from the same PGDB, each with a different
    reaction removed, then compared. The files created by the test are removed
    even when an assertion fails, so a failed run does not leave stale data
    for the next one.
    """
    try:
        fabo_1_padmetSpec = from_pgdb_to_padmet('test_data/pgdb',
                                                extract_gene=True)
        fabo_1_padmetSpec.delNode('ACYLCOASYN-RXN')
        fabo_1_padmetSpec.generateFile('fabo_1.padmet')

        fabo_2_padmetSpec = from_pgdb_to_padmet('test_data/pgdb',
                                                extract_gene=True)
        fabo_2_padmetSpec.delNode('ACYLCOADEHYDROG-RXN')
        fabo_2_padmetSpec.generateFile('fabo_2.padmet')

        compare_padmet('fabo_1.padmet,fabo_2.padmet',
                       'output',
                       padmetRef=None,
                       verbose=False)

        genes_fabo_1, genes_fabo_2 = _read_presence_table('output/genes.tsv')

        assert set(FABO_GENES).issubset(set(genes_fabo_1))

        assert set(FABO_GENES).issubset(set(genes_fabo_2))

        reactions_fabo_1, reactions_fabo_2 = _read_presence_table(
            'output/reactions.tsv')

        # Each network is expected to contain every reaction but the deleted one.
        expected_fabo_1_rxns = [
            rxn for rxn in FABO_RXNS if rxn != 'ACYLCOASYN-RXN'
        ]
        expected_fabo_2_rxns = [
            rxn for rxn in FABO_RXNS if rxn != 'ACYLCOADEHYDROG-RXN'
        ]

        assert set(expected_fabo_1_rxns).issubset(set(reactions_fabo_1))

        assert set(expected_fabo_2_rxns).issubset(set(reactions_fabo_2))

        pathway_fabo_1 = []
        pathway_fabo_2 = []
        # Initialised so that a file without data rows fails on the
        # assertions below instead of raising NameError.
        pwy_reactions_fabo_1 = []
        pwy_reactions_fabo_2 = []
        with open('output/pathways.tsv', 'r') as pathways_file:
            csvreader = csv.reader(pathways_file, delimiter='\t')
            for row in csvreader:
                if row[0] == 'pathway':
                    # Header row.
                    continue
                pathway_fabo_1.append(row[0])
                pathway_fabo_2.append(row[0])
                # Columns 3 and 4 hold the ';'-separated reactions associated
                # with the pathway in fabo_1 and fabo_2; the last row wins.
                pwy_reactions_fabo_1 = row[3].split(';')
                pwy_reactions_fabo_2 = row[4].split(';')

        assert pathway_fabo_1 == ['FAO-PWY']

        assert pathway_fabo_2 == ['FAO-PWY']

        assert set(expected_fabo_1_rxns).issubset(set(pwy_reactions_fabo_1))

        assert set(expected_fabo_2_rxns).issubset(set(pwy_reactions_fabo_2))

        metabolites_fabo_1 = []
        metabolites_fabo_2 = []
        with open('output/metabolites.tsv', 'r') as metabolites_file:
            csvreader = csv.reader(metabolites_file, delimiter='\t')
            for row in csvreader:
                # NOTE(review): the previous per-column filters had the form
                # `cell != header or cell != ''`, which is always true, so
                # every metabolite of the file was attributed to both
                # networks. That behaviour is kept here, written explicitly.
                # A real per-network check on the *_rxn_consume /
                # *_rxn_produce columns needs the expectations re-verified.
                if row[0] != 'metabolite':
                    metabolites_fabo_1.append(row[0])
                    metabolites_fabo_2.append(row[0])

        metabolites_fabo_1 = list(set(metabolites_fabo_1))
        metabolites_fabo_2 = list(set(metabolites_fabo_2))

        assert set(FABO_CPDS).issubset(set(metabolites_fabo_1))

        assert set(FABO_CPDS).issubset(set(metabolites_fabo_2))
    finally:
        # Best-effort cleanup: never mask the original failure with a
        # missing-file error.
        for padmet_file in ('fabo_1.padmet', 'fabo_2.padmet'):
            if os.path.exists(padmet_file):
                os.remove(padmet_file)
        shutil.rmtree('output', ignore_errors=True)