def main():
    """Filter a network pairs file and write the result in two formats.

    Positional command-line arguments:
        proteosafe_parameters: ProteoSAFe parameter XML file. Its optional
            ``TOPK`` and ``MAXIMUM_COMPONENT_SIZE`` entries override the
            defaults of 10 and 0 respectively.
        networking_pairs_results_file: input pairs file, loaded with
            ``hasHeaders=True``.
        networking_pairs_results_file_filtered: path written by
            ``output_graph_with_headers``.
        networking_pairs_results_file_filtered_classic_output: path written
            by ``output_graph``.

    Raises:
        SystemExit: with status 0, before anything is written, when
            ``loading_network`` returns ``None``.
    """
    parser = argparse.ArgumentParser(description='Creating Clustering Info Summary')
    parser.add_argument('proteosafe_parameters', help='proteosafe_parameters')
    parser.add_argument('networking_pairs_results_file', help='networking_pairs_results_file')
    parser.add_argument('networking_pairs_results_file_filtered', help='networking_pairs_results_file_filtered')
    parser.add_argument('networking_pairs_results_file_filtered_classic_output', help='networking_pairs_results_file_filtered_classic_output')
    args = parser.parse_args()

    # Close the parameter file deterministically instead of leaking the handle.
    # NOTE(review): assumes parse_xml_file reads the whole file before returning.
    with open(args.proteosafe_parameters) as params_file:
        param_obj = ming_proteosafe_library.parse_xml_file(params_file)

    top_k_val = 10
    max_component_size = 0

    if "TOPK" in param_obj:
        top_k_val = int(param_obj["TOPK"][0])

    if "MAXIMUM_COMPONENT_SIZE" in param_obj:
        max_component_size = int(param_obj["MAXIMUM_COMPONENT_SIZE"][0])

    G = molecular_network_filtering_library.loading_network(args.networking_pairs_results_file, hasHeaders=True)
    if G is None:
        # Nothing was loaded: stop successfully without producing outputs.
        # SystemExit is raised directly because the ``exit`` builtin is only
        # installed by the ``site`` module for interactive use.
        raise SystemExit(0)

    molecular_network_filtering_library.filter_top_k(G, top_k_val)
    molecular_network_filtering_library.filter_component(G, max_component_size)
    molecular_network_filtering_library.output_graph_with_headers(G, args.networking_pairs_results_file_filtered)

    molecular_network_filtering_library.output_graph(G, args.networking_pairs_results_file_filtered_classic_output)
Пример #2
0
def main():
    """Filter a network pairs file using settings from a parameter XML file.

    Calls ``usage()`` first, then reads its inputs from ``sys.argv``:
    index 2 is the output path, index 4 the input pairs file and index 5
    the ProteoSAFe parameter XML file. The optional ``TOPK`` and
    ``MAXIMUM_COMPONENT_SIZE`` parameters override the defaults of 10 and 0.

    Raises:
        SystemExit: with status 0, before anything is written, when
            ``loading_network`` returns ``None``.
    """
    usage()

    output_file_path = sys.argv[2]
    input_file_path = sys.argv[4]
    params_file_path = sys.argv[5]
    top_k_val = 10
    max_component_size = 0

    # Use a context manager so the parameter file handle is not leaked.
    # NOTE(review): assumes parse_xml_file reads the whole file before returning.
    with open(params_file_path, "r") as params_file:
        params = ming_proteosafe_library.parse_xml_file(params_file)

    if "TOPK" in params:
        top_k_val = int(params["TOPK"][0])

    if "MAXIMUM_COMPONENT_SIZE" in params:
        max_component_size = int(params["MAXIMUM_COMPONENT_SIZE"][0])

    # Doing other filtering
    G = molecular_network_filtering_library.loading_network(input_file_path, hasHeaders=True)
    # Returning None means that there are no edges in the output
    if G is None:
        sys.exit(0)
    molecular_network_filtering_library.filter_top_k(G, top_k_val)
    molecular_network_filtering_library.filter_component(G, max_component_size)
    molecular_network_filtering_library.output_graph(G, output_file_path)
Пример #3
0
def main():
    """Build a network from a pairs file and export it as GraphML.

    Positional command-line arguments:
        input_pairs: pairs file, loaded with ``hasHeaders=True``.
        input_clusterinfosummary: cluster info summary added to the graph.
        input_librarysearch: library search results added to the graph.
        output_graphml: destination GraphML path.

    Optional command-line arguments:
        --input_analoglibrarysearch: analog library search results, added
            with the ``"Analog:"`` annotation prefix.
        --input_pairsfolder: folder whose entries are each passed to
            ``add_additional_edges``.
    """
    parser = argparse.ArgumentParser(description='Creating GraphML')
    parser.add_argument('input_pairs', help='input_pairs')
    parser.add_argument('input_clusterinfosummary',
                        help='input_clusterinfosummary')
    parser.add_argument('input_librarysearch', help='input_librarysearch')
    parser.add_argument('output_graphml', help='output_graphml')
    parser.add_argument('--input_analoglibrarysearch',
                        help='input_analoglibrarysearch')
    parser.add_argument('--input_pairsfolder', help='input_pairsfolder')
    args = parser.parse_args()

    # Doing other filtering
    G = molecular_network_filtering_library.loading_network(args.input_pairs,
                                                            hasHeaders=True)
    molecular_network_filtering_library.add_clusterinfo_summary_to_graph(
        G, args.input_clusterinfosummary)
    molecular_network_filtering_library.add_library_search_results_to_graph(
        G, args.input_librarysearch)

    if args.input_pairsfolder is not None:
        all_pairs_files = glob.glob(os.path.join(args.input_pairsfolder, "*"))
        for additional_pairs_file in all_pairs_files:
            print("Adding Additional Edges", additional_pairs_file)
            molecular_network_filtering_library.add_additional_edges(
                G, additional_pairs_file)

    if args.input_analoglibrarysearch is not None:
        # Annotate from the analog search file itself; the regular library
        # search file was already applied above without a prefix.
        molecular_network_filtering_library.add_library_search_results_to_graph(
            G, args.input_analoglibrarysearch, annotation_prefix="Analog:")

    nx.write_graphml(G, args.output_graphml, infer_numeric_types=True)
Пример #4
0
def main():
    """Apply top-K and component-size filtering to a network pairs file.

    ``usage()`` is called first. Inputs are then taken from ``sys.argv``:
    ``sys.argv[2]`` is the output path, ``sys.argv[4]`` the input pairs file
    and ``sys.argv[5]`` the ProteoSAFe parameter XML file. ``TOPK`` and
    ``MAXIMUM_COMPONENT_SIZE`` from the parameter file, when present,
    replace the defaults of 10 and 0.

    Raises:
        SystemExit: with status 0, before anything is written, when
            ``loading_network`` returns ``None``.
    """
    usage()

    output_file_path = sys.argv[2]
    input_file_path = sys.argv[4]
    params_file_path = sys.argv[5]
    top_k_val = 10
    max_component_size = 0

    # The handle is closed as soon as parsing finishes rather than leaked.
    # NOTE(review): assumes parse_xml_file reads the whole file before returning.
    with open(params_file_path, "r") as params_file:
        params = ming_proteosafe_library.parse_xml_file(params_file)

    if "TOPK" in params:
        top_k_val = int(params["TOPK"][0])

    if "MAXIMUM_COMPONENT_SIZE" in params:
        max_component_size = int(params["MAXIMUM_COMPONENT_SIZE"][0])

    # Doing other filtering
    G = molecular_network_filtering_library.loading_network(input_file_path,
                                                            hasHeaders=True)
    # Returning None means that there are no edges in the output
    if G is None:
        sys.exit(0)
    molecular_network_filtering_library.filter_top_k(G, top_k_val)
    molecular_network_filtering_library.filter_component(G, max_component_size)
    molecular_network_filtering_library.output_graph(G, output_file_path)
def main():
    """Load a pairs file, annotate the graph and write it out as GraphML.

    Reads four values from ``sys.argv``: the pairs file (1), the cluster
    info summary (2), the library search results (3) and the output
    GraphML path (4).
    """
    filtering = molecular_network_filtering_library
    pairs_path = sys.argv[1]

    # Build the network, then attach cluster and library annotations.
    network = filtering.loading_network(pairs_path, hasHeaders=True)
    filtering.add_clusterinfo_summary_to_graph(network, sys.argv[2])
    filtering.add_library_search_results_to_graph(network, sys.argv[3])

    nx.write_graphml(network, sys.argv[4])
Пример #6
0
def main():
    """Create an annotated GraphML file from a network pairs file.

    ``sys.argv`` supplies, in order: the pairs file, the cluster info
    summary, the library search results and the output GraphML path.
    The graph is written with ``infer_numeric_types=True``.
    """
    filtering = molecular_network_filtering_library
    pairs_path = sys.argv[1]

    # Load the edges first; annotations are layered on afterwards.
    network = filtering.loading_network(pairs_path, hasHeaders=True)
    filtering.add_clusterinfo_summary_to_graph(network, sys.argv[2])
    filtering.add_library_search_results_to_graph(network, sys.argv[3])

    nx.write_graphml(network, sys.argv[4], infer_numeric_types=True)
def create_graphml(input_pairs,
                   input_clusterinfosummary,
                   input_librarysearch,
                   input_analoglibrarysearch,
                   input_pairsfolder,
                   output_graphml,
                   collapse_ion_edges=False):
    """Build an annotated network from a pairs file and write it as GraphML.

    Args:
        input_pairs: pairs file, loaded with ``hasHeaders=True``.
        input_clusterinfosummary: cluster info summary added to the graph.
        input_librarysearch: library search results added to the graph.
        input_analoglibrarysearch: analog library search results, added with
            the ``"Analog:"`` annotation prefix; skipped when ``None``.
        input_pairsfolder: folder whose entries are each passed to
            ``add_additional_edges``; skipped when ``None``.
        output_graphml: destination GraphML path.
        collapse_ion_edges: when true, ion identity networks are collapsed
            after the additional edges are added. This only happens when
            ``input_pairsfolder`` is given, and a failure to collapse is
            logged and otherwise ignored.
    """
    logger = logging_utils.get_logger(__name__)
    # Doing other filtering
    logger.debug("Creating network")
    G = molecular_network_filtering_library.loading_network(input_pairs,
                                                            hasHeaders=True)
    molecular_network_filtering_library.add_clusterinfo_summary_to_graph(
        G, input_clusterinfosummary)
    molecular_network_filtering_library.add_library_search_results_to_graph(
        G, input_librarysearch)
    # mark all nodes as feature or ion identity nodes (constants.NODE.TYPE_ATTRIBUTE)
    logger.debug("Mark all node types")
    ion_network_utils.mark_all_node_types(G)

    # add analogs
    if input_analoglibrarysearch is not None:
        logger.debug("Add analog library search results")
        molecular_network_filtering_library.add_library_search_results_to_graph(
            G, input_analoglibrarysearch, annotation_prefix="Analog:")

    # add additional edges - e.g. ion identity edges between different ion species of the same molecule
    if input_pairsfolder is not None:
        all_pairs_files = glob.glob(os.path.join(input_pairsfolder, "*"))
        logger.debug("Adding additional edges from files: " +
                     str(len(all_pairs_files)))
        for additional_pairs_file in all_pairs_files:
            logger.debug("Adding Additional Edges from " +
                         str(additional_pairs_file))
            molecular_network_filtering_library.add_additional_edges(
                G, additional_pairs_file)

        # collapse all ion identity networks, each into a single node
        if collapse_ion_edges:
            logger.debug("Collapsing additional edges of type: " +
                         CONST.EDGE.ION_TYPE)
            try:
                G = ion_network_utils.collapse_ion_networks(G)
            except Exception as err:
                # Collapsing stays best-effort: G keeps its uncollapsed value.
                # Catching Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate, and the reason is kept in the log.
                logger.debug("Failed collapsing: " + str(err))

    # export graphml
    logger.info("Writing graphml: " + output_graphml)
    nx.write_graphml(G, output_graphml, infer_numeric_types=True)
def main():
    """Create a GraphML file from a pairs file plus extra edge files.

    ``sys.argv`` supplies, in order: the pairs file, the cluster info
    summary, the library search results, a folder of additional pairs
    files and the output GraphML path.
    """
    filtering = molecular_network_filtering_library
    pairs_path = sys.argv[1]

    # Base network with cluster and library annotations.
    network = filtering.loading_network(pairs_path, hasHeaders=True)
    filtering.add_clusterinfo_summary_to_graph(network, sys.argv[2])
    filtering.add_library_search_results_to_graph(network, sys.argv[3])

    # Every file listed for the folder contributes additional edges.
    extra_pairs_dir = sys.argv[4]
    for extra_pairs_path in ming_fileio_library.list_files_in_dir(extra_pairs_dir):
        print("Adding Additional Edges", extra_pairs_path)
        filtering.add_additional_edges(network, extra_pairs_path)

    nx.write_graphml(network, sys.argv[5], infer_numeric_types=True)
Пример #9
0
def test_network_topology():
    """Run top-K and additive component filtering on a sample pairs file.

    If ``pairs.tsv`` is not present it is downloaded with ``wget`` and then
    rewritten to keep only its first 10000 rows. The file is loaded as a
    ``Spec2Vec`` network, filtered, and written to ``additive.graphml``.
    """
    print("test topology")

    # Getting pairs data
    local_pairs = "pairs.tsv"
    have_local_copy = os.path.exists(local_pairs)
    if not have_local_copy:
        download_url = "https://gnps.ucsd.edu/ProteoSAFe/DownloadResultFile?task=b8b5f2f0581c46f1b61ce047d3bbad16&block=main&file=pairs/pairs.tsv"
        fetch_command = 'wget "{}" -O pairs.tsv'.format(download_url)
        os.system(fetch_command)

        # Trim the fresh download in place to its first 10000 rows.
        head_df = pd.read_csv(local_pairs, sep="\t", nrows=10000)
        head_df.to_csv(local_pairs, sep="\t", index=False)

    filtering = molecular_network_filtering_library
    network = filtering.loading_network(local_pairs,
                                        hasHeaders=True,
                                        edgetype="Spec2Vec")
    filtering.filter_top_k(network, 10)
    filtering.filter_component_additive(network, 100)
    nx.write_graphml(network, "additive.graphml")
Пример #10
0
def main():
    """Export an annotated network, including extra edge files, as GraphML.

    Arguments are read from ``sys.argv``: pairs file (1), cluster info
    summary (2), library search results (3), folder of additional pairs
    files (4) and output GraphML path (5).
    """
    filtering = molecular_network_filtering_library
    pairs_path = sys.argv[1]

    # Load the edges, then attach cluster and library annotations.
    network = filtering.loading_network(pairs_path, hasHeaders=True)
    filtering.add_clusterinfo_summary_to_graph(network, sys.argv[2])
    filtering.add_library_search_results_to_graph(network, sys.argv[3])

    # Add the edges from each file listed for the additional pairs folder.
    extra_pairs_dir = sys.argv[4]
    extra_pairs_paths = ming_fileio_library.list_files_in_dir(extra_pairs_dir)
    for extra_pairs_path in extra_pairs_paths:
        print("Adding Additional Edges", extra_pairs_path)
        filtering.add_additional_edges(network, extra_pairs_path)

    nx.write_graphml(network, sys.argv[5], infer_numeric_types=True)
def create_graphml(input_pairs, input_clusterinfosummary, input_librarysearch,
                   input_analoglibrarysearch, input_pairsfolder,
                   output_graphml):
    """Build an annotated network from a pairs file and write it as GraphML.

    Args:
        input_pairs: pairs file, loaded with ``hasHeaders=True``.
        input_clusterinfosummary: cluster info summary added to the graph.
        input_librarysearch: library search results added to the graph.
        input_analoglibrarysearch: analog library search results, added with
            the ``"Analog:"`` annotation prefix; skipped when ``None``.
        input_pairsfolder: folder whose entries are each passed to
            ``add_additional_edges``; skipped when ``None``.
        output_graphml: destination GraphML path.
    """
    filtering = molecular_network_filtering_library

    # Doing other filtering
    network = filtering.loading_network(input_pairs, hasHeaders=True)
    filtering.add_clusterinfo_summary_to_graph(network,
                                               input_clusterinfosummary)
    filtering.add_library_search_results_to_graph(network,
                                                  input_librarysearch)

    # An absent folder simply contributes no extra edge files.
    extra_pairs_paths = []
    if input_pairsfolder is not None:
        extra_pairs_paths = glob.glob(os.path.join(input_pairsfolder, "*"))
    for extra_pairs_path in extra_pairs_paths:
        print("Adding Additional Edges", extra_pairs_path)
        filtering.add_additional_edges(network, extra_pairs_path)

    if input_analoglibrarysearch is not None:
        filtering.add_library_search_results_to_graph(
            network, input_analoglibrarysearch, annotation_prefix="Analog:")

    nx.write_graphml(network, output_graphml, infer_numeric_types=True)
def main():
    """Filter a network pairs file and emit it with and without headers.

    Positional command-line arguments:
        proteosafe_parameters: ProteoSAFe parameter XML file; ``TOPK`` and
            ``MAXIMUM_COMPONENT_SIZE``, when present, replace the defaults
            of 10 and 0.
        networking_pairs_results_file: input pairs file, loaded with
            ``hasHeaders=True``.
        networking_pairs_results_file_filtered: path written by
            ``output_graph_with_headers``.
        networking_pairs_results_file_filtered_classic_output: path written
            by ``output_graph``.

    Raises:
        SystemExit: with status 0, before anything is written, when
            ``loading_network`` returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description='Creating Clustering Info Summary')
    parser.add_argument('proteosafe_parameters', help='proteosafe_parameters')
    parser.add_argument('networking_pairs_results_file',
                        help='networking_pairs_results_file')
    parser.add_argument('networking_pairs_results_file_filtered',
                        help='networking_pairs_results_file_filtered')
    parser.add_argument(
        'networking_pairs_results_file_filtered_classic_output',
        help='networking_pairs_results_file_filtered_classic_output')
    args = parser.parse_args()

    # Parse inside a context manager so the file handle is always closed.
    # NOTE(review): assumes parse_xml_file reads the whole file before returning.
    with open(args.proteosafe_parameters) as params_file:
        param_obj = ming_proteosafe_library.parse_xml_file(params_file)

    top_k_val = 10
    max_component_size = 0

    if "TOPK" in param_obj:
        top_k_val = int(param_obj["TOPK"][0])

    if "MAXIMUM_COMPONENT_SIZE" in param_obj:
        max_component_size = int(param_obj["MAXIMUM_COMPONENT_SIZE"][0])

    G = molecular_network_filtering_library.loading_network(
        args.networking_pairs_results_file, hasHeaders=True)
    if G is None:
        # Nothing was loaded: exit successfully without writing outputs.
        # SystemExit is raised directly because the ``exit`` builtin is only
        # installed by the ``site`` module for interactive use.
        raise SystemExit(0)

    molecular_network_filtering_library.filter_top_k(G, top_k_val)
    molecular_network_filtering_library.filter_component(G, max_component_size)
    molecular_network_filtering_library.output_graph_with_headers(
        G, args.networking_pairs_results_file_filtered)

    molecular_network_filtering_library.output_graph(
        G, args.networking_pairs_results_file_filtered_classic_output)
Пример #13
0
def main():
    """Run the external pairs tool, then filter its output in place.

    Calls ``usage()`` first, then reads from ``sys.argv``: index 2 is the
    output path, index 4 the input file, index 5 the ProteoSAFe parameter
    XML file and index 6 the path of the pairs executable. The executable is
    invoked through the shell with ``--outfile`` and ``--input-file``. The
    resulting file is loaded, filtered with ``TOPK`` (default 10) and
    ``MAXIMUM_COMPONENT_SIZE`` (default 0), and written back to the same
    output path.

    Raises:
        SystemExit: with status 0 when the tool produced no output file (an
            empty one is created first) or when ``loading_network`` returns
            ``None``.
    """
    usage()

    output_file_path = sys.argv[2]
    input_file_path = sys.argv[4]
    params_file_path = sys.argv[5]
    pairsinfo_path = sys.argv[6]
    top_k_val = 10
    max_component_size = 0

    # Close the parameter file deterministically instead of leaking the handle.
    # NOTE(review): assumes parse_xml_file reads the whole file before returning.
    with open(params_file_path, "r") as params_file:
        params = parse_xml_file(params_file)

    if "TOPK" in params:
        top_k_val = int(params["TOPK"][0])

    if "MAXIMUM_COMPONENT_SIZE" in params:
        max_component_size = int(params["MAXIMUM_COMPONENT_SIZE"][0])

    # NOTE(review): the command is a shell string assembled from argv values;
    # paths with spaces or shell metacharacters will not survive it intact.
    full_command = pairsinfo_path + " " + "--outfile " + output_file_path + " --input-file " + input_file_path

    # Function-call form prints the same text on Python 2 and is valid on 3.
    print(full_command)
    os.system(full_command)

    if not os.path.exists(output_file_path):
        print("No Network Pairs, adjust clustering accordingly")
        # Leave an empty output file behind, closing the handle right away.
        with open(output_file_path, "w"):
            pass
        sys.exit(0)

    # Doing other filtering
    G = molecular_network_filtering_library.loading_network(output_file_path)
    # Returning None means that there are no edges in the output
    if G is None:
        sys.exit(0)
    molecular_network_filtering_library.filter_top_k(G, top_k_val)
    molecular_network_filtering_library.filter_component(G, max_component_size)
    molecular_network_filtering_library.output_graph(G, output_file_path)
Пример #14
0
def main():
    """Generate network pairs with an external tool and filter the result.

    ``usage()`` is called first. ``sys.argv`` then supplies the output path
    (2), the input file (4), the ProteoSAFe parameter XML file (5) and the
    pairs executable (6). The executable is run through the shell with
    ``--outfile`` and ``--input-file``; its output is loaded, filtered with
    ``TOPK`` (default 10) and ``MAXIMUM_COMPONENT_SIZE`` (default 0), and
    overwritten at the same output path.

    Raises:
        SystemExit: with status 0 when no output file exists after the tool
            runs (an empty one is created first) or when ``loading_network``
            returns ``None``.
    """
    usage()

    output_file_path = sys.argv[2]
    input_file_path = sys.argv[4]
    params_file_path = sys.argv[5]
    pairsinfo_path = sys.argv[6]
    top_k_val = 10
    max_component_size = 0

    # The context manager releases the parameter file handle after parsing.
    # NOTE(review): assumes parse_xml_file reads the whole file before returning.
    with open(params_file_path, "r") as params_file:
        params = parse_xml_file(params_file)

    if "TOPK" in params:
        top_k_val = int(params["TOPK"][0])

    if "MAXIMUM_COMPONENT_SIZE" in params:
        max_component_size = int(params["MAXIMUM_COMPONENT_SIZE"][0])

    # NOTE(review): built as a single shell string from argv values, so
    # arguments containing spaces or shell metacharacters are not protected.
    full_command = pairsinfo_path + " " + "--outfile " + output_file_path + " --input-file " + input_file_path

    # print() with one argument behaves identically on Python 2 and 3.
    print(full_command)
    os.system(full_command)

    if not os.path.exists(output_file_path):
        print("No Network Pairs, adjust clustering accordingly")
        # Create the empty output file and close it immediately.
        with open(output_file_path, "w"):
            pass
        sys.exit(0)

    # Doing other filtering
    G = molecular_network_filtering_library.loading_network(output_file_path)
    # Returning None means that there are no edges in the output
    if G is None:
        sys.exit(0)
    molecular_network_filtering_library.filter_top_k(G, top_k_val)
    molecular_network_filtering_library.filter_component(G, max_component_size)
    molecular_network_filtering_library.output_graph(G, output_file_path)