Пример #1
0
def upload_ebs_files(dirpath,
                     ndex,
                     group_id=None,
                     template_network=None,
                     layout=None,
                     update=False,
                     filter=None,
                     max=None,
                     nci_table=None):
    """Convert each ``.sif`` file in *dirpath* to a network and save it via *ndex*.

    Args:
        dirpath: Directory that is listed for files ending in ``.sif``.
        ndex: Client object used to query the account and to save/update
            networks (``save_cx_stream_as_new_network``,
            ``update_cx_network``, ``grant_networks_to_group``).
        group_id: When truthy, every saved network is granted to this group.
        template_network: Optional network whose style is applied to each
            converted network (validated by ``_check_template_network_``).
        layout: Optional layout name; only ``"directed_flow"`` is acted on.
        update: When true, a file whose network name matches exactly one
            existing account network updates that network in place; a file
            matching more than one existing network is skipped.
        filter: Optional edge filter name; ``"cravat_1"`` and ``"ndex_1"``
            are acted on.
        max: Optional upper bound on the number of files processed.
        nci_table: Optional table passed to ``add_nci_table_properties``;
            when given, the network "version" attribute is also set.

    Returns:
        dict mapping each processed network name to its network id.

    Raises:
        ValueError: if an update is attempted with more than one matching
            network (defensive; such files are skipped earlier).
    """
    my_layout = _check_layout_(layout)
    my_filter = _check_filter_(filter)
    my_template_network = _check_template_network_(template_network)
    network_id_map = {}
    if max is not None:
        logger.info("max files: " + str(max))

    # Collects pathway names that add_nci_table_properties reports as missing.
    not_in_nci_table = []
    skipped = []
    account_network_map = search_for_non_biopax_networks(ndex)

    files = [name for name in listdir(dirpath) if name.endswith(".sif")]
    file_network_names = [network_name_from_path(name) for name in files]

    logger.info("%s SIF files to load" % len(files))
    logger.info("%s Non-Biopax Networks in the account" %
                len(account_network_map))

    account_not_file = list(
        set(account_network_map).difference(file_network_names))

    logger.info("%s Networks in the account not in upload files" %
                (len(account_not_file)))
    for network_name in account_not_file:
        logger.info(" - %s" % (network_name))

    for network_count, filename in enumerate(files, 1):
        if max is not None and network_count > max:
            break

        logger.info("loading ndexebs file #" + str(network_count) + ": " +
                    filename)
        path = join(dirpath, filename)
        network_name = network_name_from_path(path)

        # Existing networks only matter in update mode; otherwise every file
        # is saved as a new network.
        matching_networks = account_network_map.get(network_name)
        matching_network_count = 0
        if matching_networks and update:
            matching_network_count = len(matching_networks)
            if matching_network_count > 1:
                logger.info(
                    "skipping this file because %s existing networks match '%s'"
                    % (len(matching_networks), network_name))
                skipped.append(network_name + " :duplicate names")
                continue

        ebs = load_ebs_file_to_dict(path)

        if len(ebs) == 0:
            logger.info(
                "skipping this file because no rows were found when processing it as EBS"
            )
            skipped.append(network_name + " :no rows in file")
            continue

        ebs_network = ebs_to_network(ebs, name=network_name)

        if len(ebs_network.nodes()) == 0:
            logger.info(
                "skipping this network because no nodes were found when processing it as EBS"
            )
            skipped.append(network_name + " :no nodes in file")
            continue

        # Do this one first to establish subnetwork and view ids from template
        # this is not ideal, but ok for special case of this loader
        if my_template_network:
            toolbox.apply_network_as_template(ebs_network, template_network)
            logger.info("applied graphic style from " +
                        str(template_network.get_name()))

        if my_filter:
            if filter == "cravat_1":
                cravat_edge_filter(ebs_network)
            if filter == "ndex_1":
                ndex_edge_filter(ebs_network)

        if my_layout and layout == "directed_flow":
            layouts.apply_directed_flow_layout(
                ebs_network,
                node_width=25,
                use_degree_edge_weights=True,
                iterations=200)
            logger.info("applied directed_flow layout")

        provenance_props = [{"name": "dc:title", "value": network_name}]

        if nci_table:
            add_nci_table_properties(ebs_network, network_name, nci_table,
                                     not_in_nci_table)

        ebs_network.set_network_attribute(
            "description", NCI_DESCRIPTION_TEMPLATE % network_name)

        ebs_network.set_network_attribute("organism", "human")

        if nci_table:
            ebs_network.set_network_attribute("version", "27-Jul-2015")

        ebs_network.update_provenance("Created by NDEx EBS network converter",
                                      entity_props=provenance_props)

        # matching_network_count is non-zero only in update mode, so a count
        # of zero covers both "not updating" and "updating, nothing to match".
        if matching_network_count == 0:
            logger.info("saving new network " + network_name)
            network_url = ndex.save_cx_stream_as_new_network(
                ebs_network.to_cx_stream())
            network_id = network_url.split("/")[-1]
        elif matching_network_count == 1:
            network_to_update = matching_networks[0]
            logger.info("updating network " +
                        network_to_update.get("name") + " with " +
                        network_name)
            network_id = network_to_update.get("externalId")
            ndex.update_cx_network(ebs_network.to_cx_stream(), network_id)
        else:
            raise ValueError(
                "unexpected case: should not try to update when more than one matching network"
            )

        network_id_map[network_name] = network_id

    if group_id:
        logger.info("granting networks to group id " + str(group_id))
        # Pass a real list so the client does not receive a dict view on Python 3.
        ndex.grant_networks_to_group(group_id, list(network_id_map.values()))

    for network_name, networks in account_network_map.items():
        if len(networks) > 1:
            logger.info(
                "Skipped %s because of multiple non-BioPAX matches in the account"
                % (network_name))

    logger.info("-----------------")
    for network_name in skipped:
        logger.info("Skipped %s" % (network_name))

    return network_id_map
import ndex.beta.layouts as layout
import time
from ndex.networkn import NdexGraph

uuidTest = "153b6970-6193-11e5-8ac5-06603eb7f303"
# Construct an NdexGraph for the network with this UUID on ndexbio.org.
M = NdexGraph(server='http://ndexbio.org', uuid=uuidTest)

# Time a 500-iteration directed-flow layout run.
start = time.time()
layout.apply_directed_flow_layout(M, iterations=500)
elapsed_time = time.time() - start
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(elapsed_time)
# NOTE(review): the account name and password are hard-coded in source;
# they should be rotated and read from configuration or the environment.
M.upload_to('http://www.ndexbio.org/', 'cc.zhang', 'piggyzhang')
 def test2(self):
     """Lay out the graph from ``filtered.cx`` and check node 80's ``diffusion_input``."""
     cx_path = path.join(HERE, 'filtered.cx')
     with open(cx_path, 'r') as handle:
         graph = NdexGraph(json.load(handle))
         layouts.apply_directed_flow_layout(graph)
         # Node 80 is expected to carry diffusion_input == 1.0 after the layout.
         node_80 = graph.node[80]
         self.assertEqual(node_80['diffusion_input'], 1.0)
Пример #4
0
def main():
    """Create or update an NDEx network from a TSV file, one edge per line.

    Command-line arguments: ``username password server tsv plan name desc``
    plus the options ``-t`` (template network id), ``-l`` (layout name; only
    ``"df_simple"`` is acted on) and ``-u`` (uuid of a network to update).

    With ``-u`` the existing network's attributes and provenance are fetched
    and passed to the converter, and the network is updated in place;
    otherwise a new network is saved with the given name and description.

    A loading plan that fails schema validation is reported and the function
    returns. A validation error raised later exits with status 1. A request
    error is reported and re-raised.
    """
    import sys  # local import: only needed for the exit status below

    parser = argparse.ArgumentParser(
        description='create NDEx network from TSV, one edge per line')

    for positional in ('username', 'password', 'server', 'tsv', 'plan',
                       'name', 'desc'):
        parser.add_argument(positional)
    parser.add_argument(
        '-t',
        action='store',
        dest='template_id',
        help='network id for the network to use as a graphic template')
    parser.add_argument('-l',
                        action='store',
                        dest='layout',
                        help='name of the layout to apply')
    parser.add_argument('-u',
                        action='store',
                        dest='update_uuid',
                        help='uuid of the network to update')

    arg = parser.parse_args()

    try:
        # set up the ndex connection
        # error thrown if cannot authenticate
        my_ndex = nc.Ndex("http://" + arg.server, arg.username, arg.password)

        print("loading plan from: " + arg.plan)

        try:
            import_plan = d2c.TSVLoadingPlan(arg.plan)
        except jsonschema.ValidationError as e1:
            print("Failed to parse the loading plan '" + arg.plan + "': " +
                  e1.message)
            print('at path: ' + str(e1.absolute_path))
            print("in block: ")
            print(e1.instance)
            return

        # set up the tsv -> cx converter
        print("parsing tsv file using loading plan ...")
        tsv_converter = d2c.TSV2CXConverter(import_plan)

        template_network = None
        if arg.template_id:
            response = my_ndex.get_network_as_cx_stream(arg.template_id)
            template_network = networkn.NdexGraph(response.json())

        if arg.update_uuid:
            # Updating: carry over the existing network's attributes and
            # provenance into the converted network.
            response = my_ndex.get_network_aspect_as_cx_stream(
                arg.update_uuid, "networkAttributes")
            network_attributes = response.json()
            provenance = my_ndex.get_provenance(arg.update_uuid)
            ng = tsv_converter.convert_tsv_to_cx(
                arg.tsv,
                network_attributes=network_attributes,
                provenance=provenance)
            if template_network:
                toolbox.apply_network_as_template(ng, template_network)
            else:
                # No template given: keep the visual properties the existing
                # network already has, if any.
                response = my_ndex.get_network_aspect_as_cx_stream(
                    arg.update_uuid, "cyVisualProperties")
                visual_properties = response.json()
                if len(visual_properties) > 0:
                    ng.unclassified_cx.append(
                        {"cyVisualProperties": visual_properties})
        else:
            ng = tsv_converter.convert_tsv_to_cx(arg.tsv,
                                                 name=arg.name,
                                                 description=arg.desc)
            if template_network:
                toolbox.apply_network_as_template(ng, template_network)

        if arg.layout == "df_simple":
            layouts.apply_directed_flow_layout(ng)

        if arg.update_uuid:
            my_ndex.update_cx_network(ng.to_cx_stream(), arg.update_uuid)
        else:
            my_ndex.save_cx_stream_as_new_network(ng.to_cx_stream())

        print("Done.")

    except jsonschema.exceptions.ValidationError as ve:
        print(str(ve))
        sys.exit(1)
    except requests.exceptions.RequestException as e:
        print("error in request to NDEx server: " + str(e))
        # Bare raise keeps the original traceback.
        raise
 def test2(self):
     """Apply the directed-flow layout to ``filtered.cx`` and verify node 80."""
     fixture = path.join(HERE, 'filtered.cx')
     with open(fixture, 'r') as stream:
         payload = json.load(stream)
         network = NdexGraph(payload)
         layouts.apply_directed_flow_layout(network)
         observed = network.node[80]['diffusion_input']
         self.assertEqual(observed, 1.0)
Пример #6
0
def _items(mapping):
    """Return the (key, value) pairs of *mapping* on both Python 2 and 3."""
    if sys.version_info.major >= 3:
        return mapping.items()
    return mapping.iteritems()


def _remove_shadowed_edges(network, edge_map, other_edge_exists):
    """Remove each edge in *edge_map* whose endpoints also share another edge.

    *edge_map* maps ``source -> target -> edge id``; *other_edge_exists* maps
    ``source -> target -> True`` for the remaining interaction types. The
    attributes of every removed edge are removed as well.
    """
    for source, targets in edge_map.items():
        shadowing = other_edge_exists.get(source)
        if shadowing is None:
            continue
        for target, edge_id in targets.items():
            if shadowing.get(target) is None:
                continue
            network.remove_edge(edge_id)
            for net_attr in network.get_edge_attributes(edge_id):
                network.remove_edge_attribute(edge_id, net_attr.get_name())


def ebs_to_df(file_name):
    """Convert one extended-binary SIF file to a network and upload it.

    The file ``biopax/sif/<file_name>`` is read as an edge table followed by
    a blank line and a node table. The two tables are joined into one
    DataFrame, converted with the module-level ``load_plan``, styled,
    post-processed (node names, ``represents``, aliases, types, edge
    direction, removal of redundant edges), laid out and sent to the server
    through the module-level ``my_ndex`` client.

    Args:
        file_name: Name of the ``.sif`` file inside ``biopax/sif``; the name
            without ``.sif`` becomes the network name.

    Returns:
        The result of ``my_ndex.update_cx_network`` when the network name has
        an entry in ``update_ncipid_mapping``; otherwise the message returned
        by ``my_ndex.save_cx_stream_as_new_network``. Despite the function
        name, no DataFrame is returned.
    """
    node_table = []
    id_list = []

    path_to_sif = path.join('biopax', 'sif', file_name)
    # 'rU' is rejected by Python 3.11+; on Python 3 text mode already uses
    # universal newlines, which the blank-line test below relies on.
    open_mode = 'r' if sys.version_info.major >= 3 else 'rU'
    with open(path_to_sif, open_mode) as f:
        lines = f.readlines()

    # ------------------------------------------------------------------
    # Split the file into edge rows and node rows. The first line is the
    # edge header; a blank line switches to the node header and node rows.
    # ------------------------------------------------------------------
    mode = "edge"
    edge_rows_tuples = []
    node_rows_tuples = []
    node_lines = []
    edge_fields = []
    node_fields = []
    for index, line in enumerate(lines):
        if index == 0:
            edge_fields = [h.strip() for h in line.split('\t')]
        elif line == '\n':
            mode = "node_header"
        elif mode == "node_header":
            node_fields = [h.strip() for h in line.split('\t')]
            mode = "node"
        elif mode == "node":
            node_rows_tuples.append(tuple(line.split('\t')))
            node_lines.append(line)
        elif mode == "edge":
            edge_rows_tuples.append(tuple(line.split('\t')))

    df = pd.DataFrame.from_records(edge_rows_tuples, columns=edge_fields)
    df_nodes = pd.DataFrame.from_records(node_rows_tuples, columns=node_fields)

    # Attach the node columns to each edge, once for each participant.
    df_with_a = df.join(df_nodes.set_index('PARTICIPANT'), on='PARTICIPANT_A')
    df_with_a_b = df_with_a.join(df_nodes.set_index('PARTICIPANT'), on='PARTICIPANT_B',
                                 lsuffix='_A', rsuffix='_B')
    df_with_a_b = df_with_a_b.replace('\n', '', regex=True)
    df_with_a_b['PARTICIPANT_A'] = df_with_a_b['PARTICIPANT_A'].map(lambda x: x.lstrip('[').rstrip(']'))
    df_with_a_b['PARTICIPANT_B'] = df_with_a_b['PARTICIPANT_B'].map(lambda x: x.lstrip('[').rstrip(']'))

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(df_with_a_b, load_plan)

    network.set_name(file_name.replace('.sif', ''))

    # ==========================
    # APPLY LAYOUT
    # ==========================
    network.apply_template(username=my_username, password=my_password, server=my_server,
                           uuid=cytoscape_visual_properties_template_id)

    network.merge_node_attributes('alias_a', 'alias_b', 'alias')
    network.merge_node_attributes('PARTICIPANT_TYPE_A', 'PARTICIPANT_TYPE_B', 'type')

    get_uniprot_gene_symbol_mapping(network)

    for k, v in _items(network.nodes):
        # ==============================================
        # CONVERT NODE NAME FROM UNIPROT TO GENE SYMBOL
        # ==============================================
        participant_name = v.get_name()
        if '_HUMAN' in participant_name and node_mapping.get(participant_name) is not None:
            v.set_node_name(node_mapping.get(participant_name))
        elif len(participant_name) > 25:
            v.set_node_name(participant_name.split('/')[0])

        # =============================
        # SET REPRESENTS
        # =============================
        aliases = network.get_node_attribute(v, 'alias')
        # The literal string 'null' means "no aliases"; it must not be sliced
        # like a real alias list.
        has_aliases = aliases is not None and aliases != 'null' and len(aliases) > 0
        if has_aliases:
            v.set_node_represents(aliases[0])
        else:
            v.set_node_represents(v.get_name())

        # The first alias became "represents"; keep only the remainder.
        if has_aliases and len(aliases) > 1:
            replace_alias = network.get_node_attribute_objects(k, 'alias')
            replace_alias.set_values(aliases[1:])
            network.set_node_attribute(v, 'alias', aliases[1:])
        else:
            network.remove_node_attribute(v, 'alias')

        node_type = network.get_node_attribute(v, 'type')
        network.set_node_attribute(k, 'type', participant_type_map.get(node_type))

    # =============================
    # POST-PROCESS EDGE ATTRIBUTES
    # =============================
    neighbor_of_map = {}
    controls_state_change_map = {}
    other_edge_exists = {}
    for k, v in _items(network.edges):
        s = v.get_source()
        t = v.get_target()
        i = v.get_interaction()
        # Index each edge in both directions so the lookup is symmetric.
        if i == 'neighbor-of':
            neighbor_of_map.setdefault(s, {})[t] = k
            neighbor_of_map.setdefault(t, {})[s] = k
        elif i == 'controls-state-change-of':
            controls_state_change_map.setdefault(s, {})[t] = k
            controls_state_change_map.setdefault(t, {})[s] = k
        else:
            other_edge_exists.setdefault(s, {})[t] = True
            other_edge_exists.setdefault(t, {})[s] = True

        network.set_edge_attribute(v, 'directed', i in DIRECTED_INTERACTIONS)

    # ==========================================================
    # REMOVE neighbor-of AND controls-state-change-of EDGES THAT
    # ARE ALREADY COVERED BY ANOTHER EDGE BETWEEN THE SAME NODES
    # ==========================================================
    _remove_shadowed_edges(network, neighbor_of_map, other_edge_exists)
    _remove_shadowed_edges(network, controls_state_change_map, other_edge_exists)

    node_reader = csv.DictReader(node_lines, fieldnames=node_fields, dialect='excel-tab')
    node_table.extend(node_reader)

    #=======================
    # PROCESS NODES
    #=======================
    for node_info in node_table:
        node_to_update = network.get_node(node_info.get('PARTICIPANT').lstrip('[').rstrip(']'))

        participant_name = node_info.get('PARTICIPANT_NAME')
        if participant_name is not None:
            participant_name = participant_name.lstrip('[').rstrip(']')

        if node_to_update.get_name().startswith("CHEBI") and participant_name:
            node_to_update.set_node_name(participant_name)

        #=======================
        # SET REPRESENTS
        #=======================
        unification_xref = node_info.get('UNIFICATION_XREF')
        if unification_xref:
            xrefs = unification_xref.split(';')
            unification = xrefs[0]
            chebi_xrefs = [uxr.replace('chebi:', '', 1) for uxr in xrefs
                           if uxr.upper().count('CHEBI') > 1]

            # Prefer the CHEBI-style xrefs when there are any; the first entry
            # is dropped from the alias list in either case.
            remaining = (chebi_xrefs or xrefs)[1:]
            if remaining:
                network.set_node_attribute(node_to_update, 'alias', remaining, type='list_of_string')
            else:
                # Target the node being processed (the original used a stale
                # loop variable left over from the earlier node loop).
                network.remove_node_attribute(node_to_update, 'alias')
        else:
            unification = node_info.get('PARTICIPANT').lstrip('[').rstrip(']')

        node_to_update.set_node_represents(unification.replace('chebi:', '', 1))

        #=====================================
        # PREP UNIPROT TO GENE SYMBOL LOOKUP
        #=====================================
        if participant_name is not None and '_HUMAN' in participant_name:
            gene_symbol_mapped_name = gene_symbol_mapping.get(participant_name)
            if gene_symbol_mapped_name is None:
                id_list.append(participant_name)
            elif len(gene_symbol_mapped_name) > 25:
                node_to_update.set_node_name(gene_symbol_mapped_name.split('/')[0])
            else:
                node_to_update.set_node_name(gene_symbol_mapped_name)

        network.set_node_attribute(node_to_update, 'type', participant_type_map.get(node_info.get('PARTICIPANT_TYPE')),
                                   type='string')

    # =================================
    # LOOKUP UNIPROT ID -> GENE SYMBOL
    # =================================
    # Skip the web-service call when there is nothing to look up.
    if id_list:
        url = 'https://biodbnet-abcc.ncifcrf.gov/webServices/rest.php/biodbnetRestApi.json?method=db2db&input=uniprot entry name&inputValues=' \
              + ','.join(id_list) + '&outputs=genesymbol&taxonId=9606&format=row'
        look_up_req = requests.get(url)
        look_up_json = look_up_req.json()
        if look_up_json is not None:
            for bio_db_item in look_up_json:
                gene_symbol_mapping[bio_db_item.get('InputValue')] = bio_db_item.get('Gene Symbol')
                node_mapping[bio_db_item.get('InputValue')] = bio_db_item.get('Gene Symbol')

    for k, v in _items(network.nodes):
        # =============================
        # POST-PROCESS NODES
        # =============================
        participant_name = v.get_name()
        if '_HUMAN' in participant_name and node_mapping.get(participant_name) is not None:
            v.set_node_name(node_mapping.get(participant_name))

    ebs_network = NdexGraph(cx=network.to_cx())

    layouts.apply_directed_flow_layout(ebs_network, node_width=25, use_degree_edge_weights=True, iterations=200)

    ebs_network.subnetwork_id = 1
    ebs_network.view_id = 1

    network_update_key = update_ncipid_mapping.get(network.get_name().upper())

    if network_update_key is not None:
        print("updating")

        network_properties = get_network_properties(my_server, my_username, my_password, network_update_key)

        for prop_name, prop_value in network_properties.items():
            if prop_name.upper() == 'VERSION':
                ebs_network.set_network_attribute('version', 'APR-2018')
            else:
                ebs_network.set_network_attribute(prop_name, prop_value)

        return my_ndex.update_cx_network(ebs_network.to_cx_stream(), network_update_key)

    print("new network")
    upload_message = my_ndex.save_cx_stream_as_new_network(ebs_network.to_cx_stream())
    network_uuid = upload_message.split('/')[-1]

    #===========================
    # MAKE NETWORK PUBLIC
    #===========================
    # Pause briefly before changing visibility of the just-saved network.
    time.sleep(1)
    my_ndex._make_network_public_indexed(network_uuid)

    return upload_message
Пример #7
0
            if True:  #sif_pathway_name in re_run_these:
                #if ndexebs.get('edge_table') and len(ndexebs.get('edge_table')) > 0:
                if file_title_mapping.get(sif_pathway_name) is not None:
                    sif_pathway_name = file_title_mapping.get(
                        sif_pathway_name
                    )  # ndexebs.get('edge_table')[0].get('PATHWAY_NAMES')

                network_summary = current_netpath_metadata.get(
                    sif_pathway_name)
                # {'node_table': [], 'edge_table': []}
                if len(ebs.get('node_table')) > 0:
                    network = ebs2cx.ebs_to_network(ebs, name=sif_pathway_name)
                    #network = trim_edges(network)
                    ebs2cx.ndex_edge_filter(network)

                    layouts.apply_directed_flow_layout(network)
                    #toolbox.apply_template(network, "c51cda49-6192-11e5-8ac5-06603eb7f303", server="http://dev2.ndexbio.org", username="******", password="******")
                    toolbox.apply_template(
                        network,
                        config_data.get('network_style').get('uuid'),
                        server=config_data.get('network_style').get('server'),
                        username=config_data.get('network_style').get('user'),
                        password=config_data.get('network_style').get(
                            'pass'))  # NCI PID
                    #toolbox.apply_template(network, "4b63be4e-4716-11e7-96f7-06832d634f41", server="http://dev.ndexbio.org", username="******", password="******") # Lipid Maps
                    #toolbox.apply_template(network, "23aa48fe-4fb0-11e7-a8b5-06832d634f41", server="http://dev.ndexbio.org", username="******", password="******") # Lipid Maps
                    #toolbox.apply_template(network, "71f237da-4704-11e7-a6ff-0660b7976219", server="http://dev2.ndexbio.org", username="******", password="******") # Netpath

                    #if network.pos:
                    #    if network.view_id is None:
                    #        network.view_id = 1