Exemplo n.º 1
0
def enhance_base_cip(base_cip, abstract_cip, mergeids, base_graph,
                     hash_bitmask, mod_dict, core_hash):
    """Merge abstract-level CIP information into a base-level CIP.

    Marks the merge nodes as core, restores distance-dependent labels on
    the remaining (interface) nodes, combines both interface hashes, and
    copies the abstract CIP's bookkeeping attributes onto ``base_cip``.

    Parameters
    ----------
    base_cip:
        CIP extracted from the base graph; mutated in place and returned.
    abstract_cip:
        CIP extracted from the abstract graph.
    mergeids:
        node ids in the base graph that belong to the abstract core.
    base_graph:
        the full base graph the new subgraph is taken from.
    hash_bitmask: int
        bitmask handed to the hash combiner.
    mod_dict: dict
        per-node interface-hash modifiers (consumed via ``get_mods``).
    core_hash:
        precomputed hash for the core of the finished CIP.

    Returns
    -------
        the mutated ``base_cip``.
    """
    # we cheated a little with the core, so we need to undo our cheating
    # keep the old cip graph so interface labels can be restored below
    whatever = base_cip.graph.copy()
    # NOTE(review): list concatenation of .nodes() implies networkx 1.x — confirm
    base_cip.graph = base_graph.subgraph(base_cip.graph.nodes() +
                                         mergeids).copy()

    # every merged node becomes part of the core
    for n in mergeids:
        base_cip.graph.node[n]['core'] = True

    # everything that is not core is interface; restore its label
    for n, d in base_cip.graph.nodes(data=True):
        if 'core' not in d:
            d['interface'] = True
            d['distance_dependent_label'] = whatever.node[n][
                'distance_dependent_label']

    base_cip.core_hash = core_hash
    # merging cip info with the abstract graph
    base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                               abstract_cip.interface_hash,
                                               get_mods(mod_dict, mergeids), 0,
                                               hash_bitmask)

    base_cip.core_nodes_count = abstract_cip.core_nodes_count
    base_cip.radius = abstract_cip.radius
    base_cip.abstract_thickness = abstract_cip.thickness

    # i want to see what they look like :)
    base_cip.abstract_view = abstract_cip.graph
    base_cip.distance_dict = abstract_cip.distance_dict
    return base_cip
Exemplo n.º 2
0
 def _compute_vertex_based_features(self, seq):
     """Return a sparse matrix with one normalized feature row per position.

     For every position in ``seq``, pairs of neighborhood hash codes (all
     radii up to r, all distances up to d) are hashed into feature codes,
     normalized per position, and converted into a sparse matrix.
     """
     if seq is None or len(seq) == 0:
         raise Exception("ERROR: something went wrong, empty instance.")
     n = len(seq)
     # neighborhood hash codes for every position, precomputed once
     hashes = [self._compute_neighborhood_hash(seq, i) for i in range(n)]
     per_vertex = {}
     for start in range(n):
         # pairs of kmers up to distance d for all radii up to r
         local = defaultdict(lambda: defaultdict(float))
         for rad in range(self.min_r, self.r + 1):
             if rad >= len(hashes[start]):
                 continue
             for dist in range(self.min_d, self.d + 1):
                 other = start + dist
                 if other + rad >= n:
                     continue
                 code = fast_hash_4(
                     hashes[start][rad],
                     rad,
                     dist,
                     hashes[other][rad],
                     self.bitmask,
                 )
                 local[fast_hash_2(rad, dist, self.bitmask)][code] += 1
         per_vertex.update(self._normalization(local, start))
     return self._convert_dict_to_sparse_matrix(per_vertex)
Exemplo n.º 3
0
 def _transform(self, instance_id, seq):
     """Return the normalized pair-of-kmers feature map for one instance.

     Accepts either a raw sequence or a (header, seq) pair; in the pair
     case only the sequence part is transformed.
     """
     if seq is None or len(seq) == 0:
         raise Exception("ERROR: something went wrong, empty instance # %d." % instance_id)
     if len(seq) == 2 and len(seq[1]) > 0:
         # the instance is assumed to be a (header, seq) pair
         seq = seq[1]
     n = len(seq)
     # neighborhood hash codes for all kmers up to r, per position
     hashes = [self._compute_neighborhood_hash(seq, i) for i in range(n)]
     # features are pairs of kmers up to distance d for all radii up to r
     features = defaultdict(lambda: defaultdict(float))
     for first in range(n):
         for rad in range(self.min_r, self.r + 1):
             if rad >= len(hashes[first]):
                 continue
             for dist in range(self.min_d, self.d + 1):
                 second = first + dist
                 if second + rad >= n:
                     continue
                 code = fast_hash_4(
                     hashes[first][rad],
                     rad,
                     dist,
                     hashes[second][rad],
                     self.bitmask,
                 )
                 features[fast_hash_2(rad, dist, self.bitmask)][code] += 1
     return self._normalization(features, instance_id)
Exemplo n.º 4
0
 def _transform_vertex_pair_base(self,
                                 graph,
                                 vertex_v,
                                 vertex_u,
                                 distance,
                                 feature_list,
                                 connection_weight=1):
     """Accumulate pair features for vertices (v, u) at every radius.

     For each radius (in steps of 2) and each label index, the canonical
     (order independent) pair of neighborhood hashes is combined with the
     radius and distance into a feature code and added to ``feature_list``
     under a (radius, distance) key.
     """
     # for all radii
     for radius in range(self.min_r, self.r + 2, 2):
         for label_index in range(graph.graph['label_size']):
             # skip radii for which no neighborhood hash was computed
             if radius < len(graph.node[vertex_v]['neighborhood_graph_hash'][label_index]) and \
                     radius < len(graph.node[vertex_u]['neighborhood_graph_hash'][label_index]):
                 # feature as a pair of neighbourhoods at a radius,distance
                 # canonicalization of pair of neighborhoods
                 vertex_v_hash = graph.node[vertex_v][
                     'neighborhood_graph_hash'][label_index][radius]
                 vertex_u_hash = graph.node[vertex_u][
                     'neighborhood_graph_hash'][label_index][radius]
                 if vertex_v_hash < vertex_u_hash:
                     first_hash, second_hash = (vertex_v_hash,
                                                vertex_u_hash)
                 else:
                     first_hash, second_hash = (vertex_u_hash,
                                                vertex_v_hash)
                 feature = fast_hash_4(first_hash, second_hash, radius,
                                       distance, self.bitmask)
                 key = fast_hash_2(radius, distance, self.bitmask)
                 # unweighted graphs simply count feature occurrences
                 if graph.graph.get('weighted', False) is False:
                     feature_list[key][feature] += 1
                 else:
                     # weighted graphs add both neighborhood weights,
                     # scaled by the connection weight
                     feature_list[key][feature] += connection_weight * \
                         (graph.node[vertex_v]['neighborhood_graph_weight'][radius] +
                          graph.node[vertex_u]['neighborhood_graph_weight'][radius])
Exemplo n.º 5
0
 def _transform_distance(self,
                         feature_list=None,
                         pos=None,
                         radius=None,
                         seq_len=None,
                         neigh_hash_cache=None,
                         neighborhood_weight_cache=None):
     """Accumulate pair features for one (pos, radius) over all distances.

     Both downstream (positive) and upstream (negative) endpoints are
     visited, but the hash always uses the absolute distance, so both
     directions contribute to the same feature key.
     """
     offsets = list(range(self.min_d, self.d + 1))
     offsets += list(range(-self.d, -self.min_d))
     for offset in offsets:
         end = pos + offset
         # only the magnitude of the offset enters the feature
         dist = abs(offset)
         # skip combinations whose external weight is explicitly zero
         if self.weights_dict is not None and \
                 self.weights_dict.get((radius, dist), 0) == 0:
             continue
         # both endpoints must lie fully inside the sequence
         if end < 0 or end + radius >= seq_len:
             continue
         code = fast_hash_4(neigh_hash_cache[pos][radius],
                            neigh_hash_cache[end][radius],
                            radius,
                            dist,
                            self.bitmask)
         key = fast_hash_2(radius, dist, self.bitmask)
         if neighborhood_weight_cache:
             # weighted case: both endpoints contribute their weight
             feature_list[key][code] += \
                 neighborhood_weight_cache[pos][radius]
             feature_list[key][code] += \
                 neighborhood_weight_cache[end][radius]
         else:
             feature_list[key][code] += 1
Exemplo n.º 6
0
 def _transform(self, instance_id, seq):
     """Return the normalized feature map of kmer pairs for one instance.

     Parameters
     ----------
     instance_id : int
         identifier used in the error message and passed to _normalization.
     seq : sequence or (header, sequence) pair
         the instance to transform; for a pair only the sequence is used.

     Raises
     ------
     Exception
         if the sequence is missing or empty.
     """
     if seq is None or len(seq) == 0:
         raise Exception('ERROR: something went wrong, empty instance # %d.' % instance_id)
     if len(seq) == 2 and len(seq[1]) > 0:
         # assume the instance is a pair (header,seq) and extract only seq
         seq = seq[1]
     # extract kmer hash codes for all kmers up to r in all positions in seq
     seq_len = len(seq)
     neighborhood_hash_cache = [self._compute_neighborhood_hash(seq, pos) for pos in range(seq_len)]
     # construct features as pairs of kmers up to distance d for all radii up to r
     feature_list = defaultdict(lambda: defaultdict(float))
     for pos in range(seq_len):
         for radius in range(self.min_r, self.r + 1):
             # near the end of the sequence not all radii are available
             if radius < len(neighborhood_hash_cache[pos]):
                 for distance in range(self.min_d, self.d + 1):
                     second_endpoint = pos + distance
                     if second_endpoint + radius < seq_len:
                         feature_code = fast_hash_4(neighborhood_hash_cache[pos][radius],
                                                    radius,
                                                    distance,
                                                    neighborhood_hash_cache[second_endpoint][radius],
                                                    self.bitmask)
                         key = fast_hash_2(radius, distance, self.bitmask)
                         feature_list[key][feature_code] += 1
     return self._normalization(feature_list, instance_id)
Exemplo n.º 7
0
 def _transform_distance(self,
                         feature_list=None,
                         pos=None,
                         radius=None,
                         seq_len=None,
                         neigh_hash_cache=None,
                         neighborhood_weight_cache=None):
     """Accumulate pair features for one (pos, radius) over all distances.

     Endpoints both downstream (positive offset) and upstream (negative
     offset) of ``pos`` are visited, but the hash always uses the absolute
     distance, so both directions share the same feature key.
     """
     distances = list(range(self.min_d, self.d + 1))
     # also consider endpoints upstream of pos
     distances += list(range(-self.d, -self.min_d))
     for distance in distances:
         end = pos + distance
         # Note: after having computed pos, we now treat
         # distance as the positive value only
         distance = abs(distance)
         cond1 = self.weights_dict is None
         # skip (radius, distance) pairs whose external weight is zero
         if cond1 or self.weights_dict.get((radius, distance), 0) != 0:
             if end >= 0 and end + radius < seq_len:
                 if self.use_only_context:
                     # constant sentinel replaces the central kmer hash so
                     # the feature depends only on the context endpoint
                     pfeat = 42
                 else:
                     pfeat = neigh_hash_cache[pos][radius]
                 efeat = neigh_hash_cache[end][radius]
                 feature_code = fast_hash_4(pfeat, efeat, radius, distance,
                                            self.bitmask)
                 key = fast_hash_2(radius, distance, self.bitmask)
                 if neighborhood_weight_cache:
                     # weighted case: both endpoints contribute their weight
                     pw = neighborhood_weight_cache[pos][radius]
                     feature_list[key][feature_code] += pw
                     ew = neighborhood_weight_cache[end][radius]
                     feature_list[key][feature_code] += ew
                 else:
                     feature_list[key][feature_code] += 1
Exemplo n.º 8
0
def enhance_base_cip(base_cip, abstract_cip, mergeids, base_graph, hash_bitmask, mod_dict, core_hash):
    '''
    Merge abstract-level CIP information into a base-level CIP.

    Parameters
    ----------
    base_cip: cip
        a cip that was extracted from the base graph
    abstract_cip: cip
        a cip that was extracted from the abstract graph
    mergeids: list of int
        nodes in the base cip that are in the core of the abstract cip
    base_graph: graph
        the base graph
    hash_bitmask: int
        bitmask used when combining the interface hashes
    mod_dict: dict
        {id in base_graph: modification to interface hash}
        if there is an exceptionally important nodetype in the base graph it makes sure
        that every substitution will preserve this nodetype Oo
        used eg to mark the beginning/end of rna sequences.
        endnode can only be replaced by endnode :)
    core_hash:
        hash for the core that will be used in the finished CIP

    Returns
    -------
        a finished? CIP
    '''
    # we cheated a little with the core, so we need to undo our cheating
    # keep the old cip graph so interface labels can be restored below
    whatever = base_cip.graph.copy()
    base_cip.graph = base_graph.subgraph(base_cip.graph.nodes() + mergeids).copy()

    # every merged node becomes part of the core
    for n in mergeids:
        base_cip.graph.node[n]['core'] = True

    # remaining nodes are interface; restore their distance dependent label
    for n, d in base_cip.graph.nodes(data=True):
        if 'core' not in d:
            d['interface'] = True
            d['distance_dependent_label'] = whatever.node[n]['distance_dependent_label']

    base_cip.core_hash = core_hash
    # merging cip info with the abstract graph
    base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                               abstract_cip.interface_hash,
                                               get_mods(mod_dict, mergeids), 0,
                                               hash_bitmask)

    base_cip.core_nodes_count = abstract_cip.core_nodes_count
    base_cip.radius = abstract_cip.radius
    base_cip.abstract_thickness = abstract_cip.thickness

    # i want to see what they look like :)
    base_cip.abstract_view = abstract_cip.graph
    base_cip.distance_dict = abstract_cip.distance_dict
    return base_cip
Exemplo n.º 9
0
 def _transform_vertex_pair_valid(self,
                                  graph,
                                  vertex_v,
                                  vertex_u,
                                  radius,
                                  distance,
                                  feature_list,
                                  connection_weight=1):
     """Add pair and half features for (vertex_v, vertex_u).

     The pair feature hashes the canonical (order independent) pair of
     neighborhood hashes; the half feature ignores the central vertex v so
     the context can be modelled independently of the vertex identity.
     When ``use_only_context`` is set, only the half feature is recorded.
     """
     cw = connection_weight
     # we need to revert to r/2 and d/2
     # NOTE(review): '/' is float division on Python 3, so the key is a
     # pair of floats — confirm callers expect that.
     radius_dist_key = (radius / 2, distance / 2)
     # reweight using external weight dictionary
     len_v = len(graph.nodes[vertex_v]['neigh_graph_hash'])
     len_u = len(graph.nodes[vertex_u]['neigh_graph_hash'])
     if radius < len_v and radius < len_u:
         # feature as a pair of neighborhoods at a radius,distance
         # canonicalization of pair of neighborhoods
         vertex_v_labels = graph.nodes[vertex_v]['neigh_graph_hash']
         vertex_v_hash = vertex_v_labels[radius]
         vertex_u_labels = graph.nodes[vertex_u]['neigh_graph_hash']
         vertex_u_hash = vertex_u_labels[radius]
         if vertex_v_hash < vertex_u_hash:
             first_hash, second_hash = (vertex_v_hash, vertex_u_hash)
         else:
             first_hash, second_hash = (vertex_u_hash, vertex_v_hash)
         feature = fast_hash_4(first_hash, second_hash, radius, distance,
                               self.bitmask)
         # half features are those that ignore the central vertex v
         # the reason to have those is to help model the context
         # independently from the identity of the vertex itself
         half_feature = fast_hash_3(vertex_u_hash, radius, distance,
                                    self.bitmask)
         if graph.graph.get('weighted', False) is False:
             if self.use_only_context is False:
                 feature_list[radius_dist_key][feature] += cw
             feature_list[radius_dist_key][half_feature] += cw
         else:
             weight_v = graph.nodes[vertex_v]['neigh_graph_weight']
             weight_u = graph.nodes[vertex_u]['neigh_graph_weight']
             weight_vu_radius = weight_v[radius] + weight_u[radius]
             val = cw * weight_vu_radius
             # Note: add a feature only if the value is not 0
             if val != 0:
                 if self.use_only_context is False:
                     feature_list[radius_dist_key][feature] += val
                 half_val = cw * weight_u[radius]
                 feature_list[radius_dist_key][half_feature] += half_val
Exemplo n.º 10
0
 def _transform_vertex_pair_valid(self,
                                  graph,
                                  vertex_v,
                                  vertex_u,
                                  radius,
                                  distance,
                                  feature_list,
                                  connection_weight=1):
     """Add the pair feature and its half feature for (vertex_v, vertex_u).

     The pair feature is the canonical (order independent) hash of the two
     neighborhood hashes at this radius; the half feature drops vertex v so
     the context can be modelled independently of the central vertex.
     """
     # keys are expressed in the caller's r/2, d/2 coordinates
     key = (radius / 2, distance / 2)
     hashes_v = graph.nodes[vertex_v]['neigh_graph_hash']
     hashes_u = graph.nodes[vertex_u]['neigh_graph_hash']
     if radius >= len(hashes_v) or radius >= len(hashes_u):
         return
     hv = hashes_v[radius]
     hu = hashes_u[radius]
     # canonicalize: the smaller hash always comes first
     lo, hi = (hv, hu) if hv < hu else (hu, hv)
     pair_code = fast_hash_4(lo, hi, radius, distance, self.bitmask)
     # half feature ignores the central vertex v to model context alone
     half_code = fast_hash_3(hu, radius, distance, self.bitmask)
     if graph.graph.get('weighted', False) is False:
         feature_list[key][pair_code] += connection_weight
         feature_list[key][half_code] += connection_weight
     else:
         wv = graph.nodes[vertex_v]['neigh_graph_weight'][radius]
         wu = graph.nodes[vertex_u]['neigh_graph_weight'][radius]
         val = connection_weight * (wv + wu)
         # only record a feature when its value is non zero
         if val != 0:
             feature_list[key][pair_code] += val
             feature_list[key][half_code] += connection_weight * wu
Exemplo n.º 11
0
 def _transform_vertex_pair(self,
                            graph,
                            vertex_v,
                            vertex_u,
                            distance,
                            feature_list,
                            connection_weight=1):
     """Accumulate pair and half features for (v, u) at every radius.

     For each radius (step 2) and each label index, the canonical pair of
     neighborhood hashes is combined with radius and distance into a
     feature code; a half feature that ignores the central vertex is
     recorded alongside.
     """
     # for all radii
     for radius in range(self.min_r, self.r + 2, 2):
         for label_index in range(graph.graph['label_size']):
             # skip radii for which no neighborhood hash was computed
             if radius < len(graph.node[vertex_v]
                             ['neigh_graph_hash'][label_index]) and \
                     radius < len(graph.node[vertex_u]['neigh_graph_hash'][label_index]):
                 # feature as a pair of neighborhoods at a radius,distance
                 # canonicalization of pair of neighborhoods
                 vertex_v_hash = graph.node[vertex_v]['neigh_graph_hash'][
                     label_index][radius]
                 vertex_u_hash = graph.node[vertex_u]['neigh_graph_hash'][
                     label_index][radius]
                 if vertex_v_hash < vertex_u_hash:
                     first_hash, second_hash = (vertex_v_hash,
                                                vertex_u_hash)
                 else:
                     first_hash, second_hash = (vertex_u_hash,
                                                vertex_v_hash)
                 feature = fast_hash_4(first_hash, second_hash, radius,
                                       distance, self.bitmask)
                 # half features are those that ignore the central vertex v
                 # the reason to have those is to help model the context
                 # independently from the identity of the vertex itself
                 half_feature = fast_hash_3(vertex_u_hash, radius, distance,
                                            self.bitmask)
                 # NOTE(review): unlike sibling methods, fast_hash_2 is
                 # called here without self.bitmask — confirm intentional
                 key = fast_hash_2(radius, distance)
                 if graph.graph.get('weighted', False) is False:
                     feature_list[key][feature] += 1
                     feature_list[key][half_feature] += 1
                 else:
                     # weighted: sum of both neighborhood weights, scaled
                     val = connection_weight * \
                         (graph.node[vertex_v]['neigh_graph_weight'][radius] +
                          graph.node[vertex_u]['neigh_graph_weight'][radius])
                     feature_list[key][feature] += val
                     half_val = \
                         connection_weight * \
                         graph.node[vertex_u]['neigh_graph_weight'][radius]
                     feature_list[key][half_feature] += half_val
Exemplo n.º 12
0
 def _transform_vertex_pair(self,
                            graph,
                            vertex_v,
                            vertex_u,
                            distance,
                            feature_list,
                            connection_weight=1):
     """Accumulate pair and half features for (v, u) at every radius.

     For each radius (step 2) and each label index, the canonical pair of
     neighborhood hashes is combined with radius and distance into a
     feature code; a half feature that ignores the central vertex is
     recorded alongside.
     """
     # for all radii
     for radius in range(self.min_r, self.r + 2, 2):
         for label_index in range(graph.graph['label_size']):
             # skip radii for which no neighborhood hash was computed
             if radius < len(graph.node[vertex_v]
                             ['neigh_graph_hash'][label_index]) and \
                     radius < len(graph.node[vertex_u]['neigh_graph_hash'][label_index]):
                 # feature as a pair of neighborhoods at a radius,distance
                 # canonicalization of pair of neighborhoods
                 vertex_v_hash = graph.node[vertex_v]['neigh_graph_hash'][label_index][radius]
                 vertex_u_hash = graph.node[vertex_u]['neigh_graph_hash'][label_index][radius]
                 if vertex_v_hash < vertex_u_hash:
                     first_hash, second_hash = (vertex_v_hash,
                                                vertex_u_hash)
                 else:
                     first_hash, second_hash = (vertex_u_hash,
                                                vertex_v_hash)
                 feature = fast_hash_4(first_hash, second_hash,
                                       radius, distance, self.bitmask)
                 # half features are those that ignore the central vertex v
                 # the reason to have those is to help model the context
                 # independently from the identity of the vertex itself
                 half_feature = fast_hash_3(vertex_u_hash,
                                            radius, distance, self.bitmask)
                 # NOTE(review): fast_hash_2 called without self.bitmask
                 # here, unlike sibling methods — confirm intentional
                 key = fast_hash_2(radius, distance)
                 if graph.graph.get('weighted', False) is False:
                     feature_list[key][feature] += 1
                     feature_list[key][half_feature] += 1
                 else:
                     # weighted: sum of both neighborhood weights, scaled
                     val = connection_weight * \
                         (graph.node[vertex_v]['neigh_graph_weight'][radius] +
                          graph.node[vertex_u]['neigh_graph_weight'][radius])
                     feature_list[key][feature] += val
                     half_val = \
                         connection_weight * \
                         graph.node[vertex_u]['neigh_graph_weight'][radius]
                     feature_list[key][half_feature] += half_val
Exemplo n.º 13
0
 def _compute_vertex_based_features(self, seq, weights=None):
     """Return a sparse matrix with one normalized feature row per position.

     Parameters
     ----------
     seq : sequence
         the instance to transform; must be non-empty.
     weights : list of float, optional
         per-position weights; must have the same length as ``seq``.
         When given, each feature is weighted by the neighborhood weights
         of both endpoints instead of being counted.
     """
     if seq is None or len(seq) == 0:
         raise Exception('ERROR: something went wrong, empty instance.')
     # extract kmer hash codes for all kmers up to r in all positions in seq
     vertex_based_features = []
     seq_len = len(seq)
     if weights:
         if len(weights) != seq_len:
             raise Exception('ERROR: sequence and weights \
                 must be same length.')
         neighborhood_weight_cache = \
             [self._compute_neighborhood_weight(weights, pos)
              for pos in range(seq_len)]
     neigh_hash_cache = [self._compute_neighborhood_hash(seq, pos)
                         for pos in range(seq_len)]
     for pos in range(seq_len):
         # construct features as pairs of kmers up to distance d
         # for all radii up to r
         local_features = defaultdict(lambda: defaultdict(float))
         for radius in range(self.min_r, self.r + 1):
             if radius < len(neigh_hash_cache[pos]):
                 for distance in range(self.min_d, self.d + 1):
                     end = pos + distance
                     if end + radius < seq_len:
                         feature_code = \
                             fast_hash_4(neigh_hash_cache[pos][radius],
                                         neigh_hash_cache[end][radius],
                                         radius,
                                         distance,
                                         self.bitmask)
                         key = fast_hash_2(radius, distance, self.bitmask)
                         if weights:
                             # both endpoints contribute their weight
                             local_features[key][feature_code] += \
                                 neighborhood_weight_cache[pos][radius]
                             local_features[key][feature_code] += \
                                 neighborhood_weight_cache[end][radius]
                         else:
                             local_features[key][feature_code] += 1
         vertex_based_features.append(self._normalization(local_features,
                                                          inner_normalization=False,
                                                          normalization=self.normalization))
     data_matrix = self._convert_dict_to_sparse_matrix(vertex_based_features)
     return data_matrix
Exemplo n.º 14
0
 def _transform(self, orig_seq):
     """Compute the (optionally weighted) pair-of-kmers feature map.

     The input is first split into a sequence and optional per-position
     weights; features are pairs of neighborhood hashes at all radii up to
     r and distances up to d, normalized before being returned.
     """
     seq, weights = self._get_sequence_and_weights(orig_seq)
     n = len(seq)
     # neighborhood hash codes for every position, computed once up front
     hash_cache = [self._compute_neighborhood_hash(seq, i)
                   for i in range(n)]
     if weights:
         if len(weights) != n:
             raise Exception('ERROR: sequence and weights \
                 must be same length.')
         # per-position neighborhood weights, computed once up front
         weight_cache = [self._compute_neighborhood_weight(weights, i)
                         for i in range(n)]
     # pairs of kmers up to distance d for all radii up to r
     features = defaultdict(lambda: defaultdict(float))
     for start in range(n):
         for rad in range(self.min_r, self.r + 1):
             if rad >= len(hash_cache[start]):
                 continue
             for dist in range(self.min_d, self.d + 1):
                 other = start + dist
                 if other + rad >= n:
                     continue
                 code = fast_hash_4(hash_cache[start][rad],
                                    hash_cache[other][rad],
                                    rad,
                                    dist,
                                    self.bitmask)
                 key = fast_hash_2(rad, dist, self.bitmask)
                 if weights:
                     # both endpoints contribute their neighborhood weight
                     features[key][code] += weight_cache[start][rad]
                     features[key][code] += weight_cache[other][rad]
                 else:
                     features[key][code] += 1
     return self._normalization(features,
                                inner_normalization=self.inner_normalization,
                                normalization=self.normalization)
Exemplo n.º 15
0
 def _compute_vertex_based_features(self, seq):
     """Return a sparse matrix with one normalized feature row per position.

     For every position in ``seq``, pairs of neighborhood hash codes (all
     radii up to r, all distances up to d) are hashed into feature codes,
     normalized per position, and converted into a sparse matrix.
     """
     if seq is None or len(seq) == 0:
         raise Exception('ERROR: something went wrong, empty instance.')
     # extract kmer hash codes for all kmers up to r in all positions in seq
     feature_dict = {}
     seq_len = len(seq)
     neighborhood_hash_cache = [self._compute_neighborhood_hash(seq, pos) for pos in range(seq_len)]
     for pos in range(seq_len):
         # construct features as pairs of kmers up to distance d for all radii up to r
         feature_list = defaultdict(lambda: defaultdict(float))
         for radius in range(self.min_r, self.r + 1):
             # near the end of the sequence not all radii are available
             if radius < len(neighborhood_hash_cache[pos]):
                 for distance in range(self.min_d, self.d + 1):
                     if pos + distance + radius < seq_len:
                         feature_code = fast_hash_4(neighborhood_hash_cache[pos][radius],
                                                    radius,
                                                    distance,
                                                    neighborhood_hash_cache[pos + distance][radius],
                                                    self.bitmask)
                         key = fast_hash_2(radius, distance, self.bitmask)
                         feature_list[key][feature_code] += 1
         feature_dict.update(self._normalization(feature_list, pos))
     data_matrix = self._convert_dict_to_sparse_matrix(feature_dict)
     return data_matrix
Exemplo n.º 16
0
def extract_cips_base(node,
                      graphmanager,
                      base_thickness_list=None,
                      hash_bitmask=None,
                      mod_dict=None,
                      radius_list=None,
                      thickness_list=None,
                      node_filter=lambda x, y: True):
    '''
    Extract base-level CIPs for ``node``, annotated with the abstract view.

    Parameters
    ----------
    node: int
        id of a node in the base graph
    graphmanager: graph-wrapper
        the wrapper that contains the base and abstract graphs
    base_thickness_list: [int]
        interface thicknesses used for the base-level extraction
    hash_bitmask: int
        bitmask used when combining interface hashes
    mod_dict: dict
        {id in base_graph: modification to interface hash}
        if there is an exceptionally important nodetype in the base graph
        it makes sure that every substitution will preserve this nodetype.
        used eg to mark the beginning/end of rna sequences.
        endnode can only be replaced by endnode :)
    radius_list: [int]
        radii used for the base-level extraction
    thickness_list: [int]
        thicknesses used for the abstract-level extraction
    node_filter: callable
        predicate handed to the abstract-level extraction

    Returns
    -------
    [CIP]
        a list of core_interface_pairs
    '''
    # defaults are None instead of mutable {} / [] so that calls never
    # share (and possibly mutate) a single default object
    if mod_dict is None:
        mod_dict = {}
    if radius_list is None:
        radius_list = []

    # PREPARE
    abstract_graph = graphmanager.abstract_graph()
    base_graph = graphmanager.base_graph()

    # make sure both graphs carry the 'hlabel' preprocessing
    if 'hlabel' not in abstract_graph.node[abstract_graph.nodes()[0]]:
        edengraphtools._label_preprocessing(abstract_graph)
    if 'hlabel' not in base_graph.node[base_graph.nodes()[0]]:
        edengraphtools._label_preprocessing(base_graph)

    # LOOK UP ABSTRACT GRAPHS NODE AND
    # EXTRACT CIPS NORMALLY ON ABSTRACT GRAPH
    for n, d in abstract_graph.nodes(data=True):
        if node in d['contracted']:
            abs_node = n
            break
    else:
        # every base node must be contracted into some abstract node
        raise Exception("IMPOSSIBLE NODE")

    abstract_cips = graphtools.extract_core_and_interface(root_node=abs_node, graph=abstract_graph, radius_list=[0],
                                                          thickness_list=thickness_list, hash_bitmask=hash_bitmask,
                                                          node_filter=node_filter)

    # FOR EVERY ABSTRACT CIP: EXTRACT BASE CIP
    cips = []

    for abstract_cip in abstract_cips:

        base_level_cips = graphtools.extract_core_and_interface(node, base_graph, radius_list=radius_list,
                                                                thickness_list=base_thickness_list,
                                                                hash_bitmask=hash_bitmask)
        # FOR EVERY BASE CIP: hash interfaces and save the abstract view
        for base_cip in base_level_cips:
            # core nodes are exactly those not marked as interface
            cores = [n for n, d in base_cip.graph.nodes(data=True) if 'interface' not in d]
            base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                                       abstract_cip.interface_hash,
                                                       get_mods(mod_dict, cores), 1337,
                                                       hash_bitmask)
            base_cip.abstract_view = abstract_cip.graph
            cips.append(base_cip)

    return cips
Exemplo n.º 17
0
def extract_cips_base(node,
                      graphmanager,
                      base_thickness_list=None,
                      hash_bitmask=None,
                      mod_dict=None,
                      **argz):
    '''
    Extract core-interface pairs (CIPs) around a BASE-graph node and annotate
    each base-level CIP with information from the abstract-level CIP that
    contains it.

    :param node: node id in the BASE graph
    :param graphmanager: object providing abstract_graph(), base_graph()
        and a vectorizer attribute
    :param base_thickness_list: thickness_list used for the base-level
        extraction (overrides any 'thickness_list' in argz)
    :param hash_bitmask: bitmask forwarded to the hashing routines
    :param mod_dict: optional mapping of node id -> hash modifier,
        consumed by get_mods; defaults to an empty mapping
    :param argz: forwarded to graphtools.extract_core_and_interface;
        must contain 'radius_list'
    :return: a list of base-level CIPs, each with .interface_hash folded
        together with the abstract interface hash and .abstract_view set
        to the abstract CIP's graph
    '''
    # NOTE: avoid a mutable default argument; fall back to a fresh dict.
    if mod_dict is None:
        mod_dict = {}

    # PREPARE
    abstract_graph = graphmanager.abstract_graph()
    base_graph = graphmanager.base_graph()
    vectorizer = graphmanager.vectorizer
    # lazily attach 'hlabel' annotations if a graph was not preprocessed yet
    if 'hlabel' not in abstract_graph.node[abstract_graph.nodes()[0]]:
        vectorizer._label_preprocessing(abstract_graph)
    if 'hlabel' not in base_graph.node[base_graph.nodes()[0]]:
        vectorizer._label_preprocessing(base_graph)

    # LOOK UP THE ABSTRACT NODE WHOSE 'contracted' SET CONTAINS OUR BASE NODE,
    # THEN EXTRACT CIPS NORMALLY ON THE ABSTRACT GRAPH
    for n, d in abstract_graph.nodes(data=True):
        if node in d['contracted']:
            abs_node = n
            break
    else:
        raise Exception("IMPOSSIBLE NODE")

    # abstract extraction always uses radius 0; remember the caller's radii
    radiuslist_backup = argz['radius_list']
    argz['radius_list'] = [0]
    abstract_cips = graphtools.extract_core_and_interface(
        abs_node,
        abstract_graph,
        vectorizer=vectorizer,
        hash_bitmask=hash_bitmask,
        **argz)

    # FOR EVERY ABSTRACT CIP: EXTRACT BASE CIPS
    cips = []
    argz['radius_list'] = radiuslist_backup
    # base-level extraction uses the base thickness list; this is
    # loop-invariant, so set it once instead of on every iteration
    argz['thickness_list'] = base_thickness_list
    for abstract_cip in abstract_cips:
        base_level_cips = graphtools.extract_core_and_interface(
            node,
            base_graph,
            vectorizer=vectorizer,
            hash_bitmask=hash_bitmask,
            **argz)
        # FOR EVERY BASE CIP: hash interfaces and save the abstract view
        for base_cip in base_level_cips:
            # core nodes are exactly those not flagged as interface
            cores = [
                n for n, d in base_cip.graph.nodes(data=True)
                if 'interface' not in d
            ]
            # fold the abstract interface hash into the base interface hash
            base_cip.interface_hash = eden.fast_hash_4(
                base_cip.interface_hash, abstract_cip.interface_hash,
                get_mods(mod_dict, cores), 1337, hash_bitmask)
            base_cip.abstract_view = abstract_cip.graph
            cips.append(base_cip)

    return cips
Exemplo n.º 18
0
def extract_cips(node,
                 abstract_graph,
                 base_graph,
                 abstract_radius_list=None,
                 abstract_thickness_list=None,
                 base_thickness_list=None,
                 vectorizer=None,
                 hash_bitmask=None,
                 mod_dict=None,
                 **argz):
    '''
    Extract CIPs (core-interface pairs) for a node of the ABSTRACT graph and
    materialize the corresponding base-level CIPs.

    :param node: node in the abstract graph
    :param abstract_graph: the abstract graph, expanded
    :param base_graph: the underlying real graph
    :param abstract_radius_list: radii used on the abstract graph
    :param abstract_thickness_list: thicknesses used on the abstract graph
    :param base_thickness_list: thicknesses used on the base graph
    :param vectorizer: vectorizer providing _label_preprocessing
    :param hash_bitmask: bitmask forwarded to the hashing routines
    :param mod_dict: optional mapping of node id -> hash modifier,
        consumed by get_mods; defaults to an empty mapping
    :param argz: forwarded to graphtools.extract_core_and_interface
    :return: a list of base-level CIPs annotated with abstract CIP info
    '''
    # NOTE: avoid a mutable default argument; fall back to a fresh dict.
    if mod_dict is None:
        mod_dict = {}

    # lazily attach 'hlabel' annotations if a graph was not preprocessed yet
    if 'hlabel' not in abstract_graph.node[abstract_graph.nodes()[0]]:
        vectorizer._label_preprocessing(abstract_graph)
    if 'hlabel' not in base_graph.node[base_graph.nodes()[0]]:
        vectorizer._label_preprocessing(base_graph)

    # on the abstract graph we use the normal extract cip stuff:
    abstract_cips = graphtools.extract_core_and_interface(node,
                                                          abstract_graph,
                                                          radius_list=abstract_radius_list,
                                                          thickness_list=abstract_thickness_list,
                                                          vectorizer=vectorizer,
                                                          hash_bitmask=hash_bitmask,
                                                          **argz)

    cips = []
    for acip in abstract_cips:
        # now we need to calculate the real cips:
        # the trick is to also use the normal extractor, but in order to do
        # that we need to collapse the 'core'

        # MERGE THE CORE OF THE ABSTRACT GRAPH IN THE BASE GRAPH:
        # collect every base node contracted into any abstract core node
        mergeids = [base_graph_id for radius in range(
            acip.radius + 1) for abstract_node_id in acip.distance_dict.get(radius)
            for base_graph_id in abstract_graph.node[abstract_node_id]['contracted']]
        base_copy = base_graph.copy()
        # fold all core nodes into the first one; use a dedicated loop
        # variable so the 'node' parameter is not shadowed/clobbered
        for merge_target in mergeids[1:]:
            graphtools.merge(base_copy, mergeids[0], merge_target)

        # do cip extraction and calculate the real core hash
        base_level_cips = graphtools.extract_core_and_interface(mergeids[0],
                                                                base_copy,
                                                                radius_list=[0],
                                                                thickness_list=base_thickness_list,
                                                                vectorizer=vectorizer,
                                                                hash_bitmask=hash_bitmask,
                                                                **argz)
        # core hash is computed on the ORIGINAL (unmerged) core subgraph
        core_hash = graphtools.graph_hash(base_graph.subgraph(mergeids), hash_bitmask=hash_bitmask)

        # now we have a bunch of base_level_cips and need to attach info
        # from the abstract cip.
        for base_cip in base_level_cips:

            # we cheated a little with the core (it was merged into a single
            # node), so we need to undo our cheating: restore the real
            # subgraph containing the full core plus the extracted interface
            whatever = base_cip.graph.copy()
            base_cip.graph = base_graph.subgraph(base_cip.graph.nodes() + mergeids).copy()

            for n in mergeids:
                base_cip.graph.node[n]['core'] = True

            # everything that is not core becomes interface; carry over the
            # distance-dependent labels from the extraction-time graph
            for n, d in base_cip.graph.nodes(data=True):
                if 'core' not in d:
                    d['interface'] = True
                    d['distance_dependent_label'] = whatever.node[n]['distance_dependent_label']

            base_cip.core_hash = core_hash

            # merging cip info with the abstract graph
            base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                                       acip.interface_hash,
                                                       get_mods(mod_dict, mergeids), 0,
                                                       hash_bitmask)

            base_cip.core_nodes_count = acip.core_nodes_count
            base_cip.radius = acip.radius
            base_cip.abstract_thickness = acip.thickness

            # i want to see what they look like :)
            base_cip.abstract_view = acip.graph

            cips.append(base_cip)
    return cips