Пример #1
0
    def find_current(self, rs):
        """Resolve *rs* to the rsid of its most recent non-deleted successor.

        ``rs`` may be a MergeHistoryNode or a key looked up in ``self.rsids``.
        Returns ``None`` when the node is unknown or when every successor
        (excluding the source node itself) has been deleted.
        """
        node = rs if isinstance(rs, MergeHistoryNode) else self.rsids.get(rs)
        if node is None:
            return None

        # Follow child links forward to collect the full merge chain,
        # source node first.
        history = [node]
        while node.child is not None:
            node = node.child
            history.append(node)

        # Scan newest-to-oldest, never falling back to the source itself,
        # for the first entry that has not been deleted.
        for entry in reversed(history[1:]):
            if not entry.deleted:
                return entry.rsid
Пример #2
0
    def load(cls, path_to_cluster):
        """Load from disk.

        Args:
            path_to_cluster (str): Folder where `ClusterChain` was saved to using `ClusterChain.save`.

        Returns:
            ClusterChain: The loaded object.
        """
        # A single file is treated as one partial chain matrix.
        if os.path.isfile(path_to_cluster):
            return cls.from_partial_chain(smat_util.load_matrix(path_to_cluster))

        config_path = os.path.join(path_to_cluster, "config.json")
        if not os.path.exists(config_path):
            raise ValueError(
                f"Cluster config file, {config_path}, does not exist")

        with open(config_path, "r", encoding="utf-8") as fin:
            length = json.loads(fin.read()).get("len", None)
        if length is None:
            raise ValueError(
                f'Cluster config file, {config_path}, does not have "len" parameter'
            )

        # One C{i}.npz matrix per chain level, loaded in order.
        chain = [
            smat_util.load_matrix(
                os.path.join(path_to_cluster, f"C{i}.npz")
            ).tocsc().astype(np.float32)
            for i in range(length)
        ]
        return cls(chain)
Пример #3
0
def create_chain(elem):
    """Order a building's way segments into a closed boundary chain.

    Args:
        elem: Tuple ``(bldg_name, ways)`` where each way is a 4-tuple
            ``(x1, y1, x2, y2)`` describing a directed segment.

    Returns:
        ``(bldg_name, chain)`` where ``chain`` is the ordered list of
        points forming the loop starting at the first segment, or
        ``(bldg_name, [])`` when no closed loop exists.
    """
    # (The original declared ``global errors`` but never used it; removed.)
    bldg_name, ways = elem
    # Fewer than 3 segments can never close a loop.
    if len(ways) < 3:
        return (bldg_name, [])
    start = (ways[0][0], ways[0][1])
    curr = start
    # Adjacency map: segment start point -> list of end points.
    d = defaultdict(list)
    loop = False
    for way in ways:
        d[(way[0], way[1])].append((way[2], way[3]))
        if (way[2], way[3]) == start:
            loop = True
    if not loop:
        # Nothing ever leads back to the start point.
        return (bldg_name, [])
    chain = []
    visited = set()
    count = 0
    while curr != start or count == 0:
        chain.append(curr)
        next_nodes = d[curr]
        curr = None
        for next_node in next_nodes:
            if next_node not in visited:
                curr = next_node
                visited.add(next_node)
                break
        count += 1
        # Bail out on dead ends or walks longer than the segment count.
        if count > len(ways) or curr is None:
            return (bldg_name, [])
    return (bldg_name, chain)
Пример #4
0
  def find_current(self,rs):
    """Map *rs* (a MergeHistoryNode or an rsid key) to the rsid of its
    newest non-deleted descendant, or None when nothing qualifies."""
    if isinstance(rs,MergeHistoryNode):
      node = rs
    else:
      node = self.rsids.get(rs)
    if node is None:
      return None

    # Collect the forward chain of child links, source first.
    lineage = [node]
    cursor = node.child
    while cursor is not None:
      lineage.append(cursor)
      cursor = cursor.child

    # Search newest-to-oldest; index 0 (the source) is deliberately
    # excluded, matching the original range(len-1, 0, -1) walk.
    idx = len(lineage) - 1
    while idx > 0:
      if not lineage[idx].deleted:
        return lineage[idx].rsid
      idx -= 1
Пример #5
0
def getChainsFromConnections(connections,checkConnections=True):
    '''Take a list of connections and return a list of connection chains.

       connections is a dictionary of connections between elements (which
         must be hashable) and can be generated using getElementConnections.
       The checkConnections option tests that there is only one path
         through each point (aka 2 or fewer connections, no branching).
       Returns a list of chains (lists of elements).
       '''
    connections = deepcopy(connections) # Protect the input from modification
    if checkConnections: # Check that there is no branching
        assert all( len(v)<3 for k,v in viewitems(connections) ), 'Aborting; this network has branching'

    chains = []
    while len(connections): # loop over possible chains
        # Pick a starting point (an end point if possible)
        # NOTE(review): _firstOrOther presumably returns the first element
        # of the list, or the second argument when the list is empty --
        # confirm against its definition.
        currPt = _firstOrOther([pt for pt,conn in viewitems(connections)
                                   if len(conn)==1],
                               next(iter(connections))) # was connections.keys()[0]
        # Form a chain and move the current point forward
        chain = [currPt]
        currPt = connections.pop(currPt)[0]
        while currPt: # loop to fill a chain, stop on an invalid
            chain.append(currPt)
            if len(connections)==0:
                break
            # Drop the back-link to the point we just came from so the
            # walk only ever moves forward along the chain.
            connections[currPt] = deletecases(connections[currPt], [chain[-2]])
            # pop() with a default so a missing key ends the chain cleanly.
            currPt = _firstOrOther(connections.pop(currPt,[]))
        chains.append(chain)
    return chains
def getChainsFromConnections(connections, checkConnections=True):
    '''Take a list of connections and return a list of connection chains.

       connections is a dictionary of connections between elements (which
         must be hashable) and can be generated using getElementConnections.
       The checkConnections option tests that there is only one path
         through each point (aka 2 or fewer connections, no branching).
       Returns a list of chains (lists of elements).
       '''
    connections = deepcopy(connections)  # Protect the input from modification
    if checkConnections:  # Check that there is no branching
        assert all(len(v) < 3 for k, v in viewitems(
            connections)), 'Aborting; this network has branching'

    chains = []
    while len(connections):  # loop over possible chains
        # Pick a starting point (an end point if possible)
        # NOTE(review): _firstOrOther presumably returns the first element
        # of the list, or the fallback when the list is empty -- confirm.
        currPt = _firstOrOther(
            [pt for pt, conn in viewitems(connections) if len(conn) == 1],
            next(iter(connections)))  # was connections.keys()[0]
        # Form a chain and move the current point forward
        chain = [currPt]
        currPt = connections.pop(currPt)[0]
        while currPt:  # loop to fill a chain, stop on an invalid
            chain.append(currPt)
            if len(connections) == 0:
                break
            # Drop the back-link to the previous point so the walk only
            # ever moves forward along the chain.
            connections[currPt] = deletecases(connections[currPt], [chain[-2]])
            # pop() with a default so a missing key ends the chain cleanly.
            currPt = _firstOrOther(connections.pop(currPt, []))
        chains.append(chain)
    return chains
Пример #7
0
 def walk(self, callback, chain):
     """Visit this node via *callback*, then recurse into children.

     The callback receives (node, chain) and must return a strict boolean;
     False prunes this subtree and is propagated to the caller. The node
     sits on *chain* only for the duration of the child traversal.
     """
     verdict = callback(self, chain)
     assert verdict in (True, False), "Walk function must return boolean, not %r" % (verdict, )
     if verdict:
         chain.append(self)
         self._walk(callback, chain)
         chain.pop()
     else:
         return False
Пример #8
0
 def walk(self, callback, chain):
     """Depth-first traversal driven by *callback*.

     Returns False (skipping recursion) when the callback vetoes the node;
     otherwise the node is pushed onto *chain*, the children are walked via
     ``_walk``, and the node is popped again before returning None.
     """
     proceed = callback(self, chain)
     assert proceed in (True, False), "Walk function must return boolean, not %r" % (proceed, )
     if not proceed:
         return False
     chain.append(self)
     self._walk(callback, chain)
     chain.pop()
Пример #9
0
    def _is_reg_free(self,
                     addr,
                     reg,
                     ignore_current_bb,
                     level,
                     total_steps,
                     debug=False,
                     prev=None):
        """Recursively check whether ``reg`` is free at block ``addr``.

        Explores CFG successors up to ``self.cfg_exploration_depth`` levels
        and ``self.max_cfg_steps`` total steps (shared through the
        one-element list ``total_steps``). Returns a nested chain of the
        addresses visited when the register is free on every path; raises
        RegUsed as soon as any path shows it is used (or the search gives
        up: depth/step limits, unknown blocks, CFG errors).
        """
        # Normalize prev *before* it is ever used: the original initialized
        # it only after several raise sites, so formatting those messages
        # with map(hex, None) raised TypeError instead of RegUsed.
        if prev is None:
            prev = []

        # Format the trail eagerly: "%s" % map(...) under Python 3 prints a
        # useless "<map object at ...>" repr instead of the addresses.
        if level >= self.cfg_exploration_depth:
            raise RegUsed("Max depth %#x %s" % (addr, [hex(p) for p in prev]))

        if not ignore_current_bb:
            if addr not in self.reg_free_map:
                # we reached some weird bb
                raise RegUsed("Weird block %#x %s" % (addr, [hex(p) for p in prev]))
            if reg in self.reg_free_map[addr]:
                return [addr]
            if reg in self.reg_not_free_map[addr]:
                raise RegUsed("Not free in bb %#x %s" % (addr, [hex(p) for p in prev]))

        try:
            succ, is_terminate = self.get_all_succ(addr)
            if is_terminate:
                return [addr]
            if len(succ) == 0:
                # no successors is weird, the cfg may be incomplete (e.g., NRFIN_00026 original 0x0897F4D5)
                raise RegUsed("No successors  %#x %s" % (addr, [hex(p) for p in prev]))
            total_steps[0] += 1
            if total_steps[0] >= self.max_cfg_steps:
                raise RegUsed("Too many steps  %#x %s" %
                              (addr, [hex(p) for p in prev]))
        except CfgError:
            # something weird is happening in the cfg, let's assume no reg is free
            raise RegUsed("CFG error %#x %s" % (addr, [hex(p) for p in prev]))

        chain = []
        for s in succ:
            if s in prev:
                continue  # avoid exploring already explored nodes (except the first one).
            new_prev = list(prev)
            new_prev.append(s)
            pchain = self._is_reg_free(s,
                                       reg,
                                       False,
                                       level=level + 1,
                                       total_steps=total_steps,
                                       prev=new_prev,
                                       debug=debug)
            chain.append(pchain)
        chain.append(addr)
        return chain
Пример #10
0
 def chain(self, length=-1, reverse=False, stop_ds=None):
     """Collect this dataset and its ancestors.

     Walks ``previous`` links, stopping after *length* entries (when not
     -1), at *stop_ds* (normalized through Dataset), or at the root. The
     result is oldest-first unless *reverse* is True.
     """
     if stop_ds:
         # resolve all formats to the same format
         stop_ds = Dataset(stop_ds)
     lineage = []
     node = self
     while len(lineage) != length and node != stop_ds:
         lineage.append(node)
         if not node.previous:
             break
         node = Dataset(node.previous)
     if not reverse:
         lineage.reverse()
     return lineage
Пример #11
0
        def aux(node, indent=2, chain=None):
            # Render *node* and (recursively) its children as indented
            # lines. ``chain`` carries the keys already on the current
            # path so dependency cycles are not expanded twice.
            if chain is None:
                chain = []

            result = ["%s%s" % (" " * indent, node.render())]
            children = sorted(self.get_children(node))
            # Recurse only into children not already on this path; each
            # child receives its own extended copy of the chain.
            children = [aux(c, indent=indent + 2, chain=chain + [c.key]) for c in children if c.key not in chain]

            chain.append(node.key)
            p = self.packages.get(node.key)
            if p:
                # NOTE(review): ``seen`` is a closure variable from the
                # enclosing scope -- this marks the package as rendered.
                seen.add(p)

            # Child renderings are nested lists; flatten them under ours.
            result += list(flatten(children))
            return result
Пример #12
0
 def chain(self, length=-1, reverse=False, stop_jobid=None):
     """Collect this dataset and its ancestors by jobid.

     Follows ``previous`` links until *length* entries are gathered (when
     not -1), *stop_jobid* is reached, or the root is hit. Oldest first
     unless *reverse* is True.
     """
     if stop_jobid:
         # resolve whatever format to the bare jobid
         stop_jobid = Dataset(stop_jobid)
         if stop_jobid:
             stop_jobid = stop_jobid.jobid
     ancestry = []
     node = self
     while length != len(ancestry) and node.jobid != stop_jobid:
         ancestry.append(node)
         if not node.previous:
             break
         node = Dataset(node.previous)
     if reverse:
         return ancestry
     ancestry.reverse()
     return ancestry
Пример #13
0
    def chain_starting_with(self, ci: CommonInterval) -> List[CommonInterval]:
        """Return the maximal overlap chain beginning at *ci*.

        Scans the intervals that follow *ci* on its nesting level, greedily
        extending while each consecutive pair intersects. Returns [] when
        no immediate neighbor intersects *ci* at all.
        """
        level = self.reverse_index[ci]
        row = self.nesting_levels[level]
        successors = row[row.index(ci) + 1:]

        tail = []
        last = ci
        for candidate in successors:
            if not CommonInterval.intersect(last, candidate):
                break
            tail.append(candidate)
            last = candidate

        return [ci] + tail if tail else []
Пример #14
0
 def reconstruct_exons(self, path):
     """Convert a node path into a minimal list of Exon intervals.

     Negative-strand paths are flipped so coordinates always increase,
     node ids are mapped to (start, end) intervals, and runs of
     contiguous intervals are merged into single exons.
     """
     # reverse negative stranded data so that all paths go from
     # small -> large genomic coords
     if self.strand == Strand.NEG:
         path.reverse()
     # convert from integer node labels to genome (start, end) tuples
     intervals = [self.get_node_interval(nid) for nid in path]
     exons = []
     run = [intervals[0]]
     for iv in intervals[1:]:
         if run[-1].end != iv.start:
             # the contiguous run broke: emit one exon covering it
             exons.append(Exon(run[0].start, run[-1].end))
             run = []
         run.append(iv)
     # emit the final run
     exons.append(Exon(run[0].start, run[-1].end))
     return exons
Пример #15
0
def noun_string(data_org):
    """Find strings of three nouns or more in *data_org*.

    Returns:
        pd.DataFrame with one column listing each noun string found.
    """
    chains = []
    # tokenize to prepare for tagging
    tokens = word_tokenize(data_org)
    # Keep the tagged sequence as an ordered list of (word, tag) pairs:
    # the previous dict() conversion collapsed repeated words and lost
    # token order, so noun runs could be miscounted or missed entirely.
    tagged = nltk.pos_tag(tokens)
    chain = []
    for w, tag in tagged:
        # find all nouns based on treebank format
        if tag.startswith('N'):
            chain.append(w)
        else:
            if len(chain) >= 3:
                chains.append(" ".join(chain))
            chain = []
    # Flush a trailing noun run: text ending in 3+ nouns was dropped before.
    if len(chain) >= 3:
        chains.append(" ".join(chain))

    # move information to dataframe for printing to excel
    df_noun_string = pd.DataFrame({'Noun Strings (3+ Nouns in a row)': chains},
                                  columns=['Noun Strings (3+ Nouns in a row)'])
    return df_noun_string
Пример #16
0
 def get_path_from_parent(self, parent):
     """
     Return a list of PathInfos containing the path from the parent
     model to the current model, or an empty list if parent is not a
     parent of the current model.
     """
     if self.model is parent:
         return []
     model = self.concrete_model
     # Base chain comes back newest-first; reverse it and append the
     # concrete model so adjacent pairs can be linked below.
     chain = model._meta.get_base_chain(parent)
     chain.reverse()
     chain.append(model)
     # Stitch PathInfos between each (ancestor, child) pair in the chain.
     path = []
     for ancestor, child in zip(chain, chain[1:]):
         link = child._meta.get_ancestor_link(ancestor)
         path.extend(link.get_reverse_path_info())
     return path
Пример #17
0
 def reconstruct_exons(self, path):
     """Collapse a genomic node path into merged Exon objects."""
     # Flip negative-strand paths so coordinates run small -> large.
     if self.strand == Strand.NEG:
         path.reverse()
     # Map integer node ids to (start, end) genome intervals.
     path = [self.get_node_interval(nid) for nid in path]
     merged = []
     pending = [path[0]]
     for node in path[1:]:
         if pending[-1].end == node.start:
             # contiguous with the current run: extend it
             pending.append(node)
         else:
             # gap found: flush the accumulated run as one exon
             merged.append(Exon(pending[0].start, pending[-1].end))
             pending = [node]
     # flush the final run
     merged.append(Exon(pending[0].start, pending[-1].end))
     return merged
Пример #18
0
 def get_path_from_parent(self, parent):
     """
     Return a list of PathInfos containing the path from the parent
     model to the current model, or an empty list if parent is not a
     parent of the current model.
     """
     if self.model is parent:
         return []
     concrete = self.concrete_model
     # Build the inheritance chain parent -> ... -> concrete model.
     lineage = concrete._meta.get_base_chain(parent)
     lineage.reverse()
     lineage.append(concrete)
     path = []
     # Walk consecutive (ancestor, child) pairs and collect PathInfos.
     for idx in range(len(lineage) - 1):
         ancestor, child = lineage[idx], lineage[idx + 1]
         link = child._meta.get_ancestor_link(ancestor)
         path.extend(link.get_reverse_path_info())
     return path
def coref(xml, start_id=1):
    """Yield coreference annotations extracted from *xml*.

    Emits one TextBoundAnnotation per mention and one EquivAnnotation per
    coreference chain. ``start_id`` seeds the running T-id counter.
    """
    soup = _soup(xml)
    token_by_ids = _token_by_ids(soup)

    docs_e = soup.findall('document')
    assert len(docs_e) == 1
    docs_e = docs_e[0]
    # Despite the name, this element contains coreferences (note the "s")
    corefs_e = docs_e.findall('coreference')
    if not corefs_e:
        # No coreferences to process. Plain return: raising StopIteration
        # inside a generator is a RuntimeError under PEP 479 (Python 3.7+).
        return
    assert len(corefs_e) == 1
    corefs_e = corefs_e[0]

    curr_id = start_id
    for coref_e in corefs_e:
        # Guard the child element, not the parent: the original checked
        # corefs_e.tag, which never varies inside this loop.
        if coref_e.tag != 'coreference':
            # To be on the safe side
            continue

        # This tag is now a full coreference chain
        chain = []
        # NOTE(review): getiterator() was removed from ElementTree in
        # Python 3.9; if _soup returns an ElementTree this should become
        # .iter('mention') -- confirm the type returned by _soup.
        for mention_e in coref_e.getiterator('mention'):
            # Note: There is a "representative" attribute signalling the most
            #   "suitable" mention, we are currently not using this
            # Note: We don't use the head information for each mention
            sentence_id = int(mention_e.find('sentence').text)
            start_tok_id = int(mention_e.find('start').text)
            end_tok_id = int(mention_e.find('end').text) - 1

            mention_id = 'T%s' % (curr_id, )
            chain.append(mention_id)
            curr_id += 1
            yield TextBoundAnnotation(
                    ((token_by_ids[sentence_id][start_tok_id].start,
                    token_by_ids[sentence_id][end_tok_id].end), ),
                    mention_id, 'Mention', '')

        yield EquivAnnotation('Coreference', chain, '')
Пример #20
0
    def calculate(self):
        """Bucket every bond subgraph of length ``self._order`` by chi type."""
        chain = []
        path = []
        path_cluster = []
        cluster = []

        dfs = DFS(self.mol)
        for bonds in Chem.FindAllSubgraphsOfLengthN(self.mol, self._order):
            dfs.reset(bonds)
            kind = dfs()
            members = dfs.nodes

            if kind == ChiType.chain:
                chain.append(members)
            elif kind == ChiType.path:
                path.append(members)
            elif kind == ChiType.path_cluster:
                path_cluster.append(members)
            else:
                # anything that is not chain/path/path_cluster is a cluster
                cluster.append(members)

        return ChiBonds(chain, path, path_cluster, cluster)
Пример #21
0
    def calculate(self):
        """Classify all subgraphs of the configured order into chi bond buckets."""
        chain = []
        path = []
        path_cluster = []
        cluster = []

        dfs = DFS(self.mol)
        for bonds in Chem.FindAllSubgraphsOfLengthN(self.mol, self._order):
            dfs.reset(bonds)
            typ = dfs()
            nodes = dfs.nodes

            # Route the classification to its accumulator; anything not
            # chain/path/path_cluster falls through to cluster, matching
            # the original else branch.
            for chi_kind, bucket in ((ChiType.chain, chain),
                                     (ChiType.path, path),
                                     (ChiType.path_cluster, path_cluster)):
                if typ == chi_kind:
                    bucket.append(nodes)
                    break
            else:
                cluster.append(nodes)

        return ChiBonds(chain, path, path_cluster, cluster)
Пример #22
0
# Row offsets for the 8 neighbourhood directions.
# NOTE(review): CHANGE_J (defined elsewhere) presumably holds the
# matching column offsets -- confirm against its definition.
CHANGE_I =   [-1, -1, -1, # y or rows
               0,      0,
               1,  1,  1]

# the border, later drawn around the image
border = []
# saving chain_code as array
chain = []
# iterator for edges
curr_point = start_point
# Probe the neighbours of the starting pixel until the first one inside
# the region (non-zero in img) is found; that neighbour seeds the
# boundary walk, recording both the pixel and its chain-code direction.
for direction in DIRECTIONS:
    idx = DIR_2_IDX[direction]
    new_point = (start_point[0]+CHANGE_I[idx], start_point[1]+CHANGE_J[idx])
    if img[new_point] != 0: # if is ROI
        border.append(new_point)
        chain.append(direction)
        curr_point = new_point
        break

# steps taken around the boundary so far
count = 0
while curr_point != start_point:
    #figure direction to start search
    b_direction = (direction + 5) % 8
    dirs_1 = range(b_direction, 8)
    dirs_2 = range(0, b_direction)
    dirs = []
    dirs.extend(dirs_1)
    dirs.extend(dirs_2)
    for direction in dirs:
        idx = DIR_2_IDX[direction]
        new_point = (curr_point[0]+CHANGE_I[idx], curr_point[1]+CHANGE_J[idx])