def path_to_agp(g, path, object, sizes, status): lines = [] for (a, ao), (b, bo) in pairwise(path): ao = get_orientation(ao, status) e = g.get_edge(a.v, b.v) cline = AGPLine.cline(object, a.v, sizes, ao) gline = AGPLine.gline(object, e.length) lines.append(cline) lines.append(gline) # Do not forget the last one z, zo = path[-1] zo = get_orientation(zo, status) cline = AGPLine.cline(object, z.v, sizes, zo) lines.append(cline) return lines
def write_agp(self, filename): sizes = self.sz agp = [] for scaffold, lines in self.iter_scaffold(): for a, b in pairwise(lines): cline = AGPLine.cline(scaffold, a.tig, sizes, a.oo) gline = AGPLine.gline(scaffold, a.gaps) agp.append(cline) agp.append(gline) a = lines[-1] cline = AGPLine.cline(scaffold, a.tig, sizes, a.oo) agp.append(cline) fw = open(filename, "w") for a in agp: print >> fw, a fw.close() reindex([filename, "--inplace"]) return filename
def embed(args): """ %prog embed evidencefile scaffolds.fasta contigs.fasta Use SSPACE evidencefile to scaffold contigs into existing scaffold structure, as in `scaffolds.fasta`. Contigs.fasta were used by SSPACE directly to scaffold. Rules: 1. Only update existing structure by embedding contigs small enough to fit. 2. Promote singleton contigs only if they are big (>= min_length). """ p = OptionParser(embed.__doc__) p.set_mingap(default=10) p.add_option("--min_length", default=200, type="int", help="Minimum length to consider [default: %default]") opts, args = p.parse_args(args) if len(args) != 3: sys.exit(not p.print_help()) evidencefile, scaffolds, contigs = args min_length = opts.min_length splitfasta, oagp, cagp = gaps([scaffolds, "--split", "--mingap={0}".format(opts.mingap)]) agp = AGP(cagp) p = agp.graph ef = EvidenceFile(evidencefile, contigs) sizes = ef.sz q = ef.graph logging.debug("Reference graph: {0}".format(p)) logging.debug("Patch graph: {0}".format(q)) newagp = deepcopy(agp) seen = set() deleted = set() for a in agp: if a.is_gap: continue name = a.component_id object = a.object if name in deleted: print >>sys.stderr, "* Skip {0}, already embedded".format(name) continue seen.add(name) target_name, tag = get_target(p, name) path = q.get_path(name, target_name, tag=tag) path_size = sum([sizes[x.v] for x, t in path]) if path else None status = NO_UPDATE # Heuristic, the patch must not be too long if path and path_size > min_length and len(path) > 3: path = None if not path: print >>sys.stderr, name, target_name, path, path_size, status continue backward = False for x, t in path: if x.v in seen: print >>sys.stderr, "* Does not allow backward" " patch on {0}".format(x.v) backward = True break if backward: continue # Build the path plus the ends vv = q.get_node(name) path.appendleft((vv, tag)) if tag == ">": path.reverse() status = INSERT_BEFORE elif target_name is None: status = INSERT_AFTER else: target = q.get_node(target_name) path.append((target, tag)) status = INSERT_BETWEEN print >>sys.stderr, name, target_name, path, path_size, status # Trim the ends off from the constructed AGPLines lines = path_to_agp(q, path, object, sizes, status) if status == INSERT_BEFORE: lines = lines[:-1] td = newagp.insert_lines(name, lines, delete=True, verbose=True) elif status == INSERT_AFTER: lines = lines[1:] td = newagp.insert_lines(name, lines, after=True, delete=True, verbose=True) else: lines = lines[1:-1] td = newagp.update_between(name, target_name, lines, delete=True, verbose=True) deleted |= td seen |= td # Recruite big singleton contigs CUTOFF = opts.min_length for ctg, size in sizes.items(): if ctg in seen: continue if size < CUTOFF: continue newagp.append(AGPLine.cline(ctg, ctg, sizes, "?")) # Write a new AGP file newagpfile = "embedded.agp" newagp.print_to_file(newagpfile, index=True) tidy([newagpfile, contigs])
def embed(args): """ %prog embed evidencefile scaffolds.fasta contigs.fasta Use SSPACE evidencefile to scaffold contigs into existing scaffold structure, as in `scaffolds.fasta`. Contigs.fasta were used by SSPACE directly to scaffold. Rules: 1. Only update existing structure by embedding contigs small enough to fit. 2. Promote singleton contigs only if they are big (>= min_length). """ p = OptionParser(embed.__doc__) p.set_mingap(default=10) p.add_option("--min_length", default=200, type="int", help="Minimum length to consider [default: %default]") opts, args = p.parse_args(args) if len(args) != 3: sys.exit(not p.print_help()) evidencefile, scaffolds, contigs = args min_length = opts.min_length splitfasta, oagp, cagp = gaps([scaffolds, "--split", "--mingap={0}".format(opts.mingap)]) agp = AGP(cagp) p = agp.graph ef = EvidenceFile(evidencefile, contigs) sizes = ef.sz q = ef.graph logging.debug("Reference graph: {0}".format(p)) logging.debug("Patch graph: {0}".format(q)) newagp = deepcopy(agp) seen = set() deleted = set() for a in agp: if a.is_gap: continue name = a.component_id object = a.object if name in deleted: print >> sys.stderr, "* Skip {0}, already embedded".format(name) continue seen.add(name) target_name, tag = get_target(p, name) path = q.get_path(name, target_name, tag=tag) path_size = sum([sizes[x.v] for x, t in path]) if path else None status = NO_UPDATE # Heuristic, the patch must not be too long if path and path_size > min_length and len(path) > 3: path = None if not path: print >> sys.stderr, name, target_name, path, path_size, status continue backward = False for x, t in path: if x.v in seen: print >> sys.stderr, "* Does not allow backward" \ " patch on {0}".format(x.v) backward = True break if backward: continue # Build the path plus the ends vv = q.get_node(name) path.appendleft((vv, tag)) if tag == ">": path.reverse() status = INSERT_BEFORE elif target_name is None: status = INSERT_AFTER else: target = q.get_node(target_name) path.append((target, tag)) status = INSERT_BETWEEN print >> sys.stderr, name, target_name, path, path_size, status # Trim the ends off from the constructed AGPLines lines = path_to_agp(q, path, object, sizes, status) if status == INSERT_BEFORE: lines = lines[:-1] td = newagp.insert_lines(name, lines, \ delete=True, verbose=True) elif status == INSERT_AFTER: lines = lines[1:] td = newagp.insert_lines(name, lines, after=True, \ delete=True, verbose=True) else: lines = lines[1:-1] td = newagp.update_between(name, target_name, lines, \ delete=True, verbose=True) deleted |= td seen |= td # Recruite big singleton contigs CUTOFF = opts.min_length for ctg, size in sizes.items(): if ctg in seen: continue if size < CUTOFF: continue newagp.append(AGPLine.cline(ctg, ctg, sizes, "?")) # Write a new AGP file newagpfile = "embedded.agp" newagp.print_to_file(newagpfile, index=True) tidy([newagpfile, contigs])