示例#1
0
def save_coverages(contigs, coverage_filename):
    """
    Store the coverage values of a dsv file for the given contigs.

    The first row is used as the header unless utils.is_number accepts its
    second column; in that case the value columns are named cov_1, cov_2, ...
    and the first row is stored as data. Rows whose contig name is not a key
    of contigs are skipped. Every matched name is popped from contigs, so the
    dict is modified in place and a name that shows up again is skipped.

    :param contigs: A dict contig_name -> contig_id.
    :param coverage_filename: The name of the dsv file.
    """
    rows = utils.parse_dsv(coverage_filename)

    # Look at the first row to find out whether the file has a header.
    first_row = next(rows)
    first_row_is_data = utils.is_number(first_row[1])
    if first_row_is_data:
        column_names = ['cov_{}'.format(n) for n in range(1, len(first_row))]
    else:
        column_names = first_row[1:]

    def store_row(row):
        name, *values = row
        if name not in contigs:
            return
        contig_id = contigs.pop(name)
        for position, value in enumerate(values):
            coverage = Coverage(value=value, name=column_names[position],
                                contig_id=contig_id)
            db.session.add(coverage)

    if first_row_is_data:
        store_row(first_row)
    for row in rows:
        store_row(row)

    db.session.commit()
示例#2
0
def save_coverages(contigs, coverage_filename):
    """
    Add one Coverage per value column for every known contig in a dsv file.

    A file without a header (utils.is_number accepts the second column of the
    first row) gets the generated column names cov_1, cov_2, ... and its first
    row is stored like any other row. Names missing from contigs are ignored;
    matched names are popped from contigs, which mutates the caller's dict.

    :param contigs: A dict contig_name -> contig_id.
    :param coverage_filename: The name of the dsv file.
    """
    rows = utils.parse_dsv(coverage_filename)

    # Determine if the file has a header.
    first = next(rows)
    if utils.is_number(first[1]):
        names = ['cov_{}'.format(n) for n in range(1, len(first))]
        leading_rows = [first]
    else:
        names = first[1:]
        leading_rows = []

    # The first row (when it holds data) is handled before the remaining rows.
    for source in (leading_rows, rows):
        for contig_name, *values in source:
            if contig_name not in contigs:
                continue
            contig_id = contigs.pop(contig_name)
            for idx, cov in enumerate(values):
                db.session.add(
                    Coverage(value=cov, name=names[idx], contig_id=contig_id))

    db.session.commit()
示例#3
0
def save_bin_set_job(name, assembly_id, filename=None):
    """
    Create a bin set for an assembly, optionally filled from a dsv file.

    Each row of the file is unpacked as (contig_name, bin_name). Contigs that
    end up in no named bin are collected in an extra bin called 'unbinned'.
    The file is removed once its bins have been created.

    :param name: The name of the new bin set.
    :param assembly_id: The id of the assembly the bin set belongs to.
    :param filename: The name of the dsv file, or None to only create the
        'unbinned' bin.
    :return: A dict with the keys 'assembly' (assembly id), 'binSet' (bin set
        id), 'missing' (names of the contigs left for the 'unbinned' bin) and
        'notfound' (names from the file that could not be assigned, either
        because the assembly has no such contig or because the contig was
        already assigned by an earlier row).
    """
    assembly = Assembly.query.get(assembly_id)
    bin_set = BinSet(name=name,
                     color=randcol.generate(luminosity='dark')[0],
                     submit_date=datetime.utcnow(),
                     assembly=assembly)
    db.session.add(bin_set)
    # Flush so that bin_set.id is available for the bins created below.
    db.session.flush()

    # Query the contigs from the db to dict contig-name -> contig object
    query = assembly.contigs.options(load_only('name'))
    contigs = {c.name: c for c in query.all()}

    notfound = []
    if filename:
        # Dict: bin -> contig names
        bins = defaultdict(list)
        for contig_name, bin_name in utils.parse_dsv(filename):
            if contig_name in contigs:
                bins[bin_name].append(contig_name)
            else:
                notfound.append(contig_name)

        for bin_name, contig_names in bins.items():
            bin_contigs = []
            for contig_name in contig_names:
                # A name listed more than once has already been popped by its
                # first occurrence; report it instead of raising KeyError.
                if contig_name in contigs:
                    bin_contigs.append(contigs.pop(contig_name))
                else:
                    notfound.append(contig_name)
            # NOTE(review): this Bin is never passed to db.session.add;
            # presumably it reaches the session through a relationship
            # cascade -- confirm against the model definitions.
            Bin(name=bin_name, color=randcol.generate(luminosity='dark')[0],
                bin_set_id=bin_set.id, contigs=bin_contigs)
        os.remove(filename)

    # Create a bin for the unbinned contigs.
    unbinned_bin = Bin(name='unbinned', color='#939393',
                       bin_set_id=bin_set.id,
                       contigs=list(contigs.values()), unbinned=True)
    db.session.add(unbinned_bin)

    db.session.flush()
    for current_bin in bin_set.bins:
        current_bin.recalculate_values()
    db.session.commit()
    return {
        'assembly': assembly.id,
        'binSet': bin_set.id,
        'missing': list(contigs.keys()),
        'notfound': notfound
    }
示例#4
0
def read_coverages(filename):
    """
    Read a dsv coverage file into a dict and remove the file afterwards.

    The first row supplies the sample names unless utils.is_number accepts its
    second column; in that case the samples are named sample_1, sample_2, ...
    and the first row is read as data. A later row with the same contig name
    replaces the earlier entry.

    :param filename: The name of the dsv file.
    :return: A tuple (samples, coverages) where coverages maps
        contig_name -> {sample: value}.
    """
    rows = utils.parse_dsv(filename)

    # Determine if the file has a header.
    first_row = next(rows)
    first_row_is_data = utils.is_number(first_row[1])
    if first_row_is_data:
        samples = ['sample_{}'.format(n) for n in range(1, len(first_row))]
    else:
        samples = first_row[1:]

    def by_sample(values):
        # Indexed lookup on purpose: a row with fewer values than samples
        # raises IndexError instead of being truncated.
        return {sample: values[idx] for idx, sample in enumerate(samples)}

    coverages = {}
    if first_row_is_data:
        contig_name, *values = first_row
        coverages[contig_name] = by_sample(values)
    for contig_name, *values in rows:
        coverages[contig_name] = by_sample(values)

    os.remove(filename)
    return samples, coverages
示例#5
0
    def post(self, contigset_id):
        """
        Create a binset for a contigset from the uploaded bin file.

        The upload (args.bins) is written to a temporary file and each of its
        rows is unpacked as (contig_name, bin_name). Contigs of the contigset
        that the file does not mention are put into an extra bin called
        'unbinned'.

        :param contigset_id: Passed to user_contigset_or_404 to look up the
            contigset.
        :return: A dict with the keys 'id', 'name', 'color', 'bins' (bin ids)
            and 'contigset' (contigset id).
        :raises KeyError: If the file names a contig that is not part of the
            contigset, or names the same contig more than once.
        """
        contigset = user_contigset_or_404(contigset_id)
        args = self.reqparse.parse_args()

        # Dict: bin -> contigs
        bins = defaultdict(list)
        bin_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            # Close the handle before the file is parsed by name.
            with bin_file:
                args.bins.save(bin_file)
            for contig_name, bin_name in utils.parse_dsv(bin_file.name):
                bins[bin_name].append(contig_name)
        finally:
            # The file is only needed for parsing; removing it here keeps it
            # from being left behind when a later step raises.
            os.remove(bin_file.name)

        bin_objects = []
        contigs = {c.name: c for c in contigset.contigs}
        for bin_name, contig_names in bins.items():
            bin_contigs = [contigs.pop(c) for c in contig_names]
            new_bin = Bin(name=bin_name, color=self.randcol.generate()[0],
                          contigs=bin_contigs)
            new_bin.recalculate_values()
            bin_objects.append(new_bin)

        # Create a bin for the unbinned contigs.
        unbinned_bin = Bin(name='unbinned', color='#939393',
                           contigs=list(contigs.values()))
        unbinned_bin.recalculate_values()
        bin_objects.append(unbinned_bin)

        binset = Binset(name=args.name, color=self.randcol.generate()[0],
                        bins=bin_objects, contigset=contigset)

        db.session.add(binset)
        db.session.commit()
        return {'id': binset.id, 'name': binset.name, 'color': binset.color,
                'bins': [b.id for b in binset.bins], 'contigset': contigset.id}