def save_coverages(contigs, coverage_filename):
    """Store the coverage values of a dsv file as Coverage rows.

    Every row of the file holds a contig name followed by one coverage
    value per sample. The first row is treated as a header naming the
    samples when its second field is not a number; otherwise the samples
    are named ``cov_1``, ``cov_2``, ... and the first row is data.

    Rows whose contig name is not a key of ``contigs`` are skipped.
    Matched entries are popped from ``contigs``: the caller's dict is
    consumed, and a contig name that occurs more than once in the file is
    only stored the first time. An empty file stores nothing.

    :param contigs: A dict contig_name -> contig_id.
    :param coverage_filename: The name of the dsv file.
    """
    rows = utils.parse_dsv(coverage_filename)

    # An empty file has nothing to store; a bare next() would leak a
    # StopIteration to the caller.
    first_row = next(rows, None)
    if first_row is None:
        return

    # Determine if the file has a header.
    has_header = not utils.is_number(first_row[1])
    if has_header:
        sample_names = first_row[1:]
    else:
        sample_names = ['cov_{}'.format(i) for i in range(1, len(first_row))]

    def add_coverages(contig_name, values):
        try:
            contig_id = contigs.pop(contig_name)
        except KeyError:
            # Unknown (or already stored) contig: ignore the row.
            return
        # zip pairs each value with its sample name and tolerates rows
        # that are shorter or longer than the header.
        for sample_name, value in zip(sample_names, values):
            db.session.add(
                Coverage(value=value, name=sample_name, contig_id=contig_id))

    # Without a header the first row is already data; a header row must
    # not be looked up as if it were a contig.
    if not has_header:
        contig_name, *values = first_row
        add_coverages(contig_name, values)

    for contig_name, *values in rows:
        add_coverages(contig_name, values)

    db.session.commit()
def save_coverages(contigs, coverage_filename):
    """Store the coverage values of a dsv file as Coverage rows.

    Every row of the file holds a contig name followed by one coverage
    value per sample. The first row is treated as a header naming the
    samples when its second field is not a number; otherwise the samples
    are named ``cov_1``, ``cov_2``, ... and the first row is data.

    Rows whose contig name is not a key of ``contigs`` are skipped.
    Matched entries are popped from ``contigs``: the caller's dict is
    consumed, and a contig name that occurs more than once in the file is
    only stored the first time. An empty file stores nothing.

    :param contigs: A dict contig_name -> contig_id.
    :param coverage_filename: The name of the dsv file.
    """
    rows = utils.parse_dsv(coverage_filename)

    # An empty file has nothing to store; a bare next() would leak a
    # StopIteration to the caller.
    first_row = next(rows, None)
    if first_row is None:
        return

    # Determine if the file has a header.
    has_header = not utils.is_number(first_row[1])
    if has_header:
        sample_names = first_row[1:]
    else:
        sample_names = ['cov_{}'.format(i) for i in range(1, len(first_row))]

    def add_coverages(contig_name, values):
        try:
            contig_id = contigs.pop(contig_name)
        except KeyError:
            # Unknown (or already stored) contig: ignore the row.
            return
        # zip pairs each value with its sample name and tolerates rows
        # that are shorter or longer than the header.
        for sample_name, value in zip(sample_names, values):
            db.session.add(
                Coverage(value=value, name=sample_name, contig_id=contig_id))

    # Without a header the first row is already data; a header row must
    # not be looked up as if it were a contig.
    if not has_header:
        contig_name, *values = first_row
        add_coverages(contig_name, values)

    for contig_name, *values in rows:
        add_coverages(contig_name, values)

    db.session.commit()
def save_bin_set_job(name, assembly_id, filename=None):
    """Create a bin set for an assembly, optionally filled from a dsv file.

    When ``filename`` is given, each of its rows is read as a
    ``(contig name, bin name)`` pair and one Bin is created per bin name.
    Every contig of the assembly that is not assigned by the file ends up
    in an extra bin named 'unbinned'. The file is deleted after it has
    been processed.

    A contig is assigned to at most one bin: the first bin that claims it
    wins, and every further occurrence of the name is reported in
    ``notfound`` instead of aborting the job.

    :param name: Name of the new bin set.
    :param assembly_id: Id used to look up the Assembly.
    :param filename: Optional name of the dsv file with the bin assignment.
    :return: A dict with the keys 'assembly' (assembly id), 'binSet' (id of
        the new bin set), 'missing' (names of the assembly's contigs that
        were not assigned by the file) and 'notfound' (contig names from
        the file that could not be assigned).
    """
    assembly = Assembly.query.get(assembly_id)
    bin_set = BinSet(name=name, color=randcol.generate(luminosity='dark')[0],
                     submit_date=datetime.utcnow(), assembly=assembly)
    db.session.add(bin_set)
    # Flushed before bin_set.id is read below.
    db.session.flush()

    # Query the contigs from the db to dict contig-name -> contig object
    query = assembly.contigs.options(load_only('name'))
    unassigned = {c.name: c for c in query.all()}

    notfound = []
    if filename:
        # Dict: bin -> contigs
        bins = defaultdict(list)
        for contig_name, bin_name in utils.parse_dsv(filename):
            if contig_name in unassigned:
                bins[bin_name].append(contig_name)
            else:
                notfound.append(contig_name)

        for bin_name, contig_names in bins.items():
            members = []
            for contig_name in contig_names:
                # A name listed more than once in the file has already
                # been popped by its first occurrence; report it rather
                # than failing with a KeyError.
                contig = unassigned.pop(contig_name, None)
                if contig is None:
                    notfound.append(contig_name)
                else:
                    members.append(contig)
            # Add the bin explicitly, like the 'unbinned' bin below,
            # instead of depending on relationship cascades.
            db.session.add(
                Bin(name=bin_name,
                    color=randcol.generate(luminosity='dark')[0],
                    bin_set_id=bin_set.id, contigs=members))
        os.remove(filename)

    # Create a bin for the unbinned contigs.
    unbinned = Bin(name='unbinned', color='#939393', bin_set_id=bin_set.id,
                   contigs=list(unassigned.values()), unbinned=True)
    db.session.add(unbinned)
    db.session.flush()

    for bin_ in bin_set.bins:
        bin_.recalculate_values()
    db.session.commit()

    return {
        'assembly': assembly.id,
        'binSet': bin_set.id,
        'missing': list(unassigned.keys()),
        'notfound': notfound
    }
def read_coverages(filename):
    """Read a dsv coverage file into a per-contig mapping and delete it.

    Every row of the file holds a contig name followed by one coverage
    value per sample. The first row is treated as a header naming the
    samples when its second field is not a number; otherwise the samples
    are named ``sample_1``, ``sample_2``, ... and the first row is data.

    The file is removed once it has been read completely. An empty file
    yields ``([], {})``.

    :param filename: The name of the dsv file.
    :return: A tuple ``(samples, coverages)`` where ``samples`` is the list
        of sample names and ``coverages`` maps
        contig name -> {sample name -> value as read from the file}.
    :raises IndexError: If a data row has fewer values than there are
        samples.
    """
    rows = utils.parse_dsv(filename)

    # An empty file has no samples and no contigs; a bare next() would
    # leak a StopIteration to the caller.
    first_row = next(rows, None)
    if first_row is None:
        os.remove(filename)
        return [], {}

    # Determine if the file has a header.
    has_header = not utils.is_number(first_row[1])
    if has_header:
        samples = first_row[1:]
    else:
        samples = ['sample_{}'.format(i) for i in range(1, len(first_row))]

    def by_sample(values):
        # Index-based on purpose: a row that is too short raises instead
        # of silently losing samples; surplus values are ignored.
        return {sample: values[i] for i, sample in enumerate(samples)}

    coverages = {}
    # Without a header the first row is already data; a header row must
    # not be stored as if it were a contig.
    if not has_header:
        contig_name, *values = first_row
        coverages[contig_name] = by_sample(values)

    for contig_name, *values in rows:
        coverages[contig_name] = by_sample(values)

    os.remove(filename)
    return samples, coverages
def post(self, contigset_id):
    """Create a bin set for a contig set from an uploaded bin file.

    The uploaded file (request argument ``bins``) is saved to a temporary
    file and read as rows of ``(contig name, bin name)``. One Bin is
    created per bin name; all contigs of the contig set that are not
    assigned end up in an extra bin named 'unbinned'.

    Contig names that do not belong to the contig set, or that were
    already assigned by an earlier row, are ignored. A bin name that is
    left without any contig is not created.

    :param contigset_id: Id passed to ``user_contigset_or_404``.
    :return: A dict with the keys 'id', 'name', 'color', 'bins' (list of
        bin ids) and 'contigset' describing the new bin set.
    """
    contigset = user_contigset_or_404(contigset_id)
    args = self.reqparse.parse_args()

    # Dict: bin -> contigs
    bins = defaultdict(list)
    bin_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        args.bins.save(bin_file)
        bin_file.close()
        for contig_name, bin_name in utils.parse_dsv(bin_file.name):
            bins[bin_name].append(contig_name)
    finally:
        # The file was created with delete=False, so it has to be removed
        # by hand, also when saving or parsing fails.
        bin_file.close()
        os.remove(bin_file.name)

    unassigned = {c.name: c for c in contigset.contigs}
    bin_objects = []
    for bin_name, contig_names in bins.items():
        # Skip names that are unknown or already taken instead of failing
        # with a KeyError on user-supplied data.
        members = [unassigned.pop(contig_name)
                   for contig_name in contig_names
                   if contig_name in unassigned]
        if not members:
            continue
        new_bin = Bin(name=bin_name, color=self.randcol.generate()[0],
                      contigs=members)
        new_bin.recalculate_values()
        bin_objects.append(new_bin)

    # Create a bin for the unbinned contigs.
    unbinned = Bin(name='unbinned', color='#939393',
                   contigs=list(unassigned.values()))
    unbinned.recalculate_values()
    bin_objects.append(unbinned)

    binset = Binset(name=args.name, color=self.randcol.generate()[0],
                    bins=bin_objects, contigset=contigset)
    db.session.add(binset)
    db.session.commit()

    return {'id': binset.id, 'name': binset.name, 'color': binset.color,
            'bins': [b.id for b in binset.bins],
            'contigset': contigset.id}