def get_selection(session, filter_type=None, sample_ids=None):
    """Stream selection-pressure records as TSV output.

    This is a generator: it first yields whatever the ``StreamingTSV``
    writer's ``writeheader()`` returns, then the result of ``writerow()``
    for each ``SelectionPressure`` record matched by the query.

    :param session: Database session used to build the query.
    :param filter_type: ``'overall'`` keeps only records whose ``sample_id``
        is NULL; ``'samples'`` keeps only per-sample records.  Any other
        value (including the default ``None``) applies no filter.
    :param sample_ids: Only consulted when ``filter_type == 'samples'``.
        If truthy, output is restricted to records whose ``sample_id`` is
        in this collection; otherwise every record with a non-NULL
        ``sample_id`` is kept.

    :returns: Generator over the writer's header and row outputs.
    """
    query = session.query(SelectionPressure).options(
        joinedload(SelectionPressure.clone),
        joinedload(SelectionPressure.sample),
    )

    if filter_type == 'overall':
        query = query.filter(SelectionPressure.sample_id.is_(None))
    elif filter_type == 'samples':
        if sample_ids:
            # Query.filter() returns a new query rather than mutating in
            # place, so the result must be re-bound or the restriction is
            # silently dropped.
            query = query.filter(
                SelectionPressure.sample_id.in_(sample_ids)
            )
        else:
            query = query.filter(~SelectionPressure.sample_id.is_(None))

    # Every table column is exported except the surrogate key and the raw
    # sample foreign key; the latter is replaced by the 'sample' column.
    excluded = ('id', 'sample_id')
    base_fields = [
        name for name in SelectionPressure.__table__.c.keys()
        if name not in excluded
    ]

    writer = StreamingTSV(['sample', 'subject'] + base_fields)
    yield writer.writeheader()

    for sel in yield_limit(query, SelectionPressure.id):
        row = {f: getattr(sel, f) for f in base_fields}
        # Records without an associated sample are labelled 'All Samples'.
        row['sample'] = sel.sample.name if sel.sample else 'All Samples'
        row['subject'] = sel.clone.subject.identifier
        yield writer.writerow(row)
def get_sequences(session, sample, fmt, clones_only, min_subject_copies):
    """Stream the sequences of one sample through a ``SequenceWriter``.

    Generator that yields the writer's header output first, followed by
    the writer's output for each matching ``Sequence``.

    :param session: Database session used for all queries.
    :param sample: Object whose ``id`` selects the sequences to export.
    :param fmt: Format argument passed to ``SequenceWriter``.
    :param clones_only: If truthy, only sequences with a non-NULL
        ``clone_id`` are included.
    :param min_subject_copies: If truthy, only sequences whose collapse
        record has ``copy_number_in_subject`` at or above this value are
        included.

    :returns: Generator over the writer's header and sequence outputs.
    """
    # Distinct metadata keys are collected up front and given to the writer.
    metadata_keys = {
        entry.key for entry in session.query(SampleMetadata.key)
    }

    query = session.query(Sequence)
    query = query.filter(Sequence.sample_id == sample.id)
    query = query.join(SequenceCollapse)
    query = query.options(
        joinedload(Sequence.clone),
        joinedload(Sequence.collapse),
        joinedload(Sequence.sample),
        joinedload(Sequence.subject),
    )

    if clones_only:
        assigned_to_clone = ~Sequence.clone_id.is_(None)
        query = query.filter(assigned_to_clone)

    if min_subject_copies:
        enough_copies = (
            SequenceCollapse.copy_number_in_subject >= min_subject_copies
        )
        query = query.filter(enough_copies)

    out = SequenceWriter(fmt, metadata_keys)
    yield out.writeheader()

    for record in yield_limit(query, Sequence.ai):
        yield out.writeseq(record)
def get_sequences(session, seqs, format_name):
    """Stream an existing sequence query through a ``SequenceWriter``.

    Generator that yields the writer's header output first, followed by
    the writer's output for each ``Sequence`` produced by ``seqs``.

    :param session: Database session used to look up sample metadata keys.
    :param seqs: Query of ``Sequence`` rows; it is joined to
        ``SequenceCollapse`` and given joined-load options before being
        iterated.
    :param format_name: Format argument passed to ``SequenceWriter``.

    :returns: Generator over the writer's header and sequence outputs.
    """
    eager_relations = (
        Sequence.clone,
        Sequence.collapse,
        Sequence.sample,
        Sequence.subject,
    )
    query = seqs.join(SequenceCollapse).options(
        *[joinedload(relation) for relation in eager_relations]
    )

    # Distinct metadata keys are handed to the writer at construction.
    metadata_keys = {
        entry.key for entry in session.query(SampleMetadata.key)
    }

    out = SequenceWriter(format_name, metadata_keys)
    yield out.writeheader()

    for record in yield_limit(query, Sequence.ai):
        yield out.writeseq(record)
def get_clone_summary(session, include_lineages):
    """Stream a one-row-per-clone summary as TSV output.

    Generator that yields the ``StreamingTSV`` writer's header output
    first, followed by the writer's output for each ``Clone``.

    :param session: Database session used to query clones.
    :param include_lineages: If truthy, a ``lineage`` column holding
        ``clone.tree`` is appended to the output.

    :returns: Generator over the writer's header and row outputs.
    """
    columns = [
        'clone_id', 'subject', 'v_gene', 'j_gene', 'functional',
        'insertions', 'deletions', 'cdr3_nt', 'cdr3_num_nt', 'cdr3_aa',
        'uniques', 'instances', 'copies', 'germline', 'parent_id',
        'avg_mutations_per_copy'
    ]
    if include_lineages:
        columns += ['lineage']

    writer = StreamingTSV(columns)
    yield writer.writeheader()

    # Sentinel distinguishing "attribute absent" from a real None value.
    absent = object()

    for clone in yield_limit(session.query(Clone), Clone.id):
        # Start from whichever output columns exist directly as attributes
        # on the clone; the derived columns are filled in below.
        row = {}
        for name in writer.fieldnames:
            value = getattr(clone, name, absent)
            if value is not absent:
                row[name] = value

        row['clone_id'] = clone.id
        row['subject'] = clone.subject.identifier
        if clone.functional:
            row['functional'] = 'T'
        else:
            row['functional'] = 'F'
        row['insertions'] = clone._insertions
        row['deletions'] = clone._deletions
        row['uniques'] = clone.overall_unique_cnt
        row['instances'] = clone.overall_instance_cnt
        row['copies'] = clone.overall_total_cnt

        normalized_mutations = clone.overall_stats.total_mutations(
            normalize=True
        )
        row['avg_mutations_per_copy'] = round(normalized_mutations, 2)

        if include_lineages:
            row['lineage'] = clone.tree

        yield writer.writerow(row)
def get_clone_overlap(session):
    """Stream per-sample clone statistics as TSV output.

    Generator that yields the ``StreamingTSV`` writer's header output
    first, followed by the writer's output for each ``CloneStats`` record
    that has a non-NULL ``sample_id``.

    :param session: Database session used to query clone statistics.

    :returns: Generator over the writer's header and row outputs.
    """
    columns = [
        'clone_id', 'sample', 'uniques', 'copies', 'avg_mutations_per_copy'
    ]
    writer = StreamingTSV(columns)

    # Only records tied to a specific sample are exported.
    per_sample = session.query(CloneStats).filter(
        ~CloneStats.sample_id.is_(None)
    )

    yield writer.writeheader()

    for entry in yield_limit(per_sample, CloneStats.id):
        row = {}
        row['clone_id'] = entry.clone_id
        row['sample'] = entry.sample.name
        row['uniques'] = entry.unique_cnt
        row['copies'] = entry.total_cnt
        row['avg_mutations_per_copy'] = round(
            entry.total_mutations(normalize=True), 2
        )
        yield writer.writerow(row)
def get_sequences(session, sample, fmt, clones_only, min_subject_copies):
    """Export the sequences belonging to ``sample`` via ``SequenceWriter``.

    Generator: the first item is the writer's header output; each later
    item is the writer's output for one matching ``Sequence``.

    :param session: Database session used for all queries.
    :param sample: Object whose ``id`` selects which sequences to export.
    :param fmt: Format argument passed to ``SequenceWriter``.
    :param clones_only: When truthy, sequences with a NULL ``clone_id``
        are excluded.
    :param min_subject_copies: When truthy, sequences whose collapse
        record has ``copy_number_in_subject`` below this value are
        excluded.

    :returns: Generator over the writer's header and sequence outputs.
    """
    # Distinct metadata keys are gathered first and handed to the writer.
    key_rows = session.query(SampleMetadata.key)
    meta_keys = {row.key for row in key_rows}

    conditions = [Sequence.sample_id == sample.id]
    selected = (
        session.query(Sequence)
        .filter(*conditions)
        .join(SequenceCollapse)
        .options(
            joinedload(Sequence.clone),
            joinedload(Sequence.collapse),
            joinedload(Sequence.sample),
            joinedload(Sequence.subject),
        )
    )

    # Optional restrictions are layered on only when requested.
    if clones_only:
        selected = selected.filter(~Sequence.clone_id.is_(None))
    if min_subject_copies:
        selected = selected.filter(
            SequenceCollapse.copy_number_in_subject >= min_subject_copies
        )

    seq_writer = SequenceWriter(fmt, meta_keys)
    yield seq_writer.writeheader()

    for item in yield_limit(selected, Sequence.ai):
        yield seq_writer.writeseq(item)