def test_chanj_with_sambamba():
    """Check that ``bed.chanjo`` raises ``BedFormattingError`` on these lines.

    The input is a commented header row followed by a single data row whose
    columns are named after read count, mean coverage and sample name.
    """
    header = (
        "# chrom\tchromStart\tchromEnd\treadCount\tmeanCoverage"
        "\tsampleName"
    )
    record = "1\t69089\t70007\t232\t25.4946\tADM992A10\t"
    bed_lines = [header, record]

    # Wrap in list() so the parser output is fully consumed inside the
    # ``raises`` context.
    with pytest.raises(BedFormattingError):
        list(bed.chanjo(bed_lines))
def test_chanj_with_sambamba():
    """Expect ``BedFormattingError`` when ``bed.chanjo`` consumes the input.

    Two lines are supplied: a ``#``-prefixed header and one tab-separated
    record with a trailing tab.
    """
    header_line = (
        "# chrom\tchromStart\tchromEnd\treadCount\tmeanCoverage"
        "\tsampleName"
    )
    data_line = "1\t69089\t70007\t232\t25.4946\tADM992A10\t"
    bed_lines = [header_line, data_line]

    with pytest.raises(BedFormattingError):
        parsed = bed.chanjo(bed_lines)
        # Drain the result so the error surfaces even if parsing is lazy.
        list(parsed)
def test_chanjo():
    """Parse ``TEST_OUTPUT`` with ``bed.chanjo`` and check the first row.

    The first parsed row must carry the expected name, score, strand and
    element pairs.
    """
    with open(TEST_OUTPUT, 'r') as handle:
        # Materialise every row while the file is still open.
        parsed_rows = list(bed.chanjo(handle))
        first_row = parsed_rows[0]

        assert first_row['name'] == '1-69090-70007'
        assert first_row['score'] == 0
        assert first_row['strand'] == '+'
        expected_elements = [('CCDS30547.1', 'OR4F5')]
        assert list(first_row['elements']) == expected_elements
def link_elements(chanjo_db, bed_iterable, batch_size=10000):
    """Parse BED lines from a stream and store the resulting records.

    The lines are parsed with ``bed.chanjo``, converted by ``link_mod.rows``
    and added to ``chanjo_db`` one by one. The store is saved after every
    ``batch_size`` added records and once more at the end.

    Args:
        chanjo_db: store exposing ``session``, ``add()`` and ``save()``
        bed_iterable (iterable): BED lines to parse
        batch_size (int): number of records to add between intermediate
            saves
    """
    rows = bed.chanjo(bed_iterable)
    stats = link_mod.rows(chanjo_db.session, rows)

    # Count from 1 rather than 0: a zero-based index satisfies the modulo
    # check on the very first record, which would trigger a save after a
    # single item and log "processed 0 exons...".
    for count, stat in enumerate(stats, 1):
        chanjo_db.add(stat)
        if count % batch_size == 0:
            chanjo_db.save()
            logger.debug('processed %s exons...', count)

    # Persist whatever remains after the last full batch.
    chanjo_db.save()
def process(sequence):
    """Turn chanjo BED lines into transcript models.

    The lines are parsed with ``bed.chanjo`` and grouped by ``groupby_tx``;
    one model is then built lazily per group via ``make_model``.

    Args:
        sequence (sequence): chanjo BED lines

    Returns:
        Result: ``models`` is a generator of transcript models and ``count``
            is the number of transcript groups
    """
    parsed_exons = bed.chanjo(sequence)
    tx_groups = groupby_tx(parsed_exons)

    # Keep this a generator expression so models are only built on demand.
    lazy_models = (
        make_model(tx_id, tx_exons)
        for tx_id, tx_exons in iteritems(tx_groups)
    )
    total = len(tx_groups)
    return Result(models=lazy_models, count=total)