def test_summarize_arrow():
    """A forward and a reversed stand-alone arrow each get their own line."""
    fig = Figure()
    for start, end, name in ((10, 50, 'Foo'), (60, 30, 'Bar')):
        fig.add(Arrow(start, end, label=name))

    actual = summarize_figure(fig)

    expected = """\
10--Foo->50
30<-Bar--60
"""
    assert actual == expected
def _summarize_span(span) -> str:
    """Describe one span (coverage plot, arrow, or track) as text."""
    ys = getattr(span, 'ys', None)
    if ys is not None:
        return 'Coverage ' + ', '.join(map(str, ys))

    coverage_groups = getattr(span, 'coverage_groups', None)
    if coverage_groups is not None:
        # Repeated depths are collapsed to "<depth>x<count>".
        return 'Coverage ' + ', '.join(
            f'{y}x{count}' if count > 1 else str(y)
            for y, count in coverage_groups)

    direction = getattr(span, 'direction', None)
    if direction is not None and direction != '':
        span_end = span.x + span.w
        if direction >= 0:
            return f'{span.x}--{span.label}->{span_end}'
        return f'{span.x}<-{span.label}--{span_end}'

    # Plain track: the label may be a plain string or an object with .text.
    parts = [getattr(span.label, 'text', span.label) or '']
    color = span.color
    if span.a or span.b:
        # Square brackets for drawn tracks, parentheses when color is 'none'.
        if color != 'none':
            parts.append(f'[{span.a}-{span.b}]')
        else:
            parts.append(f'({span.a}-{span.b})')
    for start, end, colour in getattr(span, 'regions', []):
        parts.append(f', {colour}{{{start}-{end}}}')
    return ''.join(parts)


def summarize_figure(figure: 'Figure') -> str:
    """ Summarize the contents of a figure to text.

    Useful for testing. Each element of the figure becomes one line, with
    the element's spans separated by ", ":

    * coverage with ``ys``: ``Coverage y1, y2, ...``
    * coverage with ``coverage_groups``: ``Coverage 11x2, 21`` (a count is
      only shown when it is greater than 1)
    * arrows: ``x--label->end`` or ``x<-label--end``, sorted by x, w, label
    * tracks: ``label[a-b]``, or ``label(a-b)`` when the colour is 'none',
      followed by ``, colour{start-end}`` for each region

    :param figure: the figure to summarize; its show() method is called
        first, so any error in the display math is raised here.
    :return: the text summary, one newline-terminated line per element.
    """
    figure.show()  # Test that all the display math works.

    lines = []
    for _padding, track in figure.elements:
        spans = getattr(track, 'arrows', None)
        if spans is None:
            spans = getattr(track, 'tracks', [track])
        else:
            # Sort a copy, so summarizing doesn't reorder the figure's arrows.
            spans = sorted(spans, key=attrgetter('x', 'w', 'label'))
        lines.append(', '.join(_summarize_span(span) for span in spans))
        lines.append('\n')
    return ''.join(lines)
def test_tiny_arrow_at_edge(svg_differ):
    """Compare a 5-wide arrow ending at x=200 against hand-built SVG."""
    figure = Figure()
    tiny_arrow = Arrow(195, 200, h=20, elevation=-1, label='2.3')
    figure.add(ArrowGroup([tiny_arrow]))
    actual_svg = figure.show()

    # Expected drawing: label circle, label text, then the arrow head.
    expected_svg = Drawing(210, 35, origin=(0, 0))
    expected_shapes = (
        Circle(197.5, 20, 10, stroke='black', fill='ivory'),
        Text('2.3', 11, 197.5, 20, text_anchor='middle', dy="0.35em"),
        Lines(200, 10, 195, 13.5, 195, 6.5, 200, 10, fill='black'),
    )
    for shape in expected_shapes:
        expected_svg.append(shape)

    svg_differ.assert_equal(actual_svg, expected_svg, 'test_arrow')
def test_summarize_regions():
    """Regions follow their track in the summary as colour{start-end}."""
    highlighted = [(50, 100, 'lightgreen'), (110, 120, 'red')]
    fig = Figure()
    fig.add(Track(1, 200, label="Foo", regions=highlighted))

    actual = summarize_figure(fig)

    expected = """\
Foo[1-200], lightgreen{50-100}, red{110-120}
"""
    assert actual == expected
def test_scaled_arrow(svg_differ):
    """Compare an arrow spanning 0-200, shown at w=100, with hand-built SVG."""
    figure = Figure()
    figure.add(Arrow(0, 200, h=20, elevation=-1, label='2.3'))
    actual_svg = figure.show(w=100)

    # Expected drawing: shaft, label circle, label text, then arrow head.
    expected_svg = Drawing(100, 35, origin=(0, 0))
    expected_shapes = (
        Line(0, 10, 93, 10, stroke='black'),
        Circle(50, 20, 10, stroke='black', fill='ivory'),
        Text('2.3', 11, 50, 20, text_anchor='middle', dy="0.35em"),
        Lines(100, 10, 93, 13.5, 93, 6.5, 100, 10, fill='black'),
    )
    for shape in expected_shapes:
        expected_svg.append(shape)

    svg_differ.assert_equal(actual_svg, expected_svg, 'test_arrow')
def start_drawing(width, height):
    """ Start a figure with a header track, plus the drawing expected for it.

    :param width: width of the expected drawing, and end of the header track
    :param height: height of the expected drawing
    :return: (figure, expected_svg) - a Figure holding a single 'Header'
        track from 0 to width, and a Drawing holding the header's grey
        rectangle and centred label
    """
    header_y = height - 15

    drawing = Drawing(width, height, origin=(0, 0))
    # The rectangle is always 200 wide, whatever width was requested.
    header_box = Rectangle(0,
                           header_y,
                           200,
                           10,
                           stroke='lightgrey',
                           fill='lightgrey')
    drawing.append(header_box)
    header_text = Text('Header',
                       10,
                       width / 2,
                       header_y,
                       font_family='monospace',
                       text_anchor='middle')
    drawing.append(header_text)

    figure = Figure()
    figure.add(Track(0, width, label='Header'))
    return figure, drawing
def test_add_track(self):
    """Smoke test: add a forward and a reverse track, then render.

    There are no assertions; the test passes when add_track() and show()
    complete without raising.
    """
    figure = Figure()
    # The label is written on one line: a backslash continuation inside the
    # string literal would embed the source indentation in the label.
    figure.add_track(
        Track(50, 300, direction='f', label="Another sequence",
              regions=[(50, 100, 'lightblue')]))
    figure.add_track(
        Track(110, 410, direction='r', label="Sequence 1",
              regions=[(150, 200, 'salmon')]))
    figure.show()
def test_summarize_zero_coverage():
    """Summarizing a Coverage whose depths are all zero divides by zero."""
    fig = Figure()
    all_zero_depths = [0, 0, 0]
    fig.add(Coverage(10, 20, all_zero_depths), gap=-4)
    fig.add(Track(10, 20, label="Bar"))

    with pytest.raises(ZeroDivisionError):
        summarize_figure(fig)
def test_add_partial_banner():
    """ The final dash of the header banner may be narrower than 500. """
    fig = Figure()
    for banner_end in (500, 700, 1200):
        add_partial_banner(fig, 0, banner_end)

    expected_figure = """\
[1-500], Partial Blast Results(1-500)
[1-500], Partial Blast Results(1-700)
[1-500], [1001-1200], Partial Blast Results(1-1200)
"""

    assert expected_figure == summarize_figure(fig)
def test_summarize_smooth_coverage():
    """Smoothed coverage depths are summarized as depth[xcount] groups."""
    depths = [11, 11, 21, 1, 1, 1]
    fig = Figure()
    fig.add(SmoothCoverage(10, 20, depths), gap=-4)
    fig.add(Track(12, 22, label="Bar"))

    actual = summarize_figure(fig)

    expected = """\
Coverage 11x2, 21, 1x3
Bar[12-22]
"""
    assert actual == expected
def test_summarize_labels():
    """Default tracks use [a-b]; tracks with color='none' use (a-b)."""
    fig = Figure()
    drawn_track = Track(1, 200, label="Foo")
    invisible_track = Track(1, 200, label="Bar", color='none')
    fig.add(drawn_track)
    fig.add(invisible_track)

    actual = summarize_figure(fig)

    expected = """\
Foo[1-200]
Bar(1-200)
"""
    assert actual == expected
def test_summarize_label_objects():
    """Label objects and plain string labels are summarized alike."""
    fig = Figure()
    for track_label in (Label(25, "Foo:"), "Bar:"):
        fig.add(Track(0, 0, label=track_label))

    actual = summarize_figure(fig)

    expected = """\
Foo:
Bar:
"""
    assert actual == expected
def test_summarize_smooth_coverage_ten_percent():
    """Depths 100 and 110 are summarized as one group; 111 starts another."""
    depths = [100, 110, 111, 50]
    fig = Figure()
    fig.add(SmoothCoverage(10, 20, depths), gap=-4)
    fig.add(Track(12, 22, label="Bar"))

    actual = summarize_figure(fig)

    expected = """\
Coverage 100x2, 111, 50
Bar[12-22]
"""
    assert actual == expected
def test_multitrack(self):
    """Smoke test: ten joined forward/reverse track pairs render cleanly.

    There are no assertions; the test passes when show() doesn't raise.
    """
    figure = Figure()
    for offset in range(10):
        forward = Track(offset,
                        offset + 10,
                        direction='f',
                        label='Track {}F'.format(offset))
        reverse = Track(offset + 20,
                        offset + 30,
                        direction='r',
                        label='Track {}R'.format(offset))
        figure.add_track(Multitrack([forward, reverse], join=True))
    figure.show()
def test_summarize_multitracks():
    """Tracks inside a Multitrack share one comma-separated summary line."""
    fig = Figure()
    fig.add(Track(0, 0, label="Foo:"))
    side_by_side = [Track(10, 20, label="Bar"), Track(30, 40, label="Baz")]
    fig.add(Multitrack(side_by_side))

    actual = summarize_figure(fig)

    expected = """\
Foo:
Bar[10-20], Baz[30-40]
"""
    assert actual == expected
def test_summarize_arrow_group():
    """Each ArrowGroup is summarized on one line, arrows comma-separated."""
    first_group = [Arrow(10, 50, label='Foo'), Arrow(60, 30, label='Bar')]
    second_group = [Arrow(1, 50, label='Baz'), Arrow(90, 100, label='Boom')]
    fig = Figure()
    fig.add(ArrowGroup(first_group))
    fig.add(ArrowGroup(second_group))

    actual = summarize_figure(fig)

    expected = """\
10--Foo->50, 30<-Bar--60
1--Baz->50, 90--Boom->100
"""
    assert actual == expected
def test_summarize_multitracks_with_separate_label():
    """Unlabelled tracks show only their range; the label track comes last."""
    fig = Figure()
    fig.add(Track(0, 0, label="Foo:"))
    pieces = [
        Track(10, 20),
        Track(30, 40),
        Track(10, 40, label="Bar", color='none'),
    ]
    fig.add(Multitrack(pieces))

    actual = summarize_figure(fig)

    expected = """\
Foo:
[10-20], [30-40], Bar(10-40)
"""
    assert actual == expected
def test_draw_coverage(svg_differ):
    """Compare SmoothCoverage output with blue rectangles over a plain track.

    The expected drawing is a spacer plus the 'Bar' track, with one
    rectangle inserted at the front for each non-zero run of depths.
    """
    baseline = Figure()
    baseline.add(Track(0, 1, color='', h=-4))  # Just a spacer.
    baseline.add(Track(100, 200, label='Bar'))
    expected_svg = baseline.show()
    # (x, height) of each expected bar; the run of zero depths draws nothing.
    expected_bars = ((100, 5), (125, 10), (175, 1))
    for position, (bar_x, bar_height) in enumerate(expected_bars):
        expected_svg.insert(
            position,
            draw.Rectangle(bar_x, 20, 25, bar_height, fill='blue'))

    coverage_depths = [5] * 25 + [10] * 25 + [0] * 25 + [1] * 25
    figure = Figure()
    figure.add(SmoothCoverage(100, 200, coverage_depths), gap=-4)
    figure.add(Track(100, 200, label="Bar"))
    actual_svg = figure.show()

    svg_differ.assert_equal(actual_svg, expected_svg, 'test_draw_coverage')
def test_arrow_group_small_neighbour(svg_differ):
    """A group with a short neighbouring arrow matches separate arrows.

    The expected figure adds the short arrow first with a gap of -h, then
    the long arrow.
    """
    arrow_height = 20

    grouped = Figure()
    grouped.add(Track(1, 500, label='Header'))
    long_arrow = Arrow(1, 300, elevation=-1, label='1.1', h=arrow_height)
    short_arrow = Arrow(301, 315, elevation=-1, label='1.2', h=arrow_height)
    grouped.add(ArrowGroup([long_arrow, short_arrow]))
    actual_svg = grouped.show()

    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    expected_figure.add(
        Arrow(301, 315, elevation=-1, label='1.2', h=arrow_height),
        gap=-arrow_height)
    expected_figure.add(
        Arrow(1, 300, elevation=-1, label='1.1', h=arrow_height))
    expected_svg = expected_figure.show()

    svg_differ.assert_equal(actual_svg, expected_svg, 'test_arrow_group')
def build_coverage_figure(genome_coverage_csv, blast_csv=None):
    """ Build a figure of landmarks, blast arrows, and contigs.

    :param genome_coverage_csv: open CSV file with columns query_nuc_pos,
        refseq_nuc_pos, coordinates, contig, coverage, and dels. It must be
        seekable, because it is rewound and read again for every contig.
    :param blast_csv: optional open CSV file with columns start, end,
        ref_start, ref_end, and ref_name; rows are also passed to
        ContigMatcher.is_match(). When None, no arrows are drawn.
    :return: a Figure; if nothing was added, it holds a single
        'No contigs found.' track.
    """
    # First pass: find the position range, and the deepest coverage seen for
    # each coordinate reference and for each contig.
    min_position, max_position = 1, 500
    coordinate_depths = Counter()
    contig_depths = Counter()
    contig_groups = defaultdict(set)  # {coordinates_name: {contig_name}}
    reader = DictReader(genome_coverage_csv)
    for row in reader:
        query_nuc_pos = int(row['query_nuc_pos'])
        if row['refseq_nuc_pos']:
            refseq_nuc_pos = int(row['refseq_nuc_pos'])
        else:
            # Blank reference position: use a value that can't move the range.
            refseq_nuc_pos = min_position
        min_position = min(min_position, refseq_nuc_pos, query_nuc_pos)
        max_position = max(max_position, refseq_nuc_pos, query_nuc_pos)
        coordinates_name = row['coordinates']
        contig_name = row['contig']
        if row['coverage'] != '':
            # Deletions are subtracted from the coverage depth.
            row_coverage = int(row['coverage']) - int(row['dels'])
            coordinate_depths[coordinates_name] = max(
                coordinate_depths[coordinates_name], row_coverage)
            contig_depths[contig_name] = max(contig_depths[contig_name],
                                             row_coverage)
        contig_groups[coordinates_name].add(contig_name)
    if '' in coordinate_depths:
        # Force partial contigs to come last.
        coordinate_depths[''] = -1
    # Shift positions so that the smallest one lands on 1.
    position_offset = -min_position + 1
    max_position += position_offset

    # Convert the blast coordinates to integers, and sort the rows.
    blast_rows = []
    if blast_csv is not None:
        for blast_row in DictReader(blast_csv):
            for field_name in ('start', 'end', 'ref_start', 'ref_end'):
                # noinspection PyTypeChecker
                blast_row[field_name] = int(blast_row[field_name])
            blast_rows.append(blast_row)
    blast_rows.sort(key=itemgetter('start', 'ref_start'))

    landmarks_path = (Path(__file__).parent.parent / "data" /
                      "landmark_references.yaml")
    landmark_groups = yaml.safe_load(landmarks_path.read_text())
    projects = ProjectConfig.loadDefault()
    f = Figure()
    # Visit coordinate references from deepest coverage to shallowest; ties
    # are broken by name.
    for _, coordinates_name in sorted(
            (-depth, name) for name, depth in coordinate_depths.items()):
        for reference_set in landmark_groups:
            if coordinates_name != reference_set['coordinates']:
                continue
            # Fill in each missing 'end' from the start of the next landmark,
            # and default every landmark's frame to 0. This modifies the
            # loaded landmark dictionaries in place.
            prev_landmark = None
            for i, landmark in enumerate(
                    sorted(reference_set['landmarks'],
                           key=itemgetter('start'))):
                landmark.setdefault('frame', 0)
                if prev_landmark and 'end' not in prev_landmark:
                    prev_landmark['end'] = landmark['start'] - 1
                prev_landmark = landmark
            # One Multitrack per run of landmarks with the same frame.
            # NOTE(review): groupby only merges adjacent items, and the list
            # is read here in its original order (the sorted copy above was
            # temporary) - confirm the YAML lists landmarks grouped by frame.
            for frame, frame_landmarks in groupby(reference_set['landmarks'],
                                                  itemgetter('frame')):
                subtracks = []
                for landmark in frame_landmarks:
                    landmark_colour = landmark.get('colour')
                    if landmark_colour is None:
                        # Landmarks without a colour are not drawn.
                        continue
                    subtracks.append(
                        Track(landmark['start'] + position_offset,
                              landmark['end'] + position_offset,
                              label=landmark['name'],
                              color=landmark_colour))
                    max_position = max(max_position,
                                       landmark['end'] + position_offset)
                f.add(Multitrack(subtracks))
            break
        else:
            # No landmark set matched these coordinates.
            add_partial_banner(f, position_offset, max_position)
        contig_names = contig_groups[coordinates_name]
        sorted_contig_names = sort_contig_names(contig_names, contig_depths)
        ref_arrows = []
        for contig_name in sorted_contig_names:
            if contig_name.startswith('contig-'):
                # No arrows on original contig tracks.
                continue
            contig_matcher = ContigMatcher(contig_name)
            ref_positions = None
            arrow_count = 0
            for blast_row in blast_rows:
                if not contig_matcher.is_match(blast_row):
                    continue
                # The blast hit is on a different reference than the
                # coordinates, so build a position mapping once per contig.
                if (ref_positions is None and coordinates_name != ''
                        and blast_row['ref_name'] != coordinates_name):
                    ref_positions = map_references(blast_row['ref_name'],
                                                   coordinates_name,
                                                   projects)
                arrow_count += 1
                ref_start = int(blast_row['ref_start'])
                ref_end = int(blast_row['ref_end'])
                if ref_positions is None:
                    coordinate_start = ref_start
                    coordinate_end = ref_end
                else:
                    coordinate_start = ref_positions[ref_start]
                    coordinate_end = ref_positions[ref_end]
                # Arrows are labelled <contig_matcher.num>.<arrow count>.
                ref_arrows.append(
                    Arrow(coordinate_start + position_offset,
                          coordinate_end + position_offset,
                          elevation=1,
                          label=f'{contig_matcher.num}.{arrow_count}'))
        if ref_arrows:
            f.add(ArrowGroup(ref_arrows))
        for contig_name in sorted_contig_names:
            # Rewind, so build_contig() gets a reader over the whole file.
            genome_coverage_csv.seek(0)
            reader = DictReader(genome_coverage_csv)
            build_contig(reader, f, contig_name, max_position,
                         position_offset, blast_rows)

    if not f.elements:
        f.add(Track(1, max_position, label='No contigs found.', color='none'))
    return f
def test_arrow_group(svg_differ):
    """A group of two arrows that don't overlap matches separate arrows.

    The expected figure adds the first arrow with a gap of -h, then the
    second arrow.
    """
    arrow_height = 30

    grouped = Figure()
    grouped.add(Track(1, 500, label='Header'))
    left_arrow = Arrow(1, 200, label='X', h=arrow_height)
    right_arrow = Arrow(300, 500, label='Y', h=arrow_height)
    grouped.add(ArrowGroup([left_arrow, right_arrow]))
    actual_svg = grouped.show()

    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    expected_figure.add(Arrow(1, 200, label='X', h=arrow_height),
                        gap=-arrow_height)
    expected_figure.add(Arrow(300, 500, label='Y', h=arrow_height))
    expected_svg = expected_figure.show()

    svg_differ.assert_equal(actual_svg, expected_svg, 'test_arrow_group')
def plot_blast(blast_csv):
    """ Print the contig_num column of every row in a blast CSV.

    :param blast_csv: open CSV file, or any iterable of CSV lines, with a
        header row that includes a contig_num column
    :raises KeyError: if the header has no contig_num column
    """
    reader = DictReader(blast_csv)
    for row in reader:
        # DictReader rows are dictionaries, so the column is looked up by key.
        print(row['contig_num'])
def test_arrow_group_reverse_overlap(svg_differ):
    """A group where a reversed arrow overlaps another matches separate rows.

    The expected figure adds the first arrow with a gap of 3, then the
    reversed arrow.
    """
    arrow_height = 20

    grouped = Figure()
    grouped.add(Track(1, 500, label='Header'))
    forward_arrow = Arrow(1, 300, label='X', h=arrow_height)
    reversed_arrow = Arrow(400, 250, label='Y', h=arrow_height)
    grouped.add(ArrowGroup([forward_arrow, reversed_arrow]))
    actual_svg = grouped.show()

    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    expected_figure.add(Arrow(1, 300, label='X', h=arrow_height), gap=3)
    expected_figure.add(Arrow(400, 250, label='Y', h=arrow_height))
    expected_svg = expected_figure.show()

    svg_differ.assert_equal(actual_svg, expected_svg, 'test_arrow_group')