def test_map(self):
    """ContigSet.map should apply a function to every contig in the set."""
    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    header_chr1 = loci.Header(reference='chr1', strand='+', category='gypsy')
    header_chr2 = loci.Header(reference='chr2', strand='+', category='gypsy')

    initial = loci.ContigSet(
        loci.Contig(header_chr1,
                    np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype)),
        loci.Contig(header_chr2,
                    np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype)))

    def shift_tips(contig):
        """dummy function that adds 100 to contig loci 'tip's"""
        shifted = np.copy(contig.loci)
        shifted['tip'] += 100
        return loci.Contig(contig.header, shifted)

    expected = loci.ContigSet(
        loci.Contig(header_chr1,
                    np.array([(101, 'gypsy1'), (107, 'gypsy4')], dtype=dtype)),
        loci.Contig(header_chr2,
                    np.array([(103, 'gypsy7'), (109, 'gypsy1')], dtype=dtype)))

    assert initial.map(shift_tips) == expected
def test_create_contig_ids():
    """create_contig_ids should append a unique 'ID' field to each locus."""
    # input loci: start/stop interval plus the matched element name
    dtype_loci_query = np.dtype([('start', np.int64),
                                 ('stop', np.int64),
                                 ('element', 'O')])
    query = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='gypsy'),
            np.array([(1, 5, 'gypsy1'), (7, 9, 'gypsy4')],
                     dtype=dtype_loci_query)),
        loci.Contig(
            loci.Header(reference='chr1', strand='-', category='gypsy'),
            np.array([(3, 8, 'gypsy7'), (9, 12, 'gypsy1')],
                     dtype=dtype_loci_query)))
    # expected output gains an object-typed 'ID' column; the expected values
    # show IDs of the form '<category>_<reference>_<strand>_<position>',
    # where position appears to be 'stop' on '+' and 'start' on '-'
    # (i.e. the locus edge nearest the putative insertion — inferred from
    # the fixture values, confirm against create_contig_ids)
    dtype_loci_answer = np.dtype([('start', np.int64),
                                  ('stop', np.int64),
                                  ('element', 'O'),
                                  ('ID', 'O')])
    answer = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='gypsy'),
            np.array([(1, 5, 'gypsy1', 'gypsy_chr1_+_5'),
                      (7, 9, 'gypsy4', 'gypsy_chr1_+_9')],
                     dtype=dtype_loci_answer)),
        loci.Contig(
            loci.Header(reference='chr1', strand='-', category='gypsy'),
            np.array([(3, 8, 'gypsy7', 'gypsy_chr1_-_3'),
                      (9, 12, 'gypsy1', 'gypsy_chr1_-_9')],
                     dtype=dtype_loci_answer)))
    assert query.map(fingerprint.create_contig_ids) == answer
def test_count_reads_n2():
    """count_reads with n_common_elements=2 should report, per query locus
    and per sample (bam), the read count plus the two most common elements.
    """
    # read tips from two samples ('bam1' and 'bam2') on the same strand
    dtype_loci_reads = np.dtype([('tip', np.int64), ('element', 'O')])
    reads = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='gypsy',
                        source='bam1'),
            np.array([(2, 'gypsy1'), (4, 'gypsy1'), (5, 'gypsy4'),
                      (7, 'gypsy4'), (7, 'gypsy7'), (7, 'gypsy1'),
                      (8, 'gypsy1'), (8, 'gypsy1')],
                     dtype=dtype_loci_reads)),
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='gypsy',
                        source='bam2'),
            np.array([(3, 'gypsy1'), (4, 'gypsy1'), (6, 'gypsy1'),
                      (7, 'gypsy1'), (7, 'gypsy1'), (7, 'gypsy1'),
                      (7, 'gypsy1'), (50, 'gypsy7')],
                     dtype=dtype_loci_reads)))
    # two query intervals to count reads within
    dtype_loci_query = np.dtype([('start', np.int64), ('stop', np.int64)])
    query = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='gypsy'),
            np.array([(1, 15), (30, 60)], dtype=dtype_loci_query)))
    # nested dtype: one 'sample' sub-field per bam, each holding the sample
    # name, its read count, and the top n_common_elements=2 element counts
    dtype_loci_answer = np.dtype([
        ('start', np.int64), ('stop', np.int64), ('median', np.int64),
        ('sample', [('0', [('name', 'O'), ('count', np.int64),
                           ('element', [('0', [('name', 'O'),
                                               ('count', np.int64)]),
                                        ('1', [('name', 'O'),
                                               ('count', np.int64)])])]),
                    ('1', [('name', 'O'), ('count', np.int64),
                           ('element', [('0', [('name', 'O'),
                                               ('count', np.int64)]),
                                        ('1', [('name', 'O'),
                                               ('count', np.int64)])])])])
    ])
    # with trim=True the interval (1, 15) is trimmed to the read extent
    # (2, 8), and (30, 60) to (50, 50); '.' with count 0 pads missing
    # elements — inferred from the fixture values
    answer = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='gypsy'),
            np.array([(2, 8, 7,
                       (('bam1', 8, (('gypsy1', 5), ('gypsy4', 2))),
                        ('bam2', 7, (('gypsy1', 7), ('.', 0))))),
                      (50, 50, 50,
                       (('bam1', 0, (('.', 0), ('.', 0))),
                        ('bam2', 1, (('gypsy7', 1), ('.', 0)))))],
                     dtype=dtype_loci_answer)))
    assert fingerprint.count_reads(query, reads,
                                   trim=True,
                                   n_common_elements=2) == answer
def test_extract_gff_intervals():
    """extract_gff_intervals should read annotated transposon intervals for
    the requested reference and categories from a gff file.
    """
    gff = DATA_PATH + 'testAnnotation-2017-11-27.gff'
    query = fingerprintio.extract_gff_intervals(gff, 'chr1',
                                                ['Gypsy', 'Copia'])
    dtype_loci = np.dtype([('start', np.int64),
                           ('stop', np.int64),
                           ('element', '<O')])
    # one contig per (reference, category); source is the gff basename
    answer = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', category='Gypsy',
                        source='testAnnotation-2017-11-27.gff'),
            np.array([(3150, 3200, 'Gypsy-21_ClassI;chr1:3150-3200'),
                      (24250, 24700, 'Gypsy-21_ClassI;chr1:24250-24700')],
                     dtype=dtype_loci)),
        loci.Contig(
            loci.Header(reference='chr1', category='Copia',
                        source='testAnnotation-2017-11-27.gff'),
            np.array([(98260, 98322, 'Copia-10_ClassI;chr1:98260-98322')],
                     dtype=dtype_loci)))
    assert query == answer
def test_add_append_headers(self):
    """Contigs with same header should be appended"""
    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    shared_header = loci.Header(reference='chr1', strand='+',
                                category='gypsy')
    first = loci.Contig(
        shared_header,
        np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype))
    second = loci.Contig(
        shared_header,
        np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype))

    merged = loci.ContigSet(first)
    merged.add(second, append_duplicate_headers=True)

    # appended rather than replaced: four loci under a single header/contig
    assert len(merged) == 4
    assert len(list(merged.contigs())) == 1
    assert len(merged.headers()) == 1

    expected = np.array([(1, 'gypsy1'), (7, 'gypsy4'),
                         (3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype)
    npt.assert_array_equal(list(merged.contigs())[0].loci, expected)
def match_known_insertions(clusters, known_insertions, distance=0):
    """
    Match clusters to known insertions annotated in the genome.

    Known insertions are represented as an object of :class:`loci.ContigSet`
    created from a gff file.

    Clusters are matched to a known insertion if they are for the same
    category and are within the specified distance of the insertions end.

    Fields required in 'clusters':
    'start': int, 'stop': int, 'median': int

    Fields required in 'known_insertions':
    'start': int, 'stop': int, 'element': str

    Fields appended to return value:
    'known_element': str

    :param clusters: a collection of cluster loci (intervals)
    :type clusters: :class:`loci.ContigSet`
    :param known_insertions: a collection of cluster loci (intervals)
    :type known_insertions: :class:`loci.ContigSet`
    :param distance: maximum distance for connecting a cluster to a known
        insertion
    :type distance: int

    :return: a collection of cluster loci (intervals) tagged with known
        insertions
    :rtype: :class:`loci.ContigSet`
    """
    matched = loci.ContigSet()

    # make known insertion headers un-stranded and drop origin file
    known_insertions = known_insertions.map(
        lambda x: loci.mutate_header(x, strand='.', source=None))

    # loop through contigs
    for contig in clusters.contigs():

        # get relevant known insertions (un-stranded header lookup)
        known = known_insertions[contig.header.mutate(strand='.')]

        # one match entry per cluster locus, presumably the matched
        # element name or a placeholder — verify against
        # _known_insertion_matcher
        matches = np.array(list(_known_insertion_matcher(contig,
                                                         known,
                                                         distance=distance)))
        # NOTE(review): this converts the same data through np.array twice;
        # the first call looks redundant — confirm before simplifying
        matches = np.array(matches,
                           dtype=np.dtype([('known_element', '<O')]))

        # bind the 'known_element' column onto the existing cluster fields
        matched.add(loci.Contig(contig.header,
                                util.numpy.array.bind(contig.loci, matches)))
    return matched
def test_update(self):
    """update() should absorb contigs yielded by another set's iterator."""
    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    contig_a = loci.Contig(
        loci.Header(reference='chr1', strand='+', category='gypsy'),
        np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype))
    contig_b = loci.Contig(
        loci.Header(reference='chr2', strand='+', category='gypsy'),
        np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype))

    target = loci.ContigSet(contig_a)
    target.update(loci.ContigSet(contig_b).contigs())

    # both contigs present: four loci across two headers
    assert len(target) == 4
    assert len(list(target.contigs())) == 2
    assert len(target.headers()) == 2
def test_headers(self):
    """headers() should yield the header of every contig in the set."""
    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    header_a = loci.Header(reference='chr1', strand='+', category='gypsy')
    header_b = loci.Header(reference='chr2', strand='+', category='gypsy')
    contig_a = loci.Contig(
        header_a, np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype))
    contig_b = loci.Contig(
        header_b, np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype))

    found = set(loci.ContigSet(contig_a, contig_b).headers())
    assert found == {header_a, header_b}
def test_dtype_loci(self):
    """dtype_loci() should report the loci dtype shared by all contigs."""
    dtype_loci = np.dtype([('tip', np.int64), ('element', 'O')])
    header_1 = loci.Header(reference='chr1', strand='+', category='gypsy')
    contig_1 = loci.Contig(
        header_1,
        np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype_loci))
    header_2 = loci.Header(reference='chr2', strand='+', category='gypsy')
    contig_2 = loci.Contig(
        header_2,
        np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype_loci))
    query = loci.ContigSet(contig_1, contig_2)
    # Fixed copy-paste bug: the original asserted against contig_1 twice;
    # the second assertion clearly intended to cover contig_2.
    assert query.dtype_loci() == contig_1.loci.dtype
    assert query.dtype_loci() == contig_2.loci.dtype
def test_init_clashing_headers(self):
    """Contigs with same header should cause ValueError"""
    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    shared_header = loci.Header(reference='chr1', strand='+',
                                category='gypsy')
    first = loci.Contig(
        shared_header,
        np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype))
    second = loci.Contig(
        shared_header,
        np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype))

    # without append_duplicate_headers the constructor must reject clashes
    raised = False
    try:
        loci.ContigSet(first, second)
    except ValueError:
        raised = True
    assert raised
def test_iter_values(self):
    """iter_values() should flatten header fields together with each locus."""
    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    contig_a = loci.Contig(
        loci.Header(reference='chr1', strand='+', category='gypsy'),
        np.array([(1, 'gypsy1'), (7, 'gypsy4')], dtype=dtype))
    contig_b = loci.Contig(
        loci.Header(reference='chr2', strand='+', category='gypsy'),
        np.array([(3, 'gypsy7'), (9, 'gypsy1')], dtype=dtype))

    expected = {('chr1', '+', 'gypsy', 1, 'gypsy1'),
                ('chr1', '+', 'gypsy', 7, 'gypsy4'),
                ('chr2', '+', 'gypsy', 3, 'gypsy7'),
                ('chr2', '+', 'gypsy', 9, 'gypsy1')}
    assert set(loci.ContigSet(contig_a, contig_b).iter_values()) == expected
def fingerprint(bams, categories, references, minimum_reads, epsilon,
                minimum_epsilon=0, n_common_elements=0, method='SDBICAN',
                fingerprint_buffer=0, join_distance=0, quality=0,
                transposon_tag='ME', annotation=None,
                max_count_proportion=True, cores=1):
    """
    Create a transposon fingerprint of one or more bam files.

    One job is built per reference and dispatched to
    `_fingerprint_dispatch`, serially or via a process pool; the resulting
    contigs are merged into a single :class:`loci.ContigSet`.

    :param bams: path(s) to one or more bam files
    :param categories: name(s) of one or more transposon categories
        (presumably (super-)family prefixes — confirm against callers)
    :param references: name(s) of bam references, or [None] to read the
        reference names from the bam headers
    :param minimum_reads: clustering threshold passed to the dispatcher
        — TODO confirm semantics
    :param epsilon: clustering distance parameter — TODO confirm semantics
    :param minimum_epsilon: lower bound for epsilon — TODO confirm
    :param n_common_elements: number of most-common element counts to
        report per sample — TODO confirm
    :param method: clustering method name (default 'SDBICAN')
    :param fingerprint_buffer: buffer distance passed to the dispatcher
    :param join_distance: pairing distance passed to the dispatcher
    :param quality: minimum read mapping quality
    :param transposon_tag: sam tag holding each read's mate-element name
    :param annotation: optional annotation input passed to the dispatcher
    :param max_count_proportion: flag passed to the dispatcher
        — TODO confirm semantics
    :param cores: number of worker processes (1 = run in-process)

    :return: the merged fingerprint loci
    :rtype: :class:`loci.ContigSet`
    """
    # normalise scalar arguments to lists
    if isinstance(bams, str):
        bams = [bams]
    if isinstance(references, str):
        references = [references]
    if isinstance(categories, str):
        categories = [categories]

    # fall back to the references declared in the bam headers
    if references == [None]:
        references = fingerprintio.extract_references_from_bams(*bams)

    # every argument except `references` is wrapped in a single-element
    # list, so product() yields exactly one job per reference
    jobs = product([bams],
                   [annotation],
                   [categories],
                   references,  # job per reference
                   [quality],
                   [transposon_tag],
                   [minimum_reads],
                   [epsilon],
                   [minimum_epsilon],
                   [n_common_elements],
                   [method],
                   [fingerprint_buffer],
                   [join_distance],
                   [max_count_proportion])

    result = loci.ContigSet()

    if cores == 1:
        # run on a single process
        for job in jobs:
            result.update(_fingerprint_dispatch(*job).contigs())
    else:
        # create a pool of processes
        with Pool(cores) as pool:
            parts = pool.starmap(_fingerprint_dispatch, jobs)
            for part in parts:
                result.update(part.contigs())
    return result
def test_extract_informative_read_tips():
    """
    Test extraction of informative reads.
    Not all families of reads extracted.
    Family with no reads ('NOT-A-FAMILY') extracted.
    """
    bam = DATA_PATH + 'testA-2017-06-08.bam'
    query = fingerprintio.extract_informative_read_tips(
        bam,
        'chr1',
        ['Gypsy', 'PIF-Harbinger', 'NOT-A-FAMILY'],
        quality=0,
        tag='ME')
    dtype_loci = np.dtype([('tip', np.int64), ('element', 'O')])
    # one contig per (strand, category) combination, including empty
    # contigs for the category with no matching reads
    answer = loci.ContigSet(
        loci.Contig(
            loci.Header(reference='chr1', strand='+', category='Gypsy',
                        source='testA-2017-06-08.bam'),
            np.array([(2452, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2506, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2553, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2566, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2577, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2577, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2841, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2841, 'Gypsy_Gypsy26_chr15_18793972'),
                      (2841, 'Gypsy_Gypsy26_chr8_2502854'),
                      (2973, 'Gypsy_Gypsy26_chr18_27801424'),
                      (3024, 'Gypsy_Gypsy26_chr8_5114633'),
                      (3062, 'Gypsy_Gypsy26_chr8_5114633'),
                      (3039, 'Gypsy_Gypsy26_chr2_1987286'),
                      (3138, 'Gypsy_Gypsy26_chr18_27801424'),
                      (24065, 'Gypsy_Gypsy12_chr1_12715223'),
                      (24184, 'Gypsy_Gypsy7_chr4_10302390'),
                      (24195, 'Gypsy_Gypsy12_chr1_12715223'),
                      (24217, 'Gypsy_Gypsy12_chr1_12715223')],
                     dtype=dtype_loci)),
        loci.Contig(
            loci.Header(reference='chr1', strand='-', category='Gypsy',
                        source='testA-2017-06-08.bam'),
            np.array([(3217, 'Gypsy_Gypsy26_chr15_18793972'),
                      (3226, 'Gypsy_Gypsy26_chr15_18793972'),
                      (3246, 'Gypsy_Gypsy26_chr15_18793972'),
                      (3405, 'Gypsy_Gypsy26_chr2_1987286'),
                      (3646, 'Gypsy_Gypsy26_chr15_18793972'),
                      (3776, 'Gypsy_Gypsy26_chr18_27801424'),
                      (3779, 'Gypsy_Gypsy26_chr8_5114633'),
                      (3800, 'Gypsy_Gypsy26_chr8_5114633'),
                      (24787, 'Gypsy_Gypsy7_chr4_10302390'),
                      (24799, 'Gypsy_Gypsy29_chr11_13193899'),
                      (24850, 'Gypsy_Gypsy7_chr4_10302390'),
                      (24854, 'Gypsy_Gypsy12_chr1_12715223'),
                      (24857, 'Gypsy_Gypsy23_chr15_8310356'),
                      (24860, 'Gypsy_Gypsy23_chrUn_38723460'),
                      (24872, 'Gypsy_Gypsy23_chrUn_38723460'),
                      (24877, 'Gypsy_GYVIT1_chr6_13115950'),
                      (24894, 'Gypsy_Gypsy23_chrUn_38723460'),
                      (24895, 'Gypsy_Gypsy12_chr1_12715223'),
                      (24910, 'Gypsy_Gypsy23_chr14_11656393'),
                      (24919, 'Gypsy_Gypsy23_chrUn_38723460')],
                     dtype=dtype_loci)),
        loci.Contig(
            loci.Header(reference='chr1', strand='+',
                        category='PIF-Harbinger',
                        source='testA-2017-06-08.bam'),
            np.array([(21282, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579'),
                      (21308, 'PIF-Harbinger_Harbinger-3_chr2_4407914'),
                      (21435, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579'),
                      (21448, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579')],
                     dtype=dtype_loci)),
        loci.Contig(
            loci.Header(reference='chr1', strand='-',
                        category='PIF-Harbinger',
                        source='testA-2017-06-08.bam'),
            np.array([(21834, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579'),
                      (21945, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579'),
                      (21968, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579'),
                      (21982, 'PIF-Harbinger_Harbinger-3N3_chr16_20723579')],
                     dtype=dtype_loci)),
        loci.Contig(
            loci.Header(reference='chr1', strand='+',
                        category='NOT-A-FAMILY',
                        source='testA-2017-06-08.bam'),
            np.array([], dtype=dtype_loci)),
        loci.Contig(
            loci.Header(reference='chr1', strand='-',
                        category='NOT-A-FAMILY',
                        source='testA-2017-06-08.bam'),
            np.array([], dtype=dtype_loci)))
    assert query == answer
def extract_informative_read_tips(bams, references, categories,
                                  quality=0, tag='ME'):
    """
    Extract the tips of 'informative' reads from one or more bam files.

    Informative reads are those that flank potential transposon insertions.
    The specific element (mate element) that each read is linked to should
    be stored using a sam tag which is 'ME' by default.

    Reads are categorised by transposon (super-)families by matching family
    names to the start of each reads mate-element name.

    :param bams: Path(s) to one or more bam files
    :type bams: str | list[str]
    :param references: Name(s) of one or more bam references
    :type references: str | list[str]
    :param categories: Name(s) of one or more transposon (super-)families
    :type categories: str | list[str]
    :param quality: Minimum mapping quality of reads
    :type quality: int
    :param tag: Sam tag containing each reads mate element name
    :type tag: str

    :return: A set of contigs of read tips categorised by reference, strand,
        category (family), and source (bam file name)
    :rtype: :class:`loci2.ContigSet`
    """
    # normalise scalar arguments to lists
    if isinstance(bams, str):
        bams = [bams]
    if isinstance(references, str):
        references = [references]
    if isinstance(categories, str):
        categories = [categories]

    # pre-create an (initially empty) deque for every possible header so
    # combinations with no reads still yield an empty contig
    keys = product([ref.split(':')[0] for ref in references],
                   ['+', '-'],
                   categories,
                   [os.path.basename(bam) for bam in bams])
    dictionary = {loci.Header(*key): deque() for key in keys}

    for bam in bams:
        for reference in references:
            for read in _extract_bam_read_data(bam, reference,
                                               quality=quality, tags=[tag]):
                # match to a category
                category_matches = tuple(
                    filter(lambda x: read[tag].startswith(x), categories))

                # only include reads for specified categories
                if category_matches:

                    # longest matching category is the best category
                    category = max(category_matches, key=len)

                    # read header
                    header = loci.Header(reference=read['reference'],
                                         strand=read['strand'],
                                         category=category,
                                         source=read['source'])

                    # append loci data to queue: the tip is the read end
                    # nearest the putative insertion (start on '-',
                    # stop on '+')
                    tip = read['start'] if \
                        read['strand'] == '-' else \
                        read['stop']
                    dictionary[header].append((tip, read[tag]))

    dtype = np.dtype([('tip', np.int64), ('element', 'O')])
    return loci.ContigSet(*(loci.Contig(header, np.array(data, dtype=dtype))
                            for header, data in dictionary.items()))
def extract_gff_intervals(gff, references, categories):
    """
    Extract known transposon intervals from a gff annotation file.

    :param gff: Path to a gff file of transposon annotations
    :type gff: str
    :param references: Name(s) of one or more bam references
    :type references: str | list[str]
    :param categories: Name(s) of one or more transposon (super-)families
    :type categories: str | list[str]

    :return: A set of contigs of intervals categorised by reference,
        category (family), and source (gff file name)
    :rtype: :class:`loci2.ContigSet`
    """
    # normalise scalar arguments to lists
    if isinstance(references, str):
        references = [references]
    if isinstance(categories, str):
        categories = [categories]

    source = os.path.basename(gff)
    # strip any ':start-stop' suffix from reference names
    references = [reference.split(':')[0] for reference in references]

    # pre-create an (initially empty) deque per (reference, category) so
    # combinations with no annotations still yield an empty contig
    keys = product(references, categories)
    dictionary = {
        loci.Header(reference=key[0], category=key[1], source=source): deque()
        for key in keys
    }

    with zopen(gff, 'rb') as infile:
        for line in infile:
            line = line.decode().split('\t')

            # match to reference:
            reference = decode_column(line[0])
            if reference in references:

                # match to a category (gff column 3, the feature type)
                feature_type = decode_column(line[2])
                category_matches = tuple(
                    filter(lambda x: feature_type.startswith(x), categories))

                # only include reads for specified categories
                if category_matches:

                    # longest matching category is the best category
                    category = max(category_matches, key=len)

                    header = loci.Header(reference=reference,
                                         category=category,
                                         source=source)
                    # gff columns 4 and 5 are the 1-based start/stop
                    dictionary[header].append(
                        (int(line[3]), int(line[4]), feature_type))

    dtype = np.dtype([('start', np.int64),
                      ('stop', np.int64),
                      ('element', '<O')])
    return loci.ContigSet(*(loci.Contig(header, np.array(data, dtype=dtype))
                            for header, data in dictionary.items()))
def extract_anchor_intervals(bams, references, known_transposons,
                             insert_size, quality=0):
    """
    Extract 'anchor' read inserts from one or more bam files.

    Anchor reads are paired reads in which neither has been mapped to a
    known transposon. The pair has then been mapped to a reference genome.
    Assuming that the insert size of the pair is smaller than the length of
    a transposon, the insert can be used to indicate a section of the
    samples genome in which there are no transposons on at least one
    allele. This can be used to infer heterozygosity of transposon
    insertions.

    Known transposon inserts from the reference genome are required for
    checking that anchor inserts overlapping these transposons are of a
    sensible length.

    Anchor reads are compressed to their interval unions for efficiency.

    :param bams: Path(s) to one or more bam files
    :type bams: str | list[str]
    :param references: Name(s) of one or more bam references
    :type references: str | list[str]
    :param known_transposons: Transposons known from the reference genome
    :type known_transposons: :class:`loci2.ContigSet`
    :param insert_size: Read pair insert size
    :type insert_size: int
    :param quality: Minimum mapping quality of anchor reads
    :type quality: int

    :return: A set of contigs of unions of anchor inserts categorised by
        reference, strand, and source (bam file name)
    :rtype: :class:`loci2.ContigSet`
    """
    # normalise scalar arguments to lists
    if isinstance(bams, str):
        bams = [bams]
    if isinstance(references, str):
        references = [references]

    # simplify known transposon headers for comparison
    known_transposons = known_transposons.map(lambda x: loci.mutate_header(
        x, strand='.', category=None, source=None),
                                              append_duplicate_headers=True)

    # one job per (bam, reference) combination
    jobs = product(bams, references)
    dtype = np.dtype([('start', np.int64), ('stop', np.int64)])
    intervals = loci.ContigSet()

    for bam, reference in jobs:
        header = loci.Header(reference=reference.split(':')[0],
                             source=os.path.basename(bam),
                             strand='.')
        anchors = np.fromiter(_extract_bam_anchor_insert_data(bam,
                                                              reference,
                                                              quality=quality),
                              dtype=dtype)
        anchor_lengths = interval.lengths(anchors)

        # calculate lengths of known transposons within each anchor interval
        reference_name = reference.split(':')[0]
        local_tes_header = loci.Header(reference=reference_name, strand='.')
        local_tes = known_transposons[local_tes_header]
        contained_te_lengths = interval.length_of_contains(
            anchors, local_tes.loci)

        # filter anchors based on insert size: discount the length of any
        # contained known transposons before comparing to insert_size
        adjusted_anchor_lengths = anchor_lengths - contained_te_lengths
        anchors = anchors[adjusted_anchor_lengths <= insert_size]

        # use unions of filtered anchors as loci
        intervals.add(loci.unions(loci.Contig(header=header, loci=anchors)))
    return intervals
def pair_clusters(clusters, distance=0, use_known_elements=True):
    """
    Join matching clusters on opposite strands.

    Clusters of the same category are joined if they are within
    2 * distance of one another. Clusters may also be joined if they have
    both been matched to the same known element.

    Fields required in 'clusters':
    'start': int, 'stop': int, 'median': int, 'known_element': str,
    'ID': str

    Fields appended to return value:
    'pair' str

    :param clusters: a collection of cluster loci (intervals)
    :type clusters: :class:`loci.ContigSet`
    :param distance: the distance to search out from each cluster
    :type distance: int
    :param use_known_elements: specify whether to join pairs based on a
        common known element (default: True)
    :type use_known_elements: bool

    :return: a collection of cluster loci (intervals) with 'pair' field
    :rtype: :class:`loci.ContigSet`
    """
    joint_clusters = loci.ContigSet()
    dtype_join_data = np.dtype([("pair", "<O")])

    # new headers based on old but un-stranded
    new_headers = {h.mutate(strand='.') for h in clusters.headers()}
    for header in new_headers:

        # get forward and reverse loci for this key
        forward = clusters[header.mutate(strand='+')]
        reverse = clusters[header.mutate(strand='-')]

        # sort them into pairs based on median
        pairs = _cluster_pairer(forward,
                                reverse,
                                distance=distance,
                                use_known_elements=use_known_elements)

        # create arrays for the new data, defaulting to the '.' placeholder
        forward_join_data = np.empty(len(forward), dtype=dtype_join_data)
        forward_join_data["pair"] = '.'
        reverse_join_data = np.empty(len(reverse), dtype=dtype_join_data)
        reverse_join_data["pair"] = '.'

        # cross-reference IDs for complete pairs; clusters left unpaired
        # keep the '.' placeholder (dead `else: pass` branch removed)
        for f, r in pairs:
            if f is not None and r is not None:
                forward_join_data[f]["pair"] = reverse.loci[r]["ID"]
                reverse_join_data[r]["pair"] = forward.loci[f]["ID"]

        # combine existing data with join data and add to new contig set
        joint_clusters.add(
            loci.Contig(header.mutate(strand='+'),
                        util.numpy.array.bind(forward.loci,
                                              forward_join_data)))
        joint_clusters.add(
            loci.Contig(header.mutate(strand='-'),
                        util.numpy.array.bind(reverse.loci,
                                              reverse_join_data)))
    return joint_clusters
def test_init_empty(self):
    """A ContigSet can be constructed with no contigs."""
    empty_set = loci.ContigSet()
    assert type(empty_set) == loci.ContigSet