def testSetAttribute(self):
    """
    Check that protocol.setAttribute stores an integer in the values of
    an attribute entry and that it reads back through int32_value.
    """
    expected = 5
    read = protocol.ReadAlignment()
    values = read.attributes.attr['key'].values
    protocol.setAttribute(values, expected)
    # items() returns a non-indexable view on Python 3, so materialise
    # it before taking the first (key, value) pair. The key itself is
    # not needed by this check.
    entries = list(read.attributes.attr.items())
    _, value = entries[0]
    result = value.values[0].int32_value
    self.assertEqual(result, expected)
def serializeAttributes(self, msg):
    """
    Sets the attributes of a message during serialization.

    Each key obtained by iterating self.getAttributes() is handed,
    together with its value, to protocol.setAttribute, which receives
    the ``values`` of the matching msg.attributes.attr entry as its
    target. The same msg object is returned.
    """
    sourceAttributes = self.getAttributes()
    for name in sourceAttributes:
        targetValues = msg.attributes.attr[name].values
        protocol.setAttribute(targetValues, sourceAttributes[name])
    return msg
def serializeMetadataAttributes(self, msg, tier=0):
    """
    Sets the attributes of a message for metadata during serialization.

    Only attributes for which self.validateAttribute(name, attributes,
    tier) returns exactly True are written. For those, the value of
    every key of every dictionary listed under the attribute is
    collected, in iteration order, into one flat list that is passed
    to protocol.setAttribute. The same msg object is returned.
    """
    sourceAttributes = self.getAttributes()
    for name in sourceAttributes:
        # Identity check on purpose: a merely truthy result does not
        # count as validated.
        if self.validateAttribute(name, sourceAttributes, tier) is not True:
            continue
        flattened = [
            entry[field]
            for entry in sourceAttributes[name]
            for field in entry
        ]
        protocol.setAttribute(
            msg.attributes.attr[name].values, flattened)
    return msg
def convertReadAlignment(self, read, readGroupSet, readGroupId):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    :param read: the pysam read to convert; its flag, qualities,
        sequence, cigar, tags and mate fields are read.
    :param readGroupSet: object whose getReadAlignmentId(ret) supplies
        the id of the converted alignment.
    :param readGroupId: value stored in the result's read_group_id.
    :return: the populated protocol.ReadAlignment.
    """
    samFile = self.getFileHandle(self._dataUrl)
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # ret.fragmentId = 'TODO'
    # query_qualities is None for reads without stored base qualities;
    # extend(None) would raise, so aligned_quality is left empty then.
    if read.query_qualities is not None:
        ret.aligned_quality.extend(read.query_qualities)
    ret.aligned_sequence = read.query_sequence
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
        ret.ClearField("alignment")
    else:
        ret.alignment.CopyFrom(protocol.LinearAlignment())
        ret.alignment.mapping_quality = read.mapping_quality
        ret.alignment.position.CopyFrom(protocol.Position())
        ret.alignment.position.reference_name = samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        ret.alignment.position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
            ret.alignment.position.strand = protocol.NEG_STRAND
        for operation, length in read.cigar:
            gaCigarUnit = ret.alignment.cigar.add()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operation_length = length
            gaCigarUnit.reference_sequence = ""  # TODO fix this!
    ret.duplicate_fragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_READ)
    ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_QUALITY_CHECK)
    ret.fragment_length = read.template_length
    ret.fragment_name = read.query_name
    for key, value in read.tags:
        # Useful for inspecting the structure of read tags
        # print("{key} {ktype}: {value}, {vtype}".format(
        #     key=key, ktype=type(key), value=value, vtype=type(value)))
        protocol.setAttribute(ret.attributes.attr[key].values, value)
    # The mate position is cleared in every case and only filled in
    # when the mate is flagged as mapped.
    ret.next_mate_position.Clear()
    if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
        if read.next_reference_id != -1:
            ret.next_mate_position.reference_name = samFile.getrname(
                read.next_reference_id)
        else:
            ret.next_mate_position.reference_name = ""
        ret.next_mate_position.position = read.next_reference_start
        ret.next_mate_position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
            ret.next_mate_position.strand = protocol.NEG_STRAND
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
        ret.number_reads = 2
    else:
        ret.number_reads = 1
    # read_number encoding: -1 when neither pair flag is set, 0 for
    # first in pair only, 1 for second in pair only, 2 when both are set.
    ret.read_number = -1
    if SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR):
        if SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
            ret.read_number = 2
        else:
            ret.read_number = 0
    elif SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
        ret.read_number = 1
    ret.improper_placement = not SamFlags.isFlagSet(
        read.flag, SamFlags.READ_PROPER_PAIR)
    ret.read_group_id = readGroupId
    ret.secondary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # The id is computed last, from the otherwise complete message.
    ret.id = readGroupSet.getReadAlignmentId(ret)
    return ret
def assertAlignmentsEqual(self, gaAlignment, pysamAlignment,
                          readGroupInfo):
    """
    Assert that a GA4GH ReadAlignment matches the pysam read it was
    converted from.

    :param gaAlignment: the converted protocol.ReadAlignment.
    :param pysamAlignment: the original pysam read.
    :param readGroupInfo: object providing ``samFile`` (used to resolve
        reference names) and ``id`` (the expected read_group_id).
    """
    samFlags = reads.SamFlags
    if pysamAlignment.query_qualities is None:
        self.assertEqual(gaAlignment.aligned_quality, [])
    else:
        self.assertEqual(
            gaAlignment.aligned_quality,
            list(pysamAlignment.query_qualities))
    self.assertEqual(
        gaAlignment.aligned_sequence, pysamAlignment.query_sequence)
    if samFlags.isFlagSet(pysamAlignment.flag, samFlags.READ_UNMAPPED):
        # An unmapped read must carry an empty alignment message.
        self.assertEqual(0, gaAlignment.alignment.ByteSize())
    else:
        self.assertEqual(
            gaAlignment.alignment.mapping_quality,
            pysamAlignment.mapping_quality)
        self.assertEqual(
            gaAlignment.alignment.position.reference_name,
            readGroupInfo.samFile.getrname(pysamAlignment.reference_id))
        self.assertEqual(
            gaAlignment.alignment.position.position,
            pysamAlignment.reference_start)
        # TODO test reverseStrand on position and on
        # nextMatePosition once it has been implemented.
        self.assertCigarEqual(
            gaAlignment.alignment.cigar, pysamAlignment.cigar)
    self.assertFlag(
        gaAlignment.duplicate_fragment,
        pysamAlignment, samFlags.DUPLICATE_READ)
    self.assertFlag(
        gaAlignment.failed_vendor_quality_checks,
        pysamAlignment, samFlags.FAILED_QUALITY_CHECK)
    self.assertEqual(
        gaAlignment.fragment_length, pysamAlignment.template_length)
    self.assertEqual(
        gaAlignment.fragment_name, pysamAlignment.query_name)
    compoundId = datamodel.ReadAlignmentCompoundId(
        self._gaObject.getCompoundId(), pysamAlignment.query_name)
    self.assertEqual(gaAlignment.id, str(compoundId))
    # Rebuild the expected attributes from the pysam tags and compare
    # the JSON forms of both attribute messages.
    ret = protocol.ReadAlignment()
    for key, value in pysamAlignment.tags:
        protocol.setAttribute(ret.attributes.attr[key].values, value)
    self.assertEqual(
        protocol.toJson(gaAlignment.attributes),
        protocol.toJson(ret.attributes))
    if samFlags.isFlagSet(pysamAlignment.flag, samFlags.MATE_UNMAPPED):
        self.assertEqual(0, gaAlignment.next_mate_position.ByteSize())
    else:
        self.assertEqual(
            gaAlignment.next_mate_position.position,
            pysamAlignment.next_reference_start)
        if pysamAlignment.next_reference_id != -1:
            self.assertEqual(
                gaAlignment.next_mate_position.reference_name,
                readGroupInfo.samFile.getrname(
                    pysamAlignment.next_reference_id))
        else:
            self.assertEqual(
                gaAlignment.next_mate_position.reference_name, "")
    if gaAlignment.number_reads == 1:
        self.assertFlag(False, pysamAlignment, samFlags.READ_PAIRED)
    elif gaAlignment.number_reads == 2:
        self.assertFlag(True, pysamAlignment, samFlags.READ_PAIRED)
    else:
        # we shouldn't be setting numberReads to anything else
        self.fail("unexpected number_reads: {}".format(
            gaAlignment.number_reads))
    # Expected (FIRST_IN_PAIR, SECOND_IN_PAIR) flag states for each
    # permitted read_number. Values are compared by equality; an
    # identity test against an int literal is not reliable.
    expectedPairFlags = {
        -1: (False, False),
        0: (True, False),
        1: (False, True),
        2: (True, True),
    }
    readNumber = gaAlignment.read_number
    if readNumber not in expectedPairFlags:
        # we shouldn't be setting readNumber to anything else
        self.fail("unexpected read_number: {}".format(readNumber))
    firstExpected, secondExpected = expectedPairFlags[readNumber]
    self.assertFlag(
        firstExpected, pysamAlignment, samFlags.FIRST_IN_PAIR)
    self.assertFlag(
        secondExpected, pysamAlignment, samFlags.SECOND_IN_PAIR)
    self.assertFlag(
        not gaAlignment.improper_placement,
        pysamAlignment, samFlags.READ_PROPER_PAIR)
    self.assertEqual(gaAlignment.read_group_id, readGroupInfo.id)
    self.assertFlag(
        gaAlignment.secondary_alignment,
        pysamAlignment, samFlags.SECONDARY_ALIGNMENT)
    self.assertFlag(
        gaAlignment.supplementary_alignment,
        pysamAlignment, samFlags.SUPPLEMENTARY_ALIGNMENT)
def convertReadAlignment(self, read, readGroupSet, readGroupId):
    """
    Convert a pysam ReadAlignment to a GA4GH ReadAlignment

    :param read: the pysam read to convert; its flag, qualities,
        sequence, cigar, tags and mate fields are read.
    :param readGroupSet: object whose getReadAlignmentId(ret) supplies
        the id of the converted alignment.
    :param readGroupId: value stored in the result's read_group_id.
    :return: the populated protocol.ReadAlignment.
    """
    samFile = self.getFileHandle(self._dataUrl)
    # TODO fill out remaining fields
    # TODO refine in tandem with code in converters module
    ret = protocol.ReadAlignment()
    # ret.fragmentId = 'TODO'
    # query_qualities is None for reads without stored base qualities;
    # extend(None) would raise, so aligned_quality is left empty then.
    if read.query_qualities is not None:
        ret.aligned_quality.extend(read.query_qualities)
    ret.aligned_sequence = read.query_sequence
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_UNMAPPED):
        ret.ClearField("alignment")
    else:
        ret.alignment.CopyFrom(protocol.LinearAlignment())
        ret.alignment.mapping_quality = read.mapping_quality
        ret.alignment.position.CopyFrom(protocol.Position())
        ret.alignment.position.reference_name = samFile.getrname(
            read.reference_id)
        ret.alignment.position.position = read.reference_start
        ret.alignment.position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.READ_REVERSE_STRAND):
            ret.alignment.position.strand = protocol.NEG_STRAND
        for operation, length in read.cigar:
            gaCigarUnit = ret.alignment.cigar.add()
            gaCigarUnit.operation = SamCigar.int2ga(operation)
            gaCigarUnit.operation_length = length
            gaCigarUnit.reference_sequence = ""  # TODO fix this!
    ret.duplicate_fragment = SamFlags.isFlagSet(
        read.flag, SamFlags.DUPLICATE_READ)
    ret.failed_vendor_quality_checks = SamFlags.isFlagSet(
        read.flag, SamFlags.FAILED_QUALITY_CHECK)
    ret.fragment_length = read.template_length
    ret.fragment_name = read.query_name
    for key, value in read.tags:
        # Useful for inspecting the structure of read tags
        # print("{key} {ktype}: {value}, {vtype}".format(
        #     key=key, ktype=type(key), value=value, vtype=type(value)))
        protocol.setAttribute(ret.attributes.attr[key].values, value)
    # The mate position is cleared in every case and only filled in
    # when the mate is flagged as mapped.
    ret.next_mate_position.Clear()
    if not SamFlags.isFlagSet(read.flag, SamFlags.MATE_UNMAPPED):
        if read.next_reference_id != -1:
            ret.next_mate_position.reference_name = samFile.getrname(
                read.next_reference_id)
        else:
            ret.next_mate_position.reference_name = ""
        ret.next_mate_position.position = read.next_reference_start
        ret.next_mate_position.strand = protocol.POS_STRAND
        if SamFlags.isFlagSet(read.flag, SamFlags.MATE_REVERSE_STRAND):
            ret.next_mate_position.strand = protocol.NEG_STRAND
    if SamFlags.isFlagSet(read.flag, SamFlags.READ_PAIRED):
        ret.number_reads = 2
    else:
        ret.number_reads = 1
    # read_number encoding: -1 when neither pair flag is set, 0 for
    # first in pair only, 1 for second in pair only, 2 when both are set.
    ret.read_number = -1
    if SamFlags.isFlagSet(read.flag, SamFlags.FIRST_IN_PAIR):
        if SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
            ret.read_number = 2
        else:
            ret.read_number = 0
    elif SamFlags.isFlagSet(read.flag, SamFlags.SECOND_IN_PAIR):
        ret.read_number = 1
    ret.improper_placement = not SamFlags.isFlagSet(
        read.flag, SamFlags.READ_PROPER_PAIR)
    ret.read_group_id = readGroupId
    ret.secondary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SECONDARY_ALIGNMENT)
    ret.supplementary_alignment = SamFlags.isFlagSet(
        read.flag, SamFlags.SUPPLEMENTARY_ALIGNMENT)
    # The id is computed last, from the otherwise complete message.
    ret.id = readGroupSet.getReadAlignmentId(ret)
    return ret
def assertAlignmentsEqual(self, gaAlignment, pysamAlignment,
                          readGroupInfo):
    """
    Assert that a GA4GH ReadAlignment matches the pysam read it was
    converted from.

    :param gaAlignment: the converted protocol.ReadAlignment.
    :param pysamAlignment: the original pysam read.
    :param readGroupInfo: object providing ``samFile`` (used to resolve
        reference names) and ``id`` (the expected read_group_id).
    """
    samFlags = reads.SamFlags
    if pysamAlignment.query_qualities is None:
        self.assertEqual(gaAlignment.aligned_quality, [])
    else:
        self.assertEqual(
            gaAlignment.aligned_quality,
            list(pysamAlignment.query_qualities))
    self.assertEqual(
        gaAlignment.aligned_sequence, pysamAlignment.query_sequence)
    if samFlags.isFlagSet(pysamAlignment.flag, samFlags.READ_UNMAPPED):
        # An unmapped read must carry an empty alignment message.
        self.assertEqual(0, gaAlignment.alignment.ByteSize())
    else:
        self.assertEqual(
            gaAlignment.alignment.mapping_quality,
            pysamAlignment.mapping_quality)
        self.assertEqual(
            gaAlignment.alignment.position.reference_name,
            readGroupInfo.samFile.getrname(pysamAlignment.reference_id))
        self.assertEqual(
            gaAlignment.alignment.position.position,
            pysamAlignment.reference_start)
        # TODO test reverseStrand on position and on
        # nextMatePosition once it has been implemented.
        self.assertCigarEqual(
            gaAlignment.alignment.cigar, pysamAlignment.cigar)
    self.assertFlag(
        gaAlignment.duplicate_fragment,
        pysamAlignment, samFlags.DUPLICATE_READ)
    self.assertFlag(
        gaAlignment.failed_vendor_quality_checks,
        pysamAlignment, samFlags.FAILED_QUALITY_CHECK)
    self.assertEqual(
        gaAlignment.fragment_length, pysamAlignment.template_length)
    self.assertEqual(
        gaAlignment.fragment_name, pysamAlignment.query_name)
    compoundId = datamodel.ReadAlignmentCompoundId(
        self._gaObject.getCompoundId(), pysamAlignment.query_name)
    self.assertEqual(gaAlignment.id, str(compoundId))
    # Rebuild the expected attributes from the pysam tags and compare
    # the JSON forms of both attribute messages.
    ret = protocol.ReadAlignment()
    for key, value in pysamAlignment.tags:
        protocol.setAttribute(ret.attributes.attr[key].values, value)
    self.assertEqual(
        protocol.toJson(gaAlignment.attributes),
        protocol.toJson(ret.attributes))
    if samFlags.isFlagSet(pysamAlignment.flag, samFlags.MATE_UNMAPPED):
        self.assertEqual(0, gaAlignment.next_mate_position.ByteSize())
    else:
        self.assertEqual(
            gaAlignment.next_mate_position.position,
            pysamAlignment.next_reference_start)
        if pysamAlignment.next_reference_id != -1:
            self.assertEqual(
                gaAlignment.next_mate_position.reference_name,
                readGroupInfo.samFile.getrname(
                    pysamAlignment.next_reference_id))
        else:
            self.assertEqual(
                gaAlignment.next_mate_position.reference_name, "")
    if gaAlignment.number_reads == 1:
        self.assertFlag(False, pysamAlignment, samFlags.READ_PAIRED)
    elif gaAlignment.number_reads == 2:
        self.assertFlag(True, pysamAlignment, samFlags.READ_PAIRED)
    else:
        # we shouldn't be setting numberReads to anything else
        self.fail("unexpected number_reads: {}".format(
            gaAlignment.number_reads))
    # Expected (FIRST_IN_PAIR, SECOND_IN_PAIR) flag states for each
    # permitted read_number. Values are compared by equality; an
    # identity test against an int literal is not reliable.
    expectedPairFlags = {
        -1: (False, False),
        0: (True, False),
        1: (False, True),
        2: (True, True),
    }
    readNumber = gaAlignment.read_number
    if readNumber not in expectedPairFlags:
        # we shouldn't be setting readNumber to anything else
        self.fail("unexpected read_number: {}".format(readNumber))
    firstExpected, secondExpected = expectedPairFlags[readNumber]
    self.assertFlag(
        firstExpected, pysamAlignment, samFlags.FIRST_IN_PAIR)
    self.assertFlag(
        secondExpected, pysamAlignment, samFlags.SECOND_IN_PAIR)
    self.assertFlag(
        not gaAlignment.improper_placement,
        pysamAlignment, samFlags.READ_PROPER_PAIR)
    self.assertEqual(gaAlignment.read_group_id, readGroupInfo.id)
    self.assertFlag(
        gaAlignment.secondary_alignment,
        pysamAlignment, samFlags.SECONDARY_ALIGNMENT)
    self.assertFlag(
        gaAlignment.supplementary_alignment,
        pysamAlignment, samFlags.SUPPLEMENTARY_ALIGNMENT)