def test_3DEC_UmiBarcodeDemuxMethod_matching_barcode(self):
    """A read pair whose R1 carries a known barcode gets the expected tags.

    R1 is laid out as a 3bp UMI (``ATC``) followed by the 8bp cell barcode
    ``ACACACTA``; the test checks the ``BC`` (barcode sequence) and ``bi``
    (barcode index) tags written onto the first demultiplexed record.
    """
    barcode_dir = pkg_resources.resource_filename(
        'singlecellmultiomics',
        'modularDemultiplexer/barcodes/')
    parser = BarcodeParser(barcode_dir)

    read_1 = FastqRecord(
        '@Cluster_s_1_1101_1000',
        'ATCACACACTATAGTCATTCAGGAGCAGGTTCTTCAGGTTCCCTGTAGTTGTGTGGTTTTGAGTGAGTTTTTTAAT',
        '+',
        'AAAAA#EEEEEEEEEEEAEEEEEEEAEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEE/EEEEEEEEEEEEEEEEEE'
    )
    read_2 = FastqRecord(
        '@Cluster_s_1_1101_1002',
        'ACCCCAGATCAACGTTGGACNTCNNCNTTNTNCTCNGCACCNNNNCNNNCTTATNCNNNANNNNNNNNNNTNNGN',
        '+',
        '6AAAAEEAEE/AEEEEEEEE#EE##<#6E#A#EEE#EAEEA####A###EE6EE#E###E##########E##A#'
    )

    strategy = UmiBarcodeDemuxMethod(
        umiRead=0,
        umiStart=0,
        umiLength=3,
        barcodeRead=0,
        barcodeStart=3,
        barcodeLength=8,
        barcodeFileParser=parser,
        barcodeFileAlias='maya_384NLA',
        indexFileParser=None,
        indexFileAlias='illumina_merged_ThruPlex48S_RP',
        random_primer_read=None,
        random_primer_length=6)

    tagged = strategy.demultiplex([read_1, read_2])

    # The barcode sequence is ACACACTA (first barcode), hence index 1.
    for tag, expected in (('BC', 'ACACACTA'), ('bi', 1)):
        self.assertEqual(tagged[0].tags[tag], expected)
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the MspJI layout: a 3bp UMI followed by an 8bp cell barcode.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    alias = 'maya_mspj1'
    self.barcodeFileAlias = alias

    # Positions of UMI and barcode; both are taken from read index 0.
    layout = dict(
        umiRead=0, umiStart=0, umiLength=3,
        barcodeRead=0, barcodeStart=3, barcodeLength=8,
    )
    UmiBarcodeDemuxMethod.__init__(
        self,
        barcodeFileAlias=alias,
        barcodeFileParser=barcodeFileParser,
        **layout,
        **kwargs)

    self.shortName = 'MSPJIC8U3'
    self.longName = 'MSPJI, CB: 8bp UMI: 3bp'
    self.autoDetectable = True
    self.description = 'MSPJI barcoded fragments. 3bp umi followed by 8bp cell barcode.'
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the scartrace layout: an 8bp cell barcode at the start of read index 0, no UMI.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    self.barcodeFileAlias = 'scartrace'
    UmiBarcodeDemuxMethod.__init__(
        self,
        # No UMI in this protocol: zero-length UMI at offset 0.
        umiRead=0,
        umiStart=0,
        umiLength=0,
        # Cell barcode occupies the first 8 bases.
        barcodeRead=0,
        barcodeStart=0,
        barcodeLength=8,
        barcodeFileAlias=self.barcodeFileAlias,
        barcodeFileParser=barcodeFileParser,
        **kwargs)

    self.autoDetectable = True
    self.shortName = 'SCARC8R1'
    self.longName = 'Scartrace, CB: 8bp'
    self.description = '384 well format. Scar amplicon demultiplexing, cell barcode in read 1'
def demultiplex(self, records, **kwargs):
    """Demultiplex ``records`` and tag them with the 2bp ligation motif.

    The two bases directly after the UMI + cell barcode in the first read
    are stored on the tagged records as ``lh`` (bases) and ``lq``
    (qualities).

    Args:
        records: the reads of one fragment; ``records[0]`` carries the
            UMI and cell barcode.
        **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
            ``probe`` is truthy, the base following UMI + barcode must be
            ``'T'`` for the read to be accepted.

    Returns:
        The records returned by the base demultiplexer, with the ligation
        tags added to the first (up to) two of them.

    Raises:
        NonMultiplexable: in probe mode, when the base after the barcode is
            not ``'T'`` or the read is too short to contain it.
    """
    ligation_start = self.barcodeLength + self.umiLength
    ligation_end = ligation_start + 2
    first_read = records[0]

    # Slice rather than index: a read that ends before this position yields
    # '' and is rejected as NonMultiplexable instead of raising IndexError,
    # matching how the sibling 'CATG' probe check behaves.
    if kwargs.get('probe') and \
            first_read.sequence[ligation_start:ligation_start + 1] != 'T':
        raise NonMultiplexable

    # Capture the ligation motif before the base class processes the reads.
    ligation_sequence = first_read.sequence[ligation_start:ligation_end]
    ligation_qualities = first_read.qual[ligation_start:ligation_end]

    taggedRecords = UmiBarcodeDemuxMethod.demultiplex(
        self, records, **kwargs)

    # Tag read 1 and, when present, read 2. Limiting to the first two keeps
    # paired-end behaviour unchanged while no longer failing on single-end
    # input where there is no second record.
    for tagged in taggedRecords[:2]:
        tagged.addTagByTag(
            'lh', ligation_sequence, isPhred=False, make_safe=False)
        tagged.addTagByTag(
            'lq', ligation_qualities, isPhred=True, make_safe=False)

    return taggedRecords
def demultiplex(self, records, **kwargs):
    """Demultiplex ``records``, optionally requiring a ``CATG`` site when probing.

    Args:
        records: the reads of one fragment; ``records[0]`` carries the
            UMI and cell barcode.
        **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
            ``probe`` is truthy, the four bases following UMI + barcode in
            the first read must be ``'CATG'``.

    Returns:
        Whatever ``UmiBarcodeDemuxMethod.demultiplex`` returns.

    Raises:
        NonMultiplexable: in probe mode, when the expected ``CATG`` is absent.
    """
    if kwargs.get('probe'):
        site_start = self.barcodeLength + self.umiLength
        observed = records[0].sequence[site_start:site_start + 4]
        if observed != 'CATG':
            raise NonMultiplexable

    return UmiBarcodeDemuxMethod.demultiplex(self, records, **kwargs)
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the single-end 96-well NLAIII layout: 3bp UMI then 8bp barcode.

    No random primer is configured (both random-primer settings are ``None``).

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    alias = 'lennart96NLA'
    self.barcodeFileAlias = alias
    UmiBarcodeDemuxMethod.__init__(
        self,
        umiRead=0,
        umiStart=0,
        umiLength=3,
        barcodeRead=0,
        barcodeStart=3,
        barcodeLength=8,
        random_primer_read=None,
        random_primer_length=None,
        barcodeFileAlias=alias,
        barcodeFileParser=barcodeFileParser,
        **kwargs)

    self.autoDetectable = True
    self.shortName = 'NLAIII96C8U3SE'
    # NOTE(review): the long name mentions 'RP:6bp' although no random
    # primer is configured above -- confirm which one is intended.
    self.longName = 'NLAIII, 96 well CB: 8bp UMI: 3bp RP:6bp, single ended'
    self.description = '96 well format. 3bp umi followed by 8bp barcode. Single end: R2 is missing'
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the paired-end 96-well NLAIII layout.

    Read index 0 holds a 3bp UMI followed by an 8bp cell barcode; a 6bp
    random primer is configured on read index 1.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    self.barcodeFileAlias = 'lennart96NLA'

    umi = dict(umiRead=0, umiStart=0, umiLength=3)
    barcode = dict(barcodeRead=0, barcodeStart=3, barcodeLength=8)
    random_primer = dict(random_primer_read=1, random_primer_length=6)

    UmiBarcodeDemuxMethod.__init__(
        self,
        barcodeFileAlias=self.barcodeFileAlias,
        barcodeFileParser=barcodeFileParser,
        **umi,
        **barcode,
        **random_primer,
        **kwargs)

    self.shortName = 'NLAIII96C8U3'
    self.longName = 'NLAIII, 96well CB: 8bp UMI: 3bp RP: 6bp'
    self.description = '96 well format. 3bp umi followed by 8bp barcode. R2 starts with a 6bp random primer'
    self.autoDetectable = True
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the CEL-Seq 1 layout: 8bp cell barcode first, then a 4bp UMI.

    Both are read from read index 0; a 6bp random primer is configured on
    read index 1.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    alias = 'celseq1'
    self.barcodeFileAlias = alias
    UmiBarcodeDemuxMethod.__init__(
        self,
        # Barcode precedes the UMI in this protocol.
        barcodeRead=0,
        barcodeStart=0,
        barcodeLength=8,
        umiRead=0,
        umiStart=8,
        umiLength=4,
        random_primer_read=1,
        random_primer_length=6,
        barcodeFileAlias=alias,
        barcodeFileParser=barcodeFileParser,
        **kwargs)

    self.autoDetectable = True
    self.shortName = 'CS1C8U4'
    self.longName = 'CELSeq 1, CB: 8bp, UMI: 4bp'
    self.description = 'R1 starts with a 8bp cell barcode followed by a 4bp UMI. R2 ends with a 6bp random primer'
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the swapped-read CEL-Seq 2 layout.

    Read index 1 holds an 8bp UMI followed by an 8bp cell barcode; a 6bp
    random primer is configured on read index 0.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    self.barcodeFileAlias = 'celseq2'

    layout = {
        'umiRead': 1,
        'umiStart': 0,
        'umiLength': 8,
        'barcodeRead': 1,
        'barcodeStart': 8,
        'barcodeLength': 8,
        'random_primer_read': 0,
        'random_primer_length': 6,
    }
    UmiBarcodeDemuxMethod.__init__(
        self,
        barcodeFileAlias=self.barcodeFileAlias,
        barcodeFileParser=barcodeFileParser,
        **layout,
        **kwargs)

    self.shortName = 'CS2C8U8S'
    self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp'
    self.description = 'R2 starts with a longer 8bp UMI followed by a 8bp cell barcode. R1 ends with a 6bp primer'
    self.autoDetectable = True
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the CEL-Seq 2 layout with a 6bp UMI and no random primer.

    Read index 0 holds a 6bp UMI followed by an 8bp cell barcode. This
    method is not auto-detectable and has to be selected explicitly.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    alias = 'celseq2'
    self.barcodeFileAlias = alias
    UmiBarcodeDemuxMethod.__init__(
        self,
        umiRead=0,
        umiStart=0,
        umiLength=6,
        barcodeRead=0,
        barcodeStart=6,
        barcodeLength=8,
        # No random primer on either read.
        random_primer_read=None,
        random_primer_length=None,
        barcodeFileAlias=alias,
        barcodeFileParser=barcodeFileParser,
        **kwargs)

    self.autoDetectable = False
    self.shortName = 'CS2C8U6NH'
    self.longName = 'CELSeq 2, CB: 8bp, UMI: 6bp, NO random primer'
    self.description = 'R1 starts with a 6bp UMI followed by a 8bp cell barcode. R2 has no random primer. Use this demultiplexing method for VASA'
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the CEL-Seq 2 layout that uses the 'celseq2_noNla' barcode set.

    Read index 0 holds an 8bp UMI followed by an 8bp cell barcode; a 6bp
    random primer is configured on read index 1.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    self.barcodeFileAlias = 'celseq2_noNla'

    umi = dict(umiRead=0, umiStart=0, umiLength=8)
    barcode = dict(barcodeRead=0, barcodeStart=8, barcodeLength=8)

    UmiBarcodeDemuxMethod.__init__(
        self,
        random_primer_read=1,
        random_primer_length=6,
        barcodeFileAlias=self.barcodeFileAlias,
        barcodeFileParser=barcodeFileParser,
        **umi,
        **barcode,
        **kwargs)

    self.autoDetectable = True
    self.shortName = 'CS2C8U8NNLA'
    self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp, NLAIII free'
    self.description = 'CEL-Seq2 without NLAIII digestable barcodes '
def demultiplex(self, records, **kwargs):
    """Demultiplex ``records``, optionally requiring a fixed sequence when probing.

    Args:
        records: the reads of one fragment.
        **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
            ``probe`` is truthy, the first read must continue with
            ``CCTTGAACTTCTGGTTGTAG`` after its first four bases.

    Returns:
        Whatever ``UmiBarcodeDemuxMethod.demultiplex`` returns.

    Raises:
        NonMultiplexable: in probe mode, when the expected sequence is absent.
    """
    if kwargs.get('probe'):
        # Skip the first four bases of the first read before matching.
        after_prefix = records[0].sequence[4:]
        if not after_prefix.startswith('CCTTGAACTTCTGGTTGTAG'):
            raise NonMultiplexable

    return UmiBarcodeDemuxMethod.demultiplex(self, records, **kwargs)
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the scartrace layout with the cell barcode on the second read.

    Read index 1 starts with the 8bp cell barcode (no UMI); a 4bp random
    sequence is configured on read index 0 with ``random_primer_end=False``.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    self.barcodeFileAlias = 'scartrace'
    UmiBarcodeDemuxMethod.__init__(
        self,
        # No UMI in this protocol: zero-length UMI at offset 0.
        umiRead=0,
        umiStart=0,
        umiLength=0,
        # Read index 1 (i.e. read 2) carries the cell barcode.
        barcodeRead=1,
        barcodeStart=0,
        barcodeLength=8,
        barcodeFileAlias=self.barcodeFileAlias,
        barcodeFileParser=barcodeFileParser,
        random_primer_end=False,
        random_primer_read=0,
        random_primer_length=4,
        **kwargs)

    self.shortName = 'SCARC8R2R4'
    self.longName = 'Scartrace, CB: 8bp, with 4bp random sequence in read 1'
    # The description previously read "cell barcode in read , ..." with the
    # read number missing; barcodeRead=1 above means the barcode is in read 2.
    self.description = '384 well format. Scar amplicon demultiplexing, cell barcode in read 2, 4bp random sequence in R1'
    self.autoDetectable = True
def __init__(self, barcodeFileParser, **kwargs):
    """Set up the 384-well single cell CHIC layout.

    Read index 0 holds a 3bp UMI followed by an 8bp cell barcode; a 6bp
    random primer is configured on read index 1. The captured part of the
    first read starts one base after UMI + barcode.

    Args:
        barcodeFileParser: parser handed through to the base demultiplexer.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.
    """
    alias = 'maya_384NLA'
    self.barcodeFileAlias = alias
    UmiBarcodeDemuxMethod.__init__(
        self,
        umiRead=0,
        umiStart=0,
        umiLength=3,
        barcodeRead=0,
        barcodeStart=3,
        barcodeLength=8,
        random_primer_read=1,
        random_primer_length=6,
        barcodeFileAlias=alias,
        barcodeFileParser=barcodeFileParser,
        **kwargs)

    self.shortName = 'scCHIC384C8U3'
    self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp, RP: 6BP'
    self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A. R2 ends with a 6bp random primer'
    self.autoDetectable = True

    # Don't capture the first base after the UMI and barcode.
    capture_start = self.barcodeLength + self.umiLength + 1
    self.sequenceCapture[0] = slice(capture_start, None)
def __init__(self,
             umiRead=0, umiStart=0, umiLength=8,  # default settings UMI
             barcodeRead=0, barcodeStart=8, barcodeLength=8,  # default settings Barcode
             enzymeRead=0, enzymeStart=16, enzymeLength=3,  # default settings Enzyme ID
             ispcrRead=0, ispcrStart=19, ispcrLength=15,  # default settings ISPCR
             ispcrSeq="CAGTGGTATCAGAGT",
             barcodeFileParser=None,  # compatible, no need to change
             barcodeFileAlias=None,  # passed from lower-level Classes, e.g. "reBS_nla384w"
             indexFileParser=None,  # compatible, no need to change
             **kwargs):  # additional arguments
    """Base initialiser for restriction bisulfite demultiplexing.

    Records where the UMI, cell barcode, enzyme ID and ISPCR segments sit
    and works out which part of the barcode-carrying read is captured.

    Args:
        umiRead: read index holding the UMI (0: read 1, 1: read 2, ...).
        umiStart: first base of the UMI.
        umiLength: UMI length; 0 means there is a barcode only.
        barcodeRead: read index holding the cell barcode.
        barcodeStart: first base of the cell barcode.
        barcodeLength: cell barcode length.
        enzymeRead: read index holding the enzyme ID.
        enzymeStart: first base of the enzyme ID.
        enzymeLength: enzyme ID length.
        ispcrRead: read index holding the ISPCR segment.
        ispcrStart: first base of the ISPCR segment.
        ispcrLength: ISPCR segment length.
        ispcrSeq: ISPCR sequence; stored as ``self.ispcrSeq``.
        barcodeFileParser: barcode file parser, passed to the base class.
        barcodeFileAlias: barcode set alias, e.g. "maya_384NLA".
        indexFileParser: passed to ``IlluminaBaseDemultiplexer``.
        **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod.__init__``.

    Raises:
        NotImplementedError: when the segments are arranged such that the
            captured region cannot be expressed as "everything after the
            leading segments" (see the checks at the end).
    """
    self.description = 'base class for restriction bisulfite'
    self.barcodeFileAlias = barcodeFileAlias  # description , e.g. "maya_384NLA"
    self.barcodeFileParser = barcodeFileParser  # Namespace for barcode file parse

    UmiBarcodeDemuxMethod.__init__(
        self,
        umiRead=umiRead,
        umiStart=umiStart,
        umiLength=umiLength,
        barcodeRead=barcodeRead,
        barcodeStart=barcodeStart,
        barcodeLength=barcodeLength,
        barcodeFileAlias=self.barcodeFileAlias,
        barcodeFileParser=barcodeFileParser,
        **kwargs)

    self.barcodeSummary = self.barcodeFileAlias
    self.umiRead = umiRead  # 0:Read 1, 1: Read 2 etc
    self.umiStart = umiStart  # First base
    self.umiLength = umiLength
    self.shortName = 'RB'
    self.longName = 'base class for restriction bisulfite'

    self.illumina_mux = IlluminaBaseDemultiplexer(
        indexFileParser=indexFileParser,
        indexFileAlias='illumina_merged_ThruPlex48S_RP')

    self.barcodeRead = barcodeRead
    self.barcodeStart = barcodeStart
    self.barcodeLength = barcodeLength

    self.enzymeRead = enzymeRead
    self.enzymeStart = enzymeStart
    self.enzymeLength = enzymeLength

    self.ispcrRead = ispcrRead
    self.ispcrStart = ispcrStart
    self.ispcrLength = ispcrLength
    # Previously accepted but silently dropped; keep it on the instance so
    # the configured ISPCR sequence is available alongside its coordinates.
    self.ispcrSeq = ispcrSeq

    self.autoDetectable = False

    self.sequenceCapture = [slice(None), slice(None)]  # ranges

    # TAKE OUT IF STATEMENT (note carried over from the original author)
    if umiLength == 0:
        # There is a barcode only: capture everything after it.
        if barcodeStart != 0:
            raise NotImplementedError(
                'Complicated slice where we need to capture around a region')
        self.sequenceCapture[barcodeRead] = slice(barcodeLength, None)
    else:
        # UMI and barcode must share a read, and one of them must start at
        # base 0, so that the leading segments form one contiguous prefix.
        if umiRead != barcodeRead:
            raise NotImplementedError()
        if not (umiStart == 0 or barcodeStart == 0):
            raise NotImplementedError(
                'Complicated slice where we need to capture around a region')
        # Skip UMI, barcode, enzyme ID and ISPCR segment.
        self.sequenceCapture[barcodeRead] = slice(
            barcodeLength + umiLength + enzymeLength + ispcrLength, None)