示例#1
0
 def test_error_single_pos_single(self):
     """Compare single-position lookups with range lookups (single args).

     Positions are loaded from ``unit_test/positions_missed.pos``. For every
     entry whose start equals its end, tuples are fetched for that entry and
     fetched again after the entry's start has been moved back by one;
     tuples sharing the same first element must compare equal.
     """
     self.args_single.interval = None
     self.args_single.positions_file = "unit_test/positions_missed.pos"
     self.classifier = bamutils.Classifier(self.args_single)
     self.classifier.get_positions()
     self.target_positions = self.classifier.target_positions
     temp_target_positions = []
     for target_position in self.target_positions:
         # Stop at the first entry that has neither a start nor an end.
         if target_position[1] == None and target_position[2] == None:
             break
         # Only entries with start == end (a single position) are checked.
         if target_position[1] == target_position[2]:
             # NOTE(review): this list is only reset inside the loop below,
             # so it keeps growing across entries for which get_tuples
             # yields nothing -- confirm that is intended.
             temp_target_positions.append(target_position)
             tuples = self.classifier.bam.get_tuples(temp_target_positions)
             for it in tuples:
                 temp_target_positions = []
                 # Move the start back by one, in place. This runs once per
                 # tuple, so the start keeps decreasing while iterating, and
                 # the entry taken from self.target_positions is modified.
                 # NOTE(review): assumes entries are mutable sequences.
                 target_position[1] = target_position[1] - 1
                 temp_target_positions.append(target_position)
                 newtuples = self.classifier.bam.get_tuples(
                     temp_target_positions)
                 for new_it in newtuples:
                     # Tuples are matched on their first element.
                     if it[0] == new_it[0]:
                         self.assertEqual(
                             new_it, it, 'Error reading this'
                             ' sequence from the bam file(' + str(it[0]) +
                             ')'
                             ', please use range instead')
示例#2
0
 def test_error_single_pos_paired(self):
     """Compare single-position lookups with range lookups (paired args).

     Positions are loaded from ``unit_test/positions_missed.pos``. For every
     entry whose start equals its end, tuple pairs are fetched for that
     entry and fetched again after the entry's start has been moved back by
     one; pairs whose first elements match must compare equal.
     """
     self.args_paired.interval = None
     self.args_paired.positions_file = "unit_test/positions_missed.pos"
     self.classifier = bamutils.Classifier(self.args_paired)
     self.classifier.get_positions()
     self.target_positions = self.classifier.target_positions
     # Fetch the alignment for the single position (n), then for the
     # range (n-1, n), and compare the two results.
     temp_target_positions = []
     for target_position in self.target_positions:
         # Stop at the first entry that has neither a start nor an end.
         if target_position[1] == None and target_position[2] == None:
             break
         # Only entries with start == end (a single position) are checked.
         if target_position[1] == target_position[2]:
             # NOTE(review): this list is only reset inside the loop below,
             # so it keeps growing across entries for which get_tuples
             # yields nothing -- confirm that is intended.
             temp_target_positions.append(target_position)
             tuples = self.classifier.bam.get_tuples(temp_target_positions)
             # Each item unpacks into two tuples (presumably tumour and
             # normal -- verify against get_tuples).
             for tt, nt in tuples:
                 temp_target_positions = []
                 # Move the start back by one, in place. This runs once per
                 # pair, so the start keeps decreasing while iterating, and
                 # the entry taken from self.target_positions is modified.
                 # NOTE(review): assumes entries are mutable sequences.
                 target_position[1] = target_position[1] - 1
                 temp_target_positions.append(target_position)
                 newtuples = self.classifier.bam.get_tuples(
                     temp_target_positions)
                 for new_tt, new_nt in newtuples:
                     # Pairs are matched on the first element of both tuples.
                     if nt[0] == new_nt[0] and tt[0] == new_tt[0]:
                         self.assertEqual(
                             new_tt, tt, 'Error reading this'
                             ' sequence from the bam file(' + str(tt[0]) +
                             ')'
                             ', please use range instead')
                         self.assertEqual(
                             new_nt, nt, 'Error reading this'
                             ' sequence from the bam file(' + str(tt[0]) +
                             ')'
                             ', please use range instead')
示例#3
0
 def run_classifier(self, args):
     """Run the classifier pipeline once for ``args``.

     Builds a ``bamutils.Classifier``, resolves its positions, generates
     features, optionally exports them (when ``args.export_features`` is not
     None), then predicts and prints the results.
     """
     classifier = bamutils.Classifier(args)
     classifier.get_positions()
     features = classifier.get_features()
     if args.export_features is not None:
         # Export through the classifier built above. The previous code used
         # self.classifier, which is a different object (or missing
         # entirely) and did not produce these features.
         # NOTE(review): the return value is discarded here; if
         # export_features consumes an iterator, predict() may need its
         # return value instead -- confirm against bamutils.
         classifier.export_features(features)
     probabilities = classifier.predict(features)
     classifier.print_results(probabilities)
示例#4
0
 def test_get_positions_case7(self):
     """
     Whole genome: ``self.args_paired`` is used unmodified, and no target
     position may carry a start coordinate.
     """
     classifier = bamutils.Classifier(self.args_paired)
     classifier.get_positions()
     for val in classifier.target_positions:
         # val[1] is the start of the entry; a non-None start means a
         # region was specified.
         self.assertIsNone(val[1], 'Region specified in whole genome')
示例#5
0
 def test_reference_base_single(self):
     """Check reference bases returned for a grid of positions.

     Does nothing unless ``self.args_single.single`` is set. For chromosome
     indices 0-24 and positions 0-99, the reference base is fetched and its
     string form must contain a digit 0-4 or one of A, C, G, T, N.
     """
     if not self.args_single.single:
         return
     classifier = bamutils.Classifier(self.args_single)
     for chromosome_id in xrange(25):
         for position in xrange(100):
             # Throws RuntimeError if unable to get the base; such positions
             # are skipped. Only the lookup is guarded, so the assertion
             # below is never silenced.
             try:
                 refbase = classifier.bam.get_reference_base(
                     chromosome_id, position, index=True)
             except RuntimeError:
                 continue
             self.assertRegexpMatches(
                 str(refbase), '[0-4]|[ACGTN]',
                 'Invalid reference base')
示例#6
0
 def test_get_positions_case9(self):
     """
     Interval is a whole chromosome; nothing else is set by this test.
     interval: 1
     manifest: not set here (listed as None in the original notes)
     positions_file: not set here (listed as None in the original notes)
     expected: [['1', None, None]]
     """
     paired_args = self.args_paired
     paired_args.interval = '1'
     clf = bamutils.Classifier(paired_args)
     clf.get_positions()
     expected = [['1', None, None]]
     self.assertListEqual(sorted(clf.target_positions), sorted(expected))
示例#7
0
    def test_get_positions_case2(self):
        """
        Interval is a range, combined with a positions file.
        interval: 1:1-1000
        positions file: ./unit_test/get_positions_posfile
            (contents as noted originally: 1:1-1000, 1:1000, 1:1-2000,
            1:1-20000)
        expected: 1:1-1000 (the region common to the inputs)
        """
        paired_args = self.args_paired
        paired_args.interval = '1:1-1000'
        paired_args.positions_file = './unit_test/get_positions_posfile'
        clf = bamutils.Classifier(paired_args)
        clf.get_positions()
        expected = [['1', 1, 1000]]
        self.assertListEqual(sorted(clf.target_positions), sorted(expected))
示例#8
0
 def test_get_positions_case8(self):
     """
     Interval is a range, combined with a manifest (deep flag enabled).
     interval: 1:1-100
     manifest: ./unit_test/get_positions_manifest
         (contents as noted originally: 1:1-90, 1:92-130, 1:500-650,
         5:500-700, 6:6000-6500, 1:179076833-179076890)
     expected: 1:1-90 and 1:92-100 (the region common to the inputs)
     """
     paired_args = self.args_paired
     paired_args.interval = '1:1-100'
     paired_args.deep = True
     paired_args.manifest = './unit_test/get_positions_manifest'
     clf = bamutils.Classifier(paired_args)
     clf.get_positions()
     expected = [['1', 1, 90], ['1', 92, 100]]
     self.assertListEqual(sorted(clf.target_positions), sorted(expected))
示例#9
0
    def test_get_positions_case1(self):
        """
        Interval is a whole chromosome, combined with a positions file.
        interval: 1
        positions file: ./unit_test/get_positions_posfile
            (contents as noted originally: 1:1-20000, 1:21000-30000,
            3:1-1000, 5:500-2000, 10:1-20000)
        expected: 1:1-20000 and 1:21000-30000 (the region common to the
            inputs)
        """
        paired_args = self.args_paired
        paired_args.interval = '1'
        paired_args.positions_file = './unit_test/get_positions_posfile'
        clf = bamutils.Classifier(paired_args)
        clf.get_positions()
        expected = [['1', 1, 20000], ['1', 21000, 30000]]
        self.assertEqual(sorted(clf.target_positions), sorted(expected))
示例#10
0
 def test_get_positions_case10(self):
     """
     Interval is a whole chromosome that shares nothing with the positions
     file, so the intersection is empty.
     interval: 11
     manifest: not set here (listed as None in the original notes)
     positions file: ./unit_test/get_positions_posfile
         (contents as noted originally: 1:1-20000, 1:21000-30000,
         3:1-1000, 5:500-2000, 10:1-20000)
     expected: []
     """
     paired_args = self.args_paired
     paired_args.interval = '11'
     paired_args.positions_file = './unit_test/get_positions_posfile'
     clf = bamutils.Classifier(paired_args)
     clf.get_positions()
     self.assertListEqual(clf.target_positions, [])
示例#11
0
 def test_trinucleotide_context_paired(self):
     """Check trinucleotide contexts returned for a grid of positions.

     Fails immediately when ``self.args_paired.single`` is set. Otherwise,
     for chromosome indices 0-24 and positions 0-999, the trinucleotide
     context is fetched and matched against the pattern below.
     """
     #only need to run it once.
     if self.args_paired.single:
         self.fail('Single flag set')
     classifier = bamutils.Classifier(self.args_paired)
     for chromosome_id in xrange(25):
         for position in xrange(1000):
             # Throws RuntimeError if unable to get the base; such positions
             # are skipped. Only the lookup is guarded, so the assertion
             # below is never silenced.
             try:
                 tc = classifier.bam.get_trinucleotide_context(
                     chromosome_id, position)
             except RuntimeError:
                 continue
             # NOTE(review): the pattern is an alternation of three identical
             # character classes and assertRegexpMatches searches, so one
             # matching character anywhere is enough. An anchored
             # three-base pattern is probably what was meant -- confirm the
             # return format before tightening.
             self.assertRegexpMatches(
                 tc, '[ACGTN]|[ACGTN]|[ACGTN]',
                 'Invalid Trinucleotide_context')
示例#12
0
 def test_get_positions_case3(self):
     """
     Same as case1 plus a manifest file (deep flag enabled).
     interval: 1
     positions file: ./unit_test/get_positions_posfile
         (contents as noted originally: 1:1-1000, 1:1000, 1:1-2000,
         1:1-20000)
     manifest: ./unit_test/get_positions_manifest
         (contents as noted originally: 1:1-150, 1:500-650, 5:500-700,
         6:6000-6500, 1:179076833-179076890)
     expected: 1:1-90, 1:92-130 and 1:500-650 (the region common to the
         inputs)
     """
     paired_args = self.args_paired
     paired_args.interval = '1'
     paired_args.deep = True
     paired_args.positions_file = './unit_test/get_positions_posfile'
     paired_args.manifest = './unit_test/get_positions_manifest'
     clf = bamutils.Classifier(paired_args)
     clf.get_positions()
     expected = [['1', 1, 90], ['1', 92, 130], ['1', 500, 650]]
     self.assertListEqual(sorted(clf.target_positions), sorted(expected))
示例#13
0
def main():
    """Entry point: configure logging, then run one classification.

    Reads the parsed arguments from ``classifyui.args``, sets up file
    logging, builds a ``bamutils.Classifier`` and either prints the
    features (``args.features_only``) or predicts and prints the results.
    """
    args = classifyui.args

    # Verbose runs log at DEBUG level; otherwise only WARNING and above.
    log_level = logging.DEBUG if args.verbose else logging.WARNING

    #datefmt = '%m/%d/%Y %I:%M:%S %p',
    logging.basicConfig(
        filename=args.log_file,
        format='%(asctime)s %(message)s',
        level=log_level)

    logging.warning("<<< mutationSeq_" + mutationSeq_version + " started >>>")
    logging.info("importing required modules")
    # Imported here, after logging has been configured.
    import bamutils

    logging.info(args)

    # ------------------------------------------------------------------
    # main body
    # ------------------------------------------------------------------
    logging.info("initializing a Classifier")
    classifier = bamutils.Classifier(args)

    logging.info("getting positions")
    classifier.get_positions()

    logging.info("generating features iterator")
    features = classifier.get_features()

    export_requested = args.export_features is not None
    if export_requested:
        logging.info("exporting features")
        # The value returned by export_features replaces the features
        # used from here on.
        features = classifier.export_features(features)

    if not args.features_only:
        probabilities = classifier.predict(features)
        classifier.print_results(probabilities)
    else:
        classifier.print_features(features)

    logging.warning("successfully completed.\n")
示例#14
0
    def test_get_positions_case11(self):
        """
        Regression test for point lookups against the manifest (deep flag
        enabled).
        manifest: ./unit_test/get_positions_manifest_case11
            (contents as noted originally: 1:1-2000)
        positions file: ./unit_test/get_positions_posfile_case11
            (contents as noted originally: 1:1000)
        expected: [['1', 1000, 1000]]

        Background from the original notes: lookup of single points used to
        be wrong, i.e. self.manifest[val[0]][val[1]:val[2]] returned []
        when val[1] == val[2], whereas self.manifest[val[0]][val[1]] did
        not.
        """
        paired_args = self.args_paired
        paired_args.deep = True
        paired_args.positions_file = './unit_test/get_positions_posfile_case11'
        paired_args.manifest = './unit_test/get_positions_manifest_case11'

        clf = bamutils.Classifier(paired_args)
        clf.get_positions()
        expected = [['1', 1000, 1000]]
        self.assertListEqual(sorted(clf.target_positions), sorted(expected))
示例#15
0
 def get_tuples(self,args):
     """Build a classifier for ``args`` and fetch tuples for its positions.

     Returns a ``(tuples, classifier)`` pair, where ``tuples`` is whatever
     ``classifier.bam.get_tuples`` returns for the classifier's target
     positions.
     """
     clf = bamutils.Classifier(args)
     clf.get_positions()
     positions = clf.target_positions
     return clf.bam.get_tuples(positions), clf
示例#16
0
def run_classifier(arguments, reffiles):
    """Run the classifier once per reference file and return the VCF paths.

    Each file in ``reffiles`` is parsed line by line:

    * lines starting with ``#`` are headers of the form
      ``# <key> <path>`` where key is ``tumour``, ``normal``,
      ``reference`` or ``manifest``; headers with fewer than three fields
      are ignored;
    * blank lines are ignored;
    * every other line contributes ``<field0>:<field1>`` to a temporary
      positions file.

    A file that lacks a tumour, normal or reference path is logged as an
    error and skipped (previously this failed later with a TypeError while
    building the sample strings).

    Args:
        arguments: argument namespace. ``out``, ``model`` and
            ``export_features`` are read; ``out``, ``interval``,
            ``positions_file``, ``samples`` and ``manifest`` are overwritten
            for every processed file.
        reffiles: iterable of paths to the reference (positions) files.

    Returns:
        List of output VCF paths, one per processed file.
    """
    output_vcf = []
    output_folder = arguments.out

    for reference_file in reffiles:
        tfile = None
        nfile = None
        rfile = None
        manfile = None
        positions = []

        # Parse the pos file; the context manager closes it even on error.
        with open(reference_file, 'r') as file_stream:
            for line in file_stream:
                fields = line.strip().split()
                if not fields:
                    continue
                if line[0] == '#':
                    if len(fields) < 3:
                        # Header without a key/path pair: nothing to record.
                        continue
                    key, path = fields[1], fields[2]
                    if key == 'tumour':
                        tfile = path
                    elif key == 'normal':
                        nfile = path
                    elif key == 'reference':
                        rfile = path
                    elif key == 'manifest':
                        manfile = path
                else:
                    positions.append(fields[0] + ':' + fields[1] + '\n')

        if not all((tfile, nfile, rfile)):
            logging.error('Invalid input (one of paths is missing)')
            continue

        # Update arguments.
        # NOTE(review): the folder and file name are concatenated without a
        # separator, so ``arguments.out`` is expected to end with one.
        arguments.out = output_folder + reference_file.strip().split(
            '/')[-1] + '.vcf'

        # Create a positions file for the classifier.
        positions_path = arguments.out + '.tmp'
        with open(positions_path, 'w') as file_stream_w:
            file_stream_w.writelines(positions)

        output_vcf.append(arguments.out)

        try:
            arguments.interval = None
            arguments.positions_file = positions_path
            arguments.samples = [
                'tumour:' + tfile, 'normal:' + nfile, 'reference:' + rfile,
                'model:' + arguments.model
            ]
            arguments.manifest = manfile

            logging.info("initializing a Classifier")
            classifier = bamutils.Classifier(arguments)

            logging.info("getting positions")
            classifier.get_positions()

            logging.info("generating features iterator")
            features = classifier.get_features()

            if arguments.export_features is not None:
                logging.info("exporting features")
                # NOTE(review): the return value is discarded here; if
                # export_features consumes an iterator, predict() may need
                # its return value instead -- confirm against bamutils.
                classifier.export_features(features)

            probabilities = classifier.predict(features)
            classifier.print_results(probabilities)

            logging.warning("successfully completed.\n")
        finally:
            # Remove the positions file even when classification fails.
            os.remove(positions_path)

    return output_vcf