Exemplo n.º 1
0
 def test_get_projects(self):
     """Test that getting the projects from a flowcell samplesheet works.

     Three cases are checked against sq.get_projects:
       * an empty samplesheet file yields an empty list,
       * a populated samplesheet yields every project written to it,
       * passing a project name as the second argument yields only that
         project.
     """
     # Assert that an empty file returns an empty list
     fh, ssheet = tempfile.mkstemp(dir=self.rootdir, suffix=".csv")
     os.close(fh)
     self.assertListEqual([], sq.get_projects(ssheet),
                          "The list of projects for an empty file is not empty")

     # Generate artificial samplesheet data; the last element of each row is
     # used as the project name. Keep the unique names in a sorted list so
     # they can be indexed (a dict view is not subscriptable on Python 3).
     data = td.generate_samplesheet_data()
     projects = sorted(set(row[-1] for row in data))

     # Write the data to a samplesheet
     td._write_samplesheet(data, ssheet)

     # Assert that the list of projects returned is the same that we generated
     self.assertListEqual(projects, sorted(sq.get_projects(ssheet)),
                          "The list of projects does not match the original list")

     # Assert that the list of projects returned is filtered as specified
     wanted = projects[-1]
     self.assertListEqual([wanted], sq.get_projects(ssheet, wanted),
                          "The filtered list of projects does not match the expected")
Exemplo n.º 2
0
 def test_get_project_names(self):
     """Check HiSeqRun.get_project_names on an empty and a populated samplesheet
     """
     # A freshly created (empty) samplesheet must give no project names
     handle, sheet_path = tempfile.mkstemp(dir=self.rootdir, suffix=".csv")
     os.close(handle)
     names_from_empty = HiSeqRun.get_project_names(sheet_path)
     self.assertListEqual([], names_from_empty,
                          "The list of projects for an empty file is not empty")

     # Build artificial rows and collect the distinct values of the last
     # element of each row, which is what the names are compared against
     rows = td.generate_samplesheet_data()
     expected = set(row[-1] for row in rows)

     # Persist the rows to the samplesheet created above
     td._write_samplesheet(rows, sheet_path)

     # The names read back must equal the ones generated, ignoring order
     expected_sorted = sorted(expected)
     observed_sorted = sorted(HiSeqRun.get_project_names(sheet_path))
     self.assertListEqual(expected_sorted, observed_sorted,
                          "The list of projects does not match the original list")
Exemplo n.º 3
0
 def setUp(self):
     """Build a paired fastq fixture and a samplesheet in a fresh temp dir.

     Sets the following attributes:
       * self.rootdir     - temporary directory holding all generated files
       * self.fastq_1/2   - paths of the two paired fastq files
       * self.indexes     - dict mapping each generated barcode to the list
                            of record[0] values written with that barcode
       * self.samplesheet - return value of td._write_samplesheet for a
                            sheet listing all but the last barcode, plus one
                            extra row on a different lane
     """
     self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

     # Generate some test fastq_files
     # NOTE: identical barcodes would collapse into a single dict key.
     indexes = {td.generate_barcode(): [],
                td.generate_barcode(): [],
                td.generate_barcode(): [],
                td.generate_barcode(): []}
     # Materialise the keys once: a dict view can neither be indexed nor
     # passed to random.choice on Python 3, and this also avoids rebuilding
     # the key list on every iteration below.
     barcodes = list(indexes)
     args = {'instrument': td.generate_instrument(),
             'run_number': random.randint(101, 9999),
             'fcid': td.generate_fc_barcode(),
             'lane': 1,
             'pair': True}
     fd, f1 = tempfile.mkstemp(suffix="_R1.fastq.gz", dir=self.rootdir)
     os.close(fd)
     f2 = f1.replace("_R1", "_R2")
     f1h = fu.FastQWriter(f1)
     f2h = fu.FastQWriter(f2)
     try:
         for _ in range(1000):
             args['index'] = random.choice(barcodes)
             record = td.generate_fastq_record(**args)
             indexes[args['index']].append(record[0])
             # The first four elements go to the first file, the remainder
             # to the second file
             f1h.write(record[0:4])
             f2h.write(record[4:])
     finally:
         # Close the writers even if record generation or writing fails
         f1h.close()
         f2h.close()

     # Create a samplesheet to use for demultiplexing using all but the last index
     samplesheet = f1.replace("_R1.fastq.gz", ".csv")
     sdata = []
     for n, index in enumerate(barcodes[:-1]):
         sdata.append([args['fcid'],
                       str(args['lane']),
                       "Sample_{}".format(str(n)),
                       "unknown",
                       index,
                       "DemuxTest",
                       "0",
                       "",
                       "",
                       "DemuxTestProject"])

     # Add an entry that will not match (same row, but on the next lane)
     s = copy.copy(sdata[-1])
     s[1] = str(args['lane'] + 1)
     sdata.append(s)

     self.samplesheet = td._write_samplesheet(sdata, samplesheet)
     self.indexes = indexes
     self.fastq_1 = f1
     self.fastq_2 = f2
Exemplo n.º 4
0
    def setUp(self):
        """Build a paired fastq fixture and a samplesheet in a fresh temp dir.

        Sets the following attributes:
          * self.rootdir     - temporary directory holding all generated files
          * self.fastq_1/2   - paths of the two paired fastq files
          * self.indexes     - dict mapping each generated barcode to the list
                               of record[0] values written with that barcode
          * self.samplesheet - return value of td._write_samplesheet for a
                               sheet listing all but the last barcode, plus
                               one extra row on a different lane
        """
        self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

        # Generate some test fastq_files
        # NOTE: identical barcodes would collapse into a single dict key.
        indexes = {
            td.generate_barcode(): [],
            td.generate_barcode(): [],
            td.generate_barcode(): [],
            td.generate_barcode(): []
        }
        # Materialise the keys once: a dict view can neither be indexed nor
        # passed to random.choice on Python 3, and this also avoids rebuilding
        # the key list on every iteration below.
        barcodes = list(indexes)
        args = {
            'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True
        }
        fd, f1 = tempfile.mkstemp(suffix="_R1.fastq.gz", dir=self.rootdir)
        os.close(fd)
        f2 = f1.replace("_R1", "_R2")
        f1h = fu.FastQWriter(f1)
        f2h = fu.FastQWriter(f2)
        try:
            for _ in range(1000):
                args['index'] = random.choice(barcodes)
                record = td.generate_fastq_record(**args)
                indexes[args['index']].append(record[0])
                # The first four elements go to the first file, the
                # remainder to the second file
                f1h.write(record[0:4])
                f2h.write(record[4:])
        finally:
            # Close the writers even if record generation or writing fails
            f1h.close()
            f2h.close()

        # Create a samplesheet to use for demultiplexing using all but the last index
        samplesheet = f1.replace("_R1.fastq.gz", ".csv")
        sdata = []
        for n, index in enumerate(barcodes[:-1]):
            sdata.append([
                args['fcid'],
                str(args['lane']), "Sample_{}".format(str(n)), "unknown",
                index, "DemuxTest", "0", "", "", "DemuxTestProject"
            ])

        # Add an entry that will not match (same row, but on the next lane)
        s = copy.copy(sdata[-1])
        s[1] = str(args['lane'] + 1)
        sdata.append(s)

        self.samplesheet = td._write_samplesheet(sdata, samplesheet)
        self.indexes = indexes
        self.fastq_1 = f1
        self.fastq_2 = f2
Exemplo n.º 5
0
 def test_get_project_samples(self):
     """Check that sq.get_project_samples returns the samples of each project
     """

     # Write artificial samplesheet rows to a new csv file in the test dir
     rows = td.generate_samplesheet_data()
     handle, sheet_path = tempfile.mkstemp(dir=self.rootdir, suffix=".csv")
     os.close(handle)
     td._write_samplesheet(rows, sheet_path)

     # A freshly generated project name is expected to give an empty list
     unknown_project = td.generate_project()
     self.assertListEqual([], sq.get_project_samples(sheet_path, unknown_project),
                          "Getting samples for a non-existing project returned unexpected output")

     # Group element 2 of every row (the sample) under element 9 (the project)
     expected = {}
     for row in rows:
         expected.setdefault(row[9], []).append(row[2])

     # Each project must give back exactly its own samples, in any order
     for project in expected:
         wanted = sorted(expected[project])
         found = sorted(sq.get_project_samples(sheet_path, project))
         self.assertListEqual(wanted, found,
                              "The returned list of samples did not match the original")