Exemplo n.º 1
0
    def process(self):
        """Derive the base mask from the sample sheet, run the parent step, and
        record the resulting FASTQ files together with their sample ids.

        Reads and rewrites ``self.input_dir`` (pointed at the
        ``Data/Intensities/BaseCalls/`` subdirectory) and ``self.sample_sheet``
        (reduced to a single entry), sets ``self.use_base_mask``, then delegates
        to the parent class ``process()``. Afterwards ``self.output_files`` holds
        the ``*.fastq.gz`` hits under ``Project_<project_name>`` in
        ``self.output_dir``, and ``self.meta['job']['sample_id']`` holds one
        sample id per output file whose basename starts with ``<sample_id>_``.
        Files that match no sample id contribute nothing, so that list can be
        shorter than ``self.output_files``.

        Raises:
            Exception: if ``self.sample_sheet`` is a list with more than one entry.
        """
        # Reduce inputs to only first element. Strings are excluded explicitly:
        # on Python 3 ``str``/``bytes`` define ``__iter__``, so a plain path
        # would otherwise be cut down to its first character.
        if (not isinstance(self.input_dir, (str, bytes))
                and hasattr(self.input_dir, '__iter__')):
            self.input_dir = self.input_dir[0]

        self.input_dir = os.path.join(self.input_dir, "Data/Intensities/BaseCalls/")

        # Exactly one sample sheet is supported; unwrap a single-element list.
        if isinstance(self.sample_sheet, list):
            if len(self.sample_sheet) > 1:
                raise Exception('Too many sample sheet files: %s' % ','.join(self.sample_sheet))
            self.sample_sheet = self.sample_sheet[0]

        ss = SampleSheet(self.sample_sheet)
        mask_length, double_idx = ss.get_mask_length()

        # A double-index sheet gets two index fields in the mask, otherwise one.
        if double_idx:
            self.use_base_mask = "y*,I{0},I{0},Y*".format(mask_length)
        else:
            self.use_base_mask = "y*,I{0},Y*".format(mask_length)

        self.use_base_mask = str(self.use_base_mask)
        super(CasavaDemux, self).process()

        prj_dir = os.path.join(self.output_dir, 'Project_' + self.meta['pipeline']['project_name'])
        self.output_files = utils.find(prj_dir, "*.fastq.gz")

        # Set the metadata: first sample id (in sample-sheet order) whose
        # "<id>_" prefix matches the file name wins.
        # NOTE(review): if one sample id is a prefix of another (e.g. "A" and
        # "A_B"), the shorter one can claim the longer one's files when it is
        # listed first -- confirm sample ids are prefix-free.
        self.meta['job']['sample_id'] = []
        sample_ids = ss.get_sample_ids()
        for output_file in self.output_files:
            file_name = os.path.basename(output_file)
            for sample_id in sample_ids:
                if file_name.startswith("%s_" % sample_id):
                    self.meta['job']['sample_id'].append(sample_id)
                    break
Exemplo n.º 2
0
 def process(self):
     """Store in ``self.output_files`` whatever ``ut.find`` returns.

     ``ut.find`` is called with ``self.input_dirs``, ``self.pattern`` and
     ``self.regex_match``, in that order; the helper's exact matching
     semantics are defined outside this method.
     """
     finder = ut.find
     found = finder(self.input_dirs, self.pattern, self.regex_match)
     self.output_files = found
Exemplo n.º 3
0
 def process(self):
     """Run the file search and keep its result on the instance.

     Passes ``self.input_dirs``, ``self.pattern`` and ``self.regex_match``
     positionally to ``ut.find`` and assigns the return value to
     ``self.output_files``.
     """
     locate = ut.find
     self.output_files = locate(
         self.input_dirs,
         self.pattern,
         self.regex_match,
     )