def process(self):
    """Run CASAVA demultiplexing for one flowcell.

    Normalizes ``self.input_dir`` / ``self.sample_sheet`` to single paths,
    derives the base mask from the sample sheet, delegates the actual run to
    the parent class, then collects the produced fastq.gz files and records
    the sample ids they belong to in ``self.meta['job']['sample_id']``.

    Raises:
        Exception: if more than one sample sheet file was supplied.
    """
    # Reduce inputs to only first element (input may arrive as a list).
    if hasattr(self.input_dir, '__iter__'):
        self.input_dir = self.input_dir[0]
    self.input_dir = os.path.join(self.input_dir, "Data/Intensities/BaseCalls/")

    # isinstance instead of type() == list; a single sheet is required.
    if isinstance(self.sample_sheet, list):
        if len(self.sample_sheet) > 1:
            raise Exception('Too many sample sheet files: %s'
                            % ','.join(self.sample_sheet))
        self.sample_sheet = self.sample_sheet[0]

    ss = SampleSheet(self.sample_sheet)
    mask_length, double_idx = ss.get_mask_length()
    # Dual-index runs need a second I<len> component in the base mask.
    if double_idx:
        self.use_base_mask = "y*,I{0},I{0},Y*".format(mask_length)
    else:
        self.use_base_mask = "y*,I{0},Y*".format(mask_length)

    super(CasavaDemux, self).process()

    prj_dir = os.path.join(self.output_dir,
                           'Project_' + self.meta['pipeline']['project_name'])
    self.output_files = utils.find(prj_dir, "*.fastq.gz")

    # Set the metadata: map each output file back to its sample id by the
    # "<sample_id>_" filename prefix convention.
    self.meta['job']['sample_id'] = []
    sample_ids = ss.get_sample_ids()
    for output_file in self.output_files:
        for sample_id in sample_ids:
            if os.path.basename(output_file).startswith("%s_" % sample_id):
                self.meta['job']['sample_id'].append(sample_id)
                break
def process(self):
    """Parse and validate a flowcell run directory.

    Extracts run metadata (date, sequencer serial, run count, flowcell
    position/id) from the flowcell directory name, validates the
    SampleSheet.csv found in the input directory, stores the metadata in
    ``self.meta['pipeline']`` and sets the validated sheet as the output.

    Raises:
        Exception: if the flowcell directory name does not match the
            expected ``<DATE>_<SN>_<COUNT>_<POS><ID>`` Illumina pattern.
    """
    # Reduce inputs to only first element (input may arrive as a list).
    if hasattr(self.input_dir, '__iter__'):
        self.input_dir = self.input_dir[0]
    # A trailing slash makes os.path.dirname() below return the flowcell
    # directory itself rather than its parent.
    if not self.input_dir.endswith('/'):
        self.input_dir += '/'
    (parent_dir, flowcell_dir) = os.path.split(os.path.dirname(self.input_dir))

    parsed = re.search(r'''(?P<DATE>\d{6})_
                           (?P<HISEQ_SN>\w{6})_
                           (?P<RUN_COUNT>\d{4})_
                           (?P<FC_POS>[AB])
                           (?P<FC_ID>.*$)''', flowcell_dir, re.X)
    # Fail with a clear message instead of an AttributeError on None when
    # the directory name is not a standard Illumina run folder name.
    if parsed is None:
        raise Exception("input error: directory name %s does not look like "
                        "an Illumina run folder" % flowcell_dir)

    ss = SampleSheet(os.path.join(self.input_dir, 'SampleSheet.csv'))
    ss_validated = os.path.join(self.output_dir, 'sample_sheet_validated.csv')
    project_name = ss.get_project_name() or 'DefaultProject'
    run_desc = 'Flowcell %s on %s/%s' % (parsed.group('FC_ID'),
                                         os.path.basename(parent_dir),
                                         parsed.group('FC_POS'))
    self.meta.update({
        'pipeline': {
            'date': parsed.group('DATE'),
            'descr': run_desc,
            'fc_id': parsed.group('FC_ID'),
            'fc_pos': parsed.group('FC_POS'),
            'hiseq': os.path.basename(parent_dir),
            'hiseq_sn': parsed.group('HISEQ_SN'),
            'project_name': project_name,
            'run_count': int(parsed.group('RUN_COUNT')),
            'nfiles': ss.get_lines_count()
        }
    })
    ss_validated = ss.validate(project_name, ss_validated)
    self.output_files = [ss_validated]
def create(sample_sheet, input_dir, output_dir=None, output_file_name=None):
    """
    Create a file of file names (fofn) and return the path to it.

    Walks ``input_dir``, pairs R1/R2 fastq files per sample id found in the
    sample sheet, and writes one record per pair (or per unpaired file).

    Args:
        sample_sheet: full path to the sample sheet
        input_dir: path to the directory containing the input files
        output_dir: directory of the output fofn; defaults to the
            sample sheet's directory
        output_file_name: name of the output fofn; defaults to the sample
            sheet base name with a "_fofn.csv" suffix

    Returns:
        The full path of the written fofn file.

    Raises:
        Exception: if ``sample_sheet`` or ``input_dir`` does not exist.
    """
    if not os.path.exists(sample_sheet):
        raise Exception("input error: parameter `sample_sheet` %s does not exist"
                        % sample_sheet)
    if not os.path.exists(input_dir):
        # Bug fix: the message previously interpolated sample_sheet here.
        raise Exception("input error: parameter `input_dir` %s does not exist"
                        % input_dir)

    print("*********************************")
    print("sample_sheet: %s" % os.path.abspath(sample_sheet))
    print("input_dir: %s" % os.path.abspath(input_dir))
    print("*********************************")

    # Set default name and location of the output fofn.
    if not output_file_name:
        output_file_name = os.path.basename(sample_sheet).rsplit(".", 1)[0] + "_fofn.csv"
    if not output_dir:
        output_dir = os.path.dirname(sample_sheet)
    output_file = os.path.join(output_dir, output_file_name)

    with open(output_file, 'w') as f_fofn:
        ss = SampleSheet(sample_sheet)
        for sample_id in ss.get_sample_ids():
            print("*********************************")
            # Was a Py2 print statement; use the call form like the rest
            # of the function.
            print("sample_id : %s" % sample_id)
            for root, dirs, file_list in os.walk(input_dir):
                # Group the files by sample id and read number.
                r1_files = [os.path.join(root, file_name)
                            for file_name in file_list
                            if ('%s_' % sample_id in file_name
                                and Fofn.r1_regex.search(file_name))]
                r2_files = [os.path.join(root, file_name)
                            for file_name in file_list
                            if ('%s_' % sample_id in file_name
                                and Fofn.r2_regex.search(file_name))]
                r1_file = ""
                if r1_files:
                    for r1_file in r1_files:
                        # Filter the R2 files that match the R1 file base;
                        # hoist the invariant group lookup out of the
                        # comprehension condition.
                        r1_base = Fofn.r1_regex.search(r1_file).group(1)
                        r2_matchs = [r2_file for r2_file in r2_files
                                     if r1_base in r2_file]
                        if r2_matchs:
                            Fofn._write_record(f_fofn, r1_file,
                                               r2_matchs[0], sample_id)
                        else:
                            if r2_files:
                                print("No R2 found for sample Id %s" % sample_id)
                            # Record the unpaired R1 on its own.
                            Fofn._write_record(f_fofn, r1_file, '', sample_id)
                elif r2_files:
                    # No R1 at all: record each R2 with an empty R1 slot.
                    for r2_file in r2_files:
                        Fofn._write_record(f_fofn, r1_file, r2_file, sample_id)
    return output_file
def create(sample_sheet, input_dir, output_dir=None, output_file_name=None):
    """
    Create a file of file names (fofn) and return the path to it.

    Walks ``input_dir``, pairs R1/R2 fastq files per sample id found in the
    sample sheet, and writes one record per pair (or per unpaired file).

    Args:
        sample_sheet: full path to the sample sheet
        input_dir: path to the directory containing the input files
        output_dir: directory of the output fofn; defaults to the
            sample sheet's directory
        output_file_name: name of the output fofn; defaults to the sample
            sheet base name with a "_fofn.csv" suffix

    Returns:
        The full path of the written fofn file.

    Raises:
        Exception: if ``sample_sheet`` or ``input_dir`` does not exist.
    """
    if not os.path.exists(sample_sheet):
        raise Exception(
            "input error: parameter `sample_sheet` %s does not exist"
            % sample_sheet)
    if not os.path.exists(input_dir):
        # Bug fix: the message previously interpolated sample_sheet here.
        raise Exception(
            "input error: parameter `input_dir` %s does not exist"
            % input_dir)

    print("*********************************")
    print("sample_sheet: %s" % os.path.abspath(sample_sheet))
    print("input_dir: %s" % os.path.abspath(input_dir))
    print("*********************************")

    # Set default name and location of the output fofn.
    if not output_file_name:
        output_file_name = os.path.basename(sample_sheet).rsplit(
            ".", 1)[0] + "_fofn.csv"
    if not output_dir:
        output_dir = os.path.dirname(sample_sheet)
    output_file = os.path.join(output_dir, output_file_name)

    with open(output_file, 'w') as f_fofn:
        ss = SampleSheet(sample_sheet)
        for sample_id in ss.get_sample_ids():
            print("*********************************")
            # Was a Py2 print statement; use the call form like the rest
            # of the function.
            print("sample_id : %s" % sample_id)
            for root, dirs, file_list in os.walk(input_dir):
                # Group the files by sample id and read number.
                r1_files = [
                    os.path.join(root, file_name) for file_name in file_list
                    if ('%s_' % sample_id in file_name
                        and Fofn.r1_regex.search(file_name))
                ]
                r2_files = [
                    os.path.join(root, file_name) for file_name in file_list
                    if ('%s_' % sample_id in file_name
                        and Fofn.r2_regex.search(file_name))
                ]
                r1_file = ""
                if r1_files:
                    for r1_file in r1_files:
                        # Filter the R2 files that match the R1 file base;
                        # hoist the invariant group lookup out of the
                        # comprehension condition.
                        r1_base = Fofn.r1_regex.search(r1_file).group(1)
                        r2_matchs = [
                            r2_file for r2_file in r2_files
                            if r1_base in r2_file
                        ]
                        if r2_matchs:
                            Fofn._write_record(f_fofn, r1_file, r2_matchs[0],
                                               sample_id)
                        else:
                            if r2_files:
                                print("No R2 found for sample Id %s"
                                      % sample_id)
                            # Record the unpaired R1 on its own.
                            Fofn._write_record(f_fofn, r1_file, '', sample_id)
                elif r2_files:
                    # No R1 at all: record each R2 with an empty R1 slot.
                    for r2_file in r2_files:
                        Fofn._write_record(f_fofn, r1_file, r2_file,
                                           sample_id)
    return output_file