示例#1
0
 def run(start_from_here=False):
     """Hand the ``orthomclFilterFasta.pl`` invocation to ``cmdline``.

     The script path is resolved inside ``steps.orthomcl_bin_dir`` and is
     given, in order: the new proteomes directory, the minimum length, the
     maximum percent of stops, and the "good" and "bad" output files.

     ``start_from_here`` is accepted but not read in this body.

     Returns whatever ``cmdline`` returns for that invocation.
     """
     script = join(steps.orthomcl_bin_dir, 'orthomclFilterFasta.pl')
     script_args = [
         new_proteomes_dir,
         min_length,
         max_percent_stop,
         new_good_proteomes,
         new_bad_proteomes]
     return cmdline(script, parameters=script_args)
示例#2
0
 def run():
     """Run gene prediction with prodigal, or skip when nothing is queued.

     When ``assembly_names`` is empty the step logs 'Skipping.' and
     returns 0. Otherwise it returns the result of
     ``cmdline('prodigal', ...)`` with the filtered assemblies as input
     ('-i') and the predicted proteomes as protein output ('-a').
     """
     if not assembly_names:
         log.info('Skipping.')
         return 0

     # The call must not be followed by a trailing comma: ``return f(...),``
     # returns a 1-tuple instead of the value itself, which does not match
     # the plain 0 returned by the skip branch above.
     #
     # The '-o' option (join(steps.intermediate_dir, assembly_name + '.gff'))
     # is currently disabled.
     return cmdline('prodigal',
                    parameters=[
                        '-i', filtered_assemblies,
                        '-a', predicted_proteomes])
示例#3
0
 def run():
     """Predict proteins with prodigal unless there are no assemblies.

     Returns:
         The result of ``cmdline('prodigal', ...)`` when ``assembly_names``
         is non-empty; otherwise 0 after logging 'Skipping.'.
     """
     if assembly_names:
         # No comma after the closing parenthesis: a trailing comma here
         # would turn the returned value into a one-element tuple, unlike
         # the bare integer returned by the else branch.
         # Passing '-o' (join(steps.intermediate_dir, assembly_name + '.gff'))
         # is currently disabled.
         return cmdline('prodigal',
                        parameters=[
                            '-i', filtered_assemblies, '-a',
                            predicted_proteomes
                        ])
     else:
         log.info('Skipping.')
         return 0
示例#4
0
        def run(start_from_here=False):
            """Filter assemblies, predict proteomes with prodigal and stage them.

            Steps, in order:
              1. Collect every non-hidden entry of ``p.assemblies``.
              2. If ``config.proteomes_dir`` exists, pass the list through
                 ``filter_dublicated_proteomes``; when nothing is left, warn
                 and ``exit(1)``.
              3. Run ``filter_assembly`` on each assembly.
              4. Run prodigal on each filtered assembly.
              5. Call ``adjust_proteomes`` on the resulting proteomes.
              6. Recreate ``new_proteomes_dir`` and copy the proteomes into it.

            ``start_from_here`` is accepted but not read in this body.

            Returns 0 on success, 1 when no assembly was filtered
            successfully, or the first non-zero code produced by prodigal or
            ``adjust_proteomes``.
            """
            assemblies = []
            for fname in listdir(p.assemblies):
                # Skip empty names and dot-files.
                if fname and fname[0] != '.':
                    assemblies.append(join(p.assemblies, fname))

            if isdir(config.proteomes_dir):
                assemblies = filter_dublicated_proteomes(config.proteomes_dir, assemblies)
                if assemblies == []:
                    log.warn(all_considered_warning % config.proteomes_dir)
                    exit(1)

            # Derive, per assembly: its base name, the filtered .fna path and
            # the target .fasta proteome path.
            assembly_names = []
            filtered_assemblies = []
            new_proteomes = []
            for asm_path in assemblies:
                name = splitext(basename(asm_path))[0]
                assembly_names.append(name)
                filtered_assemblies.append(join(assemblies_dir, name + '.fna'))
                new_proteomes.append(join(config.proteomes_dir, name + '.fasta'))

            if not isdir(assemblies_dir):
                mkdir(assemblies_dir)
            log.debug('   Created assemblies_dir ' + assemblies_dir)

            # Every assembly is filtered; only the number of zero return
            # codes matters afterwards.
            succeeded = sum(
                1
                for src, dst in zip(assemblies, filtered_assemblies)
                if filter_assembly(src,
                                   dst,
                                   skip=(4, 7, 10, 23, 32, 38),
                                   skip_after=51) == 0)
            if not succeeded:
                log.error('No correct assemblies.')
                return 1

            for fna, faa, name in zip(
                    filtered_assemblies, new_proteomes, assembly_names):
                prodigal = cmdline(
                    'prodigal',
                    parameters=[
                        '-i', fna,
                        '-o', join(config.intermediate_dir, name),
                        '-a', faa])
                exit_code = prodigal()
                if exit_code != 0:
                    return exit_code
                log.info('')

            status = adjust_proteomes(
                new_proteomes, config.proteomes_dir, prot_id_field=0)
            if status != 0:
                return status

            # Start from an empty new_proteomes_dir, then stage the proteomes.
            if exists(new_proteomes_dir):
                rmtree(new_proteomes_dir)
            if not isdir(new_proteomes_dir):
                mkdir(new_proteomes_dir)
            for proteome in new_proteomes:
                copy(proteome, join(new_proteomes_dir, basename(proteome)))

            return 0
示例#5
0
 def run(start_from_here=False):
     """Invoke ``orthomclFilterFasta.pl`` through ``cmdline``.

     Parameters passed to the script, in order: ``new_proteomes_dir``,
     ``min_length``, ``max_percent_stop``, ``new_good_proteomes`` and
     ``new_bad_proteomes``. The script itself is looked up in
     ``steps.orthomcl_bin_dir``.

     ``start_from_here`` is part of the signature but is not used here.

     Returns the value produced by ``cmdline``.
     """
     filter_script = join(steps.orthomcl_bin_dir, 'orthomclFilterFasta.pl')
     return cmdline(
         filter_script,
         parameters=[
             new_proteomes_dir,
             min_length,
             max_percent_stop,
             new_good_proteomes,
             new_bad_proteomes
         ])
示例#6
0
        def run(start_from_here=False):
            """Turn the assemblies in ``p.assemblies`` into staged proteomes.

            The assemblies are optionally reduced by
            ``filter_dublicated_proteomes`` (the process exits with status 1
            if that leaves an empty list), filtered with ``filter_assembly``,
            run through prodigal, post-processed by ``adjust_proteomes`` and
            finally copied into a freshly recreated ``new_proteomes_dir``.

            ``start_from_here`` is accepted but not read in this body.

            Returns:
                0 on success; 1 if no assembly was filtered successfully;
                otherwise the first non-zero code returned by prodigal or by
                ``adjust_proteomes``.
            """
            assemblies = []
            for entry in listdir(p.assemblies):
                # Ignore empty names and hidden (dot-prefixed) entries.
                if not entry or entry[0] == '.':
                    continue
                assemblies.append(join(p.assemblies, entry))

            if isdir(config.proteomes_dir):
                assemblies = filter_dublicated_proteomes(
                    config.proteomes_dir, assemblies)
                if assemblies == []:
                    log.warn(all_considered_warning % config.proteomes_dir)
                    exit(1)

            assembly_names = [
                splitext(basename(path))[0] for path in assemblies
            ]
            filtered_assemblies = [
                join(assemblies_dir, stem + '.fna') for stem in assembly_names
            ]
            new_proteomes = [
                join(config.proteomes_dir, stem + '.fasta')
                for stem in assembly_names
            ]

            if not isdir(assemblies_dir):
                mkdir(assemblies_dir)
            log.debug('   Created assemblies_dir ' + assemblies_dir)

            # Filter every assembly; remember whether at least one returned 0.
            any_filtered = False
            for raw, cleaned in zip(assemblies, filtered_assemblies):
                code = filter_assembly(raw,
                                       cleaned,
                                       skip=(4, 7, 10, 23, 32, 38),
                                       skip_after=51)
                if code == 0:
                    any_filtered = True
            if not any_filtered:
                log.error('No correct assemblies.')
                return 1

            # Stop at the first prodigal run that reports a non-zero code.
            for cleaned, proteome, stem in zip(filtered_assemblies,
                                               new_proteomes, assembly_names):
                prodigal_args = [
                    '-i', cleaned,
                    '-o', join(config.intermediate_dir, stem),
                    '-a', proteome
                ]
                code = cmdline('prodigal', parameters=prodigal_args)()
                if code != 0:
                    return code
                log.info('')

            code = adjust_proteomes(new_proteomes,
                                    config.proteomes_dir,
                                    prot_id_field=0)
            if code != 0:
                return code

            # Recreate new_proteomes_dir and fill it with the proteomes.
            if exists(new_proteomes_dir):
                rmtree(new_proteomes_dir)
            if not isdir(new_proteomes_dir):
                mkdir(new_proteomes_dir)
            for proteome in new_proteomes:
                copy(proteome, join(new_proteomes_dir, basename(proteome)))

            return 0