Пример #1
0
def main():
    """Entry point of the monitor.

    Loads the configuration and prices, initialises the node state, then
    starts a background scheduler (state printing, config reload and balance
    check), the HTTP server and the node watcher.  On any exit path every
    node is asked to stop its work and the executor/scheduler are shut down
    without waiting for running jobs.
    """
    Config.load_config()
    sonm_api = init_sonm_api()
    check_balance(sonm_api)
    Config.load_prices(sonm_api)
    init_nodes_state(sonm_api)
    scheduler = BackgroundScheduler()
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=100)
    futures_ = dict()
    try:
        scheduler.start()
        scheduler.add_job(print_state, 'interval', seconds=60, id='print_state')
        scheduler.add_job(reload_config, 'interval', kwargs={"sonm_api": sonm_api}, seconds=60, id='reload_config')
        scheduler.add_job(check_balance, 'interval', kwargs={"sonm_api": sonm_api}, seconds=600, id='check_balance')
        executor.submit(run_http_server)
        watch(executor, futures_)
        print_state()
        logger.info("Work completed")
    except KeyboardInterrupt:
        logger.info("Keyboard interrupt, script exiting")
    except SystemExit as e:
        # The exit value must go through a placeholder: extra positional
        # arguments are %-formatted into the message, and a message without
        # a placeholder makes logging report a formatting error instead.
        logger.exception("System Exit: %s", e)
    finally:
        logger.info("Script exiting. Sonm node will continue work")
        for n in Nodes.get_nodes_arr():
            n.stop_work()
        SonmHttpServer.KEEP_RUNNING = False
        executor.shutdown(wait=False)
        scheduler.shutdown(wait=False)
def main():
    """Parse the command line and hand the samples over to ``split_bams``.

    Required options: ``-o``, ``--vcf``, ``--chr``, ``--samples`` and
    ``--bams``.  ``--samples`` and ``--bams`` are comma-separated lists that
    are paired positionally into ``BaseSample`` objects.  When a required
    option is missing the usage text is reported through ``critical``.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(usage='Usage: ' + basename(__file__) + ' --chr chr --vcf VCF_file --samples Sample1,Sample2 '
                                                                 '--bams BAM_file1,BAM_file2 -o Output_directory '
                                                                 '--features BED_file')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir')
    parser.add_option('--samples', dest='sample_names')
    parser.add_option('--bams', dest='bams')
    parser.add_option('--vcf', dest='vcf_fpath')
    parser.add_option('--chr', dest='chrom')
    parser.add_option('--features', dest='features', help='BED file with real CDS/Exon/Gene/Transcript regions with '
                                                          'annotations (default "features" is in system_config)')
    (opts, args) = parser.parse_args(sys.argv[1:])

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})
    cnf.verbose = False

    # --samples and --bams are split below, so they are validated together
    # with the other required options; otherwise a missing one would fail
    # with an AttributeError on None instead of showing the usage.
    required = (cnf.output_dir, cnf.vcf_fpath, cnf.chrom, cnf.sample_names, cnf.bams)
    if not all(required):
        critical(parser.usage)

    cnf.features = cnf.features or cnf.genome.features
    samples = [BaseSample(sample_name, None, bam=bam)
               for (sample_name, bam) in zip(cnf.sample_names.split(','), cnf.bams.split(','))]
    split_bams(cnf, samples, cnf.vcf_fpath)
    info('Done.')
Пример #3
0
def main():
    """Parse the command line and run ``get_regions_coverage`` on the samples.

    ``--samples`` and ``--bams`` are comma-separated lists paired positionally
    into ``BaseSample`` objects.  The output and work directories are created
    before the coverage step runs.  Missing required options are reported
    through ``critical`` with the usage text.
    """
    info(' '.join(sys.argv))
    info()
    parser = OptionParser(
        usage='Usage: ' + basename(__file__) +
        ' --bed BED_file --bam BAM_file -g hg19 -o Output_BEDGRAPH_file '
        '--work-dir work_directory --chr chromosome')
    parser.add_option('-o', dest='output_dir')
    parser.add_option('--samples', dest='sample_names')
    parser.add_option('--bams', dest='bams')
    parser.add_option('--vcf', dest='vcf_fpath')
    parser.add_option('--chr', dest='chrom')
    parser.add_option('--bed', dest='bed', help='BED file.')
    parser.add_option('-g',
                      '--genome',
                      dest='chr_len_fpath',
                      help='File with chromosomes lengths.')
    parser.add_option('--work-dir', dest='work_dir', help='Work directory.')
    (opts, args) = parser.parse_args(sys.argv[1:])

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})

    # Validate *before* building the samples: the sample list is produced by
    # splitting these very options, so checking afterwards could never catch
    # a missing value.
    if not cnf.output_dir or not cnf.bams or not cnf.sample_names:
        critical(parser.usage)

    samples = [
        BaseSample(sample_name, None, bam=bam)
        for (sample_name,
             bam) in zip(cnf.sample_names.split(','), cnf.bams.split(','))
    ]

    safe_mkdir(cnf.output_dir)
    safe_mkdir(cnf.work_dir)
    get_regions_coverage(cnf, samples)
    info('Done.')
Пример #4
0
    def __init__(self, path, verbose=False):
        """Read the project and global configuration and prepare include flags.

        :param path: Project directory; ``project.json`` is read from it and
            ``global_config.json`` from its parent directory.
        :param verbose: Stored on the instance as ``self.verbose``.
        """
        self.project_path = path
        self.verbose = verbose

        # Per-project settings.
        project_json = os.path.join(path, "project.json")
        with open(project_json, "r") as cfg_stream:
            self.project_cfg = json.load(cfg_stream)

        # Settings shared between projects live one level above the project.
        self.global_config = Config()
        global_json = os.path.join(self.project_path, "..\\global_config.json")
        self.global_config.fromFile(global_json)

        # Expand every configured include once; the project directory itself
        # is always appended as the last include path.
        include_flags = [
            "-I%s" % process_string(self.global_config, include)
            for include in self.project_cfg["includes"]
        ]
        self.project_includes = " ".join(include_flags) + " -I" + self.project_path
Пример #5
0
 def get_price(self):
     """Return ``(price, predicted, predicted_with_coefficient)`` for this node.

     ``price`` is the predicted hourly price increased by the configured
     ``price_coefficient`` percent when that value is below ``max_price``;
     otherwise it is the configured ``max_price`` as stored in the config.
     Without a prediction for the tag the result is ``(max_price, 0, 0)``.
     """
     forecast = Config.price_for_tag(self.tag)
     max_price = self.config["max_price"]
     if not forecast:
         return max_price, 0, 0

     hourly = forecast["perHourUSD"]
     markup = 1 + int(self.config["price_coefficient"]) / 100
     adjusted = hourly * markup
     if adjusted < float(max_price):
         return adjusted, hourly, adjusted
     return max_price, hourly, adjusted
Пример #6
0
def proc_args(argv):
    """Parse the Seq2C command line and build the run configuration.

    The positional arguments are either a single ``sample2bam.tsv`` file
    (anything that does not end with ``.bam``) or a list of inputs passed to
    ``find_bams``.

    :param argv: Not read by this function; options are parsed by
        ``OptionParser.parse_args()`` without arguments, which falls back to
        ``sys.argv[1:]``.
    :return: Tuple ``(cnf, samples, target_bed, output_dir)``; ``target_bed``
        is ``None`` when ``--bed`` was not given.
    """
    info(' '.join(sys.argv))
    info()

    # The usage text must name the real option (--controls) so that users
    # copying it get a working command line.
    description = 'This script generates target QC reports for each BAM provided as an input. ' \
                  'Usage: ' + basename(__file__) + ' sample2bam.tsv --bed target.bed --controls sample1:sample2 -o results_dir'
    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR', default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls', help='Optional control sample names for Seq2C. For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts', help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP, action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        parser.print_usage()
        sys.exit(1)
    if len(args) == 1 and not args[0].endswith('.bam'):
        # A single non-BAM argument is treated as a sample -> BAM table.
        sample_names, bam_fpaths = read_samples(verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict()
        for s, b in zip(sample_names, bam_fpaths):
            bam_by_sample[s] = b
    else:
        bam_by_sample = find_bams(args)

    # No BED file switches the run configuration to whole-genome mode.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
            for s_name, bam_fpath in bam_by_sample.items()]
    info('Samples: ')
    for s in samples:
        info('  ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner: critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
Пример #7
0
def main():
    """Parse the command line, run ``run_fastq`` and report the results path.

    Both read files (``-1`` / ``-2``) must exist and ``-o`` must be given.
    When ``--sample`` is omitted the sample name is derived from the two read
    file paths via ``_get_sample_name``.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(description='This script runs preprocessing.')
    simple_options = (
        ('-1', 'left_reads_fpath', 'Left reads fpath'),
        ('-2', 'right_reads_fpath', 'Right reads fpath'),
        ('--sample', 'sample_name', 'Sample name'),
        ('-o', 'output_dir', 'Output directory path'),
    )
    for flag, dest, help_text in simple_options:
        parser.add_option(flag, dest=dest, help=help_text)
    downsample_help = (
        'Downsample reads to avoid excessive processing times with large files. '
        'Default is 1 million. Set to 0 to turn off downsampling.')
    parser.add_option('--downsample-to', dest='downsample_to', default=None,
                      type='int', help=downsample_help)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    opts, _positional = parser.parse_args()
    logger.is_debug = opts.debug

    cnf = Config(opts.__dict__, determine_sys_cnf(opts),
                 determine_run_cnf(opts))
    left_reads_fpath = verify_file(opts.left_reads_fpath, is_critical=True)
    right_reads_fpath = verify_file(opts.right_reads_fpath, is_critical=True)

    if opts.output_dir:
        output_dirpath = adjust_path(opts.output_dir)
    else:
        output_dirpath = critical('Please, specify output directory with -o')
    verify_dir(dirname(output_dirpath), description='output_dir', is_critical=True)

    with workdir(cnf):
        # Fall back to a name derived from the read files.
        sample_name = cnf.sample_name or _get_sample_name(left_reads_fpath, right_reads_fpath)
        results_dirpath = run_fastq(
            cnf, sample_name, left_reads_fpath, right_reads_fpath,
            output_dirpath, downsample_to=cnf.downsample_to)

    verify_dir(results_dirpath, is_critical=True)
    for line in (None, '*' * 70, 'Fastqc results:', '  ' + results_dirpath):
        if line is None:
            info()
        else:
            info(line)
Пример #8
0
    def index():
        """Render ``index.html`` with one nodes table per tag.

        Nodes are grouped by their ``tag``; each group becomes a dict with the
        tag, its formatted predicted price and a ``NodesTable`` of the group's
        nodes.  The current ``Config.balance`` is passed to the template as
        ``token_balance``.
        """
        groups = defaultdict(list)
        for obj in Nodes.get_nodes_arr():
            groups[obj.tag].append(obj)

        # Build the per-tag tables once, after grouping is complete, instead
        # of rebuilding every table on each loop iteration.
        nodes_content = [{
            'node_tag': tag,
            'predicted_price': Config.formatted_price_for_tag(tag),
            'nodes_table': NodesTable([node.as_table_item for node in nodes],
                                      classes=['table', 'table-striped', 'table-bordered'])
        }
            for tag, nodes in groups.items()]

        return render_template('index.html', nodes=nodes_content, token_balance=Config.balance)
Пример #9
0
def get_args():
    """Parse the Circos-plot command line and return the resulting ``Config``.

    Positional arguments are parsed but not used.
    """
    parser = OptionParser(description=(
        'Plots a Circos plot given vardict variant file (with all dbSNP SNPs, not the PASS one), '
        'Seq2C CNV calls and Manta SVs.'))
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)

    # (flags, keyword arguments) in the order they are registered.
    option_table = (
        (('--bed',), dict(dest='bed_fpath', help='Path to BED file')),
        (('-v', '--mutations'), dict(dest='mutations_fpath', help='Path to VarDict.txt file')),
        (('-c', '--seq2c'), dict(dest='seq2c_tsv_fpath', help='Path to seq2c copy number file')),
        (('--sv',), dict(dest='sv_fpath', help='Path to Manta SV call vcf.gz file')),
        (('-s', '--sample'), dict(dest='sample', help='Identifier of sample in VarDict and Seq2c files')),
        (('-o', '--output-dir'), dict(dest='output_dir', default="./",
                                      help='Output directory. Defaults to ./')),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    opts, _positional = parser.parse_args()
    run_cnf = determine_run_cnf(opts)
    return Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
Пример #10
0
def main():
    """Sort the BED file given as first positional argument into ``-o``.

    Prints the help to stderr and exits with status 1 when no input file is
    given; reports the usage through ``critical`` when ``-o`` is missing.
    """
    usage_text = ('Usage: ' + basename(__file__) +
                  ' -o Output_BED_file -g hg19 Input_BED_file')
    parser = OptionParser(usage=usage_text)
    parser.add_option('-o', '--output-bed', dest='output_fpath')
    parser.add_option('-g', '--genome', dest='genome')
    opts, positional = parser.parse_args(sys.argv[1:])

    if not positional:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})
    check_genome_resources(cnf)

    if not cnf.output_fpath:
        critical(parser.usage)

    input_bed = verify_bed(positional[0], is_critical=True)
    output_bed = adjust_path(cnf.output_fpath)
    sort_bed(cnf, input_bed, output_bed)
Пример #11
0
 def __init__(self, status, sonm_api, node_tag, deal_id, task_id, bid_id,
              price):
     """Set up the state of one node.

     The node's ``tag`` is the part of ``node_tag`` before the first
     underscore.  ``price`` is stored as a formatted ``USD/h`` string, or as
     an empty string when an empty string is passed in.  The task YAML is
     created as part of construction.
     """
     # Run flags and logger.
     self.RUNNING = False
     self.KEEP_WORK = True
     self.logger = logging.getLogger("monitor")

     # Identity and configuration.
     self.node_tag = node_tag
     self.tag = node_tag.split('_')[0]
     self.config = Config.get_node_config(node_tag)
     self.status = status
     self.sonm_api = sonm_api

     # Files and in-memory documents for the order (bid) and the task.
     self.bid_file = "out/orders/{}.yaml".format(node_tag)
     self.task_file = "out/tasks/{}.yaml".format(node_tag)
     self.bid_ = {}
     self.task_ = {}

     self.deal_id = deal_id
     self.task_id = task_id
     self.bid_id = bid_id

     if price != "":
         self.price = "{0:.4f} USD/h".format(convert_price(price))
     else:
         self.price = ""

     self.task_uptime = 0
     self.create_task_yaml()
     self.last_heartbeat = time.time()
Пример #12
0
 def reload_config(self):
     """Reload this node's configuration and keep the fresh copy on ``self``."""
     tag = self.node_tag
     Config.reload_node_config(tag)
     refreshed = Config.get_node_config(tag)
     self.config = refreshed
Пример #13
0
def proc_args(argv):
    """Parse the command line of the variants pipeline.

    :param argv: Not read here; ``parse_args()`` is called without arguments
        and therefore parses ``sys.argv[1:]``.
    :return: Tuple ``(cnf, samples)`` where ``samples`` is a sorted list of
        ``source.VarSample`` objects, one per entry returned by
        ``read_samples``.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(
        description='This script generates target QC reports for each BAM provided as an input.')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)

    # (flags, keyword arguments), registered in this exact order.
    option_table = [
        (('--log-dir',), dict(dest='log_dir')),
        (('--is-wgs',), dict(dest='is_wgs', action='store_true', default=False,
                             help='whole genome sequencing')),
        (('--is-deep-seq',), dict(dest='is_deep_seq', action='store_true', default=False,
                                  help='deep targeted sequencing')),
        (('--only-summary',), dict(dest='only_summary', action='store_true')),
        (('-o',), dict(dest='output_dir', metavar='DIR',
                       default=join(os.getcwd(), 'targetqc'))),
        (('-c', '--caller'), dict(dest='caller')),
        (('--qc',), dict(dest='qc', action='store_true', default=False)),
        (('--no-qc',), dict(dest='qc', action='store_false', default=False)),
        (('--qc-caption',), dict(dest='qc_caption', help=SUPPRESS_HELP)),
        (('--no-tsv',), dict(dest='tsv', action='store_false', default=True,
                             help=SUPPRESS_HELP)),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('No vcf files provided to input.')

    run_cnf = determine_run_cnf(opts,
                                is_targetseq=opts.is_deep_seq,
                                is_wgs=opts.is_wgs)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    vcf_fpath_by_sample = read_samples(args, cnf.caller)
    info()

    # The project name defaults to the name of the output directory.
    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Variants'
    set_up_dirs(cnf)
    info(' '.join(sys.argv))

    samples = []
    for sample_name, vcf_fpath in vcf_fpath_by_sample.items():
        sample_dir = join(cnf.output_dir, sample_name)
        samples.append(source.VarSample(sample_name, sample_dir, vcf=vcf_fpath))
    samples.sort(key=lambda sample: sample.key_to_sort())

    check_genome_resources(cnf)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples
Пример #14
0
class ProjectManager:
    """Builds a project described by ``project.json``.

    The manager compiles the library and module sources with
    ``mwcceppc.exe``, links them with ``Kamek.exe`` and copies the resulting
    ``CODE.bin`` / ``PATCH.bin`` to the deploy paths from the global config.
    The tools are run from the ``tool`` directory next to the project.
    """

    def __init__(self, path, verbose=False):
        """Load the project and global configuration.

        :param path: Project directory; ``project.json`` is read from it and
            ``global_config.json`` from its parent directory.
        :param verbose: When true, tool command lines are printed before they
            are executed.
        """
        # Set our project path
        self.project_path = path

        self.verbose = verbose

        # Load our configuration
        with open(os.path.join(path, "project.json"), "r") as file:
            self.project_cfg = json.load(file)

        self.global_config = Config()
        self.global_config.fromFile(
            os.path.join(self.project_path, "..\\global_config.json"))

        # Process our project includes ahead of time
        self.project_includes = " ".join(
            ("-I%s" % process_string(self.global_config, i))
            for i in self.project_cfg["includes"])

        self.project_includes += " -I" + self.project_path

    def _run_compiler(self, compiler_string, source):
        """Run one compiler command line and raise if it exits non-zero.

        :param compiler_string: Full command line to execute.
        :param source: Source file name, used only for the error message.
        :raises RuntimeError: When the compiler returns a non-zero status.
        """
        if self.verbose:
            print(compiler_string)
        if subprocess.call(compiler_string):
            # Format the message here: exception constructors do not apply
            # %-formatting to their arguments.
            raise RuntimeError("[FATAL] Failed to compile %s!" % source)

    def build_library(self, release=False):
        """
        Build our library components.

        The working directory is restored even when compilation fails.

        :param release: Whether or not to build for release.
        :return: List of library objects
        :raises RuntimeError: When a source file fails to compile.
        """
        # Save our working directory
        cwd = os.getcwd()
        result = []

        try:
            # Set our current directory to our project path
            os.chdir(self.project_path)

            # If our library bin folder does not exit, create it
            if not os.path.isdir("./lib/bin"):
                os.mkdir("./lib/bin")

            # Arguments for our compiler
            cwArg = cwFlags + " " + self.project_includes + " -I%s\\lib\\" % self.project_path

            # pass down our build mode
            cwArg += " -DRELEASE" if release else " -DDEBUG"

            with open(self.project_path + "\\lib\\build.json", "r") as makefile:
                lib_make = json.load(makefile)

            os.chdir(self.project_path + "\\..\\tool\\")
            for source in lib_make["sources"]:
                # TODO: bin subfolder not auto created!
                compiler_string = "./mwcceppc.exe %s -c -o %s/lib/bin/%s.o %s/lib/%s" % (
                    cwArg, self.project_path, source, self.project_path, source)
                self._run_compiler(compiler_string, source)
                result.append("%s/lib/bin/%s.o" % (self.project_path, source))
        finally:
            # Restore our working directory
            os.chdir(cwd)

        self.library_objects = result + lib_make["objects"]

        return self.library_objects

    def compile_module(self, module, release=False):
        """Compile every source of one module.

        The working directory is restored even when compilation fails.

        :param module: Module name; its folder is ``modules/<module>`` inside
            the project and must contain ``module.json``.
        :param release: Whether or not to build for release.
        :return: List of paths of the produced object files.
        :raises RuntimeError: When a source file fails to compile.
        """
        result = []

        # Save our working directory
        cwd = os.getcwd()

        try:
            # Set our current directory to our project path
            os.chdir(self.project_path)

            # Load our module config
            with open("./modules/%s/module.json" % module, "r") as module_file:
                module_cfg = json.load(module_file)

            if self.verbose:
                print("Compiling module %s..." % module)

            # Create our module bin folder
            if not os.path.isdir("./modules/%s/bin" % module):
                os.mkdir("./modules/%s/bin" % module)

            # Construct our compiler arguments
            cwArg = cwFlags + " " + self.project_includes + " " + " ".join(
                "-I%s" % (process_string(self.global_config, i))
                for i in module_cfg["includes"]) + " -I%s\\modules\\%s\\" % (
                    self.project_path, module)

            # pass down our build mode
            cwArg += " -DRELEASE" if release else " -DDEBUG"

            os.chdir(self.project_path + "\\..\\tool\\")
            for source in module_cfg["sources"]:
                print("...compiling %s" % source)
                compiler_string = "./mwcceppc.exe %s -c -o %s/modules/%s/bin/%s.o %s/modules/%s/%s" % (
                    cwArg, self.project_path, module, source, self.project_path,
                    module, source)
                self._run_compiler(compiler_string, source)
                result.append("%s/modules/%s/bin/%s.o" %
                              (self.project_path, module, source))
        finally:
            # Restore our working directory
            os.chdir(cwd)

        return result

    def apply_paddding_absolute(self, source_file, dest_filesize):
        """Pad ``source_file`` in place with zero bytes up to ``dest_filesize``.

        :param source_file: Path of the file to pad.
        :param dest_filesize: Desired total size in bytes.
        :raises ValueError: When the file is already larger than the target.
        """
        with open(source_file, 'rb') as stream:
            source_file_read = stream.read()
        source_filesize = len(source_file_read)

        print(
            "Applying padding...:\n\tSource Filesize: %s\n\tTarget Filesize: %s"
            % (source_filesize, dest_filesize))

        if source_filesize > dest_filesize:
            raise ValueError(
                "Source filesize exceeds target padded size. Consider adding compression to debug target or increasing target filesize."
            )
        if source_filesize == dest_filesize:
            print(
                "\n---\n\nSource file size matches target filesize. No padding applied.\n\n---\n"
            )
            return

        with open(source_file, 'wb') as stream:
            stream.write(source_file_read +
                         b"\x00" * (dest_filesize - source_filesize))

    def apply_paddding_relative(self, source_file, amount):
        """Append ``amount`` zero bytes to ``source_file`` in place.

        :param source_file: Path of the file to pad.
        :param amount: Number of zero bytes to append; a non-positive amount
            leaves the content unchanged.
        """
        with open(source_file, 'rb') as stream:
            source_file_read = stream.read()
        source_filesize = len(source_file_read)

        print(
            "Applying padding...:\n\tSource Filesize: %s\n\tRelative Padding Size: %s\n\tComputed Target Filesize: %s"
            % (source_filesize, amount, source_filesize + amount))

        with open(source_file, 'wb') as stream:
            stream.write(source_file_read + b"\x00" * amount)

    def build(self, release=False):
        """
        Build the project!

        Compiles all modules and the library, links them with Kamek, pads the
        code file, generates ``PATCH.bin`` from the gecko output and copies
        both artifacts to their deploy paths.  The process is left in the
        ``tool`` directory afterwards.

        :param release: Whether or not to build in release mode.
        :raises RuntimeError: When compilation or Kamek fails.
        """

        mode = "release" if release else "debug"
        print("Building configuration: %s" % mode)

        # Compile all modules
        objects = []

        for module in self.project_cfg["modules"]:
            objects += self.compile_module(module, release)

        objects += self.build_library(release)

        os.chdir(self.project_path + "/../tool/")

        code_file = self.project_path + "\\bin\\%s\\CODE.bin" % mode
        gecko_file = self.project_path + "\\bin\\gecko.txt"
        patch_file = self.project_path + "\\bin\\PATCH.bin"

        kamek_command = "Kamek.exe %s -static=%s -output-gecko=%s -output-code=%s -externals=%s/externals/%s" % (
            " ".join(objects), self.project_cfg["static"],
            gecko_file, code_file,
            self.project_path, self.project_cfg["externals"])
        if self.verbose:
            print(kamek_command)

        if subprocess.call(kamek_command):
            raise RuntimeError("Kamek fail")

        # TODO: this is far from optimal. add padding bytes for sbss/bss
        self.apply_paddding_relative(code_file, 1024)

        copyfile(code_file, self.global_config.paths["CODE_DEPLOY"])
        with open(gecko_file, "r") as gecko:
            pg = PatchGenerator()
            pg.parse_gecko(gecko.readlines())
            pg.process()
            pg.write_to_file(patch_file)

        copyfile(patch_file, self.global_config.paths["PATCH_DEPLOY"])
Пример #15
0
def proc_opts():
    """Parse the preprocessing command line and prepare the work directories.

    Positional arguments are fastq files; entries for which ``verify_file``
    returns a false value are dropped.  Unless a work directory was given
    explicitly, a directory under ``<output_dir>/work`` is used: either the
    ``latest`` entry (when intermediate results are reused) or a new
    timestamped directory to which ``latest`` is re-pointed as a symlink.

    :return: Tuple ``(cnf, output_dir, fastq_fpaths)``.
    """
    parser = OptionParser()
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)

    # (flags, keyword arguments), registered in this exact order.
    option_table = (
        (('--expose-only',), dict(dest='expose_to_ngs_server_only', action='store_true',
                                  default=False, help='Only add project to the webserver')),
        (('--no-expose',), dict(dest='expose', action='store_false', default=True,
                                help='Do not expose the reports')),
        (('-o',), dict(dest='output_dir')),
        (('--bed',), dict(dest='bed',
                          help='BED file to run targetSeq and Seq2C analysis on.')),
        (('--downsample-to',), dict(dest='downsample_to', type='int')),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('Usage: ' + __file__ + ' *.fq.gz -o output_dir')

    verified = [verify_file(candidate) for candidate in args]
    fastq_fpaths = [checked for checked in verified if checked]
    info(str(len(fastq_fpaths)) + ' fastq files')

    run_cnf = determine_run_cnf(opts)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    info('Writing to ' + str(cnf.output_dir))

    cnf.project_name = cnf.project_name or 'preproc'

    if cnf.work_dir:
        # An explicitly given work directory switches debug mode on.
        cnf.debug = True
    else:
        work_root = join(cnf.output_dir, 'work')
        safe_mkdir(work_root)
        latest_link = join(work_root, 'latest')

        if cnf.reuse_intermediate:
            cnf.work_dir = latest_link
        else:
            stamp = datetime.datetime.now().strftime("%Y-%b-%d_%H-%M")
            cnf.work_dir = join(work_root, stamp)
            # Replace whatever "latest" currently is with a relative symlink
            # to the new timestamped directory.
            if islink(latest_link):
                os.remove(latest_link)
            if isdir(latest_link):
                shutil.rmtree(latest_link)
            if not exists(latest_link):
                os.symlink(basename(cnf.work_dir), latest_link)

    cnf.work_dir = adjust_path(cnf.work_dir)
    safe_mkdir(cnf.work_dir)
    cnf.log_dir = join(cnf.work_dir, 'log')
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf)
    try:
        subprocess.call(['chmod', '-R', 'g+w', cnf.work_dir])
    except OSError:
        # Best effort only: report the failure and continue.
        err(traceback.format_exc())

    if cnf.samplesheet:
        cnf.samplesheet = verify_file(cnf.samplesheet, is_critical=True)

    info(' '.join(sys.argv))
    info()
    info('Created a temporary working directory: ' + cnf.work_dir)

    if cnf.project_name:
        info('Project name: ' + cnf.project_name)

    if cnf.samplesheet:
        info('Using custom sample sheet ' + cnf.samplesheet)

    check_genome_resources(cnf)
    check_system_resources(cnf, optional=['fastq'])

    return cnf, cnf.output_dir, fastq_fpaths
Пример #16
0
def test_init_config_no_file():
    """Constructing ``Config`` from a missing file raises ``FileNotFoundError``."""
    missing_path = "test_files/no_file.json"
    with pytest.raises(FileNotFoundError):
        Config(missing_path)
Пример #17
0
def test_init_incorrect_json():
    """Constructing ``Config`` from ``incorrect.json`` raises ``JSONDecodeError``."""
    broken_path = "test_files/incorrect.json"
    with pytest.raises(JSONDecodeError):
        Config(broken_path)
Пример #18
0
def test_init_config_no_json():
    """Constructing ``Config`` from a ``.txt`` file raises ``JSONDecodeError``."""
    text_path = "test_files/file.txt"
    with pytest.raises(JSONDecodeError):
        Config(text_path)
Пример #19
0
def main():
    """Parse the TargQC command line, run ``run_targqc`` and mail the report.

    Positional arguments are BAM files; every one must pass ``verify_bam``.
    ``--bed`` is optional: without it the run configuration is determined in
    whole-genome mode and no BED path is passed on.  When ``run_targqc``
    returns a report path it is sent with ``send_email``.
    """
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--work-dir', dest='work_dir', metavar='DIR')
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--only-summary',
                      dest='only_summary',
                      action='store_true')
    parser.add_option('-o',
                      dest='output_dir',
                      metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('--reannotate',
                      dest='reannotate',
                      action='store_true',
                      default=False,
                      help='re-annotate BED file with gene names')
    parser.add_option('--dedup',
                      dest='dedup',
                      action='store_true',
                      default=False,
                      help='count duplicates in coverage metrics')
    parser.add_option('--bed',
                      dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option(
        '--exons',
        '--exome',
        '--features',
        dest='features',
        help=
        'Annotated CDS/Exon/Gene/Transcripts BED file to make targetSeq exon/amplicon regions reports.'
    )

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        critical('No BAMs provided to input.')
    # De-duplicate inputs by absolute path.
    bam_fpaths = list(set([abspath(a) for a in args]))

    bad_bam_fpaths = []
    for fpath in bam_fpaths:
        if not verify_bam(fpath):
            bad_bam_fpaths.append(fpath)
    if bad_bam_fpaths:
        critical('BAM files cannot be found, empty or not BAMs:' +
                 ', '.join(bad_bam_fpaths))

    # No BED file switches the run configuration to whole-genome mode.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'TargQC'
    set_up_dirs(cnf)

    check_genome_resources(cnf)

    # The BED file is optional (see is_wgs above), so only verify and report
    # it when one was given; otherwise None is passed on.
    # NOTE(review): assumes run_targqc accepts bed_fpath=None for WGS runs -
    # confirm against its implementation.
    bed_fpath = None
    if cnf.bed:
        verify_bed(cnf.bed, is_critical=True)
        bed_fpath = adjust_path(cnf.bed)
        info('Using amplicons/capture panel ' + bed_fpath)

    features_bed_fpath = adjust_path(
        cnf.features) if cnf.features else adjust_path(cnf.genome.features)
    info('Features: ' + features_bed_fpath)

    genes_fpath = None
    if cnf.genes:
        genes_fpath = adjust_path(cnf.genes)
        info('Custom genes list: ' + genes_fpath)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    info('*' * 70)
    info()

    targqc_html_fpath = run_targqc(cnf, cnf.output_dir, bam_fpaths, bed_fpath,
                                   features_bed_fpath, genes_fpath)
    if targqc_html_fpath:
        send_email(
            cnf, 'TargQC report for ' + cnf.project_name + ':\n  ' +
            targqc_html_fpath)
def _read_args(args_list):
    options = [
        # (['-k', '--key-genes'], dict(
        #     dest='key_genes_fpath',
        #     help='list of key genes (they are at top priority when choosing one of multiple annotations)',
        #     default='/ngs/reference_data/genomes/Hsapiens/common/az_key_genes.300.txt')
        #  ),
        # (['-a', '--approved-genes'], dict(
        #     dest='approved_genes_fpath',
        #     help='list of HGNC approved genes (they are preferable when choosing one of multiple annotations)',
        #     default='/ngs/reference_data/genomes/Hsapiens/common/HGNC_gene_synonyms.txt')
        #  ),
        # (['-e', '--ensembl-bed'], dict(
        #     dest='ensembl_bed_fpath',
        #     help='reference BED file for annotation (Ensembl)')
        #  ),
        # (['-r', '--refseq-bed'], dict(
        #     dest='refseq_bed_fpath',
        #     help='reference BED file for annotation (RefSeq)')
        #  ),
        # (['-b', '--bedtools'], dict(
        #     dest='bedtools',
        #     help='path to bedtools',
        #     default='bedtools')
        #  ),
        (['-o', '--output-bed'], dict(dest='output_fpath')),
        (['--debug'],
         dict(
             dest='debug',
             help=
             'run in a debug more (verbose output, keeping of temporary files)',
             default=False,
             action='store_true')),
        (['--output-hg'],
         dict(
             dest='output_hg',
             help=
             'output chromosome names in hg-style (chrM, chr1, .., chr22, chrX, chrY)',
             default=False,
             action='store_true')),
        (['--output-grch'],
         dict(
             dest='output_grch',
             help='output chromosome names in GRCh-style (1, .., 22, X, Y, MT)',
             default=False,
             action='store_true')),
        (['-g', '--genome'], dict(dest='genome', default='hg19')),
    ]

    parser = OptionParser(
        usage='usage: %prog [options] Input_BED_file -o Standardized_BED_file',
        description='Scripts outputs a standardized version of input BED file. '
        'Standardized BED: 1) has 4 or 8 fields (for BEDs with primer info);'
        ' 2) has HGNC approved symbol in forth column if annotation is '
        'possible and not_a_gene_X otherwise;'
        ' 3) is sorted based on chromosome name -> start -> end;'
        ' 4) has no duplicated regions (regions with the same chromosome, start and end), '
        'the only exception is _CONTROL_ regions.')
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, args) = parser.parse_args(args_list)

    if len(args) != 1:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})

    work_dirpath = tempfile.mkdtemp()
    info('Creating a temporary working directory ' + work_dirpath)
    if not exists(work_dirpath):
        os.mkdir(work_dirpath)

    input_bed_fpath = abspath(args[0])
    info('Input: ' + input_bed_fpath)

    output_bed_fpath = adjust_path(cnf.output_fpath)
    info('Writing to: ' + output_bed_fpath)

    # process configuration
    # for k, v in opts.__dict__.iteritems():
    #     if k.endswith('fpath') and verify_file(v, is_critical=True):
    #         opts.__dict__[k] = verify_file(v, k)
    if cnf.output_grch and cnf.output_hg:
        info(
            'you cannot specify --output-hg and --output-grch simultaneously!')
    # if not which(opts.bedtools):
    #     info('bedtools executable not found, please specify correct path (current is %s)! '
    #         'Did you forget to execute "module load bedtools"?' % opts.bedtools)

    # if opts.debug:
    #     info('Configuration: ')
    #     for k, v in opts.__dict__.iteritems():
    #         info('\t' + k + ': ' + str(v))
    info()

    # opts.ensembl_bed_fpath = verify_file(opts.ensembl_bed_fpath or \
    #     ('/ngs/reference_data/genomes/Hsapiens/' + opts.genome + '/bed/Exons/Exons.with_genes.bed'))

    # opts.refseq_bed_fpath = verify_file(opts.refseq_bed_fpath or \
    #     ('/ngs/reference_data/genomes/Hsapiens/' + opts.genome + '/bed/Exons/RefSeq/RefSeq_CDS_miRNA.all_features.bed'))

    return input_bed_fpath, output_bed_fpath, work_dirpath, cnf
Пример #21
0
def main():
    """
    Run the coding evaluation and create an HTML report.

    Steps:
        1. Instantiate the config object, define the path to the report file
           and initialise the report data.
        2. Iterate through all videos in the config; for each one:
            2.1 Calculate PSNR.
            2.2 Calculate metrics (min, max, median, ratio below threshold).
            2.3 Create a record for the report with the calculated data.
            2.4 Append the record to the metrics list.
        3. Update the report data with the metrics list.
        4. Create the report from the report data.

    Video pairs that raise VideoCaptureException are logged and skipped.

    :return: None
    """
    # configuration file with input information
    cfg = Config("config/input_config_1.json")

    # parameters for report
    # "%S" instead of the former "%3S": a width prefix is not a standard
    # strftime directive and its handling depends on the platform C library.
    timestamp = datetime.now().strftime("%Y%m%d_%H_%M_%S")
    report_path = os.path.join(cfg.report_folder,
                               "report_{}.html".format(timestamp))
    reports_data = {"total_videos": len(cfg.videos)}
    metrics_list = []

    # run video analysis
    for idx, input_videos in enumerate(cfg.videos):
        reference = input_videos.reference_video
        compressed = input_videos.compressed_video
        logging.info("videos: %s %s", reference, compressed)
        try:
            # calculate psnr values
            gen_info, psnrs_info = processing_psnr(reference, compressed)

            # analysis
            metrics = Metrics(psnrs_info)
            # local names avoid shadowing the min/max builtins
            min_psnr = metrics.min_psnr
            max_psnr = metrics.max_psnr
            median_psnr = metrics.median_psnr
            # element [1] of the filter result for PSNR values below the
            # threshold configured for this pair is reported as the ratio
            ratio = metrics.get_filtered_psnr(
                lambda psnr, threshold: psnr < threshold,
                input_videos.threshold)[1]

            # prepare record to report
            report_rec = {
                idx: {
                    "original": reference,
                    "compressed": compressed,
                    "Number of frames reference":
                    gen_info["general"][reference],
                    "Number of frames compressed":
                    gen_info["general"][compressed],
                    "Number of processed frames": len(psnrs_info),
                    "max PSNR, dB": max_psnr,
                    "min PSNR, dB": min_psnr,
                    "median PSNR, dB": median_psnr,
                    "Ratio of PSNR being below {} dB, %".format(input_videos.threshold):
                    ratio,
                    "PSNR": psnrs_info,
                }
            }
            metrics_list.append(report_rec)
        except VideoCaptureException:
            logging.warning(
                "videos: %s and %s were skipped since VideoCaptureException happened",
                reference, compressed)

    # create report
    reports_data.update({"metrics": metrics_list})
    create_report(report_name=report_path, report_data=reports_data)
Пример #22
0
def reload_config(sonm_api: SonmApi):
    """Reload configuration and prices, then hand the node configs on.

    Calls ``Config.load_config()`` and ``Config.load_prices(sonm_api)`` in
    that order, then passes the resulting ``Config.node_configs`` to
    ``append_missed_nodes`` (presumably to register nodes that appeared in
    the config since the last load; verify against that helper).

    :param sonm_api: API object forwarded to the price loader and to
        ``append_missed_nodes``.
    """
    Config.load_config()
    Config.load_prices(sonm_api)
    # Read the node configs only after both reload steps have run.
    current_node_configs = Config.node_configs
    append_missed_nodes(sonm_api, current_node_configs)
Пример #23
0
def setup_logging(default_config='logging.yaml', default_level=logging.INFO):
    """Configure logging from a config file, or fall back to basicConfig.

    :param default_config: file name looked up inside ``Config.config_folder``.
    :param default_level: level passed to ``logging.basicConfig`` when the
        config file does not exist.
    """
    config_path = join(Config.config_folder, default_config)
    if not os.path.exists(config_path):
        # No config file on disk: use the plain default setup.
        logging.basicConfig(level=default_level)
        return

    logging_settings = Config.load_cfg(default_config)
    dictConfig(logging_settings)
Пример #24
0
def get_args():
    """Parse the command line of the VarDict (vcf2txt) filtering script.

    Logs the invocation, registers the shared and script-specific options,
    verifies the first positional argument as a file, builds the ``Config``
    and checks that a genome and an output file were specified.

    Returns:
        A tuple ``(cnf, vcf2txt_res_fpath)``.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(description=(
        'The program will filter the VarDict output after vcf2txt.pl to '
        'candidate interpretable mutations, somatic or germline.'))
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)

    # (flags, keyword arguments) in the order they are registered.
    option_specs = [
        (('-o',), dict(dest='output_file')),
        (('--o-all-transcripts',), dict(dest='all_transcripts_output_file')),
        (('--o-fm',), dict(dest='fm_output_file')),
        (('--o-reject',), dict(dest='rejected_output_file')),
        (('--cohort-freqs',), dict(dest='cohort_freqs_fpath')),
        (('--transcripts',), dict(dest='transcripts_fpath')),
        (('-D', '--min-depth'),
         dict(dest='filt_depth',
              type='int',
              help='The minimum total depth')),
        (('-V', '--min-vd'),
         dict(dest='min_vd',
              type='int',
              help='The minimum reads supporting variant')),
        (('--gmaf',),
         dict(dest='min_gmaf',
              type='float',
              help="When the GMAF is greater than specified, it's considered "
                   "common SNP and filtered out.")),
        (('-f', '--min-freq'),
         dict(dest='min_freq',
              type='float',
              help='The minimum allele frequency for regular variants.')),
        (('-F', '--min-freq-hs', '--act-min-freq'),
         dict(dest='act_min_freq',
              type='float',
              help='The minimum allele frequency hotspot somatic mutations, '
                   'typically lower then -f. '
                   'Default: 0.01 or half -f, whichever is less')),
        (('-N', '--keep-utr-intronic'),
         dict(dest='keep_utr_intronic',
              action='store_true',
              help='Keep all intronic and UTR in the output, but will be set '
                   'as "unknown".')),
        (('-p', '--platform'),
         dict(dest='platform',
              help="The platform, such as WXS, WGS, RNA-Seq, VALIDATION, etc. "
                   "No Default. "
                   "Used for output in FM's format")),
    ]
    for flags, params in option_specs:
        parser.add_option(*flags, **params)

    usage_text = 'Usage: ' + __file__ + ' vcf2txt_res_fpath [opts] -o output_fpath'
    parser.set_usage(usage_text)

    opts, positional = parser.parse_args()
    if not positional:
        critical('Provide the first argument - output from vcf2txt.pl')
    logger.is_debug = opts.debug

    vcf2txt_res_fpath = verify_file(positional[0], is_critical=True)

    cnf = Config(opts.__dict__, determine_sys_cnf(opts),
                 determine_run_cnf(opts))
    if not cnf.genome:
        critical('Please, specify the --genome option (e.g. --genome hg19)')

    check_genome_resources(cnf)

    if not cnf.output_file:
        critical('Please, specify the output fpath with -o')

    info()

    return cnf, vcf2txt_res_fpath
Пример #25
0
def read_opts_and_cnfs(extra_opts,
                       key_for_sample_name=None,
                       required_keys=None,
                       file_keys=None,
                       dir_keys=None,
                       description='',
                       extra_msg=None,
                       proc_name=None,
                       fpath_for_sample_name=None,
                       main_output_is_file=False,
                       main_output_is_dir=True):
    """Parse the command line and build the run configuration.

    Combines the caller's options with the common ones, parses
    ``sys.argv``, builds a ``Config``, validates required keys and paths,
    fills in sample/caller/process names and sets up the directories.

    Args:
        extra_opts: list of ``(flags, kwargs)`` pairs for
            ``OptionParser.add_option``. It is copied and never modified.
        key_for_sample_name: config key whose value (a file path) is used
            to derive the sample name when ``--sample`` is not given.
        required_keys: ``dest`` names that must be present in the config.
        file_keys: ``dest`` names of file options; only those that are also
            required are checked.
        dir_keys: ``dest`` names of directory options; only those that are
            also required are checked.
        description: parser description text.
        extra_msg: not used by this function.
        proc_name: process name used when the config does not set one.
        fpath_for_sample_name: when set, the ``key_for_sample_name`` lookup
            is skipped.
        main_output_is_file: make ``-o`` refer to the output file.
        main_output_is_dir: make ``-o`` refer to the output directory
            (only considered when ``main_output_is_file`` is false).

    Returns:
        The populated ``Config`` object.
    """
    required_keys = [] if required_keys is None else required_keys
    file_keys = [] if file_keys is None else file_keys
    dir_keys = [] if dir_keys is None else dir_keys

    # Work on a copy: the "+=" below would otherwise extend the caller's
    # list in place, duplicating options if that list is ever reused.
    options = list(extra_opts or [])
    if main_output_is_file:
        options += [(['-o', '--output-file'],
                     dict(dest='output_file',
                          metavar='FILE',
                          help='Output file'))]
        options += [(
            ['--output-dir'],
            dict(
                dest='output_dir',
                metavar='DIR',
                help=
                'Output directory (or directory name in case of bcbio final dir)',
                default=os.getcwd()))]
    elif main_output_is_dir:
        options += [(
            ['-o', '--output-dir'],
            dict(
                dest='output_dir',
                metavar='DIR',
                help=
                'Output directory (or directory name in case of bcbio final dir)',
                default=os.getcwd()))]
        options += [(['--output-file'],
                     dict(dest='output_file',
                          metavar='FILE',
                          help='Output file'))]

    options += [
        (['-s', '--sample', '--name'],
         dict(
             dest='sample',
             metavar='NAME',
             help=
             'Sample name (default is part of name of the first parameter prior to the first - or .'
         )),
        (['-c', '--caller'],
         dict(
             dest='caller',
             metavar='CALLER_NAME',
             help=
             'Variant caller name (default is part of name of the first parameter between the first - and following .'
         )),
        (['-t', '--nt', '--threads'],
         dict(dest='threads', type='int', help='Number of threads')),
        (
            ['--clean'],
            dict(  # do not keep work directory
                dest='keep_intermediate',
                action='store_false',
                help=SUPPRESS_HELP)),
        (['--debug'],
         dict(dest='debug',
              action='store_true',
              default=False,
              help=SUPPRESS_HELP)),
        (['--reuse'],
         dict(
             dest='reuse_intermediate',
             help=
             'reuse intermediate non-empty files in the work dir from previous run',
             action='store_true')),
        (['--sys-cnf'],
         dict(
             dest='sys_cnf',
             metavar='SYS_CNF.yaml',
             help=
             'System configuration file with paths to external tools and genome resources. The default is  '
             '(see default one %s)' % defaults['sys_cnf'])),
        (['--run-cnf'],
         dict(
             dest='run_cnf',
             metavar='RUN_CNF.yaml',
             default=defaults['run_cnf_exome_seq'],
             help=
             'Customised run details: list of annotations/QC metrics/databases/filtering criteria. '
             'The default is %s' % defaults['run_cnf_exome_seq'])),
        (['--transcripts'], dict(dest='transcripts_fpath')),
        (['--work-dir'],
         dict(dest='work_dir', metavar='DIR', help=SUPPRESS_HELP)),
        (['--log-dir'], dict(dest='log_dir', metavar='DIR',
                             help=SUPPRESS_HELP)),
        (['--proc-name'], dict(dest='proc_name', help=SUPPRESS_HELP)),
        (['--project-name'], dict(dest='project_name')),
        (['--no-check'],
         dict(dest='no_check', action='store_true', help=SUPPRESS_HELP)),
        (['-g', '--genome'], dict(dest='genome')),
        (['--email'], dict(dest='email', help=SUPPRESS_HELP)),
        (['--done-marker'], dict(dest='done_marker', help=SUPPRESS_HELP)),
    ]

    parser = OptionParser(description=description)
    for flags, kwargs in options:
        parser.add_option(*flags, **kwargs)

    # Append the flags of every required option to the usage text.
    req_keys_usage = ''
    if required_keys:
        req_keys_usage = '\nRequired options:'
    for flags, kwargs in options:
        try:
            if kwargs['dest'] in required_keys:
                req_keys_usage += '\n  ' + '/'.join(flags)
        except KeyError:
            # An option declared without an explicit 'dest'; report and go on.
            err(format_exc())
    parser.set_usage(parser.get_usage() + req_keys_usage)

    opts, _ = parser.parse_args()
    logger.is_debug = opts.debug

    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    errors = check_keys_presence(cnf, required_keys)
    if errors:
        parser.print_help()
        critical(errors)
    # Only paths that are also required are verified.
    file_keys = [k for k in file_keys if k in required_keys]
    dir_keys = [k for k in dir_keys if k in required_keys]
    errors = check_dirs_and_files(cnf, file_keys, dir_keys)
    if errors:
        critical(errors)

    if cnf.sample:
        cnf.sample = remove_quotes(cnf.sample)
    elif not fpath_for_sample_name:
        # NOTE(review): a caller-supplied fpath_for_sample_name only skips
        # this branch; it is never used to derive cnf.sample. Confirm
        # whether that is intended.
        if not key_for_sample_name:
            critical('Error: --sample must be provided in options.')

        fpath_for_sample_name = cnf[key_for_sample_name]
        if not fpath_for_sample_name:
            critical('Error: --sample or ' + (str(key_for_sample_name)) +
                     ' must be provided in options.')

        # Sample name is the file name up to its first dot.
        key_fname = basename(cnf[key_for_sample_name])
        cnf.sample = key_fname.split('.')[0]

    if cnf.caller:
        cnf.caller = remove_quotes(cnf.caller)
    else:
        cnf.caller = None

    cnf.proc_name = cnf.proc_name or proc_name
    set_up_dirs(cnf)
    info(' '.join(sys.argv))
    info()

    return cnf