Exemplo n.º 1
0
def write_to_sqlite(work_dir,
                    jira_case,
                    project_list_fpath,
                    country_id,
                    project_name,
                    samples_num=None,
                    analysis_dirpath=None,
                    html_report_url=None):
    """Insert or update the record of a project in the SQLite project list.

    The record is looked up in the ``project`` table by ``PID`` and ``Name``
    (both are compared against ``project_name``). If such a row exists, the
    collected fields are updated in place; otherwise a new row is inserted.
    The change is committed and the connection is always closed.

    Args:
        work_dir: not used by this function; kept for interface compatibility.
        jira_case: object with ``url``, ``description``, ``summary``,
            ``data_hub``, ``type``, ``department``, ``division``, ``assignee``
            and ``reporter`` attributes, or a false value to skip JIRA fields.
        project_list_fpath: path to the SQLite database file.
        country_id: suffix of the ``Analyses_directory_<id>`` column
            ('US' is used instead when ``is_local()`` is true).
        project_name: project name; also used as the PID.
        samples_num: number of samples, stored as a string when truthy.
        analysis_dirpath: analysis directory; when given, ``Name`` and
            ``HTML_report_path`` are overwritten even if already filled.
        html_report_url: URL of the HTML report.

    NOTE(review): column names are taken from the keys built below and are
    assumed to exist in the ``project`` table -- confirm against the schema.
    """
    def _quote_identifier(name):
        # Column names cannot be bound as parameters, so quote them explicitly.
        return '"' + name.replace('"', '""') + '"'

    info('Reading project list ' + project_list_fpath)
    pid = project_name

    conn = sqlite3.connect(project_list_fpath)
    try:
        c = conn.cursor()

        # Fetch the current record (if any) so that the "no value there at
        # all" checks below look at what is really stored.
        c.execute('SELECT * FROM project WHERE PID=? AND Name=?',
                  (pid, project_name))
        row = c.fetchone()
        existing = dict()
        if row is not None:
            existing = dict(zip([col[0] for col in c.description], row))

        d = dict()
        if analysis_dirpath:
            d['Analyses_directory_' +
              (country_id if not is_local() else 'US')] = analysis_dirpath
        # update only if running after bcbio, or no value there at all
        if project_name and (
                analysis_dirpath or
                not __unquote(existing.get('Name') or '')):
            d['Name'] = project_name
        # update only if running after bcbio, or no value there at all
        if html_report_url and (
                analysis_dirpath or
                not __unquote(existing.get('HTML_report_path') or '')):
            d['HTML_report_path'] = html_report_url

        if jira_case:
            d['JIRA_URL'] = jira_case.url
            d['Updated_By'] = getpass.getuser()
            # NOTE(review): guarded by .description but stores .summary --
            # kept as is; confirm this is intended.
            if jira_case.description:
                d['Description'] = jira_case.summary
            if jira_case.data_hub:
                d['Data_Hub'] = jira_case.data_hub
            if jira_case.type:
                d['Type'] = jira_case.type
            if jira_case.department:
                d['Department'] = jira_case.department
            if jira_case.division:
                d['Division'] = jira_case.division
            if jira_case.assignee:
                d['Assignee'] = jira_case.assignee
            if jira_case.reporter:
                d['Reporter'] = jira_case.reporter

        if samples_num:
            d['Sample_Number'] = str(samples_num)

        d['Datestamp'] = timestamp()

        if row is not None:
            columns = sorted(d)
            cmdl = 'UPDATE project SET {assignments} WHERE PID=? AND Name=?'.format(
                assignments=', '.join(
                    _quote_identifier(k) + '=?' for k in columns))
            params = [d[k] for k in columns] + [pid, project_name]
        else:
            d['PID'] = pid
            columns = sorted(d)
            cmdl = 'INSERT INTO project ({columns}) VALUES ({placeholders})'.format(
                columns=', '.join(_quote_identifier(k) for k in columns),
                placeholders=', '.join('?' for _ in columns))
            params = [d[k] for k in columns]

        info(cmdl)
        # The connection as a context manager commits on success and rolls
        # back if the statement fails.
        with conn:
            c.execute(cmdl, params)
    finally:
        conn.close()
Exemplo n.º 2
0
def add_project_to_exac(cnf):
    """Run the ExAC ``manage.py add_project`` command for the current project.

    The command line is built from the ExAC code directory, the project name
    (``cnf.project_name``) and the genome name (``cnf.genome.name``), and is
    executed through ``call``. The interpreter is ``bin/python`` under the
    ExAC virtualenv directory, or plain ``python`` when ``is_local()`` is true.
    """
    info('Adding project to ExAC database')

    venv_python = join(exac_venv_dir, 'bin', 'python')
    interpreter = 'python' if is_local() else venv_python
    manage_script = join(exac_code_dir, 'manage.py')

    cmdline = ' '.join([
        interpreter,
        manage_script,
        'add_project',
        cnf.project_name,
        cnf.genome.name,
    ])
    call(cnf, cmdline)
Exemplo n.º 3
0
#!/usr/bin/env python
import bcbio_postproc  # do not remove it: checking for python version and adding site dirs inside

import os
from os.path import abspath, dirname, realpath, join, relpath, splitext, isfile, getsize
import sys
from source.logger import critical, info, is_local, err
from source.file_utils import adjust_path, safe_mkdir, verify_file, add_suffix
from source.targetcov.bam_and_bed_utils import count_bed_cols

# Paths to the liftOver executable and to its chain files (keyed by target
# build). A separate set of paths is used when is_local() is true.
if is_local():
    liftover_fpath = '/Users/vladsaveliev/az/liftOver/liftOver'
    chains_dirpath = '/Users/vladsaveliev/az/liftOver'
    chains = {
        'hg38': join(chains_dirpath, 'hg19ToHg38.over.chain.gz'),
    }
else:
    liftover_fpath = '/group/ngs/src/liftOver/liftOver'
    chains_dirpath = '/group/ngs/src/liftOver'
    chains = {
        'hg38': join(chains_dirpath, 'hg19ToHg38.over.chain.gz'),
        'grch37': join(chains_dirpath, 'hg19ToGRCh37.over.chain.gz'),
    }


def main(args):
    if len(args) < 2:
        critical('Usage: ' + __file__ +
                 ' InputRootDirectory OutputRootDirectory [Build=hg38]')
        sys.exit(1)

    inp_root = adjust_path(args[0])
    out_root = adjust_path(args[1])

    build = 'hg38'
    if len(args) >= 3:
Exemplo n.º 4
0
def sync_with_ngs_server(cnf,
                         jira_url,
                         project_name,
                         sample_names,
                         summary_report_fpath,
                         dataset_dirpath=None,
                         bcbio_final_dirpath=None,
                         jira_case=None):
    """Expose the project on the reports server and record it in the project list.

    Picks the site description (``us``, ``uk``, ``local`` or ``sweden``) using
    the ``is_*()`` checks. If the real path of ``bcbio_final_dirpath`` (or of
    ``dataset_dirpath`` when the former is not set) contains one of the site's
    ``proper_path_should_contain`` fragments, the function:

    * retrieves the JIRA case when none was passed, ``is_az()`` is true and
      ``jira_url`` is set;
    * symlinks the project directories via ``_symlink_dirs``;
    * builds the URL of the HTML summary report;
    * updates the CSV project list if ``loc.csv_fpath`` passes ``verify_file``.

    Returns:
        The HTML report URL, or None if the site is not recognised, the path
        does not match, or neither directory path is set.
    """
    if is_us():
        loc = us
    elif is_uk():
        loc = uk
    elif is_local():
        loc = local
    elif is_sweden():
        loc = sweden
    else:
        return None

    # Proceed only if the project lives under one of the expected locations.
    path_is_proper = False
    for path_fragment in loc.proper_path_should_contain:
        if path_fragment in realpath(bcbio_final_dirpath or dataset_dirpath):
            path_is_proper = True
            break
    if not path_is_proper:
        return None

    info('Location is ' + loc.loc_id + ', exposing reports to ' +
         loc.reports_dirpath)

    # Any non-None jira_case passed by the caller suppresses the JIRA lookup.
    if jira_case is None and is_az() and jira_url:
        info()
        info('Getting info from JIRA...')
        jira_case = retrieve_jira_info(jira_url)

    proj_dirpath_on_server = _symlink_dirs(
        cnf=cnf,
        loc=loc,
        project_name=project_name,
        final_dirpath=bcbio_final_dirpath,
        dataset_dirpath=dataset_dirpath)

    html_report_url = None
    if bcbio_final_dirpath:
        url_base = loc.report_url_base
        # project directory relative to the reports root
        project_part = relpath(proj_dirpath_on_server, loc.reports_dirpath)
        # report relative to the parent of the bcbio "final" directory
        report_part = relpath(summary_report_fpath,
                              dirname(bcbio_final_dirpath))
        html_report_url = join(url_base, project_part, report_part)
    elif dataset_dirpath:
        url_base = loc.report_url_base
        project_part = relpath(proj_dirpath_on_server, loc.reports_dirpath)
        report_part = relpath(summary_report_fpath, dataset_dirpath)
        html_report_url = join(url_base, project_part, report_part)

    if verify_file(loc.csv_fpath, 'Project list'):
        write_to_csv_file(
            work_dir=cnf.work_dir,
            jira_case=jira_case,
            project_list_fpath=loc.csv_fpath,
            country_id=loc.loc_id,
            project_name=project_name,
            samples_num=len(sample_names),
            analysis_dirpath=(dirname(bcbio_final_dirpath)
                              if bcbio_final_dirpath else None),
            html_report_url=html_report_url)

    return html_report_url
Exemplo n.º 5
0
def write_to_csv_file(work_dir,
                      jira_case,
                      project_list_fpath,
                      country_id,
                      project_name,
                      samples_num=None,
                      analysis_dirpath=None,
                      html_report_url=None):
    """Add or replace the record of a project in the CSV project list.

    The file is read in full; the first non-comment line is taken as the
    header, and the remaining non-empty, non-comment lines are parsed into
    records keyed by the value of the ``PID`` column. The record of
    ``project_name`` (created empty if absent) is filled with the given
    values, and the file is rewritten through ``file_transaction``: every
    original line is copied except data lines mentioning the project name as
    a whole field, and the new record is appended at the end.

    Args:
        work_dir: working directory passed to ``file_transaction``.
        jira_case: object with ``url``, ``description``, ``summary``,
            ``data_hub``, ``type``, ``department``, ``division``, ``assignee``
            and ``reporter`` attributes, or a false value to skip JIRA fields.
        project_list_fpath: path to the CSV project list.
        country_id: suffix of the ``Analyses directory <id>`` column
            ('US' is used instead when ``is_local()`` is true).
        project_name: project name; also used as the PID.
        samples_num: number of samples, stored as a string when truthy.
        analysis_dirpath: analysis directory; when given, ``Name`` and
            ``HTML report path`` are overwritten even if already filled.
        html_report_url: URL of the HTML report.
    """
    info('Reading project list ' + project_list_fpath)
    with open(project_list_fpath) as f:
        lines = f.readlines()
    uncom_lines = [l.strip() for l in lines if not l.strip().startswith('#')]

    header = uncom_lines[0].strip()
    info('header: ' + header)
    # Example header:
    # 'Updated By,PID,Name,JIRA URL,HTML report path,Datestamp,Data Hub,Analyses directory UK,Analyses directory US,Type,Division,Department,Sample Number,Reporter,Assignee,Description,IGV,Notes'
    header_keys = header.split(',')
    # list.index raises ValueError when the item is missing (it never
    # returns -1), so fall back to the second column explicitly.
    try:
        index_of_pid = header_keys.index('PID')
    except ValueError:
        index_of_pid = 1

    values_by_keys_by_pid = OrderedDict()
    for l in uncom_lines[1:]:
        if l:
            values = [__unquote(v) for v in l.split(',')]
            pid = values[index_of_pid]
            values_by_keys_by_pid[pid] = OrderedDict(zip(header_keys, values))

    pid = project_name
    with file_transaction(work_dir, project_list_fpath) as tx_fpath:
        if pid not in values_by_keys_by_pid:
            info('Adding new record for ' + pid)
            values_by_keys_by_pid[pid] = OrderedDict(
                zip(header_keys, [''] * len(header_keys)))
        else:
            info('Updating existing record for ' + pid)
        d = values_by_keys_by_pid[pid]
        for k in header_keys:
            if k not in d:
                err('Error: ' + k + ' not in ' + project_list_fpath + ' for ' +
                    pid)

        d['PID'] = pid

        if analysis_dirpath:
            d['Analyses directory ' +
              (country_id if not is_local() else 'US')] = analysis_dirpath
        # update only if running after bcbio, or no value there at all
        if project_name and (
                analysis_dirpath or not __unquote(d['Name'])):
            d['Name'] = project_name
        # update only if running after bcbio, or no value there at all
        if html_report_url and (
                analysis_dirpath or not __unquote(d['HTML report path'])):
            d['HTML report path'] = html_report_url

        if jira_case:
            d['JIRA URL'] = jira_case.url
            d['Updated By'] = getpass.getuser()
            # NOTE(review): guarded by .description but stores .summary --
            # kept as is; confirm this is intended.
            if jira_case.description:
                d['Description'] = jira_case.summary
            if jira_case.data_hub:
                d['Data Hub'] = jira_case.data_hub
            if jira_case.type:
                d['Type'] = jira_case.type
            if jira_case.department:
                d['Department'] = jira_case.department
            if jira_case.division:
                d['Division'] = jira_case.division
            if jira_case.assignee:
                d['Assignee'] = jira_case.assignee
            if jira_case.reporter:
                d['Reporter'] = jira_case.reporter
        if samples_num:
            d['Sample Number'] = str(samples_num)
        d['Datestamp'] = timestamp()

        # Commas and newlines inside values would break the CSV layout.
        new_line = ','.join(
            __requote(d.get(k, '').replace(',', ';').replace('\n', ' | '))
            or '' for k in header_keys)

        with open(tx_fpath, 'w') as f:
            # 0o-prefixed octal literals are valid in Python 2.6+ and Python 3.
            os.umask(0o002)
            try:
                os.chmod(tx_fpath, 0o774)
            except OSError:
                err(format_exc())
            for l in lines:
                # A last line without a line break would otherwise get the
                # new record glued onto it.
                if not l.endswith('\n'):
                    l += '\n'
                if l.startswith('#'):
                    f.write(l)
                    continue
                # The ASCII-only copy is used for matching and logging only;
                # the original bytes are written back so that non-ASCII rows
                # neither fail on write nor lose characters.
                l_ascii = unicode(l, 'utf-8').encode('ascii', 'ignore')
                if ',' + project_name + ',' in l_ascii or ',"' + project_name + '",' in l_ascii:
                    # The old record is dropped; the updated one is appended below.
                    info('Old csv line: ' + l_ascii)
                else:
                    f.write(l)
            f.write(new_line + '\n')
        info()
        info('New line: ' + new_line)
        info()