def write_to_sqlite(work_dir, jira_case, project_list_fpath, country_id, project_name,
                    samples_num=None, analysis_dirpath=None, html_report_url=None):
    info('Reading project list ' + project_list_fpath)
    conn = sqlite3.connect(project_list_fpath)
    c = conn.cursor()

    pid = project_name
    d = dict()
    if analysis_dirpath:
        d['Analyses_directory_' + (country_id if not is_local() else 'US')] = analysis_dirpath
    if project_name and (analysis_dirpath or not __unquote(d.get('Name', ''))):
        # update only if running after bcbio, or no value there at all
        d['Name'] = project_name
    if html_report_url and (analysis_dirpath or not __unquote(d.get('HTML_report_path', ''))):
        # update only if running after bcbio, or no value there at all
        d['HTML_report_path'] = html_report_url

    if jira_case:
        d['JIRA_URL'] = jira_case.url
        # if 'Updated By' in d and __unquote(d['Updated By']):
        d['Updated_By'] = getpass.getuser()
        if jira_case.description:
            d['Description'] = jira_case.summary
        if jira_case.data_hub:
            d['Data_Hub'] = jira_case.data_hub
        if jira_case.type:
            d['Type'] = jira_case.type
        if jira_case.department:
            d['Department'] = jira_case.department
        if jira_case.division:
            d['Division'] = jira_case.division
        if jira_case.assignee:
            d['Assignee'] = jira_case.assignee
        if jira_case.reporter:
            d['Reporter'] = jira_case.reporter

    if samples_num:
        d['Sample_Number'] = str(samples_num)

    d['Datestamp'] = timestamp()

    # NOTE: placeholder statement - the column lists are elided, and SQLite has no
    # IF EXISTS ... UPDATE ... ELSE INSERT form.
    cmdl = '''
    IF EXISTS (SELECT * FROM project WHERE PID="{pid}" AND Name="{project_name}")
        UPDATE project SET (...) WHERE PID="{pid}" AND Name="{project_name}"
    ELSE
        INSERT INTO project VALUES (...)
    '''.format(pid=pid, project_name=project_name)
    print cmdl
    c.execute(cmdl)
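
# The statement above is only a placeholder. A minimal, self-contained sketch of one way
# to express that upsert with the sqlite3 module follows; the "project" table and its
# PID/Name columns come from the code above, while the helper name and the `fields`
# argument are illustrative assumptions, not part of the original module.
def _upsert_project_sketch(conn, pid, project_name, fields):
    """Update the matching project row, or insert a new one if nothing was updated."""
    c = conn.cursor()
    set_clause = ', '.join(k + ' = ?' for k in fields.keys())
    c.execute('UPDATE project SET ' + set_clause + ' WHERE PID = ? AND Name = ?',
              list(fields.values()) + [pid, project_name])
    if c.rowcount == 0:  # no existing row matched, so insert a new one
        cols = ['PID', 'Name'] + list(fields.keys())
        qmarks = ', '.join('?' * len(cols))
        c.execute('INSERT INTO project (' + ', '.join(cols) + ') VALUES (' + qmarks + ')',
                  [pid, project_name] + list(fields.values()))
    conn.commit()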
def add_project_to_exac(cnf):
    info('Adding project to ExAC database')
    exac_venv_pythonpath = join(exac_venv_dir, 'bin', 'python')
    if is_local():
        exac_venv_pythonpath = 'python'
    cmdline = exac_venv_pythonpath + ' ' + join(exac_code_dir, 'manage.py') + ' ' + 'add_project' + \
              ' ' + cnf.project_name + ' ' + cnf.genome.name
    call(cnf, cmdline)
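
# For illustration only: with made-up values cnf.project_name == 'Proj_123' and
# cnf.genome.name == 'hg19', and with exac_venv_dir/exac_code_dir pointing at the ExAC
# browser checkout, add_project_to_exac() assembles a command line equivalent to:
#
#   <exac_venv_dir>/bin/python <exac_code_dir>/manage.py add_project Proj_123 hg19
#
# (on a local machine, the system `python` is used instead of the venv interpreter).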
#!/usr/bin/env python
import bcbio_postproc  # do not remove it: checking for python version and adding site dirs inside

import os
from os.path import abspath, dirname, realpath, join, relpath, splitext, isfile, getsize
import sys

from source.logger import critical, info, is_local, err
from source.file_utils import adjust_path, safe_mkdir, verify_file, add_suffix
from source.targetcov.bam_and_bed_utils import count_bed_cols


liftover_fpath = '/group/ngs/src/liftOver/liftOver'
chains_dirpath = '/group/ngs/src/liftOver'
chains = dict(hg38=join(chains_dirpath, 'hg19ToHg38.over.chain.gz'),
              grch37=join(chains_dirpath, 'hg19ToGRCh37.over.chain.gz'))
if is_local():
    liftover_fpath = '/Users/vladsaveliev/az/liftOver/liftOver'
    chains_dirpath = '/Users/vladsaveliev/az/liftOver'
    chains = dict(hg38=join(chains_dirpath, 'hg19ToHg38.over.chain.gz'),)


def main(args):
    if len(args) < 2:
        critical('Usage: ' + __file__ + ' InputRootDirectory OutputRootDirectory [Build=hg38]')
        sys.exit(1)

    inp_root = adjust_path(args[0])
    out_root = adjust_path(args[1])

    build = 'hg38'
    if len(args) >= 3:
        build = args[2]
def sync_with_ngs_server(cnf, jira_url, project_name, sample_names, summary_report_fpath,
                         dataset_dirpath=None, bcbio_final_dirpath=None, jira_case=None):
    if is_us():
        loc = us
    elif is_uk():
        loc = uk
    elif is_local():
        loc = local
    elif is_sweden():
        loc = sweden
    else:
        return None

    html_report_url = None
    if any(p in realpath(bcbio_final_dirpath or dataset_dirpath) for p in loc.proper_path_should_contain):
        info('Location is ' + loc.loc_id + ', exposing reports to ' + loc.reports_dirpath)

        if jira_case is None and jira_case != 'unreached' and is_az() and jira_url:
            info()
            info('Getting info from JIRA...')
            jira_case = retrieve_jira_info(jira_url)

        proj_dirpath_on_server = _symlink_dirs(
            cnf=cnf,
            loc=loc,
            project_name=project_name,
            final_dirpath=bcbio_final_dirpath,
            dataset_dirpath=dataset_dirpath)
            # html_report_fpath=summary_report_fpath,
            # html_report_url=html_report_url)

        if bcbio_final_dirpath:
            html_report_url = join(
                loc.report_url_base,                                   # http://ngs.usbod.astrazeneca.net/reports/
                relpath(proj_dirpath_on_server, loc.reports_dirpath),  # project_name/dataset/project_name
                relpath(summary_report_fpath, dirname(bcbio_final_dirpath)))  # final/2015_01_01_project/project.html
        elif dataset_dirpath:
            html_report_url = join(
                loc.report_url_base,
                relpath(proj_dirpath_on_server, loc.reports_dirpath),
                relpath(summary_report_fpath, dataset_dirpath))

        # html_report_full_url = join(loc.website_url_base, 'samples.php?project_name=' + project_name + '&file=' + html_report_url)
        # info('HTML url: ' + html_report_full_url)

        if verify_file(loc.csv_fpath, 'Project list'):
            write_to_csv_file(
                work_dir=cnf.work_dir,
                jira_case=jira_case,
                project_list_fpath=loc.csv_fpath,
                country_id=loc.loc_id,
                project_name=project_name,
                samples_num=len(sample_names),
                analysis_dirpath=dirname(bcbio_final_dirpath) if bcbio_final_dirpath else None,
                html_report_url=html_report_url)

    return html_report_url
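
# A small illustration (not part of the pipeline) of how the HTML report URL above is
# composed: report URL base + project dir relative to the reports root + report path
# relative to the analysis dir. All paths and the URL below are made-up stand-ins for
# loc.report_url_base, loc.reports_dirpath and the pipeline's real directories.
def _example_report_url():
    report_url_base = 'http://ngs.example.net/reports/'
    reports_dirpath = '/ngs/oncology/reports'
    proj_dirpath_on_server = '/ngs/oncology/reports/Proj_123'   # what _symlink_dirs() would return
    bcbio_final_dirpath = '/ngs/oncology/analysis/Proj_123/final'
    summary_report_fpath = '/ngs/oncology/analysis/Proj_123/final/2015_01_01_Proj_123/Proj_123.html'
    return join(report_url_base,
                relpath(proj_dirpath_on_server, reports_dirpath),
                relpath(summary_report_fpath, dirname(bcbio_final_dirpath)))
    # -> 'http://ngs.example.net/reports/Proj_123/final/2015_01_01_Proj_123/Proj_123.html'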
def write_to_csv_file(work_dir, jira_case, project_list_fpath, country_id, project_name,
                      samples_num=None, analysis_dirpath=None, html_report_url=None):
    info('Reading project list ' + project_list_fpath)
    with open(project_list_fpath) as f:
        lines = f.readlines()
    uncom_lines = [l.strip() for l in lines if not l.strip().startswith('#')]

    header = uncom_lines[0].strip()
    info('header: ' + header)
    header_keys = header.split(',')  # 'Updated By,PID,Name,JIRA URL,HTML report path,Datestamp,Data Hub,Analyses directory UK,Analyses directory US,Type,Division,Department,Sample Number,Reporter,Assignee,Description,IGV,Notes'
    if 'PID' in header_keys:
        index_of_pid = header_keys.index('PID')
    else:
        # list.index() raises ValueError rather than returning -1, so fall back explicitly
        index_of_pid = 1

    values_by_keys_by_pid = OrderedDict()
    for l in uncom_lines[1:]:
        if l:
            values = map(__unquote, l.split(','))
            pid = values[index_of_pid]
            values_by_keys_by_pid[pid] = OrderedDict(zip(header_keys, values))

    pid = project_name
    with file_transaction(work_dir, project_list_fpath) as tx_fpath:
        if pid not in values_by_keys_by_pid.keys():
            # info(pid + ' not in ' + str(values_by_keys_by_pid.keys()))
            info('Adding new record for ' + pid)
            values_by_keys_by_pid[pid] = OrderedDict(zip(header_keys, [''] * len(header_keys)))
        else:
            info('Updating existing record for ' + pid)

        d = values_by_keys_by_pid[pid]
        for k in header_keys:
            if k not in d:
                err('Error: ' + k + ' not in ' + project_list_fpath + ' for ' + pid)

        d['PID'] = pid
        if analysis_dirpath:
            d['Analyses directory ' + (country_id if not is_local() else 'US')] = analysis_dirpath
        if project_name and (analysis_dirpath or not __unquote(d['Name'])):
            # update only if running after bcbio, or no value there at all
            d['Name'] = project_name
        if html_report_url and (analysis_dirpath or not __unquote(d['HTML report path'])):
            # update only if running after bcbio, or no value there at all
            d['HTML report path'] = html_report_url

        if jira_case:
            d['JIRA URL'] = jira_case.url
            # if 'Updated By' in d and __unquote(d['Updated By']):
            d['Updated By'] = getpass.getuser()
            if jira_case.description:
                d['Description'] = jira_case.summary
            if jira_case.data_hub:
                d['Data Hub'] = jira_case.data_hub
            if jira_case.type:
                d['Type'] = jira_case.type
            if jira_case.department:
                d['Department'] = jira_case.department
            if jira_case.division:
                d['Division'] = jira_case.division
            if jira_case.assignee:
                d['Assignee'] = jira_case.assignee
            if jira_case.reporter:
                d['Reporter'] = jira_case.reporter

        if samples_num:
            d['Sample Number'] = str(samples_num)

        d['Datestamp'] = timestamp()

        new_line = ','.join(__requote(d.get(k, '').replace(',', ';').replace('\n', ' | ')) or ''
                            for k in header_keys)

        with open(tx_fpath, 'w') as f:
            os.umask(0002)
            try:
                os.chmod(tx_fpath, 0774)
            except OSError:
                err(format_exc())
            for l in lines:
                if not l:
                    continue
                if l.startswith('#'):
                    f.write(l)
                else:
                    l = unicode(l, 'utf-8')
                    l_ascii = l.encode('ascii', 'ignore')
                    if ',' + project_name + ',' in l_ascii or ',"' + project_name + '",' in l_ascii:
                        info('Old csv line: ' + l_ascii)
                        # f.write('#' + l)
                    else:
                        f.write(l)
            f.write(new_line + '\n')

        info()
        info('New line: ' + new_line)
        info()
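
# __unquote() and __requote() are used above but not defined in this excerpt. The
# sketches below are stand-ins showing what they plausibly do, assuming the project
# list CSV stores each field wrapped in double quotes: strip the quotes when reading
# and add them back when writing. The names _unquote_sketch/_requote_sketch are
# hypothetical and do not shadow the real helpers.
def _unquote_sketch(s):
    if s is None:
        return ''
    return s.strip().strip('"')

def _requote_sketch(s):
    if s is None:
        return ''
    return '"' + s + '"'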