Пример #1
0
def get_nowg_annot_modules(queries):
    """Map annotators found in a result database to their display names
    when no matching local widget module is registered for them.

    Args:
        queries: Mapping whose ``'dbpath'`` entry is a sequence; its first
            element is the URL-quoted path of the sqlite result database.

    Returns:
        dict: ``{'wg' + annotator_name: displayname}`` for every row of
        ``variant_annotator`` that is not one of the skipped built-in names
        and whose ``'wg'``-prefixed name is not a local ``webviewerwidget``
        module declaring ``required_annotator``.
    """
    dbpath = urllib.parse.unquote(queries['dbpath'][0])
    conn = sqlite3.connect(dbpath)
    # The connection used to be leaked; always close it, even on errors.
    try:
        cursor = conn.cursor()
        wgmodules = au.get_local_module_infos_of_type('webviewerwidget')
        # Names of widget modules whose conf declares a required annotator.
        annot_modules_with_wg = {
            wgmodule for wgmodule in wgmodules
            if 'required_annotator' in wgmodules[wgmodule].conf
        }
        skipped_annotators = ('example_annotator', 'testannot', 'tagsampler')
        nowg_annot_modules = {}
        # NOTE(review): the guard checks the 'variant' table but the query
        # reads 'variant_annotator' -- confirm both always exist together.
        if table_exists(cursor, 'variant'):
            cursor.execute('select name, displayname from variant_annotator')
            for r in cursor.fetchall():
                if r[0] in skipped_annotators:
                    continue
                annot_module = 'wg' + r[0]
                if (annot_module not in annot_modules_with_wg
                        and annot_module not in nowg_annot_modules):
                    nowg_annot_modules[annot_module] = r[1]
    finally:
        conn.close()
    return nowg_annot_modules
Пример #2
0
def main():
    """Run the tests of every installed module selected by the command-line
    arguments and print a pass/fail summary.

    Modules are filtered by ``mod_types`` and ``modules`` from the parsed
    arguments (``None`` means "no filter"); only modules with ``has_test``
    set are exercised. Results are written under ``rundir``.
    """
    args = get_args()

    # Make sure the run output directory is there before any tester uses it.
    if not os.path.exists(args.rundir):
        os.makedirs(args.rundir)

    pass_count = 0
    failed_names = []
    for mod_type in au.get_local_module_types():
        if args.mod_types is not None and mod_type not in args.mod_types:
            continue
        print('\nRunning ' + mod_type + ' tests.')
        infos = au.get_local_module_infos_of_type(mod_type)
        for mod_name in infos:
            if args.modules is not None and mod_name not in args.modules:
                continue
            info = infos[mod_name]
            if not info.has_test:
                continue
            tester = Tester(info, args.rundir)
            # Output is only verified when the run itself succeeded.
            if tester.run() == 0:
                tester.verify()
            tester.write_results()
            if tester.test_passed:
                pass_count += 1
            else:
                failed_names.append(mod_name)

    failed_names.sort()
    summary = ('\nTests complete.  Passed: ' + str(pass_count) +
               '  Failed: ' + str(len(failed_names)) +
               ' [' + ', '.join(failed_names) + ']')
    print(summary)
Пример #3
0
def get_nowg_annot_modules(request):
    """Return, as JSON, the annotators in a result database that no local
    widget module declares as its ``required_annotator``.

    Args:
        request: Web request; ``request.rel_url.query['dbpath']`` is the
            path of the sqlite result database.

    Returns:
        ``web.json_response`` wrapping ``{annotator_name: displayname}``.
    """
    queries = request.rel_url.query
    dbpath = queries['dbpath']
    conn = sqlite3.connect(dbpath)
    # The connection used to be leaked; always close it, even on errors.
    try:
        cursor = conn.cursor()
        wgmodules = au.get_local_module_infos_of_type('webviewerwidget')
        # Annotators that already have a widget bound to them.
        annot_modules_with_wg = []
        for wgmodule in wgmodules:
            conf = wgmodules[wgmodule].conf
            if 'required_annotator' in conf:
                annot_module = conf['required_annotator']
                if annot_module not in annot_modules_with_wg:
                    annot_modules_with_wg.append(annot_module)
        skipped_annotators = ('example_annotator', 'testannot', 'tagsampler')
        nowg_annot_modules = {}
        # NOTE(review): the guard checks the 'variant' table but the query
        # reads 'variant_annotator' -- confirm both always exist together.
        if table_exists(cursor, 'variant'):
            cursor.execute('select name, displayname from variant_annotator')
            for r in cursor.fetchall():
                annot_module = r[0]
                if annot_module in skipped_annotators:
                    continue
                if (annot_module not in annot_modules_with_wg
                        and annot_module not in nowg_annot_modules):
                    nowg_annot_modules[annot_module] = r[1]
    finally:
        conn.close()
    return web.json_response(nowg_annot_modules)
Пример #4
0
def main():
    """Re-run the ``cravat`` reporting step for an existing result database.

    Command line: ``<dbpath> [-t REPORTTYPE ...]``. The run name is the
    database file name without its ``.sqlite`` extension and the output
    directory is the directory part of ``dbpath``. When no report types are
    given, every locally installed reporter is used (module name with the
    trailing ``reporter`` removed).
    """
    if len(sys.argv) < 2:
        print('Please provide a sqlite file path')
        sys.exit()
    parser = argparse.ArgumentParser()
    parser.add_argument('dbpath', help='Path to aggregator output')
    parser.add_argument('-t',
                        dest='reporttypes',
                        nargs='+',
                        default=None,
                        help='report types')
    parsed_args = parser.parse_args(sys.argv[1:])
    dbpath = parsed_args.dbpath
    report_types = parsed_args.reporttypes
    # str.rstrip('sqlite') strips any trailing run of those characters, not
    # the suffix, and mangles names such as 'results'. Remove the extension
    # only when it is really there.
    run_name = os.path.basename(dbpath)
    extension = '.sqlite'
    if run_name.endswith(extension):
        run_name = run_name[:-len(extension)]
    output_dir = os.path.dirname(dbpath)
    avail_reporters = au.get_local_module_infos_of_type('reporter')
    avail_reporter_names = [
        re.sub('reporter$', '', v) for v in avail_reporters.keys()
    ]
    # The skip flags are passed through unchanged; '-t' must stay last
    # because the report types are appended right after it.
    cmd = [
        'cravat', 'dummyinput', '-n', run_name, '-d', output_dir, '--sc',
        '--sm', '--sa', '--sg', '--sp', '--str', '-t'
    ]
    if report_types is not None:
        cmd.extend(report_types)
    else:
        cmd.extend(avail_reporter_names)
    subprocess.run(cmd)
Пример #5
0
 def run_postaggregators (self):
     """Instantiate and run every locally installed postaggregator module.

     Each module is announced, its ``CravatPostAggregator`` class is loaded
     from the module script, and the instance is built from an argv-style
     list carrying the output directory and run name.
     """
     postaggregators = au.get_local_module_infos_of_type('postaggregator')
     for module_name in postaggregators:
         info = postaggregators[module_name]
         self.announce_module(info)
         argv = [info.script_path]
         argv += ['-d', self.output_dir]
         argv += ['-n', self.run_name]
         if self.verbose:
             print('    '.join(argv))
         runner_cls = util.load_class('CravatPostAggregator', info.script_path)
         runner_cls(argv).run()
Пример #6
0
def get_widgetlist ():
    """List the locally installed web viewer widgets.

    Returns:
        list[dict]: one entry per widget with ``name``, ``title`` and
        ``required_annotator``. The annotator comes from the widget conf
        when declared there; otherwise it is the widget name with its first
        two characters (the ``wg`` prefix) dropped.
    """
    widgets = au.get_local_module_infos_of_type('webviewerwidget')
    listing = []
    for widget_name in widgets:
        info = widgets[widget_name]
        settings = info.conf
        required = (settings['required_annotator']
                    if 'required_annotator' in settings
                    else widget_name[2:])  # drops the leading "wg"
        entry = {}
        entry['name'] = widget_name
        entry['title'] = info.title
        entry['required_annotator'] = required
        listing.append(entry)
    return listing
Пример #7
0
def get_widgetlist (request):
    """Return, as JSON, the locally installed web viewer widgets.

    Args:
        request: Incoming web request; not read by this handler.

    Returns:
        ``web.json_response`` wrapping a list with one entry per widget:
        ``name``, ``title``, ``required_annotator`` and ``helphtml_exists``.
        The annotator comes from the widget conf when declared there;
        otherwise it is the widget name with its first two characters (the
        ``wg`` prefix) dropped.
    """
    widgets = au.get_local_module_infos_of_type('webviewerwidget')
    listing = []
    for widget_name in widgets:
        info = widgets[widget_name]
        settings = info.conf
        required = (settings['required_annotator']
                    if 'required_annotator' in settings
                    else widget_name[2:])  # drops the leading "wg"
        entry = {}
        entry['name'] = widget_name
        entry['title'] = info.title
        entry['required_annotator'] = required
        entry['helphtml_exists'] = info.helphtml_exists
        listing.append(entry)
    return web.json_response(listing)
Пример #8
0
async def get_nowg_annot_modules(request):
    """Return an empty JSON object; the real lookup is currently disabled.

    The unconditional return below makes the rest of the body unreachable.
    The parked code would report annotators present in the job's result
    database that have no local widget but do have a remote widget
    declaring them as ``required_annotator``.

    Args:
        request: Incoming web request (only read by the parked code).

    Returns:
        ``web.json_response({})``.
    """
    # Disabled until required_annotator is included in the remote manifest.
    return web.json_response({})
    # Everything below is unreachable. To re-enable: delete the return above and
    # change the code below so that the remote manifest's required_annotator is used.
    # NOTE(review): `queries` is assigned but never read in the code below.
    queries = request.rel_url.query
    job_id, dbpath = await get_jobid_dbpath(request)
    conn = await aiosqlite3.connect(dbpath)
    cursor = await conn.cursor()
    # Map each required annotator to the remote widgets that need it.
    remote_widget_modules = au.get_remote_module_infos_of_type(
        'webviewerwidget')
    remote_widget_names = remote_widget_modules.keys()
    remote_annot_to_widgets = {}
    for remote_widget_name in remote_widget_names:
        conf = au.get_remote_module_config(remote_widget_name)
        if 'required_annotator' in conf:
            req_annot = conf['required_annotator']
            if req_annot not in remote_annot_to_widgets:
                remote_annot_to_widgets[req_annot] = []
            remote_annot_to_widgets[req_annot].append(remote_widget_name)
    # Annotators that already have a locally installed widget.
    wgmodules = au.get_local_module_infos_of_type('webviewerwidget')
    annot_modules_with_wg = []
    for wgmodule in wgmodules:
        conf = wgmodules[wgmodule].conf
        if 'required_annotator' in conf:
            annot_module = conf['required_annotator']
            if annot_module not in annot_modules_with_wg:
                annot_modules_with_wg.append(annot_module)
    nowg_annot_modules = {}
    # NOTE(review): `r` holds the table check here and is reused as the row
    # variable in the loop below.
    r = await table_exists(cursor, 'variant')
    if r:
        q = 'select name, displayname from variant_annotator'
        await cursor.execute(q)
        for r in await cursor.fetchall():
            m = r[0]
            # Built-in/test modules are never reported.
            if m in ['example_annotator', 'testannot', 'tagsampler']:
                continue
            annot_module = r[0]
            displayname = r[1]
            # Keep annotators with no local widget that do have a remote one.
            if annot_module not in annot_modules_with_wg and annot_module not in nowg_annot_modules and annot_module in remote_annot_to_widgets:
                nowg_annot_modules[annot_module] = displayname
    content = nowg_annot_modules
    await cursor.close()
    await conn.close()
    return web.json_response(content)
Пример #9
0
def run_test(cmd_args):
    """Run every test of the selected installed modules and print a summary.

    Args:
        cmd_args: Parsed arguments with ``rundir``, ``mod_types`` and
            ``modules``. A ``None`` ``rundir`` is replaced in place by a
            timestamped ``cravat_test_<millis>`` name; ``None`` for either
            filter means "no filter".
    """
    if cmd_args.rundir is None:
        millis = int(round(time.time() * 1000))
        cmd_args.rundir = "cravat_test_" + str(millis)

    # Make sure the run output directory is there before any tester uses it.
    if not os.path.exists(cmd_args.rundir):
        os.makedirs(cmd_args.rundir)

    pass_count = 0
    failures = []
    for mod_type in au.get_local_module_types():
        if cmd_args.mod_types is not None and mod_type not in cmd_args.mod_types:
            continue
        print("\nRunning " + mod_type + " tests.")
        infos = au.get_local_module_infos_of_type(mod_type)
        for mod_name in infos:
            if cmd_args.modules is not None and mod_name not in cmd_args.modules:
                continue
            info = infos[mod_name]
            # A module normally ships one 'input'/'key' pair, but it may carry
            # several (input.1/key.1, input.2/key.2, ...); each pair is one test.
            for test_input_file in info.tests:
                tester = Tester(info, cmd_args.rundir, test_input_file)
                # Output is only verified when the run itself succeeded.
                if tester.run() == 0:
                    tester.verify()
                tester.write_results()
                if tester.test_passed:
                    pass_count += 1
                    continue
                # Name the input file only when it is not the default one.
                if test_input_file == "input":
                    label = mod_name + ""
                else:
                    label = mod_name + (" " + test_input_file)
                failures.append(label)

    failures.sort()
    summary = ("\nTests complete.  Passed: " + str(pass_count) +
               "  Failed: " + str(len(failures)) +
               " [" + ", ".join(failures) + "]")
    print(summary)
Пример #10
0
 def _initialize_converters(self):
     """Instantiate one converter per locally installed converter module.

     Every module reported by ``au.get_local_module_infos_of_type('converter')``
     is imported from its script path, its ``CravatConverter`` class is
     instantiated, and the instance is stored in ``self.converters`` keyed by
     its ``format_name``. ``self.possible_formats`` is then set to the list
     of registered format names.

     Raises:
         ExpectedException: if two converters report the same format name.
     """
     converter_infos = au.get_local_module_infos_of_type('converter')
     for info in converter_infos.values():
         # Path-based import, see
         # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
         spec = importlib.util.spec_from_file_location(
             info.name, info.script_path)
         loaded = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(loaded)
         converter = loaded.CravatConverter()
         if converter.format_name in self.converters:
             raise ExpectedException(
                 'Cannot load two converters for format %s'
                 % converter.format_name)
         self.converters[converter.format_name] = converter
     self.possible_formats = list(self.converters.keys())
Пример #11
0
 async def make_col_info(self, level, conn=None, cursor=None):
     """Build column-group and column metadata for one report level.

     Reads ``<level>_annotator`` and ``<level>_header`` through ``cursor``,
     optionally merges gene-level columns into the variant level, adds
     gene-summary columns at the gene level, reorders groups and columns,
     and stores the results on the instance (``columngroups``, ``colnos``,
     ``newcolnos``, ``colname_conversion``, ``colinfo``,
     ``colnames_to_display``, ``column_subs``, ``display_select_columns``,
     ``colnos_to_display``; ``summarizing_modules`` and ``var_added_cols``
     where applicable).

     Args:
         level: Level name used as table-name prefix (the code treats
             ``"variant"`` and ``"gene"`` specially).
         conn: Not read in this body.
         cursor: Async DB cursor used for every query here. It defaults to
             None but is used unconditionally -- presumably always supplied
             by the caller; TODO confirm.
     """
     self.colnames_to_display[level] = []
     await self.exec_db(self.store_mapper)
     cravat_conf = self.conf.get_cravat_conf()
     # Group ordering: configured order if present, else a built-in default.
     if "report_module_order" in cravat_conf:
         priority_colgroupnames = cravat_conf["report_module_order"]
     else:
         priority_colgroupnames = [
             "base", "hg38", "hg19", "hg18", "tagsampler"
         ]
     # level-specific column groups
     self.columngroups[level] = []
     sql = "select name, displayname from " + level + "_annotator"
     await cursor.execute(sql)
     rows = await cursor.fetchall()
     for row in rows:
         (name, displayname) = row
         self.columngroups[level].append({
             "name": name,
             "displayname": displayname,
             "count": 0
         })
     # level-specific column names
     header_table = level + "_header"
     coldefs = []
     sql = "select col_def from " + header_table
     await cursor.execute(sql)
     for row in await cursor.fetchall():
         coljson = row[0]
         coldef = ColumnDefinition({})
         coldef.from_json(coljson)
         coldefs.append(coldef)
     columns = []
     self.colnos[level] = {}
     colcount = 0
     # level-specific column details
     for coldef in coldefs:
         self.colnos[level][coldef.name] = colcount
         colcount += 1
         # Fill in category values from the data when the definition has none.
         # NOTE(review): this branch tests for "single"/"multi" while the
         # gene-to-variant branch further down tests for
         # "category"/"multicategory" -- confirm which spelling is intended.
         if coldef.category in ["single", "multi"] and len(
                 coldef.categories) == 0:
             sql = "select distinct {} from {}".format(coldef.name, level)
             await cursor.execute(sql)
             rs = await cursor.fetchall()
             for r in rs:
                 coldef.categories.append(r[0])
         # Column names are "<group>__<column>"; unpacking fails if the name
         # does not contain exactly one "__".
         [colgrpname, _] = coldef.name.split("__")
         column = coldef.get_colinfo()
         columns.append(column)
         self.add_conditional_to_colnames_to_display(
             level, column, colgrpname)
         for columngroup in self.columngroups[level]:
             if columngroup["name"] == colgrpname:
                 columngroup["count"] += 1
     # adds gene level columns to variant level.
     if (self.nogenelevelonvariantlevel == False and level == "variant"
             and await self.exec_db(self.table_exists, "gene")):
         modules_to_add = []
         q = "select name from gene_annotator"
         await cursor.execute(q)
         gene_annotators = [v[0] for v in await cursor.fetchall()]
         # Every gene-level annotator except "base" is merged in.
         modules_to_add = [m for m in gene_annotators if m != "base"]
         for module in modules_to_add:
             cols = []
             q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                 module)
             await cursor.execute(q)
             rs = await cursor.fetchall()
             for r in rs:
                 cd = ColumnDefinition({})
                 cd.from_json(r[0])
                 cols.append(cd)
             q = 'select displayname from gene_annotator where name="{}"'.format(
                 module)
             await cursor.execute(q)
             r = await cursor.fetchone()
             displayname = r[0]
             self.columngroups[level].append({
                 "name": module,
                 "displayname": displayname,
                 "count": len(cols)
             })
             for coldef in cols:
                 self.colnos[level][coldef.name] = colcount
                 colcount += 1
                 # NOTE(review): the distinct-value query below reads the
                 # `level` (variant) table using a gene-level column name --
                 # confirm that column exists there.
                 if (coldef.category in ["category", "multicategory"]
                         and len(coldef.categories) == 0):
                     sql = "select distinct {} from {}".format(
                         coldef.name, level)
                     await cursor.execute(sql)
                     rs = await cursor.fetchall()
                     for r in rs:
                         coldef.categories.append(r[0])
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.add_conditional_to_colnames_to_display(
                     level, column, module)
                 self.var_added_cols.append(coldef.name)
     # Gene level summary columns
     if level == "gene":
         q = "select name from variant_annotator"
         await cursor.execute(q)
         done_var_annotators = [v[0] for v in await cursor.fetchall()]
         self.summarizing_modules = []
         # Candidate summarizers: local annotators plus postaggregators.
         local_modules = au.get_local_module_infos_of_type("annotator")
         local_modules.update(
             au.get_local_module_infos_of_type("postaggregator"))
         summarizer_module_names = []
         for module_name in done_var_annotators:
             if module_name in [
                     "base",
                     "hg19",
                     "hg18",
                     "extra_vcf_info",
                     "extra_variant_info",
             ]:
                 continue
             if module_name not in local_modules:
                 if self.args.silent == False and module_name != 'original_input':
                     print(
                         "            [{}] module does not exist in the system. Gene level summary for this module is skipped."
                         .format(module_name))
                 continue
             module = local_modules[module_name]
             if "can_summarize_by_gene" in module.conf:
                 summarizer_module_names.append(module_name)
         # The mapper always summarizes and goes first.
         local_modules[self.mapper_name] = au.get_local_module_info(
             self.mapper_name)
         summarizer_module_names = [self.mapper_name
                                    ] + summarizer_module_names
         for module_name in summarizer_module_names:
             mi = local_modules[module_name]
             # NOTE(review): sys.path gains an entry on every call; nothing
             # here removes it or checks for duplicates.
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             # annot_cls is only bound when one of the two branches matches;
             # every name in the list is either a done annotator or the mapper.
             if module_name in done_var_annotators:
                 annot_cls = util.load_class(mi.script_path,
                                             "CravatAnnotator")
             elif module_name == self.mapper_name:
                 annot_cls = util.load_class(mi.script_path, "Mapper")
             cmd = {
                 "script_path": mi.script_path,
                 "input_file": "__dummy__",
                 "output_dir": self.output_dir,
             }
             annot = annot_cls(cmd)
             cols = mi.conf["gene_summary_output_columns"]
             columngroup = {
                 "name": mi.name,
                 "displayname": mi.title,
                 "count": len(cols),
             }
             self.columngroups[level].append(columngroup)
             for col in cols:
                 coldef = ColumnDefinition(col)
                 coldef.name = columngroup["name"] + "__" + coldef.name
                 coldef.genesummary = True
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.add_conditional_to_colnames_to_display(
                     level, column, mi.name)
             self.summarizing_modules.append([mi, annot, cols])
             for col in cols:
                 fullname = module_name + "__" + col["name"]
                 self.colnos[level][fullname] = len(self.colnos[level])
     # re-orders columns groups.
     colgrps = self.columngroups[level]
     newcolgrps = []
     for priority_colgrpname in priority_colgroupnames:
         for colgrp in colgrps:
             if colgrp["name"] == priority_colgrpname:
                 # Mapper and tagsampler columns are folded into the first
                 # group already placed.
                 # NOTE(review): newcolgrps[0] raises IndexError if no group
                 # has been appended yet -- relies on the priority order.
                 if colgrp["name"] in [self.mapper_name, "tagsampler"]:
                     newcolgrps[0]["count"] += colgrp["count"]
                 else:
                     newcolgrps.append(colgrp)
                 break
     colpos = 0
     for colgrp in newcolgrps:
         colgrp["lastcol"] = colpos + colgrp["count"]
         colpos = colgrp["lastcol"]
     # Remaining (non-priority) groups follow, sorted by display name.
     colgrpnames = [
         v["displayname"] for v in colgrps
         if v["name"] not in priority_colgroupnames
     ]
     colgrpnames.sort()
     for colgrpname in colgrpnames:
         for colgrp in colgrps:
             if colgrp["displayname"] == colgrpname:
                 colgrp["lastcol"] = colpos + colgrp["count"]
                 newcolgrps.append(colgrp)
                 colpos += colgrp["count"]
                 break
     # re-orders columns.
     self.colname_conversion[level] = {}
     new_columns = []
     self.newcolnos[level] = {}
     newcolno = 0
     new_colnames_to_display = []
     for colgrp in newcolgrps:
         colgrpname = colgrp["name"]
         for col in columns:
             colname = col["col_name"]
             [grpname, _] = colname.split("__")
             # Mapper/tagsampler columns are renamed into the "base" group;
             # colname_conversion maps the new name back to the original.
             if colgrpname == "base" and grpname in [
                     self.mapper_name, "tagsampler"
             ]:
                 newcolname = "base__" + colname.split("__")[1]
                 self.colname_conversion[level][newcolname] = colname
                 col["col_name"] = newcolname
                 new_columns.append(col)
                 self.newcolnos[level][newcolname] = newcolno
                 if newcolname in self.colnames_to_display[level]:
                     new_colnames_to_display.append(newcolname)
             elif grpname == colgrpname:
                 new_columns.append(col)
                 self.newcolnos[level][colname] = newcolno
                 if colname in self.colnames_to_display[level]:
                     new_colnames_to_display.append(colname)
             else:
                 continue
             newcolno += 1
     self.colinfo[level] = {"colgroups": newcolgrps, "columns": new_columns}
     self.colnames_to_display[level] = new_colnames_to_display
     # report substitution
     if level in ["variant", "gene"]:
         reportsubtable = level + "_reportsub"
         if await self.exec_db(self.table_exists, reportsubtable):
             q = "select * from {}".format(reportsubtable)
             await cursor.execute(q)
             # First column is used as the key, second as a JSON document.
             reportsub = {
                 r[0]: json.loads(r[1])
                 for r in await cursor.fetchall()
             }
             self.column_subs[level] = []
             for i, column in enumerate(new_columns):
                 module, col = column["col_name"].split("__")
                 if module == self.mapper_name:
                     module = "base"
                 if module in reportsub and col in reportsub[module]:
                     self.column_subs[level].append(
                         SimpleNamespace(
                             module=module,
                             col=col,
                             index=i,
                             subs=reportsub[module][col],
                         ))
                     new_columns[i]["reportsub"] = reportsub[module][col]
     # display_select_columns
     if (level in self.extract_columns_multilevel
             and len(self.extract_columns_multilevel[level]) > 0
         ) or self.concise_report:
         self.display_select_columns[level] = True
     else:
         self.display_select_columns[level] = False
     # column numbers to display
     colno = 0
     self.colnos_to_display[level] = []
     for colgroup in self.colinfo[level]["colgroups"]:
         count = colgroup["count"]
         if count == 0:
             continue
         # The slice is taken once per group; colno then advances one per
         # column so it points at the next group's first column afterwards.
         for col in self.colinfo[level]["columns"][colno:colno + count]:
             module_col_name = col["col_name"]
             if module_col_name in self.colnames_to_display[level]:
                 include_col = True
             else:
                 include_col = False
             if include_col:
                 self.colnos_to_display[level].append(colno)
             colno += 1
Пример #12
0
 def make_col_info (self, level):
     """Build column-group and column metadata for one report level.

     Reads ``<level>_annotator`` and ``<level>_header`` through
     ``self.cursor`` and stores the result in ``self.columngroups[level]``,
     ``self.colnos[level]`` and ``self.colinfo[level]``. At the variant
     level, columns of selected gene-level modules are appended (and listed
     in ``self.var_added_cols``); at the gene level, gene-summary columns of
     local annotators are appended and ``self.summarizing_modules`` is
     rebuilt.

     Args:
         level: Level name used as table-name prefix (``'variant'`` and
             ``'gene'`` get the extra handling described above).
     """
     self.colnos[level] = {}

     # Columns from aggregator
     self.columngroups[level] = []
     sql = 'select name, displayname from ' + level + '_annotator'
     self.cursor.execute(sql)
     for row in self.cursor.fetchall():
         (name, displayname) = row
         self.columngroups[level].append(
             {'name': name,
              'displayname': displayname,
              'count': 0})
     sql = 'select col_name, col_title, col_type from ' + level + '_header'
     self.cursor.execute(sql)
     columns = []
     colcount = 0
     for row in self.cursor.fetchall():
         (colname, coltitle, col_type) = row
         column = {'col_name': colname,
                   'col_title': coltitle,
                   'col_type': col_type}
         self.colnos[level][colname] = colcount
         colcount += 1
         columns.append(column)
         # Column names are "<group>__<column>"; count it in its group.
         groupname = colname.split('__')[0]
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == groupname:
                 columngroup['count'] += 1
     if level == 'variant' and self.table_exists('gene'):
         modules_to_add = []
         q = 'select name from gene_annotator'
         self.cursor.execute(q)
         gene_annotators = [v[0] for v in self.cursor.fetchall()]
         k = 'add_gene_module_to_variant'
         # NOTE(review): the list returned by get_val is appended to below;
         # if it is the config's own list this mutates the config -- confirm.
         if self.conf.has_key(k):
             modules_to_add = self.conf.get_val(k)
         for module in gene_annotators:
             module_info = au.get_local_module_info(module)
             if module_info is None:
                 continue
             module_conf = module_info.conf
             if 'add_to_variant_level' in module_conf:
                 if module_conf['add_to_variant_level'] == True:
                     modules_to_add.append(module)
         for module in modules_to_add:
             if module not in gene_annotators:
                 continue
             mi = au.get_local_module_info(module)
             # A configured module may not be installed locally; skip it
             # instead of failing on `mi.conf`.
             if mi is None:
                 continue
             cols = mi.conf['output_columns']
             self.columngroups[level].append({'name': mi.name,
                                  'displayname': mi.title,
                                  'count': len(cols)})
             for col in cols:
                 # Compute the name BEFORE registering its number: the old
                 # order reused the previous column's name and overwrote
                 # that column's index.
                 colname = mi.name + '__' + col['name']
                 self.colnos[level][colname] = colcount
                 colcount += 1
                 column = {'col_name': colname,
                           'col_title': col['title'],
                           'col_type': col['type']}
                 columns.append(column)
                 self.var_added_cols.append(colname)
     # Gene level summary columns
     if level == 'gene':
         q = 'select name from variant_annotator'
         self.cursor.execute(q)
         done_var_annotators = [v[0] for v in self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         for module_name in local_modules:
             mi = local_modules[module_name]
             conf = mi.conf
             # Only annotators that ran and declare gene summarization.
             if 'can_summarize_by_gene' in conf and module_name in done_var_annotators:
                 sys.path = sys.path + [os.path.dirname(mi.script_path)]
                 annot_cls = util.load_class('CravatAnnotator', mi.script_path)
                 annot = annot_cls([mi.script_path, '__dummy__'])
                 cols = conf['gene_summary_output_columns']
                 columngroup = {}
                 columngroup['name'] = conf['name']
                 columngroup['displayname'] = conf['title']
                 columngroup['count'] = len(cols)
                 self.columngroups[level].append(columngroup)
                 for col in cols:
                     column = {'col_name': conf['name'] + '__' + col['name'],
                               'col_title': col['title'],
                               'col_type': col['type']}
                     columns.append(column)
                 self.summarizing_modules.append([mi, annot, cols])
                 annot.remove_log_file()
     # 'lastcol' is the running end position of each group.
     colno = 0
     for colgroup in self.columngroups[level]:
         colno += colgroup['count']
         colgroup['lastcol'] = colno
     self.colinfo[level] = {'colgroups': self.columngroups[level], 'columns': columns}
Пример #13
0
 async def make_col_info(self, level):
     await self.store_mapper()
     cravat_conf = self.conf.get_cravat_conf()
     if 'report_module_order' in cravat_conf:
         priority_colgroupnames = cravat_conf['report_module_order']
     else:
         priority_colgroupnames = [
             'base', 'hg38', 'hg19', 'hg18', 'tagsampler'
         ]
     # level-specific column groups
     self.columngroups[level] = []
     sql = 'select name, displayname from ' + level + '_annotator'
     await self.cursor.execute(sql)
     rows = await self.cursor.fetchall()
     for row in rows:
         (name, displayname) = row
         self.columngroups[level].append({
             'name': name,
             'displayname': displayname,
             'count': 0
         })
     # level-specific column names
     header_table = level + '_header'
     coldefs = []
     sql = 'select col_def from ' + header_table
     await self.cursor.execute(sql)
     for row in await self.cursor.fetchall():
         coljson = row[0]
         coldef = ColumnDefinition({})
         coldef.from_json(coljson)
         coldefs.append(coldef)
     columns = []
     self.colnos[level] = {}
     colcount = 0
     # level-specific column details
     for coldef in coldefs:
         self.colnos[level][coldef.name] = colcount
         colcount += 1
         if coldef.category in ['single', 'multi'] and len(
                 coldef.categories) == 0:
             sql = 'select distinct {} from {}'.format(coldef.name, level)
             await self.cursor.execute(sql)
             rs = await self.cursor.fetchall()
             for r in rs:
                 coldef.categories.append(r[0])
         [colgrpname, colonlyname] = coldef.name.split('__')
         column = coldef.get_colinfo()
         columns.append(column)
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == colgrpname:
                 columngroup['count'] += 1
     # adds gene level columns to variant level.
     if self.nogenelevelonvariantlevel == False and level == 'variant' and await self.table_exists(
             'gene'):
         modules_to_add = []
         q = 'select name from gene_annotator'
         await self.cursor.execute(q)
         gene_annotators = [v[0] for v in await self.cursor.fetchall()]
         modules_to_add = [m for m in gene_annotators if m != 'base']
         for module in modules_to_add:
             if not module in gene_annotators:
                 continue
             cols = []
             q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                 module)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             for r in rs:
                 cd = ColumnDefinition({})
                 cd.from_json(r[0])
                 cols.append(cd)
             q = 'select displayname from gene_annotator where name="{}"'.format(
                 module)
             await self.cursor.execute(q)
             r = await self.cursor.fetchone()
             displayname = r[0]
             self.columngroups[level].append({
                 'name': module,
                 'displayname': displayname,
                 'count': len(cols)
             })
             for coldef in cols:
                 self.colnos[level][coldef.name] = colcount
                 colcount += 1
                 if coldef.category in ['category', 'multicategory'
                                        ] and len(coldef.categories) == 0:
                     sql = 'select distinct {} from {}'.format(
                         coldef.name, level)
                     await self.cursor.execute(sql)
                     rs = await self.cursor.fetchall()
                     for r in rs:
                         coldef.categories.append(r[0])
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.var_added_cols.append(coldef.name)
     # Gene level summary columns
     if level == 'gene':
         q = 'select name from variant_annotator'
         await self.cursor.execute(q)
         done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         local_modules.update(
             au.get_local_module_infos_of_type('postaggregator'))
         summarizer_module_names = []
         for module_name in done_var_annotators:
             if module_name in [
                     'base', 'hg19', 'hg18', 'extra_vcf_info',
                     'extra_variant_info'
             ]:
                 continue
             if module_name not in local_modules:
                 print(
                     '            [{}] module does not exist in the system. Gene level summary for this module is skipped.'
                     .format(module_name))
                 continue
             module = local_modules[module_name]
             if 'can_summarize_by_gene' in module.conf:
                 summarizer_module_names.append(module_name)
         local_modules[self.mapper_name] = au.get_local_module_info(
             self.mapper_name)
         summarizer_module_names = [self.mapper_name
                                    ] + summarizer_module_names
         for module_name in summarizer_module_names:
             mi = local_modules[module_name]
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             if module_name in done_var_annotators:
                 annot_cls = util.load_class(mi.script_path,
                                             'CravatAnnotator')
             elif module_name == self.mapper_name:
                 annot_cls = util.load_class(mi.script_path, 'Mapper')
             annot = annot_cls(
                 [mi.script_path, '__dummy__', '-d', self.output_dir], {})
             '''
             cols = conf['gene_summary_output_columns']
             columngroup = {}
             columngroup['name'] = os.path.basename(mi.script_path).split('.')[0]
             columngroup['displayname'] = conf['title']
             columngroup['count'] = len(cols)
             '''
             cols = mi.conf['gene_summary_output_columns']
             columngroup = {
                 'name': mi.name,
                 'displayname': mi.title,
                 'count': len(cols),
             }
             self.columngroups[level].append(columngroup)
             for col in cols:
                 coldef = ColumnDefinition(col)
                 coldef.name = columngroup['name'] + '__' + coldef.name
                 coldef.genesummary = True
                 column = coldef.get_colinfo()
                 columns.append(column)
             self.summarizing_modules.append([mi, annot, cols])
             for col in cols:
                 fullname = module_name + '__' + col['name']
                 self.colnos[level][fullname] = len(self.colnos[level])
     # re-orders columns groups.
     colgrps = self.columngroups[level]
     newcolgrps = []
     for priority_colgrpname in priority_colgroupnames:
         for colgrp in colgrps:
             if colgrp['name'] == priority_colgrpname:
                 if colgrp['name'] in [self.mapper_name, 'tagsampler']:
                     newcolgrps[0]['count'] += colgrp['count']
                 else:
                     newcolgrps.append(colgrp)
                 break
     colpos = 0
     for colgrp in newcolgrps:
         colgrp['lastcol'] = colpos + colgrp['count']
         colpos = colgrp['lastcol']
     colgrpnames = [
         v['displayname'] for v in colgrps
         if v['name'] not in priority_colgroupnames
     ]
     colgrpnames.sort()
     for colgrpname in colgrpnames:
         for colgrp in colgrps:
             if colgrp['displayname'] == colgrpname:
                 colgrp['lastcol'] = colpos + colgrp['count']
                 newcolgrps.append(colgrp)
                 colpos += colgrp['count']
                 break
     # re-orders columns.
     self.colname_conversion[level] = {}
     new_columns = []
     self.newcolnos[level] = {}
     newcolno = 0
     for colgrp in newcolgrps:
         colgrpname = colgrp['name']
         for col in columns:
             colname = col['col_name']
             [grpname, oricolname] = colname.split('__')
             if colgrpname == 'base' and grpname in [
                     self.mapper_name, 'tagsampler'
             ]:
                 newcolname = 'base__' + colname.split('__')[1]
                 self.colname_conversion[level][newcolname] = colname
                 col['col_name'] = newcolname
                 new_columns.append(col)
                 self.newcolnos[level][newcolname] = newcolno
                 #self.colnos[level][newcolname] = colno
                 #del self.colnos[level][oldcolname]
             elif grpname == colgrpname:
                 new_columns.append(col)
                 self.newcolnos[level][colname] = newcolno
             else:
                 continue
             newcolno += 1
     self.colinfo[level] = {'colgroups': newcolgrps, 'columns': new_columns}
     # report substitution
     if level in ['variant', 'gene']:
         reportsubtable = level + '_reportsub'
         if await self.table_exists(reportsubtable):
             q = 'select * from {}'.format(reportsubtable)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             self.report_substitution = {}
             for r in rs:
                 module = r[0]
                 sub = json.loads(r[1])
                 self.report_substitution[module] = sub
             self.column_subs[level] = {}
             self.column_sub_allow_partial_match[level] = {}
             for i in range(len(new_columns)):
                 column = new_columns[i]
                 [module, col] = column['col_name'].split('__')
                 if module in [self.mapper_name]:
                     module = 'base'
                 if module in self.report_substitution:
                     sub = self.report_substitution[module]
                     if col in sub:
                         if module in [
                                 'base', self.mapper_name
                         ] and col in ['all_mappings', 'all_so']:
                             allow_partial_match = True
                             self.column_subs[level][i] = {
                                 re.compile(fr'\b{key}\b'): val
                                 for key, val in sub[col].items()
                             }
                         else:
                             allow_partial_match = False
                             self.column_subs[level][i] = sub[col]
                         self.column_sub_allow_partial_match[level][
                             i] = allow_partial_match
                         new_columns[i]['reportsub'] = sub[col]
Пример #14
0
 async def make_col_info(self, level):
     """Collect column-group and column metadata for ``level``.

     Reads the ``<level>_annotator`` and ``<level>_header`` tables through
     ``self.cursor`` and fills these per-level attributes:

     * ``self.columngroups[level]`` - list of group dicts with ``name``,
       ``displayname``, ``count`` and ``lastcol`` (running column total).
     * ``self.ord_cols[level]`` - column names in report order.
     * ``self.colnos[level]`` - column name -> index in the header table's
       own row order; columns added later are numbered after those.
     * ``self.colinfo[level]`` - ``{'colgroups': ..., 'columns': ...}``.
     * ``self.column_subs[level]`` and ``self.report_substitution`` when a
       ``<level>_reportsub`` table exists (variant and gene levels only).

     Groups named in the ``report_module_order`` setting (default: base,
     hg19, hg18, tagsampler) come first, in that order; the remaining
     groups keep the order in which the database returned them.

     For the variant level, when a ``gene`` table exists, columns of
     gene-level modules are appended if requested through the
     ``add_gene_module_to_variant`` setting or a module's
     ``add_to_variant_level`` flag. For the gene level, the summary columns
     of installed annotators declaring ``can_summarize_by_gene`` are
     appended and the instantiated annotators are stored in
     ``self.summarizing_modules``.

     Args:
         level: Result level name; used as a table name and as the prefix
             of the ``_annotator`` / ``_header`` / ``_reportsub`` tables.
     """
     cravat_conf = self.conf.get_cravat_conf()
     if 'report_module_order' in cravat_conf:
         priority_colgroups = cravat_conf['report_module_order']
     else:
         priority_colgroups = ['base', 'hg19', 'hg18', 'tagsampler']

     # Column groups: prioritized groups first, everything else afterwards.
     self.columngroups[level] = []
     await self.cursor.execute(
         'select name, displayname from ' + level + '_annotator')
     annotator_rows = await self.cursor.fetchall()
     for priority_name in priority_colgroups:
         for (name, displayname) in annotator_rows:
             if name == priority_name:
                 self.columngroups[level].append({
                     'name': name,
                     'displayname': displayname,
                     'count': 0
                 })
     for (name, displayname) in annotator_rows:
         if name not in priority_colgroups:
             self.columngroups[level].append({
                 'name': name,
                 'displayname': displayname,
                 'count': 0
             })

     # Header rows, re-ordered the same way as the groups.
     await self.cursor.execute('select * from ' + level + '_header')
     columns = []
     unordered_rows = await self.cursor.fetchall()
     rows = []
     self.ord_cols[level] = []
     for group in priority_colgroups:
         for row in unordered_rows:
             col_group, _ = row[0].split('__')
             if col_group == group:
                 rows.append(row)
                 self.ord_cols[level].append(row[0])
     for row in unordered_rows:
         col_group, _ = row[0].split('__')
         if col_group not in priority_colgroups:
             rows.append(row)
             self.ord_cols[level].append(row[0])

     # Column numbers follow the header table's own (unordered) row order.
     self.colnos[level] = {}
     for header_index, row in enumerate(unordered_rows):
         self.colnos[level][row[0]] = header_index
     colcount = len(unordered_rows)

     # Column details, in report order. Header rows may be shorter than the
     # full layout, so every field past the third falls back to a default.
     for row in rows:
         (colname, coltitle, col_type) = row[:3]
         col_cats = json.loads(row[3]) if len(row) > 3 and row[3] else []
         col_width = row[4] if len(row) > 4 else None
         col_desc = row[5] if len(row) > 5 else None
         col_hidden = bool(row[6]) if len(row) > 6 else False
         col_ctg = row[7] if len(row) > 7 else None
         if col_ctg in ['single', 'multi'] and len(col_cats) == 0:
             # No categories stored: use the values present in the table.
             sql = 'select distinct {} from {}'.format(colname, level)
             await self.cursor.execute(sql)
             for r in await self.cursor.fetchall():
                 col_cats.append(r[0])
         col_filterable = bool(row[8]) if len(row) > 8 else True
         link_format = row[9] if len(row) > 9 else None
         columns.append({
             'col_name': colname,
             'col_title': coltitle,
             'col_type': col_type,
             'col_cats': col_cats,
             'col_width': col_width,
             'col_desc': col_desc,
             'col_hidden': col_hidden,
             'col_ctg': col_ctg,
             'col_filterable': col_filterable,
             'link_format': link_format,
         })
         groupname = colname.split('__')[0]
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == groupname:
                 columngroup['count'] += 1

     # Gene-level module columns shown at the variant level.
     if level == 'variant' and await self.table_exists('gene'):
         await self.cursor.execute('select name from gene_annotator')
         gene_annotators = [v[0] for v in await self.cursor.fetchall()]
         # Work on a copy: get_val may hand back the configuration's own
         # list object, and appending to that would alter the configuration
         # and re-add the same modules on every call.
         requested = []
         k = 'add_gene_module_to_variant'
         if self.conf.has_key(k):
             requested = list(self.conf.get_val(k) or [])
         for module in gene_annotators:
             module_info = au.get_local_module_info(module)
             if module_info is None:
                 continue
             module_conf = module_info.conf
             if ('add_to_variant_level' in module_conf
                     and module_conf['add_to_variant_level'] == True):
                 requested.append(module)
         # A module may be requested both ways; add its columns only once.
         modules_to_add = []
         for module in requested:
             if module not in modules_to_add:
                 modules_to_add.append(module)
         for module in modules_to_add:
             if module not in gene_annotators:
                 continue
             mi = au.get_local_module_info(module)
             if mi is None:
                 # Requested in the configuration but no local module info.
                 continue
             cols = mi.conf['output_columns']
             self.columngroups[level].append({
                 'name': mi.name,
                 'displayname': mi.title,
                 'count': len(cols)
             })
             for col in cols:
                 colname = mi.name + '__' + col['name']
                 self.colnos[level][colname] = colcount
                 self.ord_cols[level].append(colname)
                 colcount += 1
                 col_type = col['type']
                 # "or []" yields a fresh list when none is configured, so
                 # the appends below never write into the module's conf.
                 col_cats = col.get('categories') or []
                 col_width = col.get('width')
                 col_desc = col.get('desc')
                 col_hidden = col.get('hidden', False)
                 col_ctg = col.get('category', None)
                 if col_ctg in ['category', 'multicategory'
                                ] and len(col_cats) == 0:
                     sql = 'select distinct {} from {}'.format(
                         colname, level)
                     await self.cursor.execute(sql)
                     for r in await self.cursor.fetchall():
                         col_cats.append(r[0])
                 col_filterable = col.get('filterable', True)
                 col_link_format = col.get('link_format')
                 # NOTE(review): header-derived columns above store this
                 # value under 'link_format'; the key differs here -
                 # confirm which one consumers read.
                 columns.append({
                     'col_name': colname,
                     'col_title': col['title'],
                     'col_type': col_type,
                     'col_cats': col_cats,
                     'col_width': col_width,
                     'col_desc': col_desc,
                     'col_hidden': col_hidden,
                     'col_ctg': col_ctg,
                     'col_filterable': col_filterable,
                     'col_link_format': col_link_format,
                 })
                 self.var_added_cols.append(colname)

     # Gene level summary columns
     if level == 'gene':
         await self.cursor.execute('select name from variant_annotator')
         done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         for module_name in local_modules:
             mi = local_modules[module_name]
             conf = mi.conf
             if ('can_summarize_by_gene' not in conf
                     or module_name not in done_var_annotators):
                 continue
             # Make the module's directory importable, without piling up
             # duplicate entries when this method runs more than once.
             script_dir = os.path.dirname(mi.script_path)
             if script_dir not in sys.path:
                 sys.path.append(script_dir)
             annot_cls = util.load_class('CravatAnnotator', mi.script_path)
             annot = annot_cls([mi.script_path, '__dummy__'], {})
             cols = conf['gene_summary_output_columns']
             self.columngroups[level].append({
                 'name': conf['name'],
                 'displayname': conf['title'],
                 'count': len(cols),
             })
             for col in cols:
                 summary_colname = conf['name'] + '__' + col['name']
                 col_type = col['type']
                 col_cats = col.get('categories') or []
                 col_ctg = col.get('category', None)
                 # NOTE(review): this tests the column *type*, whereas the
                 # variant-level branch tests the 'category' field -
                 # confirm which is intended.
                 if col_type in ['category', 'multicategory'
                                 ] and len(col_cats) == 0:
                     # Query this column itself rather than whatever name
                     # an earlier loop happened to leave behind.
                     sql = 'select distinct {} from {}'.format(
                         summary_colname, level)
                     await self.cursor.execute(sql)
                     for r in await self.cursor.fetchall():
                         col_cats.append(r[0])
                 columns.append({
                     'col_name': summary_colname,
                     'col_title': col['title'],
                     'col_type': col_type,
                     'col_cats': col_cats,
                     'col_width': col.get('width'),
                     'col_desc': col.get('desc'),
                     'col_hidden': col.get('hidden', False),
                     'col_ctg': col_ctg,
                     'col_filterable': col.get('filterable', True),
                     'col_link_format': col.get('link_format'),
                 })
             self.summarizing_modules.append([mi, annot, cols])
             for col in cols:
                 fullname = module_name + '__' + col['name']
                 self.ord_cols[level].append(fullname)
                 self.colnos[level][fullname] = len(self.colnos[level])

     # 'lastcol' is the cumulative column count up to and including a group.
     colno = 0
     for colgroup in self.columngroups[level]:
         colno += colgroup['count']
         colgroup['lastcol'] = colno
     self.colinfo[level] = {
         'colgroups': self.columngroups[level],
         'columns': columns
     }

     # report substitution
     if level in ['variant', 'gene']:
         reportsubtable = level + '_reportsub'
         if await self.table_exists(reportsubtable):
             await self.cursor.execute(
                 'select * from {}'.format(reportsubtable))
             self.report_substitution = {}
             for r in await self.cursor.fetchall():
                 self.report_substitution[r[0]] = json.loads(r[1])
             self.column_subs[level] = {}
             # Substitutions are keyed by the column's position in the
             # final column list.
             for i, column in enumerate(self.colinfo[level]['columns']):
                 [module, col] = column['col_name'].split('__')
                 if module in self.report_substitution:
                     sub = self.report_substitution[module]
                     if col in sub:
                         self.column_subs[level][i] = sub[col]
                         column['reportsub'] = sub[col]
Пример #15
0
 def parse_cmd_args(self, cmd_args):
     """Parse ``cmd_args`` and copy the resulting options onto the instance.

     The arguments are parsed with ``self.cmd_arg_parser`` and kept in
     ``self.args``. Attributes set here include ``annotator_names``,
     ``annotators``, ``excludes``, ``input``, ``run_name``, ``output_dir``,
     ``run_conf_path``, ``verbose``, ``reports``, ``input_assembly``,
     ``runlevel`` and ``cleandb``.

     Side effect: the output directory is created with ``os.mkdir`` when
     it does not exist yet.

     Args:
         cmd_args: Argument sequence handed to
             ``self.cmd_arg_parser.parse_args``.
     """
     self.args = self.cmd_arg_parser.parse_args(cmd_args)
     self.annotator_names = self.args.annotators
     # No explicit annotator list: use every locally available module of
     # type 'annotator'; otherwise look the requested names up.
     if self.annotator_names == None:
         self.annotators = au.get_local_module_infos_of_type('annotator')
     else:
         self.annotators = \
             au.get_local_module_infos_by_names(self.annotator_names)
     self.excludes = self.args.excludes
     # '*' drops every annotator; otherwise only the named ones are removed
     # (names that are not selected are ignored).
     if self.excludes == '*':
         self.annotators = {}
     elif self.excludes != None:
         for m in self.excludes:
             if m in self.annotators:
                 del self.annotators[m]
     self.input = os.path.abspath(self.args.input)
     # The run name defaults to the input file's base name.
     self.run_name = self.args.run_name
     if self.run_name == None:
         self.run_name = os.path.basename(self.input)
     # The output directory defaults to the directory holding the input.
     self.output_dir = self.args.output_dir
     if self.output_dir == None:
         self.output_dir = os.path.dirname(os.path.abspath(self.input))
     else:
         self.output_dir = os.path.abspath(self.output_dir)
     # os.mkdir creates a single directory level only; a missing parent
     # directory makes it raise.
     if os.path.exists(self.output_dir) == False:
         os.mkdir(self.output_dir)
     # An empty string means no run configuration file was given.
     self.run_conf_path = ''
     if self.args.conf: 
         self.run_conf_path = self.args.conf
     self.verbose = self.args.verbose
     self.reports = self.args.reports
     # input_assembly is taken from args.liftover.
     self.input_assembly = self.args.liftover
     # runlevel stays 0 unless one of the flags below is set. The checks
     # are independent, so when several flags are given the last matching
     # one in this sequence wins.
     self.runlevel = 0
     if self.args.stc:
         self.runlevel = self.runlevels['converter']
     if self.args.stm:
         self.runlevel = self.runlevels['mapper']
     if self.args.sta:
         self.runlevel = self.runlevels['annotator']
     if self.args.stg:
         self.runlevel = self.runlevels['aggregator']
     if self.args.stp:
         self.runlevel = self.runlevels['postaggregator']
     if self.args.str:
         self.runlevel = self.runlevels['reporter']
     # The string literal below holds disabled flag-handling code; as a bare
     # string expression it has no effect at runtime.
     '''
     if self.args.rc:
         self.should_run_converter = True
         self.should_run_genemapper = True
         self.should_run_annotator = True
         self.should_run_aggregator = True
         self.should_run_reporter = True
     if self.args.rm:
         self.should_run_converter = False
         self.should_run_genemapper = True
         self.should_run_annotators = True
         self.should_run_aggregator = True
         self.should_run_reporter = True
     if self.args.ra:
         self.should_run_converter = False
         self.should_run_genemapper = False
         self.should_run_annotators = True
         self.should_run_aggregator = True
         self.should_run_reporter = True
     '''
     self.cleandb = self.args.cleandb