Пример #1
0
 def conceptPatientCount(self, top: pd.DataFrame, c_fullname: str, lc: LoggedConnection,
                         # Moderate degree to support work on several tables in parallel.
                         parallel_degree: int=8) -> int:
     """Return the computed ``c_totalnum`` for one metadata term.

     The query selects the non-synonym row of ``{i2b2meta}.{table_name}``
     whose ``c_fullname`` equals the given path and computes ``c_totalnum``
     as follows:

     * ``sentinel * 1`` when ``c_visualattributes`` starts with ``C``
       (compared in upper case);
     * ``sentinel * 2`` when the term is not defined through
       ``concept_dimension`` / ``like`` / ``concept_cd``;
     * otherwise the number of distinct ``patient_num`` values in
       ``observation_fact`` for concepts whose ``concept_path`` starts with
       the term's ``c_dimcode``, or ``sentinel * 3`` if that yields NULL.

     :param top: only its ``c_table_name`` attribute is read, to name the
         metadata table.  NOTE(review): annotated as a DataFrame, but the
         value is formatted straight into the SQL, so presumably a single
         ``table_access`` row is expected -- confirm against callers.
     :param c_fullname: bound to ``:c_fullname`` in the query.
     :param lc: connection passed through to ``read_sql_step``.
     :param parallel_degree: substituted into the ``parallel(...)`` hint.
     :returns: the single ``c_totalnum`` value, converted to ``int``.
     :raises ValueError: if the query does not return exactly one row
         (the one-element unpacking below fails).
     """
     sql_template = '''
         select /*+ parallel({degree}) */
                c_fullname, c_hlevel, c_visualattributes, c_name
              , case
         when upper(meta.c_visualattributes)     like 'C%'
           then :sentinel * 1
         when lower(meta.c_tablename) <> 'concept_dimension'
           or lower(meta.c_operator) <> 'like'
           or lower(meta.c_facttablecolumn) <> 'concept_cd'
           then :sentinel * 2
         else coalesce((
             select count(distinct obs.patient_num)
             from (
                 select concept_cd
                 from {i2b2star}.concept_dimension
                 where concept_path like (meta.c_dimcode || '%')
                 ) cd
             join {i2b2star}.observation_fact obs
               on obs.concept_cd = cd.concept_cd), :sentinel * 3)
         end c_totalnum
         from {i2b2meta}.{table_name} meta
         where c_synonym_cd = 'N'
           and meta.c_fullname = :c_fullname
         '''
     # Schema/table names and the hint degree are identifiers, so they are
     # formatted into the text; the values go in as bind parameters.
     sql = sql_template.strip().format(
         i2b2star=self.i2b2star,
         i2b2meta=self.i2b2meta,
         degree=parallel_degree,
         table_name=top.c_table_name)
     binds = {'c_fullname': c_fullname, 'sentinel': self.sentinel}

     by_path = read_sql_step(sql, lc=lc, params=binds).set_index('c_fullname')
     # Exactly one row is expected; unpacking enforces that.
     [patient_count] = by_path.c_totalnum.values
     return int(patient_count)
Пример #2
0
def topFolders(i2b2meta: str, lc: LoggedConnection) -> pd.DataFrame:
    """List the ``table_access`` entries of a metadata schema.

    Only rows whose ``c_visualattributes`` has ``A`` as its second character
    (compared in upper case) are returned, ordered by ``c_name``.

    :param i2b2meta: schema name formatted into the query text.
    :param lc: connection passed through to ``read_sql_step``.
    :returns: the query result indexed by ``c_table_cd``; ``c_table_name``
        is upper-cased by the query.
    """
    query = '''
    select c_table_cd, c_hlevel, c_visualattributes, c_name, upper(c_table_name) c_table_name, c_fullname
    from {i2b2meta}.table_access ta
    where upper(ta.c_visualattributes) like '_A%'
    order by ta.c_name
    '''.format(i2b2meta=i2b2meta).strip()
    rows = read_sql_step(query, lc, {})
    return rows.set_index('c_table_cd')
Пример #3
0
 def cohorts(self, lc: et.LoggedConnection) -> pd.DataFrame:
     """Summarize ``site_cohorts`` per task, plus one overall ``Total`` row.

     The first query returns one row per (site_schema, result_instance_id,
     start_date, task_id) with the count of distinct ``patient_num``,
     newest ``start_date`` first.  The second query returns a single row
     whose ``task_id`` is the literal ``'Total'``; it carries the number of
     distinct site schemas, the maximum ``result_instance_id`` and
     ``start_date``, and the distinct patient count over the whole table.

     :param lc: connection passed through to ``rif_etl.read_sql_step``.
     :returns: the per-task rows followed by the total row, indexed by
         ``task_id``.
     """
     per_task = rif_etl.read_sql_step(
         '''
             select site_schema, result_instance_id, start_date, task_id, count(distinct patient_num)
             from site_cohorts
             group by site_schema, result_instance_id, start_date, task_id
             order by start_date desc
         ''', lc)
     total = rif_etl.read_sql_step(
             '''
             select count(distinct site_schema) site_schema
                  , max(result_instance_id) result_instance_id
                  , max(start_date) start_date
                  , 'Total' task_id
                  , count(distinct patient_num)
              from site_cohorts''', lc)
     # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
     # pd.concat with default arguments stacks the frames the same way.
     return pd.concat([per_task, total]).set_index('task_id')
Пример #4
0
 def top(self, lc: LoggedConnection) -> pd.Series:
     """Fetch the ``table_access`` row for ``self.c_table_cd``.

     The row must also have ``A`` as the second character of
     ``c_visualattributes`` (compared in upper case).

     :param lc: connection passed through to ``read_sql_step``.
     :returns: the first matching row as a Series; ``c_table_name`` is
         upper-cased by the query.
     :raises IndexError: if no row matches (``iloc[0]`` on an empty frame).
     """
     sql = '''
         select c_table_cd, c_hlevel, c_visualattributes, c_name
              , upper(c_table_name) c_table_name, c_fullname
         from {i2b2meta}.table_access ta
         where upper(ta.c_visualattributes) like '_A%'
           and ta.c_table_cd = :c_table_cd
         '''.format(i2b2meta=self.i2b2meta).strip()
     binds = {'c_table_cd': self.c_table_cd}
     matches = read_sql_step(sql, lc, binds).set_index('c_table_cd')
     return matches.iloc[0]
Пример #5
0
 def uploads(self, lc: et.LoggedConnection) -> pd.DataFrame:
     """Return the ``upload_status`` rows selected by the query below.

     A row qualifies when ``load_status`` starts with ``OK`` and at least
     one of these holds:

     * ``loaded_record > 0`` and ``substr(transform_name, -11)`` is a
       ``task_id`` present in ``site_cohorts``;
     * ``upload_label`` starts with ``migrate obs``;
     * ``message`` starts with ``UP#`` and ``upload_label`` contains
       `` #1 of 1``.

     :param lc: connection passed through to ``rif_etl.read_sql_step``.
     :returns: matching rows, newest ``load_date`` first, indexed by
         ``upload_id``.
     """
     status_sql = '''
              select *
             from upload_status up
             where load_status like 'OK%' and ((
                   loaded_record > 0
               and substr(transform_name, -11) in (
                 select distinct task_id from site_cohorts
               )
             ) or (
               upload_label like 'migrate obs%'
             ) or (
               message like 'UP#%' and upload_label like '% #1 of 1%'
             ))
             order by load_date desc
             '''
     status_rows = rif_etl.read_sql_step(status_sql, lc)
     return status_rows.set_index('upload_id')
Пример #6
0
 def activeDescendants(self, lc: LoggedConnection) -> pd.DataFrame:
     """List the metadata terms below this object's top-level folder.

     The folder row comes from ``self.top(lc)``; its ``c_table_name`` names
     the metadata table to search.  Returned terms are deeper than the
     folder (``c_hlevel`` greater), have a ``c_fullname`` that starts with
     the folder's, have ``A`` as the second character of
     ``c_visualattributes``, are not synonyms, and have
     ``m_applied_path = '@'``.

     :param lc: connection used for both the folder lookup and this query.
     :returns: matching terms ordered by ``c_hlevel`` then upper-cased
         ``c_name``, indexed by ``c_fullname``.
     """
     folder = self.top(lc)
     sql = '''
         select c_fullname, c_hlevel, c_visualattributes, c_totalnum, c_name, c_tooltip
         from {i2b2meta}.{meta_table} meta
         where meta.c_hlevel > :c_hlevel
           and meta.c_fullname like (:c_fullname || '%')
           and upper(meta.c_visualattributes) like '_A%'
           and c_synonym_cd = 'N'
           and m_applied_path = '@'
         order by meta.c_hlevel, upper(meta.c_name)
         '''.format(i2b2meta=self.i2b2meta,
                    meta_table=folder.c_table_name).strip()
     # c_hlevel is converted to a plain int before it is bound.
     binds = {'c_fullname': folder.c_fullname, 'c_hlevel': int(folder.c_hlevel)}
     descendants = read_sql_step(sql, lc=lc, params=binds)
     return descendants.set_index('c_fullname')
Пример #7
0
 def inclusion_criteria(self, lc: et.LoggedConnection) -> pd.DataFrame:
     """Look up display names for the hard-coded inclusion concept codes.

     Queries ``blueherondata_kumc_calamus.concept_dimension`` for the fixed
     list of codes below and returns one row per ``concept_cd`` with the
     minimum ``name_char`` for that code.

     :param lc: connection passed through to ``rif_etl.read_sql_step``.
     :returns: rows ordered by and indexed on ``concept_cd``.
     """
     # ISSUE: sync with build_cohort.sql?
     # ISSUE: left out 'NAACCR|400:C509'
     # NOTE(review): the list below does include 'NAACCR|400:C509', so the
     # ISSUE above may be stale or may refer to build_cohort.sql -- confirm.
     criteria_sql = '''
     select concept_cd, min(name_char) name_char
     from blueherondata_kumc_calamus.concept_dimension
     where concept_cd in (
       'SEER_SITE:26000',
       'NAACCR|400:C500',
       'NAACCR|400:C501',
       'NAACCR|400:C502',
       'NAACCR|400:C503',
       'NAACCR|400:C504',
       'NAACCR|400:C505',
       'NAACCR|400:C506',
       'NAACCR|400:C507',
       'NAACCR|400:C508',
       'NAACCR|400:C509'
     )
     group by concept_cd
     order by concept_cd
     '''
     concepts = rif_etl.read_sql_step(criteria_sql, lc)
     return concepts.set_index('concept_cd')