def does_period_have_data(inst, db, ssl, table, start_dt, stop_dt):
    """Report whether ``table`` returns at least one row for the given period.

    A ``SELECT 'found-data' ... LIMIT 1`` probe is run through
    ``impala-shell`` (via ``envoy.run``) and its output is searched for the
    marker string.

    Args:
        inst: value for impala-shell's ``-i`` option.
        db: value for impala-shell's ``-d`` option.
        ssl: handed to ``tools.format_ssl()`` to build the ssl option text.
        table: table name substituted into the FROM clause.
        start_dt: ``datetime.datetime`` or None.
        stop_dt: ``datetime.datetime`` or None.

    Returns:
        True if the marker ``'found-data'`` appears in the command output,
        False otherwise.  When the command exits non-zero, the command and
        its output are printed and ``tools.abort()`` is called
        (presumably that terminates the process -- confirm in tools).
    """
    assert start_dt is None or isinstance(start_dt, datetime.datetime)
    assert stop_dt is None or isinstance(stop_dt, datetime.datetime)

    # The date restriction is only applied when both bounds are supplied.
    if start_dt is not None and stop_dt is not None:
        period_filter = tools.get_ymd_filter(tools.dt_to_iso8601(start_dt),
                                             tools.dt_to_iso8601(stop_dt))
    else:
        period_filter = ''
    ssl_flag = tools.format_ssl(ssl)

    query = """ SELECT 'found-data'
                FROM {tab} c
                WHERE 1 = 1
                     {filter}
                LIMIT 1
            """.format(tab=table, filter=period_filter)
    # Collapse all runs of whitespace so the query fits on one command line.
    query = ' '.join(query.split())
    shell_cmd = """ impala-shell -i {inst} -d {db} --quiet -B {ssl} -q "{sql}" | columns | cut -f 1
          """.format(inst=inst, db=db, ssl=ssl_flag, sql=query)

    result = envoy.run(shell_cmd)
    if result.status_code == 0:
        return 'found-data' in result.std_out.strip()

    # Failure path: dump everything useful for diagnosis, then abort.
    print(shell_cmd)
    print(result.std_err)
    print(result.std_out)
    tools.abort("Error: does_period_have_data() failed!")
def get_cmd(inst, db, child_table, child_col, parent_table, parent_col, start_ts, stop_ts, ssl):
    """Build the impala-shell command counting child rows with no matching parent.

    The query LEFT OUTER JOINs ``child_table`` to ``parent_table`` on
    ``child_col = parent_col`` and counts the rows whose parent column is
    NULL.

    Args:
        inst: value for impala-shell's ``-i`` option.
        db: value for impala-shell's ``-d`` option.
        child_table, child_col: table and join column on the child side.
        parent_table, parent_col: table and join column on the parent side.
        start_ts, stop_ts: passed to ``tools.get_ymd_filter()`` when both
            are given; if either is None no extra filter is added.
        ssl: handed to ``tools.format_ssl()`` to build the ssl option text.

    Returns:
        A ``(cmd, mode)`` tuple where ``mode`` is ``'incremental'`` when a
        non-empty filter was applied and ``'full'`` otherwise.
    """
    have_period = start_ts is not None and stop_ts is not None
    ymd_filter = tools.get_ymd_filter(start_ts, stop_ts) if have_period else ''

    # The filter is appended directly after the WHERE condition, so it is
    # expected to carry its own leading conjunction (TODO confirm in tools).
    query = """ WITH t1 AS (                         \
                    SELECT  c.{c_col} AS child_col,  \
                            p.{p_col} AS par_col     \
                    FROM {c_tab} c                   \
                        LEFT OUTER JOIN {p_tab} p    \
                           ON c.{c_col} = p.{p_col}  \
                    WHERE p.{p_col} IS NULL          \
                          {filter}                   \
                )                                    \
                SELECT COALESCE(COUNT(*), 0)         \
                FROM t1                              \
            """.format(c_col=child_col, p_col=parent_col, c_tab=child_table,
                       p_tab=parent_table, filter=ymd_filter)
    # Collapse all runs of whitespace so the query fits on one command line.
    query = ' '.join(query.split())

    ssl_flag = tools.format_ssl(ssl)
    shell_cmd = """ impala-shell -i {inst} -d {db} --quiet -B {ssl} -q "{sql}"
          """.format(inst=inst, db=db, ssl=ssl_flag, sql=query)

    if ymd_filter:
        mode = 'incremental'
    else:
        mode = 'full'
    return shell_cmd, mode
def get_cmd(inst, db, table, cols, ssl):
    """Build the impala-shell command counting duplicated ``cols`` groups.

    The query groups ``table`` by ``cols``, keeps groups occurring more than
    once, and returns how many such groups exist.

    Args:
        inst: value for impala-shell's ``-i`` option.
        db: value for impala-shell's ``-d`` option.
        table: table name substituted into the FROM clause.
        cols: text substituted into both the SELECT list and the GROUP BY
            clause (presumably a comma-separated column list -- confirm
            against callers).
        ssl: handed to ``tools.format_ssl()`` to build the ssl option text.

    Returns:
        The command string to execute.
    """
    dup_query = """ WITH t1 AS (                         \
                    SELECT  %s    ,                  \
                            COUNT(*) AS dup_cnt      \
                    FROM %s                          \
                    GROUP BY %s                      \
                    HAVING COUNT(*) > 1              \
                )                                    \
                SELECT COUNT(*)                      \
                FROM t1                              \
                WHERE dup_cnt > 1                    \
            """ % (cols, table, cols)
    # Collapse all runs of whitespace so the query fits on one command line.
    dup_query = ' '.join(dup_query.split())

    ssl_flag = tools.format_ssl(ssl)
    shell_args = (inst, db, ssl_flag, dup_query)
    return """ impala-shell -i %s -d %s --quiet -B %s -q "%s"
          """ % shell_args
def get_first_dt_by_ymd(inst, db, table, ssl):
    """Return the earliest year/month/day found in ``table`` as a datetime.

    The query finds the minimum ``year``, then the minimum ``month`` within
    that year, then the minimum ``day`` within that year and month, and is
    run through ``impala-shell`` with a comma output delimiter.

    Args:
        inst: value for impala-shell's ``-i`` option.
        db: value for impala-shell's ``-d`` option.
        table: table name substituted into the query; the query references
            its ``year``, ``month`` and ``day`` columns.
        ssl: handed to ``tools.format_ssl()`` to build the ssl option text.

    Returns:
        ``datetime.datetime(year, month, day)`` for the first day found, or
        None when the command exits non-zero, prints nothing, or reports
        NULL for any of the three fields (e.g. an empty table).

    Raises:
        AssertionError: if the output does not consist of exactly 3 fields.
    """
    sql =   """ with year_tab AS (
                   SELECT MIN(year) AS year
                     FROM {tab}
                ),
                mon_tab AS (
                   SELECT MIN(month) AS month
                     FROM {tab}   t
                        INNER JOIN year_tab yt
                           ON t.year = yt.year
                ),
                day_tab AS (
                   SELECT MIN(day) AS day
                     FROM {tab}   t
                        INNER JOIN year_tab yt
                           ON t.year = yt.year
                        INNER JOIN mon_tab mt
                           ON t.month = mt.month
                )
                SELECT year, month, day
                FROM year_tab
                   CROSS JOIN mon_tab
                   CROSS JOIN day_tab
            """.format(tab=table)

    ssl_option = tools.format_ssl(ssl)
    # Collapse all runs of whitespace so the query fits on one command line.
    sql = ' '.join(sql.split())
    cmd =   """ impala-shell -i {inst} -d {db} --quiet --output_delimiter ',' -B {ssl} -q '{sql}' """\
            .format(inst=inst, db=db, sql=sql, ssl=ssl_option)
    try:
        # universal_newlines=True yields text instead of bytes on Python 3,
        # so the str-based split below works on both Python 2 and 3.
        stdout = subprocess.check_output(cmd, shell=True, universal_newlines=True)
    except subprocess.CalledProcessError:
        return None  # FIXME: why would this happen?

    # strip() drops the trailing newline without eating a real character
    # when the output happens not to end with one.
    stdout = stdout.strip()
    if not stdout:  # no data found
        return None

    fields = [field.strip() for field in stdout.split(',')]
    assert len(fields) == 3, "Invalid fields: %s" % ','.join(fields)

    # MIN() over an empty table still produces one row, of NULLs; treat that
    # as "no data" rather than failing in int().
    # NOTE(review): assumes impala-shell renders SQL NULL as the text 'NULL'.
    if any(field.upper() == 'NULL' for field in fields):
        return None

    year, month, day = (int(field) for field in fields)
    return datetime.datetime(year, month, day)