Example #1
def tickets_assigned_in_interval(sw: SnowflakeWrapper, developer_id: str, interval: Interval) -> pd.DataFrame:
    sql = "SELECT " \
          "     i.KEY, " \
          "     i.FIELDS:priority:name::string IssuePriority, " \
          "     i.FIELDS:issuetype:name::string IssueType, " \
          "     i.FIELDS:status: name::string status, " \
          "     SUM(1) DAYS_ASSIGNED " \
          "FROM ( "
    days = []
    for i in range((interval.toDate(raw=True) - interval.fromDate(raw=True)).days + 1):
        day = interval.fromDate(raw=True) + timedelta(days=i)
        sql_date = Interval.strdate(day)
        days.append(
            f" SELECT "
            f"   DISTINCT KEY "
            f" FROM TIMELINES "
            f" WHERE "
            f"   ASSIGNEE IN ('{developer_id}') "
            f"   AND ( "
            f"     {sql_date} BETWEEN DATEFROM AND DATETO "
            f"     OR ( "
            f"       DATEDIFF('day', {sql_date}, DATEFROM) = 0 "
            f"       AND DATEDIFF('day', {sql_date}, DATETO) > 1 "
            f"     ) "
            f"   ) "
        )
    sql += " UNION ALL ".join(days) + ") t INNER JOIN ISSUES i ON t.KEY = i.KEY GROUP BY 1, 2, 3, 4 ORDER BY 5, 1;"
    # print(sql)
    return sw.fetch_df(sql)
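Every example on this page leans on an `Interval` helper from pr3mar/thesis that is not included in the listing. Judging purely from the call sites (`fromDate(raw=True)` yields a `date`, `fromDate()` a quoted SQL literal, and `Interval.strdate` is a static formatter), a minimal sketch might look like the following; treat it as a reconstruction, not the repo's actual class:

from datetime import date

class Interval:
    """Half-open date interval [fromDate, toDate); a guess at the real helper."""

    def __init__(self, from_date: date, to_date: date):
        self._from, self._to = from_date, to_date

    @staticmethod
    def strdate(day: date) -> str:
        # Renders a date as a quoted SQL literal, e.g. "'2019-10-01'"
        return f"'{day.isoformat()}'"

    def fromDate(self, raw: bool = False):
        # raw=True returns the date object itself; otherwise a SQL-ready literal
        return self._from if raw else Interval.strdate(self._from)

    def toDate(self, raw: bool = False):
        return self._to if raw else Interval.strdate(self._to)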
Example #2
def get_authored_activity(sw: SnowflakeWrapper, interval: Interval, user_id: Union[None, list] = None):
    """
    includes comments, changelogs where the [optional] userId has performed the changes

    :param sw: SnowflakeWrapper
    :param interval: [start, end)
    :param user_id: ID of a user [if not given gets all users]
    :return:
    """
    ids = "" if user_id is None else f" USERID IN ({mask_in(user_id)}) AND "
    return sw.fetch_df(
        f"SELECT "
        f"    USERID, "
        f"    ARRAY_AGG( "
        f"        OBJECT_CONSTRUCT( "
        f"            'key', KEY, "
        f"            'dateCreated', DATECREATED, "
        f"            'changelogItem', CHANGELOGITEM "
        f"            ) "
        f"        ) CHANGELOGITEMS "
        f"FROM CHANGELOGS "
        f"WHERE "
        f"    {ids} "
        f"    DATECREATED >= {interval.fromDate()} AND "
        f"    DATECREATED < {interval.toDate()} "
        f"GROUP BY USERID;"
    )
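`mask_in` is imported from the project's utils module and not shown here; from its use inside `IN (...)` clauses it presumably quotes and comma-joins a list of values. A hypothetical stand-in:

def mask_in(values: list) -> str:
    # e.g. ['a', 'b'] -> "'a','b'", ready to be placed inside SQL IN (...)
    return ','.join(f"'{v}'" for v in values)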
Example #3
def tickets_assigned_per_day(sw: SnowflakeWrapper, developer_id: str, interval: Interval) -> pd.DataFrame:
    sql = "SELECT " \
          "     DAY_ASSIGNED, "\
          "     KEY " \
          "FROM ( "
    days = []
    for i in range((interval.toDate(raw=True) - interval.fromDate(raw=True)).days + 1):
        day = interval.fromDate(raw=True) + timedelta(days=i)
        sql_date = Interval.strdate(day)
        days.append(
            f" SELECT "
            f"   TO_DATE({sql_date}) AS DAY_ASSIGNED,"
            f"   KEY "
            f" FROM TIMELINES "
            f" WHERE "
            f"   ASSIGNEE IN ('{developer_id}') "
            f"   AND ( "
            f"     {sql_date} BETWEEN DATEFROM AND DATETO "
            f"     OR ( "
            f"       DATEDIFF('day', {sql_date}, DATEFROM) = 0 "
            f"       AND DATEDIFF('day', {sql_date}, DATETO) > 1 "
            f"     ) "
            f"   ) "
            f" GROUP BY 1, 2"
        )
    sql += " UNION ALL ".join(days) + ") GROUP BY 1, 2 ORDER BY 1;"
    # print(sql)
    return sw.fetch_df(sql)
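A short usage sketch, assuming an open SnowflakeWrapper connection and a populated TIMELINES table: since the frame holds one row per (day, key) pair, the daily ticket load is a plain group-by away.

assigned = tickets_assigned_per_day(sw, 'marko.prelevikj',
                                    Interval(date(2019, 10, 1), date(2020, 1, 1)))
tickets_per_day = assigned.groupby('DAY_ASSIGNED')['KEY'].count()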
Example #4
File: tickets.py Project: pr3mar/thesis
def get_unresolved_ticket_counts(
    sw: SnowflakeWrapper,
    interval: Interval,
    all_unresolved_until: bool = False,  # disregards the interval.fromDate()
    breakdowns: Union[list, None] = None,
) -> DataFrame:
    if breakdowns is None:
        breakdowns = ["issueType", "issuePriority"]
    breakdown_fields = {
        "issueType": "FIELDS:issuetype:name::string issueType",
        "issuePriority": "FIELDS:priority:name::string issuePriority",
        "status": "FIELDS:status:name::string status",
    }
    dimensions = [breakdown_fields[by] for by in breakdowns]
    date_from = "" if all_unresolved_until else f"AND DateCreated >= TO_DATE({interval.fromDate()})"
    sql = (
        f" SELECT KEY, "
        f"    {','.join(dimensions)}, "
        f"        convert_timezone('UTC', to_timestamp_tz(FIELDS:created::string, "
        f'                                                \'YYYY-MM-DD"T"HH24:MI:SS.FF TZHTZM\')) DateCreated, '
        f"        DATEDIFF('day', DateCreated, {interval.toDate()})                             daysUnresolved "
        f" FROM ISSUES "
        f" WHERE "
        f"   FIELDS:resolution IS NULL "
        f"   AND FIELDS:status:name::string NOT IN ('Done', 'Cancelled', 'Live', 'Reverted') "
        f"   {date_from} "
        f"   AND DateCreated < TO_DATE({interval.toDate()})"
        f" ORDER BY {len(dimensions) + 3}; ")
    # print(sql)
    return sw.fetch_df(sql)
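For illustration, a hedged call that counts everything still unresolved at the end of the interval, broken down by status only (any key of the breakdown map works):

unresolved = get_unresolved_ticket_counts(sw, interval,
                                          all_unresolved_until=True,
                                          breakdowns=["status"])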
Example #5
def get_developer_ids(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        "SELECT USERKEY KEY "
        "FROM USERS "
        "WHERE ACTIVE = TRUE AND USERKEY NOT ILIKE '%addon%' AND USERKEY NOT ILIKE 'ID%' "
        "ORDER BY 1;"
    )['KEY'].tolist()
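`SnowflakeWrapper.fetch_df` itself is not part of this listing. A plausible minimal implementation on top of snowflake-connector-python's cursor API is sketched below; the real wrapper in pr3mar/thesis may differ:

import pandas as pd

class SnowflakeWrapper:
    def __init__(self, connection):
        self._conn = connection

    def fetch_df(self, sql: str) -> pd.DataFrame:
        # fetch_pandas_all() is provided by snowflake-connector-python (requires pyarrow)
        with self._conn.cursor() as cur:
            cur.execute(sql)
            return cur.fetch_pandas_all()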
Example #6
File: tickets.py Project: pr3mar/thesis
def get_ticket_counts(sw: SnowflakeWrapper,
                      breakdowns: Union[list, None] = None) -> DataFrame:
    if breakdowns is None:
        breakdowns = ["issueType", "issuePriority", "resolved"]
    breakdown_fields = {
        "issueType": "i.fields:issuetype:name::string issueType",
        "issuePriority": "i.fields:priority:name::string issuePriority",
        # "resolved": "IFF(i.fields:resolutiondate IS NULL, 'No', 'Yes') Resolved"
        "resolved": "IFF(i.FIELDS:resolution IS NULL AND i.FIELDS:status:name NOT IN "
                    "('Done', 'Cancelled', 'Live', 'Reverted'), 'No', 'Yes') Resolved",
    }
    dimensions = [breakdown_fields[by] for by in breakdowns]
    group_by = ','.join([str(i + 1) for i, _ in enumerate(dimensions)])

    sql = (f"SELECT "
           f"    {','.join(dimensions)}, "
           f"    count(*) Count "
           f"FROM "
           f"    ISSUES i "
           f"GROUP BY {group_by} "
           f"ORDER BY {len(dimensions) + 1}, {group_by} DESC; ")
    # print(sql)
    return sw.fetch_df(sql)
Example #7
def tickets_active_on_interval(sw: SnowflakeWrapper,
                               interval: Interval,
                               cols=None) -> Union[list, pd.DataFrame]:
    get_keys_sql = (f"SELECT "
                    f"  KEY "
                    f"FROM "
                    f"  CHANGELOGS "
                    f"WHERE "
                    f"  DATECREATED >= {interval.fromDate()} AND "
                    f"  DATECREATED < {interval.toDate()} "
                    f"GROUP BY KEY")
    if cols is None:
        return sw.fetch_df(get_keys_sql)["KEY"].tolist()
    else:
        return sw.fetch_df(f"SELECT "
                           f" {utils.mask_props(cols)} "
                           f"FROM "
                           f"  ISSUES "
                           f" WHERE "
                           f"  KEY IN ({get_keys_sql});")
Example #8
def persist_issue_timelines(sw: SnowflakeWrapper,
                            interval: Interval,
                            keys: Union[None, list] = None) -> None:
    try:
        timelines = build_issue_timelines(sw, interval, keys)
        SnowflakeWrapper.execute_df_query(timelines,
                                          "timelines_temp",
                                          ifexists='replace')
        sw.execute(f"CREATE OR REPLACE TABLE TIMELINES ( "
                   f"    KEY	    VARCHAR(32),  "
                   f"    STATUS	VARCHAR(256),  "
                   f"    ASSIGNEE	VARCHAR(128), "
                   f"    DATEFROM	TIMESTAMP_NTZ(9),  "
                   f"    DATETO	TIMESTAMP_NTZ(9),  "
                   f"    TIMEDELTA	NUMBER(38,0) "
                   f"); ")
        sw.execute(f"INSERT ALL INTO TIMELINES "
                   f"    SELECT "
                   f"        c.KEY key, "
                   f"        t.VALUE:status::string status, "
                   f"        t.VALUE:assignee::string assignee, "
                   f"        TO_TIMESTAMP_NTZ(t.VALUE:date_from) datefrom, "
                   f"        TO_TIMESTAMP_NTZ(t.VALUE:date_to) dateto, "
                   f"        t.VALUE:tdelta::NUMBER timedelta "
                   f"    FROM "
                   f"        timelines_temp c, "
                   f"        LATERAL FLATTEN(PARSE_JSON(TIMELINES)) t; ")
    except Exception as e:
        print(f"Failed persisting timelines to Snowflake: {e}")
        raise
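`SnowflakeWrapper.execute_df_query` evidently stages a DataFrame as a Snowflake table. One way to sketch it, using pandas' to_sql over a snowflake-sqlalchemy engine (an assumption; the repo may well use the connector's write_pandas instead, and the placeholder URL must be filled in):

import pandas as pd
from sqlalchemy import create_engine

def execute_df_query(df: pd.DataFrame, table: str, ifexists: str = 'replace') -> None:
    # Hypothetical: persists df into Snowflake under the given table name
    engine = create_engine("snowflake://<user>:<password>@<account>/<db>/<schema>")
    df.to_sql(table, engine, if_exists=ifexists, index=False)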
Example #9
File: tickets.py Project: pr3mar/thesis
def get_tickets(sw: SnowflakeWrapper,
                interval: Interval,
                resolved: bool = True,
                max_days: int = 10) -> DataFrame:
    resolution_date = f"{convert_date('i.FIELDS:resolutiondate')}"
    where_resolution = f"{resolution_date} >= {interval.fromDate()} AND {resolution_date} < {interval.toDate()}" if resolved else ""
    max_duration = f" AVG_DAYS <= {min(30, max_days)} " if max_days > 1 else " AVG_HOUR < 10 "
    sql = (
        f'SELECT TICKET_KEY, '
        f'       STATUS, '
        f'       "IssueType", '
        f'       "IssuePriority", '
        f'       "States", '
        f'       "Transitions", '
        f'       "DegreeOfCycling", '
        f'       AVG_DAYS, '
        f'       MAX_DAYS, '
        f'       MIN_DAYS, '
        f'       AVG_HOUR, '
        f'       MAX_HOURS, '
        f'       MIN_HOURS '
        f'FROM (SELECT t.KEY                            TICKET_KEY, '
        f'             STATUS, '
        f'             i.FIELDS:issuetype: name::string "IssueType", '
        f'             i.FIELDS:priority: name::string  "IssuePriority", '
        f'             COUNT(DISTINCT t.STATUS)         "States", '
        f'             COUNT(*)                         "Transitions", '
        f'             ("Transitions" / "States") - 1   "DegreeOfCycling", '
        f'             AVG(TIMEDELTA) / (60 * 60 * 24)  AVG_DAYS, '
        f'             MAX(TIMEDELTA) / (60 * 60 * 24)  MAX_DAYS, '
        f'             MIN(TIMEDELTA) / (60 * 60 * 24)  MIN_DAYS, '
        f'             AVG(TIMEDELTA) / (60 * 60)       AVG_HOUR, '
        f'             MAX(TIMEDELTA) / (60 * 60)       MAX_HOURS, '
        f'             MIN(TIMEDELTA) / (60 * 60)       MIN_HOURS '
        f'      FROM TIMELINES t '
        f'               INNER JOIN ISSUES i ON t.KEY = i.KEY '
        f'      WHERE t.KEY IN '
        f'            (SELECT DISTINCT KEY FROM CHANGELOGS WHERE DATECREATED >= {interval.fromDate()} AND DATECREATED < {interval.toDate()}) '
        f'          AND {where_resolution}'
        f'      GROUP BY 1, 2, 3, 4) '
        f'WHERE '
        f'      AVG_HOUR > 2 '
        f'      AND {max_duration} '
        f'ORDER BY 1; ')
    # print(sql)
    df = sw.fetch_df(sql)
    df["AVG_HOUR"] = df["AVG_HOUR"].map(lambda x: np.nan
                                        if x is None else float(x))
    df["AVG_DAYS"] = df["AVG_DAYS"].map(lambda x: np.nan
                                        if x is None else float(x))
    df.insert(2, "MERGED_STATUS", map_statuses(df["STATUS"]))
    return df
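`convert_date` is not shown on this page, but get_unresolved_ticket_counts above inlines what looks like the same parse-and-normalize expression, so it can plausibly be reconstructed as follows (an assumption):

def convert_date(col: str) -> str:
    # Wraps a raw JSON timestamp field in the UTC-normalizing parse used elsewhere in this file
    return (f"convert_timezone('UTC', to_timestamp_tz({col}::string, "
            f"'YYYY-MM-DD\"T\"HH24:MI:SS.FF TZHTZM'))")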
Example #10
def transition_frequency(sw: SnowflakeWrapper,
                         interval: Interval,
                         limit=10,
                         order="DESC",
                         by_status=True,
                         by_week=False,
                         by_issue_type=False,
                         by_issue_priority=False,
                         get_sql=False) -> Union[pd.DataFrame, str]:
    limit_query = f"LIMIT {limit}" if 0 < limit < 100 else ""

    breakdown_by_issue_type = f'I.FIELDS:issuetype:name::string "IssueType",' if by_issue_type else ""
    breakdown_by_issue_priority = f'I.FIELDS:priority:name::string "IssuePriority",' if by_issue_priority else ""
    breakdown_by_week = f'YEAR "Year", IFF(MONTH = 12 AND WEEKOFYEAR = 1, WEEKOFYEAR + 52, WEEKOFYEAR) "WeekOfYear",' if by_week else ""
    breakdown_by_status = f'CHANGELOGITEM:fromString::string  || \' -> \' || CHANGELOGITEM:toString::string "Transition",' if by_status else ""

    join_issues = "INNER JOIN ISSUES I on C.KEY = I.KEY " if breakdown_by_issue_type or by_issue_priority else ""

    query_group_by_props = ((2 if by_week else 0) + (1 if by_status else 0) +
                            (1 if by_issue_type else 0) + (1 if by_issue_priority else 0))
    group_by = f"GROUP BY {','.join([str(i + 1) for i in range(query_group_by_props)])}" if query_group_by_props > 0 else ""

    query_order = order if order.upper() in ["ASC", "DESC"] else "DESC"
    if not by_week:
        order_by = f"ORDER BY {query_group_by_props + 1} {query_order}"
    else:
        order_by = f"ORDER BY {','.join([str(i + 1) for i in range(query_group_by_props + 1)])} {query_order}"

    # TODO: add join to weeks table which will yield NULLs wherever there are missing values
    #  later on, the NULLs will be converted to zeros (0).
    sql = (
        f'SELECT '
        f'    {breakdown_by_week} '
        f'    {breakdown_by_issue_type} '
        f'    {breakdown_by_issue_priority} '
        f'    {breakdown_by_status} '
        f'    COUNT(*) "TotalTransitions" '
        f'FROM CHANGELOGS C '
        f'      {join_issues} '
        f'WHERE '
        f'    C.CHANGELOGITEM:field::string = \'status\' '
        f'    AND C.DATECREATED >= {interval.fromDate()} AND C.DATECREATED < {interval.toDate()} '
        f'{group_by} '
        f'{order_by} '
        f'{limit_query}; ')
    # print(sql)
    if get_sql:
        return sql
    return sw.fetch_df(sql)
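Because the statement is assembled from flags, the get_sql switch is handy for inspecting what will actually run. A usage sketch, assuming an open connection:

# weekly transition counts per issue type; print the SQL first, then fetch
sql = transition_frequency(sw, interval, by_week=True, by_issue_type=True, get_sql=True)
print(sql)
df = transition_frequency(sw, interval, by_week=True, by_issue_type=True)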
Example #11
def get_avg_timeline(sw: SnowflakeWrapper, interval: Interval) -> pd.DataFrame:
    query = (f'SELECT '
             f'    STATUS                          "Status", '
             f'    COUNT(DISTINCT KEY)             "UniqueIssues", '
             f'    COUNT(*)                        "Issues", '
             f'    "Issues" - "UniqueIssues"       "Reassignments", '
             f'    AVG(TIMEDELTA) / (60 * 60 * 24) "AvgDays", '
             f'    MAX(TIMEDELTA) / (60 * 60 * 24) "MaxDays", '
             f'    MIN(TIMEDELTA) / (60 * 60 * 24) "MinDays" '
             f'FROM TIMELINES t '
             f'WHERE '
             f'    t.DATEFROM >= {interval.fromDate()} '
             f'    AND t.DATETO < {interval.toDate()} '
             f'GROUP BY 1 '
             f'ORDER BY 1, 4 DESC; ')
    print(query)
    return sw.fetch_df(query)
Example #12
def work_activity_on_interval(
        sw: SnowflakeWrapper,
        interval: Interval,
        ticket_keys: Union[None, list] = None) -> pd.DataFrame:
    """
    Returns the work activity (changes of assignees and statuses) on a given interval.
    To be able to infer the beginning state of the issue at intervalStartDate, the activity is provided on the interval:
    [issueCreationDate, intervalEndDate)

    :param sw: SnowflakeWrapper
    :param interval: (from:date, to:date)
    :param ticket_keys: custom list of ticket keys
    :return:
    """
    if ticket_keys is None:
        ticket_keys = tickets_active_on_interval(sw, interval)
    ids = f" c.KEY IN ({utils.mask_in(ticket_keys)}) AND "
    # NOTE: `ids` is built but not yet applied; only interval.toDate() is used in the query below
    print("WARNING: Given interval and/or ids are being ignored at the moment.")
    sql = (
        f"SELECT "
        f"    c.KEY, "
        f"    i.{utils.decode_user('fields', 'reporter')} reporter, "
        f"    TO_TIMESTAMP_NTZ(convert_timezone('UTC', to_timestamp_tz(i.{utils.decode_field('fields', 'created')}::string, 'YYYY-MM-DD\"T\"HH24:MI:SS.FF TZHTZM'))) dateCreated, "
        f"    ARRAY_AGG( "
        f"        OBJECT_CONSTRUCT( "
        f"            'author', USERID, "
        f"            'dateCreated', c.DATECREATED, "
        f"            'changelogItems', ARRAY_CONSTRUCT(CHANGELOGITEM) "
        f"            ) "
        f"        ) CHANGELOGITEMS "
        f"FROM CHANGELOGS c INNER JOIN ISSUES i ON c.KEY = i.KEY "
        f"WHERE "
        f"    c.changelogItem:field IN ('status', 'assignee') AND "
        f"    c.DATECREATED < {interval.toDate()} "
        f"GROUP BY 1, 2, 3 ")
    print(f"Executing: {sql}")
    changelog = sw.fetch_df(sql)

    changelog['CHANGELOGITEMS'] = changelog['CHANGELOGITEMS'].apply(
        lambda x: sort_and_merge(
            json.loads(x, object_pairs_hook=load_with_datetime)))
    return changelog
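`load_with_datetime` is passed to json.loads as an object_pairs_hook and is not listed here; presumably it revives ISO-formatted timestamp strings into datetime objects. A hypothetical stand-in:

from datetime import datetime

def load_with_datetime(pairs):
    # json.loads object_pairs_hook: parse string values as ISO timestamps where possible
    result = {}
    for key, value in pairs:
        if isinstance(value, str):
            try:
                result[key] = datetime.fromisoformat(value)
                continue
            except ValueError:
                pass
        result[key] = value
    return result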
Example #13
def get_aggregated_authored_activity(sw: SnowflakeWrapper, interval: Interval, user_id: Union[None, list] = None):
    ids = "" if user_id is None else f" USERID IN ({mask_in(user_id)}) AND"
    result = sw.fetch_df(
        f"SELECT "
        f"    USERID, "
        f"    ARRAY_AGG( "
        f"        OBJECT_CONSTRUCT( "
        f"            'userId', USERID, "
        f"            'field', FIELD, "
        f"            'count', CNT "
        f"            ) "
        f"        ) ACTIVITY "
        f"FROM ( "
        f"    SELECT "
        f"        activity.USERID, "
        f"        item.VALUE:field::string FIELD, "
        f"        COUNT(*) CNT "
        f"    FROM "
        f"         (SELECT "
        f"            USERID, "
        f"            ARRAY_AGG(CHANGELOGITEM) CHANGELOGITEMS "
        f"        FROM CHANGELOGS "
        f"        WHERE "
        f"            {ids} "
        f"            DATECREATED >= {interval.fromDate()} "
        f"            AND DATECREATED < {interval.toDate()} "
        f"        GROUP BY USERID) activity, "
        f"         lateral flatten(activity.CHANGELOGITEMS) item "
        f"    GROUP BY 1, 2 "
        f") "
        f"GROUP BY 1 "
        f"ORDER BY 1; "
    )
    return pd.concat(
        result['ACTIVITY'].apply(
            lambda x: pd.DataFrame(json.loads(x))
                        .pivot(index='userId', columns='field', values='count')
        ).tolist(),
        sort=True)  # TODO: add breakdown by issue type
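The closing pivot is what turns each user's per-field counts into one wide row per user. A tiny self-contained illustration of that reshaping step, with made-up data:

import pandas as pd

records = [  # hypothetical ACTIVITY payload for one user
    {"userId": "u1", "field": "status", "count": 7},
    {"userId": "u1", "field": "assignee", "count": 3},
]
wide = pd.DataFrame(records).pivot(index='userId', columns='field', values='count')
# wide now has one row for 'u1' with columns 'assignee' and 'status'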
Example #14
            f"   AND ( "
            f"     {sql_date} BETWEEN DATEFROM AND DATETO "
            f"     OR ( "
            f"       DATEDIFF('day', {sql_date}, DATEFROM) = 0 "
            f"       AND DATEDIFF('day', {sql_date}, DATETO) > 1 "
            f"     ) "
            f"   ) "
            f" GROUP BY 1, 2"
        )
    sql += " UNION ALL ".join(days) + ") GROUP BY 1, 2 ORDER BY 1;"
    # print(sql)
    return sw.fetch_df(sql)


if __name__ == '__main__':
    with SnowflakeWrapper.create_snowflake_connection() as connection:
        sw = SnowflakeWrapper(connection)
        interval = Interval(date(2019, 10, 1), date(2020, 1, 1))
        # result = get_authored_activity(sw, interval, ['marko.prelevikj'])
        # plt.figure()
        # result.hist('status', bins=40)
        # plt.show()
        # dev = get_developer(sw, Interval(date(2019, 10, 1), date(2020, 1, 1)), 'marko.prelevikj')
        # avg_dev = get_avg_developer(sw, interval, include_nans=False)
        # avg_dev_nan = get_avg_developer(sw, include_nans=True)
        # assigned_interval = Interval(date(2019, 10, 1), date(2019, 11, 6))
        assigned_interval = Interval(date(2019, 10, 1), date(2020, 1, 1))
        # assigned = tickets_assigned_in_interval(sw, 'marko.prelevikj', assigned_interval)
        assigned_per_day = tickets_assigned_per_day(sw, 'marko.prelevikj', assigned_interval)
        # data = get_developers(sw, Interval(date(2019, 10, 1), date(2020, 1, 1)), break_by=[], user_filters={})
Example #15
File: tickets.py Project: pr3mar/thesis
def get_ticket_types(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        'SELECT DISTINCT i.FIELDS:issuetype:name::string "IssueType" FROM ISSUES i;'
    )["IssueType"].to_list()
Example #16
File: tickets.py Project: pr3mar/thesis
def get_ticket_priorities(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        'SELECT DISTINCT i.FIELDS:priority:name::string "IssuePriority" FROM ISSUES i;'
    )["IssuePriority"].to_list()
Example #17
def get_developers(
        sw: SnowflakeWrapper,
        interval: Interval,
        user_filters: Union[dict, None] = None,
        break_by: Union[list, None] = None,
        max_duration: int = 30,
        debug: bool = False,
) -> DataFrame:
    if user_filters is None:
        user_filters = {"status": "dev"}
    if break_by is None:
        break_by = ["assignee"]
    else:
        break_by = list(set(["assignee"] + break_by))

    dimensions = {
        "type": ' i.FIELDS:issuetype:name::string "IssueType" ',
        "priority": ' i.FIELDS:priority:name::string  "IssuePriority" ',
        "status": " STATUS ",
        "assignee": " ASSIGNEE ",
    }
    statuses = ','.join(f"'{s}'" for s in expand_statuses(user_filters["status"])) \
        if user_filters.get("status") else None
    filters = {
        "status": f"STATUS IN ({statuses})" if statuses else " 1 = 1 ",
        "priority": f'i.FIELDS:priority:name::string IN (\'{user_filters["priority"]}\')'
                    if user_filters.get("priority") else " 1 = 1 ",
        "type": f'i.FIELDS:issuetype:name::string IN (\'{user_filters["type"]}\')'
                if user_filters.get("type") else " 1 = 1 ",
    }
    sql_filters = ' AND '.join(filters.values())
    dims = ','.join([dimensions[dim] for dim in break_by])
    group_by = ','.join([str(i + 1) for i, _ in enumerate(break_by)])
    max_duration = f" AVG_DAYS <= {min(30, max_duration)} " if max_duration > 1 else " AVG_HOURS < 10 "

    sql = (
        f' SELECT * '
        f' FROM ( '
        f'          SELECT '
        f'              {dims}, '
        f'              COUNT(DISTINCT t.KEY)            "UniqueIssues", '
        f'              COUNT(t.KEY)                     "Issues", '
        f'              ("Issues" / "UniqueIssues") - 1  "DegreeOfCycling", '
        f'              AVG(TIMEDELTA) / (60 * 60 * 24)  AVG_DAYS, '
        f'              MAX(TIMEDELTA) / (60 * 60 * 24)  MAX_DAYS, '
        f'              MIN(TIMEDELTA) / (60 * 60 * 24)  MIN_DAYS, '
        f'              AVG(TIMEDELTA) / (60 * 60)       AVG_HOURS, '
        f'              MAX(TIMEDELTA) / (60 * 60)       MAX_HOURS, '
        f'              MIN(TIMEDELTA) / (60 * 60)       MIN_HOURS '
        f'          FROM TIMELINES t INNER JOIN ISSUES i ON t.KEY = i.KEY '
        f'          WHERE DATEFROM >= {interval.fromDate()} '
        f'            AND (DATETO < {interval.toDate()} OR DATETO IS NULL) '
        f'            AND ASSIGNEE IS NOT NULL '
        f'            AND {sql_filters}'
        f'          GROUP BY {group_by} '
        f'      ) '
        f' WHERE AVG_HOURS > 2 '
        f'   AND {duration_filter} '
        f' ORDER BY 1 '
    )
    if debug:
        print(sql)
    df = sw.fetch_df(sql)
    df["AVG_DAYS"] = df["AVG_DAYS"].map(lambda x: np.nan if x is None else float(x))
    df["AVG_HOURS"] = df["AVG_HOURS"].map(lambda x: np.nan if x is None else float(x))
    if "status" in break_by:
        df.insert(2, "MERGED_STATUS", map_statuses(df["STATUS"]))
    return df
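`expand_statuses` is assumed to map a status-group label such as "dev" to the concrete workflow statuses it covers; the real mapping lives elsewhere in the repo. A hypothetical stand-in:

def expand_statuses(group: str) -> list:
    groups = {  # illustrative mapping only, not the project's actual workflow
        "dev": ["In Development", "In Code Review"],
    }
    return groups.get(group, [group])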
Example #18
def get_distinct_statuses(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df("SELECT ID "
                       "FROM STATUSES "
                       "ORDER BY 1;")['ID'].tolist()