Exemplo n.º 1
0
def test_df(df, column_list):
    """Build a DataFrame from raw planview records and the temp-table DDL.

    Appends the converted rows to the module-level global ``dfObj`` and
    stores the ``create temp table`` statement in global ``temp_table``.

    Args:
    df          : iterable of records, each record an iterable of
                  single-entry dicts (planview ordered-dict rows)
    column_list : list of column names for the dataframe / temp table

    Returns:
    No Return; mutates globals dfObj and temp_table.
    """
    subtaskid = createSubTask(
        "Create dataframe of planview and temp table string", getMaintaskId())
    try:
        # Each record is a sequence of one-entry dicts; keep the single value.
        val_list = [[list(v.values())[0] for v in record] for record in df]
        df1 = pd.DataFrame(val_list)
        # Assign the names directly: the original wrapped the list again
        # ([column_list]), which creates a one-level MultiIndex instead of
        # plain columns, and performed the assignment twice.
        df1.columns = column_list
        global dfObj
        dfObj = dfObj.append(df1)
        # Build the temp-table DDL; every column is stored as varchar(100).
        entityname = 'temp_table'
        columns_sql = ",".join(
            "{0} varchar(100)".format(c) for c in column_list)
        global temp_table
        temp_table = " create temp table " + entityname + " (" + columns_sql + ");"
        # Siblings report success on completion; this one was missing it.
        updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        insertErrorLog(subtaskid, error)
Exemplo n.º 2
0
def sanity_check_counts():
    """Execute the sanity-check queries and report the failed cases.

    Each configured query is run, its first value compared against the
    expected value; mismatches trigger an alert mail and every result is
    persisted to birepusr.etl_data_sanity_test_results.

    Args:
    No Arguments

    Returns:
    No return Arguments
    """
    subtaskid = createSubTask("sanity check queries", getMaintaskId())
    try:
        conn = dbConnection()
        cur = conn.cursor()
        cur.execute("select query, name, expected_value from birepusr.etl_data_sanity_queries where project_name='planview'")
        rows = cur.fetchall()
        for row in rows:
            # Reuse the one connection/cursor: the original opened a fresh
            # database connection for every row and never closed any of them.
            cur.execute(row[0])
            x = cur.fetchall()
            # An empty result set counts as an actual value of 0 rows.
            actual_value = x[0][0] if x else 0

            condition = row[1]
            expected_value = row[2]
            status = 'Success'
            if expected_value != actual_value:
                status = 'Fail'
                message = '''Expected vs Actual values not matching for check '{0}': expected {1} rows but found {2}'''.format(condition, expected_value, actual_value)
                send_custom_mail('Service-Desk ETL : DATA SANITY TEST FAILED', message)

            executeQuery('''insert into birepusr.etl_data_sanity_test_results(condition, created_date, expected_value, actual_value, status, task_id,project_name) values ('{0}', now(), {1}, {2}, '{3}', '{4}','planview')'''.format(condition, expected_value, actual_value, status, subtaskid))
        conn.close()  # release the connection; the original leaked it
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception, psycopg2.Error) as error:
        insertErrorLog(subtaskid, error)
Exemplo n.º 3
0
def getPlanviewData(mylist, column_list, ppm_id):
    """Convert one planview record into a row of the global dfObj and build
    the temp-table DDL string into global temp_table.

    Args:
    mylist      : list of dicts, one per column, each carrying 'elementValue'
    column_list : list of column names, positionally aligned with mylist
    ppm_id      : planview entity id (currently unused in the body)

    Returns:
    No Return; mutates globals dfObj and temp_table.
    """
    subtaskid = createSubTask(
        "Create dataframe of planview and temp table string", getMaintaskId())
    try:
        entityname = 'temp_table'
        json_parts = []
        ddl_parts = []
        # enumerate instead of column_list.index(...): index() is O(n) per
        # column (O(n^2) overall) and returns the first match, which silently
        # misaligns values when column names repeat.
        for idx, columnname in enumerate(column_list):
            value = str(mylist[idx]['elementValue']).replace(
                "\"", "").replace("\\", "\\\\")
            json_parts.append('''"{0}": "{1}"'''.format(columnname, value))
            ddl_parts.append('''{0} varchar(100)'''.format(columnname))
        strmy = "{" + ",".join(json_parts) + "}"
        global temp_table
        temp_table = (" create temp table " + entityname + " (" +
                      ",".join(ddl_parts) + ");")
        global dfObj
        dfObj = dfObj.append(dict(json.loads(strmy)), ignore_index=True)
        dfObj = dfObj[column_list]
        dfObj.fillna(value='NULL', inplace=True)
        updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        insertErrorLog(subtaskid, error)
Exemplo n.º 4
0
def loadEntityData():
    """Load planview data into the warehouse tables for every configured
    entity, in load_sequence order.

    For each entity the select-list JSON is split into planview element ids
    (mList) and column names (column_list), the raw data is fetched, staged
    via the global dfObj / temp_table, and pushed to the db by createDf.

    Args:
    No Argument

    Returns:
    No Return
    """
    subtaskid = createSubTask("Load planview data into tables",
                              getMaintaskId())
    try:
        conn = dbConnection()
        cur = conn.cursor()
        cur.execute(
            "select entity_ppm_id from idw.planview_entity_dim order by load_sequence"
        )
        rows1 = cur.fetchall()
        for row in rows1:
            ppm_id = row[0]
            # Parameterized query instead of str.format — avoids SQL
            # injection and quoting problems.
            cur.execute(
                "select column_select_list from idw.planview_entity_dim where entity_ppm_id=%s",
                (ppm_id,))
            rows = cur.fetchall()
            # column_select_list is a JSON object: values are planview
            # element ids, keys are the target column names.
            json_dict = rows[0][0].values()
            get_list = ",".join([str(e) for e in json_dict])
            mList = [int(e) if e.isdigit() else e for e in get_list.split(",")]
            json_keys = rows[0][0].keys()
            get_Cols = ",".join([str(e) for e in json_keys])
            column_list = [
                int(e) if e.isdigit() else e for e in get_Cols.split(",")
            ]
            data = getData(ppm_id, mList)
            df = data['methodValues']
            test_df(df, column_list)
            createDf(ppm_id, dfObj, temp_table)
            # Reset the shared globals so the next entity starts clean.
            dfObj.drop(columns=column_list, inplace=True)
            df.drop(df.index, inplace=True)
        conn.close()  # the original never closed the connection
        updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        insertErrorLog(subtaskid, error)
Exemplo n.º 5
0
def initUpdateSprintMetrics():
    """Kick off the metrics refresh for all newly loaded sprints whose
    metrics have not yet been computed, logging the work as a sub-task.

    Args:
    No Arguments

    Returns:
    No Return
    """
    task_id = createSubTask(
        "update sprint merics in idw.jira_sprint_summary", getMaintaskId())
    try:
        updateSprintMetrics()
        updateSubTask(task_id, "SUCCESS")
    except (Exception, psycopg2.Error) as err:
        insertErrorLog(task_id, err)
Exemplo n.º 6
0
def generateTimeSpentMetrics():
    """Run the stored procedure that recomputes time-spent metrics by
    project feature, logging the work as a sub-task.

    Args:
    No Arguments

    Returns:
    No Return
    """
    task_id = createSubTask(
        "update time-spent merics by project feature in idw.task_time_spent_fact",
        getMaintaskId())
    try:
        execute_proc('idw.feature_time_spent_calculation_proc')
        updateSubTask(task_id, "SUCCESS")
    except (Exception, psycopg2.Error) as err:
        insertErrorLog(task_id, err)
Exemplo n.º 7
0
def updateSprintMetrics():
    """Trigger the metric-update queries for newly added sprints.

    Metrics include estimated points, completed points, active team members
    working on the team, actual sprint dates and sprint days.

    Args:
    No Arguments

    Returns:
    No Return
    """
    task_id = createSubTask("update metrics for newly added sprints",
                            getMaintaskId())

    # Run each update statement in the original, fixed order.
    metric_queries = (
        dq.update_estimate_sprint_points,
        dq.update_actual_completed_sprint_points,
        dq.update_completed_sprint_points,
        dq.update_team_members,
        dq.update_actual_sprint_date,
        dq.update_sprint_days,
    )
    for query in metric_queries:
        executeQuery(query)
    updateSubTask(task_id, "SUCCESS")
Exemplo n.º 8
0
def loadEntityData():
    """Pull data from planview for every major entity (Project, Tasks,
    Resources, Unit, ...) and load it into the warehouse tables.

    Args:
    No Argument

    Returns:
    No Return
    """
    subtaskid = al.createSubTask("Initiate data extraction from planview",
                                 al.getMaintaskId())
    planview_entities = executeQueryAndReturnDF(
        "select entity_ppm_id,column_select_list,entity_name from idw.planview_entity_dim order by load_sequence"
    )

    def _typed(values):
        # Planview select-list entries arrive as strings; keep digits as int.
        return [int(v) if v.isdigit() else v for v in values]

    for _, entity in planview_entities.iterrows():
        # Entity details required to pull data from planview.
        ppm_id = int(entity['entity_ppm_id'])
        column_select_list = entity['column_select_list']
        entity_name = entity['entity_name']

        # Values are planview element ids; keys are the column names.
        joined_values = ",".join(str(v) for v in column_select_list.values())
        mList = _typed(joined_values.split(","))
        joined_keys = ",".join(str(k) for k in column_select_list.keys())
        column_list = _typed(joined_keys.split(","))

        df = planview.getData(ppm_id, mList)['methodValues']
        dfObj = process_entity_data(df, column_list)
        temp_table = get_table_schema(column_list, 'pv_entity_temp_table')
        createDf(dfObj, temp_table, entity_name)
        dfObj.drop(columns=column_list, inplace=True)
        df.drop(df.index, inplace=True)

    al.updateSubTask(subtaskid, "SUCCESS")
Exemplo n.º 9
0
def createDf(ppm_id, df, temp_table):
    """Create the temp table, COPY the dataframe into it, then run the
    entity's load procedure and commit.

    Args:
    ppm_id     : planview entity id, used to look up the entity name
    df         : dataframe holding the staged entity data
    temp_table : "create temp table ..." DDL string executed first

    Returns:
    No Return
    """
    subtaskid = createSubTask(
        "Create temp table to load the planview data into db tables",
        getMaintaskId())
    conn = None
    try:
        conn = dbConnection()
        cur = conn.cursor()
        cur.execute(temp_table)
        # Stream the dataframe through an in-memory CSV into the temp table.
        output = io.StringIO()
        df.to_csv(output, sep='|', header=True, index=False)
        output.seek(0)
        copy_query = "COPY temp_table FROM STDOUT csv DELIMITER '|' NULL ''  ESCAPE '\\' HEADER "
        cur.copy_expert(copy_query, output)
        # Parameterized query instead of str.format — avoids SQL injection.
        cur.execute(
            'select entity_name from idw.planview_entity_dim where entity_ppm_id = %s',
            (ppm_id,))
        ppm = cur.fetchall()
        # Each entity has a dedicated load procedure named idw.proc_<entity>.
        proc_name = ("idw.proc_" + ppm[0][0]).lower()
        cur.callproc(proc_name)
        conn.commit()
        updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        insertErrorLog(subtaskid, error)
    finally:
        # The original leaked the connection on every call.
        if conn is not None:
            conn.close()
Exemplo n.º 10
0
def process_entity_data(df, column_list):
    """Process raw data received from planview.

    Planview provides each record as a sequence of single-entry (ordered)
    dicts; this function converts them into a DataFrame so the data can be
    imported into a table.

    Args:
    df          : dataframe/iterable with the raw planview records
    column_list : list of column names for the resulting dataframe

    Returns:
    dataframe : formatted dataframe with the required columns; empty on
                failure (the original implicitly returned None, which
                crashed the caller on .drop()).
    """
    subtaskid = al.createSubTask(
        "Create dataframe of planview and temp table string",
        al.getMaintaskId())
    dfObj = pd.DataFrame()

    try:
        # Each record is a sequence of one-entry dicts; keep the single value.
        val_list = [[list(v.values())[0] for v in record] for record in df]
        df1 = pd.DataFrame(val_list)
        # Assign the names directly: the original wrapped the list again
        # ([column_list]), which creates a one-level MultiIndex instead of
        # plain columns, and performed the assignment twice.
        df1.columns = column_list
        dfObj = dfObj.append(df1)
        al.updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        al.insertErrorLog(subtaskid, error)
    return dfObj
Exemplo n.º 11
0
def timesheet_entry_user():
    """Generate timesheet entries from warehouse timesheet and sprint data
    and insert them into planview.

    Flow: fetch the user list and start date, pull their timesheets from
    planview, flatten the entries into a dataframe, COPY it into a temp
    table, join with sprint data, push new entries into planview, then
    retry previously failed entries.

    Args:
    No Arguments

    Returns:
    No Return
    """
    subtaskid = al.createSubTask(
        "Populate timesheet for latest sprints in planview",
        al.getMaintaskId())
    try:
        # Fetch user ids for processing.
        planview_entities = executeQueryAndReturnDF(dq.get_userlist_to_populate_timesheet)
        users = planview_entities['user_list']
        # Maps lower-case db column names back to planview's camelCase keys.
        ts_columns = {'companyid':'companyId','entrytypeid':'entryTypeId','timesheetid':'timesheetId','userid':'userId', 'level1id':'level1Id','level2id':'level2Id','entrydate':'entryDate','entryid':'entryId','level3id':'level3Id','entryhours':'entryHours'}

        if users[0] is not None:
            # users[0] is a comma-separated id string; both branches of the
            # conditional produce int, so every element must be numeric.
            mList = [int(e) if e.isdigit() else int(e) for e in users[0].split(',')]

            startDate = planview_entities['min']
            # Fetch timesheets for the above user list.
            sheet = planview.getTimesheet_users(mList,startDate[0])

            column_names = ['timesheetId','userId','startDate','endDate','entryId','entryDate','entryHours','project_ppm_id','task_ppm_id']

            user_timesheet_data = pd.DataFrame(columns=column_names)

            # Flatten: one row per project entry; timesheets without project
            # entries still produce a placeholder row with NULL entry fields.
            for tsrow in sheet.itertuples(index=True, name='Pandas'):
                if not tsrow.entriesProject:
                    user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'userId':tsrow.userId, 'startDate':tsrow.startDate, 'endDate':tsrow.endDate,'entryId': None,'entryDate': None,'entryHours': None,'project_ppm_id': None,'task_ppm_id': None}, ignore_index=True)
                else:
                    for entry in tsrow.entriesProject:
                        user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId, 'userId':tsrow.userId, 'startDate':tsrow.startDate, 'endDate':tsrow.endDate,'entryId': entry['entryId'],'entryDate': entry['entryDate'],'entryHours': entry['entryHours'],'project_ppm_id': entry['level1Id'],'task_ppm_id': entry['level2Id']}, ignore_index=True)

            sprint_number = executeQueryAndReturnDF(dq.get_sprints_for_timesheet)
            sprint_list = sprint_number['sprint_number'][0]
            conn = dbConnection()
            cur = conn.cursor()
            # Export timesheet details into a temporary table via COPY.
            cur.execute('''create temp table timesheet_data (timesheetId bigint, 
            userId bigint, startDate date, endDate date, entryId bigint, 
            entryDate date,entryHours numeric, project_ppm_id bigint, task_id bigint) ''')
            output = io.StringIO()
            user_timesheet_data.to_csv(output, sep='|', header=True, index=False)
            output.seek(0)
            copy_query = "COPY timesheet_data FROM STDOUT csv DELIMITER '|' NULL ''  ESCAPE '\\' HEADER "
            cur.copy_expert(copy_query, output)

            # Join timesheet data and sprint data.
            query = dq.timesheet_population_query.format(sprint_list)

            timesheet_entry =  pd.read_sql_query(query, coerce_float=False, con=conn)

            conn.commit()
            timesheet_entry.rename(columns=ts_columns,inplace=True)
            init_timesheet_insert(timesheet_entry,'New')
            cur.execute('''update idw.jira_sprint_summary set is_timesheet_populated = true where sprint_number in ({0})'''.format(sprint_list))
            conn.commit()
        else:
            print("User list is Empty")

        # Retry timesheet entries that failed on a previous run.
        failed_entries_df = executeQueryAndReturnDF("select companyId,entryTypeId,timesheetId,userId,level1Id,level2Id,to_char(entryDate,'YYYY/MM/DD') entryDate,entryId,level3Id,entryHours from idw.planview_failed_timesheetID")
        failed_entries_df.rename(columns=ts_columns,inplace=True)
        init_timesheet_insert(failed_entries_df,'Existing')
        al.updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        al.insertErrorLog(subtaskid, error)
Exemplo n.º 12
0
def delete_entries():
    """Delete planview timesheet entries for each team flagged for refresh.

    For every team returned by dq.delete_timesheet_entries: pull the users'
    timesheets from planview, flatten them into a temp table, aggregate the
    entry ids per timesheet within the team's date window, delete those
    entries in planview, then mark the team's refresh log row as done.
    """
    subtaskid = al.createSubTask("Delete timesheet entries for a team",
                                 al.getMaintaskId())
    try:
        timesheet_team = executeQueryAndReturnDF(dq.delete_timesheet_entries)

        for team in timesheet_team.itertuples(index=True, name='Pandas'):
            print(team.resource_list)
            users = team.resource_list
            # NOTE(review): if users is None, mList keeps the value from the
            # previous iteration (NameError on the first one) — confirm that
            # resource_list is never NULL in practice.
            if users is not None:
                mList = [
                    int(e) if e.isdigit() else int(e) for e in users.split(',')
                ]

            team_id = team.team_dim_id
            startDate = team.from_date
            endDate = team.to_date
            # Fetch timesheets for the above user list.
            sheet = planview.getTimesheet_users(mList, startDate)

            column_names = [
                'timesheetId', 'userId', 'startDate', 'endDate', 'entryId',
                'entryDate', 'entryHours', 'project_ppm_id', 'task_ppm_id'
            ]

            user_timesheet_data = pd.DataFrame(columns=column_names)

            # Flatten: one row per project entry; timesheets without project
            # entries still produce a placeholder row with NULL entry fields.
            for tsrow in sheet.itertuples(index=True, name='Pandas'):
                if not tsrow.entriesProject:
                    user_timesheet_data = user_timesheet_data.append(
                        {
                            'timesheetId': tsrow.timesheetId,
                            'userId': tsrow.userId,
                            'startDate': tsrow.startDate,
                            'endDate': tsrow.endDate,
                            'entryId': None,
                            'entryDate': None,
                            'entryHours': None,
                            'project_ppm_id': None,
                            'task_ppm_id': None
                        },
                        ignore_index=True)
                else:
                    for entry in tsrow.entriesProject:
                        user_timesheet_data = user_timesheet_data.append(
                            {
                                'timesheetId': tsrow.timesheetId,
                                'userId': tsrow.userId,
                                'startDate': tsrow.startDate,
                                'endDate': tsrow.endDate,
                                'entryId': entry['entryId'],
                                'entryDate': entry['entryDate'],
                                'entryHours': entry['entryHours'],
                                'project_ppm_id': entry['level1Id'],
                                'task_ppm_id': entry['level2Id']
                            },
                            ignore_index=True)

            print(user_timesheet_data.to_string())
            # Export timesheet details into a temporary table via COPY.
            # NOTE(review): a new connection is opened per team and never
            # closed — verify pooling/cleanup happens elsewhere.
            conn = dbConnection()
            cur = conn.cursor()
            cur.execute(
                '''create temp table timesheet_data_delete (timesheetId bigint, 
            userId bigint, startDate date, endDate date, entryId bigint, 
            entryDate date,entryHours numeric, project_ppm_id bigint, task_id bigint) '''
            )
            output = io.StringIO()
            user_timesheet_data.to_csv(output,
                                       sep='|',
                                       header=True,
                                       index=False)
            output.seek(0)
            copy_query = "COPY timesheet_data_delete FROM STDOUT csv DELIMITER '|' NULL ''  ESCAPE '\\' HEADER "
            cur.copy_expert(copy_query, output)
            # Aggregate entry ids per (timesheet, user) within the window.
            query = '''select timesheetid,userid,string_agg(entryid::varchar(15), ', ') AS entryid from timesheet_data_delete where entrydate between '{0}' and '{1}' group by timesheetid,userid'''.format(
                startDate, endDate)
            entries = pd.read_sql_query(query, con=conn)
            print(entries)
            conn.commit()

            for i in entries.itertuples(index=True, name='Pandas'):
                print(i.timesheetid)
                print(i.entryid)
                entries = i.entryid
                # NOTE(review): if entryid is None, entry_List keeps the
                # value from the previous iteration (NameError on the first)
                # — confirm string_agg never yields NULL here.
                if entries is not None:
                    entry_List = [
                        int(e) if e.isdigit() else int(e)
                        for e in entries.split(',')
                    ]

                print(entry_List)

                timesheet_id = i.timesheetid
                # Delete the timesheet entries for this timesheetid.
                planview.deleteTimesheet_entries(timesheet_id, entry_List)
            cur.execute(
                '''update idw.timesheet_refresh_log set is_timesheet_refreshed = true where team_dim_id = {0}'''
                .format(team_id))
            conn.commit()

    except (Exception) as error:
        al.insertErrorLog(subtaskid, error)
Exemplo n.º 13
0
def timesheet_entry_all_user():
    """Pull timesheet entries from planview for users and insert them into
    the planview_timesheet_entries table.

    For each batch of users: fetch their timesheets, flatten all four entry
    collections (project/other/account/portfolio) into one dataframe, COPY
    it into a temp table, then run the configured flag-update/insert/
    dim-update statements.

    Args:
    No Arguments

    Returns:
    No Return
    """
    subtaskid = al.createSubTask(
        "Pull timesheet entries from planview for users and insert into planview_timesheet_entries table",
        al.getMaintaskId())
    try:
        # Fetch user ids for processing.
        planview_entities = executeQueryAndReturnDF(dq.get_user_list_to_pull_timesheet_entries)
        conn = dbConnection()
        cur = conn.cursor()
        for index, row in planview_entities.iterrows():
            users = row['user_list']
            # user_list is a comma-separated id string; both branches of the
            # conditional produce int, so every element must be numeric.
            if users is not None:
                mList = [int(e) if e.isdigit() else int(e) for e in users.split(',')]
            else:
                raise Exception("User list is Empty")
            startDate = row['min']

            # Fetch timesheet entries for the user list.
            sheet = planview.getTimesheet_users(mList,startDate)

            column_names = ['timesheetId', 'companyId', 'entryDate', 'entryHours', 'entryId', 'entryTypeId', 'externalId', 'internalRate', 'billableRate', 'isBillable', 'isProductive', 'level1Id', 'level2Id', 'level3Id', 'locationId', 'notes', 'state', 'userId']
            user_timesheet_data = pd.DataFrame(columns=column_names)

            # Flatten all four entry collections into one dataframe; each
            # entry carries the owning timesheetId/userId.
            for tsrow in sheet.itertuples(index=True, name='Pandas'):

                for entry in tsrow.entriesProject:
                    user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry['companyId'], 'entryDate': entry['entryDate'],'entryHours': entry['entryHours'],'entryId': entry['entryId'],'entryTypeId': entry['entryTypeId'] ,'externalId': entry['externalId'] ,'internalRate': entry['internalRate'] ,'billableRate': entry['billableRate'] ,'isBillable': entry['isBillable'] ,'isProductive': entry['isProductive'] ,'level1Id': entry['level1Id'],'level2Id': entry['level2Id'],'level3Id': entry['level3Id'],'locationId': entry['locationId'],'notes': entry['notes'],'state': entry['state'],'userId':tsrow.userId}, ignore_index=True)
                for entry1 in tsrow.entriesOther:
                    user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry1['companyId'], 'entryDate': entry1['entryDate'],'entryHours': entry1['entryHours'],'entryId': entry1['entryId'],'entryTypeId': entry1['entryTypeId'] ,'externalId': entry1['externalId'] ,'internalRate': entry1['internalRate'] ,'billableRate': entry1['billableRate'] ,'isBillable': entry1['isBillable'] ,'isProductive': entry1['isProductive'] ,'level1Id': entry1['level1Id'],'level2Id': entry1['level2Id'],'level3Id': entry1['level3Id'],'locationId': entry1['locationId'],'notes': entry1['notes'],'state': entry1['state'],'userId':tsrow.userId}, ignore_index=True)
                for entry2 in tsrow.entriesAccount:
                    user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry2['companyId'], 'entryDate': entry2['entryDate'],'entryHours': entry2['entryHours'],'entryId': entry2['entryId'],'entryTypeId': entry2['entryTypeId'] ,'externalId': entry2['externalId'] ,'internalRate': entry2['internalRate'] ,'billableRate': entry2['billableRate'] ,'isBillable': entry2['isBillable'] ,'isProductive': entry2['isProductive'] ,'level1Id': entry2['level1Id'],'level2Id': entry2['level2Id'],'level3Id': entry2['level3Id'],'locationId': entry2['locationId'],'notes': entry2['notes'],'state': entry2['state'],'userId':tsrow.userId}, ignore_index=True)
                for entry3 in tsrow.entriesPortfolio:
                    user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry3['companyId'], 'entryDate': entry3['entryDate'],'entryHours': entry3['entryHours'],'entryId': entry3['entryId'],'entryTypeId': entry3['entryTypeId'] ,'externalId': entry3['externalId'] ,'internalRate': entry3['internalRate'] ,'billableRate': entry3['billableRate'] ,'isBillable': entry3['isBillable'] ,'isProductive': entry3['isProductive'] ,'level1Id': entry3['level1Id'],'level2Id': entry3['level2Id'],'level3Id': entry3['level3Id'],'locationId': entry3['locationId'],'notes': entry3['notes'],'state': entry3['state'],'userId':tsrow.userId}, ignore_index=True)

                # Normalize flag columns so the boolean COPY columns accept them.
                user_timesheet_data['isBillable'] = user_timesheet_data['isBillable'].astype('bool')
                user_timesheet_data['isProductive'] = user_timesheet_data['isProductive'].astype('bool')

            # Export timesheet details into a temporary table via COPY.
            cur.execute('''drop table if exists timesheet_data;''')
            cur.execute('''create temp table timesheet_data (timesheetid numeric, companyid numeric, entrydate date, entryhours numeric, entryid numeric, entrytypeid numeric, externalid varchar(100), internalrate numeric, billablerate numeric, isbillable boolean, isproductive boolean, level1id numeric, level2id numeric, level3id numeric, locationid numeric, notes text, state text, userid numeric);''')
            output = io.StringIO()
            # '~' delimiter: entry notes may contain '|' or ','.
            user_timesheet_data.to_csv(output, sep='~', header=True, index=False)
            output.seek(0)
            copy_query = "COPY timesheet_data FROM STDOUT csv DELIMITER '~' NULL ''  ESCAPE '\\' HEADER "
            cur.copy_expert(copy_query, output)
            conn.commit()
            cur.execute(dq.update_is_active_flag_for_ts_entries)
            cur.execute(dq.insert_timesheet_entries)
            cur.execute(dq.update_timesheet_dim_ids)
            conn.commit()
        al.updateSubTask(subtaskid, "SUCCESS")
    except Exception as error:
        print(error)
        send_custom_mail("Failed to fetch timesheet entries from planview", error)
        al.insertErrorLog(subtaskid, error)