def get_issue_events_dev(org_path, project_name, developer_login):
    path = org_path + '/' + project_name + '/Activities_Plots/' + developer_login
    os.makedirs(path, exist_ok=True)
    if ('complete_issues_events_repo.csv'
            in os.listdir(org_path + '/' + project_name +
                          '/Other_Activities')):
        ### Get Other Issues Events
        issues_events = pandas.read_csv(
            org_path + '/' + project_name +
            '/Other_Activities/complete_issues_events_repo.csv',
            sep=',')
        issues_events_data = pandas.DataFrame(
            columns=['id', 'date', 'event', 'creator_login'])

        for index, event in issues_events.iterrows():
            if (event['creator_login'] == developer_login):
                util.add(issues_events_data, event)
        if (len(issues_events_data) > 0):
            issues_events_data.to_csv(path + '/issues_events.csv',
                                      sep=',',
                                      na_rep='NA',
                                      header=True,
                                      index=False,
                                      mode='w',
                                      encoding='utf-8',
                                      quoting=None,
                                      quotechar='"',
                                      line_terminator='\n',
                                      decimal='.')
        print('{}: Issues Events Extraction Complete'.format(developer_login))
    else:
        print('{}: No Issue Events'.format(project_name))
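# NOTE: `util.add` is called throughout these examples but its source is not
# included here. A minimal sketch consistent with its call sites (appending
# one row to a DataFrame in place) is given below; this is an assumption
# about the helper, not the project's actual implementation.
def _util_add_sketch(df, row):
    # pandas has no in-place append; assigning to the next integer label
    # extends the frame by one row (works for list rows and, by
    # broadcasting, for a single scalar on a one-column frame).
    df.loc[len(df)] = row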
def writeCoreDevelopers(super_path, project_name):
    with open(super_path+'/'+project_name+'/inactivity_interval_list.csv', 'r') as f:  #Read Inactivity Intervals Table
        inactivity_intervals_data = [list(map(str,rec)) for rec in csv.reader(f, delimiter=',')]

    #Read Break Dates Table
    with open(super_path+'/'+project_name+'/break_dates_list.csv', 'r') as f:
        break_dates_data = [list(map(str,rec)) for rec in csv.reader(f, delimiter=',')]
    
    breaks_df = pandas.DataFrame({'durations' : inactivity_intervals_data, 'datelimits' : break_dates_data})
    
    # FILTER DEVELOPERS
    SLIDE_WIN_SIZE = 20
    
    active_users_df = pandas.DataFrame(columns=['durations','datelimits'])
    
    path = (super_path+'/'+project_name)
    
    for index, row in breaks_df.iterrows():
        num_breaks=len(row['durations'])-3
        if ('[bot]' not in row['durations'][0]) and (num_breaks >= SLIDE_WIN_SIZE):
            util.add(active_users_df, row)
    num_all_users = len(inactivity_intervals_data)
    num_active_users = len(active_users_df)
    
    logging.info('Project: '+project_name+' All Users: '+str(num_all_users)+' Breaks_Threshold/Sliding_Window: '+str(SLIDE_WIN_SIZE)+' Active Users: '+str(num_active_users))

    active_users=[]
    for index, row in active_users_df.iterrows():
        user_id=row['durations'][0]
        active_users.append(user_id)
        
    active_users_ids_df=pandas.DataFrame(active_users, columns=['id'])
            
    active_users_ids_df.to_csv(path+'/active_users.csv', sep=';', encoding='utf-8', na_rep='NA', header=True, index=False, mode='w', quoting=None, quotechar='"', line_terminator='\n', decimal='.')

    print('Core Developers Written for '+project_name)
    return active_users_ids_df
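# Hypothetical driver for writeCoreDevelopers: the directory layout
# (<super_path>/<project_name>/inactivity_interval_list.csv) is taken from
# the reads above, but the concrete paths here are placeholders.
def _example_write_core_developers():
    core_devs_df = writeCoreDevelopers('/data/organizations', 'example_project')
    print(core_devs_df.head())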
def writeCommitFile_Login(gith, project_url, start_date, end_date, path):
    import os, requests
    
    logger = open(path+'/commits_extraction.log','a+')
    
    exception_thrown = True
    while(exception_thrown):
        exception_thrown = False
                            
        cfg.waitRateLimit(gith)
        repo = gith.get_repo(project_url)
        
        commits = repo.get_commits(since=start_date, until=end_date) #Fake users to be filtered out (author_id NOT IN (SELECT id from users where fake=1))
        
        count_exception = True
        while(count_exception):
            count_exception = False 
            try:
                num_items = commits.totalCount
            except github.GithubException as ghe:
                if str(ghe)=='500 None':
                    print('Failed to get commits from this project (500 None: Ignoring Repo):', project_url)
                    return
                elif str(ghe).startswith('409'):
                    print('Failed to get commits from this project (409 Empty: Ignoring Repo):', project_url)
                    return
                else:
                    print('Failed to get commits from this project (GITHUB Unknown: Retrying):', project_url)
                    count_exception=True
                pass
            except requests.exceptions.Timeout:
                print('Failed to get commits from this project (TIMEOUT: Retrying):', project_url)
                count_exception=True
                pass
            except:
                print('Failed to get commits from this project (Probably Empty): ', project_url)
                return

        last_page = int(num_items/cfg.items_per_page)
        last_page_read=0
        
        if 'commits_raw_login.csv' in os.listdir(path):
            commits_data = pandas.read_csv(path+'/commits_raw_login.csv', sep=',')
            last_page_read = util.get_last_page_read_short(path+'/commits_extraction.log')
        else:
            commits_data=pandas.DataFrame(columns=['sha', 'author_id', 'date'])
        
        if 'excluded_for_NoneType.csv' in os.listdir(path):
            excluded_commits = pandas.read_csv(path+'/excluded_for_NoneType.csv', sep=',')
        else:
            excluded_commits=pandas.DataFrame(columns=['sha'])
        
        try:
            for page in range(last_page_read, last_page+1):
                commits_page = commits.get_page(page)
                for commit in commits_page:
                    cfg.waitRateLimit(gith)
                    sha=commit.sha
                    if (sha not in commits_data.sha.tolist()) and (sha not in excluded_commits.sha.tolist()):
                        if(commit.author): ### If author is NoneType, that means the author is no longer active in GitHub
                            cfg.waitRateLimit(gith)
                            author_id=commit.author.login ### HERE IS THE DIFFERENCE
                            date=commit.commit.author.date
                            util.add(commits_data,[sha, author_id, date])
            if(len(commits_data)>0):
                commits_data.to_csv(path+'/commits_raw_login.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
        except github.GithubException:
            print('Exception Occurred While Getting COMMITS: Github')
            if(len(commits_data)>0):
                commits_data.to_csv(path+'/commits_raw_login.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
            logger.write('last_page:{}\n'.format(page))
            logger.flush()
            exception_thrown = True
            pass
        except requests.exceptions.Timeout:
            print('Exception Occurred While Getting COMMITS: Timeout')
            if(len(commits_data)>0):
                commits_data.to_csv(path+'/commits_raw_login.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
            logger.write('last_page:{}\n'.format(page))
            logger.flush()
            exception_thrown = True
            pass
        except AttributeError:
            print('Exception Occurred While Getting COMMIT DATA: NoneType for Author. SHA: '+sha)
            util.add(excluded_commits, [sha])
            if(len(commits_data)>0):
                commits_data.to_csv(path+'/commits_raw_login.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
            excluded_commits.to_csv(path+'/excluded_for_NoneType.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
            logger.write('last_page:{}\n'.format(page))
            logger.flush()
            exception_thrown = True
        except:
            print('Execution Interrupted While Getting COMMITS')
            if(len(commits_data)>0):
                commits_data.to_csv(path+'/commits_raw_login.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
            logger.write('last_page:{}\n'.format(page))
            logger.flush()
            raise
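# NOTE: `util.get_last_page_read_short` is not shown. The checkpoint lines
# written above have the form 'last_page:<n>', so a compatible sketch (an
# assumption, not the project's real helper) is:
def _get_last_page_read_short_sketch(log_path):
    last_page = 0
    with open(log_path) as log:
        for line in log:
            if line.startswith('last_page:'):
                # keep the value from the most recent checkpoint line
                last_page = int(line.strip().split(':')[1])
    return last_page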
def get_issue_events_repo(gith, path, repo, project_name, start_date,
                          active_users):  #Why Not get_events()?
    os.makedirs(path, exist_ok=True)

    exception_thrown = True
    while (exception_thrown):
        exception_thrown = False

        if 'issues_events_repo.csv' in os.listdir(path):
            issues_events_data = pandas.read_csv(path +
                                                 '/issues_events_repo.csv',
                                                 sep=',')
        else:
            issues_events_data = pandas.DataFrame(
                columns=['id', 'date', 'event', 'creator_login'])

        if 'issues_events_extraction.log' in os.listdir(path):
            last_issues_page, last_issue, last_events_page = get_last_page_read(
                path + '/issues_events_extraction.log')
        else:
            last_issues_page = 0
            last_issue = ''
            last_events_page = 0

        if 'events_extraction_completed_issues.csv' in os.listdir(path):
            completed_issues = pandas.read_csv(
                path + '/events_extraction_completed_issues.csv', sep=',')
        else:
            completed_issues = pandas.DataFrame(columns=['id'])

        logger = open(path + '/issues_events_extraction.log', 'a+')

        ### Get Other Issues Events
        try:
            issues_page = last_issues_page
            issue_id = ''
            page = 0

            issues = repo.get_issues(state='all',
                                     sort='created_at',
                                     since=start_date)
            num_issues = issues.totalCount
            final_issues_page = int(num_issues / cfg.items_per_page)

            for issues_page in range(last_issues_page, final_issues_page + 1):
                cfg.waitRateLimit(gith)
                current_issues_page = issues.get_page(issues_page)
                for issue in current_issues_page:
                    cfg.waitRateLimit(gith)
                    issue_id = issue.id
                    if (issue_id not in completed_issues.id.tolist()):
                        if (issue_id != last_issue):
                            last_page = 0
                        else:
                            last_page = last_events_page
                        cfg.waitRateLimit(gith)
                        issue_events = issue.get_events()
                        num_items = issue_events.totalCount
                        final_page = int(num_items / cfg.items_per_page)

                        for page in range(last_page, final_page + 1):
                            cfg.waitRateLimit(gith)
                            issues_events_page = issue_events.get_page(page)
                            for event in issues_events_page:
                                cfg.waitRateLimit(gith)
                                event_id = event.id
                                if (event_id
                                        not in issues_events_data.id.tolist()):
                                    if (event.actor):
                                        cfg.waitRateLimit(gith)
                                        actor_login = event.actor.login
                                        if (actor_login in active_users):
                                            cfg.waitRateLimit(gith)
                                            util.add(issues_events_data, [
                                                event_id, event.created_at,
                                                event.event, actor_login
                                            ])
                        util.add(completed_issues, issue_id)
            if (len(issues_events_data) > 0):
                issues_events_data.to_csv(path + '/issues_events_repo.csv',
                                          sep=',',
                                          na_rep='NA',
                                          header=True,
                                          index=False,
                                          mode='w',
                                          encoding='utf-8',
                                          quoting=None,
                                          quotechar='"',
                                          line_terminator='\n',
                                          decimal='.')
                completed_issues.to_csv(
                    path + '/events_extraction_completed_issues.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
                os.rename(path + '/issues_events_repo.csv',
                          path + '/complete_issues_events_repo.csv')
            print('{}: Issues Events Extraction Complete'.format(repo))


#        except github.UnknownObjectException:
#            print('Exception Occurred While Getting ISSUES EVENTS: UnknownObject (Skipped)')
#            logger.write('last_issues_page:{},last_issue:{},last_event_page:{}\n'.format(issues_page, issue_id, page))
#            logger.flush()
#            if(len(issues_events_data)>0):
#                issues_events_data.to_csv(path+'/issues_events_repo.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
#                completed_issues.to_csv(path+'/events_extraction_completed_issues.csv', sep=',', na_rep='NA', header=True, index=False, mode='w', encoding='utf-8', quoting=None, quotechar='"', line_terminator='\n', decimal='.')
#            exception_thrown=True
#            pass
        except github.GithubException as ghe:
            print('Exception Occurred While Getting ISSUES EVENTS: Github')
            logger.write(
                'last_issues_page:{},last_issue:{},last_event_page:{}\n'.
                format(issues_page, issue_id, page))
            logger.flush()
            if str(ghe) == '500 None':
                print('PROBLEMS ON ISSUE: {} Excluded From Events Extraction'.
                      format(issue_id))
                util.add(completed_issues, issue_id)
            if (len(issues_events_data) > 0):
                issues_events_data.to_csv(path + '/issues_events_repo.csv',
                                          sep=',',
                                          na_rep='NA',
                                          header=True,
                                          index=False,
                                          mode='w',
                                          encoding='utf-8',
                                          quoting=None,
                                          quotechar='"',
                                          line_terminator='\n',
                                          decimal='.')
                completed_issues.to_csv(
                    path + '/events_extraction_completed_issues.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
            exception_thrown = True
            pass
        except requests.exceptions.Timeout:
            print('Exception Occurred While Getting ISSUES EVENTS: Timeout')
            logger.write(
                'last_issues_page:{},last_issue:{},last_event_page:{}\n'.
                format(issues_page, issue_id, page))
            logger.flush()
            if (len(issues_events_data) > 0):
                issues_events_data.to_csv(path + '/issues_events_repo.csv',
                                          sep=',',
                                          na_rep='NA',
                                          header=True,
                                          index=False,
                                          mode='w',
                                          encoding='utf-8',
                                          quoting=None,
                                          quotechar='"',
                                          line_terminator='\n',
                                          decimal='.')
                completed_issues.to_csv(
                    path + '/events_extraction_completed_issues.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
            exception_thrown = True
            pass
        except:
            print('Execution Interrupted While Getting ISSUES EVENTS')
            logger.write(
                'last_issues_page:{},last_issue:{},last_event_page:{}\n'.
                format(issues_page, issue_id, page))
            logger.flush()
            if (len(issues_events_data) > 0):
                issues_events_data.to_csv(path + '/issues_events_repo.csv',
                                          sep=',',
                                          na_rep='NA',
                                          header=True,
                                          index=False,
                                          mode='w',
                                          encoding='utf-8',
                                          quoting=None,
                                          quotechar='"',
                                          line_terminator='\n',
                                          decimal='.')
                completed_issues.to_csv(
                    path + '/events_extraction_completed_issues.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
            raise
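# NOTE: `get_last_page_read` is not shown. The checkpoint lines written in
# the handlers above have the form
# 'last_issues_page:<n>,last_issue:<id>,last_event_page:<n>', so a
# compatible sketch (assumed, not the project's code) is:
def _get_last_page_read_sketch(log_path):
    last_issues_page, last_issue, last_events_page = 0, '', 0
    with open(log_path) as log:
        for line in log:
            parts = line.strip().split(',')
            if len(parts) == 3 and parts[0].startswith('last_issues_page:'):
                last_issues_page = int(parts[0].split(':')[1])
                last_issue = parts[1].split(':')[1]
                last_events_page = int(parts[2].split(':')[1])
    return last_issues_page, last_issue, last_events_page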
def get_issues_prs(org_path, gith, repo, project_name, start_date,
                   developer_login):
    path = org_path + '/' + project_name + '/Activities_Plots/' + developer_login
    os.makedirs(path, exist_ok=True)

    exception_thrown = True
    while (exception_thrown):
        exception_thrown = False

        logger = open(path + '/issues_pr_extraction.log', 'a+')
        ### Get Issue / Pull Requests
        created_issues_prs = repo.get_issues(state='all',
                                             sort='created_at',
                                             since=start_date,
                                             creator=developer_login)

        count_exception = True
        while (count_exception):
            count_exception = False
            try:
                num_items = created_issues_prs.totalCount
            except github.GithubException:
                print(
                    'Failed to get ISSUES/PRs Number from User {} and Project {} (GITHUB: Retrying)'
                    .format(developer_login, project_name))
                count_exception = True
                pass
            except requests.exceptions.Timeout:
                print(
                    'Failed to get ISSUES/PRs Number from User {} and Project {} (TIMEOUT: Retrying)'
                    .format(developer_login, project_name))
                count_exception = True
                pass
            except:
                print(
                    'Failed to get ISSUES/PRs Number from User {} and Project {} (Probably Empty)'
                    .format(developer_login, project_name))
                return

        last_page = int(num_items / cfg.items_per_page)
        last_page_read = 0

        if 'issues_pr_creation.csv' in os.listdir(path):
            issues_prs_data = pandas.read_csv(path + '/issues_pr_creation.csv',
                                              sep=',')
            last_page_read = get_last_page_read_short(
                path + '/issues_pr_extraction.log')
        else:
            issues_prs_data = pandas.DataFrame(
                columns=['id', 'date', 'creator_login'])

        try:
            for page in range(last_page_read, last_page + 1):
                created_issues_prs_page = created_issues_prs.get_page(page)
                for issue in created_issues_prs_page:
                    issue_id = issue.id
                    if (issue_id not in issues_prs_data.id.tolist()):
                        if (issue.user):
                            cfg.waitRateLimit(gith)
                            util.add(
                                issues_prs_data,
                                [issue_id, issue.created_at, issue.user.login])
                logger.write('last_page_read:{}\n'.format(page))
                logger.flush()
            if (len(issues_prs_data) > 0):
                issues_prs_data.to_csv(path + '/issues_pr_creation.csv',
                                       sep=',',
                                       na_rep='NA',
                                       header=True,
                                       index=False,
                                       mode='w',
                                       encoding='utf-8',
                                       quoting=None,
                                       quotechar='"',
                                       line_terminator='\n',
                                       decimal='.')
            print(
                '{}: Issues/Pulls Extraction Complete'.format(developer_login))
        except github.GithubException:
            print('Exception Occurred While Getting ISSUES/PULLS: Github')
            logger.write('last_page_read:{}\n'.format(page))
            logger.flush()
            if (len(issues_prs_data) > 0):
                issues_prs_data.to_csv(path + '/issues_pr_creation.csv',
                                       sep=',',
                                       na_rep='NA',
                                       header=True,
                                       index=False,
                                       mode='w',
                                       encoding='utf-8',
                                       quoting=None,
                                       quotechar='"',
                                       line_terminator='\n',
                                       decimal='.')
            exception_thrown = True
            pass
        except requests.exceptions.Timeout:
            print('Exception Occurred While Getting ISSUES/PULLS: Timeout')
            logger.write('last_page_read:{}\n'.format(page))
            logger.flush()
            if (len(issues_prs_data) > 0):
                issues_prs_data.to_csv(path + '/issues_pr_creation.csv',
                                       sep=',',
                                       na_rep='NA',
                                       header=True,
                                       index=False,
                                       mode='w',
                                       encoding='utf-8',
                                       quoting=None,
                                       quotechar='"',
                                       line_terminator='\n',
                                       decimal='.')
            exception_thrown = True
            pass
        except:
            print('Execution Interrupted While Getting ISSUES/PULLS')
            logger.write('last_page_read:{}\n'.format(page))
            logger.flush()
            if (len(issues_prs_data) > 0):
                issues_prs_data.to_csv(path + '/issues_pr_creation.csv',
                                       sep=',',
                                       na_rep='NA',
                                       header=True,
                                       index=False,
                                       mode='w',
                                       encoding='utf-8',
                                       quoting=None,
                                       quotechar='"',
                                       line_terminator='\n',
                                       decimal='.')
            raise
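# Hypothetical usage of get_issues_prs: the token, repository path, and
# developer login below are placeholders, not values from these examples.
def _example_get_issues_prs():
    from datetime import datetime
    gith = github.Github('<personal-access-token>')
    repo = gith.get_repo('example_org/example_project')
    get_issues_prs('/data/example_org', gith, repo, 'example_project',
                   datetime(2019, 1, 1), 'example_dev')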
def get_pulls_comments_repo(gith, path, repo, project_name, start_date,
                            active_users):
    os.makedirs(path, exist_ok=True)

    exception_thrown = True
    while (exception_thrown):
        exception_thrown = False

        if 'pulls_comments_repo.csv' in os.listdir(path):
            pulls_comments_data = pandas.read_csv(path +
                                                  '/pulls_comments_repo.csv',
                                                  sep=',')
        else:
            pulls_comments_data = pandas.DataFrame(
                columns=['id', 'date', 'creator_login'])

        if 'pulls_comments_extraction.log' in os.listdir(path):
            last_pulls_page, last_pull, last_comments_page = get_last_page_read(
                path + '/pulls_comments_extraction.log')
        else:
            last_pulls_page = 0
            last_pull = ''
            last_comments_page = 0

        if 'comments_extraction_completed_pulls.csv' in os.listdir(path):
            completed_pulls = pandas.read_csv(
                path + '/comments_extraction_completed_pulls.csv', sep=',')
        else:
            completed_pulls = pandas.DataFrame(columns=['id'])

        logger = open(path + '/pulls_comments_extraction.log', 'a+')

        ### Get Comments on Pull
        try:
            pulls_page = last_pulls_page
            pull_id = ''
            page = 0

            pulls = repo.get_pulls(state='all', sort='created_at')
            num_pulls = pulls.totalCount
            final_pulls_page = int(num_pulls / cfg.items_per_page)

            for pulls_page in range(last_pulls_page, final_pulls_page + 1):
                cfg.waitRateLimit(gith)
                current_pulls_page = pulls.get_page(pulls_page)
                for pull in current_pulls_page:
                    cfg.waitRateLimit(gith)
                    pull_id = pull.id
                    if (pull_id not in completed_pulls.id.tolist()):
                        if (pull_id != last_pull):
                            last_page = 0
                        else:
                            last_page = last_comments_page
                        cfg.waitRateLimit(gith)
                        pulls_comments = pull.get_comments()
                        num_items = pulls_comments.totalCount
                        final_page = int(num_items / cfg.items_per_page)

                        for page in range(last_page, final_page + 1):
                            cfg.waitRateLimit(gith)
                            pulls_comments_page = pulls_comments.get_page(page)
                            for comment in pulls_comments_page:
                                cfg.waitRateLimit(gith)
                                comment_id = comment.id
                                if (comment_id not in
                                        pulls_comments_data.id.tolist()):
                                    if (comment.user):
                                        cfg.waitRateLimit(gith)
                                        user_login = comment.user.login
                                        if (user_login in active_users):
                                            cfg.waitRateLimit(gith)
                                            util.add(pulls_comments_data, [
                                                comment_id, comment.created_at,
                                                user_login
                                            ])
                        util.add(completed_pulls, pull_id)
            if (len(pulls_comments_data) > 0):
                pulls_comments_data.to_csv(path + '/pulls_comments_repo.csv',
                                           sep=',',
                                           na_rep='NA',
                                           header=True,
                                           index=False,
                                           mode='w',
                                           encoding='utf-8',
                                           quoting=None,
                                           quotechar='"',
                                           line_terminator='\n',
                                           decimal='.')
                completed_pulls.to_csv(
                    path + '/comments_extraction_completed_pulls.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
                os.rename(path + '/pulls_comments_repo.csv',
                          path + '/complete_pulls_comments_repo.csv')
            print('{}: Pulls Comments Extraction Complete'.format(repo))
        except github.GithubException as ghe:
            print('Exception Occurred While Getting PULLS COMMENTS: Github')
            logger.write(
                'last_pulls_page:{},last_pull:{},last_comment_page:{}\n'.
                format(pulls_page, pull_id, page))
            logger.flush()
            if str(ghe) == '500 None':
                print('PROBLEMS ON PULL: {} Excluded From Comments Extraction'.
                      format(pull_id))
                util.add(completed_pulls, pull_id)
            if (len(pulls_comments_data) > 0):
                pulls_comments_data.to_csv(path + '/pulls_comments_repo.csv',
                                           sep=',',
                                           na_rep='NA',
                                           header=True,
                                           index=False,
                                           mode='w',
                                           encoding='utf-8',
                                           quoting=None,
                                           quotechar='"',
                                           line_terminator='\n',
                                           decimal='.')
                completed_pulls.to_csv(
                    path + '/comments_extraction_completed_pulls.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
            exception_thrown = True
            pass
        except requests.exceptions.Timeout:
            print('Exception Occurred While Getting PULLS COMMENTS: Timeout')
            logger.write(
                'last_pulls_page:{},last_pull:{},last_comment_page:{}\n'.
                format(pulls_page, pull_id, page))
            logger.flush()
            if (len(pulls_comments_data) > 0):
                pulls_comments_data.to_csv(path + '/pulls_comments_repo.csv',
                                           sep=',',
                                           na_rep='NA',
                                           header=True,
                                           index=False,
                                           mode='w',
                                           encoding='utf-8',
                                           quoting=None,
                                           quotechar='"',
                                           line_terminator='\n',
                                           decimal='.')
                completed_pulls.to_csv(
                    path + '/comments_extraction_completed_pulls.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
            exception_thrown = True
            pass
        except:
            print('Execution Interrupted While Getting PULLS COMMENTS')
            logger.write(
                'last_pulls_page:{},last_pull:{},last_comment_page:{}\n'.
                format(pulls_page, pull_id, page))
            logger.flush()
            if (len(pulls_comments_data) > 0):
                pulls_comments_data.to_csv(path + '/pulls_comments_repo.csv',
                                           sep=',',
                                           na_rep='NA',
                                           header=True,
                                           index=False,
                                           mode='w',
                                           encoding='utf-8',
                                           quoting=None,
                                           quotechar='"',
                                           line_terminator='\n',
                                           decimal='.')
                completed_pulls.to_csv(
                    path + '/comments_extraction_completed_pulls.csv',
                    sep=',',
                    na_rep='NA',
                    header=True,
                    index=False,
                    mode='w',
                    encoding='utf-8',
                    quoting=None,
                    quotechar='"',
                    line_terminator='\n',
                    decimal='.')
            raise
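# NOTE: `cfg.waitRateLimit` guards every API call above but is not shown.
# A minimal sketch of such a guard using PyGithub's get_rate_limit() is
# given below; it assumes core.reset is a naive UTC datetime (as in older
# PyGithub releases) and is not the project's actual implementation.
def _wait_rate_limit_sketch(gith, floor=10):
    import time
    from datetime import datetime
    core = gith.get_rate_limit().core
    if core.remaining <= floor:
        # sleep until the quota resets, plus a small safety margin
        delay = (core.reset - datetime.utcnow()).total_seconds() + 5
        time.sleep(max(delay, 0))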
Example #7
    main_project_name = p_names[chosen_project]

    path = main_path + '/' + chosen_organization + '/' + main_project_name

    with open(path + '/inactivity_interval_list.csv',
              'r') as f:  #Read Inactivity Intervals Table
        inactivity_intervals_data = [
            list(map(str, rec)) for rec in csv.reader(f, delimiter=',')
        ]

    all_devs = len(inactivity_intervals_data)

    row = [main_project_name, all_devs]
    for threshold in range(10, 90, 10):
        num = getNumCoreDevelopers(inactivity_intervals_data, threshold)
        row.append(num)

    util.add(table, row)

table.to_csv(main_path + '/breaks_number_filter_stats.csv',
             sep=';',
             na_rep='NA',
             header=True,
             index=False,
             mode='w',
             encoding='utf-8',
             quoting=None,
             quotechar='"',
             line_terminator='\n',
             decimal='.')
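# NOTE: getNumCoreDevelopers is called above but not shown. A sketch
# consistent with the filtering in writeCoreDevelopers (count non-bot rows
# with at least `threshold` breaks, where len(rec) - 3 is taken to be the
# break count) would be:
def _get_num_core_developers_sketch(inactivity_intervals_data, threshold):
    count = 0
    for rec in inactivity_intervals_data:
        if '[bot]' not in rec[0] and (len(rec) - 3) >= threshold:
            count += 1
    return count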
#Read Break Dates Table
with open(super_path+'/break_dates_list.csv', 'r') as f:
    break_dates_data = [list(map(str,rec)) for rec in csv.reader(f, delimiter=',')]

breaks_df = pandas.DataFrame({'durations' : inactivity_intervals_data, 'datelimits' : break_dates_data})

# FILTER DEVELOPERS
active_users_breaks = pandas.DataFrame(columns=['durations','datelimits'])

path = (super_path+'/'+project_name)

for index, row in breaks_df.iterrows():
    num_breaks=len(row['durations'])-3
    if (row['durations'][0] in active_users):
        util.add(active_users_breaks, row)
num_all_users = len(inactivity_intervals_data)
num_active_users = len(active_users_breaks)

active_users_longer_intervals=[]

active_devs_sleeping_intervals_df = []
active_devs_hibernation_intervals_df = []
active_devs_dead_intervals_df = []

n=0
for index, row in active_users_breaks.iterrows():
    user_id=row['durations'][0]
    
    last_commit_day=util.getLastCommitDay(commit_table, user_id)
    last_break_length=util.days_between(last_commit_day, project_end)