示例#1
0
def main():
    """Load the CVE documents from MongoDB and draw a 2x2 grid of Laplace plots.

    Connects to the MongoDB server on localhost, reads every document of the
    ``cves`` collection in the ``cvedb`` database, and then asks a
    ``calc_laplace`` instance to draw one panel per subplot (PHP, OpenJDK and
    two wheezy variants) before showing the figure.
    """
    client = MongoClient('mongodb://localhost:27017/')

    # Database "cvedb", collection "cves".
    collection = client.cvedb.cves

    # Index every document by its 'id' field. The mapping is not used again
    # in this function.
    cvedict = {entry['id']: entry for entry in collection.find()}

    vlist = []
    carlosplt.pre_paper_plot()
    fig = plt.figure()
    tester = calc_laplace()

    # One drawing call per cell of the 2x2 grid, listed in subplot order.
    panels = (
        lambda: tester.laplace_php([]),
        lambda: tester.laplace_openjdk([]),
        lambda: tester.laplace_wheezy([], False),
        lambda: tester.laplace_wheezy([], True),
    )
    for cell, draw in enumerate(panels, start=1):
        ax = fig.add_subplot(2, 2, cell)
        draw()

    carlosplt.post_paper_plot(True, True, True)
    plt.show()
示例#2
0
    def plot_average_number(self):
        """Bar-plot ``year_sum / year_num`` per year with an OLS trend line.

        Reads ``years``, ``year_sum`` and ``year_num`` from ``self.plotter``
        and the tick labels from ``self.yearsx``. Prints the ``np.polyfit``
        coefficients and the statsmodels summary (plain and LaTeX).

        Raises:
            ZeroDivisionError: if ``year_num`` is 0 for any plotted year.
        """
        plotter = self.plotter
        years = plotter.years

        average_per_year = [
            plotter.year_sum[idx] / float(plotter.year_num[idx])
            for idx in range(years)
        ]

        # First-degree polynomial fit against the 1-based year number.
        slope = np.polyfit(list(range(1, years + 1)), average_per_year, 1)
        print('Slope: ' + str(slope))

        n = len(plotter.year_sum)
        x = range(years)
        width = 1 / 2
        plt.bar(x,
                average_per_year,
                width,
                color='darkblue',
                edgecolor='black')
        plt.xticks(np.arange(0, n), self.yearsx)
        plt.ylabel('Average vulnerabilities per package')
        plt.xlabel('Year')
        carlosplt.post_paper_plot(True, True, True)

        # Ordinary least squares on the 0-based year index, drawn on top of
        # the bars.
        X = sm.add_constant(x)
        model = sm.OLS(average_per_year, X).fit()
        plt.plot(model.predict(X))
        plt.show()
        print(model.summary())
        print(model.summary().as_latex())
示例#3
0
    def plot_severity_percentage(self):
        """Stack-plot the yearly share of low/medium/high/undefined counts.

        For every package in ``self.pkg_with_cvss`` the per-month 4-element
        entries (indices 0..3 are accumulated as low, medium, high and
        undefined) are summed into year slots (``month // 12``). Each slot is
        then divided by its own total, and slots 1.. are drawn as a stack
        plot over the years starting at 2001.
        """
        slots = self.years + 1
        num_low = [0] * slots
        num_med = [0] * slots
        num_high = [0] * slots
        num_udef = [0] * slots
        total = [0] * slots

        for pkg in self.pkg_with_cvss:
            for month in range(len(self.src2month[pkg])):
                year = month // 12
                try:
                    cell = self.pkg_with_cvss[pkg][month]
                    num_low[year] += cell[0]
                    num_med[year] += cell[1]
                    num_high[year] += cell[2]
                    num_udef[year] += cell[3]
                    total[year] += cell[3] + cell[2] + cell[1] + cell[0]
                except IndexError:
                    # Year slot 19 is skipped silently; any other slot that
                    # falls out of range is reported on stdout.
                    if year != 19:
                        print(year)

        # Turn the absolute counts into fractions of the yearly total; a year
        # with nothing recorded becomes all zeros.
        buckets = (num_low, num_med, num_high, num_udef)
        for year in range(slots):
            try:
                shares = [bucket[year] / total[year] for bucket in buckets]
            except ZeroDivisionError:
                shares = [0, 0, 0, 0]
            for bucket, share in zip(buckets, shares):
                bucket[year] = share

        print(num_low)
        print(num_high)

        carlosplt.pre_paper_plot()

        pal = ['#fee8c8', '#fdbb84', '#e34a33', 'grey']
        labels_cvss = ['low', 'medium', 'high', 'N/A']
        x = range(2001, 2001 + self.years)

        # Slot 0 is left out so the series line up with the years in x.
        h = plt.stackplot(
            x, [bucket[1:] for bucket in buckets],
            colors=pal,
            alpha=0.9,
            labels=labels_cvss)
        plt.xticks(x)
        plt.legend(loc='upper left', handles=h[::-1])
        carlosplt.post_paper_plot(True, True, True)
        plt.show()
示例#4
0
def test_slocs(src2month, src2sloccount):
    """Relate per-package vulnerability totals to source-line counts.

    Packages without an entry in ``src2sloccount`` or with a zero total are
    skipped. Prints the vulnerability sums of the larger and the smaller half
    (ranked by SLOC), the largest and smallest SLOC value, and the Spearman
    correlation, then plots the vulnerability totals in SLOC-rank order.

    Args:
        src2month: mapping package -> sequence of per-month counts; must
            contain the key ``'linux'``.
        src2sloccount: mapping package -> entry of the form
            (total, [ansic, cpp, asm, java, python, perl, sh]).
    """
    sloc_totals = []
    vuln_totals = []
    print(sum(src2month['linux']))

    for pkg in src2month:
        try:
            total_slocs = src2sloccount[pkg][0]
        except KeyError:
            print(pkg + ": no sloccount data found!")
            continue
        if total_slocs == 0:
            continue
        sloc_totals.append(int(total_slocs))

        total = sum(src2month[pkg])
        if total > 100:
            print(pkg + ', ' + str(total) + ', ' + str(total_slocs))
        vuln_totals.append(total)

    # Rank the packages from most to fewest source lines.
    ranked = sorted(zip(sloc_totals, vuln_totals), reverse=True)
    vulns_sorted_slocs_total = [vulns for _, vulns in ranked]
    slocs_sorted = [slocs for slocs, _ in ranked]

    midpoint = len(vulns_sorted_slocs_total) // 2
    half_more_slocs = sum(vulns_sorted_slocs_total[:midpoint])
    half_less_slocs = sum(vulns_sorted_slocs_total[midpoint:])

    print(half_more_slocs)
    print(half_less_slocs)

    print(slocs_sorted[0])
    print(slocs_sorted[-1])

    print(spearmanr(sloc_totals, vuln_totals))

    carlosplt.pre_paper_plot(True)

    plt.plot(vulns_sorted_slocs_total)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Number of SLOCS ranking')
    carlosplt.post_paper_plot(True, True, True)

    plt.show()
示例#5
0
 def plot_num_affected(self):
     """Bar-plot ``self.plotter.year_num`` (affected packages) per year.

     Appends one tick label per year to ``self.yearsx`` on every call: a
     two-digit, apostrophe-prefixed label for odd year numbers and an empty
     string for even ones.
     """
     plotter = self.plotter
     n = len(plotter.year_sum)

     for year in range(1, plotter.years + 1):
         label = ('\'' + str(year).zfill(2)) if year % 2 == 1 else ''
         self.yearsx.append(label)

     x = range(plotter.years)
     width = 1 / 2
     plt.bar(x,
             plotter.year_num,
             width,
             color='darkblue',
             edgecolor='black')
     plt.xticks(np.arange(0, n), self.yearsx)
     plt.ylabel('Number of affected packages')
     plt.xlabel('Year')
     carlosplt.post_paper_plot(True, True, True)
     plt.show()
示例#6
0
    def laplace_process_list(self, vlist, pkg, year):
        """Plot the running ``lp.laplace_test`` value for a monthly count list.

        Each month is treated as a window of 24 * 30 hours. For a month with
        ``k`` events, ``k`` distinct hours are drawn at random from its window
        and appended to the event list, and the test value is recomputed after
        every month.

        Args:
            vlist: sequence of per-month event counts.
            pkg: name used as the y-axis label; ``'wheezy'`` gets 4 leading
                and 8 trailing empty points on the x-axis.
            year: offset added to 2000 for the first tick label.
        """
        hours_per_month = 24 * 30
        n = len(vlist)
        print(pkg)
        print(pkg)
        print(vlist)

        instances = []
        laplace_values = []
        for month_no, count in enumerate(vlist, start=1):
            window = range(hours_per_month * (month_no - 1),
                           hours_per_month * month_no)
            instances += random.sample(window, count)
            laplace_values.append(
                lp.laplace_test(instances, hours_per_month * month_no))

        final_laplace = lp.laplace_test(instances, hours_per_month * n + 1)
        print(final_laplace)

        print(n)
        yearsx = [
            '\'' + str(year + 2000 + offset)[-2:]
            for offset in range(n // 12 + 1)
        ]
        carlosplt.post_paper_plot(True, True, True)

        if pkg == 'wheezy':
            plt.plot(range(n + 12), [None] * 4 + laplace_values + [None] * 8)
        else:
            plt.plot(range(n), laplace_values)

        # Horizontal reference lines (presumably significance thresholds of
        # the test -- TODO confirm).
        for level, colour in ((1.96, 'orange'), (2.33, 'red'),
                              (-1.96, 'orange'), (-2.33, 'red')):
            plt.axhline(y=level, linestyle=':', color=colour)

        plt.xticks(np.arange(0, n, step=12), yearsx)
        plt.ylabel(pkg)
示例#7
0
def test_pop(src2month, src2pop):
    """Relate per-package vulnerability totals to package popularity.

    Packages missing from ``src2pop`` are skipped. Prints the vulnerability
    sums of the more and the less popular half, the highest and lowest
    popularity value, the whole ``src2pop`` mapping and the Spearman
    correlation, then plots the vulnerability totals in popularity-rank order.

    Args:
        src2month: mapping package -> sequence of per-month counts.
        src2pop: mapping package -> popularity value accepted by ``int()``.
    """
    popularity = []
    vuln_totals = []
    for pkg in src2month:
        try:
            raw_pop = src2pop[pkg]
        except KeyError:
            # No popularity data for this package.
            continue
        popularity.append(int(raw_pop))

        total = sum(src2month[pkg])
        if total > 100:
            # str() keeps this line working when the popularity value is a
            # number rather than a string (bare concatenation would raise
            # TypeError); it matches the formatting used by test_slocs.
            print(pkg + ', ' + str(total) + ', ' + str(raw_pop))
        vuln_totals.append(total)

    # Rank once, from most to least popular, and reuse for both projections.
    ranked = sorted(zip(popularity, vuln_totals), reverse=True)
    vulns_sorted_pop = [vulns for _, vulns in ranked]
    pop_xaxis = [pop for pop, _ in ranked]

    midpoint = len(vulns_sorted_pop) // 2
    half_more_popular = sum(vulns_sorted_pop[:midpoint])
    half_less_popular = sum(vulns_sorted_pop[midpoint:])

    print(half_more_popular)
    print(half_less_popular)

    print(pop_xaxis[0])
    print(pop_xaxis[-1])

    print(src2pop)

    print(spearmanr(popularity, vuln_totals))

    carlosplt.pre_paper_plot(True)

    plt.plot(vulns_sorted_pop)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Popularity ranking')
    carlosplt.post_paper_plot(True, True, True)

    plt.show()
示例#8
0
    def plot_wheezy_lts(self):
        """Plot DLA-based vulnerability counts per month and per quarter.

        Loads the cached DLA data files from the working directory and
        rebuilds a per-month total from ``DLA_withcvss.json``, filtered by the
        severity flags ``self.l``, ``self.m``, ``self.h`` and ``self.udef``.
        Three figures are shown: the monthly totals, the quarterly LTS totals,
        and a combined regular / regular* / long-term bar chart with an OLS
        trend line. The regression summary is printed as text and as LaTeX.

        If any cache file cannot be opened, the data is regenerated through
        the ``dla`` module and the method calls itself again.
        """
        quarter_num = self.plotter.years * 4
        # Get LTS and plot
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # these caches must come from a trusted source.
        try:
            with open("DLA_sum.txt", "rb") as fp:
                ltslist = pickle.load(fp)
            with open("src2month_DLA.txt", "rb") as fp:
                src2monthDLAs = pickle.load(fp)
            with open("DLA_src2month.json", "r") as fp:
                src2monthDLA = json.load(fp)
            with open("DLA_withcvss.json", "r") as fp:
                self.src2monthDLA_cvss = json.load(fp)
                # Fix this so it can compute when required
                #dla.permonthDLA(src2monthDLAs)
            # NOTE(review): the reader is created but never iterated, and the
            # file is closed when the with-block ends; the only visible effect
            # is an IOError when "1000.csv" is missing.
            with open("1000.csv", "r") as csvfile:
                spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')

        except IOError:
            # Regenerate the caches and retry.
            # NOTE(review): there is no retry limit -- if a file is still
            # missing after regeneration this recurses until the recursion
            # limit is hit.
            ltslist = dla.getDLAs()
            with open("src2month_DLA.txt", "rb") as fp:
                src2monthDLAs = pickle.load(fp)
            dla.permonthDLA(src2monthDLAs)
            return self.plot_wheezy_lts()

        ## Plot for wheezy
        quarter_sum = [0] * quarter_num

        DLA_temp = dict()

        ## Fix src2monthDLA_cvss
        # Flatten each package's list of lists by one level (presumably
        # per-year lists of per-month entries -- TODO confirm).
        for i in self.src2monthDLA_cvss:
            temp_list = []
            for j in self.src2monthDLA_cvss[i]:
                temp_list += j
            self.src2monthDLA_cvss[i] = temp_list

        ## Fix ltslist according to severity
        # Each flattened entry is indexed 0..3 as low / medium / high /
        # undefined; only the classes whose flag is set are counted.
        for i in self.src2monthDLA_cvss:
            DLA_temp[i] = []
            for j in range(len(self.src2monthDLA_cvss[i])):
                num_low = self.src2monthDLA_cvss[i][j][0]
                num_med = self.src2monthDLA_cvss[i][j][1]
                num_high = self.src2monthDLA_cvss[i][j][2]
                num_udef = self.src2monthDLA_cvss[i][j][3]
                tempp = 0
                if self.l:
                    tempp += num_low
                if self.m:
                    tempp += num_med
                if self.h:
                    tempp += num_high
                if self.udef:
                    tempp += num_udef
                DLA_temp[i].append(tempp)

        # The value loaded from DLA_sum.txt (or dla.getDLAs()) is discarded
        # here and replaced by the severity-filtered monthly totals.
        ltslist = []

        # Requires every package to have at least (years + 1) * 12 entries;
        # a shorter list raises IndexError.
        for m in range((self.plotter.years + 1) * 12):
            s = 0
            #print(m)
            for i in DLA_temp:
                s += DLA_temp[i][m]
            ltslist.append(s)

        totalLTS = ltslist
        # First figure: raw monthly totals.
        plt.bar([i for i in range(len(ltslist))], ltslist)
        plt.show()

        # Tick labels: "Q1'YY" on the first quarter of each year, a blank for
        # the other three.
        quartersx = []
        for i in range(1, self.plotter.years + 1):
            for j in range(1, 5):
                if j == 1:
                    quartersx.append('Q' + str(j) + '\'' + str(i).zfill(2))
                else:
                    quartersx.append(' ')

        # Quarterly sums over all packages; the first 12 months of each
        # series are skipped (offset 12).
        for pkg in self.plotter.src2month_loc:
            for j in range(quarter_num):
                temp = sum(self.plotter.src2month_loc[pkg][12 + (3 * j):12 +
                                                           3 * (j + 1)])
                quarter_sum[j] += temp

        LTS_quarter = []

        # Same quarterly aggregation for the DLA monthly totals.
        for j in range(quarter_num):
            temp = sum(totalLTS[12 + (3 * j):12 + 3 * (j + 1)])
            LTS_quarter.append(temp)

        ## Print all LTS
        # This value of cut is not used before it is reassigned further down.
        cut = 12 * 4 + 1
        n = len(quarter_sum)
        x = range(quarter_num)
        width = 1 / 2

        # Second figure: quarterly DLA totals over the whole range.
        plt.bar(x,
                LTS_quarter,
                width,
                color='brown',
                label='regular support',
                edgecolor='black')

        plt.xticks(np.arange(0, n), quartersx, rotation="vertical")
        plt.ylabel('Vulnerabilities per quarter of Debian LTS')
        plt.xlabel('Quarter')
        carlosplt.post_paper_plot(True, True, True)
        plt.show()

        ## Filter only wheezy:
        # Hard-coded quarter offsets (presumably tied to the wheezy release,
        # end of regular support and LTS dates -- TODO confirm):
        #   regular  keeps quarter_sum[49:57], zero elsewhere
        #   regular* keeps quarter_sum[57:62], zero elsewhere
        #   LTS      keeps LTS_quarter[62:-3], zero before index 62
        quarter_sum_regular = [0] * (12 * 4 + 1) + quarter_sum[12 * 4 +
                                                               1:12 * 4 +
                                                               9] + [0] * 12
        quarter_sum_errors = [0] * (12 * 4 + 9) + quarter_sum[12 * 4 +
                                                              9:12 * 4 + 9 +
                                                              5] + [0] * 7
        LTS_quarter = [0] * (15 * 4 + 2) + LTS_quarter[15 * 4 + 2:-3]

        # Concatenation of the three phases, used as the regression target.
        whole_w = quarter_sum_regular[:-12] + quarter_sum_errors[
            12 * 4 + 9:-7] + LTS_quarter[15 * 4 + 2:]

        #print(quarter_sum_errors)
        cut = 12 * 4 + 1
        n = len(quarter_sum) - cut
        x = range(quarter_num - cut - 3)
        width = 1 / 2

        #print(len(LTS_quarter))
        # Debug output: lengths of x and of the plotted series.
        print(len(x))
        print(len(quarter_sum_regular[cut:]))
        print(len(quarter_sum_errors[cut:]))
        # Third figure: the three phases drawn on the same x positions.
        bar1 = plt.bar(x,
                       quarter_sum_regular[cut:],
                       width,
                       color='darkblue',
                       label='regular',
                       edgecolor='black')
        bar12 = plt.bar(x,
                        quarter_sum_errors[cut:],
                        width,
                        color='darkorange',
                        label='regular*',
                        edgecolor='black')
        bar2 = plt.bar(x,
                       LTS_quarter[cut:],
                       width,
                       color='darkred',
                       label='long-term',
                       edgecolor='black')
        plt.legend(handles=[bar1, bar12, bar2])

        plt.xticks(np.arange(0, n), quartersx[cut:], rotation="vertical")
        plt.ylabel('Vulnerabilities per quarter')
        plt.xlabel('Quarter')
        carlosplt.post_paper_plot(True, True, True)
        ## Linear Regression
        print(len(x))
        print(len(whole_w[cut:]))
        # OLS of the combined series on the quarter index, drawn over the bars.
        X = sm.add_constant(x)
        y = whole_w[cut:]
        model = sm.OLS(y, X).fit()
        predictions = model.predict(X)
        plt.plot(predictions)
        plt.show()
        print(model.summary())
        print(model.summary().as_latex())
示例#9
0
    def plot_total(self):
        """Bar-plot the total count per year with an OLS trend line.

        Fills ``self.year_sum`` (sum over all packages) and ``self.year_num``
        (number of packages with a positive yearly sum) from
        ``self.src2month_loc``, skipping the first 12 months of every series.
        For packages whose overall total exceeds 1 it also records the total
        in ``self.src2sum`` and the sum of the last 24 months in
        ``self.src2lastyears``. Prints several totals and the regression
        summary (plain and LaTeX).
        """
        years = self.years
        self.year_sum = [0] * years
        self.year_num = [0] * years

        for pkg in self.src2month_loc:
            series = self.src2month_loc[pkg]
            for idx in range(years):
                yearly = sum(series[12 * (1 + idx):12 * (2 + idx)])
                if yearly > 0:
                    self.year_num[idx] += 1
                self.year_sum[idx] += yearly

            # Overall total and the last two years of this package.
            total = sum(series[:])
            last_years = sum(series[-24:])
            if total > 1:
                self.src2sum[pkg] = total
                self.src2lastyears[pkg] = last_years

        # Grand total plus the number of packages with exactly one and with
        # more than one entry.
        sum_total = 0
        one_only = 0
        one_plus = 0
        for p in self.src2month:
            sum_part = sum(self.src2month_loc[p][:])
            sum_total += sum_part
            if sum_part == 1:
                one_only += 1
            elif sum_part > 1:
                one_plus += 1

        print('Total = ', sum_total)
        print('one_only = ', one_only)
        print('one_plus = ', one_plus)

        values = sorted(self.src2sum.values(), reverse=True)
        keys = list(
            sorted(self.src2sum, key=self.src2sum.__getitem__, reverse=True))

        n = len(self.year_sum)
        # Label odd year numbers only, to keep the axis readable.
        yearsx = [
            ('\'' + str(year).zfill(2)) if year % 2 == 1 else ''
            for year in range(1, years + 1)
        ]
        x = range(years)

        width = 1 / 2
        plt.bar(x, self.year_sum, width, color='darkblue', edgecolor='black')
        plt.xticks(np.arange(0, n), yearsx)
        plt.ylabel('Total vulnerabilities')
        plt.xlabel('Year')
        carlosplt.post_paper_plot(True, True, True)

        sum_all = sum(values)
        print("Total: ", sum_all)

        # Ordinary least squares on the year index, drawn over the bars.
        X = sm.add_constant(x)
        model = sm.OLS(self.year_sum, X).fit()
        plt.plot(model.predict(X))
        plt.show()
        print(model.summary())
        print(model.summary().as_latex())
示例#10
0
# Report the fit settings and the lognormal parameters of `results`.
print(results.power_law.discrete)
print('lognormal mu: ', results.lognormal.mu)
print('lognormal sigma: ', results.lognormal.sigma)

# Draw the CCDF of the data together with the power-law fit on one axis.
carlosplt.pre_paper_plot(True)
fig = results.plot_ccdf(color='darkblue', linestyle='-', label='data')
results.power_law.plot_ccdf(color='darkgreen', ax=fig, label='power-law fit')
plt.ylabel('ccdf')
plt.xlabel('Vulnerabilities')
fig.legend()
carlosplt.post_paper_plot(True, True, True)
plt.show()

# Compare the power law against each alternative distribution and print the
# two values returned by distribution_compare.
for alternative, caption in (
        ('exponential', 'Exponential: '),
        ('stretched_exponential', 'Stretched exponential: '),
        ('truncated_power_law', 'Power law truncated: '),
        ('lognormal_positive', 'Lognormal positive: '),
        ('lognormal', 'Lognormal: '),
):
    R, p = results.distribution_compare('power_law', alternative)
    print(caption, R, p)
示例#11
0
def plot_bounties(ff):
    """Analyse bounty amounts per quarter and box-plot the last 20 quarters.

    Reads ``reports_team.json`` and ``sum_team.json`` from the working
    directory. Bounty amounts are binned per month (2001-2018) and then per
    quarter. For every quarter with more than three bounties a Shapiro-Wilk
    test and a two-sample KS test against all bounties are printed. OLS
    regressions of the quarterly mean and median are printed and drawn, and
    the quarterly distributions are shown as box plots on a log scale.

    Args:
        ff: selection mode.
            0 -- IBB teams, all reports ('IBB-all');
            1 -- IBB teams, only reports rated "high" or "critical";
            2 -- all teams, all reports;
            3 -- all teams, only "high"/"critical" reports.
    """
    first_year = 2001
    years = 18  # 2001-2018

    with open("reports_team.json", "r") as fp:
        reports_team = json.load(fp)

    # Not used below; still loaded so a missing file fails here as before.
    with open("sum_team.json", "r") as fp:
        sum_team = json.load(fp)

    if ff < 2:
        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']
        print('list follows')
        for j in ibb_list:
            print(reports_team[j])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and total bounty per team.
    most_team = dict()
    sum_bounty_team = dict()
    for team in ibb_list:
        largest = 0.0
        awarded_total = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report['id'], ' - bounty not found')
                continue
            awarded_total += amount
            if amount > largest:
                largest = amount
        most_team[team] = largest
        sum_bounty_team[team] = awarded_total

    print(most_team)
    print(sum_bounty_team)

    # One slot per calendar month of the covered years.
    total_months = 12 * years
    month2sum = [0] * total_months
    month2money = [0.0] * total_months
    month2bountylist = [[] for _ in range(total_months)]

    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            # datetime.month is 1-based, so subtract 1: January of the first
            # year is slot 0 and December of the last year is the final slot.
            # Without the correction every quarter window below is shifted by
            # one month and December 2018 indexes past the end of the lists.
            slot = (int(datetime_obj.year) - first_year) * 12 + datetime_obj.month - 1
            month2sum[slot] += 1
            try:
                if (ff == 0 or ff == 2) or (report['severity_rating'] == "high") or (report['severity_rating'] == "critical"):
                    amount = float(report['total_awarded_bounty_amount'])
                    month2money[slot] += amount
                    month2bountylist[slot].append(amount)
            except KeyError:
                # Report without a severity rating or bounty amount.
                continue

    print(month2bountylist)

    quarter_num = years*4
    quarter_sum = []
    quarter_av = []
    carlosplt.pre_paper_plot()

    quarter2bountylist = []

    # Tick labels: "Q1'YY" on the first quarter of each year, blank otherwise.
    quartersx = []
    for i in range(1, years+1):
        for j in range(1, 5):
            if j == 1:
                quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
            else:
                quartersx.append(' ')

    # Aggregate three consecutive months into one quarter.
    for j in range(quarter_num):
        money = sum(month2money[3*j:3*(j+1)])
        bounties = [item for sublist in month2bountylist[3*j:3*(j+1)] for item in sublist]
        count = len(bounties)
        print(bounties)
        quarter_sum.append(count)
        quarter2bountylist.append(bounties)
        try:
            quarter_av.append(money/count)
        except ZeroDivisionError:
            quarter_av.append(0)

    # Only the last five years (20 quarters) are plotted.
    n = len(quarter_sum[-4*5:])
    x = range(len(quarter_sum[-4*5:]))

    if ff == 0:
        labeltext = 'IBB-all'
    elif ff == 1:
        labeltext = 'IBB-high'
    elif ff == 2:
        labeltext = 'All-all'
    elif ff == 3:
        labeltext = 'All-high'

    ## Shapiro normality test for each quarter, plus a KS comparison of the
    ## quarter against the pooled bounties of all quarters.
    reference = []
    for i in quarter2bountylist:
        reference += i
    print(reference)

    for i in quarter2bountylist:
        print(i)
        data = i
        if len(i) > 3:
            stat, p = shapiro(data)
            print('Statistics=%.3f, p=%.3f' % (stat, p))
            # interpret
            alpha = 0.01
            if p > alpha:
                print('Sample looks Gaussian (fail to reject H0)')
            else:
                print('Sample does not look Gaussian (reject H0)')

            w, p = ks_2samp(i, reference)
            if p > alpha:
                print('Samples look similar')
            else:
                print('Samples do not look similar')

    ## Linear regression of average and median
    # Average: quarters without any bounty (average 0) are left out.
    xx = []
    y = []
    for counter, average in enumerate(quarter_av[-4*5:]):
        if average != 0:
            y.append(average)
            xx.append(counter)

    X = sm.add_constant(xx)
    model = sm.OLS(y, X).fit()
    predictions = model.predict(X)
    plt.plot(xx, predictions)
    print(model.summary())
    print(model.summary().as_latex())

    # Median: empty quarters are left out.
    xx = []
    y = []
    for counter, bounties in enumerate(quarter2bountylist[-4*5:]):
        if bounties != []:
            y.append(median(bounties))
            xx.append(counter)

    X = sm.add_constant(xx)

    model = sm.OLS(y, X).fit()
    predictions = model.predict(X)
    plt.plot(xx, predictions, color='darkred')
    print(model.summary())
    print(model.summary().as_latex())

    ## Create box plot
    bp = plt.boxplot((quarter2bountylist[-4*5:]), whis=[5, 95], patch_artist=True, positions=x)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='darkred')
    plt.setp(bp['caps'], color='darkred')
    plt.setp(bp['fliers'], markersize=3.0)
    plt.yscale('log')
    plt.ylim(top=50000)
    plt.ylim(bottom=1)
    plt.xticks(np.arange(0, n), quartersx[-4*5:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
示例#12
0
def plot_demographics(ff):
    """Bar-plot bounty counts or first-time reporters per quarter.

    Reads ``reports_team.json`` and ``sum_team.json`` from the working
    directory, bins the reports per month (2001-2018) and per quarter, and
    draws the last 20 quarters as a bar chart.

    Args:
        ff: selection mode.
            0 -- IBB teams, number of bounties per quarter ('num - IBB');
            1 -- IBB teams, number of first-time reporters ('new - IBB');
            2 -- all teams, number of bounties ('num - All');
            3 -- all teams, number of first-time reporters ('new - All').
    """
    if ff == 0:
        labeltext = 'num - IBB'
    elif ff == 1:
        labeltext = 'new - IBB'
    elif ff == 2:
        labeltext = 'num - All'
    elif ff == 3:
        labeltext = 'new - All'

    first_year = 2001
    years = 18  # 2001-2018

    with open("reports_team.json", "r") as fp:
        reports_team = json.load(fp)

    # Not used below; still loaded so a missing file fails here as before.
    with open("sum_team.json", "r") as fp:
        sum_team = json.load(fp)

    if ff < 2:
        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']
        print('list follows')
        for j in ibb_list:
            print(reports_team[j])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and total bounty per team.
    most_team = dict()
    sum_bounty_team = dict()
    for team in ibb_list:
        largest = 0.0
        awarded_total = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report['id'], ' - bounty not found')
                continue
            awarded_total += amount
            if amount > largest:
                largest = amount
        most_team[team] = largest
        sum_bounty_team[team] = awarded_total

    print(most_team)
    print(sum_bounty_team)

    # One slot per calendar month of the covered years.
    total_months = 12 * years
    month2sum = [0] * total_months
    month2newreporters = [0] * total_months
    month2money = [0.0] * total_months
    month2bountylist = [[] for _ in range(total_months)]
    # Reporter ids seen so far; a set gives constant-time membership tests.
    seen_reporters = set()

    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            # datetime.month is 1-based, so subtract 1: January of the first
            # year is slot 0 and December of the last year is the final slot.
            # Without the correction every quarter window below is shifted by
            # one month and December 2018 indexes past the end of the lists.
            slot = (int(datetime_obj.year) - first_year) * 12 + datetime_obj.month - 1
            month2sum[slot] += 1
            try:
                reporter = report['reporter']['id']
                amount = float(report['total_awarded_bounty_amount'])
                month2money[slot] += amount
                month2bountylist[slot].append(amount)
                if reporter not in seen_reporters:
                    month2newreporters[slot] += 1
                    seen_reporters.add(reporter)
            except KeyError:
                print('Error with report ', report['id'])
                continue

    print(month2bountylist)

    quarter_num = years*4
    quarter_sum = []
    carlosplt.pre_paper_plot()

    # NOTE(review): never filled in this function, so `reference` below is
    # always empty; kept so the printed output stays the same.
    quarter2bountylist = []

    # Tick labels: "Q1'YY" on the first quarter of each year, blank otherwise.
    quartersx = []
    for i in range(1, years+1):
        for j in range(1, 5):
            if j == 1:
                quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
            else:
                quartersx.append(' ')

    # Aggregate three consecutive months into one quarter. Odd modes count
    # first-time reporters, even modes count bounties.
    for j in range(quarter_num):
        new_reporters = sum(month2newreporters[3*j:3*(j+1)])
        bounties = [item for sublist in month2bountylist[3*j:3*(j+1)] for item in sublist]
        if ff == 1 or ff == 3:
            quarter_sum.append(new_reporters)
        else:
            quarter_sum.append(len(bounties))

    # Only the last five years (20 quarters) are plotted.
    n = len(quarter_sum[-4*5:])
    x = range(len(quarter_sum[-4*5:]))
    width = 1/2

    reference = []
    for i in quarter2bountylist:
        reference += i
    print(reference)

    ## Create bars plot
    plt.bar(x[-4*5:], quarter_sum[-4*5:], width, color='darkblue', label='Number', edgecolor='black')

    plt.xticks(np.arange(0, n), quartersx[-4*5:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)