Example #1
    def get_pfx2as_file(self):
        location = self.spt_dir
        cmlib.make_dir(location)

        tmp = os.listdir(self.spt_dir)
        for line in tmp:
            if 'pfx2as' in line:
                return 0 # we already have a prefix2as file

        print 'Downloading prefix to AS file ...'
        year, month = self.sdate[:4], self.sdate[4:6] # YYYY, MM
        webloc = 'http://data.caida.org/datasets/routing/routeviews-prefix2as' +\
                '/' + year + '/' + month + '/'

        webraw = cmlib.get_weblist(webloc)
        target_line = ''
        for line in webraw.split('\n'):
            if self.sdate in line:
                target_line = line
                break

        if target_line == '':
            print 'Downloading prefix to AS file failed: no such date!'
            return 0

        fname = target_line.split()[0]
        urllib.urlretrieve(webloc+fname, location+fname)
        subprocess.call('gunzip -c '+location+fname+' > '+\
                location+fname.replace('.gz', ''), shell=True)
        os.remove(location+fname)

        return 0
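
The snippet above relies on a small cmlib helper module that these examples never show. A minimal sketch of the two helpers it calls, assuming make_dir() simply creates the directory tree and get_weblist() returns the remote directory index as plain text (the real helper presumably also reduces the HTML index to 'name ... size' lines):

# Hypothetical sketch of the assumed cmlib helpers; not the project's actual code (Python 2).
import os
import urllib2

def make_dir(path):
    # Create the directory (and any missing parents) if it does not exist yet.
    if not os.path.isdir(path):
        os.makedirs(path)

def get_weblist(url):
    # Fetch an HTTP directory index and return it as text; the callers split it
    # into lines and expect a file name first and a size last on each line.
    return urllib2.urlopen(url).read()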
Example #2
def time_series_plot(granu, my_dict, describe):
    value = []

    dt = my_dict.keys()
    dt.sort()
    for key in dt:
        value.append(my_dict[key])
    dt = [datetime.datetime.fromtimestamp(ts)
          for ts in dt]  # int to obj. required!

    fig = plt.figure(figsize=(16, 10))
    ax = fig.add_subplot(111)
    ax.plot(dt, value, 'k-')
    ax.set_ylabel(describe)
    ax.set_xlabel('Datetime')
    myFmt = mpldates.DateFormatter('%Y-%m-%d %H%M')
    ax.xaxis.set_major_formatter(myFmt)
    plt.xticks(rotation=45)

    # make a dir according to datetime, granularity and h threshold
    sdate = describe.split('_')[0]
    cmlib.make_dir(datadir + 'output/' + sdate + '_' + str(granu) + '/')
    plt.savefig(datadir + 'output/' + sdate + '_' + str(granu) + '/' +
                describe + '.pdf')
    plt.close()

    # Record plot data in a separate file for future use
    f = open(datadir+'output/'+sdate+'_'+str(granu)+'/'+\
            describe+'.txt', 'w')
    for i in xrange(0, len(dt)):
        f.write(str(dt[i]) + ',' + str(value[i]) + '\n')
    f.close()

    return 0
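
A minimal usage sketch with made-up values: the function expects my_dict to be keyed by unix timestamps and describe to start with a YYYYMMDD date, which it splits on '_' to name the output directory.

# Hypothetical call; the granularity is in seconds and datadir must already be defined.
counts = {1369102200: 12, 1369103100: 30, 1369104000: 7}  # unix timestamp -> value
time_series_plot(900, counts, '20130521_update_count')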
Example #3
    def __init__(self, index):
        self.index = index
        self.sdate = daterange[index][0]
        self.edate = daterange[index][1]

        self.sdatetime_obj = datetime.datetime.strptime(self.sdate, '%Y%m%d')
        self.edatetime_obj = datetime.datetime.strptime(
            self.edate, '%Y%m%d') + datetime.timedelta(days=1)

        # location to store supporting files
        self.spt_dir = spt_dir + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(self.spt_dir)

        # Store the rib information of every collector (Note: do not change this!)
        self.rib_info_file = rib_info_dir + self.sdate + '_' + self.edate + '.txt'

        self.co_mo = dict(
        )  # collector: monitor list (does not store empty list)
        self.mo_asn = dict()
        self.mo_cc = dict()
        self.mo_tier = dict()

        self.as2nation = dict()
        self.as2name = dict()

        # Note: Occasionally run to get the latest data. (Now up to 20141225)
        #self.get_fib_size_file()
        #self.get_AS_num_file()

        self.dt_anchor1 = datetime.datetime(
            2003, 2, 3, 19, 0)  # so far we have never used data prior to this
        self.dt_anchor2 = datetime.datetime(2006, 2, 1, 21, 0)
    def get_update_list(self):
        tmp_dir = self.get_listfile_dir()
        cmlib.make_dir(tmp_dir)
        flist = open(self.listfile, 'w')  
    
        month_list = self.get_month_list_dot()
        for month in month_list:
            web_location = ''
            if self.co.startswith('rrc'):
                web_location = rrc_root + self.co + '/' + month + '/' 
            else:
                web_location = rv_root + self.co + '/bgpdata/' + month + '/UPDATES/'
                web_location = web_location.replace('//', '/')  # when name is ''

            webraw = cmlib.get_weblist('http://' + web_location)
            cmlib.make_dir(datadir+web_location)

            for line in webraw.split('\n'):
                if not 'updates' in line or line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                fsize = cmlib.parse_size(size)
                filename = line.split()[0]  # omit uninteresting info
                filedate = filename.split('.')[-3]

                # check whether its date is in our range
                if int(filedate) < int(self.sdate) or int(filedate) > int(self.edate):
                    continue
                # note: storing the original .bz2/.gz file name makes logic clearer
                flist.write(web_location+filename+'.txt.gz|'+str(fsize)+'\n')
                logging.info('record file name: '+web_location+filename+'.txt.gz|'+str(fsize))

        return 0
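
get_update_list() depends on cmlib.parse_size() to turn the size column of the listing into a number; a plausible sketch, assuming sizes appear either as plain byte counts or with a K/M/G suffix:

# Hypothetical sketch of cmlib.parse_size; the real helper may differ.
def parse_size(size_str):
    units = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}
    size_str = size_str.strip()
    if size_str and size_str[-1].upper() in units:
        return int(float(size_str[:-1]) * units[size_str[-1].upper()])
    return int(float(size_str))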
Example #5
    def get_pfx2as_file(self):
        location = datadir + 'support/' + self.sdate + '/'
        cmlib.make_dir(location)

        tmp = os.listdir(datadir+'support/'+self.sdate+'/')
        for line in tmp:
            if 'pfx2as' in line:
                return 0 # we already have a prefix2as file

        print 'Downloading prefix to AS file ...'
        year, month = self.sdate[:4], self.sdate[4:6] # YYYY, MM
        webloc = 'http://data.caida.org/datasets/routing/routeviews-prefix2as' +\
                '/' + year + '/' + month + '/'

        webraw = cmlib.get_weblist(webloc)
        target_line = ''
        for line in webraw.split('\n'):
            if self.sdate in line:
                target_line = line
                break

        if target_line == '':
            print 'Downloading prefix to AS file failed: no such date!'
            return 0

        fname = target_line.split()[0]
        urllib.urlretrieve(webloc+fname, location+fname)
        subprocess.call('gunzip -c '+location+fname+' > '+\
                location+fname.replace('.gz', ''), shell=True)
        os.remove(location+fname)

        return 0
Example #6
def cdf_plot(granu, my_dict, describe):
    # my_dict: DV value -> existence time
    xlist = [0]
    ylist = [0]
    for key in sorted(my_dict):  # must sort by key
        xlist.append(key)
        ylist.append(my_dict[key])

    xmax = max(xlist)
    ymax = max(ylist)

    fig = plt.figure(figsize=(16, 10))
    ax = fig.add_subplot(111)
    ax.plot(xlist, ylist, 'k-')
    ax.set_ylim([-0.1 * ymax, 1.1 * ymax])
    ax.set_xlim([-0.1 * xmax, 1.1 * xmax])
    ax.set_ylabel('y')
    ax.set_xlabel('x')

    # make a dir according to datetime, granularity and h threshold
    sdate = describe.split('_')[0]
    cmlib.make_dir(datadir + 'output/' + sdate + '_' + str(granu) + '/')
    plt.savefig(datadir + 'output/' + sdate + '_' + str(granu) + '/' +
                describe + '.pdf')
    plt.close()

    # Record plot data in a separate file for future use
    f = open(datadir+'output/'+sdate+'_'+str(granu)+'/'+\
            describe+'.txt', 'w')
    for i in xrange(0, len(xlist)):
        f.write(str(xlist[i]) + ',' + str(ylist[i]) + '\n')
    f.close()

    return 0
Example #7
    def __init__(self, index):
        self.index = index
        self.sdate = daterange[index][0] 
        self.edate = daterange[index][1] 

        self.sdatetime_obj = datetime.datetime.strptime(self.sdate, '%Y%m%d')
        self.edatetime_obj = datetime.datetime.strptime(self.edate, '%Y%m%d') + datetime.timedelta(days=1)
        
        # location to store supporting files
        self.spt_dir = spt_dir + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(self.spt_dir)

        # Store the rib information of every collector (Note: do not change this!)
        self.rib_info_file = rib_info_dir + self.sdate + '_' + self.edate + '.txt'
    
        self.co_mo = dict() # collector: monitor list (does not store empty list)
        self.mo_asn = dict()
        self.mo_cc = dict()
        self.mo_tier = dict()

        self.as2nation = dict()
        self.as2name = dict()

        # Note: Occasionally run to get the latest data. (Now up to 20141225)
        #self.get_fib_size_file()
        #self.get_AS_num_file()

        self.dt_anchor1 = datetime.datetime(2003,2,3,19,0) # so far we have never used data prior to this
        self.dt_anchor2 = datetime.datetime(2006,2,1,21,0)
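
The constructor above indexes a module-level daterange table by an integer order; the code only assumes each entry is a pair of 'YYYYMMDD' strings, roughly:

# Hypothetical illustration of the daterange table the constructors index into.
daterange = {
    0: ('20130101', '20130131'),    # (sdate, edate)
    286: ('20141201', '20141225'),
}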
Example #8
def box_plot_grouped(granu, my_dict, describe):
    data_lists = []
    for k in my_dict.keys():  # dv ranges
        tmp_list = []
        for k2 in my_dict[k].keys():
            for i in xrange(0, len(my_dict[k][k2])):
                tmp_list.append(k2)
        data_lists.append(tmp_list)

    #plot_lists = []
    #large = 0 # the number of sub lists
    #for list in data_lists:
    #if len(list) > large:
    #large = len(list)
    #for i in xrange(0, large):
    #for j in xrange(0, len(data_lists)):
    #tmp_list = []
    #try:
    #tmp_list.append(data_lists[j][i])
    #except:
    #tmp_list.append(0)
    #plot_lists.append(tmp_list)

    #my_labels = my_dict.keys()
    #fig = plt.figure(figsize=(16, 10))
    #ax = fig.add_subplot(111)
    #ax.boxplot(data_lists)

    # make a dir according to datetime, granularity and h threshold
    sdate = describe.split('_')[0]
    cmlib.make_dir(datadir + 'output/' + sdate + '_' + str(granu) + '/')
    plt.savefig(datadir+'output/'+sdate+'_'+str(granu)+'/'+describe+'.pdf',\
            bbox_inches='tight')
    plt.close()

    # Record plot data in a separate file for future use
    f = open(datadir+'output/'+sdate+'_'+str(granu)+'/'+\
            describe+'.txt', 'w')
    for k in my_dict.keys():
        f.write(str(k) + ':')
        for k2 in my_dict[k].keys():
            f.write(str(k2) + '|')
            f.write(str(my_dict[k][k2]))
            f.write(',')
        f.write('\n')
    f.close()

    return 0
Example #9
    def get_update_list(self):
        tmp_dir = self.get_listfile_dir()
        cmlib.make_dir(tmp_dir)
        flist = open(self.listfile, 'w')

        month_list = self.get_month_list_dot()
        for month in month_list:
            web_location = ''
            if self.co.startswith('rrc'):
                web_location = rrc_root + self.co + '/' + month + '/'
            else:
                web_location = rv_root + self.co + '/bgpdata/' + month + '/UPDATES/'
                web_location = web_location.replace('//',
                                                    '/')  # when name is ''

            webraw = cmlib.get_weblist('http://' + web_location)
            cmlib.make_dir(datadir + web_location)

            for line in webraw.split('\n'):
                if not 'updates' in line or line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                fsize = cmlib.parse_size(size)
                filename = line.split()[0]  # omit uninteresting info
                filedate = filename.split('.')[-3]

                # check whether its date is in our range
                if int(filedate) < int(self.sdate) or int(filedate) > int(
                        self.edate):
                    continue
                # note: storing the original .bz2/.gz file name makes logic clearer
                flist.write(web_location + filename + '.txt.gz|' + str(fsize) +
                            '\n')
                logging.info('record file name: ' + web_location + filename +
                             '.txt.gz|' + str(fsize))

        return 0
    def apfx_metrics_fpath(self):
        dir = metrics_output_root + str(self.granu) + '/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(dir)
        return dir + 'active_pfx_metrics.txt'
    order = 286
    unix = cluster1_2[0]

    sdate = daterange[order][0]
    edate = daterange[order][1]

    rib_files = list()
    for co in all_collectors.keys():
        dl = Downloader(sdate, edate, co)
        rfilepath = dl.download_one_rib_before_unix(sdate, unix) # download RIB       
        if rfilepath != -1: # -1 means the download failed
            rib_files.append(rfilepath)

    # output the rib file-list
    dir = final_output_root + 'additional_rib_list/' 
    cmlib.make_dir(dir)
    ofpath = dir + str(order) + '_' + str(unix) + '.txt'
    f = open(ofpath, 'w')
    for rpath in rib_files:
        f.write(rpath + '\n')
    f.close()

#----------------------------------------------------------------------------
# The main function
if __name__ == '__main__' and 1 == 2:
    order_list = [303]
    # we select all collectors that have appropriate start dates
    collector_list = dict()
    for i in order_list:
        collector_list[i] = list()
        for co in all_collectors.keys():
    def get_num_feature_actmon(self):

        # Get the average of each feature
        total_f2avg = dict()

        total_f2vlist = dict()
        for i in range(feature_num):
            total_f2vlist[i] = list()

        for uds in self.uds_list:
            for slot in uds.dtobj_list:
                print '*************Getting total feature values for slot ', slot
                sdt_unix = calendar.timegm(slot[0].utctimetuple())
                rpath = uds.numf_distr_output_dir() + str(sdt_unix) + '.txt'
                f = open(rpath, 'r')
                for line in f:
                    line = line.rstrip('\n')
                    name = line.split(':')[0]
                    mydict = line.replace(name+':', '')
                    mydict = ast.literal_eval(mydict)

                    if name == 'T':
                        for fea in mydict:
                            total_f2vlist[fea].append(mydict[fea])
                f.close()

        for fea in total_f2vlist: 
            total_f2avg[fea] = float(sum(total_f2vlist[fea])) / float(len(total_f2vlist[fea]))


        # Simply set the threshold for active monitors to average/N
        f2thre = dict()
        for i in range(feature_num):
            f2thre[i] = total_f2avg[i]/10.0

        print 'Get the set of active monitors for each slot and each feature'
        # To save memory, we map monitor ip to an integer
        mon2id = dict()
        count = 0
        for uds in self.uds_list:
            for mon in uds.monitors:
                try:
                    test = mon2id[mon]
                except:
                    mon2id[mon] = count
                    count += 1

        unix2fea2monset = dict()
        for uds in self.uds_list:
            for slot in uds.dtobj_list:
                print '*************Getting highly active monitors for slot ', slot
                sdt_unix = calendar.timegm(slot[0].utctimetuple())
                unix2fea2monset[sdt_unix] = dict()

                rpath = uds.numf_distr_output_dir() + str(sdt_unix) + '.txt'
                f = open(rpath, 'r')
                for line in f:
                    line = line.rstrip('\n')
                    name = line.split(':')[0]
                    mydict = line.replace(name+':', '')
                    mydict = ast.literal_eval(mydict)

                    if name != 'T':
                        id = mon2id[name]
                        for fea in mydict:
                            if mydict[fea] >= f2thre[fea]:
                                try:
                                    unix2fea2monset[sdt_unix][fea].add(id)
                                except:
                                    unix2fea2monset[sdt_unix][fea] = set([id])
                f.close()


        # store the info in a middle file. One file for one feature
        filedict = dict()
        dir = metrics_output_root + str(self.uds_list[0].granu) + '/actmon/'
        cmlib.make_dir(dir)
        for i in range(feature_num):
            filedict[i] = open(dir+str(i)+'.txt', 'w')
        for unix in unix2fea2monset:
            for fea in unix2fea2monset[unix]:
                filedict[fea].write(str(unix)+':'+str(unix2fea2monset[unix][fea])+'\n')
        for i in range(feature_num):
            filedict[i].close()

        f = open(dir+'mon2id.txt', 'w')
        f.write(str(mon2id))
        f.close()
Example #13
    def download_one_rib_before_unix(self, my_date,
                                     unix):  # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print 'Getting list from ' + 'http://' + web_location
        except:
            return -1

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        rib_list = filter(lambda a: a != '', rib_list)
        rib_list = filter(lambda a: a != '\n', rib_list)
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        ok_rib_list = list()  # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None  # the RIB closest to unix
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate + ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M')
            runix = time_lib.mktime(
                objdt.timetuple()) + 8 * 60 * 60  # offset mktime() for the local timezone (assumed UTC+8)
            print objdt, runix, unix
            if runix <= unix and unix - runix < min:
                min = unix - runix
                print 'min changed to ', min
                target_line = line

        print 'Selected RIB:', target_line
        if target_line == None:
            return -1
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'parsed RIB already exists; skip downloading'
            return full_loc + '.txt.gz'  # Do not download

        if os.path.exists(full_loc):
            cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
            cmlib.pack_gz(full_loc + '.txt')
            return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'
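
The runix computation above compensates for the machine's local timezone by adding eight hours to mktime(); a timezone-independent sketch of the same conversion, assuming the RIB file names encode UTC timestamps, is calendar.timegm():

# Sketch only: interpret the naive datetime as UTC regardless of the local timezone.
import calendar
import datetime

objdt = datetime.datetime.strptime('201405011200', '%Y%m%d%H%M')
runix = calendar.timegm(objdt.timetuple())  # seconds since the epoch, in UTC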
Example #14
    def numf_metrics_fpath(self):
        dir = metrics_output_root + str(
            self.granu) + '/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(dir)
        return dir + 'num_fea_metrics.txt'
    def numf_metrics_fpath(self):
        dir = metrics_output_root + str(self.granu) + '/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(dir)
        return dir+'num_fea_metrics.txt'
    def rm_reset_one_list(self, rib_full_loc, tmp_full_listfile):
        ## record reset info into a temp file
        reset_info_file = datadir + 'peer_resets.txt'

        print self.co, ' obtaining BGP session reset start-end period...'
        subprocess.call('perl '+projectdir+'tool/bgpmct.pl -rf '+rib_full_loc+' -ul '+\
                tmp_full_listfile + ' > '+reset_info_file, shell=True)

        if os.path.exists(reset_info_file): 
            if os.path.getsize(reset_info_file) == 0:
                print 'no reset at all!'
                return
        else:
            print 'no reset at all!'
            return 
        
        peer_resettime = dict() # peer: list of [reset start, reset end]
        resetf = open(reset_info_file, 'r')
        for line in resetf:
            if line.startswith('run') or line.startswith('/') or ('#' in line):
                continue
            if ':' in line:
                now_peer = line.rstrip(':\n')
                continue

            stime_unix, endtime_unix= int(line.split(',')[0]), int(line.split(',')[1])
            try:
                peer_resettime[now_peer].append([stime_unix, endtime_unix])
            except:
                peer_resettime[now_peer] = [[stime_unix, endtime_unix],]
        resetf.close()

        # write the reset info into a file
        # TODO deal with gap > 32 days
        cmlib.make_dir(reset_info_dir)
        f = open(self.reset_info, 'a')
        f.write(self.co+':\n')
        for p in peer_resettime:
            f.write(p+'@\n')
            for rs in peer_resettime[p]:
                f.write(str(rs)+'\n')
        f.close()
        '''
        # XXX only for once start (continue after the program stopped because of memo issue)
        # FIXME Giant bug in these code. In future, re-download the affected collectors
        this_co_peers = []
        peer_file = cmlib.peer_path_by_rib_path(rib_full_loc)
        fff = open(peer_file, 'r')
        for line in fff:
            peer = line.split('@')[0]
            this_co_peers.append(peer)
        fff.close()
        
        peer_resettime = dict()
        record = False
        f = open(self.reset_info, 'r')
        for line in f:
            line = line.rstrip('@\n')
            if ':' in line:
                record = False
                continue
            if line[0].isdigit():
                record = True
                p = line
                peer_resettime[p] = list()
            elif record is True:
                thelist = ast.literal_eval(line)
                peer_resettime[p].append(thelist)
            else:
                assert 1 == 0
        f.close()
        # XXX only for once end
        '''

        # different collectors in the same file
        for p in peer_resettime:
            if ':' in p: # We do not really delete IPv6 updates
                continue
            #if p not in this_co_peers: # XXX used with the previous commented out code
            #    continue
            if p not in self.global_peers: # We ignore non-global peers to save time
                continue
            for l in peer_resettime[p]:
                print 'deleting reset for ', p
                self.delete_reset_updates(p, l[0], l[1], tmp_full_listfile)
                #h = hpy()
                #print h.heap()

        os.remove(reset_info_file) #XXX comment out when 'doing it once'...
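
The parsing loop above implies the rough shape of the bgpmct.pl output it consumes: header lines that are skipped, a peer line ending with ':', then comma-separated reset start/end unix times. An illustrative, made-up fragment:

# Illustrative only; the exact bgpmct.pl output format is not shown in these examples.
example_reset_info = (
    '187.16.216.1:\n'           # peer line, ends with ':'
    '1369102200,1369105800\n'   # reset start_unix,end_unix
    '1369200000,1369200600\n'
)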
Example #17
    def get_num_feature_actmon(self):

        # Get the average of each feature
        total_f2avg = dict()

        total_f2vlist = dict()
        for i in range(feature_num):
            total_f2vlist[i] = list()

        for uds in self.uds_list:
            for slot in uds.dtobj_list:
                print '*************Getting total feature values for slot ', slot
                sdt_unix = calendar.timegm(slot[0].utctimetuple())
                rpath = uds.numf_distr_output_dir() + str(sdt_unix) + '.txt'
                f = open(rpath, 'r')
                for line in f:
                    line = line.rstrip('\n')
                    name = line.split(':')[0]
                    mydict = line.replace(name + ':', '')
                    mydict = ast.literal_eval(mydict)

                    if name == 'T':
                        for fea in mydict:
                            total_f2vlist[fea].append(mydict[fea])
                f.close()

        for fea in total_f2vlist:
            total_f2avg[fea] = float(sum(total_f2vlist[fea])) / float(
                len(total_f2vlist[fea]))

        # Simply set the threshold for active monitors to average/N
        f2thre = dict()
        for i in range(feature_num):
            f2thre[i] = total_f2avg[i] / 10.0

        print 'Get the set of active monitors for each slot and each feature'
        # To save memory, we map monitor ip to an integer
        mon2id = dict()
        count = 0
        for uds in self.uds_list:
            for mon in uds.monitors:
                try:
                    test = mon2id[mon]
                except:
                    mon2id[mon] = count
                    count += 1

        unix2fea2monset = dict()
        for uds in self.uds_list:
            for slot in uds.dtobj_list:
                print '*************Getting highly active monitors for slot ', slot
                sdt_unix = calendar.timegm(slot[0].utctimetuple())
                unix2fea2monset[sdt_unix] = dict()

                rpath = uds.numf_distr_output_dir() + str(sdt_unix) + '.txt'
                f = open(rpath, 'r')
                for line in f:
                    line = line.rstrip('\n')
                    name = line.split(':')[0]
                    mydict = line.replace(name + ':', '')
                    mydict = ast.literal_eval(mydict)

                    if name != 'T':
                        id = mon2id[name]
                        for fea in mydict:
                            if mydict[fea] >= f2thre[fea]:
                                try:
                                    unix2fea2monset[sdt_unix][fea].add(id)
                                except:
                                    unix2fea2monset[sdt_unix][fea] = set([id])
                f.close()

        # store the info in a middle file. One file for one feature
        filedict = dict()
        dir = metrics_output_root + str(self.uds_list[0].granu) + '/actmon/'
        cmlib.make_dir(dir)
        for i in range(feature_num):
            filedict[i] = open(dir + str(i) + '.txt', 'w')
        for unix in unix2fea2monset:
            for fea in unix2fea2monset[unix]:
                filedict[fea].write(
                    str(unix) + ':' + str(unix2fea2monset[unix][fea]) + '\n')
        for i in range(feature_num):
            filedict[i].close()

        f = open(dir + 'mon2id.txt', 'w')
        f.write(str(mon2id))
        f.close()
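
get_num_feature_actmon() reads per-slot files whose lines it splits on ':' and feeds to ast.literal_eval(); the implied format is one feature dict per monitor plus a 'T' line holding the totals. A tiny hypothetical file of that shape:

# Hypothetical content of <slot_unix>.txt in the form the parser above expects.
with open('/tmp/1369102200.txt', 'w') as f:
    f.write('T:{0: 120, 1: 45}\n')           # 'T' = per-feature totals over all monitors
    f.write('192.0.2.1:{0: 60, 1: 10}\n')    # per-monitor feature values
    f.write('198.51.100.7:{0: 60, 1: 35}\n')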
Example #18
        for line in f:
            line = line.rstrip('\n')
            pfxset.add(line)
        f.close()

        my_period.pfx2as_LPM(pfxset)

        #mf = Micro_fighter(reaper)
        #mf.analyze_slot(1369102200)

    #------------------------------------------------------------
    # plot matrices of every middle file
    if action['plot_matrix']:
        mdir = my_period.get_middle_dir()
        plotdir = mdir + 'matrix/'
        cmlib.make_dir(plotdir)

        mfiles = os.listdir(mdir)
        for mf in mfiles:
            if not os.path.isfile(mdir+mf):
                continue  # skip sub-directories; do not modify the list while iterating over it
            print 'Plotting matrix:', mdir+mf
            plot_matrix(mdir+mf, plotdir+mf.split('.')[0]+'.pdf') #TODO specify a range?

    reaperlist.append(reaper) 


#------------------------------------------------------------------
#combined analysis of all reapers
if action['MR']:
Example #19
    unix = cluster1_2[0]

    sdate = daterange[order][0]
    edate = daterange[order][1]

    rib_files = list()
    for co in all_collectors.keys():
        dl = Downloader(sdate, edate, co)
        rfilepath = dl.download_one_rib_before_unix(sdate,
                                                    unix)  # download RIB
        if rfilepath != -1:  # -1 means the download failed
            rib_files.append(rfilepath)

    # output the rib file-list
    dir = final_output_root + 'additional_rib_list/'
    cmlib.make_dir(dir)
    ofpath = dir + str(order) + '_' + str(unix) + '.txt'
    f = open(ofpath, 'w')
    for rpath in rib_files:
        f.write(rpath + '\n')
    f.close()

#----------------------------------------------------------------------------
# The main function
if __name__ == '__main__' and 1 == 2:
    order_list = [303]
    # we select all collectors that have appropriate start dates
    collector_list = dict()
    for i in order_list:
        collector_list[i] = list()
        for co in all_collectors.keys():
Example #20
    def pfx_metrics_CDF_met2total(self):
        uds = self.uds_list[
            0]  # XXX note: in the ISCC paper we only analyze one period!
        metrics = [
            'UQ', 'PMR', 'GC', 'CR1', 'CR4', 'CR8', 'CR0.1', 'CR0.2', 'CR0.3'
        ]
        met2list = dict()
        for met in metrics:
            met2list[met] = list()

        fname = uds.apfx_metrics_fpath()
        f = open(fname, 'r')
        for line in f:
            line = line.rstrip('\n')
            attr = line.split('|')
            met2list['UQ'].append(
                int(attr[1])
            )  # get the metrics. Hard-coding is bad. But we save time here.
            met2list['PMR'].append(float(attr[2]))
            met2list['GC'].append(float(attr[3]))
            met2list['CR1'].append(float(attr[4]))
            met2list['CR4'].append(float(attr[5]))
            met2list['CR8'].append(float(attr[6]))
            met2list['CR0.1'].append(float(attr[7]))
            met2list['CR0.2'].append(float(attr[8]))
            met2list['CR0.3'].append(float(attr[9]))
        f.close()

        fig = plt.figure(figsize=(20, 13))
        ax = fig.add_subplot(111)
        count = 0
        for mtype in met2list:
            if mtype == 'UQ':
                continue
            v2count = dict()
            for v in met2list[mtype]:
                try:
                    v2count[v] += 1
                except:
                    v2count[v] = 1
            mycdf = cmlib.value_count2cdf(v2count)
            xlist = list()
            ylist = list()

            for key in sorted(mycdf):
                xlist.append(key)
                ylist.append(mycdf[key])

            ax.plot(xlist, ylist,\
                    color=colors[count], marker=styles[count], markersize=25, markevery=(len(xlist)/2,len(xlist)), label=mtype, lw=6, alpha=0.8)
            #ax.plot(xlist, ylist, color=colors[count], label=mtype, lw=6, alpha=0.8)
            count += 1

        ax.set_ylabel('Quantity of time slot')
        ax.set_xlabel(' Metric value')
        legend = ax.legend(loc='lower right', shadow=False)
        ax.tick_params(axis='y', pad=10)
        ax.tick_params(axis='x', pad=10)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

        cmlib.make_dir(env.metric_plot_dir)
        output_loc = env.metric_plot_dir + 'pfx_metrics_CDF.pdf'
        plt.savefig(output_loc, bbox_inches='tight')
        plt.clf()  # clear the figure
        plt.close()
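
This plotting code (and the one in the next example) builds its curves with cmlib.value_count2cdf(); a minimal sketch, assuming it turns a {value: count} histogram into a {value: cumulative count} mapping:

# Hypothetical sketch of cmlib.value_count2cdf; the real helper may differ.
def value_count2cdf(v2count):
    cdf = dict()
    running = 0
    for v in sorted(v2count):
        running += v2count[v]
        cdf[v] = running  # number of observations with value <= v
    return cdf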
Example #21
    def num_features_metrics_CDF(self):
        met2unix2fea2v = dict()  # Note: different to fea2unix2met2v

        # initialize the huge dict
        print 'Initializing ...'
        tmppath = self.uds_list[0].numf_metrics_fpath()
        f = open(tmppath, 'r')
        count = 0
        for line in f:
            mtype = line.split('|')[1]
            met2unix2fea2v[mtype] = dict()
            count += 1
            if count == 15:  # XXX Note: we assume at most 15 metrics
                break
        f.close()

        unix_list = list()
        for uds in self.uds_list:
            for dtobj in uds.dtobj_list:
                unix = calendar.timegm(dtobj[0].utctimetuple())
                unix_list.append(unix)

        for m in met2unix2fea2v.keys():
            for unix in unix_list:
                met2unix2fea2v[m][unix] = dict()

        # read output file and store information
        for uds in self.uds_list:
            mf_path = uds.numf_metrics_fpath()
            print 'Reading ', mf_path
            f = open(mf_path, 'r')
            for line in f:
                line = line.rstrip('\n')
                splitted = line.split('|')
                unix = int(splitted[0])
                mtype = splitted[1]
                thedict = ast.literal_eval(splitted[2])
                for fea in thedict:
                    value = thedict[fea]
                    met2unix2fea2v[mtype][unix][fea] = value
            f.close()

        # Plot M figures for M metrics. In each figure, N curves for N features.
        for mtype in met2unix2fea2v:
            print 'Plotting metric ', mtype

            fea2vlist = dict()
            for unix in met2unix2fea2v[mtype]:
                for fea in met2unix2fea2v[mtype][unix]:
                    value = met2unix2fea2v[mtype][unix][fea]
                    try:
                        fea2vlist[fea].append(value)
                    except:
                        fea2vlist[fea] = [value]

            fea2xlist = dict()
            fea2ylist = dict()
            for fea in fea2vlist:
                not_applicable = 0
                v2count = dict()
                for v in fea2vlist[fea]:
                    if v == -1:
                        not_applicable += 1
                        continue
                    try:
                        v2count[v] += 1
                    except:
                        v2count[v] = 1

                mycdf = cmlib.value_count2cdf(v2count)
                for key in sorted(mycdf):
                    try:
                        fea2xlist[fea].append(key)
                        fea2ylist[fea].append(mycdf[key])
                    except:
                        fea2xlist[fea] = [key]
                        fea2ylist[fea] = [mycdf[key]]
                if len(mycdf.keys()) == 2:  # highest disparity
                    fea2xlist[fea].append(mycdf.keys()[-1])
                    tmp = fea2ylist[fea][-1]
                    fea2ylist[fea][-1] = 0
                    fea2ylist[fea].append(tmp)
                if not_applicable > 0:
                    print 'fea:', fea, '. mtype:', mtype, '. not_applicable:', not_applicable

            # for showing statistics in paper
            if mtype == 'DV':
                #print 'WW:',fea2xlist[3][-1]
                #print 'AADup1',fea2xlist[4][-1]
                print 'WADup:', fea2xlist[8][1]
                print 'AADup2', fea2xlist[5][1]

            # Start plotting now!
            if mtype == 'TOTAL':
                fig = plt.figure(figsize=(20, 13))
            else:
                fig = plt.figure(figsize=(17, 13))
            ax = fig.add_subplot(111)
            count = -1
            for fea in fea2xlist:
                if fea == 7:
                    continue
                count += 1
                #ax.plot(fea2xlist[fea], fea2ylist[fea],linestyles[count%4],\
                #        color=colors[count], label=feature_num2name[fea], lw=9, alpha=0.8)
                ax.plot(fea2xlist[fea], fea2ylist[fea],\
                        color=colors[count], marker=styles[count], markersize=25, markevery=(len(fea2xlist[fea])/2,len(fea2xlist[fea])), label=feature_num2name[fea], lw=6, alpha=0.8)

            ax.set_ylabel('Quantity of time slot')
            if mtype == 'TOTAL':
                ax.set_xlabel('Quantity of feature')
                ax.set_xscale('log')
                ax.set_ylim([-500, 13500])
                ax.tick_params(axis='x', pad=10)
            else:
                ax.set_xlabel(' Metric value')
                ax.set_xlim([-0.1, 1.1])
                ax.set_ylim([-500, 13500])
            if mtype == 'GINI' or mtype == 'TOTAL':
                legend = ax.legend(loc='best', shadow=False)

            ax.tick_params(axis='y', pad=10)
            ax.tick_params(axis='x', pad=10)
            plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

            cmlib.make_dir(env.metric_plot_dir)
            output_loc = env.metric_plot_dir + 'CDF_' + str(mtype) + '.pdf'
            plt.savefig(output_loc, bbox_inches='tight')
            plt.clf()  # clear the figure
            plt.close()
Example #22
    def rm_reset_one_list(self, rib_full_loc, tmp_full_listfile):
        ## record reset info into a temp file
        reset_info_file = datadir + 'peer_resets.txt'

        print self.co, ' obtaining BGP session reset start-end period...'
        subprocess.call('perl '+projectdir+'tool/bgpmct.pl -rf '+rib_full_loc+' -ul '+\
                tmp_full_listfile + ' > '+reset_info_file, shell=True)

        if os.path.exists(reset_info_file):
            if os.path.getsize(reset_info_file) == 0:
                print 'no reset at all!'
                return
        else:
            print 'no reset at all!'
            return

        peer_resettime = dict()  # peer: list of [reset start, reset end]
        resetf = open(reset_info_file, 'r')
        for line in resetf:
            if line.startswith('run') or line.startswith('/') or ('#' in line):
                continue
            if ':' in line:
                now_peer = line.rstrip(':\n')
                continue

            stime_unix, endtime_unix = int(line.split(',')[0]), int(
                line.split(',')[1])
            try:
                peer_resettime[now_peer].append([stime_unix, endtime_unix])
            except:
                peer_resettime[now_peer] = [
                    [stime_unix, endtime_unix],
                ]
        resetf.close()

        # write the reset info into a file
        # TODO deal with gap > 32 days
        cmlib.make_dir(reset_info_dir)
        f = open(self.reset_info, 'a')
        f.write(self.co + ':\n')
        for p in peer_resettime:
            f.write(p + '@\n')
            for rs in peer_resettime[p]:
                f.write(str(rs) + '\n')
        f.close()
        '''
        # XXX only for once start (continue after the program stopped because of memo issue)
        # FIXME Giant bug in these code. In future, re-download the affected collectors
        this_co_peers = []
        peer_file = cmlib.peer_path_by_rib_path(rib_full_loc)
        fff = open(peer_file, 'r')
        for line in fff:
            peer = line.split('@')[0]
            this_co_peers.append(peer)
        fff.close()
        
        peer_resettime = dict()
        record = False
        f = open(self.reset_info, 'r')
        for line in f:
            line = line.rstrip('@\n')
            if ':' in line:
                record = False
                continue
            if line[0].isdigit():
                record = True
                p = line
                peer_resettime[p] = list()
            elif record is True:
                thelist = ast.literal_eval(line)
                peer_resettime[p].append(thelist)
            else:
                assert 1 == 0
        f.close()
        # XXX only for once end
        '''

        # different collectors in the same file
        for p in peer_resettime:
            if ':' in p:  # We do not really delete IPv6 updates
                continue
            #if p not in this_co_peers: # XXX used with the previous commented out code
            #    continue
            if p not in self.global_peers:  # We ignore non-global peers to save time
                continue
            for l in peer_resettime[p]:
                print 'deleting reset for ', p
                self.delete_reset_updates(p, l[0], l[1], tmp_full_listfile)
                #h = hpy()
                #print h.heap()

        os.remove(reset_info_file)  #XXX comment out when 'doing it once'...
Example #23
    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        rib_list = filter(lambda a: a != '', rib_list)
        rib_list = filter(lambda a: a != '\n', rib_list)
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        target_line = None  # stores the RIB file for downloading
        largest_line = None
        max = -1
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max:
                max = fsize
                largest_line = line

            diff = abs(int(fdate) - int(my_date))  # >0
            # XXX logic here not clear (but seems effective)
            if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line  # work-around for a special case

        print 'Selected RIB:', target_line
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'existed size & original size:', os.path.getsize(
                full_loc + '.txt.gz'), fsize
            if os.path.getsize(full_loc +
                               '.txt.gz') > 0.6 * fsize:  # 0.6 is good enough
                return full_loc + '.txt.gz'  # Do not download
            else:
                os.remove(full_loc + '.txt.gz')  # too small to be complete

        if os.path.exists(full_loc):
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else:  # Good!
                cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
                cmlib.pack_gz(full_loc + '.txt')
                return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'
    def download_one_rib_before_unix(self, my_date, unix): # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/' 
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print 'Getting list from ' + 'http://' + web_location
        except:
            return -1

        cmlib.make_dir(datadir+web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        rib_list = filter(lambda a: a != '', rib_list)
        rib_list = filter(lambda a: a != '\n', rib_list)
        rib_list = [item for item in rib_list if 'rib' in item or 'bview' in item]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist) 

        ok_rib_list = list() # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None # the RIB closest to unix 
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate+ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M') 
            runix = time_lib.mktime(objdt.timetuple()) + 8*60*60 # offset mktime() for the local timezone (assumed UTC+8)
            print objdt, runix, unix
            if runix <= unix and unix-runix < min:
                min = unix-runix
                print 'min changed to ', min
                target_line = line

        print 'Selected RIB:', target_line
        if target_line == None:
            return -1
        size = target_line.split()[-1] # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename # .bz2/.gz

        if os.path.exists(full_loc+'.txt'): # only for clearer logic
            os.remove(full_loc+'.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc+'.txt.gz'): 
            print 'parsed RIB already exists; skip downloading'
            return full_loc+'.txt.gz' # Do not download

        if os.path.exists(full_loc): 
            cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
            cmlib.pack_gz(full_loc+'.txt')
            return full_loc+'.txt.gz'


        cmlib.force_download_file('http://'+web_location, datadir+web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
        cmlib.pack_gz(full_loc+'.txt')
        os.remove(full_loc) # remove the original file

        return full_loc+'.txt.gz'
    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/' 
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir+web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        rib_list = filter(lambda a: a != '', rib_list)
        rib_list = filter(lambda a: a != '\n', rib_list)
        rib_list = [item for item in rib_list if 'rib' in item or 'bview' in item]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist) 

        target_line = None # stores the RIB file for downloading
        largest_line = None
        max = -1
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max:
                max = fsize
                largest_line = line
            
            diff = abs(int(fdate)-int(my_date)) # >0
            # XXX logic here not clear (but seems effective)
            if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line # work-around for a special case


        print 'Selected RIB:', target_line
        size = target_line.split()[-1] # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename # .bz2/.gz

        if os.path.exists(full_loc+'.txt'): # only for clearer logic
            os.remove(full_loc+'.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc+'.txt.gz'): 
            print 'existed size & original size:',os.path.getsize(full_loc+'.txt.gz'),fsize
            if os.path.getsize(full_loc+'.txt.gz') > 0.6 * fsize: # 0.6 is good enough
                return full_loc+'.txt.gz' # Do not download
            else:
                os.remove(full_loc+'.txt.gz') # too small to be complete

        if os.path.exists(full_loc): 
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else: # Good!
                cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
                cmlib.pack_gz(full_loc+'.txt')
                return full_loc+'.txt.gz'


        cmlib.force_download_file('http://'+web_location, datadir+web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
        cmlib.pack_gz(full_loc+'.txt')
        os.remove(full_loc) # remove the original file

        return full_loc+'.txt.gz'
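
A minimal usage sketch of the two download methods above, assuming the Downloader(sdate, edate, collector) constructor seen in the earlier script fragment; the collector name, dates and unix time are placeholders:

# Hypothetical usage of the Downloader methods shown above.
dl = Downloader('20130501', '20130531', 'route-views2')
rib1 = dl.download_one_rib('20130515')                           # RIB closest to the date
rib2 = dl.download_one_rib_before_unix('20130515', 1368576000)   # latest RIB before the unix time, or -1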
    def numf_distr_output_dir(self):
        dir = metrics_output_root + str(self.granu) + '/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(dir)
        return dir
Example #27
    def __init__(self, sdate):
        self.sdate = sdate
        cmlib.make_dir(datadir+'support/')
Example #28
    def num_features_metrics_CDF(self): 
        met2unix2fea2v = dict() # Note: different to fea2unix2met2v

        # initialize the huge dict
        print 'Initializing ...'
        tmppath = self.uds_list[0].numf_metrics_fpath()
        f = open(tmppath, 'r')
        count = 0
        for line in f:
            mtype = line.split('|')[1]
            met2unix2fea2v[mtype] = dict()
            count += 1
            if count == 15: # XXX Note: we assume at most 15 metrics
                break
        f.close()

        unix_list = list()
        for uds in self.uds_list:
            for dtobj in uds.dtobj_list:
                unix = calendar.timegm(dtobj[0].utctimetuple())
                unix_list.append(unix)

        for m in met2unix2fea2v.keys():
            for unix in unix_list:
                met2unix2fea2v[m][unix] = dict()

        # read output file and store information
        for uds in self.uds_list:
            mf_path = uds.numf_metrics_fpath()
            print 'Reading ', mf_path
            f = open(mf_path, 'r')
            for line in f:
                line = line.rstrip('\n')
                splitted = line.split('|')
                unix = int(splitted[0])
                mtype = splitted[1]
                thedict = ast.literal_eval(splitted[2])
                for fea in thedict:
                    value = thedict[fea]
                    met2unix2fea2v[mtype][unix][fea] = value
            f.close()

        # Plot M figures for M metrics. In each figure, N curves for N features.
        for mtype in met2unix2fea2v:
            print 'Plotting metric ', mtype

            fea2vlist = dict()
            for unix in met2unix2fea2v[mtype]:
                for fea in met2unix2fea2v[mtype][unix]:
                    value = met2unix2fea2v[mtype][unix][fea]
                    try:
                        fea2vlist[fea].append(value)
                    except:
                        fea2vlist[fea] = [value]

            fea2xlist = dict()
            fea2ylist = dict()
            for fea in fea2vlist:
                not_applicable = 0
                v2count = dict()
                for v in fea2vlist[fea]:
                    if v == -1:
                        not_applicable += 1
                        continue
                    try:
                        v2count[v] += 1
                    except:
                        v2count[v] = 1

                mycdf = cmlib.value_count2cdf(v2count)
                for key in sorted(mycdf):
                    try:
                        fea2xlist[fea].append(key)
                        fea2ylist[fea].append(mycdf[key])
                    except:
                        fea2xlist[fea] = [key]
                        fea2ylist[fea] = [mycdf[key]]
                if len(mycdf.keys()) == 2: # highest disparity
                    fea2xlist[fea].append(mycdf.keys()[-1])
                    tmp = fea2ylist[fea][-1]
                    fea2ylist[fea][-1] = 0
                    fea2ylist[fea].append(tmp)
                if not_applicable > 0:
                    print 'fea:',fea,'. mtype:',mtype,'. not_applicable:',not_applicable

            # for showing statistics in paper
            if mtype == 'DV':
                #print 'WW:',fea2xlist[3][-1]
                #print 'AADup1',fea2xlist[4][-1]
                print 'WADup:',fea2xlist[8][1]
                print 'AADup2',fea2xlist[5][1]

            # Start plotting now! 
            if mtype == 'TOTAL':
                fig = plt.figure(figsize=(20, 13))
            else:
                fig = plt.figure(figsize=(17, 13))
            ax = fig.add_subplot(111)
            count = -1
            for fea in fea2xlist:
                if fea == 7:
                    continue
                count += 1
                #ax.plot(fea2xlist[fea], fea2ylist[fea],linestyles[count%4],\
                #        color=colors[count], label=feature_num2name[fea], lw=9, alpha=0.8)
                ax.plot(fea2xlist[fea], fea2ylist[fea],\
                        color=colors[count], marker=styles[count], markersize=25, markevery=(len(fea2xlist[fea])/2,len(fea2xlist[fea])), label=feature_num2name[fea], lw=6, alpha=0.8)

            ax.set_ylabel('Quantity of time slot')
            if mtype == 'TOTAL':
                ax.set_xlabel('Quantity of feature')
                ax.set_xscale('log')
                ax.set_ylim([-500, 13500])
                ax.tick_params(axis='x',pad=10)
            else:
                ax.set_xlabel(' Metric value')
                ax.set_xlim([-0.1, 1.1])
                ax.set_ylim([-500, 13500])
            if mtype == 'GINI' or mtype == 'TOTAL':
                legend = ax.legend(loc='best',shadow=False)

            ax.tick_params(axis='y',pad=10)
            ax.tick_params(axis='x',pad=10)
            plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))

            cmlib.make_dir(env.metric_plot_dir)
            output_loc = env.metric_plot_dir + 'CDF_' + str(mtype) + '.pdf'
            plt.savefig(output_loc, bbox_inches='tight')
            plt.clf() # clear the figure
            plt.close()
Example #29
    def __init__(self, period, granu):
        self.filelist = period.get_filelist()

        self.sdate = period.sdate
        self.edate = period.edate
        self.granu = granu

        self.cl_list = cl_list  #XXX

        self.max_dt = -1

        #-------------------------------------------------------------
        # dir for final and middle output files
        self.output_dir = datadir + 'output/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(self.output_dir)

        self.monitors = []
        tmp_co_mo = period.co_mo
        for co in tmp_co_mo.keys():
            self.monitors.extend(tmp_co_mo[co])

        self.mcount = len(self.monitors)

        self.mo2index = {}  # map monitor ip to an index
        index = 0
        for mo in self.monitors:
            self.mo2index[mo] = index
            index += 1

        self.no_prefixes = period.no_prefixes  # a trie

        # FIXME not here! in period class
        '''
        self.m_as_m = dict() # AS number: monitor count
        self.m_nation_as = dict() # nation: AS (of monitors) count
        for m in self.monitors.keys():
            asn = self.monitors[m]
            try:
                self.m_as_m[asn] += 1
            except:
                self.m_as_m[asn] = 1
        for asn in self.m_as_m.keys():
            nation = self.as_to_nation(asn)
            if nation == -1:
                continue
            try:
                self.m_nation_as[nation] += 1
            except:
                self.m_nation_as[nation] = 1
        self.m_ascount = len(self.m_as_m.keys())
        print 'monitor AS count:', self.m_ascount
        self.m_nationcount = len(self.m_nation_as.keys())
        print 'monitor nation count:', self.m_nationcount
        print 'monitor nations:', self.m_nation_as.keys()
        '''

        #-----------------------------------------------------
        # For synchronization among collectors and conducting timely aggregation
        # Note: assume all collectors will exist after self.sdate + 1 hour

        self.cl_dt = {}  # The current datetime of every collector, for getting ceiling
        for cl in self.cl_list:
            self.cl_dt[cl] = 0

        tmp_dt = datetime.datetime(int(self.sdate[0:4]),\
                int(self.sdate[4:6]),int(self.sdate[6:8]),0,0)
        # do not fill up the hour to allow for the edge value being analyzed
        tmp_dt = tmp_dt + datetime.timedelta(minutes=58)
        tmp_dt = time_lib.mktime(tmp_dt.timetuple())

        # floor is only for ignoring anything before self.sdate + 1 hour
        self.floor = tmp_dt
        # we output everything below ceiling and above floor
        self.ceiling = self.floor

        tmp_dt = datetime.datetime(int(self.edate[0:4]),\
                int(self.edate[4:6]),int(self.edate[6:8]),23,59)
        tmp_dt = tmp_dt + datetime.timedelta(minutes=-58)
        tmp_dt = time_lib.mktime(tmp_dt.timetuple())  # Change into seconds int
        self.top_ceiling = tmp_dt  # self.ceiling cannot exceed this value

        #------------------------------------------------------
        # Basic values assignment

        self.pfx_trie = dict()  # every dt has a corresponding trie, deleted periodically
        #self.dt_list = list() # the list of datetime
        #self.peerlist = dict() # dt: monitor list XXX no need any more
        #self.ucount = dict() # dt: update count
        #self.acount = dict() # dt: announcement count
        #self.wcount = dict() # dt: withdrawal count
        #self.wpctg = dict() # dt: withdrawal percentage XXX get this only when analyzing output

        # FIXME put the download of support files in period class
        # Take special care when the duration is long
        spt = Supporter(self.sdate)
        self.pfx2as = spt.get_pfx2as_trie()  # all prefixes mapping to AS
        self.as2nation = spt.get_as2nation_dict()  # all ASes mapping to nation (latest info)

        self.all_ascount = cmlib.get_all_ascount(self.sdate)  # Get total AS quantity
        self.all_pcount = cmlib.get_all_pcount(self.sdate)  # Get total prefix quantity
        self.all_pcount_lzero = 0  # quantity of prefixes having DV > 0
        self.as2cc = spt.get_as2cc_dict()  # all ASes mapped to sizes of customer cones

        # XXX no longer needed
        self.as2rank = dict()  # All ASes mapped to rank (according to customer cone size)
        pre_value = 999999
        rank = 0  # number (of ASes whose CC is larger) + 1
        buffer = 0  # number (of ASes having the same CC size) - 1
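        # e.g., descending CC sizes [50, 50, 10] yield ranks 1, 1, 3 (standard competition ranking)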
        for item in sorted(self.as2cc.iteritems(),
                           key=operator.itemgetter(1),
                           reverse=True):
            if item[1] < pre_value:
                rank = rank + buffer + 1
                pre_value = item[1]
                self.as2rank[item[0]] = rank
                buffer = 0
            else:  # item[1] (cc size) == pre_value
                buffer += 1
                self.as2rank[item[0]] = rank

        #---------------------------------------------------------------------
        # FIXME
        # For each dt create a middle file that stores prefix|update quantity|DV value|(DV list)
        # Analyze these middle files in the end
        self.dv_level = [0, 0.05, 0.1, 0.15, 0.2]
        # Coarser DV values
        self.dvrange_dt_pfx = dict()  # DV level range: dt: pfx count
        self.dvrange_len_pfx = dict()  # DV level range: prefix length: existence
        self.dv_dt_asn_pfx = dict()  # DV levels: dt: AS: prefix count
        self.pfxcount = dict()  # dv: dt: prefix (in updates) count
        self.pfxcount_range = dict()  # dv range: dt: prefix (in updates) count
        self.dv_dt_hdvp = dict()  # DV levels: dt: hdvp count
        for dl in self.dv_level:
            self.dvrange_dt_pfx[dl] = dict()
            self.dvrange_len_pfx[dl] = dict()
            self.dv_dt_asn_pfx[dl] = dict()
            self.pfxcount[dl] = dict()
            self.pfxcount_range[dl] = dict()
            self.dv_dt_hdvp[dl] = dict()

        # only record DV > 0.15 #XXX delete
        self.dup_trie = patricia.trie(None)  # TODO Enough memory for this?

        # DV distribution in every time slot
        self.dv_distribution = dict()  # dt: DV: count
        self.dv_cdf = dict()  # dt: DV: cumulative count

        #----------------------------------------------------------------------
        # CDFs for the slot before and after the cdfbound (HDVP peak)
        # FIXME do this when analyzing the middle files
        #TODO obtain cdfbound
        self.compare = False
        if cdfbound != None:
            self.compare = True
            self.cdfbfr = dict()
            self.cdfaft = dict()
            self.as_bfr = dict()
            self.as_aft = dict()
            self.cdfbound = datetime.datetime.strptime(cdfbound,
                                                       '%Y-%m-%d %H:%M:%S')
            self.bfr_start = time_lib.mktime((self.cdfbound +\
                    datetime.timedelta(minutes=-(self.granu*2))).timetuple())
            self.cdfbound = time_lib.mktime(self.cdfbound.timetuple())

            for dl in self.dv_level:
                self.as_bfr[dl] = dict()  # dv: ASN: count
                self.as_aft[dl] = dict()
Example #30
0
    def pfx_metrics_CDF_met2total(self):
        uds = self.uds_list[0] # XXX note: in the ISCC paper we only analyze one period!
        metrics = ['UQ', 'PMR', 'GC', 'CR1', 'CR4', 'CR8', 'CR0.1', 'CR0.2', 'CR0.3']
        met2list = dict()
        for met in metrics:
            met2list[met] = list()

        fname = uds.apfx_metrics_fpath()
        f = open(fname, 'r')
        for line in f:
            line = line.rstrip('\n')
            attr = line.split('|')
            met2list['UQ'].append(int(attr[1]))  # get the metrics; hard-coding the column indices is ugly but saves time here
            met2list['PMR'].append(float(attr[2]))
            met2list['GC'].append(float(attr[3]))
            met2list['CR1'].append(float(attr[4]))
            met2list['CR4'].append(float(attr[5]))
            met2list['CR8'].append(float(attr[6]))
            met2list['CR0.1'].append(float(attr[7]))
            met2list['CR0.2'].append(float(attr[8]))
            met2list['CR0.3'].append(float(attr[9]))
        f.close()


        fig = plt.figure(figsize=(20, 13))
        ax = fig.add_subplot(111)
        count = 0
        for mtype in met2list:
            if mtype == 'UQ':
                continue
            v2count = dict()
            for v in met2list[mtype]:
                try:
                    v2count[v] += 1
                except:
                    v2count[v] = 1
            mycdf = cmlib.value_count2cdf(v2count)
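            # cmlib.value_count2cdf presumably turns the value->count dict into a
            # value->cumulative-count mapping, i.e. an unnormalized CDF over time slots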
            xlist = list()
            ylist = list()

            for key in sorted(mycdf):
                xlist.append(key)
                ylist.append(mycdf[key])

            ax.plot(xlist, ylist,\
                    color=colors[count], marker=styles[count], markersize=25, markevery=(len(xlist)/2,len(xlist)), label=mtype, lw=6, alpha=0.8)
            #ax.plot(xlist, ylist, color=colors[count], label=mtype, lw=6, alpha=0.8)
            count += 1

        ax.set_ylabel('Quantity of time slot')
        ax.set_xlabel(' Metric value')
        legend = ax.legend(loc='lower right',shadow=False)
        ax.tick_params(axis='y',pad=10)
        ax.tick_params(axis='x',pad=10)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
        
        cmlib.make_dir(env.metric_plot_dir)
        output_loc = env.metric_plot_dir + 'pfx_metrics_CDF.pdf'
        plt.savefig(output_loc, bbox_inches='tight')
        plt.clf() # clear the figure
        plt.close()


Example #31
0
    def apfx_metrics_fpath(self):
        out_dir = metrics_output_root + str(self.granu) + '/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(out_dir)
        return out_dir + 'active_pfx_metrics.txt'
Example #32
0
    def __init__(self, period, granu):

        # use a list rather than a dict so that a collector can appear multiple times
        self.blank_co = list()  # list of lists: [collector, start unix dt, end unix dt]
        self.ignore_co = list()  # collectors currently ignored because they are within a blank period

        self.filelist = period.get_filelist()
        self.sdate = period.sdate
        self.edate = period.edate
        self.granu = granu

        self.middle_dir = period.get_middle_dir()
        cmlib.make_dir(self.middle_dir)
        self.blank_dir = period.get_blank_dir()
        cmlib.make_dir(self.blank_dir)

        self.period = period
        self.all_co_list = period.co_mo.keys()  # collector list
        self.monitors = []
        for co in period.co_mo.keys():
            self.monitors.extend(period.co_mo[co])
        self.mcount = len(self.monitors)
        # Sort the monitor list first so that this mapping is consistent across multiple runs
        tmp_list = sorted(self.monitors, key=cmlib.ip_to_integer)

        self.mo2index = {}  # map monitor ip to an index
        index = 0
        for mo in tmp_list:
            self.mo2index[mo] = index
            index += 1
        # write this mapping to a file for future microscopic analysis
        self.mo2index_file = self.period.get_mon2index_file_path()
        f = open(self.mo2index_file, 'w')
        for mo in self.mo2index:
            f.write(mo + ':' + str(self.mo2index[mo]) + '\n')
        f.close()

        ###self.pfx_radix = dict() # every dt has a corresponding trie, deleted periodically
        self.pfx_tree = radix.Radix()  # XXX test
        self.dt_list = dict()  # unix dt => True # XXX test

        #-----------------------------------------------------
        # For synchronization among collectors and conducting timely aggregation
        # Note: assume all collectors will exist after self.sdate + 1 hour
        # XXX commented out when dealing with 2013 whole year data

        self.co_unix_dt = {}  # The current datetime of every collector, for getting ceiling
        for cl in self.all_co_list:
            self.co_unix_dt[cl] = 0

        tmp_dt = datetime.datetime(int(self.sdate[0:4]),\
                int(self.sdate[4:6]),int(self.sdate[6:8]),0,0) # is UTC
        # do not fill up the hour to allow for the edge value being analyzed
        #XXX tmp_dt = tmp_dt + datetime.timedelta(minutes=58) # is UTC
        tmp_dt = calendar.timegm(tmp_dt.timetuple())  # is UTC
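        # calendar.timegm treats the struct_time as UTC; time.mktime (as used in the earlier
        # __init__ above) would instead interpret it in the machine's local timezone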

        # floor is only for ignoring anything before self.sdate + 1 hour
        self.floor = tmp_dt
        # we output everything below ceiling and above floor
        #self.ceiling = self.floor
        self.ceiling = tmp_dt

        tmp_dt = datetime.datetime(int(self.edate[0:4]),\
                int(self.edate[4:6]),int(self.edate[6:8]),23,59,59)
        #XXX tmp_dt = tmp_dt + datetime.timedelta(minutes=-58)
        tmp_dt = calendar.timegm(tmp_dt.timetuple())
        self.top_ceiling = tmp_dt  # self.ceiling cannot exceed this value
Example #33
0
    def numf_distr_output_dir(self):
        out_dir = metrics_output_root + str(self.granu) + '/' + self.sdate + '_' + self.edate + '/'
        cmlib.make_dir(out_dir)
        return out_dir
Example #34
0
def get_file():
    for clctr in collectors:
        cl_name = clctr
        hdname_detail = hdname + 'archive.routeviews.org/' + cl_name +\
            '/bgpdata/'
        hdname_detail = hdname_detail.replace('//', '/')  # '//' appears when cl_name == ''
        # only for downloading updates, not RIBs
        for ym in yearmonth:
            sdate = ym.split('.')[0] + ym.split('.')[1] + '01'
            edate = ym.split('.')[0] + ym.split('.')[1] + '07'
            filelocation = ''
            filelocation = 'archive.routeviews.org/' + cl_name + '/bgpdata/' + ym + '/UPDATES/'
            filelocation = filelocation.replace('//', '/')  # when name is ''
            webraw = cmlib.get_weblist('http://' + filelocation)
            print filelocation
            cmlib.make_dir(hdname+'metadata/'+ym)
            flist = open(hdname+'metadata/'+ym+'/updt_filelist_'+cl_name, 'w')
            cmlib.make_dir(hdname+filelocation)
            for line in webraw.split('\n'):

                if 'updates' not in line or line == '' or line == '\n':
                    continue

                size = line.split()[-1]
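                # The HTTP directory listing may report sizes with a unit suffix (e.g. '1.2M');
                # cmlib.size_u2v presumably converts that suffix into the corresponding multiplier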
                if size.isdigit():
                    fsize = float(size)
                else:
                    fsize = float(size[:-1]) * cmlib.size_u2v(size[-1])
                filename = line.split()[0]  # omit uninteresting info
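                # RouteViews files are named like updates.YYYYMMDD.HHMM.bz2, so the third
                # dot-separated field from the end is the file's date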
                filedate = filename.split('.')[-3]

                # check whether its datetime is in our range
                if int(filedate) < int(sdate) or int(filedate) > int(edate):
                    continue

                print filename

                origin_floc = hdname + filelocation + filename # original file loc&name
                flist.write(origin_floc+'.txt.gz\n')  # .xx.txt.gz file list

                # remove existing xx.txt file to make things clearer
                try:
                    os.remove(origin_floc+'.txt')
                except:
                    pass

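                # An existing parsed .txt.gz larger than 10% of the remote raw size is treated
                # as complete; an existing raw file larger than 90% of the remote size is
                # treated as already fully downloaded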
                if os.path.exists(origin_floc+'.txt.gz'):
                    if os.path.getsize(origin_floc+'.txt.gz') > 0.1 * fsize:
                        if os.path.exists(origin_floc):  # .bz2/.gz useless anymore
                            os.remove(origin_floc)
                        continue
                    else:
                        os.remove(origin_floc+'.txt.gz')

                if os.path.exists(origin_floc):
                    if os.path.getsize(origin_floc) > 0.9 * fsize:
                        continue
                    else:
                        os.remove(origin_floc)


                cmlib.force_download_file('http://'+filelocation, hdname+filelocation, filename) 

            # close the file that stores the update file list
            flist.close()

            filelocation = 'archive.routeviews.org/' + cl_name + '/bgpdata/' + ym + '/RIBS/'
            filelocation = filelocation.replace('//', '/')  # when name is ''
            webraw = cmlib.get_weblist('http://' + filelocation)
            print filelocation
            cmlib.make_dir(hdname+filelocation)

            # for each event, we only download one RIB (on the sdate)
            rib_fname = ''
            for line in webraw.split('\n'):
                
                if 'rib' not in line and 'bview' not in line:
                    continue
                if line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                if size.isdigit():
                    fsize = float(size)
                else:
                    fsize = float(size[:-1]) * cmlib.size_u2v(size[-1])

                filename = line.split()[0]
                print filename
                if int(filename.split('.')[-3]) != int(sdate):
                    continue
                print filename
                origin_floc = hdname + filelocation + filename # original file loc&name

                try:
                    os.remove(origin_floc+'.txt')
                except:
                    pass

                rib_fname = filelocation + filename
                if os.path.exists(origin_floc+'.txt.gz'): 
                    if os.path.getsize(origin_floc+'.txt.gz') > 0.1 * fsize:
                        if os.path.exists(origin_floc):  # .bz2/.gz useless anymore
                            os.remove(origin_floc)
                        break
                    else:
                        os.remove(origin_floc+'.txt.gz')

                if os.path.exists(origin_floc): 
                    if os.path.getsize(origin_floc) > 0.9 * fsize:
                        break
                    else:
                        os.remove(origin_floc)

                cmlib.force_download_file('http://'+filelocation, hdname+filelocation, filename)
                break


            # download one RIB from the previous day to initialize the AS paths
            sdate_datetime = datetime.datetime(int(sdate[0:4]), int(sdate[4:6]),int(sdate[6:8]))
            as_path_date = sdate_datetime - datetime.timedelta(days=1)
            as_path_date = as_path_date.strftime('%Y%m%d')
            
            as_path_ym = as_path_date[0:4] + '.' + as_path_date[4:6]
            filelocation = 'archive.routeviews.org/' + cl_name + '/bgpdata/' + as_path_ym + '/RIBS/'
            filelocation = filelocation.replace('//', '/')  # when name is ''
            webraw = cmlib.get_weblist('http://' + filelocation)
            print filelocation
            cmlib.make_dir(hdname+filelocation)

            asrib_fname = ''
            for line in reversed(webraw.split('\n')):
                print line
                if 'rib' not in line and 'bview' not in line:
                    continue
                if line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                if size.isdigit():
                    fsize = float(size)
                else:
                    fsize = float(size[:-1]) * cmlib.size_u2v(size[-1])

                filename = line.split()[0]
                print filename
                if int(filename.split('.')[-3]) != int(as_path_date):
                    continue
                print filename
                origin_floc = hdname + filelocation + filename # original file loc&name

                try:
                    os.remove(origin_floc+'.txt')
                except:
                    pass

                asrib_fname = filelocation + filename
                if os.path.exists(origin_floc+'.txt.gz'): 
                    if os.path.getsize(origin_floc+'.txt.gz') > 0.1 * fsize:
                        if os.path.exists(origin_floc):  # .bz2/.gz useless anymore
                            os.remove(origin_floc)
                        break
                    else:
                        os.remove(origin_floc+'.txt.gz')

                if os.path.exists(origin_floc): 
                    if os.path.getsize(origin_floc) > 0.9 * fsize:
                        break
                    else:
                        os.remove(origin_floc)

                cmlib.force_download_file('http://'+filelocation, hdname+filelocation, filename)
                break
            ## at this point every update and RIB file is either .bz2/.gz or .xx.txt.gz

            print 'parsing updates...'
            parse_updates(ym, cl_name)

            print 'parsing RIB and getting peers...'
            rib_location = hdname + rib_fname  # .bz2/.gz
            #print rib_location,'dd'
            peers = get_peers(clctr,ym,rib_location)
            print 'peers: ', peers
            
            as_path_rib_location = hdname + asrib_fname  # .bz2/.gz            
            process_as_path_rib(clctr,as_path_ym,as_path_rib_location)

            print 'determining table transfers start and end time for each peer...'
            for peer in peers:  # must process each peer one by one
                peer = peer.rstrip()
                print 'processing ',peer,'...'
                subprocess.call('perl '+homedir+'tool/bgpmct.pl -rf '+rib_location+'.txt.gz'+' -ul '+\
                        hdname+'metadata/'+ym+'/updt_filelist_'+cl_name+' -p '+peer+' > '+\
                        hdname+'tmp/'+peer+'_result.txt', shell=True)
                    
            print 'delete updates caused by session reset for each peer...'
            for peer in peers:
                # No reset from this peer, so nothing in the file
                try:
                    if os.path.getsize(hdname+'tmp/'+peer+'_result.txt') == 0:
                        continue
                except: # cannot find file
                    continue
                print '\nculprit now: ', peer
                del_tabletran_updates(peer, ym, cl_name)

            # delete all rubbish in the end
            subprocess.call('rm '+hdname+'tmp/*', shell=True)
                                
    return