Пример #1
0
def get_peers(clctr,ym,rib_location): # should end with .bz2/.gz
    print rib_location
    loc = hdname + 'archive.routeviews.org/'
    peer_num_list = open(loc + 'peer_num', 'a')
    peers = []
    # get .txt
    if os.path.exists(rib_location+'.txt.gz'):  # .xx.txt.gz file exists
        subprocess.call('gunzip '+rib_location+'.txt.gz', shell=True)  # unpack                        
    elif os.path.exists(rib_location):  # .bz2/.gz file exists
        cmlib.parse_mrt(rib_location, rib_location+'.txt')
        os.remove(rib_location)  # then remove .bz2/.gz
    # read .txt
    with open(rib_location+'.txt', 'r') as f:  # get peers from RIB
        for line in f:
            try:
                addr = line.split('|')[3]
                if addr not in peers:
                    peers.append(addr)
            except:
                pass
    f.close()
    # compress RIB into .gz
    if not os.path.exists(rib_location+'.txt.gz'):
        cmlib.pack_gz(rib_location+'.txt')
    peer_num = len(peers)
    peer_num_list.write(clctr + '  ' + ym + '  ' + str(peer_num) + '\n')
    return peers
Пример #2
0
def parse_updates(sdate, cl_name):
    flist = open(hdname+'metadata/'+sdate+'/updt_filelist_'+cl_name, 'r')  # .xx.txt.gz file name
    for line in flist:
        line = line.replace('\n', '')
        if not os.path.exists(line):  # xx.txt.gz not exists, .bz2/.gz exists
            print line
            cmlib.parse_mrt(line.replace('.txt.gz', ''), line.replace('txt.gz', 'txt'))
            cmlib.pack_gz(line.replace('txt.gz', 'txt'))
            os.remove(line.replace('.txt.gz', ''))  # remove .bz2/.gz update files
        else:  # xx.txt.gz exists
            pass
    flist.close()
Пример #3
0
def parse_update_files(listfile): # all update files from one collectors/list
    flist = open(listfile, 'r')
    for line in flist:
        line = line.rstrip('\n')
        fsize = float(line.split('|')[1])
        print 'fsize=',fsize
        line = line.split('|')[0].replace('.txt.gz', '') # get the original .bz2/gz file name
        if not os.path.exists(datadir+line+'.txt.gz'):
            cmlib.parse_mrt(datadir+line, datadir+line+'.txt', fsize) # .bz2/gz => .bz2/gz.txt
            cmlib.pack_gz(datadir+line+'.txt') # .bz2/gz.txt => .bz2/gz.txt.gz
            #os.remove(datadir+line)  # remove the original .bz2/.gz file
        else:
            print 'Parsed file exists'
            print datadir+line+'.txt.gz'
            pass
    flist.close()
    return 0
Пример #4
0
def parse_update_files(listfile):  # all update files from one collectors/list
    flist = open(listfile, 'r')
    for line in flist:
        line = line.rstrip('\n')
        fsize = float(line.split('|')[1])
        print 'fsize=', fsize
        line = line.split('|')[0].replace(
            '.txt.gz', '')  # get the original .bz2/gz file name
        if not os.path.exists(datadir + line + '.txt.gz'):
            cmlib.parse_mrt(datadir + line, datadir + line + '.txt',
                            fsize)  # .bz2/gz => .bz2/gz.txt
            cmlib.pack_gz(datadir + line +
                          '.txt')  # .bz2/gz.txt => .bz2/gz.txt.gz
            #os.remove(datadir+line)  # remove the original .bz2/.gz file
        else:
            print 'Parsed file exists'
            print datadir + line + '.txt.gz'
            pass
    flist.close()
    return 0
Пример #5
0
    def get_pfx2as_trie(self):
        print 'Calculating prefix to AS number trie...'
        pfx2as = patricia.trie(None)

        if int(self.sdate) >= 20050509:
            self.get_pfx2as_file()

            pfx2as_file = ''
            tmp = os.listdir(datadir+'support/'+self.sdate+'/')
            for line in tmp:
                if 'pfx2as' in line:
                    pfx2as_file = line
                    break

            f = open(datadir+'support/'+self.sdate+'/'+pfx2as_file)
            for line in f:
                line = line.rstrip('\n')
                attr = line.split()
                if '_' in attr[2] or ',' in attr[2]:
                    continue
                pfx = cmlib.ip_to_binary(attr[0]+'/'+attr[1], '0.0.0.0')
                try:
                    pfx2as[pfx] = int(attr[2]) # pfx: origin AS
                except: # When will this happen?
                    pfx2as[pfx] = -1

            f.close()
        else:
            # Extract info from RIB of the monitor route-views2
            mydate = self.sdate[0:4] + '.' + self.sdate[4:6]
            rib_location = datadir+'routeviews.org/bgpdata/'+mydate+'/RIBS/'
            dir_list = os.listdir(datadir+'routeviews.org/bgpdata/'+mydate+'/RIBS/')


            for f in dir_list:
                if not f.startswith('.'):
                    rib_location = rib_location + f # if RIB is of the same month. That's OK.
                    break
            
            if rib_location.endswith('txt.gz'):
                subprocess.call('gunzip '+rib_location, shell=True)  # unpack                        
                rib_location = rib_location.replace('.txt.gz', '.txt')
            elif not rib_location.endswith('txt'):  # .bz2/.gz file exists
                cmlib.parse_mrt(rib_location, rib_location+'.txt')
                os.remove(rib_location)  # then remove .bz2/.gz
                rib_location = rib_location + '.txt'
            # now rib file definitely ends with .txt, let's rock and roll
            with open(rib_location, 'r') as f:
                for line in f:
                    try:
                        tmp = line.split('|')[5]
                        pfx = cmlib.ip_to_binary(tmp, '0.0.0.0')
                        ASlist = line.split('|')[6]
                        originAS = ASlist.split()[-1]
                        try:
                            pfx2as[pfx] = int(originAS)
                        except:
                            pfx2as[pfx] = -1
                    except:
                        pass

            f.close()
            # compress RIB into .gz
            if not os.path.exists(rib_location+'.gz'):
                cmlib.pack_gz(rib_location)

        return pfx2as
Пример #6
0
    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/' 
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir+web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [item for item in rib_list if 'rib' in item or 'bview' in item]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist) 

        target_line = None # stores the RIB file for downloading
        largest_line = None
        max = -1
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max:
                max = fsize
                largest_line = line
            
            diff = abs(int(fdate)-int(my_date)) # >0
            # XXX logic here not clear (but seems effective)
            if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line # work-around for a special case


        print 'Selected RIB:', target_line
        size = target_line.split()[-1] # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename # .bz2/.gz

        if os.path.exists(full_loc+'.txt'): # only for clearer logic
            os.remove(full_loc+'.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc+'.txt.gz'): 
            print 'existed size & original size:',os.path.getsize(full_loc+'.txt.gz'),fsize
            if os.path.getsize(full_loc+'.txt.gz') > 0.6 * fsize: # 0.6 is good enough
                return full_loc+'.txt.gz' # Do not download
            else:
                os.remove(full_loc+'.txt.gz') # too small to be complete

        if os.path.exists(full_loc): 
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else: # Good!
                cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
                cmlib.pack_gz(full_loc+'.txt')
                return full_loc+'.txt.gz'


        cmlib.force_download_file('http://'+web_location, datadir+web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
        cmlib.pack_gz(full_loc+'.txt')
        os.remove(full_loc) # remove the original file

        return full_loc+'.txt.gz'
Пример #7
0
    def download_one_rib_before_unix(self, my_date, unix): # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/' 
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print 'Getting list from ' + 'http://' + web_location
        except:
            return -1

        cmlib.make_dir(datadir+web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [item for item in rib_list if 'rib' in item or 'bview' in item]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist) 

        ok_rib_list = list() # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None # the RIB closest to unix 
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate+ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M') 
            runix = time_lib.mktime(objdt.timetuple()) + 8*60*60 # F**k! Time zone!
            print objdt, runix, unix
            if runix <= unix and unix-runix < min:
                min = unix-runix
                print 'min changed to ', min
                target_line = line

        print 'Selected RIB:', target_line
        if target_line == None:
            return -1
        size = target_line.split()[-1] # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename # .bz2/.gz

        if os.path.exists(full_loc+'.txt'): # only for clearer logic
            os.remove(full_loc+'.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc+'.txt.gz'): 
            print 'existed!!!!!!!!!!!!'
            return full_loc+'.txt.gz' # Do not download

        if os.path.exists(full_loc): 
            cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
            cmlib.pack_gz(full_loc+'.txt')
            return full_loc+'.txt.gz'


        cmlib.force_download_file('http://'+web_location, datadir+web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
        cmlib.pack_gz(full_loc+'.txt')
        os.remove(full_loc) # remove the original file

        return full_loc+'.txt.gz'
Пример #8
0
    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        target_line = None  # stores the RIB file for downloading
        largest_line = None
        max = -1
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max:
                max = fsize
                largest_line = line

            diff = abs(int(fdate) - int(my_date))  # >0
            # XXX logic here not clear (but seems effective)
            if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line  # work-around for a special case

        print 'Selected RIB:', target_line
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'existed size & original size:', os.path.getsize(
                full_loc + '.txt.gz'), fsize
            if os.path.getsize(full_loc +
                               '.txt.gz') > 0.6 * fsize:  # 0.6 is good enough
                return full_loc + '.txt.gz'  # Do not download
            else:
                os.remove(full_loc + '.txt.gz')  # too small to be complete

        if os.path.exists(full_loc):
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else:  # Good!
                cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
                cmlib.pack_gz(full_loc + '.txt')
                return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'
Пример #9
0
    def download_one_rib_before_unix(self, my_date,
                                     unix):  # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print 'Getting list from ' + 'http://' + web_location
        except:
            return -1

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        ok_rib_list = list()  # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None  # the RIB closest to unix
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate + ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M')
            runix = time_lib.mktime(
                objdt.timetuple()) + 8 * 60 * 60  # F**k! Time zone!
            print objdt, runix, unix
            if runix <= unix and unix - runix < min:
                min = unix - runix
                print 'min changed to ', min
                target_line = line

        print 'Selected RIB:', target_line
        if target_line == None:
            return -1
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'existed!!!!!!!!!!!!'
            return full_loc + '.txt.gz'  # Do not download

        if os.path.exists(full_loc):
            cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
            cmlib.pack_gz(full_loc + '.txt')
            return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'
Пример #10
0
    def get_pfx2as(self):
        self.get_pfx2as_file()

        print 'Calculating prefix to AS number trie...'
        pfx2as = dict()

        if int(self.sdate) >= 20050509:
            self.get_pfx2as_file()

            pfx2as_file = ''
            tmp = os.listdir(self.spt_dir)
            for line in tmp:
                if 'pfx2as' in line:
                    pfx2as_file = line
                    break

            f = open(self.spt_dir+pfx2as_file)
            for line in f:
                line = line.rstrip('\n')
                attr = line.split()
                if '_' in attr[2] or ',' in attr[2]:
                    continue
                pfx = attr[0]+'/'+attr[1]
                try:
                    pfx2as[pfx] = int(attr[2]) # pfx: origin AS
                except: # When will this happen?
                    pfx2as[pfx] = -1

            f.close()
        else:
            # Extract info from RIB of the monitor route-views2 and XXX
            mydate = self.sdate[0:4] + '.' + self.sdate[4:6]
            rib_location = datadir+'archive.routeviews.org/bgpdata/'+mydate+'/RIBS/'
            dir_list = os.listdir(datadir+'archive.routeviews.org/bgpdata/'+mydate+'/RIBS/')


            for f in dir_list:
                if not f.startswith('.'):
                    rib_location = rib_location + f # if RIB is of the same month. That's OK.
                    break
            
            if rib_location.endswith('txt.gz'):
                subprocess.call('gunzip '+rib_location, shell=True)  # unpack                        
                rib_location = rib_location.replace('.txt.gz', '.txt')
            elif not rib_location.endswith('txt'):  # .bz2/.gz file exists
                cmlib.parse_mrt(rib_location, rib_location+'.txt')
                os.remove(rib_location)  # then remove .bz2/.gz
                rib_location = rib_location + '.txt'
            # now rib file definitely ends with .txt, let's rock and roll
            with open(rib_location, 'r') as f:
                for line in f:
                    try:
                        tmp = line.split('|')[5]
                        pfx = tmp
                        ASlist = line.split('|')[6]
                        originAS = ASlist.split()[-1]
                        try:
                            pfx2as[pfx] = int(originAS)
                        except:
                            pfx2as[pfx] = -1
                    except:
                        pass

            f.close()
            # compress RIB into .gz
            if not os.path.exists(rib_location+'.gz'):
                cmlib.pack_gz(rib_location)

        return pfx2as