Example #1
def checkcache(itemname,
               filename='importvars/asn.cache',
               incachedir='importvars/mycache'):
    ''' checks the ASN cache for itemname; on a miss, does a fresh lookup and stores the result in the cache '''
    retval = '?'
    cachedir = df.rightdir(df.getdir(__file__) + incachedir)
    if len(itemname) >= 7 and '.' in itemname:
        tmpfname = cachedir + itemname.split('.')[0] + '.cache'
        data = defaultdict(str, df.pjsonin(tmpfname))
        tmpval = data[itemname]
        if df.isint(tmpval) or tmpval == 'NA':
            retval = tmpval
        elif tmpval == '':  # not cached yet; do a fresh lookup
            asn = None
            try:
                asn = af.asn_lookup(itemname)
            except Exception:
                pass
            if asn is None:
                data[itemname] = '?'
            else:
                data[itemname] = asn
                retval = asn
            df.pjsonout(tmpfname, dict(data), 'w')
    return retval
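
A hedged usage sketch (192.0.2.1 is a TEST-NET address used purely for illustration; df and af are the repo's helper modules):

# cache files are sharded by the first dot-separated label of the item,
# e.g. 'importvars/mycache/192.cache' for the lookup below
asn = checkcache('192.0.2.1')  # returns an ASN string, 'NA', or '?' on failure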
Example #2
def dump(self):
    ''' persists the in-memory closeness cache to its pickle on disk '''
    if self.changed:
        invals = self.hash
        cache = self.indirect_cache
        logger.warning("dumping, DO NOT ctrl-C!")
        if invals not in cache:
            # first dump for this parameter hash: allocate a pickle path
            # and persist the updated index
            cache[invals] = df.rightdir(statedir + "pickles") \
                + "closeness" + str(time.time()) + ".pickle"
            df.pickleout(self.f, cache)
        df.pickleout(cache[invals], self.cache)
        logger.warning("safe to ctrl-C")
        self.changed = False
        time.sleep(2)  # give the user a moment to see the all-clear
    if self.hits + self.misses > 0:
        hit_rate = float(self.hits) / float(self.hits + self.misses)
        logger.debug("hit rate: " + str(hit_rate) +
                     ", total hits: " + str(self.hits))
Example #3
import logging
import logging.config

logging.config.fileConfig('logging.conf', disable_existing_loggers=False)

# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + " logger loaded")

##################################################################
#                        GLOBAL AND SETUP
##################################################################

# paths
basedir = df.getdir(__file__) + '../'
statedir = df.rightdir(basedir + 'state/')
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
plotsdir = df.rightdir(basedir + "plots/")

##################################################################
#                           CODE
##################################################################


def get_ansset_sizes(start_time, fname="", **kwas):
    '''
    :param start_time: int indicating the earliest query the window should include
    :param **kwas: keyword arguments for vv.get_svl()
    :return: (m) matrix of client pairs vs domains,
             (fmt) list of domains
Example #4
# database setup
mclient = MongoClient()
db = mclient.veracity
pdata = db.probe_data

# fields that are specific to probes
pfields = ['country', 'asn_v4', 'asn_v6', 'geometry', 'probe_ip']

##################################################################
#                        GLOBAL AND SETUP
##################################################################


# paths
basedir = df.getdir(__file__)+'../'
statedir = df.rightdir(basedir+"state/")

# external parameters
params = df.getlines(statedir+'params')
creation_key = params[0]
destruction_key = params[1]


##################################################################
#                            CODE
##################################################################


def get_measurements(filters):
    '''
    see https://atlas.ripe.net/docs/api/v2/reference/#!/measurements/ for
Example #5
import logging
import logging.config

logging.config.fileConfig('logging.conf', disable_existing_loggers=False)

# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + " logger loaded")

##################################################################
#                        GLOBAL AND SETUP
##################################################################

# paths
basedir = df.getdir(__file__) + '../'
statedir = df.rightdir(basedir + 'state/')
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
plotsdir = df.rightdir(basedir + "plots/")
ccachef = df.rightdir(statedir + "pickles/") + "ccache.pickle"

##################################################################
#                           CODE
##################################################################


def distance(c, d):  # c: closeness, d: physical distance (km)
    # log base 40075 (Earth's equatorial circumference, km) scales d into
    # roughly [0, 1], so both terms contribute on a comparable scale
    return math.sqrt(c**2 + math.log(max([d, 1.0]), 40075)**2)
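
A quick numeric check with hypothetical inputs; since 40075 km is Earth's equatorial circumference, the log term stays below 1 for terrestrial distances:

import math

c, d = 0.8, 1000.0  # hypothetical closeness and physical distance (km)
print(math.log(max([d, 1.0]), 40075))                       # ~0.652
print(math.sqrt(c**2 + math.log(max([d, 1.0]), 40075)**2))  # ~1.032 = distance(c, d)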


def closest_diff_desc(start_time, fname="", xlim=[.6, 1.0], **kwas):
Example #6
def get_big_svl(start_time,
                duration=30000,
                mask=32,
                fmt=None,
                country_set=None,
                oddballs=True,
                maxmissing=0,
                return_ccache=True,
                ccachef=df.rightdir(statedir + "pickles/") + "ccache.pickle",
                mindomsize=1):
    '''
    see get_svl()
    '''
    dur = 60 * 60 * 24  # process one day at a time to avoid mongo cursor timeouts
    end_time = start_time + duration

    anssets = defaultdict(set)
    myfmt = set()

    svl, tmp_fmt, tmp_anssets = get_svl(start_time,
                                        duration=dur,
                                        mask=mask,
                                        fmt=None,
                                        country_set=country_set,
                                        oddballs=oddballs,
                                        maxmissing=1000,
                                        return_ccache=False,
                                        ccachef=ccachef,
                                        mindomsize=1)
    svld = dict()
    for sv in svl:
        svld[sv.get_id()] = sv
    for dom in tmp_anssets.keys():
        anssets[dom] |= tmp_anssets[dom]
    myfmt |= set(tmp_fmt)
    start_time += dur

    while start_time + dur <= end_time:
        tmp_svl, tmp_fmt, tmp_anssets = get_svl(start_time,
                                                duration=dur,
                                                mask=mask,
                                                fmt=None,
                                                country_set=country_set,
                                                oddballs=oddballs,
                                                maxmissing=1000,
                                                return_ccache=False,
                                                ccachef=ccachef,
                                                mindomsize=1)

        for dom in tmp_anssets.keys():
            anssets[dom] |= tmp_anssets[dom]
        myfmt |= set(tmp_fmt)

        for sv in tmp_svl:
            svid = sv.get_id()
            if svid in svld:
                svld[svid].absorb(sv)
            else:
                svld[svid] = sv

        start_time += dur

    svl = list(svld.values())  # list() so this also works on Python 3's view
    if fmt is None:
        fmt = list(myfmt)
    # drop domains that are too small to be informative or that are missing
    # from more than half of the smartvecs (mirrors the pruning in get_svl())
    for dom in fmt:
        if len(anssets[dom]) < mindomsize \
          or len([sv for sv in svl if dom not in sv]) > 0.5*len(svl):
            del anssets[dom]
    svl = reduce_svl(svl, fmt, maxmissing)

    if return_ccache:
        ccache = init_ccache(None, ccachef, start_time, duration, mask, fmt,
                             oddballs, maxmissing)
        return svl, fmt, dict(anssets), ccache
    else:
        return svl, fmt, dict(anssets)
Example #7
def get_svl(start_time,
            duration=30000,
            mask=32,
            fmt=None,
            country_set=None,
            oddballs=True,
            maxmissing=0,
            return_ccache=True,
            ccachef=df.rightdir(statedir + "pickles/") + "ccache.pickle",
            mindomsize=2):
    '''
    :param start_time: int indicating the earliest query the window should include
    :param duration: int indicating the span of time covered by the window,
        in seconds
    :param mask: int, prefix mask to use over domain IPs
    :param fmt: see transform_fmt()
    :param country_set: the set of countries the window should include queries from.
        If None, then all countries will be included
    :param oddballs: if True, will include non-public IPs (10.x.x.x, etc); if
        False, will only include public IPs
    :return: tuple (svl, fmt, anssets) of smartvecs, domains, and per-domain
        answer sets; if return_ccache is True, the closeness cache is appended
        as a fourth element
    '''
    # if the duration is too long, mongo times out the cursor before we can
    # finish processing, so we should break it into smaller jobs with
    # get_big_svl()
    if duration > 24 * 60 * 60:
        logger.warning(
            "using get_big_svl(), which assumes that 'duration' is a multple of 24 hours"
        )
        return get_big_svl(start_time, duration, mask, fmt, country_set,
                           oddballs, maxmissing, return_ccache, ccachef,
                           mindomsize)
    logger.info("->window...")
    window = get_window(start_time,
                        duration,
                        country_set=country_set,
                        domain_set=fmt)

    logger.info("->svl...")
    svl, doms, anssets = dicts_to_svl(window, mask, oddballs)

    logger.debug(str(doms))

    fmt = transform_fmt(fmt, doms)
    # remove any domains that have fewer than mindomsize distinct IPs (all
    # nodes would see the same thing) or that are missing from more than
    # half of the smartvecs
    for dom in fmt:
        if len(anssets[dom]) < mindomsize \
          or len([sv for sv in svl if dom not in sv]) > 0.5*len(svl):
            del anssets[dom]
    fmt = sorted(list(set(anssets.keys()).intersection(set(fmt))))
    svl = reduce_svl(svl, fmt, maxmissing)

    for dom in fmt:
        logger.debug("-----------" + dom + "-----------")
        tmp = sorted(anssets[dom])
        for val in tmp:
            if type(val) is int:
                logger.debug(ipp.int2ip(val))

    if return_ccache:
        ccache = init_ccache(None, ccachef, start_time, duration, mask, fmt,
                             oddballs, maxmissing)
        return svl, fmt, dict(anssets), ccache
    else:
        return svl, fmt, dict(anssets)
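
A hedged invocation sketch; the timestamp is an arbitrary example (2017-05-01 00:00 UTC), and windows longer than one day are delegated to get_big_svl() automatically:

start = 1493596800  # 2017-05-01 00:00:00 UTC
# short window: the closeness cache comes back as a fourth element by default
svl, fmt, anssets, ccache = get_svl(start, duration=6 * 60 * 60)
# three-day window: routed through get_big_svl(); skip the ccache
svl3, fmt3, anssets3 = get_svl(start, duration=3 * 24 * 60 * 60,
                               return_ccache=False)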
Example #8
import logging
import logging.config

logging.config.fileConfig('logging.conf', disable_existing_loggers=False)

# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + " logger loaded")

##################################################################
#                        GLOBAL AND SETUP
##################################################################

# paths
basedir = df.getdir(__file__) + '../'
statedir = df.rightdir(basedir + "state/")
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
vngr.set_cache_dir(df.rightdir(statedir + "pickles"))

# database setup
mclient = MongoClient()
db = mclient.veracity
data = db.m30002_may17_full
pdata = db.probe_data

# printing
pp = pprint.PrettyPrinter(indent=4)

# vector processor
vproc = df.picklein(statedir + 'nmf_rank20_pkr01.pickle')
Example #9
def set_cache_dir(f):
    ''' points vinegar's module-level cache paths at directory f '''
    global cachedir
    cachedir = f
    global datadir
    datadir = df.rightdir(cachedir + "cache/")
Example #10
##################################################################
#                           LOGGING
##################################################################
import logging
import logging.config

logging.config.fileConfig('logging.conf', disable_existing_loggers=False)

# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + " logger loaded")

##################################################################
#                       GLOBAL AND SETUP
##################################################################

basedir = df.rightdir(os.getcwd())
cachedir = basedir  # manually change this in the file that is using vinegar
datadir = df.rightdir(cachedir + "cache/")

##################################################################
#                           CODE
##################################################################


def set_cache_dir(f):
    global cachedir
    cachedir = f
    global datadir
    datadir = df.rightdir(cachedir + "cache/")
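
Usage mirrors Example #8, where the importing module points vinegar's cache at its own pickles directory:

vngr.set_cache_dir(df.rightdir(statedir + "pickles"))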

Example #11
import logging
import logging.config

logging.config.fileConfig('logging.conf', disable_existing_loggers=False)

# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + " logger loaded")

##################################################################
#                        GLOBAL AND SETUP
##################################################################

# paths
basedir = df.getdir(__file__) + '../'
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
plotsdir = df.rightdir(basedir + "plots/")

##################################################################
#                           CODE
##################################################################


def add_closeness_edges(svl, ccache, G=None, minweight=0):
    '''
    :param svl: list of smartvecs
    :param ccache: closeness_cache
    :param G: preexisting graph
    :return: graph

    builds graph using pairwise closeness as edge weights
    '''