示例#1
0
def save_session(wb, queue=None):
    """Serialize the web *wb* and, if given, the pending *queue*.

    Both files are named after the hostname of ``wb.root.url`` and are
    written to ``io.LOGDIR``: ``<name>.web`` always, ``<name>.session``
    only when *queue* is truthy.  Without a queue, a ``<name>.session``
    file left over from an earlier run is deleted.
    """
    basename = urlrewrite.hostname_to_filename(
        urlrewrite.get_hostname(wb.root.url))
    web_file = basename + ".web"
    session_file = basename + ".session"

    target = shcolor.color(shcolor.YELLOW, basename + ".{web,session}")
    io.write_err("Saving session to %s ..." % target)

    io.serialize(wb, web_file, dir=io.LOGDIR)
    if queue:
        io.serialize(queue, session_file, dir=io.LOGDIR)
    elif io.file_exists(session_file, dir=io.LOGDIR):
        # Only the web is being saved, i.e. spidering is complete, so the
        # old session file is removed.
        io.delete(session_file, dir=io.LOGDIR)

    io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
示例#2
0
def save_session(wb, queue=None):
    """Write the current spidering state to ``io.LOGDIR``.

    The file stem is derived from the hostname of ``wb.root.url``.  The web
    goes to ``<stem>.web``; a truthy *queue* goes to ``<stem>.session``.
    When no queue is passed, any existing ``<stem>.session`` is deleted.
    """
    host = urlrewrite.get_hostname(wb.root.url)
    stem = urlrewrite.hostname_to_filename(host)
    session_name = stem + ".session"

    colored_target = shcolor.color(shcolor.YELLOW, stem + ".{web,session}")
    io.write_err("Saving session to %s ..." % colored_target)
    io.serialize(wb, stem + ".web", dir=io.LOGDIR)

    if queue:
        io.serialize(queue, session_name, dir=io.LOGDIR)
    else:
        # Only the web is being saved, i.e. spidering is complete: remove
        # the old session file if there is one.
        if io.file_exists(session_name, dir=io.LOGDIR):
            io.delete(session_name, dir=io.LOGDIR)

    io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
示例#3
0
def restore_session(url):
    """Load the saved web and queue for the hostname of *url*.

    Looks in ``io.LOGDIR`` for ``<name>.web`` and ``<name>.session``.

    Returns:
        A ``(queue, web)`` tuple.  Each element is None when its file does
        not exist.  A restored queue is passed through
        ``recipe.overrule_records`` before being returned.
    """
    basename = urlrewrite.hostname_to_filename(urlrewrite.get_hostname(url))
    web_file = basename + ".web"
    session_file = basename + ".session"

    saved_web = None
    if io.file_exists(web_file, dir=io.LOGDIR):
        label = shcolor.color(shcolor.YELLOW, web_file)
        io.write_err("Restoring web from %s ..." % label)
        saved_web = io.deserialize(web_file, dir=io.LOGDIR)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))

    saved_queue = None
    if io.file_exists(session_file, dir=io.LOGDIR):
        label = shcolor.color(shcolor.YELLOW, session_file)
        io.write_err("Restoring session from %s ..." % label)
        saved_queue = recipe.overrule_records(
            io.deserialize(session_file, dir=io.LOGDIR))
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))

    return saved_queue, saved_web
示例#4
0
def restore_session(url):
    """Restore previously saved state for the host that *url* points at.

    Two files in ``io.LOGDIR`` are consulted, both named after the
    hostname: ``<stem>.web`` and ``<stem>.session``.  A file that is
    missing leaves the corresponding result as None.

    Returns:
        ``(queue, web)``; the queue, when restored, is the result of
        ``recipe.overrule_records`` applied to the deserialized session.
    """
    host = urlrewrite.get_hostname(url)
    stem = urlrewrite.hostname_to_filename(host)
    web_name = stem + ".web"
    session_name = stem + ".session"
    restored_queue = None
    restored_web = None

    if io.file_exists(web_name, dir=io.LOGDIR):
        colored = shcolor.color(shcolor.YELLOW, web_name)
        io.write_err("Restoring web from %s ..." % colored)
        restored_web = io.deserialize(web_name, dir=io.LOGDIR)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))

    if io.file_exists(session_name, dir=io.LOGDIR):
        colored = shcolor.color(shcolor.YELLOW, session_name)
        io.write_err("Restoring session from %s ..." % colored)
        raw_queue = io.deserialize(session_name, dir=io.LOGDIR)
        restored_queue = recipe.overrule_records(raw_queue)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))

    return restored_queue, restored_web
示例#5
0
def rewrite_recipe(recipe, url):
    """Normalize every rule of *recipe* in place.

    For each rule this defaults "depth" to 1, applies the environment
    overrides described below, and compiles the "dump", "fetch" and
    "spider" entries that are plain strings into regex objects.

    Environment overrides (each active when set to a non-empty value):
        DEPTH:       replaces the rule's "depth" with ``int(DEPTH)``.
        HOST_FILTER: sets "host_filter" to the hostname of *url*.
        FETCH_ALL:   calls ``switch_key(rule, "dump", "fetch")``.
        DUMP_ALL:    calls ``switch_key(rule, "fetch", "dump")``; ignored
                     when FETCH_ALL is also set.

    Args:
        recipe: iterable of dict-like rules; every rule is mutated.
        url: URL whose hostname is used for the HOST_FILTER override.

    Raises:
        PatternError: a pattern string is not a valid regular expression.
        ValueError: DEPTH is set but cannot be converted with ``int``.
    """
    for rule in recipe:
        if "depth" not in rule:
            rule["depth"] = 1
        depth_override = os.environ.get("DEPTH")
        if depth_override:
            rule["depth"] = int(depth_override)

        if os.environ.get("HOST_FILTER"):
            rule["host_filter"] = urlrewrite.get_hostname(url)
        if os.environ.get("FETCH_ALL"):
            switch_key(rule, "dump", "fetch")
        elif os.environ.get("DUMP_ALL"):
            switch_key(rule, "fetch", "dump")

        # compile regexes; entries that are not plain strings (for example
        # already compiled patterns) are left untouched
        for r in ("dump", "fetch", "spider"):
            if r in rule and isinstance(rule[r], str):
                try:
                    rule[r] = re.compile(rule[r])
                except re.error as e:
                    # "except ... as" and call-style raise are valid on
                    # Python 2.6+ as well as Python 3
                    raise PatternError(
                        "Pattern error: %s: %s" % (e.args[0], rule[r]))
示例#6
0
def rewrite_recipe(recipe, url):
    """Apply defaults, environment overrides and regex compilation in place.

    Each rule in *recipe* is mutated as follows:

    * "depth" defaults to 1 and is replaced by ``int(DEPTH)`` when the
      DEPTH environment variable is non-empty.
    * "host_filter" is set to the hostname of *url* when HOST_FILTER is
      non-empty.
    * ``switch_key(rule, "dump", "fetch")`` is called when FETCH_ALL is
      non-empty; otherwise ``switch_key(rule, "fetch", "dump")`` is called
      when DUMP_ALL is non-empty.
    * "dump", "fetch" and "spider" values that are plain strings are
      replaced by their compiled regex.

    Args:
        recipe: iterable of dict-like rules.
        url: URL supplying the hostname for the HOST_FILTER override.

    Raises:
        PatternError: a pattern string fails to compile.
        ValueError: DEPTH is set but is not accepted by ``int``.
    """
    for rule in recipe:
        if "depth" not in rule:
            rule["depth"] = 1
        depth_override = os.environ.get("DEPTH")
        if depth_override:
            rule["depth"] = int(depth_override)

        if os.environ.get("HOST_FILTER"):
            rule["host_filter"] = urlrewrite.get_hostname(url)
        if os.environ.get("FETCH_ALL"):
            switch_key(rule, "dump", "fetch")
        elif os.environ.get("DUMP_ALL"):
            switch_key(rule, "fetch", "dump")

        # compile regexes; anything that is not a plain string is kept as is
        for r in ("dump", "fetch", "spider"):
            if r in rule and isinstance(rule[r], str):
                try:
                    rule[r] = re.compile(rule[r])
                except re.error as e:
                    # Written in the form accepted by both Python 2.6+ and
                    # Python 3; the message text is unchanged.
                    raise PatternError("Pattern error: %s: %s" % (e.args[0],
                                                                  rule[r]))
示例#7
0
def apply_hostfilter(filter_hostname, url):
    """Tell whether *url* passes the host filter.

    With the HOST_FILTER environment variable unset or empty, every URL
    passes.  Otherwise the URL passes only when its hostname equals
    *filter_hostname*.
    """
    if not os.environ.get("HOST_FILTER"):
        return True
    url_hostname = urlrewrite.get_hostname(url)
    return url_hostname == filter_hostname
示例#8
0
def apply_hostfilter(filter_hostname, url):
    """Return True when *url* is allowed by the host filter.

    Filtering is active only while the HOST_FILTER environment variable is
    non-empty; in that case the hostname of *url* must equal
    *filter_hostname*.  When filtering is inactive the result is always
    True.
    """
    filtering_enabled = os.environ.get("HOST_FILTER")
    if filtering_enabled:
        actual_hostname = urlrewrite.get_hostname(url)
        return actual_hostname == filter_hostname
    return True