Пример #1
0
def prepare_lrus(lru, lruLinks, crawlMetas=None):
    """Convert a crawled page LRU and its outgoing link LRUs for indexing.

    Each LRU is passed through ``LRUs.lru_to_stemnodes`` and the last
    element of the returned sequence is annotated in place with crawl
    metadata (presumably that element represents the full LRU, the
    preceding ones its parent stems -- confirm against ``LRUs``).

    Args:
        lru: LRU of the page that was crawled.
        lruLinks: iterable of LRUs linked from that page.
        crawlMetas: optional mapping with any of the keys ``depth``,
            ``timestamp``, ``status``, ``error`` and ``encoding``.  Missing
            keys fall back to ``0``, the current time, ``200``, ``None`` and
            ``"utf-8"`` respectively.  ``None`` is treated as an empty
            mapping.

    Returns:
        A list whose first item is the converted page LRU, followed by one
        converted item per link, in the order of ``lruLinks``.
    """
    # A None default (instead of a literal ``{}``) avoids sharing one dict
    # object between calls and lets callers pass None explicitly.
    metas = {} if crawlMetas is None else crawlMetas
    depth = metas.get("depth", 0)
    timestamp = metas.get("timestamp", int(time()))

    page_stems = LRUs.lru_to_stemnodes(lru)
    page_node = page_stems[-1]
    page_node["crawled"] = True
    page_node["crawlDepth"] = depth
    page_node["crawlTimestamp"] = timestamp
    page_node["crawlHTTPCode"] = metas.get("status", 200)
    page_node["crawlError"] = metas.get("error", None)
    page_node["pageEncoding"] = metas.get("encoding", "utf-8")

    lrus = [page_stems]
    for link in lruLinks:
        link_stems = LRUs.lru_to_stemnodes(link)
        link_node = link_stems[-1]
        link_node["linked"] = True
        # Links sit one hop further from the crawl start than the page.
        link_node["crawlDepth"] = depth + 1
        link_node["crawlTimestamp"] = timestamp
        lrus.append(link_stems)

    return lrus
Пример #2
0
def define_webentities(neo4j, lrus=TEST_DATA["manual_webentities"]):
    """Index the prefixes of the given LRUs and create one web entity each.

    For every LRU a web entity description is built from
    ``LRUs.name_lru`` and ``LRUs.get_alt_prefixes``.  Every alternative
    prefix is converted with ``LRUs.lru_to_stemnodes`` and sent to the
    ``index_lrus`` write query, then the web entities are sent to the
    ``create_wes`` write query.

    Args:
        neo4j: object exposing ``write_query(name, **params)``.
        lrus: LRUs to turn into web entities; defaults to the
            ``manual_webentities`` entry of ``TEST_DATA``.
    """
    webentities = []
    for lru in lrus:
        webentities.append({
            "name": LRUs.name_lru(lru),
            "prefixes": LRUs.get_alt_prefixes(lru),
        })

    # Second pass: flatten every alternative prefix of every LRU.
    stemnodes = []
    for lru in lrus:
        for prefix in LRUs.get_alt_prefixes(lru):
            stemnodes.append(LRUs.lru_to_stemnodes(prefix))

    neo4j.write_query("index_lrus", lrus=stemnodes)
    neo4j.write_query("create_wes", webentities=webentities)
Пример #3
0
def create_webentities(neo4j, lrus):
    """Index the alternative prefixes of ``lrus`` and create web entities.

    One web entity (``prefixes`` + ``name``) is built per LRU.  All the
    prefixes are converted with ``LRUs.lru_to_stemnodes`` and written with
    the ``index_lrus`` query, then the web entities are written with the
    ``create_wes`` query.  The counters of each write result are printed.

    Args:
        neo4j: object exposing ``write_query(name, **params)``.
        lrus: iterable of LRUs, one web entity is created for each.
    """
    webentities = []
    prefixes_to_index = []
    for lru in lrus:
        prefixes = LRUs.get_alt_prefixes(lru)
        prefixes_to_index.extend(prefixes)
        webentities.append({
            "prefixes": prefixes,
            "name": LRUs.name_lru(lru),
        })

    stemnodes = [LRUs.lru_to_stemnodes(prefix) for prefix in prefixes_to_index]

    index_result = neo4j.write_query("index_lrus", lrus=stemnodes)
    print(index_result._summary.counters.__dict__)

    creation_result = neo4j.write_query("create_wes", webentities=webentities)
    print(creation_result._summary.counters.__dict__)
Пример #4
0
def init_WE_creation_rules(neo4j, rules=TEST_DATA["WECRs"]):
    """Store the web entity creation rules and return their compiled regexps.

    Each rule is expanded into one rule per alternative prefix (as given by
    ``LRUs.get_alt_prefixes``), keeping the original pattern.  The non-empty
    prefixes are written with the ``index_lrus`` query and the expanded
    rules with the ``create_wecreationrules`` query.

    Args:
        neo4j: object exposing ``write_query(name, **params)``.
        rules: iterable of mappings with ``prefix`` and ``pattern`` keys;
            defaults to the ``WECRs`` entry of ``TEST_DATA``.

    Returns:
        A dict mapping ``prefix + pattern`` of every expanded rule to the
        regexp compiled from ``getPreset(pattern, prefix)``.
    """
    extended_rules = []
    for rule in rules:
        for prefix in LRUs.get_alt_prefixes(rule["prefix"]):
            extended_rules.append({
                "prefix": prefix,
                "pattern": rule["pattern"],
            })

    # Compile the regexps once here so they are ready to use at runtime.
    WECR_regexps = {}
    for rule in extended_rules:
        key = rule["prefix"] + rule["pattern"]
        WECR_regexps[key] = re.compile(
            getPreset(rule["pattern"], rule["prefix"]))

    # Rules with an empty prefix are skipped for indexing but are still
    # stored as creation rules below.
    stemnodes = []
    for rule in extended_rules:
        if rule["prefix"]:
            stemnodes.append(LRUs.lru_to_stemnodes(rule["prefix"]))

    neo4j.write_query("index_lrus", lrus=stemnodes)
    neo4j.write_query("create_wecreationrules", rules=extended_rules)
    return WECR_regexps
Пример #5
0
def run_WE_creation_rule(neo4j, lastcheck):
    """Create web entities for the LRUs returned by the creation-rule query.

    Runs the ``we_apply_creation_rule`` read query with ``lastcheck`` and
    takes the ``lru`` field of every returned record.  For each of those
    LRUs a web entity (``prefixes`` + ``name``) is built; all the prefixes
    are indexed with the ``index_lrus`` write query and the web entities
    are stored with the ``create_wes`` write query.  The counters of each
    write result are printed.

    Args:
        neo4j: object exposing ``read_query`` and ``write_query``.
        lastcheck: value forwarded as the ``lastcheck`` query parameter.
    """
    # NOTE: an earlier variant used the "we_default_creation_rule" query
    # and read a single "lrus" field from the first record.
    records = neo4j.read_query("we_apply_creation_rule",
                               lastcheck=lastcheck).records()
    lrus = [record['lru'] for record in records]

    webentities = []
    pending_prefixes = []
    for lru in lrus:
        alt_prefixes = LRUs.get_alt_prefixes(lru)
        pending_prefixes += alt_prefixes
        entity = {'prefixes': alt_prefixes, 'name': LRUs.name_lru(lru)}
        webentities.append(entity)

    stemnodes = []
    for prefix in pending_prefixes:
        stemnodes.append(LRUs.lru_to_stemnodes(prefix))

    indexed = neo4j.write_query("index_lrus", lrus=stemnodes)
    print(indexed._summary.counters.__dict__)

    created = neo4j.write_query("create_wes", webentities=webentities)
    print(created._summary.counters.__dict__)