Пример #1
0
	def update_url_data(self,url):
		"""
		Insert url into the database, or re-evaluate it if it is already stored.

		The url is hashed with utils.calc_hash() and looked up through
		self.crawlerdb.check_db(); the lookup result selects the branch:
		  * None            -> the url is added with add_url()
		  * a positive id   -> update_evaluate() is called with that id
		  * any other value -> handle_collision() is called
		"""
		url_hash=utils.calc_hash(url)
		key_id=self.crawlerdb.check_db(url,url_hash)
		# None is a singleton: test identity, not equality, so a custom
		# __eq__ on the returned object cannot influence this branch.
		if key_id is None:
			# url is not in the database yet: store it
			self.crawlerdb.add_url(url,url_hash)
			# TODO: VERY LIKELY the next line has to be added to initialize the
			# rank info of the url, or get_url() will always return only the
			# first url in the db.
			# self.crawlerfb.add_rank_info()
			if settings.DEBUG_FLAG:
				# single parenthesized argument: prints the same text under
				# both the Python 2 print statement and the print() function
				print('insert %s, %s in update_url_data' % (url,url_hash))
			# #IMPROVE(ifkite): use namedtuple
			# result_book=parse_page(url)#return tuple
			# ext_result_book=utils.merge_tups(result_book,new_key_id)
			# add_page(ext_result_book)
			# evalu=rank_page()
			# add_evalu(evalu)
		elif key_id>0:
			# page is already in the db
			self.update_evaluate(key_id)
		else:
			self.handle_collision()
Пример #2
0
 def __init__(self, index, data, previous_hash):
     """Create a block whose hash is derived from its fields and the previous hash.

     The timestamp is taken from get_block_timestamp(); the hash is the result
     of calc_hash() over index, timestamp, str(data) and previous_hash.
     """
     timestamp = get_block_timestamp()
     self.index, self.timestamp = index, timestamp
     self.data, self.previous_hash = data, previous_hash
     self.hash = calc_hash(index, timestamp, str(data), previous_hash)
Пример #3
0
def crypto(file_name, direction):
    """Push a record about an action on file_name onto the "crypto" queue.

    The record carries the file name, the requested direction, the previous
    hash and a new hash computed by utils.calc_hash(file_name).
    Returns None.
    """
    # NOTE(review): old_hash is not defined in this function; presumably it is
    # a module-level name -- confirm, otherwise this raises NameError.
    record = {
        "file_acted_upon": file_name,
        "action": direction,
        "old_hash": old_hash,
        "new_hash": utils.calc_hash(file_name),
    }
    queue.push(record, "crypto")
Пример #4
0
def get_functions():
    """Return the fixture mapping of function name -> {"data", "hash"}.

    Leaf functions hold a one-element list with their own name. The composite
    entries ("bar-foo", "bar-foo-baz") embed the hashes of their parts, each
    computed with utils.calc_hash().
    """

    def entry(data):
        # Pair a payload with its hash.
        return {"data": data, "hash": utils.calc_hash(data)}

    leaf_names = ("aaa", "bbb", "ooo", "foo", "baz")
    functions = {name: entry([name]) for name in leaf_names}

    # "bar-foo" refers to "foo" by the hash of its data.
    functions["bar-foo"] = entry(["bar", utils.calc_hash(["foo"])])

    # "bar-foo-baz" refers to the data of "bar-foo" by hash, followed by "baz".
    bar_foo_hash = utils.calc_hash(["bar", utils.calc_hash(["foo"])])
    functions["bar-foo-baz"] = entry([bar_foo_hash, "baz"])
    return functions
Пример #5
0
def build_file(content, tags, _from):
    """Return a (hash, record) pair for the given content.

    The hash is utils.calc_hash(content); the record stores the content under
    "data" and the origin under "from". The tags argument is currently unused.
    """
    record = {"data": content, "from": _from}
    # Fields that are disabled for now:
    #   "tags": tags
    #   "when_modified": [utils.get_time()]
    #   "when_accessed": []
    return utils.calc_hash(content), record
Пример #6
0
def get_parsers():
    """Return the fixture mapping of parser name -> {"data", "hash"}.

    Each "data" value is ["parse", ["text", <label>]]; "hash" is
    utils.calc_hash() of that data.
    """
    labels = (
        ("parser-a", "Language A"),
        ("parser-b", "Language B"),
        ("parser-spec", "Language Spec"),
    )
    parsers = {
        name: {"data": ["parse", ["text", label]]} for name, label in labels
    }
    # Second pass: attach the hash of each entry's data.
    for entry in parsers.values():
        entry["hash"] = utils.calc_hash(entry["data"])
    return parsers
Пример #7
0
def get_binaries():
    """Return the fixture mapping of binary name -> {"data", "hash"}.

    "data" is a placeholder string containing the name; "hash" is
    utils.calc_hash() of that string.
    """
    names = (
        "compile-default",
        "parser-a",
        "parser-b",
        "parser-spec",
        "list",
        "print",
        "apple",
        "orange",
        "banana",
    )
    binaries = {name: {"data": f"[BINARY CONTENT ({name})]"} for name in names}
    # Second pass: attach the hash of each placeholder.
    for entry in binaries.values():
        entry["hash"] = utils.calc_hash(entry["data"])
    return binaries
Пример #8
0
def get_sources():
    """Return the fixture mapping of source name -> {"data", "hash"}.

    "data" is a fake source-code string; "hash" is utils.calc_hash() of it.
    """
    texts = (
        ("parser-a",
         "this is fake source code that is parsed via 'parser-a' into a Language A parser"),
        ("parser-b",
         "this is fake source code that is parsed via 'parser-a' into a Language B parser"),
        ("parser-spec",
         "this is fake source code that is parsed via 'parser-a' into a Language Spec parser"),
        ("compile-default",
         "this is fake source code that is parsed via 'parser-a' into a compiler"),
        ("foo", "fake source code: [foo]"),
        ("baz", "fake source code: [baz]"),
        ("bar-foo", "fake source code: [bar [foo]]"),
        ("bar-foo-baz", "fake source code: [[bar [foo]] baz]"),
        ("aaa", "fake source code: [aaa]"),
        ("bbb", "fake source code: [bbb]"),
        ("ooo", "fake source code: [ooo]"),
    )
    sources = {name: {"data": text} for name, text in texts}
    # Second pass: attach the hash of each source text.
    for entry in sources.values():
        entry["hash"] = utils.calc_hash(entry["data"])
    return sources
Пример #9
0
def get_programs(functions):
    """Return the fixture mapping of program name -> {"data", "hash"}.

    Each program's "data" is a one-element list holding {"entry": <hash>},
    where the hash is read from functions[<function name>]["hash"].
    "hash" is utils.calc_hash() of that data.

    functions must contain the keys "aaa", "ooo", "bbb", "foo", "baz",
    "bar-foo" and "bar-foo-baz", each mapping to a dict with a "hash" key;
    a missing key raises KeyError.
    """
    entry_points = (
        ("apple", "aaa"),
        ("orange", "ooo"),
        ("banana", "bbb"),
        ("parser-a", "foo"),
        ("parser-b", "baz"),
        ("parser-spec", "bar-foo"),
        ("compile-default", "bar-foo-baz"),
    )
    # First pass resolves every entry hash, so a missing function fails
    # before anything is hashed.
    programs = {
        program: {"data": [{"entry": functions[function]["hash"]}]}
        for program, function in entry_points
    }
    # Second pass: attach the hash of each program's data.
    for entry in programs.values():
        entry["hash"] = utils.calc_hash(entry["data"])
    return programs
Пример #10
0
	def test_mydb_check_db_case2(self):
		# A url that was just added must be reported by check_db() with a
		# positive value.
		url='http://book.douban.com/subject/1863930/'
		url_hash=utils.calc_hash(url)
		self.crawlerdb.add_url(url,url_hash)
		key_id=self.crawlerdb.check_db(url,url_hash)
		assert key_id>0