class FileService():
    """Facade combining file lookup (CheckFileService) and download
    (FileTransferService) for a single node.

    Args:
        ip: Address handed to both underlying services.
        port: UDP port for the check-file service.
        node_list: List of peer addresses handed to the check-file service.
        directory: Directory handed to both underlying services.
        timeout: Timeout handed to the check-file service.
    """

    # Port given to the file-transfer service (was an inline literal).
    FILE_TRANSFER_PORT = 3002

    def __init__(self, ip, port, node_list, directory, timeout):
        # The transfer service is built first because CheckFileService
        # requires it as a constructor argument; omitting it raised
        # TypeError on construction.
        self.file_transfer_service = FileTransferService(
            ip=ip, port=self.FILE_TRANSFER_PORT, directory=directory)
        self.check_file_service = CheckFileService(
            ip=ip,
            # Honour the caller's port instead of a hard-coded 3001.
            port=port,
            nodes=node_list,
            timeout=timeout,
            directory=directory,
            file_transfer_service=self.file_transfer_service,
        )
        self.directory = directory
        self.threadpool = ThreadPool(2)

    def start_service(self):
        """Queue the check-file server on the pool and wait for the pool."""
        # NOTE(review): only the check-file server is started here; the
        # file-transfer server is never started by this class - confirm
        # whether that is intended.
        # NOTE(review): ('', 0) is presumably forwarded to start_server by
        # ThreadPool.add_task - verify against ThreadPool.
        self.threadpool.add_task(self.check_file_service.start_server, ('', 0))
        self.threadpool.wait_completion()

    def get_file(self, file_path):
        """Ask the peers for `file_path`, download it and save it locally.

        Returns None in every case. When no node reports the file, or the
        transfer yields nothing, a message is printed and nothing is saved.
        """
        fastest_node = self.check_file_service.process_get_file_request(
            file_path=file_path)
        if fastest_node is None:
            print("No node has the requested file: " + str(file_path))
            return

        requested_file = self.file_transfer_service.get_file_from_node(
            node_ip=fastest_node["node"],
            node_port=fastest_node["node_port"],
            file_path=file_path)
        if requested_file is None:
            print("No data received for file: " + str(file_path))
            return

        # save_file takes (path, data); the path argument used to be missing.
        self.save_file(file_path, requested_file)

    def save_file(self, file_path, file_to_be_saved):
        """Write `file_to_be_saved` to `file_path`, replacing any old content.

        Bytes-like data is written in binary mode, anything else in text
        mode. The file used to be opened read-only, so every write failed.
        """
        is_binary = isinstance(file_to_be_saved, (bytes, bytearray))
        with open(file_path, "wb" if is_binary else "w") as f:
            f.write(file_to_be_saved)
def main():
    """Build the discovery and file services and run both on one pool.

    The same node list object is passed to both services. Blocks until the
    pool reports completion.
    """
    known_nodes = ["0.0.0.0", "localhost"]
    pool = ThreadPool(3)

    discovery = DiscoveryService(
        ip="127.0.0.1",
        port=3000,
        initial_nodes=known_nodes,
        period=5,
    )
    files = FileService(
        ip="127.0.0.1",
        port=3001,
        node_list=known_nodes,
        directory="files/",
        timeout=5,
    )

    # Queue the services in the same order they were created.
    for service in (discovery, files):
        pool.add_task(service.start_service)
    pool.wait_completion()
class FileTransferService(TCPService):
    """TCP service that serves files out of a local directory.

    Args:
        ip: Address passed to the TCPService base class.
        port: Port passed to the TCPService base class.
        directory: Directory that requested paths are resolved against.
    """

    def __init__(self, ip, port, directory):
        super(FileTransferService, self).__init__(name='FileTransfer',
                                                  ip=ip,
                                                  port=port)
        self.directory = directory
        self.threadpool = ThreadPool(2)

    def start_service(self):
        """Queue the TCP server on the pool and wait for the pool."""
        # NOTE(review): ('', 0) is presumably forwarded to start_server by
        # ThreadPool.add_task - verify against ThreadPool.
        self.threadpool.add_task(self.start_server, ('', 0))
        self.threadpool.wait_completion()

    def process_server_response(self, message, address):
        """Read the requested file from `self.directory`.

        Args:
            message: Requested path relative to `self.directory`.
                NOTE(review): assumes the message is the path as a str -
                confirm against TCPService.
            address: Peer address; currently unused.

        Returns:
            The file content as bytes, or None when it cannot be read.
        """
        # The body used to reference an undefined name `file_path`
        # (NameError) and never closed the file it opened.
        # NOTE(review): `message` comes from the network and is joined into
        # a filesystem path unchecked; '..' segments can escape
        # `self.directory`. Validate before exposing this service.
        file_path = message
        full_path = self.directory + '/' + file_path
        try:
            with open(full_path, 'rb') as needed_file:
                contents = needed_file.read()
        except (IOError, OSError) as error:
            print("Could not read requested file " + full_path + ": " +
                  str(error))
            return None
        # TODO: send `contents` back to `address`; the TCPService send API
        # is not visible from this class.
        return contents

    def get_server_port(self):
        """Return the port number reported by the server socket."""
        return self.server_socket.getsockname()[1]

    def get_file_from_node(self, node_ip, node_port, file_path):
        """Fetch `file_path` from a remote node. Not implemented yet."""
        pass
class DiscoveryService(UDPService):
    """UDP service that periodically exchanges the known-node list.

    Args:
        ip: This node's address; also excluded from discovered nodes.
        port: Port passed to the base class and used when sending to peers.
        initial_nodes: List of known node addresses. It is stored by
            reference and appended to in place.
        period: Seconds to sleep between broadcasts.
    """

    def __init__(self, ip, port, initial_nodes, period):
        super(DiscoveryService, self).__init__(name='Discovery',
                                               ip=ip,
                                               port=port)
        self.period = period
        self.nodes = initial_nodes
        self.threadpool = ThreadPool(2)

    def start_service(self):
        """Queue the broadcast job and the server, then wait for the pool."""
        self.threadpool.add_task(self.discovery_job)
        self.threadpool.add_task(self.start_server)
        self.threadpool.wait_completion()

    def discovery_job(self):
        """Send the node list to every known node, forever, every `period`."""
        while True:
            self.send_nodes_to_others()
            sleep(self.period)

    def update_nodes(self, discovered_list):
        """Append every node not yet known, skipping this node's own ip."""
        for discovered_node in discovered_list:
            # Compare by value: `is not` tests object identity, and an equal
            # string received from the network is a different object, so the
            # node's own ip used to slip through.
            if (discovered_node not in self.nodes
                    and discovered_node != self.ip):
                self.nodes.append(discovered_node)

    def process_server_response(self, message, address):
        """Merge a received node list into `self.nodes`.

        `message` is expected to be the `str()` form of a list, as produced
        by `send_nodes_to_others`. Anything that does not parse to a list,
        tuple or set is reported and ignored rather than raised.
        """
        try:
            discovered = ast.literal_eval(message)
        except (ValueError, SyntaxError):
            print("Discovery service ignored malformed message from " +
                  str(address))
            return
        if not isinstance(discovered, (list, tuple, set)):
            print("Discovery service ignored malformed message from " +
                  str(address))
            return

        self.update_nodes(discovered)
        print("Nodes list updated by Discovery service's server: " +
              str(self.nodes))

    def send_nodes_to_others(self):
        """Send the current node list, as a string, to every known node."""
        message = str(self.nodes)
        for node in self.nodes:
            self.send_message(str_message=message,
                              address=(node, self.port),
                              socket=self.client_socket)
class CheckFileService(UDPService):
    """UDP service that asks peers whether they hold a file.

    Args:
        ip: This node's address; requests are not sent to it.
        port: Port passed to the base class and used when querying peers.
        nodes: List of peer addresses to query.
        timeout: Timeout applied to the client socket.
        directory: Local directory checked for requested files.
        file_transfer_service: Object whose `get_server_port()` supplies the
            TCP port advertised in replies.
    """

    def __init__(self, ip, port, nodes, timeout, directory,
                 file_transfer_service):
        super(CheckFileService, self).__init__(name='CheckFile',
                                               ip=ip,
                                               port=port)
        self.nodes = nodes
        self.directory = directory
        self.client_socket.settimeout(timeout)
        self.threadpool = ThreadPool(2)
        self.file_transfer_service = file_transfer_service

    def start_service(self):
        """Queue the UDP server on the pool and wait for the pool."""
        self.threadpool.add_task(self.start_server)
        self.threadpool.wait_completion()

    def process_server_response(self, message, address):
        """Reply to `address` with "<file exists>,<tcp port>".

        `message` is used as a path relative to `self.directory`.
        """
        file_existance = str(self.check_file_existance(message))
        tcp_server_port = str(self.file_transfer_service.get_server_port())
        self.send_message(str_message=file_existance + ',' + tcp_server_port,
                          address=address,
                          socket=self.server_socket)

    def check_file_existance(self, file_path):
        """Return True when `file_path` exists under `self.directory`."""
        return path.exists(self.directory + '/' + file_path)

    def get_nodes_response_times(self, file_request_message):
        """Query every other node in turn and time each reply.

        Returns:
            A list of dicts with keys "node", "response" and
            "response_time", one per node that answered. Nodes that time
            out are reported and left out.
        """
        nodes_responses = []
        for node in self.nodes:
            # Compare by value: `is not` tests identity and does not reliably
            # filter out this node's own address.
            if node == self.ip:
                continue
            address = (node, self.port)
            start_time = time()
            self.send_message(str_message=file_request_message,
                              address=address,
                              socket=self.client_socket)
            try:
                response = self.get_message(self.client_socket)
            # NOTE(review): `timeout` is presumably socket.timeout imported
            # at module level - confirm.
            except timeout:
                print(node + " node timed out")
                continue
            nodes_responses.append({
                "node": node,
                "response": response,
                "response_time": time() - start_time
            })
        return nodes_responses

    def process_get_file_request(self, file_path):
        """Find the fastest-responding node that reports having `file_path`.

        Returns:
            A dict with keys "node", "node_port" (a string, as received) and
            "response_time", or None when no node has the file.
        """
        file_request_message = file_path
        nodes_responses = self.get_nodes_response_times(file_request_message)

        nodes_has_file = []
        for node_response in nodes_responses:
            # Index the current entry; the old code indexed the whole list
            # with a string key and raised TypeError.
            response_message = node_response["response"]["message"].split(
                ',')
            # Replies look like "True,<port>"; skip negatives and replies
            # that carry no port.
            if len(response_message) < 2 or response_message[0] != "True":
                continue
            nodes_has_file.append({
                "node": node_response["node"],
                "node_port": response_message[1],
                "response_time": node_response["response_time"]
            })

        # An empty list is "not None", so the old check let min() raise
        # ValueError when nobody had the file.
        if not nodes_has_file:
            return None
        return min(nodes_has_file, key=lambda t: t["response_time"])
print("check the robot,----------------" + str(resp)) soup = BeautifulSoup(content, "html.parser") headers["Content-Type"] = "application/x-www-form-urlencoded" headers["Cookie"] = resp["set-cookie"] headers["Referer"] = "http://jandan.net/block.php?from=" + path data = { "hash": soup.find("input", attrs={"name": "hash"})["value"], "from": soup.find("input", attrs={"name": "from"})["value"], } append = soup.find("form", attrs={"method": "post"})["action"] nextPath = "http://jandan.net/block.php" + append resp, content = h.request(nextPath, method="POST", headers=headers, body=urlencode(data)) if resp.status == 302: return getUrls(resp["location"]) else: return [] if __name__ == "__main__": pool = ThreadPool(5) for index in range(1665, 1680): urls = getUrls("http://www.jandan.net/ooxx/page-%d#comments" % (index)) savepath = "/home/xu/Pictures/" + str(index) if not os.path.exists(savepath): os.makedirs(savepath) for url in urls: pool.add_task(query_save, url, savepath) pool.wait_completion() print("process successed")