def test_remote_cache_check(self):
    """Round-trip smoke test against the live cache/fetcher processes.

    Pushes a pickled request to the cache port, then drives the fetch
    and manager ports the way the real pipeline would, printing every
    reply.  Pure network I/O — nothing is asserted.
    """
    payload = pickle.dumps(['http://yahoo.com', 'http://yahoo.com', '3'])
    qUtils.send_message(payload, qUtils.CACHE_PORT)
    #self.myCache.main_cache()
    print("foo")
    fetch_reply = qUtils.recv_message(qUtils.FETCH_PORT)
    print(fetch_reply)
    # Give the other processes a moment before the second exchange.
    time.sleep(3)
    qUtils.send_message('/localpath/tothe/file/yahoo', qUtils.FETCH_PORT)
    mgr_reply = pickle.loads(qUtils.recv_message(qUtils.MGR_PORT))
    print(mgr_reply)
    mgr_reply_raw = qUtils.recv_message(qUtils.MGR_PORT)
    print(mgr_reply_raw)
def run(self):
    """Forward this proxy request to qCache and drain the reply stream.

    Sends ``self.request`` to the cache port; unless the request is the
    shutdown sentinel, receives messages from the manager port and
    appends each unpickled record to the module-level ``web_list``
    until the 'END' marker arrives.
    """
    global web_list
    # Ask qCache whether it already has the data.  The original bound
    # send_message's return value to an unused local ('manager'); it is
    # not meaningful, so it is dropped.
    qUtils.send_message(self.request, qUtils.CACHE_PORT)
    if self.request == "!KILLPROXY":
        # Shutdown sentinel: nothing more will be sent back.
        return
    while True:
        data = qUtils.recv_message(qUtils.MGR_PORT)
        if data == 'END':
            break
        # Each non-END message is one pickled result record
        # (pickle and cPickle streams are interchangeable; pickle is
        # used here for consistency with the rest of the file).
        web_list.append(pickle.loads(data))
def run(self):
    """Execute the search query and ship the resulting URLs to the manager.

    Runs ``self.search_query``, keeps at most the first 10 de-duplicated
    URLs, and — while holding ``self.threadLock`` — sends them to the
    manager port, then waits on the proxy port for the processed results
    and stores them in ``self.url_results``.
    """
    print('SECOND Thread')
    print('QUERY 2 %s' % self.search_query)
    # BUG FIX: the original called self.execute_search(search_query),
    # referencing the bare name 'search_query' (a NameError at runtime);
    # the query lives on the instance.
    search_results = self.execute_search(self.search_query)
    # De-duplicate the parsed URLs and cap at the first 10 entries
    # (replaces the original manual index loop with break at i == 10).
    url_list = list(set(self.parse_results_for_urls(search_results)))[:10]
    print('LIST %s' % url_list)
    #using a lock here allows multiple requests to be serviced
    if url_list:
        self.threadLock.acquire()
        try:
            qUtils.send_message(pickle.dumps(url_list), qUtils.MGR_PORT)
            print('SEND to MGR')
            data = pickle.loads(qUtils.recv_message(qUtils.PROX_PORT))
            print('RECV %s' % data)
            self.url_results = data
        finally:
            # Release even if the socket exchange raises, so one failed
            # request cannot deadlock every later one.
            self.threadLock.release()
class qManager(threading.Thread):
    """Worker thread that relays one proxy request through the cache layer."""

    def __init__(self, threadID, request):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.request = request

    def run(self):
        """Send the request to qCache and drain replies into ``web_list``.

        Appends each unpickled record from the manager port to the
        module-level ``web_list`` until the 'END' marker arrives; the
        shutdown sentinel short-circuits the receive loop.
        """
        global web_list
        # Ask qCache whether it already has the data.  send_message's
        # return value is not meaningful (the original bound it to an
        # unused local), so it is not kept.
        qUtils.send_message(self.request, qUtils.CACHE_PORT)
        if self.request == "!KILLPROXY":
            return
        while True:
            data = qUtils.recv_message(qUtils.MGR_PORT)
            if data == 'END':
                break
            # One pickled result record per message (pickle is used for
            # consistency with the rest of the file; the stream format
            # is identical to cPickle's).
            web_list.append(pickle.loads(data))


if __name__ == '__main__':
    while True:
        # Receive the proxy's URL list (or the shutdown sentinel).
        url_list = qUtils.recv_message(qUtils.MGR_PORT)
        web_list = []
        pThread = qManager(1, url_list)
        pThread.start()
        pThread.join()
        if url_list == "!KILLPROXY":
            break
        print('FIN THREAD %s' % web_list)
        # Hand the collected results back to the proxy side.
        qUtils.send_message(pickle.dumps(web_list), qUtils.PROX_PORT)
        print('SEND TO PROXY')
# NOTE(review): fragment — the statements below are the tail of a fetcher
# method whose ``def`` line is outside this chunk; ``self.soup``,
# ``self.mainpage`` and ``self.saved_page_dir`` are set elsewhere.
# Collect page assets from the parsed soup (BeautifulSoup-style API).
self.imgs = list(set(self.soup.find_all('img')))
self.css = self.soup.find_all('link', rel="stylesheet")
self.js = self.soup.find_all('script', src=re.compile(".*"))
# Persist the fetched page body under the per-page directory.
with open(self.saved_page_dir + '/index.html','w') as f:
    f.write(self.mainpage)
#self.fetch_images()
#self.fetch_css_and_js()
#self.rewrite_links()
print 'sending the cache a local file at ' + self.saved_page_dir + '/index.html'
#qUtils.send_message(self.saved_page_dir+'/index.html', qUtils.CACHE_PORT)

# Wait for a fetch request from the cache, then dispatch a fetcher thread
# and finally send the resulting local file path back to the cache.
if __name__=='__main__':
    while True :
        #state = False
        #data = ' '
        print 'LISTEN CACHE'
        data = qUtils.recv_message(qUtils.FETCH_PORT)
        if data == "!KILLPROXY":
            break
        pThread = qFetcherThread(5,data)
        pThread.start()
        # NOTE(review): fixed one-second sleep before join() looks
        # unnecessary — join already blocks until the thread finishes;
        # confirm before removing.
        time.sleep(1)
        pThread.join()
        # Tell the cache where the fetched page was written on disk.
        qUtils.send_message(pThread.saved_page_dir + '/index.html', qUtils.CACHE_PORT)
def run(self):
    """Block on this thread's port and stash the received payload.

    The raw message is stored on ``self.dataRecvd`` for the owner of
    the thread to read after join().
    """
    print('starting server thread')
    self.dataRecvd = qUtils.recv_message(self.port)
global myTable # Have to change the structure def set_cache_object(self, objectName, objectValue): myTable[objectName] = objectValue # Have to change how to stock the files def destroy_cache(self): myTable.clear() def run(self): print "not in the cache " qUtils.send_message(self.request, qUtils.FETCH_PORT) if self.request == "!KILLPROXY" or self.request == 'END' : print 'END' else: print 'WAITING ANSWER FETCHER' page = qUtils.recv_message(qUtils.CACHE_PORT) # what does it receive from the fetcher? print page self.set_cache_object(self.request, page) anfile = pickle.dumps([self.request, page]) pThread = qCacheManager(1,anfile) pThread.start() pThread.join() print "send a file " if __name__ == '__main__': myTable = dict() while True: url_list = qUtils.recv_message(qUtils.CACHE_PORT) if url_list == "!KILLPROXY" : pThread = qCacheFetcher(2,url_list) pThread.start()