def parse_packet(packet):
    """Inspect a sniffed packet and queue cacheable HTTP GET requests for analysis.

    Filters for HTTP GET requests whose path ends with one of the extensions
    in ``cap_ext``.  For each match, stashes the packet info in Redis under
    the URL's hash key and dispatches a Celery analyze task, unless that key
    is already being processed.
    """
    if not packet.haslayer(http.HTTPRequest):
        return
    http_layer = packet.getlayer(http.HTTPRequest)
    if http_layer.fields['Method'] != b'GET':
        return
    ip_layer = packet.getlayer(IP)
    # path like 'http://1.1.1.1/a//b/c/d.mp4 or /a/b/c/d.mp4 or /a//b/c.mp4'
    url = http_layer.fields['Path'].decode('utf-8').replace('//', '/')
    url_low = url.lower()
    # str.endswith accepts a tuple of suffixes — no per-packet list build
    if url_low.endswith(tuple(cap_ext)):
        # NOTE(review): scapy normally stores IP.src as str, not bytes —
        # confirm these .decode() calls do not raise AttributeError.
        print('{} - {} - http://{}{}'.format(
            ip_layer.fields['src'].decode('utf8'),
            http_layer.fields['Method'].decode('utf8'),
            http_layer.fields['Host'].decode('utf8'),
            http_layer.fields['Path'].decode('utf8')))
        key = get_hash(url)
        if r.sismember(R_PROCESSING, key):
            print('key {}(url {}) is now processing,skip sending it from pcap to analyze'.format(key, url))
            return
        pkt_info = get_pkt_info(packet)
        pkt_info.update({'key': key, 'url': url})
        r.hset(R_ARGS_PASS, key, json.dumps(pkt_info))
        print('send key {}(url {}) to analyze process'.format(key, url))
        # BUG FIX: mark the key as in-flight BEFORE dispatching the task.
        # The original called sadd() after send_task(), so a fast worker
        # could srem() first and the late sadd() would leave the key stuck
        # in R_PROCESSING forever.
        r.sadd(R_PROCESSING, key)
        app.send_task('celery_app.analyze_url_task.analyze_url', (key,))
def process_csv(self):
    """Read (full_name, email) rows from the csv at self.file_path and
    dispatch each one to the broker as a 'tasks.add_to_db' task."""
    self.logger.info("Started processing of csv")
    for record in self.read_csv():
        name, address = record
        self.logger.info(f"Putting ({name}, {address}) into the queue")
        app.send_task('tasks.add_to_db', args=record)
    self.logger.info("Finished processing of csv")
def analyze_url(key):
    """Analyze one captured URL (looked up by its hash key) and decide whether
    to serve it from cache, count a hit, or hand it to the download process.

    Reads the packet info stashed in R_ARGS_PASS by the capture process.
    On any failure the key is removed from R_PROCESSING so the URL can be
    retried on a later request.
    """
    r = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)
    _pkt_info = r.hget(R_ARGS_PASS, key)
    r.hdel(R_ARGS_PASS, key)
    # BUG FIX: if the capture process never stored args for this key (or a
    # concurrent worker already consumed them), _pkt_info is None and
    # json.loads(None) raised a TypeError *outside* the try block, leaving
    # the key stuck in R_PROCESSING.  Bail out and release the key instead.
    if _pkt_info is None:
        r.srem(R_PROCESSING, key)
        print('no packet info found for key {}, skip analyzing'.format(key))
        return
    pkt_info = json.loads(_pkt_info)
    try:
        print('analyze process get job,url is {}, key is {}'.format(pkt_info['url'], key))
        # use the hash key of url as the primary index key in db
        sql = "select `url`,`hits_number`,`redirect_path` from caches where `key` = '{}';".format(key)
        cache = db_get_one(sql)
        if cache:
            print('cache: {}'.format(cache))
            redirect = cache[2]
            if redirect:
                # the file for this url has already been downloaded —
                # redirect the client to the cached copy
                print('the file in url {} is available for cache, '
                      'send http 302 to src host {}'.format(pkt_info['url'], pkt_info['ip_src']))
                send_http_302(redirect, pkt_info)
                r.srem(R_PROCESSING, key)
            else:
                hit_number = cache[1]
                if hit_number >= cache_threshold:
                    print('the hit_number reach the cache_threshold,start downloading {},'
                          'Send the task to download process'.format(pkt_info['url']))
                    # re-stash the args for the download worker to pick up
                    r.hset(R_ARGS_PASS, key, _pkt_info)
                    app.send_task('celery_app.download_task.download', (key,))
                else:
                    r.srem(R_PROCESSING, key)
                if not update_lasthit_hitnumber(key):
                    print('some error occured in add hits_number,key is {}'.format(key))
        else:
            # no record exists: create the db record, then apply the
            # cache_threshold check against the fresh row
            # SECURITY(review): `url` comes from a sniffed HTTP request and is
            # interpolated straight into SQL — injection risk.  Switch
            # db_execute to parameterized queries if its API allows it.
            sql = 'insert into `caches` (`key`,`url`,`hits_number`,`last_hit`,`redirect_path`,`add_time`) values ("{0}","{1}",1,{2},"",{2})'.format(
                key, pkt_info['url'], get_timestamp_utcnow())
            if db_execute(sql):
                print('insert new url row, key {}(url {})'.format(key, pkt_info['url']))
            else:
                print('error happened in insert new url row, key {}(url {})'.format(key, pkt_info['url']))
            if cache_threshold == 0:
                # threshold already met even though no cache exists yet —
                # send straight to the download process
                print('send key {}(url {}) to download process'.format(key, pkt_info['url']))
                r.hset(R_ARGS_PASS, key, _pkt_info)
                app.send_task('celery_app.download_task.download', (key,))
            else:
                r.srem(R_PROCESSING, key)
    except Exception as er:
        # Always release the processing lock so the URL can be retried.
        r.srem(R_PROCESSING, key)
        print('error happend in analyzing {},analyze process now end,exception is {}'.format(pkt_info['url'], er))
def entrypoint():
    """
    The single entrypoint for requests.

    Expects a JSON body of the form
    ``{"task": "...", "args": [...], "kwargs": {...}}`` and forwards it to
    Celery via ``send_task``.  Returns a JSON status envelope.
    """
    # NOTE(review): request.body is framework-dependent (a file object under
    # bottle, bytes under others) — confirm json.loads accepts it here.
    params = json.loads(request.body)
    task = params.get('task')
    # BUG FIX: missing args/kwargs were None, which the original then tried
    # to unpack with * / ** — TypeError.  Default to empty containers.
    args = params.get('args') or []
    kwargs = params.get('kwargs') or {}
    # BUG FIX: the original called
    #   send_task(task, run_as_test=False, *args, **kwargs)
    # which unpacked the user's task arguments into send_task's OWN
    # parameter slots (args, kwargs, ...) instead of forwarding them as the
    # task payload.  Pass them through the named args/kwargs parameters.
    result = celery_app.send_task(
        task,
        args=args,
        kwargs=kwargs,
        run_as_test=False,
    )
    if result:
        status = 200
        message = 'Task found!'
    else:
        # was 301 (a redirect code) — "not found" is 404
        status = 404
        message = 'Task not found.'
    return json.dumps({
        'status': status,
        'message': message,
        'task': task,
    })
import time

from celery.result import AsyncResult

from celery_app import app, task1, task2

# Usage examples:
#   t1 = task1.add.apply_async(args=[2, 8])        # or: task1.add.delay(2, 8)
#   t2 = task2.multiply.apply_async(args=[3, 7])   # or: task2.multiply.delay(3, 7)
#
# Fetch a result by task id:
#   print(AsyncResult('e15b6891-2e56-432f-9763-d0f63793f8c5').get())
#
# send_task addresses a task by its fully qualified name:
#   app.send_task('celery_app.task2.multiply', (3, 9))
#
#   for i in range(20):
#       app.send_task('celery_app.task1.add', (3, i))

if __name__ == '__main__':
    # Kick off a single crawl task on the dedicated download queue.
    meta = {}
    start_url = "http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E6%B7%B1%E5%9C%B3&kw=python"
    meta["start_url"] = start_url
    app.send_task("celery_app.tasks.download", args=(start_url, meta),
                  queue="download", routing_key="for_download")
from celery_app import app

# Dispatch tasks.multiply with (2, 2), block until the worker finishes,
# then print the result.
async_result = app.send_task("tasks.multiply", args=[2, 2])
value = async_result.get()
print(value)
def manage_sqrt_task(value):
    """Submit tasks.square_root for `value`, probe readiness twice, then
    block (up to 2 s) for the result and print it."""
    async_result = app.send_task('tasks.square_root', args=(value, ))
    # Two consecutive ready() probes demonstrate the task state shortly
    # after submission (usually False, False while the worker runs).
    print(async_result.ready())
    print(async_result.ready())
    print(async_result.get(timeout=2))