def populate_queue():
    """ Push some messages into queues. """
    client = GearmanClient(['localhost:4730'])
    for queue, vals in DATA.iteritems():
        for msg in vals:
            print " >> submitting", msg, queue
            client.submit_job(queue, json.dumps(msg), background=True)
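Only the producer side is shown above; a minimal sketch of a matching consumer, assuming the same DATA mapping of queue names and a gearmand on localhost:4730 (the handle_message callback is hypothetical):

import json
from gearman import GearmanWorker

def handle_message(worker, job):
    msg = json.loads(job.data)  # jobs above were submitted as JSON strings
    # ... process msg here ...
    return job.data

worker = GearmanWorker(['localhost:4730'])
for queue in DATA:
    worker.register_task(queue, handle_message)
worker.work()  # blocks, polling the job server for jobs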
def spider(worker, job):
    "Gearman entry point"
    url = job.data
    client = GearmanClient(['localhost'])
    for href in url2hrefs(url):  # for each URL that this page's content refers to
        (blah, ext) = path.splitext(href)
        if ext != '.json':
            href = href + '.json'
        # submit a new spider job for that href
        client.submit_job('spider', href, background=True, wait_until_complete=False)
    return job.data
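The spider entry point still has to be bound to the 'spider' task name it resubmits to; a minimal sketch, assuming a gearmand on localhost (url2hrefs is defined elsewhere in the original module):

from gearman import GearmanWorker

worker = GearmanWorker(['localhost'])
worker.register_task('spider', spider)  # bind the entry point above
worker.work()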
class Client(object):
    def __init__(self, config):
        self.config = config
        self.client = GearmanClient(
            self.config.get('gearman', 'hosts').split(','))

    def process_transaction(self, transaction_id, background=True):
        job = self.client.submit_job(self.config.get('gearman', 'taskname'),
                                     str(transaction_id),
                                     background=background)
        if not background:
            return loads(job.result)
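A hedged usage sketch of this wrapper; the 'gearman' section and the 'hosts'/'taskname' options match the lookups above, but the concrete values are assumptions:

from ConfigParser import ConfigParser  # Python 2, like the rest of these snippets

config = ConfigParser()
config.add_section('gearman')
config.set('gearman', 'hosts', 'localhost:4730')
config.set('gearman', 'taskname', 'process_transaction')  # hypothetical task name

client = Client(config)
client.process_transaction(42)                             # fire-and-forget
result = client.process_transaction(42, background=False)  # blocks, returns decoded result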
#!/usr/bin/python
from gearman.client import GearmanClient

client = GearmanClient(['localhost'])

URL = 'http://ifcb-data.whoi.edu/feed.json'
client.submit_job('spider', URL)
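Note that without background=True this submit_job call blocks until a worker finishes the job; a sketch of capturing the returned request instead (attribute names are from the gearman client API):

job_request = client.submit_job('spider', URL)
print job_request.state   # e.g. 'COMPLETE'
print job_request.result  # whatever the spider worker returned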
"nextAlbumUrl": "/detail/52085246?u=60786793" } </script> ''' picId = urlsplit(url).path.split('/')[-1] userId = parse_qs(urlsplit(url).query)['u'][0] albumId = json.loads(soup.find('script', id='pix-json-set-info').string)['albumId'] ajax_url = 'http://wantu.taobao.com/ajax/PicDetailAjax.do?picId=%s&userId=%s&albumId=%s&t=1365154666759&_method=read' ajax_url = ajax_url % (picId, userId, albumId) resp = json.loads(httplib.urlopen(ajax_url)[2].decode('gbk')) picture = resp['data']['models'][0]['picPath'] description = httplib.html_unescape(resp['data']['models'][0]['desc']) return (picture, description) #used for pipeline work gm_client = GearmanClient([config.job_server]) gm_client.data_encoder = JSONDataEncoder def submit_html_job(url): func_name = 'worker_process_html' job_req = gm_client.submit_job(func_name, url, unique=md5sum(url), background=True, wait_until_complete=False) return job_req.job.unique def submit_pic_job(url): func_name = 'worker_process_pic' job_req = gm_client.submit_job(func_name, url, unique=md5sum(url), background=True, wait_until_complete=False) return job_req.job.unique def worker_process_html(gearman_worker, gearman_job):
def submit(url):
    gm_client = GearmanClient([config.job_server])
    gm_client.data_encoder = JSONDataEncoder
    job_req = gm_client.submit_job('worker_process_html', url)
    print url, 'is submitted'
    return job_req
from gearman.client import GearmanClient
from gearman.worker import GearmanWorker
from gearman.job import GearmanJob
from gearman.constants import *
import crawle_pic
from worker import JSONDataEncoder
import redis
import config

def check_request_status(job_request):
    if job_request.complete:
        print "Job %s finished! Result: %s - %s" % (job_request.job.unique,
                                                    job_request.state,
                                                    job_request.result)
    elif job_request.timed_out:
        print "Job %s timed out!" % job_request.job.unique
    elif job_request.state == JOB_UNKNOWN:
        print "Job %s connection failed!" % job_request.job.unique

if __name__ == '__main__':
    gm_client = GearmanClient([config.job_server])
    gm_client.data_encoder = JSONDataEncoder
    #url = 'http://www.ttkzm.com/html/pic/'
    url = 'http://www.ttkzm.com/html/pic/2012/7/10211069.html'
    #url = 'http://www.ttkzm.com/html/VIP/1/'
    print 'clear cache before submit'
    r_client = redis.StrictRedis(host=config.redis_server, port=config.redis_port, db=0)
    r_client.delete(url)
    print 'submitting index', url
    gm_client.submit_job('worker_process_html', url)
@author: fengclient
'''
import sys
from gearman.client import GearmanClient
from gearman.worker import GearmanWorker
from gearman.job import GearmanJob
from gearman.constants import *
from common import JSONDataEncoder
import config

def check_request_status(job_request):
    if job_request.complete:
        print "Job %s finished! Result: %s - %s" % (job_request.job.unique,
                                                    job_request.state,
                                                    job_request.result)
    elif job_request.timed_out:
        print "Job %s timed out!" % job_request.job.unique
    elif job_request.state == JOB_UNKNOWN:
        print "Job %s connection failed!" % job_request.job.unique

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print 'usage: work_and_move_out.py dirpath'
        sys.exit(-1)
    else:
        gm_client = GearmanClient([config.job_server])
        gm_client.data_encoder = JSONDataEncoder
        job_req = gm_client.submit_job('worker_process_html', sys.argv[1])
        print sys.argv[1], 'is submitted'
        check_request_status(job_req)