def set_task_status_99(id_, redo, status=-99):
    '''Lock a task row and decide whether this run may process it.

    The row is selected with the ``update`` lock mode.  Processing is
    allowed when ``redo`` is true, or when the stored status lies in
    ``[0, WEIXIN_TRY_TIME)``.  Without ``redo``, a task whose status is
    positive and whose ``ret`` is 0 is treated as already done and the
    process terminates through ``sys.exit(0)``.

    Args:
        id_: primary key of the task row.
        redo: when true, process the task regardless of its stored status
            and reset the stored status to 0.
        status: accepted for backward compatibility only; the value passed
            in is never read, it is overwritten before first use.

    Returns:
        A ``(ok, status)`` tuple.  ``status`` is 0 when ``redo`` is true,
        otherwise the stored status, or -88 when no row was found.  ``ok``
        is False when the task is not eligible or the update failed.

    Raises:
        SystemExit: when the task has already completed successfully and
            ``redo`` is false.
    '''
    rows = session.query(Task).filter(Task.id==id_).with_lockmode('update').all()
    if not rows:
        # NOTE: a missing row is still reported as success; the caller is
        # expected to notice the absence when it loads the task itself.
        return True, (0 if redo else -88)

    task = rows[0]
    # A conditional expression is required here: ``redo and 0 or x`` always
    # evaluates to ``x`` because 0 is falsy.
    status = 0 if redo else task.status

    eligible = redo or (task.status < WEIXIN_TRY_TIME and task.status >= 0)
    if not eligible:
        return False, status

    if not redo and (task.status > 0 and task.ret == 0):
        # NOTE: Task has been done, and the result is success, so we
        # just exit
        logger.debug('Task has been done, exit task %s' % id_)
        sys.exit(0)

    # We locked the item.  Without redo this writes the stored status back
    # unchanged; with redo it resets the status to 0.
    try:
        session.query(Task).filter(Task.id==id_).update({Task.status: status,})
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' % (id_, str(e)))
        return False, status
    return True, status
def set_task_status_99(id_, redo, status=-99):
    '''Lock a task row and report whether the caller may process it.

    The row is fetched with the ``update`` lock mode.  The task may be
    processed when ``redo`` is true or when its stored status is at least 0
    and below ``WEIXIN_TRY_TIME``.  When ``redo`` is false and the task has
    a positive status together with ``ret == 0`` it is considered finished
    and the process exits with code 0.

    Args:
        id_: primary key of the task row.
        redo: force processing and reset the stored status to 0.
        status: retained so existing call sites keep working; the supplied
            value is ignored because it is overwritten before use.

    Returns:
        ``(ok, status)`` where ``status`` is 0 for a redo, the stored status
        otherwise, or -88 when the task does not exist; ``ok`` is False when
        the task is not eligible or the database update failed.

    Raises:
        SystemExit: task already finished successfully and ``redo`` is false.
    '''
    result = session.query(Task).filter(
        Task.id == id_).with_lockmode('update').all()

    # ``redo and 0 or x`` can never produce 0 (0 is falsy), so spell the
    # choice out with a conditional expression.
    status = 0 if redo else -88
    if not result:
        # NOTE: no matching row is reported as success with the placeholder
        # status computed above.
        return True, status

    current = result[0]
    status = 0 if redo else current.status

    in_retry_window = current.status < WEIXIN_TRY_TIME and current.status >= 0
    if not (redo or in_retry_window):
        return False, status

    already_succeeded = current.status > 0 and current.ret == 0
    if not redo and already_succeeded:
        # NOTE: Task has been done, and the result is success, so we
        # just exit
        logger.debug('Task has been done, exit task %s' % id_)
        sys.exit(0)

    # We locked the item
    try:
        session.query(Task).filter(Task.id == id_).update({
            Task.status: status,
        })
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' %
                     (id_, str(e)))
        return False, status
    return True, status
def set_task_ret(id_, status, ret, retmsg):
    '''Store the status, return code and return message of a task.

    The row is first selected with the ``update`` lock mode.  The update is
    attempted only when a row exists and its stored status is at least 0
    and below ``WEIXIN_TRY_TIME``.

    Returns:
        True when the new values were committed; False when no eligible row
        was found or when the update raised (the session is rolled back and
        the error is logged).
    '''
    locked_rows = session.query(Task).filter(Task.id==id_).with_lockmode('update').all()
    if not locked_rows:
        return False

    stored_status = locked_rows[0].status
    if not (stored_status < WEIXIN_TRY_TIME and stored_status >= 0):
        return False

    # We locked the item
    try:
        new_values = {Task.status: status, Task.ret: ret, Task.retmsg: retmsg}
        session.query(Task).filter(Task.id==id_).update(new_values)
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' % (id_, str(e)))
        return False
    return True
def set_task_ret(id_, status, ret, retmsg):
    '''Write the outcome (status, ret, retmsg) of a task back to its row.

    The task row is selected with the ``update`` lock mode.  Nothing is
    written unless the row exists and its stored status is at least 0 and
    below ``WEIXIN_TRY_TIME``.

    Returns:
        True after a successful commit, False when the row is missing or
        not eligible, or when the update fails (rolled back and logged).
    '''
    matches = session.query(Task).filter(
        Task.id == id_).with_lockmode('update').all()

    writable = bool(
        matches
        and matches[0].status < WEIXIN_TRY_TIME
        and matches[0].status >= 0)
    if not writable:
        return False

    # We locked the item
    try:
        task_query = session.query(Task).filter(Task.id == id_)
        task_query.update({
            Task.status: status,
            Task.ret: ret,
            Task.retmsg: retmsg
        })
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not update task(%s) status msg: %s' %
                     (id_, str(e)))
        return False
    return True
def text(message):
    '''Handle an incoming text message and build the reply text.

    The sender is looked up in ``User`` by ``message.FromUserName``.  An
    unknown sender may register by sending ``id:<username>``; any other
    content from an unknown sender is answered with a request to register.
    For a known sender the content is validated as a URL.

    Args:
        message: incoming message; ``FromUserName`` and ``Content`` are read.

    Returns:
        A reply string on the registration and validation-failure paths.
        When the content is a valid URL from a known sender the visible
        body ends without an explicit return value (None).
    '''
    logger.debug(str(message))
    # Find user information from database, if not exists, ask user to send to me
    result = session.query(User).filter(
        User.openid == message.FromUserName).scalar()
    logger.debug('get user info from database %s' % str(result))

    # The content is used as received; no manual escaping is applied here.
    content = message.Content
    if not result:
        if content.startswith('id:'):
            # insert user info into database, and send hello message.
            ret = Home.insert_user_info(message.FromUserName,
                                        content[len('id:'):].strip())
            if ret is not None:
                return _(
                    '''Success insert your information, and you are at \
default group to post articles to Linuxfans, and if you want to \
post articles to HackOS, please contact with administrator %s''') % ADMIN_MAIL
            return _(
                '''Can not insert your information, please contact \
with administrator %s''') % ADMIN_MAIL
        # This reply goes through _() like every other user-facing message.
        return _(
            '''We have no information about you, please SEND me your \
information at least your username with format id:<username> \
thanks.''')

    # NOTE(review): these two values are not used in the visible body;
    # presumably later code relies on them - confirm before removing.
    user_id = result.id
    user_groupid = result.groupid

    ## Now we can insert the url into task list
    # First we should check the url format
    from django.core.validators import URLValidator
    from django.core.exceptions import ValidationError
    val = URLValidator()
    try:
        val(content)
    except ValidationError:
        return _(
            '''Your URL format is malformed, please give me a correct URL.'''
        )
def text(message):
    '''Handle an incoming text message and build the reply text.

    The sender is looked up in ``User`` by ``message.source``.  An unknown
    sender may register by sending ``id:<username>``; any other content
    from an unknown sender is answered with a request to register.  For a
    known sender the content is validated as a URL.

    Args:
        message: incoming message; ``source``, ``target`` and ``content``
            are read.

    Returns:
        A reply string on the registration and validation-failure paths.
        When the content is a valid URL from a known sender the visible
        body ends without an explicit return value (None).
    '''
    logger.debug(str(message))
    # Find user information from database, if not exists, ask user to send to me
    logger.debug(dir(message))
    logger.debug(message.source)
    logger.debug(message.target)
    result = session.query(User).filter(User.openid == message.source).scalar()
    logger.debug('get user info from database %s' % str(result))

    # The content is used as received; no manual escaping is applied here.
    content = message.content
    if not result:
        if not content.startswith('id:'):
            # This reply goes through _() like every other user-facing
            # message.
            return _('''We have no information about you, please SEND me your \
information at least your username with format id:<username> \
thanks.''')

        # insert user info into database, and send hello message.
        ret = Home.insert_user_info(message.source,
                                    content[len('id:'):].strip())
        if ret is not None:
            return _('''Success insert your information, and you are at \
default group to post articles to Linuxfans, and if you want to \
post articles to HackOS, please contact with administrator %s''') % ADMIN_MAIL
        return _('''Can not insert your information, please contact \
with administrator %s''') % ADMIN_MAIL

    # NOTE(review): these two values are not used in the visible body;
    # presumably later code relies on them - confirm before removing.
    user_id = result.id
    user_groupid = result.groupid

    ## Now we can insert the url into task list
    # First we should check the url format
    from django.core.validators import URLValidator
    from django.core.exceptions import ValidationError
    val = URLValidator()
    try:
        val(content)
    except ValidationError:
        return _('''Your URL format is malformed, please give me a correct URL.''')
def main():
    '''Run one crawl-and-post task identified by the first CLI argument.

    ``sys.argv[1]`` is the integer task id; an optional second argument
    ``redo`` forces reprocessing.  The outcome is reported through the
    module globals ``g_id_``, ``g_ret``, ``g_ret_msg`` and ``g_oldstatus``
    rather than through a return value.

    ``g_ret`` values set here:
        0   article posted
        -1  content could not be fetched from the URL
        -2  ``CanNotUploadException`` while posting
        -3  task could not be locked or loaded
        -4  any other exception

    Raises:
        SystemExit: with code -1 when the task id argument is missing or is
            not an integer; with code 0 when the task belongs to another
            client.
    '''
    global g_id_
    global g_ret
    global g_ret_msg
    global g_oldstatus

    try:
        raw_task_id = sys.argv[1]
    except IndexError:
        sys.stderr.write("Please input crawler url\n")
        sys.exit(-1)

    redo = len(sys.argv) > 2 and sys.argv[2].strip() == 'redo'

    ### Get task from database, and determine the task's status is unfinished(0)
    # That status(1,2,3) represent try execute time, There is an option in
    # database to limit the retry time.
    try:
        g_id_ = int(raw_task_id.strip())
    except ValueError:
        # A non-numeric id cannot name a task; fail the same way as a
        # missing argument instead of dying with a traceback.
        sys.stderr.write("Invalid task id: %r\n" % raw_task_id)
        sys.exit(-1)
    g_ret = 0
    g_ret_msg = ''
    g_oldstatus = -1

    author = 'testuser'
    class_ = CRAWLER_CLASS
    delivery_url = 'http://weixin.qq.com'
    delivery_name = author
    url = None

    ret = session.query(Task).filter(Task.id == g_id_).scalar()
    if ret and ret.client_name != CLIENT_NAME:
        logger.error('We got a error request, let me do task %s, but this is %s\'s task' % (g_id_, ret.client_name))
        # NOTE: we do not record this error in database, just exit.
        sys.exit(0)

    # Lock the task; the second value is the status reported by the helper.
    ret, g_oldstatus = set_task_status_99(g_id_, redo)
    if not ret:
        g_ret_msg = 'Get task(%s) failed' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return

    # Load the task together with the nickname of the user who owns it.
    result = session.query(Task, User.nickname).join(User, User.id == Task.uid).filter(Task.id==g_id_).all()
    if not result:
        g_ret_msg = 'Can not get the task %s' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return
    url = result[0].Task.param1.strip()
    author = result[0].nickname.strip()
    delivery_name = author

    logger.debug('Get task(%s) information author(%s) url(%s)' % (g_id_, author, url))
    try:
        ret, msg = get_content_from_url(url)
        if not ret:
            g_ret_msg = msg
            g_ret = -1
            return
        processor_name, proc = process_content(msg, url)
        today_tmpdir = os.path.join(tmpdir, time.strftime("%Y-%m-%d", time.localtime()))
        if not os.path.exists(today_tmpdir):
            os.makedirs(today_tmpdir)
        # Notice, please input all parameter use unicode
        try:
            post_article(proc.title, str(proc.content), proc.first_img,
                         author, url, proc.name_cn, delivery_url,
                         delivery_name, class_, today_tmpdir)
            g_ret = 0
            return
        except CanNotUploadException as e:
            g_ret = -2
            g_ret_msg = 'CanNotUploadException %s' % str(e)
            return
    except Exception as e:
        import traceback
        msg = 'generic exception: ' + traceback.format_exc()
        logger.error(msg)
        g_ret_msg = 'Unknown error %s \nError: %s' % (url, str(e))
        logger.error(g_ret_msg)
        g_ret = -4
        return
def assign_content_to_client(user_id, user_groupid, client_name, content):
    '''Queue ``content`` (a URL) as a task for ``client_name`` and notify it.

    A task with the same user, URL and client added within the last seven
    days is treated as a duplicate: depending on its ``status`` and ``ret``
    the request is either re-sent to the client or rejected with an
    explanatory message.  Otherwise a new ``Task`` row is inserted with
    status 0 and the client is notified.

    Args:
        user_id: id of the requesting user (stored as ``Task.uid``).
        user_groupid: not used by this function.
        client_name: name of the client that should process the task.
        content: the URL, stored as ``Task.param1``.

    Returns:
        The translated reply text describing what happened.
    '''
    # Save url to task list in database
    # First, we should find url in database, which user insert in 7
    # days, if find same url, we reject the request.(There have some
    # `status` and `ret` check)
    t_before_7_day = int(time.time())-3600*24*7
    result = session.query(Task).filter(Task.uid==user_id).filter(Task.add_time>=t_before_7_day).filter(Task.param1==content).filter(Task.client_name==client_name).scalar()
    if result:
        def common_resend():
            # NOTE: send signal to clients
            ret = Home.send_task_request(client_name, result.id)
            if ret:
                return _('Your request have some error(status%s ret%s), \
and we have send the process request') % (result.status, result.ret)
            else:
                return _('Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL

        # -99 marks a task that is currently being processed.  It has to be
        # tested before the range checks: nested under
        # ``status >= WEIXIN_RETRY_TIME`` it could not be reached for any
        # non-negative retry limit.
        if result.status == -99:
            return _('''Your request is being processed, please wait.''')

        if (result.status == 0):
            return common_resend()
        elif (result.status > 0 and result.status < WEIXIN_RETRY_TIME):
            # Resend the request
            if result.ret != 0:
                return common_resend()
            else:
                # process successfully
                return _('''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)
        elif (result.status >= WEIXIN_RETRY_TIME):
            # The task has been processed out of retry time, and we
            # will not process it again
            if result.ret != 0:
                return _('''Your request has been processed, but not \
success, is %s, please contact with administrator %s''') % (result.ret, ADMIN_MAIL)
            else:
                return _('''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)
        # Any other (negative) status ends up here.
        return _('''ERROR, duplicated URL.''')

    # This is a new URL request, we should insert it into task list.
    try:
        task = Task(uid=user_id, status=0, param1=content, param2='',
                    param3='', add_time=int(time.time()), ret=0, retmsg='',
                    client_name=client_name)
        session.add(task)
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not insert URL into database %s' % content)
        logger.error(str(e))
        return _('Can not deal with your request, (Something about database \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
    else:
        # NOTE: send signal to clients
        ret = Home.send_task_request(client_name, task.id)
        if ret:
            return _('We have received your URL request, please wait to processing (taskid%s).') % task.id
        else:
            return _('Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
def assign_content_to_client(user_id, user_groupid, client_name, content):
    '''Queue ``content`` (a URL) as a task for ``client_name`` and notify it.

    If the same user already submitted the same URL for the same client
    within the last seven days, the existing task decides the answer: it is
    re-sent to the client, reported as in progress, or rejected as a
    duplicate, depending on its ``status`` and ``ret``.  Otherwise a new
    ``Task`` row with status 0 is inserted and the client is notified.

    Args:
        user_id: id of the requesting user (stored as ``Task.uid``).
        user_groupid: not used by this function.
        client_name: name of the client that should process the task.
        content: the URL, stored as ``Task.param1``.

    Returns:
        The translated reply text describing what happened.
    '''
    # Save url to task list in database
    # First, we should find url in database, which user insert in 7
    # days, if find same url, we reject the request.(There have some
    # `status` and `ret` check)
    t_before_7_day = int(time.time()) - 3600 * 24 * 7
    result = session.query(Task).filter(Task.uid == user_id).filter(
        Task.add_time >= t_before_7_day).filter(
            Task.param1 == content).filter(
                Task.client_name == client_name).scalar()
    if result:

        def common_resend():
            # NOTE: send signal to clients
            ret = Home.send_task_request(client_name, result.id)
            if ret:
                return _('Your request have some error(status%s ret%s), \
and we have send the process request') % (result.status, result.ret)
            else:
                return _(
                    'Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL

        # The "being processed" marker (-99) must be handled before the
        # range checks; inside the ``status >= WEIXIN_RETRY_TIME`` branch it
        # was unreachable for any non-negative retry limit.
        if result.status == -99:
            return _(
                '''Your request is being processed, please wait.''')

        if (result.status == 0):
            return common_resend()
        elif (result.status > 0 and result.status < WEIXIN_RETRY_TIME):
            # Resend the request
            if result.ret != 0:
                return common_resend()
            else:
                # process successfully
                return _(
                    '''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)
        elif (result.status >= WEIXIN_RETRY_TIME):
            # The task has been processed out of retry time, and we
            # will not process it again
            if result.ret != 0:
                return _('''Your request has been processed, but not \
success, is %s, please contact with administrator %s''') % (result.ret,
                                                             ADMIN_MAIL)
            else:
                return _(
                    '''Do not submit request duplicated(status%s ret%s), your \
request been processed successfully.''') % (result.status, result.ret)
        # Any other (negative) status ends up here.
        return _('''ERROR, duplicated URL.''')

    # This is a new URL request, we should insert it into task list.
    try:
        task = Task(uid=user_id,
                    status=0,
                    param1=content,
                    param2='',
                    param3='',
                    add_time=int(time.time()),
                    ret=0,
                    retmsg='',
                    client_name=client_name)
        session.add(task)
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error('Can not insert URL into database %s' % content)
        logger.error(str(e))
        return _(
            'Can not deal with your request, (Something about database \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
    else:
        # NOTE: send signal to clients
        ret = Home.send_task_request(client_name, task.id)
        if ret:
            return _(
                'We have received your URL request, please wait to processing (taskid%s).'
            ) % task.id
        else:
            return _(
                'Can not deal with your request, (Something about client manager \
error), please inform administrator %s. Thanks.') % ADMIN_MAIL
def main():
    '''Process the task whose id is given as the first CLI argument.

    ``sys.argv[1]`` must be an integer task id; passing ``redo`` as the
    second argument forces the task to be processed again.  Results are
    published through the module globals ``g_id_``, ``g_ret``,
    ``g_ret_msg`` and ``g_oldstatus``; the function itself returns None.

    ``g_ret`` values set here:
        0   article posted
        -1  content could not be fetched from the URL
        -2  ``CanNotUploadException`` while posting
        -3  task could not be locked or loaded
        -4  any other exception

    Raises:
        SystemExit: code -1 for a missing or non-integer task id, code 0
            when the task is assigned to a different client.
    '''
    global g_id_
    global g_ret
    global g_ret_msg
    global g_oldstatus

    if len(sys.argv) < 2:
        sys.stderr.write("Please input crawler url\n")
        sys.exit(-1)
    task_id_arg = sys.argv[1]

    redo = len(sys.argv) > 2 and sys.argv[2].strip() == 'redo'

    ### Get task from database, and determine the task's status is unfinished(0)
    # That status(1,2,3) represent try execute time, There is an option in
    # database to limit the retry time.
    try:
        g_id_ = int(task_id_arg.strip())
    except ValueError:
        # Exit cleanly, like the missing-argument case, rather than letting
        # the conversion error escape as a traceback.
        sys.stderr.write("Invalid task id: %r\n" % task_id_arg)
        sys.exit(-1)
    g_ret = 0
    g_ret_msg = ''
    g_oldstatus = -1

    author = 'testuser'
    class_ = CRAWLER_CLASS
    delivery_url = 'http://weixin.qq.com'
    delivery_name = author
    url = None

    ret = session.query(Task).filter(Task.id == g_id_).scalar()
    if ret and ret.client_name != CLIENT_NAME:
        logger.error(
            'We got a error request, let me do task %s, but this is %s\'s task'
            % (g_id_, ret.client_name))
        # NOTE: we do not record this error in database, just exit.
        sys.exit(0)

    # Lock the task; the second value is the status reported by the helper.
    ret, g_oldstatus = set_task_status_99(g_id_, redo)
    if not ret:
        g_ret_msg = 'Get task(%s) failed' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return

    # Load the task together with the nickname of the user who owns it.
    result = session.query(Task, User.nickname).join(
        User, User.id == Task.uid).filter(Task.id == g_id_).all()
    if not result:
        g_ret_msg = 'Can not get the task %s' % g_id_
        logger.error(g_ret_msg)
        g_ret = -3
        return
    url = result[0].Task.param1.strip()
    author = result[0].nickname.strip()
    delivery_name = author

    logger.debug('Get task(%s) information author(%s) url(%s)' %
                 (g_id_, author, url))
    try:
        ret, msg = get_content_from_url(url)
        if not ret:
            g_ret_msg = msg
            g_ret = -1
            return
        processor_name, proc = process_content(msg, url)
        today_tmpdir = os.path.join(
            tmpdir, time.strftime("%Y-%m-%d", time.localtime()))
        if not os.path.exists(today_tmpdir):
            os.makedirs(today_tmpdir)
        # Notice, please input all parameter use unicode
        try:
            post_article(proc.title, str(proc.content), proc.first_img,
                         author, url, proc.name_cn, delivery_url,
                         delivery_name, class_, today_tmpdir)
            g_ret = 0
            return
        except CanNotUploadException as e:
            g_ret = -2
            g_ret_msg = 'CanNotUploadException %s' % str(e)
            return
    except Exception as e:
        import traceback
        msg = 'generic exception: ' + traceback.format_exc()
        logger.error(msg)
        g_ret_msg = 'Unknown error %s \nError: %s' % (url, str(e))
        logger.error(g_ret_msg)
        g_ret = -4
        return