def _send_mail(self, subject, body, spider):
    """Send ``body`` under ``subject`` to the spider's ``ADMIN_USER`` address.

    A new ``MailSender`` is built on every call, using the SMTP host and
    credentials written out below.
    """
    recipient = spider.settings['ADMIN_USER']
    sender = MailSender(
        smtphost='smtp.mxhichina.com',
        smtpuser='******',
        smtppass='******',
    )
    sender.send(to=recipient, subject=subject, body=body)
def spider_closed(spider):
    """Email the crawl summary, close the selenium driver, then log the summary.

    The sender address, SMTP password and recipient are read from the
    ``EMAIL_SENDER``, ``SMTPPASS`` and ``EMAIL_RECEIVER`` environment
    variables.

    Args:
        spider: The spider being closed. It must expose ``articles_scraped``,
            ``articles_skipped``, ``driver`` and ``logger``.

    Raises:
        KeyError: If one of the environment variables is missing. The driver
            is still closed before the error propagates.
    """
    try:
        spider.logger.info("Sending email...")
        mailer = MailSender(
            smtphost="smtp.gmail.com",
            mailfrom=os.environ["EMAIL_SENDER"],
            smtpuser=os.environ["EMAIL_SENDER"],
            smtppass=os.environ["SMTPPASS"],
            # Port 587 is the STARTTLS submission port: the session starts in
            # plain text and is upgraded. smtpssl=True would instead open an
            # SSL-wrapped socket from the first byte, which that port does
            # not speak, so only smtptls is enabled.
            smtptls=True,
            smtpssl=False,
            smtpport=587,
        )
        results_msg = "Web crawling job completed:\n{} articles scraped\n{} articles skipped".format(
            spider.articles_scraped, len(spider.articles_skipped)
        )
        mailer.send(
            to=os.environ["EMAIL_RECEIVER"],
            subject="Web Crawling Results",
            body=results_msg,
        )
    finally:
        # Release the browser even when preparing the mail failed.
        spider.logger.info("Closing driver...")
        spider.driver.close()

    # log results
    spider.logger.info(results_msg)
def close_spider(self, spider):
    """Email the spider's zip archive, or a "no data" notice, when it closes.

    The subject carries the spider's date range (a single date when
    ``start_time`` equals ``end_time``). When ``spider.zip_path`` exists it is
    attached; otherwise a body saying there is no data is sent without an
    attachment.

    Args:
        spider: The closing spider; must expose ``start_time``, ``end_time``
            and ``zip_path``.

    Returns:
        Whatever ``MailSender.send`` returns.
    """
    mailer = MailSender(smtphost=STMPHOST, mailfrom=EMAIL_FROM, smtppass=STMPPASS,
                        smtpuser=EMAIL_FROM, smtpport=STMPPORT, smtptls=True)
    if spider.start_time == spider.end_time:
        subject = '(' + spider.end_time + ')招标文件,及时查收'
    else:
        subject = '(' + spider.start_time + '--' + spider.end_time + ')招标文件,及时查收'

    def _send(body, attachs):
        # Single place for the arguments shared by both branches.
        return mailer.send(to=EMAIL_TO, subject=subject, body=body,
                           cc=["*****@*****.**"], attachs=attachs,
                           mimetype="text/plain", charset='utf-8')

    zip_path = spider.zip_path
    if not os.path.isfile(zip_path):
        return _send('今日无数据'.encode('utf-8'), ())

    # Scrapy's MailSender.send reads each attachment while it builds the
    # message, so the handle can be closed as soon as send() returns.
    with open(zip_path, "rb") as attachment:
        attachs = [(os.path.basename(zip_path), EMAIL_ATTACH_MIME, attachment)]
        return _send('招标邮件,及时查收'.encode('utf-8'), attachs)
def test_send_attach_utf8(self):
    """A UTF-8 subject, body and attachment come through ``send`` unchanged."""
    subject = u'sübjèçt'
    body = u'bödÿ-àéïöñß'
    # The attachment carries the same text as the body, encoded as UTF-8.
    attachment = BytesIO(body.encode('utf-8'))

    sender = MailSender(debug=True)
    sender.send(
        to=['*****@*****.**'],
        subject=subject,
        body=body,
        attachs=[('attachment', 'text/plain', attachment)],
        charset='utf-8',
        _callback=self._catch_mail_sent,
    )

    caught = self.catched_msg
    assert caught
    self.assertEqual(caught['subject'], subject)
    self.assertEqual(caught['body'], body)

    msg = self.catched_msg['msg']
    self.assertEqual(msg['subject'], subject)
    self.assertEqual(msg.get_charset(), Charset('utf-8'))
    self.assertEqual(msg.get('Content-Type'),
                     'multipart/mixed; charset="utf-8"')

    parts = msg.get_payload()
    assert isinstance(parts, list)
    self.assertEqual(len(parts), 2)

    text_part, attach_part = parts
    self.assertEqual(text_part.get_payload(decode=True).decode('utf-8'), body)
    self.assertEqual(text_part.get_charset(), Charset('utf-8'))
    self.assertEqual(attach_part.get_payload(decode=True).decode('utf-8'), body)
def process_spider_output(self, response, result, spider):
    """Collect the spider's results and email them when the first item is in stock.

    Called with the results the spider produced for ``response``. The results
    are materialised into a list, which is also the return value. A mail is
    sent only when the list is non-empty and the first item's ``stock`` field
    does not contain the sold-out marker.
    """
    # Mail the result.
    mailer = MailSender(
        smtphost='smtp.163.com',
        mailfrom='*****@*****.**',
        smtpuser='******',
        smtppass='******',
        smtpport=25,
        smtptls=False,
        smtpssl=False,
    )
    items = list(result)
    in_stock = bool(items) and '品切れ' not in items[0]['stock']
    if in_stock:
        payload = json.dumps([dict(entry) for entry in items],
                             ensure_ascii=False)
        mailer.send(
            to=['*****@*****.**', '*****@*****.**'],
            subject='Monitor Nintendo site Result',
            body='到货啦到货啦到货啦到货啦!!!!!!!!!!!:\n' + payload,
        )
    return items
def test_send_single_values_to_and_cc(self):
    """``send`` accepts plain strings (not lists) for ``to`` and ``cc``."""
    sender = MailSender(debug=True)
    sender.send(
        to='*****@*****.**',
        subject='subject',
        body='body',
        cc='*****@*****.**',
        _callback=self._catch_mail_sent,
    )
def parse(self, response):
    """Email a notification when the question page reports an answer count.

    The count is taken from the text of the answers header. When no count is
    found, only an info message is logged.

    Args:
        response: The downloaded question page.
    """
    # (\d+) rather than (\d): a single-digit group would capture only the
    # last digit of a count such as "12 Answers".
    answer_flag = response\
        .css("#answers-header > div > h2::text")\
        .re(r'(\d+)\s[A-Z][a-z]+')
    if not answer_flag:
        self.logger.info("There are No Answers on this questions yet!")
        return

    mailer = MailSender(smtphost="smtp.gmail.com",
                        mailfrom="*****@*****.**",
                        smtpuser="******",
                        smtppass="******",
                        smtpport=587)
    msg_body = "Hi there,\n\nThere are " + answer_flag[0] + \
        " answers to your question on stackoverflow. " + \
        "Here's the link:\n" + response.url
    mailer.send(
        to=["*****@*****.**"],
        subject="Someone responded to your question on your stackoverflow",
        body=msg_body,
        cc=["*****@*****.**"])
def test_send_attach(self):
    """A mail with one attachment is built as a two-part message."""
    attachment = BytesIO(b'content')

    sender = MailSender(debug=True)
    sender.send(
        to=['*****@*****.**'],
        subject='subject',
        body='body',
        attachs=[('attachment', 'text/plain', attachment)],
        _callback=self._catch_mail_sent,
    )

    caught = self.catched_msg
    assert caught
    self.assertEqual(caught['to'], ['*****@*****.**'])
    self.assertEqual(caught['subject'], 'subject')
    self.assertEqual(caught['body'], 'body')

    msg = self.catched_msg['msg']
    self.assertEqual(msg['to'], '*****@*****.**')
    self.assertEqual(msg['subject'], 'subject')

    parts = msg.get_payload()
    assert isinstance(parts, list)
    self.assertEqual(len(parts), 2)

    text_part, attach_part = parts
    self.assertEqual(text_part.get_payload(decode=True), b'body')
    self.assertEqual(text_part.get_charset(), Charset('us-ascii'))
    self.assertEqual(attach_part.get_payload(decode=True), b'content')
def parse(self, response):
    """Email a notification when the question page reports an answer count.

    The count is taken from the text of the answers header. When no count is
    found, only an info message is logged.

    Args:
        response: The downloaded question page.
    """
    # (\d+) rather than (\d): a single-digit group would capture only the
    # last digit of a count such as "12 Answers".
    answer_flag = response\
        .css("#answers-header > div > h2::text")\
        .re(r'(\d+)\s[A-Z][a-z]+')
    if not answer_flag:
        self.logger.info("There are No Answers on this questions yet!")
        return

    mailer = MailSender(smtphost="smtp.gmail.com",
                        mailfrom="*****@*****.**",
                        smtpuser="******",
                        smtppass="******",
                        smtpport=587)
    # The space after "are" keeps the count from being glued to the word
    # ("There are3 answers").
    msg_body = "Hi there, \n\nThere are " + answer_flag[0] + \
        " answers to your question on stackoverflow. " + \
        "Here's the link:\n" + response.url
    mailer.send(
        to=["*****@*****.**"],
        subject="Someone responded to your question on your stackoverflow",
        body=msg_body,
        cc=["*****@*****.**"])
def open_spider(self, spider):
    """Create the mail sender and the MongoDB handles used by this object.

    Sets ``self.mailer``, ``self.client`` and ``self.db``; the database is
    looked up on the client by ``self.mongo_db``.
    """
    self.mailer = MailSender(
        smtphost="smtp.exmail.qq.com",
        mailfrom="*****@*****.**",
        smtpuser="******",
        smtppass="******",
    )
    client = pymongo.MongoClient(self.mongo_uri)
    self.client = client
    self.db = client[self.mongo_db]
def test_send_attach(self):
    """A mail with one attachment is built as a two-part message."""
    attachment = StringIO()
    attachment.write('content')
    attachment.seek(0)  # rewind so the sender reads from the start

    sender = MailSender(debug=True, crawler=self.crawler)
    sender.send(
        to=['*****@*****.**'],
        subject='subject',
        body='body',
        attachs=[('attachment', 'text/plain', attachment)],
    )

    caught = self.catched_msg
    assert caught
    self.assertEqual(caught['to'], ['*****@*****.**'])
    self.assertEqual(caught['subject'], 'subject')
    self.assertEqual(caught['body'], 'body')

    msg = self.catched_msg['msg']
    self.assertEqual(msg['to'], '*****@*****.**')
    self.assertEqual(msg['subject'], 'subject')

    parts = msg.get_payload()
    assert isinstance(parts, list)
    self.assertEqual(len(parts), 2)

    text_part, attach_part = parts
    self.assertEqual(text_part.get_payload(decode=True), 'body')
    self.assertEqual(attach_part.get_payload(decode=True), 'content')
def test_send_html(self):
    """An HTML body is sent as-is with a ``text/html`` content type."""
    html = '<p>body</p>'
    sender = MailSender(debug=True)
    sender.send(
        to=['*****@*****.**'],
        subject='subject',
        body=html,
        mimetype='text/html',
        _callback=self._catch_mail_sent,
    )

    msg = self.catched_msg['msg']
    self.assertEqual(msg.get_payload(), '<p>body</p>')
    self.assertEqual(msg.get('Content-Type'), 'text/html')
class SpiderOpenCloseLogging(object):
    """Log spider open/close events and send a mail for each of them."""

    def __init__(self):
        """Subscribe to the open/close signals and configure the mailer."""
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

        # The SMTP details are assigned as attributes after construction.
        mailer = MailSender()
        mailer.smtphost = "smtp.sina.cn"
        mailer.smtpuser = "******"
        mailer.smtppass = "******"
        mailer.mailfrom = "*****@*****.**"
        self.mailer = mailer

    def spider_opened(self, spider):
        """Log that ``spider`` opened and mail a fixed start notice."""
        log.msg("opened spider %s" % spider.name)
        self.mailer.send(
            to=["*****@*****.**"],
            subject="scrapy running",
            body="scrapy is start",
        )

    def spider_closed(self, spider):
        """Store the spider's totals, mail its report, and log the close.

        The database update and the mail happen only when ``spider.domain``
        is truthy; the log line is written in every case.
        """
        if spider.domain:
            cursor = spider.cur
            connection = spider.conn
            cursor.execute(
                "update admin_queue set sales=%s , money=%s where id=%s",
                (spider.sales_num, spider.money, spider.queue_id),
            )
            connection.commit()
            cursor.close()
            connection.close()

            # Report layout: shop name, totals, a separator, then the
            # spider's own shop-info text.
            header = str(spider.shop_name) + "\n"
            totals = ("30天销量:" + str(spider.sales_num)
                      + " \n30天成交额:" + str(spider.money)
                      + "\n店铺地址:" + str(spider.domain) + "\n")
            separator = "---------------------------------------\n"
            mail_content = header + totals + separator + spider.shopinfo_str

            self.mailer.send(
                to=[str(spider.mailto)],
                subject=str(spider.shop_name) + ' 数据报告',
                body=mail_content,
            )
        log.msg("closed spider %s" % spider.name)
def stats_spider_closed(self, spider, spider_stats):
    """Mail the global stats and ``spider``'s own stats to ``self.recipients``.

    Each stat is rendered as one ``name : value`` line, with the name padded
    to 50 characters.
    """
    mail = MailSender()
    row = "%-50s : %s"
    global_rows = "\n".join(row % pair for pair in stats.get_stats().items())
    spider_rows = "\n".join(row % pair for pair in spider_stats.items())
    body = (
        "Global stats\n\n"
        + global_rows
        + "\n\n%s stats\n\n" % spider.name
        + spider_rows
    )
    mail.send(self.recipients, "Scrapy stats for: %s" % spider.name, body)
def test_send_attach(self):
    """A mail with one attachment is built as a two-part message."""
    attachment = BytesIO(b"content")

    sender = MailSender(debug=True)
    sender.send(
        to=["*****@*****.**"],
        subject="subject",
        body="body",
        attachs=[("attachment", "text/plain", attachment)],
        _callback=self._catch_mail_sent,
    )

    caught = self.catched_msg
    assert caught
    self.assertEqual(caught["to"], ["*****@*****.**"])
    self.assertEqual(caught["subject"], "subject")
    self.assertEqual(caught["body"], "body")

    msg = self.catched_msg["msg"]
    self.assertEqual(msg["to"], "*****@*****.**")
    self.assertEqual(msg["subject"], "subject")

    parts = msg.get_payload()
    assert isinstance(parts, list)
    self.assertEqual(len(parts), 2)

    text_part, attach_part = parts
    self.assertEqual(text_part.get_payload(decode=True), b"body")
    self.assertEqual(text_part.get_charset(), Charset("us-ascii"))
    self.assertEqual(attach_part.get_payload(decode=True), b"content")
def close_spider(self, spider):
    """Mail the crawl statistics to the account configured in the settings.

    The account in the ``MAIL_USER`` / ``MAIL_PASS`` settings is used as
    sender, SMTP login and recipient. The body is a short intro followed by
    the pretty-printed crawler stats.

    Args:
        spider: The closing spider; its ``settings`` and ``crawler.stats``
            are read.
    """
    settings = spider.settings
    gmail_user = settings['MAIL_USER']
    # The credentials are deliberately not printed: echoing the password to
    # stdout would leak it into console output and logs.
    gmail_password = settings['MAIL_PASS']

    mail_subject = 'Booking.com website Scraper Report for ' + \
        datetime.date.today().strftime("%m/%d/%y")

    intro = "Summary stats from Scrapy Booking.com website: \n\n"
    # Pass the plain text itself. MailSender builds the MIME message, so
    # handing it a serialized MIME document would put raw headers and
    # boundaries into the visible mail text.
    body = intro + pprint.pformat(spider.crawler.stats.get_stats())

    mail_sender = MailSender(mailfrom=gmail_user,
                             smtphost="smtp.gmail.com",
                             smtpport=587,
                             smtpuser=gmail_user,
                             smtppass=gmail_password)
    mail_sender.send(to=[gmail_user],
                     subject=mail_subject,
                     body=body,
                     cc=None)
def notify_user(title, spider_name, spider=None):
    """Mail the spider's CSV output when a notification is warranted.

    Nothing is sent when ``out_to_email`` is falsy or when
    ``is_need_notify`` reports no need to notify for the last/current CSV
    pair; both cases are logged as warnings if ``spider`` is given.

    Args:
        title: Text appended to the timestamp in the mail subject.
        spider_name: Base name of the CSV file (``<spider_name>.csv``).
        spider: Optional spider whose logger receives the warnings.
    """
    if not out_to_email:
        if spider is not None:
            spider.logger.warning("out_to_email is False")
        return

    cur_file = spider_name + '.csv'
    last_file = 'last_' + cur_file
    if not is_need_notify(last_file, cur_file):
        if spider is not None:
            spider.logger.warning("No need to notify user")
        return

    mailer = MailSender(
        smtphost="smtp.qq.com",  # outgoing mail server
        mailfrom="*****@*****.**",  # sender address
        smtpuser="******",  # user name
        smtppass="******",  # authorization code
        smtpport=25  # port: 25 plain, 465/587 for SSL
    )
    # One clock reading, so the subject and attachment name agree.
    now = time.localtime()
    subject = '[%s]%s' % (time.strftime("%Y-%m-%d %H:%M:%S", now), title)
    timestamp = time.strftime("%m%d%H%M%S", now)
    body = 'Scrapy auto sends'

    # open() replaces the Python 2-only file() builtin. Binary mode hands the
    # raw bytes to the sender, and the with-block closes the handle once
    # send() has read the attachment while building the message.
    with open(cur_file, 'rb') as csv_file:
        attachs = [(spider_name + '_' + timestamp + '.csv',
                    'text/comma-separated-values', csv_file)]
        mailer.send(to=["*****@*****.**"], subject=subject, body=body,
                    attachs=attachs)
class GuguPipeline(object):
    """Flag items whose lyrics match ``GUGU_PATTERN`` and mail each match."""

    def __init__(self, mail_to):
        """Remember the recipient and log it when one is configured."""
        self.mailer = MailSender()
        self.mail_to = mail_to
        if mail_to:
            log.msg('Emails will be sent to %s' % mail_to, level=logging.INFO)

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``GUGU_PIPELINE_MAIL_TO`` setting."""
        return cls(settings['GUGU_PIPELINE_MAIL_TO'])

    def process_item(self, item, spider):
        """Set ``item['match']`` to ``'true'``/``'false'`` and mail matches."""
        if not re.search(GUGU_PATTERN, item['lyrics']):
            item['match'] = 'false'
            return item
        item['match'] = 'true'
        self.send_email(item)
        return item

    def send_email(self, item):
        """Mail the item's URL and lyrics; does nothing without a recipient."""
        recipient = self.mail_to
        if not recipient:
            return
        subject = "Found a match: {artist} - {title}".format(**item)
        body = """URL: {url} {lyrics} """.format(**item)
        self.mailer.send(to=[recipient], subject=subject, body=body)
def close_spider(self, spider):
    """Mail the spider's Excel output, or a "no data" notice, when it closes.

    The subject is the output file name. When that file exists it is
    attached; otherwise a body naming the spider and the date is sent
    without an attachment.

    Args:
        spider: The closing spider; must expose ``output_excel_filename``
            and, for the no-data case, ``zh_name`` and ``today``.

    Returns:
        Whatever ``MailSender.send`` returns.
    """
    mailer = MailSender(smtphost="smtp.163.com",
                        mailfrom="*****@*****.**",
                        smtppass="******",
                        smtpuser="******",
                        smtpport=25,
                        smtptls=True)
    excel_path = spider.output_excel_filename
    subject = excel_path
    attach_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    def _send(body, attachs):
        # Single place for the arguments shared by both branches.
        return mailer.send(to=["*****@*****.**"],
                           subject=subject,
                           body=body,
                           cc=["*****@*****.**"],
                           attachs=attachs,
                           mimetype="text/plain",
                           charset='utf-8')

    if not os.path.isfile(excel_path):
        body = (spider.zh_name + '今日无数据(' + spider.today + ')').encode('utf-8')
        return _send(body, ())

    # Scrapy's MailSender.send reads each attachment while it builds the
    # message, so the handle can be closed as soon as send() returns.
    with open(excel_path, "rb") as workbook:
        return _send('招标邮件,及时查收', [(excel_path, attach_mime, workbook)])
def parse(self, response):
    """Send a test mail using the ``MAIL_*`` values from the spider settings.

    Args:
        response: The downloaded page; only its URL is printed.

    Returns:
        Whatever ``MailSender.send`` returns.
    """
    print("url:", response.url)

    # Approach 1: pass every MAIL_* setting explicitly.
    # Approach 2 (not used here): MailSender.from_settings(self.settings).
    settings = self.settings
    mailer = MailSender(mailfrom=settings['MAIL_FROM'],
                        smtphost=settings['MAIL_HOST'],
                        smtpport=settings['MAIL_PORT'],
                        smtpuser=settings['MAIL_USER'],
                        smtppass=settings['MAIL_PASS'],
                        smtptls=settings['MAIL_TLS'],
                        smtpssl=settings['MAIL_SSL'])

    # The former print("end") after this return could never run and was
    # removed.
    return mailer.send(to=["*****@*****.**"], subject="title test", body="text test")
def close(self, reason):
    """Send a fixed test mail through the QQ SMTP server over SSL."""
    from scrapy.mail import MailSender

    smtp_options = dict(
        smtphost='smtp.qq.com',
        mailfrom='*****@*****.**',
        smtpuser='******',
        smtppass='******',
        smtpport=465,
        smtptls=False,
        smtpssl=True,
    )
    mailer = MailSender(**smtp_options)
    mailer.send(
        to=["*****@*****.**"],
        subject="subject",
        body="body",
        cc=["*****@*****.**"],
        charset='utf-8',
    )
def __init__(self):
    """Create ``self.mailer`` for the 163.com SMTP server on port 25."""
    smtp_options = {
        'smtphost': 'smtp.163.com',
        'smtpport': 25,
        'smtpuser': '******',
        'mailfrom': '*****@*****.**',
        'smtppass': '******',
    }
    self.mailer = MailSender(**smtp_options)
def close_spider(self, spider):
    """Finish exporting, mail the jobs that match stored keywords, and clean up.

    Steps performed, in order:
      1. Finish the exporter.
      2. Read two lines from ``cred.txt`` and derive the mail account name
         and password from them via ``self.unMar``.
      3. Load all keywords and all jobs with ``status=0`` from the database.
      4. Collect every job whose detail text contains a keyword.
      5. If more than two matches were collected, write them to
         ``waks.txt``, mail that file to the account itself, and mark the
         matched jobs with ``status=1``.
      6. Close ``self.waks`` and the database connection.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source; confirm which statements belong inside the ``if`` in step 5.
    """
    self.exporter.finish_exporting()

    # --- credentials -----------------------------------------------------
    credFile = open("cred.txt", "r")
    cred = []
    for line in credFile:
        cred.append(line)
    # First line holds the user name split by '|', second the password
    # split by '/'; each half is passed through self.unMar.
    u = str.split(cred[0], '|')
    p = str.split(cred[1], '/')
    un = self.unMar(u[0]) + self.unMar(u[1].rstrip())
    ps = self.unMar(p[0]) + self.unMar(p[1])
    credFile.close()

    # --- keywords --------------------------------------------------------
    self.cur.execute("select keyword from keyword")
    keyWordList = []
    keywords = self.cur.fetchall()
    for keyword in keywords:
        keyWordList.append(keyword[0])
        print(keyword[0])

    # --- unprocessed jobs matching any keyword ---------------------------
    self.cur.execute("select detail,job_id from jobs where status=0")
    jobs = self.cur.fetchall()
    jobsList = []
    jobIdList = set()
    for job in jobs:
        for keyword in keyWordList:
            if keyword in job[0]:
                # A job matching several keywords is appended once per
                # match; the id set de-duplicates.
                jobsList.append(job[0])
                jobIdList.add(job[1])

    if (len(jobsList) > 2):
        # NOTE(review): the file is opened in binary mode but written with
        # str values - this looks Python 2 specific; confirm before porting.
        myFile = open("waks.txt", "wb")
        myFile.write("Following Jobs Seems Interesting\n")
        for x in range(0, len(jobsList)):
            strn = str(x + 1) + " " + jobsList[x].encode(
                "utf-8", "strict") + "\n"
            myFile.write(strn)
        myFile.close()

        # Re-open for reading so the file can be handed over as attachment.
        # NOTE(review): this handle is not closed anywhere in this method.
        myFile = open("waks.txt", "r")
        mailer = MailSender(smtphost="smtp.gmail.com",
                            mailfrom=un,
                            smtpuser=un,
                            smtppass=ps,
                            smtpport=587)
        ids = (", ".join(str(e) for e in jobIdList))
        mailer.send(to=[un],
                    subject="Scrapy mail",
                    body=self.mesgBody,
                    attachs=(("HomeWork Ma", "text/plain", myFile), ))

        # Mark the mailed jobs as processed. The id list is interpolated
        # directly into the SQL text.
        sqlStr = str("update jobs set status=1 where job_id IN (%s)" % ids)
        logger.info(sqlStr)
        self.cur.execute(sqlStr)
        self.dbUtil.commit()

    # --- cleanup ---------------------------------------------------------
    self.waks.close()
    logger.info("Terminating sequence")
    try:
        self.dbUtil.close_db_connection()
    except Exception as e:
        # Best effort: a failure to close the connection is only printed.
        print(e)
def send_email(self, subject, body):
    """Send ``body`` under ``subject`` to the fixed recipient via 163.com."""
    smtp_options = dict(
        smtphost="smtp.163.com",  # outgoing mail server
        mailfrom="*****@*****.**",  # sender address
        smtpuser="******",  # user name
        # Not the password chosen at sign-up: this must be the mailbox's
        # authorization code.
        smtppass="******",
        smtpport=25,  # port
    )
    sender = MailSender(**smtp_options)
    sender.send(to="*****@*****.**", subject=subject, body=body)
def send_email(self, mail_body):
    """Mail ``mail_body`` as a contract-error report and return ``send``'s result."""
    sender = MailSender(
        mailfrom="*****@*****.**",
        smtphost="smtp.gmail.com",
        smtpport=587,
        smtpuser="******",
        smtppass="******",
    )
    outcome = sender.send(
        to=["*****@*****.**"],
        subject="StockSpider: Stock Spiders Contract Error",
        body=mail_body,
    )
    return outcome
def test_send_single_values_to_and_cc(self):
    """``send`` accepts plain strings (not lists) for ``to`` and ``cc``."""
    send_kwargs = dict(
        to="*****@*****.**",
        subject="subject",
        body="body",
        cc="*****@*****.**",
        _callback=self._catch_mail_sent,
    )
    MailSender(debug=True).send(**send_kwargs)
def close(spider, reason):
    """Mail the spider's collected content, then delegate to ``spider.closed``.

    A mail is sent only when ``spider.email_content`` contains something
    other than whitespace. If the spider defines a callable ``closed``, it
    is called with ``reason`` and its result is returned.
    """
    content = spider.email_content
    if content.strip():
        sender = MailSender(
            mailfrom="*****@*****.**",
            smtphost="smtp.gmail.com",
            smtpport=587,
            smtpuser="******",
            smtppass="******",
        )
        sender.send(
            to=["*****@*****.**", "*****@*****.**"],
            cc=["*****@*****.**"],
            subject="[Movies Here] " + spider.keyword + " is coming!!!",
            body=content,
        )

    closed = getattr(spider, 'closed', None)
    if not callable(closed):
        return None
    return closed(reason)
def __init__(self):
    """Create ``self.mailer`` from the project's mail-related settings."""
    self.mailer = MailSender(
        mailfrom=settings.get("MAIL_ADDRESS"),
        smtphost=settings.get("SMTP_HOST"),
        smtpport=settings.get("MAIL_PORT"),
        smtpuser=settings.get("MAIL_USER"),
        smtppass=settings.get("MAIL_PASSWORD"),
    )
def close_spider(self, spider):
    """Write the result files for ``spider`` and mail a report of failures.

    Files written into ``spider.spider_outdir``:
      * ``links.json`` - every collected item, as JSON.
      * ``CLEAN`` - written when no item failed; nothing else follows.
      * ``ERRORS`` - the failing items, as JSON.
      * ``REPORT`` - the human-readable failure report.

    An item counts as failing when its status is not 200, or it has a
    validation error, or it has header errors. The report is mailed to
    ``spider.send_to`` unless that is ``None``.

    Args:
        spider: The closing spider; must expose ``name``, ``spider_outdir``
            and ``send_to``.
    """
    # pop() with a default: a spider without recorded info is skipped by the
    # guard below instead of raising KeyError.
    info = self.info.pop(spider.name, None)
    if info is None:
        return

    spider_outdir = spider.spider_outdir

    # We use dict() to convert the objects stored in items to plain
    # dictionaries. Otherwise, they cannot be serialized by json.dump.
    items = [dict(i) for i in info['items']]

    with open(os.path.join(spider_outdir, "links.json"), 'w') as f:
        json.dump(items, f)

    errors = [
        i for i in items
        if i['status'] != 200 or i['validation_error'] or i['header_errors']
    ]

    if not errors:
        with open(os.path.join(spider_outdir, 'CLEAN'), 'w') as f:
            f.write("yes\n")
        return

    with open(os.path.join(spider_outdir, 'ERRORS'), 'w') as f:
        json.dump(errors, f)

    message = []
    for i in errors:
        item_message = ["===\nURL: {0}\n\n".format(i['url'])]
        status = i['status']
        if status != 200:
            item_message.append(
                "Failed retrieval with status: {0}\n".format(status))
        if i['validation_error']:
            item_message.append("Failed validation.\n\n")
        header_errors = i['header_errors']
        if header_errors:
            item_message.append("Failed header checks with the following "
                                "errors:\n{0}\n".format(
                                    "\n".join(header_errors)))
        # Only include items for which at least one failure line was added.
        if len(item_message) > 1:
            message += item_message

    message.append("\nSee %s for details of validation errors." %
                   spider_outdir)

    email_body = "".join(message)

    with open(os.path.join(spider_outdir, 'REPORT'), 'w') as f:
        f.write(email_body)

    send_to = spider.send_to
    if send_to is not None:
        sender = MailSender(mailfrom="*****@*****.**")
        sender.send([send_to], "Smoketest failure", email_body)
def get_email(source_name):
    """Send a "run completed" test mail that names ``source_name``."""
    sender = MailSender(
        mailfrom="*****@*****.**",
        smtphost="smtp.gmail.com",
        smtpport=587,
        smtpuser="******",
        smtppass="******",
    )
    message = "Run completed for %s " % source_name
    sender.send(
        to=["*****@*****.**"],
        subject="Test mail : Report",
        body=message,
        cc=["*****@*****.**", "*****@*****.**"],
    )
def spider_error(failure):
    """Mail the failure's error message to the configured failure address."""
    sender = MailSender(mailfrom=RYANAIR_SETTINGS['FROM_EMAIL'])
    recipient = RYANAIR_SETTINGS['FAILURE_EMAIL']
    sender.send(
        to=[recipient],
        subject="Ryanair flights error",
        body=failure.getErrorMessage(),
    )
def send_mail(subject, body):
    """Send ``subject``/``body``, UTF-8 encoded, to ``settings.MAIL_TO``."""
    sender = MailSender(
        smtphost=settings.MAIL_HOST,
        mailfrom=settings.MAIL_FROM,
        smtpuser=settings.MAIL_USER,
        smtppass=settings.MAIL_PASS,
        smtpport=25,
    )
    recipients = settings.MAIL_TO
    encoded_subject = subject.encode('utf-8')
    encoded_body = body.encode('utf-8')
    sender.send(to=recipients, subject=encoded_subject, body=encoded_body)
def spider_closed(self, spider):
    """Send the fixed "run completed" report mail when the spider closes."""
    smtp_options = dict(
        mailfrom="*****@*****.**",
        smtphost="smtp.gmail.com",
        smtpport=587,
        smtpuser="******",
        smtppass="******",
    )
    sender = MailSender(**smtp_options)
    sender.send(
        to=["*****@*****.**"],
        subject="Test mail : Report",
        body="Run completed for Makemytrip Crawler ",
        cc=["*****@*****.**", "*****@*****.**"],
    )
def close_spider(self, spider):
    """Write the result files for ``spider`` and mail a report of failures.

    Files written into ``spider.outdir``:
      * ``links.json`` - every collected item, as JSON.
      * ``CLEAN`` - written when no item failed; nothing else follows.
      * ``ERRORS`` - the failing items, as JSON.
      * ``REPORT`` - the human-readable failure report.

    An item counts as failing when its status is not 200, or it has a
    validation error, or it has header errors. The report is mailed to
    ``spider.send_to`` unless that is ``None``.

    Args:
        spider: The closing spider; must expose ``name``, ``outdir`` and
            ``send_to``.
    """
    # pop() with a default: a spider without recorded info is skipped by the
    # guard below instead of raising KeyError.
    info = self.info.pop(spider.name, None)
    if info is None:
        return

    outdir = spider.outdir
    items = info['items']

    # Items are converted with dict() before being serialized to JSON.
    with open(os.path.join(outdir, "links.json"), 'w') as f:
        f.write(json.dumps([dict(i) for i in items]))

    errors = [
        i for i in items
        if i['status'] != 200 or i['validation_error'] or i['header_errors']]

    if not errors:
        with open(os.path.join(outdir, 'CLEAN'), 'w') as f:
            f.write("yes\n")
        return

    with open(os.path.join(outdir, 'ERRORS'), 'w') as f:
        f.write(json.dumps([dict(i) for i in errors]))

    message = []
    for i in errors:
        item_message = ["===\nURL: {0}\n\n".format(i['url'])]
        status = i['status']
        if status != 200:
            item_message.append(
                "Failed retrieval with status: {0}\n".format(status))
        if i['validation_error']:
            item_message.append("Failed validation.\n\n")
        header_errors = i['header_errors']
        if header_errors:
            item_message.append(
                ("Failed header checks with the following "
                 "errors:\n{0}\n").format("\n".join(header_errors)))
        # Only include items for which at least one failure line was added.
        if len(item_message) > 1:
            message += item_message

    message.append("\nSee %s for details of validation errors." % outdir)

    email_body = "".join(message)

    with open(os.path.join(outdir, 'REPORT'), 'w') as f:
        f.write(email_body)

    send_to = spider.send_to
    if send_to is not None:
        sender = MailSender(mailfrom="*****@*****.**")
        sender.send([send_to], "Smoketest failure", email_body)
def close(self, reason):
    """Log the close reason, send a test mail, and print the configured user.

    Args:
        reason: The reason the spider closed; it is only logged.
    """
    self.logger.info(reason)

    # The port is now a plain int and is passed on. The original
    # `smtpport = 25,` built a one-element tuple that was never used.
    # 25 is also MailSender's default port, so the connection is unchanged.
    # The unused `smtpssl` local was dropped rather than wired in, to keep
    # the connection mode exactly as before.
    mailer = MailSender(mailfrom='*****@*****.**',
                        smtphost='smtp.163.com',
                        smtpport=25,
                        smtpuser='******',
                        smtppass='******')
    # Alternative: MailSender.from_settings(settings.MAIL)
    mailer.send(to=['*****@*****.**'],
                subject='Send Email Test by Scrapy MailSender!',
                body='Holle world!')

    # Call form of print, valid on both Python 2 and Python 3.
    print(settings.MAIL['MAIL_USER'])
def wrapper(*args, **kwargs):
    """Call ``mongo_op_func``, retrying while PyMongo raises ``AutoReconnect``.

    The number of attempts comes from the ``MAX_MONGO_RECONNECT_ATTEMPTS``
    setting. Every failed attempt is logged, mailed to ``MAIL_TO`` and
    followed by a pause. Returns the wrapped call's result, or ``None`` once
    all attempts have failed.
    """
    attempts_allowed = settings.getint("MAX_MONGO_RECONNECT_ATTEMPTS",
                                       MAX_AUTO_RECONNECT_ATTEMPTS)
    mail = MailSender()
    attempt = 0
    while attempt < attempts_allowed:
        try:
            return mongo_op_func(*args, **kwargs)
        except AutoReconnect as e:
            # The pause grows by one second per failed attempt (1, 2, 3, ...).
            wait_t = 1 + attempt
            log.msg("PyMongo auto-reconnecting... %s. Waiting %.1f seconds."%(str(e), wait_t), log.INFO)
            mail.send(to=[settings.get('MAIL_TO')],
                      subject='PyMongo auto-reconnecting....',
                      body="%s\n%s"%(e, traceback.format_exc()))
            time.sleep(wait_t)
        attempt += 1
def send_email(self, to=None, cc=None, subject="爬虫运行异常", body="", attachs=None):
    """Send a mail, falling back to the superuser's address without recipients.

    Args:
        to: Recipient address(es). When empty or omitted, the mail goes to
            the first superuser's email address.
        cc: Carbon-copy address(es); defaults to none.
        subject: Mail subject; encoded as UTF-8 before sending.
        body: Mail body; encoded as UTF-8 before sending.
        attachs: Attachments to pass to ``MailSender.send``; defaults to none.

    Raises:
        Exception: If no recipient was given and there is no superuser, or
            the superuser has no email address configured.
    """
    # If no recipient is given, send to the root account's mailbox.
    if not to:
        root_user = User.objects.filter(is_superuser=1)
        if len(root_user) == 0:
            raise Exception("root账户不存在, 请添加root账户和root账户的邮箱")
        root_user_email = root_user[0].email
        if not root_user_email:
            raise Exception("root账户没有配置邮箱, 请添加root账户的邮箱")
        # Kept from the original for any code that reads this list.
        self.email_receiver.append(root_user_email)
        # Use the fallback address as the recipient. The original looked it
        # up but still sent with the empty `to`, so the mail went to nobody.
        to = [root_user_email]

    mailer = MailSender()
    mailer.send(to=to,
                cc=cc or [],
                subject=subject.encode("utf-8"),
                body=body.encode("utf-8"),
                attachs=attachs or [])
class MemoryDebugger(object):
    """Collect memory figures when the engine stops and log/mail a report."""

    def __init__(self):
        """Load libxml2 if available and hook into the engine signals.

        Raises:
            NotConfigured: If the ``MEMDEBUG_ENABLED`` setting is false.
        """
        # libxml2 is optional; without it only gc figures are reported.
        try:
            import libxml2
        except ImportError:
            self.libxml2 = None
        else:
            self.libxml2 = libxml2

        if not settings.getbool('MEMDEBUG_ENABLED'):
            raise NotConfigured

        self.mail = MailSender()
        self.rcpts = settings.getlist('MEMDEBUG_NOTIFY')

        dispatcher.connect(self.engine_started, signals.engine_started)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)

    def engine_started(self):
        """Switch on libxml2 memory debugging when libxml2 is present."""
        if self.libxml2:
            self.libxml2.debugMemory(1)

    def engine_stopped(self):
        """Collect the figures, format them, and log or send the report."""
        report = self.create_report(self.collect_figures())
        self.log_or_send_report(report)

    def collect_figures(self):
        """Return ``(label, value, unit)`` tuples after a gc pass."""
        gc.collect()
        figures = [("Objects in gc.garbage", len(gc.garbage), "")]
        if self.libxml2:
            self.libxml2.cleanupParser()
            figures.append(
                ("libxml2 memory leak", self.libxml2.debugMemory(1), "bytes"))
        return figures

    def create_report(self, figures):
        """Format ``figures`` as text, plus live refs if ``TRACK_REFS`` is on."""
        pieces = ["SCRAPY MEMORY DEBUGGER RESULTS\n\n"]
        for figure in figures:
            pieces.append("%-30s : %d %s\n" % figure)
        if settings.getbool('TRACK_REFS'):
            pieces.append(os.linesep)
            pieces.append(format_live_refs())
        return "".join(pieces)

    def log_or_send_report(self, report):
        """Mail ``report`` to the configured recipients, then log it."""
        if self.rcpts:
            subject = "Scrapy Memory Debugger results at %s" % \
                socket.gethostname()
            self.mail.send(self.rcpts, subject, report)
        log.msg(report)
def engine_closed(self):
    """Mail whether the crawl left a ``Top`` directory under ``DATA_PATH``.

    The existence of that directory is taken as success; the subject and
    body of the mail differ for the two outcomes.
    """
    corpus_root = os.path.join(settings.get('DATA_PATH'), 'Top')
    mailer = MailSender()
    success = os.path.isdir(corpus_root)
    if success:
        subject = "The training corpus has been downloaded"
        content = "Crawling ended succesfully at %s." % time.asctime()
    else:
        subject = "Error crawling"
        content = "Crawling ended abnormally at %s" % time.asctime()
    mailer.send(
        to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
        subject=subject,
        body=content,
    )
def test_send(self):
    """A plain mail reaches the callback with its fields unchanged."""
    sender = MailSender(debug=True)
    sender.send(
        to=['*****@*****.**'],
        subject='subject',
        body='body',
        _callback=self._catch_mail_sent,
    )

    caught = self.catched_msg
    assert caught
    self.assertEqual(caught['to'], ['*****@*****.**'])
    self.assertEqual(caught['subject'], 'subject')
    self.assertEqual(caught['body'], 'body')

    msg = self.catched_msg['msg']
    self.assertEqual(msg['to'], '*****@*****.**')
    self.assertEqual(msg['subject'], 'subject')
    self.assertEqual(msg.get_payload(), 'body')
def spider_closed(self, spider):
    """Mail the crawl progress read from the database and return ``send``'s result.

    The row count of ``Results`` is reported as crawled and the row count of
    ``RequestUrls`` as the total. Progress is crawled / total.

    Returns:
        Whatever ``MailSender.send`` returns.
    """
    mailer = MailSender(mailfrom="*****@*****.**",
                        smtphost="smtp.gmail.com",
                        smtpport=587,
                        smtpuser="******",
                        smtppass="******")

    # get statistics
    self.cur.execute("SELECT COUNT(*) FROM Results")
    crawled = self.cur.fetchone()[0]

    self.cur.execute("SELECT COUNT(*) FROM RequestUrls")
    total = self.cur.fetchone()[0]

    # An empty RequestUrls table would make the ratio divide by zero;
    # report 0.0 progress in that case.
    progress = float(crawled) / total if total else 0.0

    emailBody = "Crawled: " + str(crawled) + "\nTo be crawled: " + \
        str(total - crawled) + "\nProgress: " + str(progress)

    return mailer.send(to=["*****@*****.**"], subject="Test", body=emailBody)
def closed(self, reason):
    """Format the collected items as text and create a mailer from settings.

    Every entry of ``self.web_data_list`` is rendered as one text record.

    NOTE(review): in the visible code neither the formatted text nor the
    mailer is used after being created - presumably a send call is meant to
    follow; confirm against the full spider.
    """
    record_format = 'index:%d, userid:%s, author:%s,head_img:%s \n,age:%s,sex:%s, vote:%s,contentid:%s\n[%s]\n\n'
    # Named `report` rather than `str`, so the builtin is not shadowed, and
    # built with join instead of repeated concatenation.
    report = ''.join(
        record_format % (index, item['userid'], item['author'],
                         item['head_img'], item['age'], item['sex'],
                         item['stats_vote'], item['contentid'],
                         item['content'])
        for index, item in enumerate(self.web_data_list)
    )

    # Send the crawled data by mail.
    settings = get_project_settings()
    mailer = MailSender.from_settings(settings)
def test_send_utf8(self):
    """A UTF-8 subject and body are sent with a UTF-8 charset header."""
    subject = u'sübjèçt'
    body = u'bödÿ-àéïöñß'

    sender = MailSender(debug=True)
    sender.send(
        to=['*****@*****.**'],
        subject=subject,
        body=body,
        charset='utf-8',
        _callback=self._catch_mail_sent,
    )

    caught = self.catched_msg
    assert caught
    self.assertEqual(caught['subject'], subject)
    self.assertEqual(caught['body'], body)

    msg = self.catched_msg['msg']
    self.assertEqual(msg['subject'], subject)
    self.assertEqual(msg.get_payload(), body)
    self.assertEqual(msg.get_charset(), Charset('utf-8'))
    self.assertEqual(msg.get('Content-Type'), 'text/plain; charset="utf-8"')
def from_crawler(cls, crawler):
    """Build the instance from ``crawler`` and hook it to the engine signals.

    Recipients come from the ``STATSMAILER_RCPTS`` setting; the handlers for
    engine stop and engine start are connected, in that order.
    """
    crawler_settings = crawler.settings
    recipients = crawler_settings.getlist("STATSMAILER_RCPTS")
    mail = MailSender.from_settings(crawler_settings)
    instance = cls(crawler.stats, recipients, mail)
    hooks = (
        (instance.engine_stopped, signals.engine_stopped),
        (instance.engine_started, signals.engine_started),
    )
    for handler, signal in hooks:
        crawler.signals.connect(handler, signal=signal)
    return instance
def send_mail(self, month_year):
    """Mail the first start URL, with ``month_year`` named in the subject.

    The mail settings have to be provided in the project's own settings.py:
    http://doc.scrapy.org/en/latest/topics/email.html#topics-email-settings
    """
    sender = MailSender.from_settings(self.settings)
    sender.send(
        to=[self.notification_email],
        subject='Bonn: Neuer Termin frei im ' + month_year,
        body=self.start_urls[0],
    )
def from_crawler(cls, crawler):
    """Build the instance from ``crawler`` and connect ``spider_closed``.

    Raises:
        NotConfigured: If the ``STATSMAILER_RCPTS`` setting is empty.
    """
    crawler_settings = crawler.settings
    recipients = crawler_settings.getlist("STATSMAILER_RCPTS")
    if not recipients:
        raise NotConfigured
    instance = cls(
        crawler.stats,
        recipients,
        MailSender.from_settings(crawler_settings),
    )
    crawler.signals.connect(instance.spider_closed,
                            signal=signals.spider_closed)
    return instance
def __init__(self):
    """Subscribe to the spider open/close signals and configure the mailer."""
    dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
    dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

    # The SMTP details are assigned as attributes after construction.
    mailer = MailSender()
    mailer.smtphost = "smtp.sina.cn"
    mailer.smtpuser = "******"
    mailer.smtppass = "******"
    mailer.mailfrom = "*****@*****.**"
    self.mailer = mailer
def closed(self, reason):
    """Log the close reason and mail the crawler's stats."""
    self.logger.info("Spider closed: %s"%str(reason))
    stats_text = str(self.crawler.stats.get_stats())
    sender = MailSender.from_settings(self.settings)
    sender.send(
        to=["*****@*****.**"],
        subject="Spider closed",
        body=stats_text,
        cc=["*****@*****.**"],
    )
def __init__(self):
    """Set up the mailer and recipients and hook into the engine signals.

    Raises:
        NotConfigured: If the ``MEMDEBUG_ENABLED`` setting is false.
    """
    enabled = settings.getbool('MEMDEBUG_ENABLED')
    if not enabled:
        raise NotConfigured

    self.mail = MailSender()
    self.rcpts = settings.getlist('MEMDEBUG_NOTIFY')

    for handler, signal in (
        (self.engine_started, signals.engine_started),
        (self.engine_stopped, signals.engine_stopped),
    ):
        dispatcher.connect(handler, signal)
def from_crawler(cls, crawler):
    """Build the instance from ``crawler`` and connect ``spider_closed``.

    Raises:
        NotConfigured: If the ``ERRMAIL_LIST`` setting is empty.
    """
    crawler_settings = crawler.settings
    mail_list = crawler_settings.getlist("ERRMAIL_LIST")
    if not mail_list:
        raise NotConfigured
    instance = cls(
        crawler.stats,
        mail_list,
        MailSender.from_settings(crawler_settings),
    )
    crawler.signals.connect(instance.spider_closed,
                            signal=signals.spider_closed)
    return instance
def parse(self, response):
    """Send a test mail; print any exception raised by the ``send`` call."""
    sender = MailSender.from_settings(settings)
    try:
        sender.send(
            to=["*****@*****.**"],
            subject="scrapy spider",
            body="test message",
            cc=['*****@*****.**'],
            charset="utf-8",
        )
    except Exception as e:
        print("Error occurred...{0}".format(str(e)))
    print('mail sending')
def closed(self, reason):
    """Log the close reason and mail the crawler's stats.

    Args:
        reason: The reason the spider closed; included in the log line.
    """
    # The leftover `pdb.set_trace()` breakpoint was removed: it halted every
    # spider shutdown waiting for interactive input.
    self.logger.info("Spider closed: %s"%str(reason))
    mailer = MailSender.from_settings(self.settings)
    mailer.send(
        to=["******@qq.com"],
        subject="Spider closed",
        body=str(self.crawler.stats.get_stats()),
        cc=["**********@xxxxxxxx.com"]
    )
def from_crawler(cls, crawler):
    """Build the instance with a settings-based mailer and connect its handlers.

    The handlers for spider open, spider close and item scraped are
    connected, in that order.
    """
    instance = cls(MailSender.from_settings(crawler.settings))
    hooks = (
        (instance.spider_opened, signals.spider_opened),
        (instance.spider_closed, signals.spider_closed),
        (instance.item_scraped, signals.item_scraped),
    )
    for handler, signal in hooks:
        crawler.signals.connect(handler, signal=signal)
    return instance