def fetch_url(url, opener=None, timeout=60.0, chunk_size=16384):
    """
    Download ``url`` in worker threads and finish with the parsed headers
    and the buffered body.

    Written as a generator that yields ``idiokit.thread(...)`` calls
    (presumably wrapped as an idiokit stream outside this view).

    url -- anything ``opener.open`` accepts.
    opener -- object with an ``open(url, timeout=...)`` method; when None a
        default ``urllib2.build_opener()`` is used.
    timeout -- passed through to ``opener.open``.
    chunk_size -- number of bytes requested per ``read`` call.

    Finishes through ``idiokit.stop(info, output)`` where ``info`` is the
    response header block re-parsed with ``email.parser`` and ``output`` is
    a ``StringIO`` rewound to position 0.

    Raises HTTPError for ``urllib2.HTTPError``, FetchUrlTimeout when
    ``_is_timeout`` recognizes the failure, and FetchUrlFailed for other
    URL, socket and httplib errors.
    """
    url_opener = urllib2.build_opener() if opener is None else opener

    try:
        body = StringIO()

        response = yield idiokit.thread(url_opener.open, url, timeout=timeout)
        try:
            # Read until the first empty chunk, which marks the end of data.
            chunk = yield idiokit.thread(response.read, chunk_size)
            while chunk:
                body.write(chunk)
                chunk = yield idiokit.thread(response.read, chunk_size)
        finally:
            response.close()

        raw_headers = str(response.info())
        headers = email.parser.Parser().parsestr(raw_headers, headersonly=True)

        body.seek(0)
        idiokit.stop(headers, body)
    except urllib2.HTTPError as he:
        raise HTTPError(he.code, he.msg, he.hdrs, he.fp)
    except urllib2.URLError as url_error:
        if _is_timeout(url_error.reason):
            raise FetchUrlTimeout("fetching URL timed out")
        raise FetchUrlFailed(str(url_error))
    except socket.error as sock_error:
        if _is_timeout(sock_error):
            raise FetchUrlTimeout("fetching URL timed out")
        raise FetchUrlFailed(str(sock_error))
    except httplib.HTTPException as http_error:
        raise FetchUrlFailed(str(http_error))
def _login(self, server, user, password):
    """
    Run the SMTP greeting on ``server`` and log in when possible.

    Sends EHLO, switches to STARTTLS (followed by a second EHLO) when the
    server advertises it, and calls ``server.login`` only when both
    ``user`` and ``password`` are given and the server advertises "auth".
    Every blocking call is run through ``idiokit.thread``.
    """
    yield idiokit.thread(server.ehlo)

    if server.has_extn("starttls"):
        yield idiokit.thread(server.starttls)
        # Extensions are queried again after the TLS upgrade.
        yield idiokit.thread(server.ehlo)

    # Without complete credentials there is nothing to authenticate with.
    if user is None or password is None:
        return

    if server.has_extn("auth"):
        yield idiokit.thread(server.login, user, password)
def feed(self, query, feed_all):
    """
    Poll a collab wiki with "IncGetMeta" and send the pages as events.

    query -- query passed to the "IncGetMeta" request.
    feed_all -- when true, every cached page event is sent after each
        successful poll; otherwise only the pages present in that poll's
        updates are sent.

    Each page is kept as an event in a local cache keyed by page name.
    Updated pages get "id:open", "gwikipagename" and "collab url" keys plus
    the normalized metadata values; removed pages are dropped from the
    cache and announced with an "id:close" event. A failed request is
    logged and retried after ``self.poll_interval``. Never returns.
    """

    def page_url(page):
        # Open and close events must point to the same URL, so both go
        # through the same percent-encoding of the UTF-8 page name.
        return self.collab_url + urllib.quote(page.encode("utf8"))

    collab = wiki.GraphingWiki(
        self.collab_url,
        ssl_verify_cert=not self.collab_ignore_cert,
        ssl_ca_certs=self.collab_extra_ca_certs)
    yield idiokit.thread(collab.authenticate, self.collab_user, self.collab_password)

    yield idiokit.sleep(5)

    token = None
    current = dict()
    while True:
        try:
            result = yield idiokit.thread(collab.request, "IncGetMeta", query, token)
        except wiki.WikiFailure as fail:
            self.log.error("IncGetMeta failed: {0!r}".format(fail))
        else:
            incremental, token, (removed, updates) = result
            removed = set(removed)

            if not incremental:
                # A full (non-incremental) reply replaces the whole cache:
                # everything known so far is closed unless re-listed below.
                removed.update(current)
                current.clear()

            for page, keys in updates.iteritems():
                event = current.setdefault(page, events.Event())
                event.add("id:open", self.page_id(page))
                event.add("gwikipagename", page)
                event.add("collab url", page_url(page))
                removed.discard(page)

                for key, (discarded, added) in keys.iteritems():
                    for value in map(normalize, discarded):
                        event.discard(key, value)
                    for value in map(normalize, added):
                        event.add(key, value)

                if not feed_all:
                    yield idiokit.send(event)

            for page in removed:
                current.pop(page, None)

                event = events.Event()
                event.add("id:close", self.page_id(page))
                event.add("gwikipagename", page)
                event.add("collab url", page_url(page))

                yield idiokit.send(event)

            if feed_all:
                for page in current:
                    yield idiokit.send(current[page])

        yield idiokit.sleep(self.poll_interval)
def run_mailbox(self, min_delay=5.0, max_delay=60.0):
    """
    Serve mailbox method calls received through ``idiokit.next()``.

    Each received item is unpacked as ``(event, name, args, keys)``: the
    method ``name`` of the connected mailbox is called in a thread with
    ``args``/``keys`` and the outcome is reported through ``event``.

    min_delay, max_delay -- bounds of the reconnection back-off; the delay
        starts from ``min(min_delay, max_delay)`` and doubles after every
        failed attempt up to ``max_delay``.

    A connection lost during a call fails the event with LostConnection;
    the next item triggers a reconnect. The mailbox is disconnected when
    the generator exits.
    """
    connection_errors = (imaplib.IMAP4.abort, socket.error)
    mailbox = None

    try:
        while True:
            request = yield idiokit.next()

            # (Re)connect with exponential back-off before serving.
            wait = min(min_delay, max_delay)
            while mailbox is None:
                try:
                    mailbox = yield idiokit.thread(self.connect)
                except connection_errors as exc:
                    self.log.error(
                        "Failed IMAP connection ({0})".format(
                            utils.format_exception(exc)))
                else:
                    break

                self.log.info(
                    "Retrying connection in {0:.2f} seconds".format(wait))
                yield idiokit.sleep(wait)
                wait = min(2 * wait, max_delay)

            pending, method_name, call_args, call_keys = request
            if pending.result().unsafe_is_set():
                # The event already has a result, so the call is skipped.
                continue

            try:
                outcome = yield idiokit.thread(
                    getattr(mailbox, method_name), *call_args, **call_keys)
            except connection_errors as exc:
                yield idiokit.thread(self.disconnect, mailbox)
                self.log.error("Lost IMAP connection ({0})".format(
                    utils.format_exception(exc)))
                mailbox = None
                pending.fail(LostConnection, "", None)
            except imaplib.IMAP4.error as exc:
                pending.fail(type(exc), exc, None)
            else:
                pending.succeed(outcome)
    finally:
        if mailbox is not None:
            yield idiokit.thread(self.disconnect, mailbox)
def build_mail(self, events, to=[], cc=[], bcc=[], template="", template_values={}, **keys):
    """
    Format ``template`` into a mail object using the given events.

    events -- events handed to the template; None (used when only testing
        that a mail can be built) is treated as an empty list.
    to, cc, bcc -- addresses exposed to the template as constants after
        ``format_addresses``.
    template -- template source given to ``MailTemplate``.
    template_values -- extra template keys, each wrapped in
        ``templates.Event``; they override the built-in keys on a clash.
    keys -- accepted and ignored.

    The formatting runs in a thread and the resulting message is passed to
    ``idiokit.stop``.
    """
    event_list = [] if events is None else events

    csv_formatter = templates.CSVFormatter()
    template_keys = dict(
        csv=csv_formatter,
        attach_csv=templates.AttachUnicode(csv_formatter),
        attach_and_embed_csv=templates.AttachAndEmbedUnicode(csv_formatter),
        attach_zip=templates.AttachZip(csv_formatter),
        to=templates.Const(format_addresses(to)),
        cc=templates.Const(format_addresses(cc)),
        bcc=templates.Const(format_addresses(bcc)),
    )
    # Applied last so that user supplied values win over the built-ins.
    template_keys.update(
        (name, templates.Event(value))
        for name, value in dict(template_values).iteritems()
    )

    formatter = MailTemplate(template, **template_keys)
    message = yield idiokit.thread(formatter.format, event_list)
    idiokit.stop(message)
def _connect(self, host, port, retry_interval=60.0):
    """
    Keep trying to open an SMTP connection until one succeeds.

    host, port -- address handed to ``smtplib.SMTP``.
    retry_interval -- seconds slept between failed attempts.

    The connection is created in a thread using
    ``self.smtp_connection_timeout``. Socket and SMTP errors are logged and
    retried indefinitely; the connected server object is passed to
    ``idiokit.stop``.
    """
    while True:
        self.log.info(
            u"Connecting to SMTP server {0!r} port {1}".format(host, port))

        try:
            server = yield idiokit.thread(
                smtplib.SMTP, host, port,
                timeout=self.smtp_connection_timeout)
        except (socket.error, smtplib.SMTPException) as exc:
            failure = utils.format_exception(exc)
            self.log.error(
                u"Failed connecting to SMTP server: {0}".format(failure))
        else:
            self.log.info(u"Connected to the SMTP server")
            break

        self.log.info(
            u"Retrying SMTP connection in {0:.2f} seconds".format(
                retry_interval))
        yield idiokit.sleep(retry_interval)

    idiokit.stop(server)
def _compress(self, queue):
    """
    Compress every path that arrives through ``queue``, forever.

    Each value obtained from ``queue.wait()`` is handed to the module-level
    ``_compress`` function in a thread. The returned path is logged on
    success; a ValueError is logged as an invalid path and the loop goes on.
    """
    while True:
        source_path = yield queue.wait()

        try:
            archive_path = yield idiokit.thread(_compress, source_path)
            message = "Compressed archive {0!r}".format(archive_path)
            self.log.info(message)
        except ValueError:
            self.log.error("Invalid path {0!r}".format(source_path))
def run_mailbox(self, min_delay=5.0, max_delay=60.0):
    """
    Serve mailbox method calls received through ``idiokit.next()``.

    Each received item is unpacked as ``(event, name, args, keys)``: the
    method ``name`` of the connected mailbox is called in a thread with
    ``args``/``keys`` and the outcome is reported through ``event``.

    min_delay, max_delay -- bounds of the reconnection back-off; the delay
        starts from ``min(min_delay, max_delay)`` and doubles after every
        failed attempt up to ``max_delay``.

    When the connection is lost during a call, the mailbox is reconnected
    and the same item is tried again. The mailbox is disconnected when the
    generator exits.
    """
    connection_errors = (imaplib.IMAP4.abort, socket.error)
    mailbox = None

    try:
        while True:
            request = yield idiokit.next()

            # One pass per attempt; only a lost connection loops again.
            while True:
                wait = min(min_delay, max_delay)
                while mailbox is None:
                    try:
                        mailbox = yield idiokit.thread(self.connect)
                    except connection_errors as exc:
                        self.log.error("Failed IMAP connection ({0})".format(
                            utils.format_exception(exc)))
                    else:
                        break

                    self.log.info(
                        "Retrying connection in {0:.2f} seconds".format(wait))
                    yield idiokit.sleep(wait)
                    wait = min(2 * wait, max_delay)

                pending, method_name, call_args, call_keys = request
                if pending.result().unsafe_is_set():
                    # The event already has a result, so the call is skipped.
                    break

                try:
                    outcome = yield idiokit.thread(
                        getattr(mailbox, method_name), *call_args, **call_keys)
                except connection_errors as exc:
                    yield idiokit.thread(self.disconnect, mailbox)
                    self.log.error("Lost IMAP connection ({0})".format(
                        utils.format_exception(exc)))
                    mailbox = None
                except imaplib.IMAP4.error as exc:
                    pending.fail(type(exc), exc, None)
                    break
                else:
                    pending.succeed(outcome)
                    break
    finally:
        if mailbox is not None:
            yield idiokit.thread(self.disconnect, mailbox)
def _connect(self, host, port, retry_interval=60.0):
    """
    Open an SMTP connection, retrying for as long as it takes.

    host, port -- address handed to ``smtplib.SMTP``.
    retry_interval -- seconds slept after a failed attempt.

    ``smtplib.SMTP`` is constructed in a thread with
    ``self.smtp_connection_timeout`` as its timeout. Socket and SMTP errors
    are logged and followed by a new attempt; the connected server object
    is passed to ``idiokit.stop``.
    """
    connecting = u"Connecting to SMTP server {0!r} port {1}".format(host, port)
    retrying = u"Retrying SMTP connection in {0:.2f} seconds".format(retry_interval)

    while True:
        self.log.info(connecting)

        try:
            server = yield idiokit.thread(
                smtplib.SMTP, host, port, timeout=self.smtp_connection_timeout)
        except (socket.error, smtplib.SMTPException) as exc:
            self.log.error(u"Failed connecting to SMTP server: {0}".format(
                utils.format_exception(exc)))
        else:
            self.log.info(u"Connected to the SMTP server")
            break

        self.log.info(retrying)
        yield idiokit.sleep(retry_interval)

    idiokit.stop(server)
def _manage_cache(self, query):
    """
    Keep ``self.cache`` in sync with the wiki pages matching ``query``.

    Polls "IncGetMeta" through ``self.collab.request`` every
    ``self.poll_interval`` seconds. Each page is cached as an event holding
    "gwikipagename" plus the values of those metadata keys that appear in
    any value set of ``self.keys``. Pages reported as removed are dropped
    from the cache. Socket and wiki failures are logged and the request is
    repeated on the next round. Never returns.
    """
    token = None

    # Union of all the key names listed in self.keys.
    wikikeys = set().union(*self.keys.values())

    while True:
        try:
            reply = yield idiokit.thread(
                self.collab.request, "IncGetMeta", query, token)
        except (socket.error, wiki.WikiFailure) as exc:
            self.log.error("IncGetMeta failed: {0}".format(exc))
        else:
            incremental, token, (removed, updates) = reply
            removed = set(removed)

            if not incremental:
                # A full reply supersedes everything cached so far.
                removed.update(self.cache.keys())
                self.cache.clear()

            for page, keys in updates.iteritems():
                cached = self.cache.setdefault(page, events.Event())
                cached.add("gwikipagename", page)
                removed.discard(page)

                for key, (discarded, added) in keys.iteritems():
                    for value in discarded:
                        cached.discard(key, value)
                    # Only keys of interest are stored.
                    if key in wikikeys:
                        for value in added:
                            cached.add(key, value)

            for page in removed:
                self.cache.pop(page, None)

            if removed or updates:
                summary = "Updated {0} pages and removed {1} pages ({2} pages in cache)".format(
                    len(updates), len(removed), len(self.cache.keys()))
                self.log.info(summary)

        yield idiokit.sleep(self.poll_interval)
def _manage_cache(self, query):
    """
    Mirror the wiki pages matching ``query`` into ``self.cache``.

    Repeats an "IncGetMeta" request through ``self.collab.request`` with
    ``self.poll_interval`` seconds between rounds. A cached page is an
    event with "gwikipagename" and the values of the metadata keys found in
    the value sets of ``self.keys``. Removed pages are popped from the
    cache. Socket and wiki failures are only logged. Never returns.
    """
    token = None

    # Collect every key name mentioned in self.keys into one set.
    wikikeys = set().union(*self.keys.values())

    while True:
        try:
            reply = yield idiokit.thread(self.collab.request, "IncGetMeta", query, token)
        except (socket.error, wiki.WikiFailure) as exc:
            self.log.error("IncGetMeta failed: {0}".format(exc))
        else:
            incremental, token, (removed, updates) = reply
            removed = set(removed)

            if not incremental:
                # Non-incremental replies restart the cache from scratch.
                removed.update(self.cache.keys())
                self.cache.clear()

            for page, keys in updates.iteritems():
                page_event = self.cache.setdefault(page, events.Event())
                page_event.add("gwikipagename", page)
                removed.discard(page)

                for key, (discarded, added) in keys.iteritems():
                    for old_value in discarded:
                        page_event.discard(key, old_value)
                    if key not in wikikeys:
                        # Values of unlisted keys are never stored.
                        continue
                    for new_value in added:
                        page_event.add(key, new_value)

            for page in removed:
                self.cache.pop(page, None)

            if removed or updates:
                self.log.info(
                    "Updated {0} pages and removed {1} pages ({2} pages in cache)".format(
                        len(updates), len(removed), len(self.cache.keys())))

        yield idiokit.sleep(self.poll_interval)
def build_mail(self, events, to=[], cc=[], bcc=[], template="", template_values={}, **keys):
    """
    Build a mail object from ``template`` and the collected events.

    events -- events passed to the template formatter; None means a test
        build and is replaced with an empty list.
    to, cc, bcc -- addresses made available to the template as constants
        (formatted with ``format_addresses``).
    template -- template source for ``MailTemplate``.
    template_values -- additional template keys wrapped in
        ``templates.Event``; on a name clash they replace the built-ins.
    keys -- accepted and ignored.

    The template is formatted in a thread and the result is passed to
    ``idiokit.stop``.
    """
    if events is None:
        events = []

    csv = templates.CSVFormatter()

    # Built-in template keys: CSV renderings and the recipient constants.
    template_keys = {"csv": csv}
    template_keys["attach_csv"] = templates.AttachUnicode(csv)
    template_keys["attach_and_embed_csv"] = templates.AttachAndEmbedUnicode(csv)
    template_keys["attach_zip"] = templates.AttachZip(csv)
    template_keys["to"] = templates.Const(format_addresses(to))
    template_keys["cc"] = templates.Const(format_addresses(cc))
    template_keys["bcc"] = templates.Const(format_addresses(bcc))

    # Caller supplied values are added last and may override the above.
    extra_values = dict(template_values)
    for name, value in extra_values.iteritems():
        template_keys[name] = templates.Event(value)

    mail_template = MailTemplate(template, **template_keys)
    built = yield idiokit.thread(mail_template.format, events)
    idiokit.stop(built)
def report(self, eventlist, retries=None, to=[], cc=[], bcc=[], **keys):
    """
    Build a mail from ``eventlist`` and send it over SMTP.

    eventlist -- events the mail is built from; an empty list means the
        mail is built but skipped.
    retries -- how many times the mail may still be requeued after a
        socket/SMTP failure; defaults to ``self.max_retries``.
    to, cc, bcc -- recipient addresses used for the mail headers.
    keys -- passed on to ``self.build_mail``.

    When ``self.mail_receiver_override`` is set the mail is delivered to
    those addresses instead of the header recipients. Data errors and
    refused recipients drop the mail; other socket/SMTP errors requeue it
    in 60 seconds while retries remain.

    Finishes through ``idiokit.stop(sent)`` where ``sent`` is True only
    when ``sendmail`` completed without an error.
    """
    if retries is None:
        retries = self.max_retries

    msg = yield self.build_mail(eventlist, to=to, cc=cc, bcc=bcc, **keys)

    prep_recipient_header(msg, "to", to)
    prep_recipient_header(msg, "cc", cc)
    prep_recipient_header(msg, "bcc", bcc)

    # FIXME: Use encoding after getaddresses
    from_addr = getaddresses([self.mail_sender])[0]
    if "from" not in msg:
        msg["from"] = formataddr(from_addr)

    subject = decode_subject(msg.get("subject", ""))

    header_recipients = clean_recipients(
        msg.get_all("to", []) + msg.get_all("cc", []) + msg.get_all("bcc", []))
    if self.mail_receiver_override is not None:
        actual_recipients = clean_recipients(self.mail_receiver_override)
        recipient_string = u"{actual_recipients} (overridden from {header_recipients})".format(
            actual_recipients=format_recipients(actual_recipients),
            header_recipients=format_recipients(header_recipients),
        )
    else:
        actual_recipients = header_recipients
        recipient_string = unicode(format_recipients(actual_recipients))

    # No need to keep both the mail object and mail data in memory.
    msg_data = msg.as_string()
    del msg

    # Base event attached to the log lines that report the final status.
    event = events.Event(
        {
            "type": "mail",
            "subject": subject,
            "to": to,
            "cc": cc,
            "bcc": bcc,
            "sender": from_addr[1],
            "recipients": actual_recipients,
            "event count": unicode(len(eventlist)),
        }
    )

    sent = False
    if not actual_recipients:
        self.log.info(
            u'Skipped message "{subject}": {recipients}'.format(
                subject=subject, recipients=recipient_string),
            event=event.union(status="skipped (no recipients)"),
        )
    elif not eventlist:
        self.log.info(
            u'Skipped message "{subject}" to {recipients}: no events'.format(
                subject=subject, recipients=recipient_string),
            event=event.union(status="skipped (no events)"),
        )
    else:
        server = yield self._connect(self.smtp_host, self.smtp_port)
        try:
            yield self._login(server, self.smtp_auth_user, self.smtp_auth_password)

            self.log.info(
                u'Sending message "{subject}" to {recipients}'.format(
                    subject=subject, recipients=recipient_string)
            )
            try:
                yield idiokit.thread(server.sendmail, from_addr[1], actual_recipients, msg_data)
            except smtplib.SMTPDataError as data_error:
                self.log.error(
                    u"Could not send the message to {recipients}: {error}. Dropping message from queue".format(
                        recipients=recipient_string,
                        error=utils.format_exception(data_error))
                )
            except smtplib.SMTPRecipientsRefused as refused:
                # One log line per refused recipient, each with its reason.
                for recipient, reason in refused.recipients.iteritems():
                    self.log.error(
                        u"Could not send the message to {recipients}: {error}. Dropping message from queue".format(
                            recipients=recipient_string,
                            error=utils.format_exception(reason))
                    )
            except (socket.error, smtplib.SMTPException) as exc:
                self.log.error(
                    u"Could not send the message to {recipients}: {error}".format(
                        recipients=recipient_string,
                        error=utils.format_exception(exc))
                )
                if retries >= 1:
                    self.log.info(u"Retrying sending in 60 seconds")
                    self.requeue(60.0, retries=retries - 1)
                else:
                    self.log.error(u"Failed all retries, dropping the mail from the queue")
            else:
                sent = True
                self.log.info(
                    u'Sent message "{subject}" to {recipients}'.format(
                        subject=subject, recipients=recipient_string),
                    event=event.union(status="sent"),
                )
        finally:
            # QUIT fails on a connection that is already gone (e.g. after
            # the send error handled above). The outcome of the delivery is
            # decided by now, so the failure is logged instead of being
            # allowed to replace that outcome with an exception.
            try:
                yield idiokit.thread(server.quit)
            except (socket.error, smtplib.SMTPException) as quit_error:
                self.log.error(
                    u"Could not close the SMTP connection cleanly: {error}".format(
                        error=utils.format_exception(quit_error))
                )

    idiokit.stop(sent)
def fetch_url(url, opener=None, timeout=60.0, chunk_size=16384, cookies=None, auth=None, cert=None, verify=True, proxies=None):
    """
    Download ``url`` in worker threads and finish with the parsed headers
    and the buffered body.

    url -- URL string or ``urllib2.Request``.
    opener -- no longer supported; anything but None raises TypeError.
    timeout -- passed through to the opener's ``open``.
    chunk_size -- number of bytes requested per ``read`` call.
    cookies -- when given, handed to ``urllib2.HTTPCookieProcessor``.
    auth -- optional ``(username, password)`` pair for HTTP basic auth,
        registered for the URL being fetched.
    cert, verify -- forwarded to ``_CustomHTTPSHandler``.
    proxies -- forwarded to ``urllib2.ProxyHandler``.

    Finishes through ``idiokit.stop(info, output)`` where ``info`` is the
    response header block re-parsed with ``email.parser`` and ``output`` is
    a ``StringIO`` rewound to position 0.

    Raises HTTPError for ``urllib2.HTTPError``, FetchUrlTimeout when
    ``_is_timeout`` recognizes the failure, SSLCertificateError for
    ``CertificateError`` and FetchUrlFailed for other URL, socket and
    httplib errors.
    """
    if opener is not None:
        raise TypeError("'opener' argument is no longer supported")

    handlers = [
        _CustomHTTPSHandler(cert=cert, verify=verify),
        urllib2.ProxyHandler(proxies),
    ]

    if cookies is not None:
        handlers.append(urllib2.HTTPCookieProcessor(cookies))

    if auth is not None:
        username, password = auth
        passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        # A Request object carries its address inside; a string is the URL.
        target = url.get_full_url() if isinstance(url, urllib2.Request) else url
        passmgr.add_password(None, target, username, password)
        handlers.append(urllib2.HTTPBasicAuthHandler(passmgr))

    opener = urllib2.build_opener(*handlers)

    try:
        body = StringIO()

        response = yield idiokit.thread(opener.open, url, timeout=timeout)
        try:
            # Read until the first empty chunk, which marks the end of data.
            chunk = yield idiokit.thread(response.read, chunk_size)
            while chunk:
                body.write(chunk)
                chunk = yield idiokit.thread(response.read, chunk_size)
        finally:
            response.close()

        raw_headers = str(response.info())
        headers = email.parser.Parser().parsestr(raw_headers, headersonly=True)

        body.seek(0)
        idiokit.stop(headers, body)
    except urllib2.HTTPError as he:
        raise HTTPError(he.code, he.msg, he.hdrs, he.fp)
    except urllib2.URLError as url_error:
        if _is_timeout(url_error.reason):
            raise FetchUrlTimeout("fetching URL timed out")
        raise FetchUrlFailed(str(url_error))
    except socket.error as sock_error:
        if _is_timeout(sock_error):
            raise FetchUrlTimeout("fetching URL timed out")
        raise FetchUrlFailed(str(sock_error))
    except CertificateError as cert_error:
        raise SSLCertificateError(str(cert_error))
    except httplib.HTTPException as http_error:
        raise FetchUrlFailed(str(http_error))
def report(self, eventlist, retries=None, to=[], cc=[], bcc=[], **keys):
    """
    Build a mail from ``eventlist`` and send it over SMTP.

    eventlist -- events the mail is built from; an empty list means the
        mail is built but skipped.
    retries -- how many times the mail may still be requeued after a
        socket/SMTP failure; defaults to ``self.max_retries``.
    to, cc, bcc -- recipient addresses used for the mail headers.
    keys -- passed on to ``self.build_mail``.

    When ``self.mail_receiver_override`` is set the mail is delivered to
    those addresses instead of the header recipients. Data errors and
    refused recipients drop the mail; other socket/SMTP errors requeue it
    in 60 seconds while retries remain.

    Finishes through ``idiokit.stop(sent)`` where ``sent`` is True only
    when ``sendmail`` completed without an error.
    """
    if retries is None:
        retries = self.max_retries

    msg = yield self.build_mail(eventlist, to=to, cc=cc, bcc=bcc, **keys)

    prep_recipient_header(msg, "to", to)
    prep_recipient_header(msg, "cc", cc)
    prep_recipient_header(msg, "bcc", bcc)

    # FIXME: Use encoding after getaddresses
    from_addr = getaddresses([self.mail_sender])[0]
    if "from" not in msg:
        msg["from"] = formataddr(from_addr)

    subject = decode_subject(msg.get("subject", ""))

    header_recipients = clean_recipients(
        msg.get_all("to", []) + msg.get_all("cc", []) + msg.get_all("bcc", []))
    if self.mail_receiver_override is not None:
        actual_recipients = clean_recipients(self.mail_receiver_override)
        recipient_string = u"{actual_recipients} (overridden from {header_recipients})".format(
            actual_recipients=format_recipients(actual_recipients),
            header_recipients=format_recipients(header_recipients)
        )
    else:
        actual_recipients = header_recipients
        recipient_string = unicode(format_recipients(actual_recipients))

    # No need to keep both the mail object and mail data in memory.
    msg_data = msg.as_string()
    del msg

    # Base event attached to the log lines that report the final status.
    event = events.Event({
        "type": "mail",
        "subject": subject,
        "to": to,
        "cc": cc,
        "bcc": bcc,
        "sender": from_addr[1],
        "recipients": actual_recipients,
        "event count": unicode(len(eventlist))
    })

    sent = False
    if not actual_recipients:
        self.log.info(
            u"Skipped message \"{subject}\": {recipients}".format(
                subject=subject,
                recipients=recipient_string
            ),
            event=event.union(status="skipped (no recipients)")
        )
    elif not eventlist:
        self.log.info(
            u"Skipped message \"{subject}\" to {recipients}: no events".format(
                subject=subject,
                recipients=recipient_string
            ),
            event=event.union(status="skipped (no events)")
        )
    else:
        server = yield self._connect(self.smtp_host, self.smtp_port)
        try:
            yield self._login(server, self.smtp_auth_user, self.smtp_auth_password)

            self.log.info(u"Sending message \"{subject}\" to {recipients}".format(
                subject=subject,
                recipients=recipient_string
            ))
            try:
                yield idiokit.thread(server.sendmail, from_addr[1], actual_recipients, msg_data)
            except smtplib.SMTPDataError as data_error:
                self.log.error(u"Could not send the message to {recipients}: {error}. Dropping message from queue".format(
                    recipients=recipient_string,
                    error=utils.format_exception(data_error)
                ))
            except smtplib.SMTPRecipientsRefused as refused:
                # One log line per refused recipient, each with its reason.
                for recipient, reason in refused.recipients.iteritems():
                    self.log.error(u"Could not send the message to {recipients}: {error}. Dropping message from queue".format(
                        recipients=recipient_string,
                        error=utils.format_exception(reason)
                    ))
            except (socket.error, smtplib.SMTPException) as exc:
                self.log.error(u"Could not send the message to {recipients}: {error}".format(
                    recipients=recipient_string,
                    error=utils.format_exception(exc)
                ))
                if retries >= 1:
                    self.log.info(u"Retrying sending in 60 seconds")
                    self.requeue(60.0, retries=retries - 1)
                else:
                    self.log.error(u"Failed all retries, dropping the mail from the queue")
            else:
                sent = True
                self.log.info(
                    u"Sent message \"{subject}\" to {recipients}".format(
                        subject=subject,
                        recipients=recipient_string
                    ),
                    event=event.union(status="sent")
                )
        finally:
            # QUIT fails on a connection that is already gone (e.g. after
            # the send error handled above). The outcome of the delivery is
            # decided by now, so the failure is logged instead of being
            # allowed to replace that outcome with an exception.
            try:
                yield idiokit.thread(server.quit)
            except (socket.error, smtplib.SMTPException) as quit_error:
                self.log.error(u"Could not close the SMTP connection cleanly: {error}".format(
                    error=utils.format_exception(quit_error)
                ))

    idiokit.stop(sent)
def fetch_url(
    url,
    opener=None,
    timeout=60.0,
    chunk_size=16384,
    cookies=None,
    auth=None,
    cert=None,
    verify=True,
    proxies=None
):
    """
    Download ``url`` in worker threads and finish with the parsed headers
    and the buffered body.

    url -- URL string or ``urllib2.Request``.
    opener -- no longer supported; anything but None raises TypeError.
    timeout -- passed through to the opener's ``open``.
    chunk_size -- number of bytes requested per ``read`` call.
    cookies -- when given, handed to ``urllib2.HTTPCookieProcessor``.
    auth -- optional ``(username, password)`` pair for HTTP basic auth,
        registered for the URL being fetched.
    cert, verify -- forwarded to ``_CustomHTTPSHandler``.
    proxies -- forwarded to ``urllib2.ProxyHandler``.

    Finishes through ``idiokit.stop(info, output)`` where ``info`` is the
    response header block re-parsed with ``email.parser`` and ``output`` is
    a ``StringIO`` rewound to position 0.

    Raises HTTPError for ``urllib2.HTTPError``, FetchUrlTimeout when
    ``_is_timeout`` recognizes the failure and FetchUrlFailed for other
    URL, socket and httplib errors.
    """
    if opener is not None:
        raise TypeError("'opener' argument is no longer supported")

    handlers = [
        _CustomHTTPSHandler(cert=cert, verify=verify),
        urllib2.ProxyHandler(proxies),
    ]

    if cookies is not None:
        handlers.append(urllib2.HTTPCookieProcessor(cookies))

    if auth is not None:
        username, password = auth
        passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        # Credentials are registered for the address actually requested.
        if isinstance(url, urllib2.Request):
            protected_uri = url.get_full_url()
        else:
            protected_uri = url
        passmgr.add_password(None, protected_uri, username, password)
        handlers.append(urllib2.HTTPBasicAuthHandler(passmgr))

    opener = urllib2.build_opener(*handlers)

    try:
        buffered = StringIO()

        remote = yield idiokit.thread(opener.open, url, timeout=timeout)
        try:
            # An empty read marks the end of the response body.
            piece = yield idiokit.thread(remote.read, chunk_size)
            while piece:
                buffered.write(piece)
                piece = yield idiokit.thread(remote.read, chunk_size)
        finally:
            remote.close()

        header_text = str(remote.info())
        parsed_headers = email.parser.Parser().parsestr(header_text, headersonly=True)

        buffered.seek(0)
        idiokit.stop(parsed_headers, buffered)
    except urllib2.HTTPError as he:
        raise HTTPError(he.code, he.msg, he.hdrs, he.fp)
    except urllib2.URLError as url_error:
        if _is_timeout(url_error.reason):
            raise FetchUrlTimeout("fetching URL timed out")
        raise FetchUrlFailed(str(url_error))
    except socket.error as sock_error:
        if _is_timeout(sock_error):
            raise FetchUrlTimeout("fetching URL timed out")
        raise FetchUrlFailed(str(sock_error))
    except httplib.HTTPException as http_error:
        raise FetchUrlFailed(str(http_error))