def run():
    """Fetch raw mail and apply every message to its ticket.

    Each message returned by ``mail.get_raw_mail()`` is parsed with
    ``parse_msg_data``.  A message without a ticket id triggers a rejection
    mail to the sender's address; otherwise the ticket is updated.  A
    ``TicketUpdateException`` is logged and processing moves on to the next
    message.
    """
    for msg_data in mail.get_raw_mail().values():
        logger.info("parsing message")
        ticket, body, sender, reply_all = parse_msg_data(msg_data)

        if ticket is None:
            logger.info("sending rejection message")
            # sender is a (name, address) pair; the rejection goes to the address
            send_rejection(sender[1])
            continue

        logger.info("updating ticket")
        try:
            update_ticket(ticket, body, sender, reply_all)
        except TicketUpdateException as err:
            logger.error(err)
        else:
            # Only the first 100 characters of the first body line are logged.
            first_line = body.partition('\n')[0]
            logger.info(
                "ticket updated – ID: {}, Body: {}, Sender: {}, Public: {}".format(
                    ticket, first_line[:100], sender, reply_all))
def get_support_emails():
    """Download every message stored in ``FROM_ARCHIVE_ACCOUNTS_FOLDER``.

    Returns:
        A list with the raw ``UID FETCH ... (BODY[])`` response items of all
        messages in the folder (fetched in chunks of 1000 UIDs), or an empty
        list when the folder holds no messages.

    Raises:
        SystemExit: with status 1 when the server does not answer ``OK`` to
            the search or to one of the fetch commands.
    """
    logger.info('getting archive support emails')
    # The context manager logs out on every exit path, including the early
    # return and the SystemExit paths below.
    with imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT) as imap4:
        imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

        status, response = imap4.uid('search', None, 'ALL')
        if status != 'OK':
            logger.error('unable to search email server')
            # `exit()` is injected by the `site` module and is not guaranteed
            # to exist; raising SystemExit directly is equivalent.
            raise SystemExit(1)

        # response of the form: [b'1 2 3 4'] (or [b''] / [None] when empty)
        raw_ids = response[0] if response and response[0] else b''
        msg_ids = raw_ids.decode().split()
        if not msg_ids:
            logger.info('no support emails to match against')
            return []
        logger.info('found {} support emails'.format(len(msg_ids)))

        responses = []
        for msg_ids_chunk in chunked(msg_ids, 1000):
            logger.debug('getting a message chunk')
            status, response = imap4.uid('fetch', ','.join(msg_ids_chunk),
                                         '(BODY[])')
            if status != 'OK':
                logger.error('unable to fetch from email server')
                raise SystemExit(1)
            responses.extend(response)

        imap4.close()
    return responses
def hotswap_zd_msgs(zendesk_msgs):
    """Replace each message's ``BODY[TEXT]`` with the raw email from Zendesk.

    The ticket id is read from the ``ZD<digits>:`` marker in the envelope
    subject.  The ids are handed to ``apiservice`` to look up the first audit
    of every ticket and then the raw email of that audit.  ``zendesk_msgs`` is
    modified in place; a message whose raw email could not be obtained gets
    ``b''`` as its body.

    Args:
        zendesk_msgs: mapping of message id to a dict holding at least
            ``b'ENVELOPE'``.
    """
    started_at = time.monotonic()
    # Raw bytes literal: `\d` is not a valid escape in a plain literal.
    pattern = re.compile(rb'.*ZD(\d+):.*')
    msg_ids = list(zendesk_msgs)

    ticket_ids = []
    for msg_id in msg_ids:
        ticket_id = None
        try:
            bin_subject = zendesk_msgs[msg_id][b'ENVELOPE'].subject
        except (KeyError, AttributeError, TypeError) as e:
            logger.error('bad subject line {}'.format(e))
        else:
            # A missing subject is treated like a subject without a ticket id.
            subject_match = pattern.match(bin_subject or b'')
            if subject_match is None:
                logger.error(
                    'bad subject line invalid subject line "{}"'.format(
                        (bin_subject or b'').decode(errors='replace')))
            else:
                ticket_id = int(subject_match.group(1).decode())
        # Keep one entry per message so the lists stay aligned with msg_ids.
        ticket_ids.append(ticket_id)

    first_audit_ids = apiservice.concurrent_get_first_comments(ticket_ids)
    raw_emails = apiservice.concurrent_get_raw_emails(ticket_ids,
                                                      first_audit_ids)
    for msg_id, raw in zip(msg_ids, raw_emails):
        zendesk_msgs[msg_id][b'BODY[TEXT]'] = raw if raw is not None else b''

    logger.debug('completed hotswap in {} seconds'.format(
        round(time.monotonic() - started_at, 2)))
def get_old_ids(msgs, cutoff):
    """Return the ids of all messages dated before ``cutoff``.

    The envelope date is used when present, otherwise the message's
    ``INTERNALDATE``.  A message with neither date is logged and skipped,
    because it cannot be compared against the cutoff.

    Args:
        msgs: mapping of message id to a dict holding ``b'ENVELOPE'`` and
            optionally ``b'INTERNALDATE'``.
        cutoff: value the message dates are compared against with ``<``.

    Returns:
        List of message ids, in iteration order of ``msgs``.
    """
    old = []
    for msg_id, msg in msgs.items():
        date = msg[b'ENVELOPE'].date
        if date is None:
            date = msg.get(b'INTERNALDATE')
        if date is None:
            logger.error('message found with not internal date')
            # Comparing None with the cutoff would raise TypeError.
            continue
        if date < cutoff:
            old.append(msg_id)
    return old
def consult():
    """
    Email the consultants named in an authenticated JSON request.

    payload structure:
    {
        "ticket_id": Int,
        "consultant": "<consultant>@archive.org, ...",
        "subject": String,
        "body": String,
        "html_body": String,
    }
    :return: ``(json response, status)`` - 401 when basic auth is missing or
        wrong, 400 when the payload is not a JSON object with all required
        keys, 200 once every consultant has been mailed.
    """
    import hmac  # function-scope: keeps the file's import block untouched

    def credential_matches(supplied, expected):
        # Constant-time comparison; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        if supplied is None:
            return False
        return hmac.compare_digest(
            str(supplied).encode('utf-8'), str(expected).encode('utf-8'))

    logger.info("{} request from {}".format(request.method, request.origin))

    auth = request.authorization
    if auth is None:
        message = "Provide basic auth to use this service."
        logger.error(message)
        return jsonify({"Error": message}), 401

    # Evaluate both checks so a failure does not reveal which field was wrong.
    username_ok = credential_matches(auth['username'],
                                     env['ZENDESK_TRIGGER_USERNAME'])
    password_ok = credential_matches(auth['password'],
                                     env['ZENDESK_TRIGGER_PASSWORD'])
    if not (username_ok and password_ok):
        message = "Invalid Username/Password"
        logger.error(message)
        return jsonify({"Error": message}), 401

    try:
        payload = request.get_json()
    except BadRequest:
        message = "Bad Request: Could not parse json object"
        logger.error(message)
        return jsonify({"Error": message}), 400

    # verify correct keys; a payload that is not a JSON object (e.g. null)
    # is rejected here instead of raising on the membership test
    required_keys = ['consultant', 'subject', 'body', 'html_body', 'ticket_id']
    if not isinstance(payload, dict) or any(
            key not in payload for key in required_keys):
        logger.error("Invalid data keys")
        return jsonify({
            "Error":
            "Json object must contain the following non-optional keys",
            "keys": required_keys
        }), 400

    # send mail; literal backslash-n sequences in the payload become newlines
    body = (INTERNAL_MESSAGE_PLAIN + payload['body']).replace('\\n', '\n')
    html_body = (INTERNAL_MESSAGE_HTML + payload['html_body']).replace(
        '\\n', '\n')
    sender = '{} <{}>'.format(MAILBOT_NAME, env['MAILBOT_ADDRESS'])
    subject = SUBJECT_PATTERN.format(payload['ticket_id'], payload['subject'])
    cc = ['{} <{}>'.format(MAILBOT_CC_NAME, env['MAILBOT_CC_ADDRESS'])]

    for consultant in payload['consultant'].replace(' ', '').split(','):
        if not consultant:
            # stray commas must not produce a mail without a receiver
            continue
        mail.send_mail(
            sender=sender,
            receiver=consultant,
            subject=subject,
            body=body,
            html_body=html_body,
            cc=cc)

    return jsonify({"Success": "Consultant has been emailed"}), 200
def text_match(zd_msg, archive_msg, threshold=0.90):
    """Decide whether two messages carry (nearly) the same body text.

    Checks run from cheapest to most expensive: exact equality, then
    ``difflib``'s ``quick_ratio`` as an upper-bound filter, then a
    diff-match-patch Levenshtein ratio.

    Args:
        zd_msg: dict with ``b'ENVELOPE'`` and ``b'BODY[TEXT]'`` (bytes).
        archive_msg: dict with the same two keys.
        threshold: similarity that must be exceeded for a match.

    Returns:
        True when the bodies are identical or their similarity ratio is
        above ``threshold``; False otherwise, including when either body is
        missing or cannot be decoded.
    """
    zd_subject = zd_msg[b'ENVELOPE'].subject
    if zd_subject is None:
        logger.warning('found message from zendesk with no subject')
    zd_subject = '' if zd_subject is None else zd_subject.decode()

    archive_subject = archive_msg[b'ENVELOPE'].subject
    if archive_subject is None:
        logger.warning('found message from archive with no subject')
    archive_subject = ('' if archive_subject is None
                       else archive_subject.decode())

    try:
        zd_text = zd_msg[b'BODY[TEXT]'].decode()
        archive_text = archive_msg[b'BODY[TEXT]'].decode()
    except Exception:
        # Best effort: an unusable body is "no match".  Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        logger.error('found msg with no body. subject: "{}" or "{}"'.format(
            zd_subject, archive_subject))
        return False

    # Fast path: identical bodies need no diffing
    if zd_text == archive_text:
        logger.info('COMPLETE MATCH')
        return True

    # Preliminary check
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(zd_text, archive_text)
    qr = matcher.quick_ratio()
    if qr < threshold:
        logger.debug('quick ratio: {} - "{}" and "{}" don\'t match'.format(
            qr, zd_subject, archive_subject))
        return False

    # Full check
    dmp = dmp_module.diff_match_patch()
    dmp.Diff_Timeout = 0.2
    diff = dmp.diff_main(zd_text, archive_text)
    d = dmp.diff_levenshtein(diff)
    # The texts differ here, so at least one is non-empty and max() > 0.
    ratio = 1 - d / max(len(zd_text), len(archive_text))
    verdict = ratio > threshold
    if verdict:
        logger.info('full ratio: {} - "{}" and "{}" FULL MATCH'.format(
            round(ratio, 4), zd_subject, archive_subject))
    else:
        logger.debug('full ratio: {} - "{}" and "{}" no match'.format(
            round(ratio, 4), zd_subject, archive_subject))
    return verdict
def process_events(ticket_events):
    """Collect the comments contained in the given ticket events.

    Every child event whose ``event_type`` is ``comment`` (case-insensitive)
    contributes one ``(timestamp, body, ticket_id)`` triple, where timestamp
    and ticket id come from the parent event.  An error is logged for each
    comment child after the first within a single event.

    Returns:
        List of ``(timestamp, comment, ticket_id)`` triples.
    """
    zendesk_comments = []  # [(timestamp, comment, ticket_id)...]
    for event in ticket_events:
        comment_children = (
            child for child in event['child_events']
            if child['event_type'].lower() == 'comment')
        # `seen` is the running count of comment children in this event
        for seen, child in enumerate(comment_children, start=1):
            if seen > 1:
                logger.error(
                    'found {} comment children in single event'.format(seen))
            zendesk_comments.append(
                (event['timestamp'], child['body'], event['ticket_id']))
    logger.info('found {} zendesk comments'.format(len(zendesk_comments)))
    return zendesk_comments
def parse_msg_data(msg_data):
    """Extract ticket id, reply text, sender and public flag from a message.

    Args:
        msg_data: dict holding ``b'ENVELOPE'`` and ``b'BODY[]'``.

    Returns:
        ``(ticket, response_body, sender, reply_all)`` where ``ticket`` is
        the id string matched in the subject (``None`` when the subject does
        not fit ``SUBJECT_PATTERN``), ``response_body`` is the result of
        ``get_plain_response_body``, ``sender`` is a ``(name, address)`` pair
        and ``reply_all`` is True when ``MAILBOT_CC_ADDRESS`` is among the
        cc addresses.
    """
    envelope = msg_data[b'ENVELOPE']

    # get ticket id
    subject_pattern = re.compile(
        SUBJECT_PATTERN.format(
            r'(?P<id>\d+)',  # ticket number
            '.*'  # original subject line
        ))
    # A message without a subject is handled like one with a bad subject.
    subject = ('' if envelope.subject is None
               else envelope.subject.decode(errors='replace'))
    match = subject_pattern.search(subject)
    if match is None:
        logger.error(
            'Received mail with invalid subject line: "{}"'.format(subject))
        ticket = None
    else:
        ticket = match.group('id')

    # get body
    response_body = get_plain_response_body(msg_data[b'BODY[]'])

    # get sender: prefer a single From, then a single Reply-To, then Sender.
    # Absent address lists arrive as None, so guard before calling len().
    if envelope.from_ and len(envelope.from_) == 1:
        address = envelope.from_[0]
    elif envelope.reply_to and len(envelope.reply_to) == 1:
        address = envelope.reply_to[0]
    else:
        address = envelope.sender[0]
    # ("John Smith", "*****@*****.**"); the display name may be absent
    display_name = ('' if address.name is None
                    else address.name.decode(errors='replace'))
    sender = (display_name,
              "{}@{}".format(address.mailbox.decode(), address.host.decode()))

    # get public flag
    reply_all = False
    if envelope.cc is not None:
        reply_all = any(
            "{}@{}".format(cc.mailbox.decode(), cc.host.decode()) ==
            env['MAILBOT_CC_ADDRESS'] for cc in envelope.cc)

    return ticket, response_body, sender, reply_all
def concurrent_get_raw_emails(ticket_ids, first_audit_ids):
    """Fetch the raw email body behind the first audit of each ticket.

    Requests are issued through the session returned by
    ``get_logged_in_future_sesh()``, pausing ``60 / ZENDESK_API_RATE_LIMIT``
    seconds after each one, and are collected afterwards.

    Args:
        ticket_ids: ticket ids, ``None`` for entries to skip.
        first_audit_ids: audit ids aligned with ``ticket_ids``, ``None`` for
            entries to skip.

    Returns:
        List aligned with the inputs: the email body (headers stripped) as
        bytes, or ``None`` for skipped entries, non-200 responses and
        responses that could not be processed.
    """
    assert (len(ticket_ids) == len(first_audit_ids)
            ), 'unmatched ticket and first audit ids'
    url_template = 'https://archivesupport.zendesk.com/audits/{}/email.eml?ticket_id={}'
    session = get_logged_in_future_sesh()

    # Phase 1: fire off the requests, keeping a None placeholder per skip.
    pending = []
    for position, (t_id, fa_id) in enumerate(zip(ticket_ids,
                                                  first_audit_ids)):
        logger.debug('getting raw email future for ticket #{} {}/{}'.format(
            t_id, position, len(ticket_ids)))
        if t_id is not None and fa_id is not None:
            pending.append(session.get(url_template.format(fa_id, t_id)))
            time.sleep(60 / ZENDESK_API_RATE_LIMIT)
        else:
            pending.append(None)

    # Phase 2: resolve the futures in order.
    raw_emails = []
    for position, future in enumerate(pending):
        if future is None:
            raw_emails.append(None)
            continue

        response = future.result()
        if response.status_code != 200:
            logger.error('bad status code {}: {}'.format(
                response.status_code, response.content))
            raw_emails.append(None)
            continue

        try:
            buf = io.StringIO(response.content.decode())
            # Skip the header block: read up to and including the first
            # blank line (or to EOF when there is none).
            while buf.readline().strip():
                pass
            body = buf.read().encode()
        except Exception as err:
            logger.error('{}#{} problem while stripping headers: {}'.format(
                first_audit_ids[position], ticket_ids[position], err))
            body = None
        raw_emails.append(body)
    return raw_emails
def get_plain_response_body(msg_bytes):
    """Return the reply text of a raw email without the quoted original.

    The plain-text body is preferred; an HTML body is converted with
    html2text.  Quoted content is located through the line containing
    ``DELIMITER``: whatever precedes the delimiter on that line is taken as
    the quote prefix and every line starting with it is dropped.  When the
    delimiter line has no prefix, everything from that line on is dropped.
    When no delimiter is present the text is kept as is.  Finally trailing
    blank lines and a trailing attribution line mentioning ``MAILBOT_NAME``
    and/or ``MAILBOT_ADDRESS`` are removed.

    Args:
        msg_bytes: the complete raw message as bytes.

    Returns:
        The cleaned reply text; ``''`` when the message has no usable plain
        or HTML body or nothing remains after cleaning.
    """
    msg = BytesParser(policy=policy.default).parsebytes(msg_bytes)

    part = msg.get_body(preferencelist=('plain', ))
    if part is not None:
        try:
            plain = part.get_content().replace('\r\n', '\n')
        except LookupError as e:  # unknown charset
            logger.error(e)
            return ''
    else:
        part = msg.get_body(preferencelist=('html', ))
        if part is None:
            logger.error('Found message with no plain or html body')
            return ''
        try:
            html_content = part.get_content()
        except LookupError as e:  # unknown charset
            logger.error(e)
            return ''
        h = html2text.HTML2Text()
        h.body_width = 0
        h.ignore_links = True
        plain = h.handle(html_content)

    # removing quoted content
    lines = plain.split('\n')
    content_lines = list(lines)
    for index, line in enumerate(lines):
        j = line.find(DELIMITER)
        if j == -1:
            continue
        # found DELIMITER line
        quoted_prefix = line[:j].strip()
        if quoted_prefix:
            content_lines = [
                candidate for candidate in lines
                if not candidate.startswith(quoted_prefix)
            ]
        else:
            # Every string starts with '', so filtering on an empty prefix
            # would discard the reply itself; cut at the delimiter instead.
            content_lines = lines[:index]
        break

    # removing "On <date> Support Team wrote:" (may be wrapped over two
    # lines); each step guards against an already empty list
    while content_lines and content_lines[-1].strip() == '':
        content_lines.pop()
    if content_lines and content_lines[-1].find(MAILBOT_NAME) != -1:
        content_lines.pop()
    if content_lines and content_lines[-1].find(env['MAILBOT_ADDRESS']) != -1:
        content_lines.pop()

    return '\n'.join(content_lines).strip()
def concurrent_get_first_comments(ticket_ids):
    """Look up the id of the first audit of each ticket.

    Tickets are processed in chunks of 1000.  Within a chunk all requests
    are issued first (pausing ``60 / ZENDESK_API_RATE_LIMIT`` seconds after
    each one) and resolved afterwards.

    Args:
        ticket_ids: ticket ids, ``None`` for entries to skip.

    Returns:
        List aligned with ``ticket_ids``: the ``id`` of the first entry in
        the response's ``audits`` list, or ``None`` for skipped entries,
        non-200 responses and responses that could not be parsed.
    """
    session = FuturesSession()
    url_template = 'https://archivesupport.zendesk.com/api/v2/tickets/{}/audits.json'
    audit_ids = []

    for ticket_ids_chunk in chunked(ticket_ids, 1000):
        # Issue the requests for this chunk, keeping None placeholders.
        audit_futures = []
        for position, ticket_id in enumerate(ticket_ids_chunk):
            if ticket_id is not None:
                logger.debug('getting audit future for ticket {}/{}'.format(
                    position, len(ticket_ids_chunk)))
                url = url_template.format(ticket_id)
                credentials = HTTPBasicAuth(
                    env['ZENDESK_AGENT_ACCOUNT'] + "/token",
                    env['ZENDESK_API_KEY'])
                audit_futures.append(session.get(url, auth=credentials))
                time.sleep(60 / ZENDESK_API_RATE_LIMIT)
            else:
                audit_futures.append(None)

        # Resolve them in order; one output entry per input ticket.
        for ticket_id, future in zip(ticket_ids_chunk, audit_futures):
            if future is None:
                audit_ids.append(None)
                continue

            response = future.result()
            if response.status_code != 200:
                logger.error('ticket #{} bad status code {}: {}'.format(
                    ticket_id, response.status_code, response.content))
                audit_ids.append(None)
                continue

            try:
                first_audit_id = response.json()['audits'][0]['id']
            except Exception as err:
                logger.error('while parsing result for #{} {}'.format(
                    ticket_id, err))
                first_audit_id = None
            audit_ids.append(first_audit_id)
    return audit_ids
def parse_emails_response(support_emails_response):
    """Turn raw IMAP fetch items into ``(timestamp, body, uid)`` triples.

    Args:
        support_emails_response: list of fetch response items; message
            items are ``(header bytes, message bytes)`` tuples, anything
            else is skipped.

    Returns:
        List of ``(time_stamp, body, msg_id)`` where ``time_stamp`` is parsed
        from the date part of the ``Received`` header, ``body`` is the plain
        text body (or the HTML body converted with html2text) and ``msg_id``
        is the UID from the fetch header.  Messages lacking a UID, a usable
        ``Received`` date or a decodable body are logged and left out.
    """
    # these configurations match what we get from zendesk
    # NOTE: this mutates html2text's module-level configuration.
    html2text.config.IGNORE_TABLES = True
    html2text.config.IGNORE_IMAGES = False
    h = html2text.HTML2Text()
    h.body_width = 0
    h.ignore_links = True

    # Raw bytes literal: `\d` is not a valid escape in a plain literal.
    id_pattern = re.compile(rb".*UID (\d+) .*")

    # collect decorated messages [(timestamp, comment, id)...]
    support_decorated_comments = []
    logger.info('parsing archive support email data')
    for li in support_emails_response:
        # The fetch response interleaves bare bytes items (such as b')')
        # with the (header, literal) tuples; only the tuples are messages.
        if not isinstance(li, tuple):
            continue
        id_bytes, msg_bytes = li

        # get id
        id_match = id_pattern.match(id_bytes.strip())
        if id_match is None:
            logger.error('Found fetch item without a UID: {}'.format(id_bytes))
            continue
        msg_id = int(id_match.group(1))

        msg = BytesParser(policy=policy.default).parsebytes(msg_bytes)

        # get time stamp
        received = msg['Received']
        if received is None:
            logger.error(
                'Found message {} with no Received header'.format(msg_id))
            continue
        time_str = received.split(';')[-1].strip()
        try:
            time_stamp = parser.parse(time_str).timestamp()
        except (ValueError, OverflowError) as e:
            logger.error(e)
            continue

        # get message body
        raw = msg.get_body(preferencelist=('plain', ))
        if raw is not None:
            try:
                body = raw.get_content()
            except LookupError as e:
                logger.error(e)
                continue
        else:
            raw = msg.get_body(preferencelist=('html', ))
            if raw is None:
                logger.error('Found message with no plain or html body')
                continue
            try:
                html_content = raw.get_content()
            except LookupError as e:
                logger.error(e)
                continue
            body = h.handle(html_content)

        support_decorated_comments.append((time_stamp, body, msg_id))
    return support_decorated_comments
def _move_messages(imap4, msg_ids, destination):
    """Copy the given UIDs to ``destination`` and flag the originals deleted."""
    for msg_ids_chunk in chunked(msg_ids, 1000):
        uids = ','.join(msg_ids_chunk)
        result, _ = imap4.uid('COPY', uids, destination)
        if result != 'OK':
            logger.error('unable to copy items')
            # Flagging originals that were never copied would lose them on
            # the next expunge, so leave this chunk untouched.
            continue
        result, _ = imap4.uid('STORE', uids, '+FLAGS', r'(\Deleted)')
        if result != 'OK':
            logger.error('unable to delete original versions')


def cleanup(zd_matched, zd_unmatched, archive_matched, archive_unmatched,
            zd_still_fresh_filename, start_time):
    """File away the results of a matching round.

    * Matched archive emails are moved to ``MATCHED_ARCHIVE_FOLDER``.
    * Unmatched Zendesk comments newer than ``MINUTES_GRACE_PERIOD`` are
      pickled, together with ``start_time``, to ``zd_still_fresh_filename``;
      older ones are appended to ``zd_unmatched.log``.
    * Unmatched archive emails older than the grace period are moved to
      ``UNMATCHED_ARCHIVE_FOLDER``.

    Args:
        zd_matched: not used by this function.
        zd_unmatched: ``(timestamp, comment, ticket_id)`` triples.
        archive_matched: ``(timestamp, body, msg_id)`` triples.
        archive_unmatched: ``(timestamp, body, msg_id)`` triples.
        zd_still_fresh_filename: path of the pickle file to write.
        start_time: value stored alongside the still-fresh comments.
    """
    cutoff = datetime.datetime.now().timestamp() - MINUTES_GRACE_PERIOD * 60

    # The context manager logs out even when a step below raises.
    with imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT) as imap4:
        imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

        # move matched emails
        archive_matched_ids = [
            str(msg_id) for _, _, msg_id in archive_matched
        ]
        logger.info('moving {} matched emails'.format(
            len(archive_matched_ids)))
        _move_messages(imap4, archive_matched_ids, MATCHED_ARCHIVE_FOLDER)

        # deal with unmatched updates from Zendesk
        still_fresh = [triple for triple in zd_unmatched if triple[0] > cutoff]
        old = [triple for triple in zd_unmatched if not triple[0] > cutoff]

        # save still fresh for later
        logger.info('saving {} zd ticket messages for the next round'.format(
            len(still_fresh)))
        with open(zd_still_fresh_filename, 'wb') as fresh_file:
            pickle.dump((start_time, still_fresh), fresh_file)

        # log old unmatched ticket comments
        logger.info(
            'logging {} old zd ticket messages that went unmatched'.format(
                len(old)))
        with open('zd_unmatched.log', 'a') as f:
            for t, c, t_id in old:
                # NOTE(review): line breaks of this entry were reconstructed
                # from a whitespace-collapsed source - confirm the layout.
                f.write('\nTicket #{}\nTime: {}\nComment: {}\n'.format(
                    t_id, str(datetime.datetime.fromtimestamp(t)), c))

        # move old emails to unmatched
        old_archive_unmatched_ids = [
            str(msg_id) for t, _, msg_id in archive_unmatched if t < cutoff
        ]
        logger.info('moving {} old unmatched archive emails'.format(
            len(old_archive_unmatched_ids)))
        _move_messages(imap4, old_archive_unmatched_ids,
                       UNMATCHED_ARCHIVE_FOLDER)

        # we're done here
        imap4.expunge()
        imap4.close()