示例#1
0
def run():
    """Process every fetched mail message and apply it to its ticket.

    For each raw message returned by ``mail.get_raw_mail()`` the message is
    parsed with ``parse_msg_data``.  Messages without a ticket id trigger a
    rejection mail to the sender's address; all others are forwarded to
    ``update_ticket``.  A ``TicketUpdateException`` is logged and the message
    is skipped.
    """
    # NOTE: an earlier retrieval path (setup_connection / get_new_msg_ids /
    # get_data_for_msgs, followed by a pickle dump to 'message-data.pickle')
    # is disabled; messages now come from mail.get_raw_mail().
    raw_messages = mail.get_raw_mail()

    for raw_message in raw_messages.values():
        logger.info("parsing message")
        ticket, body, sender, reply_all = parse_msg_data(raw_message)

        if ticket is None:
            # sender is a (name, address) pair; reject to the address
            logger.info("sending rejection message")
            send_rejection(sender[1])
            continue

        logger.info("updating ticket")
        try:
            update_ticket(ticket, body, sender, reply_all)
        except TicketUpdateException as exc:
            logger.error(exc)
        else:
            # only the first line of the body (max 100 chars) goes to the log
            first_line = body.partition('\n')[0][:100]
            logger.info(
                "ticket updated – ID: {}, Body: {}, Sender: {}, Public: {}".
                format(ticket, first_line, sender, reply_all))
示例#2
0
def get_support_emails():
    """Fetch every message stored in the archive-support IMAP folder.

    Logs in with the DIFFBOT credentials from ``env``, selects
    ``FROM_ARCHIVE_ACCOUNTS_FOLDER``, searches for all UIDs and fetches the
    full message (``BODY[]``) for them in chunks of 1000 UIDs.

    The mailbox is closed and the session logged out on every exit path,
    including the early return and the fatal-error exits.

    Returns:
        list: the concatenated raw ``imaplib`` FETCH response items, or an
        empty list when the folder contains no messages.

    Raises:
        SystemExit: with status 1 when the server does not answer ``OK`` to
            the select, search or fetch command.
    """
    logger.info('getting archive support emails')
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    try:
        imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        status, _ = imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)
        if status != 'OK':
            logger.error('unable to select support folder')
            raise SystemExit(1)

        try:
            status, response = imap4.uid('search', None, 'ALL')
            if status != 'OK':
                logger.error('unable to search email server')
                raise SystemExit(1)
            # response of the form: [b'1 2 3 4']; an empty folder yields
            # [b''] or, when the server sends no SEARCH line, [None]
            if not response or not response[0]:
                logger.info('no support emails to match against')
                return []

            msg_ids = response[0].decode().split()
            logger.info('found {} support emails'.format(len(msg_ids)))
            responses = []
            for msg_ids_chunk in chunked(msg_ids, 1000):
                logger.debug('getting a message chunk')
                status, response = imap4.uid('fetch', ','.join(msg_ids_chunk),
                                             '(BODY[])')
                if status != 'OK':
                    logger.error('unable to fetch from email server')
                    raise SystemExit(1)
                responses.extend(response)
            return responses
        finally:
            # CLOSE is only legal once a mailbox has been selected
            imap4.close()
    finally:
        imap4.logout()
示例#3
0
def hotswap_zd_msgs(zendesk_msgs):
    """Replace the text body of each Zendesk message with its raw email.

    The ticket id is read from each message's envelope subject (expected to
    contain ``ZD<digits>:``).  The ids are passed to
    ``apiservice.concurrent_get_first_comments`` and
    ``apiservice.concurrent_get_raw_emails``; the results are written back
    into ``zendesk_msgs[msg_id][b'BODY[TEXT]']``.  Messages whose subject
    cannot be parsed, or for which no raw email was returned, get ``b''``.

    Args:
        zendesk_msgs: mapping of message id to fetched message data; it is
            modified in place.

    Returns:
        None
    """
    started_at = time.monotonic()

    # raw bytes literal: '\d' in a non-raw literal is an invalid escape
    pattern = re.compile(rb'.*ZD(\d+):.*')
    msg_ids = list(zendesk_msgs.keys())
    ticket_ids = []
    for msg_id in msg_ids:
        try:
            bin_subject = zendesk_msgs[msg_id][b'ENVELOPE'].subject
            subject_match = pattern.match(bin_subject)
            if subject_match is None:
                raise ValueError('invalid subject line "{}"'.format(
                    bin_subject.decode(errors='replace')))
            ticket_ids.append(int(subject_match.group(1).decode()))
        except (KeyError, AttributeError, TypeError, ValueError) as e:
            # keep a placeholder so ticket_ids stays aligned with msg_ids
            ticket_ids.append(None)
            logger.error('bad subject line {}'.format(e))

    first_audit_ids = apiservice.concurrent_get_first_comments(ticket_ids)
    raw_emails = apiservice.concurrent_get_raw_emails(ticket_ids,
                                                      first_audit_ids)

    for msg_id, raw in zip(msg_ids, raw_emails):
        zendesk_msgs[msg_id][b'BODY[TEXT]'] = raw if raw is not None else b''

    logger.debug('completed hotswap in {} seconds'.format(
        round(time.monotonic() - started_at, 2)))
示例#4
0
def get_old_ids(msgs, cutoff):
    """Return the ids of messages dated strictly before *cutoff*.

    The envelope date is used when present; otherwise the message's
    ``b'INTERNALDATE'`` entry is used.  Messages with neither date are
    logged and skipped instead of being compared against *cutoff*.

    Args:
        msgs: mapping of message id to fetched message data.
        cutoff: value comparable with the message dates.

    Returns:
        list: message ids in iteration order of *msgs*.
    """
    old = []
    for msg_id, msg in msgs.items():
        date = msg[b'ENVELOPE'].date
        if date is None:
            date = msg.get(b'INTERNALDATE')
        if date is None:
            logger.error('message found with not internal date')
            # without a date there is nothing to compare; skip the message
            continue
        if date < cutoff:
            old.append(msg_id)
    return old
示例#5
0
def consult():
    """Email the consultants named in a Zendesk trigger request.

    payload structure:
        {
            "ticket_id": Int,
            "consultant": "<consultant>@archive.org, ...",
            "subject": String,
            "body": String,
            "html_body": String,
        }

    The request must carry HTTP basic auth matching
    ``ZENDESK_TRIGGER_USERNAME`` / ``ZENDESK_TRIGGER_PASSWORD`` from ``env``.

    :return: a ``(json response, status)`` pair: 401 for missing or wrong
        credentials, 400 for an unparsable payload or one that is not an
        object with all required keys, 200 once the mail has been handed to
        ``mail.send_mail`` for every consultant.
    """
    import hmac  # local import: constant-time credential comparison

    logger.info("{} request from {}".format(request.method, request.origin))
    auth = request.authorization
    if auth is None:
        message = "Provide basic auth to use this service."
        logger.error(message)
        return jsonify({"Error": message}), 401

    supplied = (auth.username or '', auth.password or '')
    expected = (env['ZENDESK_TRIGGER_USERNAME'],
                env['ZENDESK_TRIGGER_PASSWORD'])
    # evaluate both comparisons (list, not generator) so timing does not
    # reveal which of the two fields was wrong
    checks = [
        hmac.compare_digest(given.encode('utf-8'), wanted.encode('utf-8'))
        for given, wanted in zip(supplied, expected)
    ]
    if not all(checks):
        message = "Invalid Username/Password"
        logger.error(message)
        return jsonify({"Error": message}), 401

    try:
        payload = request.get_json()
    except BadRequest:
        message = "Bad Request: Could not parse json object"
        logger.error(message)
        return jsonify({"Error": message}), 400

    # verify correct keys; a missing or non-object payload is rejected the
    # same way as one with missing keys
    required_keys = ['consultant', 'subject', 'body', 'html_body', 'ticket_id']
    if not isinstance(payload, dict) or any(
            key not in payload for key in required_keys):
        logger.error("Invalid data keys")
        return jsonify({
            "Error":
            "Json object must contain the following non-optional keys",
            "keys": required_keys
        }), 400

    # send mail
    body = INTERNAL_MESSAGE_PLAIN + payload['body']
    html_body = INTERNAL_MESSAGE_HTML + payload['html_body']
    # the payload carries literal backslash-n sequences; turn them into
    # real newlines
    body = body.replace('\\n', '\n')
    html_body = html_body.replace('\\n', '\n')

    sender = '{} <{}>'.format(MAILBOT_NAME, env['MAILBOT_ADDRESS'])
    subject = SUBJECT_PATTERN.format(payload['ticket_id'], payload['subject'])
    cc = ['{} <{}>'.format(MAILBOT_CC_NAME, env['MAILBOT_CC_ADDRESS'])]
    # skip empty entries produced by stray or trailing commas
    consultants = [
        address
        for address in payload['consultant'].replace(' ', '').split(',')
        if address
    ]
    for consultant in consultants:
        mail.send_mail(
            sender=sender,
            receiver=consultant,
            subject=subject,
            body=body,
            html_body=html_body,
            cc=cc)
    return jsonify({"Success": "Consultant has been emailed"}), 200
示例#6
0
def text_match(zd_msg, archive_msg, threshold=0.90):
    """Decide whether a Zendesk message and an archive message match.

    The ``b'BODY[TEXT]'`` entries of both messages are decoded and compared
    in three stages: exact equality, a cheap ``difflib`` quick ratio that
    must reach *threshold*, and finally a Levenshtein-based ratio from
    diff-match-patch that must exceed *threshold*.

    Args:
        zd_msg: fetched message data from Zendesk.
        archive_msg: fetched message data from the archive mailbox.
        threshold: similarity required for a match.

    Returns:
        bool: True when the bodies are considered the same text; False
        otherwise, including when either body is missing or undecodable.
    """
    # subjects are only used in log messages, so decode them leniently
    zd_subject = zd_msg[b'ENVELOPE'].subject
    if zd_subject is None:
        logger.warning('found message from zendesk with no subject')
        zd_subject = ''
    else:
        zd_subject = zd_subject.decode(errors='replace')

    archive_subject = archive_msg[b'ENVELOPE'].subject
    if archive_subject is None:
        logger.warning('found message from archive with no subject')
        archive_subject = ''
    else:
        archive_subject = archive_subject.decode(errors='replace')

    try:
        zd_text = zd_msg[b'BODY[TEXT]'].decode()
        archive_text = archive_msg[b'BODY[TEXT]'].decode()
    except (KeyError, AttributeError, UnicodeDecodeError):
        # body entry missing, None, or not valid UTF-8
        logger.error('found msg with no body. subject: "{}" or "{}"'.format(
            zd_subject, archive_subject))
        return False

    # Fast path: identical bodies need no fuzzy comparison
    if zd_text == archive_text:
        logger.info('COMPLETE MATCH')
        return True

    # Preliminary check
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(zd_text, archive_text)
    qr = matcher.quick_ratio()
    if qr < threshold:
        logger.debug('quick ratio: {} - "{}" and "{}" don\'t match'.format(
            qr, zd_subject, archive_subject))
        return False

    # Full check
    dmp = dmp_module.diff_match_patch()
    dmp.Diff_Timeout = 0.2
    diff = dmp.diff_main(zd_text, archive_text)
    d = dmp.diff_levenshtein(diff)
    # the texts differ here, so at least one is non-empty and max() > 0
    ratio = 1 - d / max(len(zd_text), len(archive_text))
    verdict = ratio > threshold
    if verdict:
        logger.info('full ratio: {} - "{}" and "{}" FULL MATCH'.format(
            round(ratio, 4), zd_subject, archive_subject))
    else:
        logger.debug('full ratio: {} - "{}" and "{}" no match'.format(
            round(ratio, 4), zd_subject, archive_subject))
    return verdict
示例#7
0
def process_events(ticket_events):
    """Collect the comment children of the given ticket events.

    Every child event whose ``event_type`` equals ``'comment'``
    (case-insensitively) contributes one
    ``(event timestamp, child body, event ticket_id)`` tuple.  An error is
    logged for each comment child beyond the first within a single event.

    Returns:
        list: the collected tuples, in encounter order.
    """
    collected = []  # [(timestamp, comment, ticket_id)...]

    for event in ticket_events:
        comment_children = (
            child for child in event['child_events']
            if child['event_type'].lower() == 'comment')

        # ordinal is the running count of comment children in this event
        for ordinal, child in enumerate(comment_children, start=1):
            if ordinal > 1:
                logger.error(
                    'found {} comment children in single event'.format(
                        ordinal))
            collected.append(
                (event['timestamp'], child['body'], event['ticket_id']))

    logger.info('found {} zendesk comments'.format(len(collected)))
    return collected
示例#8
0
def parse_msg_data(msg_data):
    """Extract ticket id, reply text, sender and reply-all flag from a message.

    Args:
        msg_data: fetched message data with ``b'ENVELOPE'`` and
            ``b'BODY[]'`` entries.

    Returns:
        tuple: ``(ticket, response_body, sender, reply_all)`` where
        ``ticket`` is the id captured from the subject (a string) or None
        when the subject is missing or does not match ``SUBJECT_PATTERN``;
        ``response_body`` is the result of ``get_plain_response_body``;
        ``sender`` is a ``(display name, address)`` pair (the name is ``''``
        when the envelope has none); ``reply_all`` is True when
        ``env['MAILBOT_CC_ADDRESS']`` appears among the CC addresses.
    """
    envelope = msg_data[b'ENVELOPE']

    # get ticket id
    # NOTE(review): SUBJECT_PATTERN is used as a regex template without
    # escaping; confirm it contains no unintended metacharacters.
    subject_pattern = re.compile(
        SUBJECT_PATTERN.format(
            r'(?P<id>\d+)',  # ticket number
            '.*'  # original subject line
        ))
    raw_subject = envelope.subject
    # a missing subject is treated like a non-matching one
    subject = '' if raw_subject is None else raw_subject.decode(
        errors='replace')
    match = subject_pattern.search(subject)
    if match is None:
        logger.error(
            'Received mail with invalid subject line: "{}"'.format(subject))
        ticket = None
    else:
        ticket = match.group('id')

    # get body
    response_body = get_plain_response_body(msg_data[b'BODY[]'])

    # get sender: prefer a single From address, then a single Reply-To,
    # otherwise fall back to the first Sender address
    address = None
    for candidates in (envelope.from_, envelope.reply_to):
        if candidates is not None and len(candidates) == 1:
            address = candidates[0]
            break
    if address is None:
        address = envelope.sender[0]
    # ("John Smith", "*****@*****.**"); the display name is optional
    display_name = '' if address.name is None else address.name.decode()
    sender = (display_name, "{}@{}".format(address.mailbox.decode(),
                                           address.host.decode()))

    # get public flag
    reply_all = False
    if envelope.cc:
        bot_cc_address = env['MAILBOT_CC_ADDRESS']
        reply_all = any(
            "{}@{}".format(cc.mailbox.decode(), cc.host.decode()) ==
            bot_cc_address for cc in envelope.cc
            if cc.mailbox is not None and cc.host is not None)

    return ticket, response_body, sender, reply_all
示例#9
0
def concurrent_get_raw_emails(ticket_ids, first_audit_ids):
    """Download the raw email body of each ticket's first audit.

    One request per ``(ticket id, audit id)`` pair is issued through the
    session returned by ``get_logged_in_future_sesh()``, with a pause of
    ``60 / ZENDESK_API_RATE_LIMIT`` seconds after each request.  The header
    section of every successful response (everything up to the first blank
    line) is dropped.

    Args:
        ticket_ids: ticket ids; entries may be None.
        first_audit_ids: audit ids, parallel to *ticket_ids*; entries may
            be None.

    Returns:
        list: parallel to the inputs; each entry is the body as bytes, or
        None when an id was None, the status was not 200, or the response
        could not be processed.
    """
    assert (len(ticket_ids) == len(first_audit_ids)
            ), 'unmatched ticket and first audit ids'
    url_template = 'https://archivesupport.zendesk.com/audits/{}/email.eml?ticket_id={}'
    session = get_logged_in_future_sesh()

    # phase 1: fire the requests, keeping None placeholders for alignment
    pending = []
    for index, (ticket_id, audit_id) in enumerate(
            zip(ticket_ids, first_audit_ids)):
        logger.debug('getting raw email future for ticket #{} {}/{}'.format(
            ticket_id, index, len(ticket_ids)))
        if ticket_id is None or audit_id is None:
            pending.append(None)
            continue
        pending.append(session.get(url_template.format(audit_id, ticket_id)))
        time.sleep(60 / ZENDESK_API_RATE_LIMIT)

    # phase 2: resolve the futures and strip the headers
    raw_emails = []
    for index, future in enumerate(pending):
        body = None
        if future is not None:
            response = future.result()
            if response.status_code != 200:
                logger.error('bad status code {}: {}'.format(
                    response.status_code, response.content))
            else:
                try:
                    buffer = io.StringIO(response.content.decode())
                    # consume lines up to and including the first blank one
                    while True:
                        if buffer.readline().strip() == '':
                            break
                    body = buffer.read().encode()
                except Exception as e:
                    logger.error(
                        '{}#{} problem while stripping headers: {}'.format(
                            first_audit_ids[index], ticket_ids[index], e))
        raw_emails.append(body)

    return raw_emails
示例#10
0
def get_plain_response_body(msg_bytes):
    """Return the new (non-quoted) plain-text content of a reply email.

    The plain-text part is used when present; otherwise the HTML part is
    converted with ``html2text``.  Quoted content is then removed: the first
    line containing ``DELIMITER`` determines the quote prefix (the text
    before the delimiter on that line), and every line starting with that
    prefix is dropped.  Finally trailing blank lines and a trailing
    attribution line mentioning ``MAILBOT_NAME`` and/or
    ``env['MAILBOT_ADDRESS']`` are removed.

    Args:
        msg_bytes: the complete raw message as bytes.

    Returns:
        str: the stripped reply text; ``''`` when the message has no
        readable plain or HTML body or nothing remains after removing the
        quoted content.
    """
    msg = BytesParser(policy=policy.default).parsebytes(msg_bytes)

    part = msg.get_body(preferencelist=('plain', ))
    if part is not None:
        try:
            plain = part.get_content().replace('\r\n', '\n')
        except LookupError as e:
            # unknown charset: nothing usable can be extracted
            logger.error(e)
            return ''
    else:
        part = msg.get_body(preferencelist=('html', ))
        if part is None:
            logger.error('Found message with no plain or html body')
            return ''
        try:
            html_content = part.get_content()
        except LookupError as e:
            logger.error(e)
            return ''
        h = html2text.HTML2Text()
        h.body_width = 0
        h.ignore_links = True
        plain = h.handle(html_content)

    # removing quoted content
    lines = plain.split('\n')
    delimiter_index = None
    quoted_prefix = ''
    for i, line in enumerate(lines):
        j = line.find(DELIMITER)
        if j != -1:
            # found DELIMITER line
            delimiter_index = i
            quoted_prefix = line[:j].strip()
            break

    if delimiter_index is None:
        # no delimiter: there is no quoted section to remove
        content_lines = list(lines)
    elif quoted_prefix:
        content_lines = [
            line for line in lines if not line.startswith(quoted_prefix)
        ]
    else:
        # The delimiter line carries no quote prefix, and every line
        # "starts with" the empty string, so filtering by prefix would
        # discard the whole message.  Keep what precedes the delimiter.
        content_lines = lines[:delimiter_index]

    # removing "On <date> Support Team wrote:
    while content_lines and content_lines[-1].strip() == '':
        content_lines.pop()
    if content_lines and MAILBOT_NAME in content_lines[-1]:
        content_lines.pop()
    if content_lines and env['MAILBOT_ADDRESS'] in content_lines[-1]:
        content_lines.pop()

    return '\n'.join(content_lines).strip()
示例#11
0
def concurrent_get_first_comments(ticket_ids):
    """Look up the id of the first audit of each ticket.

    Tickets are processed in chunks of 1000.  Within a chunk all audit
    requests are issued first (pausing ``60 / ZENDESK_API_RATE_LIMIT``
    seconds after each one) and then resolved in order.

    Args:
        ticket_ids: ticket ids; entries may be None.

    Returns:
        list: parallel to *ticket_ids*; each entry is the ``id`` of the
        first item in the response's ``audits`` list, or None when the
        ticket id was None, the status was not 200, or the response could
        not be parsed.
    """
    session = FuturesSession()
    url_template = 'https://archivesupport.zendesk.com/api/v2/tickets/{}/audits.json'

    first_audit_ids = []
    for chunk in chunked(ticket_ids, 1000):
        # issue the requests; None placeholders keep positions aligned
        pending = []
        for position, ticket_id in enumerate(chunk):
            if ticket_id is None:
                pending.append(None)
                continue
            logger.debug('getting audit future for ticket {}/{}'.format(
                position, len(chunk)))
            url = url_template.format(ticket_id)
            credentials = HTTPBasicAuth(
                env['ZENDESK_AGENT_ACCOUNT'] + "/token",
                env['ZENDESK_API_KEY'])
            pending.append(session.get(url, auth=credentials))
            time.sleep(60 / ZENDESK_API_RATE_LIMIT)

        # resolve the futures in request order
        for position, future in enumerate(pending):
            audit_id = None
            if future is not None:
                response = future.result()
                if response.status_code != 200:
                    logger.error('ticket #{} bad status code {}: {}'.format(
                        chunk[position], response.status_code,
                        response.content))
                else:
                    try:
                        audit_id = response.json()['audits'][0]['id']
                    except Exception as e:
                        logger.error('while parsing result for #{} {}'.format(
                            chunk[position], e))
            first_audit_ids.append(audit_id)

    return first_audit_ids
示例#12
0
def parse_emails_response(support_emails_response):
    """Turn raw IMAP FETCH response items into decorated comments.

    For every ``(header bytes, message bytes)`` item the UID is read from
    the header, the timestamp from the part of the message's ``Received``
    header after its last ``;``, and the body from the plain-text part (or
    the HTML part converted with ``html2text`` when there is no plain
    part).  Items that cannot be parsed are logged and skipped.

    Note: this sets ``html2text.config.IGNORE_TABLES`` and
    ``html2text.config.IGNORE_IMAGES`` module-wide.

    Args:
        support_emails_response: iterable of FETCH response items.

    Returns:
        list: ``(timestamp, body, msg_id)`` tuples.
    """
    # these configurations match what we get from zendesk
    html2text.config.IGNORE_TABLES = True
    html2text.config.IGNORE_IMAGES = False
    h = html2text.HTML2Text()
    h.body_width = 0
    h.ignore_links = True

    # raw bytes literal: '\d' in a non-raw literal is an invalid escape
    id_pattern = re.compile(rb".*UID (\d+) .*")

    # collect decorated messages [(timestamp, comment, id)...]
    support_decorated_comments = []
    logger.info('parsing archive support email data')
    for li in support_emails_response:

        # the response interleaves (header, message) tuples with bare bytes
        # fillers such as b')'; only the tuples carry a message
        if not isinstance(li, tuple):
            continue

        id_bytes, msg_bytes = li

        # get id
        id_match = id_pattern.match(id_bytes.strip())
        if id_match is None:
            logger.error('no UID found in fetch response item {}'.format(
                id_bytes))
            continue
        msg_id = int(id_match.group(1))

        msg = BytesParser(policy=policy.default).parsebytes(msg_bytes)

        # get time stamp
        received = msg['Received']
        if received is None:
            logger.error('message {} has no Received header'.format(msg_id))
            continue
        time_str = received.split(';')[-1].strip()
        try:
            time_stamp = parser.parse(time_str).timestamp()
        except (ValueError, OverflowError) as e:
            logger.error('message {} has an unparsable date: {}'.format(
                msg_id, e))
            continue

        # get message body
        raw = msg.get_body(preferencelist=('plain', ))
        if raw is not None:
            try:
                body = raw.get_content()
            except LookupError as e:
                logger.error(e)
                continue
        else:
            raw = msg.get_body(preferencelist=('html', ))
            if raw is None:
                logger.error('Found message with no plain or html body')
                continue
            try:
                html_content = raw.get_content()
            except LookupError as e:
                logger.error(e)
                continue
            body = h.handle(html_content)

        support_decorated_comments.append((time_stamp, body, msg_id))

    return support_decorated_comments
示例#13
0
def _move_messages(imap4, msg_ids, destination):
    """Copy the given UIDs to *destination* and flag the originals deleted.

    Works in chunks of 1000 UIDs.  The originals of a chunk are only
    flagged when its COPY succeeded, so a failed copy never loses mail.
    """
    for msg_ids_chunk in chunked(msg_ids, 1000):
        uids = ','.join(msg_ids_chunk)
        result, _ = imap4.uid('COPY', uids, destination)
        if result != 'OK':
            logger.error('unable to copy items')
            continue
        result, _ = imap4.uid('STORE', uids, '+FLAGS', r'(\Deleted)')
        if result != 'OK':
            logger.error('unable to delete original versions')


def cleanup(zd_matched, zd_unmatched, archive_matched, archive_unmatched,
            zd_still_fresh_filename, start_time):
    """File away processed messages and persist the ones still pending.

    * Matched archive emails are moved to ``MATCHED_ARCHIVE_FOLDER``.
    * Unmatched Zendesk comments newer than ``MINUTES_GRACE_PERIOD`` are
      pickled, together with *start_time*, to *zd_still_fresh_filename*;
      older ones are appended to ``zd_unmatched.log``.
    * Unmatched archive emails older than the grace period are moved to
      ``UNMATCHED_ARCHIVE_FOLDER``.

    Args:
        zd_matched: unused; kept for interface compatibility.
        zd_unmatched: ``(timestamp, comment, ticket_id)`` triples.
        archive_matched: ``(timestamp, comment, msg_id)`` triples.
        archive_unmatched: ``(timestamp, comment, msg_id)`` triples.
        zd_still_fresh_filename: path of the pickle file to write.
        start_time: value stored alongside the still-fresh comments.

    Returns:
        None
    """
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    try:
        imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

        # move matched emails
        archive_matched_ids = [str(msg_id) for _, _, msg_id in archive_matched]
        logger.info('moving {} matched emails'.format(
            len(archive_matched_ids)))
        _move_messages(imap4, archive_matched_ids, MATCHED_ARCHIVE_FOLDER)

        # deal with unmatched updates from Zendesk; one cutoff is used for
        # both the Zendesk comments and the archive emails below
        cutoff = (datetime.datetime.now().timestamp() -
                  MINUTES_GRACE_PERIOD * 60)
        still_fresh = []
        old = []
        for triple in zd_unmatched:
            if triple[0] > cutoff:
                still_fresh.append(triple)
            else:
                old.append(triple)
        # save still fresh for later
        logger.info('saving {} zd ticket messages for the next round'.format(
            len(still_fresh)))
        with open(zd_still_fresh_filename, 'wb') as fresh_file:
            pickle.dump((start_time, still_fresh), fresh_file)
        # log old unmatched ticket comments
        logger.info(
            'logging {} old zd ticket messages that went unmatched'.format(
                len(old)))
        with open('zd_unmatched.log', 'a') as f:
            for t, c, t_id in old:
                f.write("""
Ticket #{}
Time: {}
Comment:
{}
""".format(t_id, str(datetime.datetime.fromtimestamp(t)), c))

        # move old emails to unmatched
        old_archive_unmatched_ids = [
            str(msg_id) for t, _, msg_id in archive_unmatched if t < cutoff
        ]
        logger.info('moving {} old unmatched archive emails'.format(
            len(old_archive_unmatched_ids)))
        _move_messages(imap4, old_archive_unmatched_ids,
                       UNMATCHED_ARCHIVE_FOLDER)

        # we're done here
        imap4.expunge()
        imap4.close()
    finally:
        # always end the session, even when a step above failed
        imap4.logout()