示例#1
0
def update_list(list, start_msg, limit = 10):
  """Fetch and parse up to `limit` consecutive archived messages of a list.

  Messages numbered start_msg .. start_msg + limit - 1 are downloaded from
  "<list.list_url>/msgNNNNN.html", parsed with email_loader.parser and turned
  into Message objects that are appended to a local message_pool. Thread
  objects that are created or updated along the way are kept in a local
  thread_pool keyed by thread id.

  Args:
    list: object whose `list_url` attribute is the base URL of the archive
      (the only attribute read here). The name shadows the builtin `list`
      inside this function.
    start_msg: number of the first message to fetch.
    limit: maximum number of consecutive messages to fetch (default 10).

  The loop stops at the first message for which any of these holds:
    * all three fetch attempts raised urlfetch.DownloadError (the last
      exception is left in `err`);
    * the server answered 404 (treated as "no more messages");
    * the server answered with any other non-200 status (described in `err`);
    * the parsed message lacks one of the required fields (logged as error).

  NOTE(review): this description covers only the per-message loop; how
  message_pool, thread_pool, new and err are consumed afterwards should be
  confirmed against the rest of the function.
  """
  import email_loader
  # NOTE(review): urllib is not referenced in the loop below.
  import urllib
  # Keys the parsed message must contain before it is accepted.
  required_fields = ['date', 'message_id', 'subject', 'body', 'sender']
  # message_id -> thread_id for messages handled earlier in this call, so
  # replies to them can be threaded without a datastore query.
  thread_id_cache = {}
  # thread_id -> Thread for every thread created or loaded during this call.
  thread_pool = {}
  # Message objects built during this call, in fetch order.
  message_pool = []
  # Number of messages appended to message_pool.
  new = 0
  # Last fetch problem (exception or description string), or None.
  err = None
  for i in range(start_msg, start_msg + limit):
    # The message number is zero-padded to five digits in the archive URL.
    url = "%s/msg%05i.html" % (list.list_url, i)
    logging.info("loading an email from url %s" % url)

    # Try three times to fetch the URL
    result = None
    for attempt in range(3):
      try:
        result = urlfetch.fetch(url=url)
        # A successful attempt clears any error left by earlier attempts.
        err = None
        break
      except urlfetch.DownloadError, msg:
        err = msg
        continue
    # All three attempts failed: stop here, leaving the last error in err.
    if result is None: break

    if result.status_code == 404:
      # This message does not exist, so return messages collected so far.
      break
    elif result.status_code != 200:
      err = 'Got status code %i while trying to fetch %s' % (result.status_code,
                                                             url)
      break

    logging.info("got content: %s" % result.content)
    # From here on `result` is the parsed message (used as a mapping below),
    # no longer the HTTP response.
    result = email_loader.parser(StringIO(result.content))

    logging.info("got result: %s" % result)
    # Record where the message came from and its number within the list.
    result['source_url'] = url
    result['list_msg_id'] = i
    if not all((field in result) for field in required_fields):
      logging.error(
        "failed to update list %s msg %i with url %s; got bad parse result %s"
        % (list, i, url, result))
      break
    # Drop the last whitespace-separated token of the date string (presumably
    # the timezone offset -- confirm) because the format below has no field
    # for it; the resulting datetime is therefore naive.
    result['date'] = datetime.strptime(' '.join(result['date'].split()[:-1]),
                                       '%a, %d %b %Y %H:%M:%S')
    # Determine thread_id
    if 'references' in result:
      # First pass: references that point at messages seen earlier in this
      # call, resolved from the in-memory cache.
      for reference in result['references']:
        if 'thread_id' in result:
          break
        if reference in thread_id_cache:
          result['thread_id'] = thread_id_cache[reference]

      # Second pass (only if the cache had no match): look each referenced
      # message up via gql_limit1 and reuse its thread_id.
      for reference in result['references']:
        if 'thread_id' in result:
          break
        ref = gql_limit1(Message, message_id = reference)
        if ref is not None:
          result['thread_id'] = ref.thread_id
    # If no thread_id was found, start a new thread with this message
    if 'thread_id' not in result:
      result['thread_id'] = result['message_id']
      thread = Thread(thread_id = result['message_id'],
                      list_url = list.list_url,
                      last_message_time = result['date'],
                      subject = result['subject'],
                      last_message_body = result['body'],
                      participants = [result['sender']])
      thread_pool[result['thread_id']] = thread

    # Build the message object
    # Every key of the parsed mapping is passed as a keyword argument.
    message = Message(**result)

    # Update the thread
    if result['thread_id'] not in thread_pool:
      # Thread was not created or loaded earlier in this call: look it up.
      # NOTE(review): the other gql_limit1 call site checks for None; if None
      # can come back here too, the attribute assignments below would raise
      # AttributeError -- confirm.
      thread = gql_limit1(Thread, thread_id = result['thread_id'])
      thread_pool[result['thread_id']] = thread
    thread = thread_pool[result['thread_id']]
    thread.last_message_time = message.date
    thread.last_message_body = message.body
    if message.sender not in thread.participants:
      thread.participants.append(message.sender)
    # Remember this message's thread so later replies resolve from the cache.
    thread_id_cache[result['message_id']] = result['thread_id']

    # Update the message_pool
    message_pool.append(message)
    new += 1