def getmsgparts(self, m, L, level):
    """Walk message ``m`` and append its plain-text bodies to the list ``L``.

    ``level`` is the current nesting depth; it is used to indent the
    content-type trace printed to stdout and is incremented on recursion.

    Handling by content type:

    * ``text/plain`` -- the body text is appended to ``L``.
    * ``multipart/alternative`` and ``multipart/mixed`` -- every body part
      is visited recursively.
    * ``message/rfc822`` -- the body is parsed as a new ``mhlib.Message``;
      its headers are handed to ``self.getheaders`` and the embedded
      message is then visited recursively.
    * anything else -- only traced, nothing is appended.

    Returns None; results are accumulated in ``L``.
    """
    content_type = m.gettype()
    is_plain = content_type == "text/plain"

    # Trace the structure, except for a top-level text/plain message.
    if level or not is_plain:
        print(". " * level + str(content_type))

    if is_plain:
        L.append(m.getbodytext())
        return

    if content_type in ("multipart/alternative", "multipart/mixed"):
        for child in m.getbodyparts():
            self.getmsgparts(child, L, level + 1)
        return

    if content_type == "message/rfc822":
        # Re-parse the body as a message of its own.
        body_file = StringIO(m.getbodytext())
        embedded = mhlib.Message("<folder>", 0, body_file)
        self.getheaders(embedded, L)
        self.getmsgparts(embedded, L, level + 1)
def formatresults(self, text, results, maxlines=MAXLINES, lo=0, hi=sys.maxint):
    """Print a ranked report for the ``results`` of the query ``text``.

    Parameters:
        text     -- the query string; its non-stop words (a trailing ``*``
                    acts as a glob) are used to pick the body lines shown.
        results  -- sequence of ``(docid, score)`` pairs.
        maxlines -- stop printing matching body lines for a message once
                    this many have been shown.
        lo, hi   -- slice bounds into ``results``; ranks are numbered
                    starting at ``lo + 1``.

    For each selected result the rank, the score as a percentage of
    ``self.index.query_weight(text)``, and the file path are printed,
    followed by selected headers and the body lines that match the query
    words.  A file that cannot be opened is reported and skipped.

    Everything is written to stdout; the method returns None.
    """
    is_stopword = self.stopdict.has_key
    words = [w for w in re.findall(r"\w+\*?", text.lower())
             if not is_stopword(w)]
    # NOTE(review): if every query word is a stop word the group is empty
    # and the pattern matches at any word boundary.
    pattern = r"\b(" + "|".join(words) + r")\b"
    pattern = pattern.replace("*", ".*")  # glob -> re syntax
    prog = re.compile(pattern, re.IGNORECASE)

    print('=' * 70)
    rank = lo
    qw = self.index.query_weight(text)
    for docid, score in results[lo:hi]:
        rank += 1
        relpath = self.docpaths[docid]
        percent = 100.0 * score / qw
        print("Rank: %d Score: %d%% File: %s" % (rank, percent, relpath))

        fullpath = os.path.join(self.mh.getpath(), relpath)
        try:
            fp = open(fullpath)
        except (IOError, OSError) as err:
            print("Can't open: %s" % (err,))
            continue

        # Close the file explicitly instead of relying on garbage
        # collection; it stays open for as long as the message is in use.
        try:
            message = mhlib.Message("<folder>", 0, fp)
            for header in "From", "To", "Cc", "Bcc", "Subject", "Date":
                value = message.getheader(header)
                if value:
                    print("%-8s %s" % (header + ":", value))

            parts = self.getmessagetext(message)
            if parts:
                print("")
                nleft = maxlines
                for part in parts:
                    for line in part.splitlines():
                        if prog.search(line):
                            print(line)
                            nleft -= 1
                            if nleft <= 0:
                                break
                    # Propagate the inner break out of the parts loop.
                    if nleft <= 0:
                        break
        finally:
            fp.close()
        print('-' * 70)
def writeparts(part, oname):
    """Write every leaf part of the message ``part`` to its own file.

    ``part.getbody()`` yields either a list of sub-parts, which are
    processed recursively, or the content string of a single part, which
    is written to disk.

    The output file name is the ``name`` parameter from the part's headers
    when present (opened in binary mode), otherwise ``oname`` followed by
    the running part counter plus ``.txt`` / ``.html`` for ``text/plain``
    / ``text/html`` parts (opened in text mode).

    Side effects: prints progress to stdout, creates files, and increments
    the module-level counter ``partnum`` once per file written.
    """
    global partnum
    import os  # local import: the file's import block is not in view here

    content = part.getbody()  # decoded content or list
    if isinstance(content, list):
        # multiparts: recur for each
        for subpart in content:
            writeparts(subpart, oname)
        return

    # else single decoded part
    assert isinstance(content, str)
    print("")
    print(part.getparamnames())

    # use filename if in headers; mode must be 'wb' on windows
    # for word doc files, not 'w'
    fmode = 'wb'
    fname = part.getparam('name')
    if fname:
        # The name comes from the mail itself (untrusted input): drop any
        # directory components so a part cannot be written outside the
        # current directory.
        fname = os.path.basename(fname)
    if not fname:
        # else make one with counter
        fmode = 'w'
        fname = oname + str(partnum)
        ctype = part.gettype()
        if ctype == 'text/plain':
            fname = fname + '.txt'
        elif ctype == 'text/html':
            fname = fname + '.html'

    output = open(fname, fmode)
    try:
        print('writing: %s' % (output.name,))
        output.write(content)
    finally:
        # Close explicitly so the data is flushed and the handle released.
        output.close()
    partnum = partnum + 1


partnum = 0
input = open(iname, 'r')  # open mail file
try:
    message = mhlib.Message('.', 0, input)  # folder, number args ignored
    writeparts(message, oname)
finally:
    input.close()
print('done: wrote %s parts' % partnum)