示例#1
0
 def index(self, msgs: notmuch.Messages) -> None:
     """Add the addresses found in ``msgs`` to the address book.

     For every message the ``From``, ``To``, ``Cc`` and ``Bcc`` headers are
     read from the message file and passed to ``self._add``; its return
     value is added to the running address total.

     Whether or not reading succeeds, the database is merged and pickled
     to ``_DBPATH`` before returning, and the totals are logged.
     """
     message_count = 0
     address_count = 0
     try:
         header_parser = email.parser.Parser()
         for message in msgs:
             path = message.get_filename()
             try:
                 with open(path, "r") as handle:
                     parsed = header_parser.parse(handle, headersonly=True)
             except UnicodeDecodeError:
                 # Not decodable with the default encoding: retry as latin9.
                 with open(path, "r", encoding="latin9") as handle:
                     parsed = header_parser.parse(handle, headersonly=True)
             raw_values = [
                 value
                 for header in ("from", "to", "cc", "bcc")
                 for value in parsed.get_all(header, [])
             ]
             address_count += self._add(email.utils.getaddresses(raw_values))
             message_count += 1
             # Progress report every 20 messages.
             if message_count % 20 == 0:
                 logging.debug(
                     "Messages: %d; addresses: %d",
                     message_count,
                     address_count,
                 )
     finally:
         # Persist whatever was collected, even after an error.
         self._merge_db()
         db_path = os.path.expanduser(_DBPATH)
         with open(db_path, "wb") as db_file:
             pickle.dump(self._db, db_file, pickle.HIGHEST_PROTOCOL)
         logging.info(
             "Total: indexed %d messages and %d addresses. "
             "%d unique addresses in the address book.",
             message_count,
             address_count,
             len(self._db),
         )
示例#2
0
 def index(self, msgs: notmuch.Messages) -> None:
     """Index the sender and recipient addresses of ``msgs``.

     Each message file is parsed for its headers only (falling back to
     latin9 when the default text decoding fails), the address headers are
     split into (name, address) pairs and given to ``self._add``.  The
     database is always merged and written to ``_DBPATH`` at the end.
     """
     n_messages = 0
     n_addresses = 0
     address_headers = ("from", "to", "cc", "bcc")
     try:
         parser = email.parser.Parser()
         for msg in msgs:
             filename = msg.get_filename()
             try:
                 with open(filename, "r") as stream:
                     headers = parser.parse(stream, True)
             except UnicodeDecodeError:
                 with open(filename, "r", encoding="latin9") as stream:
                     headers = parser.parse(stream, True)

             collected = []
             for header_name in address_headers:
                 collected.extend(headers.get_all(header_name, []))
             pairs = email.utils.getaddresses(collected)

             n_addresses += self._add(pairs)
             n_messages += 1
             if not n_messages % 20:
                 logging.debug("Messages: %d; addresses: %d",
                               n_messages, n_addresses)
     finally:
         # Runs on success and on failure alike: save the DB.
         self._merge_db()
         with open(os.path.expanduser(_DBPATH), "wb") as out:
             pickle.dump(self._db, out, pickle.HIGHEST_PROTOCOL)
         unique = len(self._db)
         logging.info(
             "Total: indexed %d messages and %d addresses. "
             "%d unique addresses in the address book.",
             n_messages,
             n_addresses,
             unique,
         )
def import_mail(conn, fp):
    """Store the mail read from ``fp`` against the ISP report it is sent to.

    The local part of the ``To:`` address (lower-cased) is looked up in
    ``isp_reports.mailname``.  When a report matches, the full message text
    is inserted into ``isp_report_emails`` and the transaction is
    committed; otherwise nothing is written.

    Args:
        conn: DB-API connection (the queries use ``%s`` placeholders).
        fp: file-like object holding one raw mail.
    """
    msgtxt = StringIO.StringIO()
    shutil.copyfileobj(fp, msgtxt)
    msgtxt.seek(0)
    msg = email.parser.Parser().parse(msgtxt)

    # msg['To'] is None when the header is absent; fall back to an empty
    # string so parseaddr() yields ('', '') instead of failing on None.
    name, addr = email.utils.parseaddr(msg.get('To') or '')
    # Single-argument print(): same output as the old print statement.
    print("%s %s" % (name, addr))

    mailname = addr.split('@')[0].lower()

    print(mailname)

    c = conn.cursor()
    try:
        c.execute("select id from isp_reports where mailname = %s", [mailname])
        row = c.fetchone()
        if row is None:
            return
        report_id = row[0]

        c.execute("insert into isp_report_emails(report_id, message, created) values (%s,%s,now())",
                  [report_id, msgtxt.getvalue()])
        conn.commit()
    finally:
        # Release the cursor on every path, including the early return.
        c.close()
示例#4
0
    def add_email_from(self, lines):
        """Add an address from From: field of a mail.
        This assumes a single mail file is supplied through.

        The display name is MIME-decoded when it is an encoded word; when
        there is no usable display name the mail address itself is used as
        the name.  The result is passed to ``self.add_mail_contact``.

        Exits the process with status 2 when there is no ``From`` header.

        Args:
          lines: A file-like object holding the mail, usually an open file
            (it is handed to ``HeaderParser.parse``).

        """

        headers = email.parser.HeaderParser().parse(lines)
        if 'From' not in headers:
            print("Not a valid mail file!")
            sys.exit(2)

        (name, mailaddr) = email.utils.parseaddr(headers['From'])
        if not name:
            name = mailaddr
        else:
            # This decodes headers like "=?iso-8859-1?q?p=F6stal?="
            values = email.header.decode_header(name)
            if not values:
                name = mailaddr
            else:
                # Only the first decoded chunk is used.
                (name, encoding) = values[0]

                if encoding is not None:
                    try:
                        # 'replace' keeps malformed bytes from aborting
                        # the import of an otherwise usable contact.
                        name = name.decode(encoding, 'replace')
                    except LookupError:
                        # Charset label unknown to Python: fall back to
                        # the address rather than crashing.
                        name = mailaddr

        self.add_mail_contact(name, mailaddr)
示例#5
0
def parser_worker(fileQ, loaderQ, debug):
    """Parse the headers of queued mail files and forward them to ``loaderQ``.

    File names are taken from ``fileQ`` until a ``None`` sentinel arrives.
    Each file is parsed (headers only) and, unless its first header value
    is ``VCARD`` or ``VCALENDAR``, a ``(message, relative_name)`` tuple is
    put on ``loaderQ``.  ``relative_name`` is the file name with the root
    directory given in ``sys.argv[1]`` (and one path separator) removed.

    ``fileQ.task_done()`` is called exactly once per item taken, including
    when a file fails to parse, so ``fileQ.join()`` cannot hang; a failing
    file is reported on stdout and the worker moves on to the next item.

    Args:
        fileQ: queue of file names, terminated by ``None``.
        loaderQ: queue receiving ``(message, relative_name)`` tuples.
        debug: when true, print each file name before parsing it.
    """
    parser = email.parser.Parser()
    while True:
        fileName = fileQ.get()
        try:
            if fileName is None:
                break
            if debug:
                print("PARSING: " + fileName)

            rootLen = len(sys.argv[1])
            # Slicing (not indexing) so a name equal to the root is safe.
            if fileName[rootLen:rootLen + 1] in ('/', '\\'):
                rootLen += 1
            eFile = fileName[rootLen:]

            with open(fileName) as f:
                email_msg = parser.parse(f, headersonly=True)

            header_values = email_msg.values()
            # vCard / vCalendar files parse as a first "BEGIN" header
            # whose value names the format; those are skipped.
            if header_values and header_values[0] not in ('VCARD', 'VCALENDAR'):
                loaderQ.put((email_msg, eFile))
        except Exception as e:
            # Best effort per file: report and keep the worker alive.
            print(e)
        finally:
            fileQ.task_done()
示例#6
0
def parseElement(filename,element,type):
    """Print one element of the mail stored in ``filename``.

    When ``element`` is ``"message"`` (case-insensitive) the body is
    printed: for a multipart mail the first ``text/html`` part that is not
    an attachment, otherwise the decoded payload.  For any other value the
    first header named ``element`` is printed.  An empty line is printed
    when nothing was found (including a missing header).

    Args:
        filename: path of the mail file.
        element: ``"message"`` or a header name.
        type: unused; kept for interface compatibility.
    """
    with open(filename, "r") as handle:
        email_val = email.parser.Parser().parse(handle)
    element_val = None

    if element.lower() == "message":
        if email_val.is_multipart():
            for part in email_val.walk():
                ctype = part.get_content_type()
                cdispo = str(part.get('Content-Disposition'))

                # Only an inline text/html part is used; text/plain parts
                # and attachments are skipped.
                if ctype == 'text/html' and 'attachment' not in cdispo:
                    element_val = part.get_payload(decode=True)
                    break
        else:
            # Not multipart, i.e. a single body without attachments.
            element_val = email_val.get_payload(decode=True)
    else:
        # get() returns the first matching header, or None when absent.
        element_val = email_val.get(element)

    if element_val is None:
        print("")
    else:
        # Payloads are byte strings; header values are already text on
        # Python 3 and must not be decoded again.
        if isinstance(element_val, bytes):
            element_val = element_val.decode()
        print(element_val)
示例#7
0
 def metadata(self):
     """
     Return the parsed :file:`METADATA` file from inside the wheel.

     The result is cached in ``self._metadata``.  Two member names are
     tried: one built from ``package_tag`` and one from ``package_canon``.
     :exc:`RuntimeError` is raised when neither exists in the archive.
     """
     if self._metadata is not None:
         return self._metadata

     with zipfile.ZipFile(self.open()) as wheel:
         candidates = {
             template.format(self=self)
             for template in (
                 '{self.package_tag}-'
                 '{self.package_version_tag}.dist-info/'
                 'METADATA',
                 '{self.package_canon}-'
                 '{self.package_version_tag}.dist-info/'
                 'METADATA',
             )
         }
         for candidate in candidates:
             try:
                 with wheel.open(candidate) as member:
                     self._metadata = email.parser.BytesParser().parse(member)
             except KeyError:
                 # Member not in the archive: try the next spelling.
                 continue
             break
         if self._metadata is None:
             found = {
                 info.filename
                 for info in wheel.infolist()
                 if info.filename.endswith('METADATA')
             }
             raise RuntimeError(
                 'Unable to locate METADATA in %s; attempted: %r; '
                 'possible files: %r' % (self.wheel_file, candidates, found))
     return self._metadata
示例#8
0
def extract_xml(source: Union[str, TextIO]) -> io.StringIO:
    """Extract the single XML attachment from an SMTP message.

    Handles multipart MIME messages (Yahoo, others...), whose only
    attachment is gzip-compressed, as well as Google's minimalist
    messages whose whole body is an ``application/zip`` archive.

    Args:
        source: path of the message file, or an open text file-like
            object.  The object is closed by this function in both cases.

    Returns:
        The decompressed XML, decoded to text, as a file-like object.

    Raises:
        RuntimeError: if the zip archive does not hold exactly one file,
            or the mail does not have exactly one attachment.
    """
    if not hasattr(source, 'read'):
        source = open(source, 'r')

    try:
        parser = email.parser.Parser(policy=email.policy.default)
        email_msg = parser.parse(source)
    finally:
        # Close even when parsing fails so the handle is never leaked.
        source.close()

    if email_msg.get_content_type() == 'application/zip':  # google
        zip_data = email_msg.get_content()
        with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
            filenames = zf.namelist()
            if len(filenames) != 1:
                cnt = len(filenames)
                raise RuntimeError(
                    f'Not exactly one file in attached zip file ({cnt} found)')
            xml = zf.read(filenames[0])
        return io.StringIO(xml.decode())

    attachments = list(email_msg.iter_attachments())
    if len(attachments) != 1:
        cnt = len(attachments)
        raise RuntimeError(
            f'Not exactly one attachment in mail ({cnt} found)')
    compressed_data = attachments[0].get_content()
    return io.StringIO(gzip.decompress(compressed_data).decode())
示例#9
0
def process(git_patch_file):
  """Convert a ``git format-patch`` mail into Mercurial patch text.

  ``git_patch_file`` is an open file holding the patch mail.  Returns the
  text of an "HG changeset patch" built from the mail's From header,
  its subject (without any leading ``[PATCH n/m]`` marker) and its body.
  When the cleaned From or subject header is empty, a message is written
  to stderr and ``None`` is returned.
  """
  msg = email.parser.Parser().parse(git_patch_file)
  from_hdr = clean_header(msg['From'])
  commit_title = clean_header(msg['subject'])
  if len(commit_title) == 0 or len(from_hdr) == 0:
    sys.stderr.write("%s does not look like a valid git patch file, skipping\n"
                     % git_patch_file.name)
    return

  parsed_from = email.utils.parseaddr(from_hdr)

  # Drop a leading "[PATCH] " / "[PATCH 1/3] " marker from the title.
  prefix_match = re.match(r"\[PATCH( \d+/\d+)?\] ", commit_title)
  if prefix_match:
    commit_title = commit_title[prefix_match.end():]

  # git format-patch wraps the diff (including trailing whitespace):
  #   ---
  #   <diff>
  #   -- 
  #   2.0.3
  # The trailing version number does not hurt parsing the diff, but it is
  # meaningless once the git specific items have been stripped.
  patch_body = re.sub(r'--\s?\n[0-9\.]+\n$', '', msg.get_payload())

  output_lines = [
    '# HG changeset patch',
    '# User %s <%s>' % parsed_from,
    '',
    commit_title,
    '',
    patch_body,
  ]
  return '\n'.join(output_lines)
def findUrl(path):
    """Return the text content extracted from the mail file at ``path``.

    The mail is read as UTF-8 (undecodable bytes are ignored) and its
    content is obtained with ``recursivePayloadSearch``.  If the raw
    payload, run through ``pure_b64decode``, yields a result longer than
    60% of the payload, that decoded result is returned instead.

    Returns an empty string when the file does not exist or no content
    could be extracted.
    """
    parser = email.parser.Parser()

    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as file_pointer:
            message = parser.parse(file_pointer, headersonly=False)
    except FileNotFoundError:
        print("No such file")
        # Nothing to parse: stop here instead of using an unbound handle.
        return ""

    content = ""
    try:
        content = recursivePayloadSearch(message)
    except Exception:
        print("Get Content Error: %s" % (path))

    try:
        raw_payload = message._payload
        pure_b64 = pure_b64decode(raw_payload)
        if len(pure_b64) > 0.6 * len(raw_payload):
            content = pure_b64
    except Exception:
        # Best effort: the base64 attempt is optional, keep what we have.
        pass
    return content
示例#11
0
def process_new_email(path, threads_index):
    """File the mail at ``path`` into ``threads_index``.

    The mail's cleaned-up subject selects an existing thread (which is
    removed from its current position) or starts a new one created by
    ``threads.create_thread_structure()``.  The mail's file name is
    appended to the thread's messages, the thread is stamped with the
    current UTC time, marked unread and re-inserted at the front of
    ``threads_index``.  Mails without a ``Subject`` header are ignored.

    Args:
        path: path of the new mail file.
        threads_index: list of thread dicts, modified in place.
    """
    with open(path, "r") as fd:
        email_headers = email.parser.HeaderParser().parse(fd)

    subject = email_headers["subject"]
    if subject is None:
        return

    # A missing From: header is treated as an empty sender.  (The former
    # unused "to" parsing was removed: it handed a str/None to
    # getaddresses(), which raised whenever To: was absent.)
    name, address = email.utils.parseaddr(email_headers.get("From", ""))
    from_field = {"name": name, "address": address}

    subject = headers.cleanup_subject(subject)
    thread = None
    for index, thr in enumerate(threads_index):
        if thr["subject"] == subject:
            thread = threads_index.pop(index)
            break

    if not thread:
        # create a new thread
        thread = threads.create_thread_structure()
        thread["subject"] = subject
        thread["creator"] = from_field

    msg_id = os.path.basename(path)
    thread["messages"].append(msg_id)
    thread["date"] = datetime.datetime.utcnow()
    thread["unread"] = True

    if from_field["address"] != thread["creator"]["address"]:
        thread["lastreplyfrom"] = from_field

    # Most recently updated thread goes first.
    threads_index.insert(0, thread)
示例#12
0
def egg_info(files: Dict[str, str]) -> Tuple[Message, Distribution]:
    """Run ``setup.py egg_info`` on an in-memory project and read the result.

    ``files`` maps relative file names to their text contents; they are
    written to a temporary directory, ``setup.py egg_info`` is run there
    with stdout silenced, and the single generated ``PKG-INFO`` is parsed.

    Returns:
        ``(info, dist)``: the parsed ``PKG-INFO`` message and the metadata
        produced by ``SetuptoolsReader`` for the same directory.
    """
    # TODO consider
    # https://docs.python.org/3/distutils/apiref.html#distutils.core.run_setup
    # and whether that gives a Distribution that knows setuptools-only options
    with tempfile.TemporaryDirectory() as d:
        for relname, contents in files.items():
            target = Path(d, relname)
            target.parent.mkdir(exist_ok=True, parents=True)
            target.write_text(contents)

        # Capture the state to restore *before* entering the try block so
        # the finally clause can never see these names unbound.
        cwd = os.getcwd()
        stdout = sys.stdout
        try:
            os.chdir(d)
            sys.stdout = io.StringIO()
            # Run only for its side effect of writing the egg-info files.
            run_setup("setup.py", ["egg_info"])
        finally:
            os.chdir(cwd)
            sys.stdout = stdout

        sources = list(Path(d).rglob("PKG-INFO"))
        assert len(sources) == 1

        with open(sources[0]) as f:
            parser = email.parser.Parser()
            info = parser.parse(f)
        reader = SetuptoolsReader(Path(d))
        dist = reader.get_metadata()
        return info, dist
示例#13
0
def entry_list(request, root_dir):
    """Render the entry-list page showing the most recent entry.

    All files under ``root_dir`` whose name matches ``entry_file_name_re``
    are collected and sorted; the last one is parsed and its ``title``
    header, payload and date (taken from the file name, at 12:00) are
    passed to the ``downblog/entry_list.html`` template together with the
    links of the 'spreadsite' resource library.
    """
    # TODO. Split the entry scan into a separate function.
    # TODO. Cache
    entries = []
    for dir_name, subdirs, files in os.walk(root_dir):
        matched = ((entry_file_name_re.match(name), name) for name in files)
        entries.extend(
            match.groups() + (dir_name, name)
            for match, name in matched
            if match
        )
    entries.sort()  # Most recent last.

    year, month, day, slug, suffix, dir_name, file_name = entries[-1]
    entry_path = os.path.join(dir_name, file_name)
    with open(entry_path, 'rb') as entry_file:
        msg = parser.parse(entry_file)

    published = datetime.datetime(int(year), int(month, 10), int(day, 10), 12, 0, 0)
    template_args = {
        'title': msg['title'],
        'body': msg.get_payload(),
        'published': published,
    }

    # Add some links from a resource library.
    lib = get_library_or_404(settings.SPREADLINKS_DIR, 'spreadsite')
    template_args['links'] = lib.all_links

    context = RequestContext(request)
    return render_to_response('downblog/entry_list.html', template_args, context)
示例#14
0
 def index(self, msgs):
     """Index the addresses of ``msgs`` into the address book.

     The from/to/cc/bcc headers of each message file are parsed and the
     resulting (name, address) pairs are given to ``self._add``.  In all
     cases the database is merged and pickled to ``_DBPATH`` at the end.
     """
     message_total = 0
     address_total = 0
     try:
         header_parser = email.parser.Parser()
         for message in msgs:
             with open(message.get_filename(), "r") as handle:
                 parsed = header_parser.parse(handle, headersonly=True)
             header_values = []
             for name in ("from", "to", "cc", "bcc"):
                 header_values.extend(parsed.get_all(name, []))
             address_total += self._add(email.utils.getaddresses(header_values))
             message_total += 1
             if message_total % 20 == 0:
                 logging.debug("Messages: %d; addresses: %d",
                               message_total, address_total)
     finally:
         # At the end, save the DB (also when indexing failed part-way).
         self._merge_db()
         db_path = os.path.expanduser(_DBPATH)
         with open(db_path, "wb") as db_file:
             pickle.dump(self._db, db_file, pickle.HIGHEST_PROTOCOL)
         logging.info(
             "Total: indexed %d messages and %d addresses. "
             "%d unique addresses in the address book.",
             message_total, address_total, len(self._db))
示例#15
0
def process_new_email(path, threads_index):
    """Attach the mail stored at ``path`` to its thread in ``threads_index``.

    Threads are matched on the subject returned by
    ``headers.cleanup_subject``.  A matching thread is taken out of the
    list, otherwise a new one is created with the sender as its creator.
    The thread then gets the mail's file name appended, is stamped with
    the current UTC time, flagged unread and put at the front of the list.
    A mail without a ``Subject`` header leaves ``threads_index`` untouched.

    Args:
        path: path of the new mail file.
        threads_index: list of thread dicts, modified in place.
    """
    with open(path, "r") as fd:
        parser = email.parser.HeaderParser()
        email_headers = parser.parse(fd)

    subject = email_headers["subject"]
    if subject is None:
        return

    # The old, unused "to" parsing passed a str (or None when To: was
    # absent) to getaddresses() and could raise; it has been dropped.
    # A missing From: header yields an empty name and address.
    sender_name, sender_address = email.utils.parseaddr(
        email_headers.get("From", ""))
    from_field = {"name": sender_name, "address": sender_address}

    subject = headers.cleanup_subject(subject)
    thread = None
    for index, thr in enumerate(threads_index):
        if thr["subject"] == subject:
            thread = threads_index.pop(index)
            break

    if not thread:
        # create a new thread
        thread = threads.create_thread_structure()
        thread["subject"] = subject
        thread["creator"] = from_field

    thread["messages"].append(os.path.basename(path))
    thread["date"] = datetime.datetime.utcnow()
    thread["unread"] = True

    if from_field["address"] != thread["creator"]["address"]:
        thread["lastreplyfrom"] = from_field

    # The thread that just received mail moves to the top.
    threads_index.insert(0, thread)
示例#16
0
 def convertDate(date):
     """Parse the date string ``date`` and return it as a time tuple.

     The string is parsed with ``dateutil.parser.parse`` and the result of
     calling ``timetuple()`` on the parsed value is returned.
     """
     # Imported locally, as before; the unused struct_time/datetime
     # imports were dropped.
     from dateutil import parser
     return parser.parse(date).timetuple()
示例#17
0
    def handle_check(self):
        """Check the message read from ``self.rfile`` against Pyzor.

        The message is parsed with the SMTP policy, digested, and the
        items of the check response are written back as JSON.
        """
        message = email.parser.BytesParser(
            policy=email.policy.SMTP).parse(self.rfile)
        message_digest = pyzor.digest.DataDigester(message).value
        response = pyzor.client.Client().check(message_digest)
        self.write_json(dict(response.items()))
示例#18
0
 def iter_package_entries(self, name: str) -> Iterator[PackageEntry]:
     """Yield a ``PackageEntry`` for each best distribution of ``name``.

     For every (version, url) returned by ``collect_best_dist_urls`` the
     archive is opened through ``HttpFile``, the headers of its metadata
     file are parsed, and the ``Requires-Dist`` values become the entry's
     dependencies.
     """
     best_urls = self.collect_best_dist_urls(name)
     for version, url in best_urls.items():
         remote = cast(IO[bytes], HttpFile(url, self.session))
         with zipfile.ZipFile(remote) as archive, \
                 _open_metadata(archive, name) as stream:
             headers = email.parser.BytesParser().parse(
                 cast(BinaryIO, stream), headersonly=True)
         requirements: List[str] = headers.get_all("Requires-Dist", [])
         yield PackageEntry(version, requirements)
示例#19
0
 def __init__(self, file):
   """Initialise from the headers of the mail in ``file``.

   On Python 2 the text parser and an explicit
   ``mailbox.MaildirMessage.__init__`` call are used; on Python 3 the
   bytes parser and ``super()``.  Only the headers are parsed.
   """
   legacy = sys.version_info[0] < 3
   parser_class = email.parser.Parser if legacy else email.parser.BytesParser
   message = parser_class().parse(file, headersonly=True)
   if legacy:
     return mailbox.MaildirMessage.__init__(self, message)
   return super().__init__(message)
示例#20
0
    def handle_check(self):
        """Check the message read from ``self.rfile`` against Pyzor.

        The message is parsed with the SMTP policy and digested.  The
        whitelisted 'default' digest is not sent to the server; an empty
        JSON object is written for it.  For any other digest the items of
        the check response are written as JSON.
        """
        parser = email.parser.BytesParser(policy=email.policy.SMTP)
        msg = parser.parse(self.rfile)

        digest = pyzor.digest.DataDigester(msg).value
        # whitelist 'default' digest (all messages with empty/short bodies);
        # the value is the SHA-1 of empty input.
        if digest == 'da39a3ee5e6b4b0d3255bfef95601890afd80709':
            # No lookup was made, so there is no response to report.
            # Previously this path fell through to an unbound ``check``.
            self.write_json({})
            return

        check = pyzor.client.Client().check(digest)
        self.write_json({k: v for k, v in check.items()})
示例#21
0
    def test_customize_message_encoding(self):
        mailing = factories.MailingFactory(
            header="""Content-Transfer-Encoding: 7bit
Content-Type: multipart/alternative; boundary="===============2840728917476054151=="
Subject: Great news!
From: Mailing Sender <*****@*****.**>
To: <*****@*****.**>
Date: Wed, 05 Jun 2013 06:05:56 -0000
""",
            body="""
This is a multi-part message in MIME format.
--===============2840728917476054151==
Content-Type: text/plain; charset="iso-8859-1"
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable

This is a very simple mailing. I'm happy.
--===============2840728917476054151==
Content-Type: text/html; charset="iso-8859-1"
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head>
<META http-equiv=3DContent-Type content=3D"text/html; charset=3Diso-8859-1">
</head>
<body>
This is <strong> a very simple</strong> <u>mailing</u>. =
I'm happy! Nothing else to say...
</body></html>

--===============2840728917476054151==--
"""
        )
        recipient = factories.RecipientFactory(mailing=mailing)

        customizer = MailCustomizer(recipient)
        fullpath = os.path.join(customizer.temp_path, MailCustomizer.make_file_name(recipient.mailing.id, recipient.id))
        if os.path.exists(fullpath):
            os.remove(fullpath)

        self.assertFalse(os.path.exists(fullpath))

        customizer._run_customizer()

        self.assertTrue(os.path.exists(fullpath))
        parser = email.parser.Parser()
        message = parser.parse(file(fullpath, 'rt'), headersonly = False)
        assert(isinstance(message, email.message.Message))
        self.assertTrue(message.is_multipart())
        self.assertEquals("multipart/alternative", message.get_content_type())
        self.assertEquals("text/plain", message.get_payload(i=0).get_content_type())
        self.assertEquals("text/html", message.get_payload(i=1).get_content_type())
        self.assertEquals(message.get_payload(i=0).get_payload(decode=True), "This is a very simple mailing. I'm happy.")
        self.assertIn("This is <strong> a very simple</strong> <u>mailing</u>. I'm happy! ", message.get_payload(i=1).get_payload(decode=True))
def move_files(from_where):
    """Move mails that look legitimate from the spam/unsure folder to ham.

    Walks ``OUTPUT_PATH/spam`` (when ``from_where`` is ``"spam"``) or
    ``OUTPUT_PATH/unsure`` (otherwise).  A mail is considered valid when
    its sender contains ``COUNTY_EXTENSION``, "MAILER-DAEMON", "county" or
    one of the known county names, when the sender has no "@", or when the
    lower-cased subject contains "re:" or "fw".  Valid mails are moved to
    ``OUTPUT_PATH/ham``.  Mails for which ``obtainSubjectEmail`` or
    ``obtainFromEmail`` returns ``None`` are skipped.

    The bogofilter retraining commands are only printed, not executed.

    Args:
        from_where: ``"spam"`` or ``"unsure"``.
    """
    # Local import: the move is done with shutil instead of a shell "mv",
    # so file names never pass through the shell.
    import shutil

    valid_names=[]
    COUNTY_NAMES = ["wilkes","polk","vance","transylvania","person","nash","chatham","mcdowell","lincoln","lenoir","dare","camden","caldwell","chowan","jackson","alexander","poke","montogmery","hoke","duplin","columbus","randoplh"]
    print(COUNTY_NAMES)
    COUNTY_NAMES = COUNTY_NAMES + valid_names
    print(COUNTY_NAMES)
    parser = email.parser.Parser()
    output_directory = OUTPUT_PATH
    if from_where == "spam":
        input_directory = output_directory + "/spam"
    else:
        input_directory = output_directory + "/unsure"

    for path, subdirs, files in os.walk(input_directory):
        for filename in files:
            f_name = os.path.join(path, filename)
            print(f_name)
            # Only used for the printed command lines below.
            replaced_fname = f_name.replace(" ", "\\ ")
            valid = 0
            with open(f_name, "r") as handle:
                emaildata = parser.parse(handle)
            subject = obtainSubjectEmail(emaildata)
            sender = obtainFromEmail(emaildata)
            if subject is None or sender is None:
                continue
            subject = subject[0].lower()
            sender = sender[0]
            print("%s %s %s" % (subject, sender, COUNTY_EXTENSION))
            # Own-domain senders, replies/forwards, bounces and senders
            # without a proper address are treated as valid.
            if (COUNTY_EXTENSION in sender) or "re:" in subject or "MAILER-DAEMON" in sender or "fw" in subject or "county" in sender or "@" not in sender:
                print("%s %s %s %s" % (subject, sender, COUNTY_EXTENSION, valid))
                valid = 1
            for name in COUNTY_NAMES:
                if name in sender:
                    valid = 1
                    break
            print(valid)
            if not valid:
                continue

            print("Found")
            if from_where == "spam":
                command = "bogofilter -Sn -v < " + replaced_fname
                print(command)
            elif from_where == "unsure":
                command = "bogofilter -n -v < " + replaced_fname
                print(command)
            # Same log line as before; the move itself no longer goes
            # through the shell.
            command = "mv " + replaced_fname + " " + output_directory + "/ham/"
            print(command)
            try:
                shutil.move(f_name, os.path.join(output_directory, "ham", filename))
            except (IOError, OSError, shutil.Error) as exc:
                # A failed "mv" did not abort the run either.
                print("Could not move %s: %s" % (f_name, exc))
示例#23
0
文件: taps.py 项目: kuking/mailshaker
 def all_messages(self):
     """Yield ``(path, message)`` for every parseable file in ``self._all_files``.

     Directories are skipped.  A file that cannot be decoded with the
     default encoding is retried as latin-1 when ``self._try_latin1`` is
     set; otherwise (or if that fails too) an error is logged and the
     file is skipped.  Every file is closed as soon as it has been
     parsed, including when decoding fails.
     """
     parser = email.parser.Parser()
     for path in self._all_files:
         try:
             with codecs.open(path, 'r') as f:
                 msg = parser.parse(f)
         except IsADirectoryError:
             continue
         except UnicodeDecodeError as e:
             if not self._try_latin1:
                 logging.error("Couldn't decode %s - error: %s"%(path, e))
                 continue
             logging.info("HACK! failed with UTF-8, trying with latin-1...")
             try:
                 with codecs.open(path, 'r', encoding='latin-1') as f:
                     msg = parser.parse(f)
             except UnicodeDecodeError as ee:
                 logging.error("Couldn't decode %s, even after trying with latin-1 Hack, Unicode: %s"%(path, ee))
                 continue
         yield (path, msg)
示例#24
0
    def handle_check(self):
        """Check the message read from ``self.rfile`` against a Pyzor server.

        The server list is loaded from ``/root/.pyzor/servers`` and the
        first entry is queried with the message digest; the items of the
        response are written back as JSON.
        """
        msg = email.parser.BytesParser(
            policy=email.policy.SMTP).parse(self.rfile)

        server_list = pyzor.config.load_servers("/root/.pyzor/servers")

        message_digest = pyzor.digest.DataDigester(msg).value
        client = pyzor.client.Client()
        response = client.check(message_digest, address=server_list[0])

        self.write_json(dict(response.items()))
示例#25
0
def process_mails(rootdirs, fields=('Date', 'From'), ratio=1.0):
    """Yield selected headers plus the body of mails found under ``rootdirs``.

    Every directory visited is printed.  Each file is kept with
    probability ``ratio``; for a kept file a list is yielded holding the
    value of each header in ``fields`` followed by the body, cut off at
    the first ``'---'`` if there is one.

    Args:
        rootdirs: iterable of directories to walk.
        fields: header names to extract (an immutable default is used so
            it cannot be modified between calls).
        ratio: sampling probability; 1.0 keeps every file.
    """
    parser = email.parser.Parser()
    for rootdir in rootdirs:
        for path, dirs, files in os.walk(rootdir):
            # Single-argument print(): valid on Python 2 and 3 alike.
            print(path)
            for fname in files:
                if not random.random() < ratio:
                    continue
                with open(os.path.join(path, fname)) as f:
                    # Headers only: the payload is the unparsed body text.
                    hdrs = parser.parse(f, True)
                body = hdrs.get_payload()
                if '---' in body:
                    body = body[:body.index('---')]
                yield [hdrs[field] for field in fields] + [body]
示例#26
0
    def do(self):
        """Parse the headers of ``self.eml_file`` and run the base job.

        On success the parsed headers are stored in ``self.eml_msg``.
        Any failure while reading or parsing is ignored, in which case
        ``self.eml_msg`` is not assigned here.  Returns whatever
        ``Job.do`` returns.
        """
        try:
            # The context manager closes the file after parsing.
            with open(self.eml_file, 'r') as fp:
                self.eml_msg = email.parser.HeaderParser().parse(fp)
        except Exception:
            # Deliberately best effort: the job still runs without headers.
            pass

        return Job.do(self)
示例#27
0
 def metadata(self):
     """
     Return the contents of the :file:`METADATA` file inside the wheel.

     The parsed message is cached in ``self._metadata`` after the first
     call.  :exc:`KeyError` from :mod:`zipfile` propagates when the wheel
     has no such member.
     """
     if self._metadata is None:
         filename = ('{self.package_tag}-'
                     '{self.package_version_tag}.dist-info/'
                     'METADATA'.format(self=self))
         # ZipFile does not close a file object it was handed, so the
         # wheel file gets its own context manager.
         with self.wheel_file.open('rb') as wheel_fp, \
                 zipfile.ZipFile(wheel_fp) as wheel:
             with wheel.open(filename) as metadata:
                 parser = email.parser.BytesParser()
                 self._metadata = parser.parse(metadata)
     return self._metadata
示例#28
0
    def do(self):
        """Load the headers of ``self.eml_file`` and then run ``Job.do``.

        The parsed headers are stored in ``self.eml_msg``.  Errors while
        opening or parsing the file are swallowed, leaving
        ``self.eml_msg`` unassigned by this call.  The return value is
        that of ``Job.do``.
        """
        try:
            with open(self.eml_file, 'r') as fp:
                parser = email.parser.HeaderParser()
                self.eml_msg = parser.parse(fp)
        except Exception:
            # Best effort on purpose; the handle is closed by the with block.
            pass

        return Job.do(self)
示例#29
0
 def generator():
     """Yield header summaries of quarantined mails changed in the last day.

     Files matching ``spam_glob`` (a name resolved outside this function)
     whose ctime is newer than 24 hours ago are opened with gzip, their
     headers parsed, and an ``ns_dict`` with the recipient, sender,
     subject, quarantine id, spam score and ctime is yielded for each.
     """
     cutoff = (datetime.now() - timedelta(days=1)).timestamp()
     header_parser = email.parser.BytesHeaderParser()
     for path in glob.iglob(spam_glob):
         changed = os.stat(path)[stat.ST_CTIME]
         if changed <= cutoff:
             continue
         with gzip.open(path) as fh:
             headers = header_parser.parse(fh)
             summary = {
                 'to': headers['To'],
                 'frm': headers['From'],
                 'subj': headers['Subject'],
                 'id': headers['X-Quarantine-ID'],
                 'score': headers['X-Spam-Score'],
                 'time': changed
             }
             yield ns_dict(summary)
示例#30
0
def process_mails(outfile, rootdir):
    """Write one CSV row per (mail, recipient) for all mails under ``rootdir``.

    Each row holds the mail's path relative to ``rootdir``, its Date,
    Message-ID and From headers, one recipient and the header the
    recipient came from (To, Cc or Bcc).  Mails without any of these
    recipient headers get a single 'All Enron Worldwide' row when their
    X-To header says so; otherwise they are skipped and counted.
    Progress is printed every 100 mails.

    Args:
        outfile: path of the CSV file to write.
        rootdir: directory tree containing the mail files.
    """
    count = 0 # for progress reporting...
    skipped_no_to = 0
    skipped_x_to = 0

    parser = email.parser.Parser()

    # Binary mode is what the Python 2 csv module expects.
    with open(outfile, "wb") as outf:
        csvwriter = csv.writer(outf)
        for path, dirs, files in os.walk(rootdir):
            for fname in files:
                with open(os.path.join(path, fname)) as f:
                    hdrs = parser.parse(f, True)

                mailpath = os.path.join(path[len(rootdir):], fname)

                outelems = [
                    mailpath,
                    hdrs['Date'],
                    hdrs['Message-ID'],
                    hdrs['From'],
                ]
                # for each of the "to" fields, add a separate row for each
                # recipient, and the "to-type"
                written = False
                for to in ['To', 'Cc', 'Bcc']:
                    if hdrs[to]:
                        for rcpt in hdrs[to].split(','):
                            csvwriter.writerow(outelems + [rcpt.strip(), to])
                            written = True
                if not written:
                    # is this a company-wide email?
                    if hdrs['X-to'] and "All Enron Worldwide" in hdrs['X-To']:
                        csvwriter.writerow(outelems + ['All Enron Worldwide', 'X-To'])
                    elif hdrs['X-To'] or hdrs['X-cc'] or hdrs['X-bcc']:
                        # Any of the three may be missing (None); treat a
                        # missing header as empty instead of failing on
                        # None + str.
                        xtos = ''.join(hdrs[name] or ''
                                       for name in ('X-To', 'X-cc', 'X-bcc'))
                        print("Skipping %s, has X-to fields: %s" % (mailpath, xtos))
                        skipped_x_to += 1
                    else:
                        skipped_no_to += 1

                # progress reporting
                count += 1
                if count % 100 == 0:
                    print("%d (skipped: %d,%d)" % (count, skipped_x_to, skipped_no_to))
示例#31
0
def main():
    """Spool the printable attachments of the mail arriving on stdin.

    The sender is identified with ``get_username``; every named part whose
    type is whitelisted (or textual) is handed to ``spool_file`` and
    acknowledged with a zephyr.  Parts with other types, and mails with no
    printable attachment at all, produce an error zephyr instead.  Errors
    are reported by zephyr and then re-raised.
    """
    username = None
    try:
        # EmailMessage doesn't exist before 3.4, so the default message
        # class is used instead of
        # email.parser.Parser(_class=email.message.EmailMessage).
        msg = email.parser.Parser().parse(sys.stdin)
        username = get_username(msg)
        if not username:
            sender_hints = (msg.get_unixfrom(), msg.get('Sender'),
                            msg.get('From'))
            raise MailprintError(
                'could not identify sender: {} | {} | {}'.format(
                    *sender_hints))

        log_line = '[{}] incoming message from {}'.format(
            datetime.datetime.now(), username)
        print(log_line, file=sys.stderr)

        subject = msg.get('Subject') or ''
        spooled_file = False
        for part in msg.walk():
            name = part.get_filename()
            if not name:
                # Parts without a file name are not attachments to print.
                continue

            mimetype = part.get_content_type()
            printable = (mimetype in TYPE_WHITELIST
                         or part.get_content_maintype() == 'text')
            if not printable:
                complaint = ('file ' + part.get_filename() +
                             ' has illegal type ' + mimetype +
                             '\nplease send a text file, ' +
                             'or a file with type:\n' +
                             '\n'.join(TYPE_WHITELIST))
                send_zephyr([username], 'error', complaint)
                continue

            is_pdf = mimetype == 'application/pdf'
            spool_file(name, part.get_payload(decode=True), username, is_pdf,
                       'color' in subject)
            send_zephyr([username], 'info', 'Spooled file: ' + name)
            spooled_file = True

        if not spooled_file:
            send_zephyr([username], 'error',
                        'Your print request with subject:\n' + subject +
                        '\nwas received, but had no printable attachments.')
    except MailprintError as e:
        e.send_zephyr()
        raise
    except Exception:
        zephyr_error()
        raise
def parse_email(filepath):
  """Parse the mail at ``filepath`` into a dict of UTF-8 encoded fields.

  The Message-ID, Subject, Date and From headers and the body are
  re-encoded from cp1252 to UTF-8; the To, Cc and Bcc headers are passed
  through ``parse_email_addresses``.

  Returns a dict with the keys ``message_id``, ``subject``, ``date``,
  ``from``, ``to``, ``cc``, ``bcc`` and ``body``.
  """
  # The context manager closes the file even when parsing raises.
  with open(filepath, "r") as fileobj:
    emailobj = email.parser.Parser().parse(fileobj)

  def _to_utf8(text):
    # Re-encode a cp1252 byte string as UTF-8.
    return text.decode('cp1252').encode('utf-8')

  revised_parsed_email = dict()
  revised_parsed_email['message_id'] = _to_utf8(emailobj['Message-ID'])
  revised_parsed_email['subject'] = _to_utf8(emailobj['Subject'])
  revised_parsed_email['date'] = _to_utf8(emailobj['Date'])
  revised_parsed_email['from'] = _to_utf8(emailobj['From'])
  revised_parsed_email['to'] = parse_email_addresses(emailobj['To'])
  revised_parsed_email['cc'] = parse_email_addresses(emailobj['Cc'])
  revised_parsed_email['bcc'] = parse_email_addresses(emailobj['Bcc'])
  revised_parsed_email['body'] = _to_utf8(emailobj.get_payload())
  return revised_parsed_email
def parseElement(filename,element):
    """Print one element of the mail stored in ``filename``.

    When ``element`` is ``"message"`` (case-insensitive) the payload of
    the innermost first part is printed, i.e. the first sub-part is
    followed until a non-multipart part is reached.  For any other value
    the first header named ``element`` is printed.  An empty line is
    printed when the header does not exist.

    Args:
        filename: path of the mail file.
        element: ``"message"`` or a header name.
    """
    with open(filename, "r") as handle:
        email_val = email.parser.Parser().parse(handle)

    if element.lower() == "message":
        while True:
            try:
                email_val = email_val.get_payload(0)
            except (TypeError, IndexError):
                # TypeError: payload is not a list (leaf part reached);
                # IndexError: multipart container without sub-parts.
                break
        element_val = email_val.get_payload()
    else:
        # get() returns the first matching header, or None when absent.
        element_val = email_val.get(element)

    if element_val is not None:
        print(element_val)
    else:
        print("")
示例#34
0
文件: io.py 项目: jwilges/drover
def get_digest(source_file_names: Sequence[Path],
               block_size: int = 8192) -> Optional[str]:
    """Compute one SHA256 hex digest covering every given source file.

    Files listed with a hash in a ``.dist-info/RECORD`` file that is itself
    among the sources contribute their recorded name and hash; every other
    file contributes its content (``.egg-info/PKG-INFO`` files contribute
    their headers in sorted order).

    Args:
        source_file_names: A sequence of source file paths
        block_size: Buffer size passed to ``open`` for each file

    Returns: The hex digest, or None when no source files were given."""
    # See the PEP-376 RECORD file specification: <https://www.python.org/dev/peps/pep-0376/#record>
    record_suffix = re.compile(r'\.dist-info/RECORD$')
    pkg_info_suffix = re.compile(r'\.egg-info/PKG-INFO$')
    hasher = hashlib.sha256()
    wanted = set(source_file_names)
    covered = set()
    if not wanted:
        return None

    # First pass: let RECORD files vouch for the sources they list.
    for candidate in sorted(wanted):
        if not record_suffix.search(str(candidate)):
            continue
        install_root = candidate.parent.parent
        with open(candidate, 'r', buffering=block_size) as record:
            rows = csv.reader(record,
                              delimiter=',',
                              quotechar='"',
                              lineterminator=os.linesep)
            for row in rows:
                entry_name, entry_hash, _ignored = row[:3]
                entry_path = install_root / entry_name
                if entry_hash and entry_path in wanted:
                    hasher.update((str(entry_name) + entry_hash).encode())
                    covered.add(entry_path)

    # Second pass: hash whatever no RECORD entry accounted for.
    for leftover in sorted(wanted - covered):
        with open(leftover, 'rb', buffering=block_size) as stream:
            if not pkg_info_suffix.search(str(leftover)):
                hasher.update(stream.read())
                continue
            # Ensure deterministic field order from PKG-INFO files
            # See: https://www.python.org/dev/peps/pep-0314/#including-metadata-in-packages
            header_parser = email.parser.BytesHeaderParser(
                policy=email.policy.default)
            for field, text in sorted(header_parser.parse(stream).items()):
                hasher.update(field.encode())
                hasher.update(text.encode())

    return hasher.hexdigest()
示例#35
0
    def __init__(self, config, sendmail, *a, **kw):
        """
        Initialise the SMTP factory and bind an address parser to it.

        Args:
            config: ConfigParser object with the configuration of the
                Mailing Set SMTP server.
            sendmail: Callable with the same signature as smtp.sendmail,
                used to send outgoing messages. Tests pass a stand-in so
                they can check assertions on outgoing mail; production
                code passes smtp.sendmail itself.
        """
        smtp.SMTPFactory.__init__(self, *a, **kw)

        self.config = config
        self.sendmail = sendmail

        # The list definitions are built once here; every later address
        # lookup goes through this same state object.
        list_state = MailingSetState(self.config)

        def parse_address(address):
            return parser.parse(list_state, address)

        self.parse = parse_address
示例#36
0
def git_am_patch_split(f):
    """Split a git-am-style patch e-mail into its components.

    :param f: File-like object holding the patch e-mail
    :return: Tuple of (commit object, diff text, git version or None)
    """
    msg = email.parser.Parser().parse(f)
    c = Commit()
    c.author = msg["from"]
    c.committer = msg["from"]

    raw_subject = msg["subject"]
    try:
        tag_at = raw_subject.index("[PATCH")
    except ValueError:
        subject = raw_subject
    else:
        # Drop everything up to and including the "] " that closes the tag.
        tag_end = raw_subject.index("] ", tag_at)
        subject = raw_subject[tag_end + 2:]
    c.message = subject.replace("\n", "") + "\n"

    body = BytesIO(msg.get_payload())

    # Commit message: every line before the "---" separator.  A leading
    # "From: " line replaces the author instead of joining the message.
    for position, line in enumerate(body):
        if line == "---\n":
            break
        if position > 0:
            c.message += line
        elif line.startswith("From: "):
            c.author = line[len("From: "):].rstrip()
        else:
            c.message += "\n" + line

    # Diff: every following line up to the "-- " signature separator.
    diff_lines = []
    for line in body:
        if line == "-- \n":
            break
        diff_lines.append(line)
    diff = "".join(diff_lines)

    # The line after the signature separator, if any, is the git version.
    trailer = next(body, None)
    version = None if trailer is None else trailer.rstrip("\n")
    return c, diff, version
示例#37
0
def git_am_patch_split(f):
    """Split a git-am-style patch e-mail into its components.

    :param f: File-like object holding the patch e-mail
    :return: Tuple of (commit object, diff text, git version or None)
    """
    msg = email.parser.Parser().parse(f)
    c = Commit()
    c.author = msg["from"]
    c.committer = msg["from"]

    raw_subject = msg["subject"]
    try:
        tag_at = raw_subject.index("[PATCH")
    except ValueError:
        subject = raw_subject
    else:
        # Drop everything up to and including the "] " that closes the tag.
        tag_end = raw_subject.index("] ", tag_at)
        subject = raw_subject[tag_end + 2:]
    c.message = subject.replace("\n", "") + "\n"

    body = BytesIO(msg.get_payload())

    # Commit message: every line before the "---" separator.  A leading
    # "From: " line replaces the author instead of joining the message.
    for position, line in enumerate(body):
        if line == "---\n":
            break
        if position > 0:
            c.message += line
        elif line.startswith("From: "):
            c.author = line[len("From: "):].rstrip()
        else:
            c.message += "\n" + line

    # Diff: every following line up to the "-- " signature separator.
    diff_lines = []
    for line in body:
        if line == "-- \n":
            break
        diff_lines.append(line)
    diff = "".join(diff_lines)

    # The line after the signature separator, if any, is the git version.
    trailer = next(body, None)
    version = None if trailer is None else trailer.rstrip("\n")
    return c, diff, version
示例#38
0
def generate_single_mbox(conn, listid, year, month, destination):
    """Write one month of a list's non-hidden messages to an mbox file.

    Queries ``conn`` for the messages of ``listid`` dated within the given
    ``year``/``month`` (ordered by date) and writes each one, including its
    unix From line, to ``destination``.  A message that cannot be written
    is reported on stdout and left out.
    """
    curs = conn.cursor()
    first_day = date(year, month, 1)
    last_day = date(year, month, calendar.monthrange(year, month)[1])
    query = "SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date"
    curs.execute(query, {
        'listid': listid,
        'startdate': first_day,
        'enddate': last_day,
    })
    with open(destination, 'w', encoding='utf8') as mbox:
        for _msgid, rawtxt in curs:
            raw_parser = email.parser.BytesParser(policy=email.policy.compat32)
            msg = raw_parser.parse(BytesIO(rawtxt))
            try:
                mbox.write(msg.as_string(unixfrom=True))
            except UnicodeEncodeError:
                print("Not including {0}, unicode error".format(msg['message-id']))
            except Exception as exc:
                print("Not including {0}, exception {1}".format(msg['message-id'], exc))
示例#39
0
def eatfiles(dirname, outfile):
    """Harvest address/name pairs from every mail file under ``dirname``.

    Walks ``dirname`` recursively and parses the headers of each file.
    For messages dated 2006 or later, the (address, name) pairs found in
    the To/From/Cc/Bcc and Resent-* headers are merged into the
    module-level ``emailNameMap``, except pairs rejected by
    ``infolosing``.  The whole map is finally written to ``outfile``, one
    ``name , address`` line per entry.

    Files that cannot be processed are reported on stdout and skipped.
    """
    address_headers = ('to', 'from', 'cc', 'bcc',
                       'resent-to', 'resent-from', 'resent-cc', 'resent-bcc')
    for dirpath, _dirs, files in os.walk(dirname):
        for filename in files:
            fullpath = os.path.join(dirpath, filename)
            with open(fullpath) as infile:
                try:
                    msg = email.parser.Parser().parse(infile, True)

                    # skip things that were prior to 2006; a missing or
                    # unparsable Date makes parsedate() return None, which
                    # lands in the handler below and skips the file.
                    year = email.utils.parsedate(msg.get('date'))[0]
                    if year < 2006:
                        continue

                    # include all address fields...maybe too many? Depends
                    # on how they are used in real life
                    header_values = []
                    for header in address_headers:
                        header_values += msg.get_all(header, [])

                    # generate (name, address) tuples from the header
                    # fields, then switch them to (address, name)
                    newaddrs = [(address, name) for name, address
                                in email.utils.getaddresses(header_values)]

                    # filter out the pairs infolosing() rejects
                    newaddrs = [addr for addr in newaddrs if not infolosing(addr)]

                    # merge
                    emailNameMap.update(newaddrs)
                except Exception:
                    # if there was a non-email file sitting there, try to
                    # skip it and continue
                    print('Problem parsing ' + fullpath + '; Skipping file...')
                    continue
    # The context manager flushes and closes the output file.
    with open(outfile, 'w') as of:
        for key, value in emailNameMap.items():
            of.write(value + ' , ' + key + '\n')
def parse_path(path, stop_words, filter_addr_fn = None):
    """Recursively parse the e-mail file(s) found at ``path``.

    A directory is descended into; a file is parsed only when its base
    name matches ``file_re`` and is then passed through ``process_email``.

    Returns:
        A list of the processed e-mails (each with its source path stored
        under the ``'file'`` key); empty when nothing was accepted.
    """
    if os.path.isdir(path):
        emails = []
        for filename in os.listdir(path):
            emails += parse_path(os.path.join(path, filename), stop_words, filter_addr_fn = filter_addr_fn)
        return emails

    filename = os.path.basename(path)
    if not file_re.match(filename):
        if __DEBUG__:
            sys.stderr.write("File " + filename + " is not in the expected \\d+. filename format. Skipping...\n")
        return []

    # Close each file once it is parsed instead of leaking the handle.
    with open(path) as handle:
        e = parser.parse(handle)
    e = process_email(e, stop_words, filter_addr_fn = filter_addr_fn)

    if e is None:
        return []
    e['file'] = path
    return [e]
示例#41
0
def main():
    """Read a message from stdin and run it through ``process_message``.

    The parsed command-line options are stored in the module-level
    ``opts``.  An ``InvalidInputMessage`` is ignored; any other exception
    is re-raised unless ``opts.force`` is set, in which case it is only
    reported on stderr.
    """
    global opts
    opts = parse_args()
    flags = set()

    if opts.only_html:
        flags.add('only-html')

    parser = email.parser.Parser()
    msg = parser.parse(sys.stdin)

    try:
        msg = process_message(msg, flags=flags)
    except InvalidInputMessage:
        pass
    except Exception as detail:
        if not opts.force:
            raise
        sys.stderr.write('ERROR: %s\n' % detail)
示例#42
0
    def test_customize_message(self):
        """Running the customizer writes a single-part message file."""
        mailing = factories.MailingFactory()
        recipient = factories.RecipientFactory(mailing=mailing)

        customizer = MailCustomizer(recipient)
        fullpath = os.path.join(customizer.temp_path, MailCustomizer.make_file_name(recipient.mailing.id, recipient.id))
        # Start from a clean slate so the existence check below is meaningful.
        if os.path.exists(fullpath):
            os.remove(fullpath)

        self.assertFalse(os.path.exists(fullpath))

        customizer._run_customizer()

        self.assertTrue(os.path.exists(fullpath))
        parser = email.parser.Parser()
        # open() in a with-block closes the handle even if parsing fails.
        with open(fullpath, 'rt') as message_file:
            message = parser.parse(message_file, headersonly = False)
        self.assertIsInstance(message, email.message.Message)
        self.assertFalse(message.is_multipart())
        self.assertTrue('Date' in message)
        self.assertEqual('This is a very simple mailing.', message.get_payload())
def parse_email(filepath):
    """Parse the e-mail stored at ``filepath`` into a plain dictionary.

    The Message-ID, Subject, Date and From headers and the payload are
    re-encoded from cp1252 to UTF-8; the To, Cc and Bcc headers are passed
    to ``parse_email_addresses`` as they are.

    Returns:
        A dict with the keys ``message_id``, ``subject``, ``date``,
        ``from``, ``to``, ``cc``, ``bcc`` and ``body``.
    """
    def _to_utf8(raw):
        # Byte strings are decoded as cp1252 and stored as UTF-8.
        return raw.decode('cp1252').encode('utf-8')

    # The context manager closes the file even when parsing raises.
    with open(filepath, "r") as fileobj:
        emailobj = email.parser.Parser().parse(fileobj)

    revised_parsed_email = dict()

    revised_parsed_email['message_id'] = _to_utf8(emailobj['Message-ID'])
    revised_parsed_email['subject'] = _to_utf8(emailobj['Subject'])
    revised_parsed_email['date'] = _to_utf8(emailobj['Date'])
    revised_parsed_email['from'] = _to_utf8(emailobj['From'])
    revised_parsed_email['to'] = parse_email_addresses(emailobj['To'])
    revised_parsed_email['cc'] = parse_email_addresses(emailobj['Cc'])
    revised_parsed_email['bcc'] = parse_email_addresses(emailobj['Bcc'])
    revised_parsed_email['body'] = _to_utf8(emailobj.get_payload())
    return revised_parsed_email
示例#44
0
def fuzz(parser):
    """Parse the file named by ``sys.argv[1]`` and exercise the message API.

    Every result is discarded; the calls are made only so that an
    exception raised while parsing or querying the message surfaces.
    """
    with open(sys.argv[1], "rb") as stream:
        msg = parser.parse(stream)

    msg.as_bytes(policy=email.policy.default)
    msg.is_multipart()
    msg.get_unixfrom()

    # Look every header up both ways.
    for name in msg.keys():
        msg.get(name)
        msg.get_all(name)
    msg.values()

    # Argument-less accessors, called in a fixed order.
    accessors = (
        "get_content_type",
        "get_content_maintype",
        "get_content_subtype",
        "get_default_type",
        "get_filename",
        "get_boundary",
        "get_content_charset",
        "is_attachment",
        "get_content_disposition",
    )
    for accessor in accessors:
        getattr(msg, accessor)()

    # Exhaust the tree walk.
    for _part in msg.walk():
        pass
示例#45
0
    def __init__(self, fromlines=None, fromstring=None, fromfile=None):
        """Build the message from the first truthy source given.

        Args:
            fromlines: Lines of the message; joined with ``os.linesep``
                before parsing.
            fromstring: The complete message text.
            fromfile: File object to parse the message from.

        Raises:
            SystemExit: when none of the three sources is truthy.
        """
        self.recipient = None
        self.received_by = None
        self.received_from = None
        self.received_with = None
        self.__raw = None
        parser = email.parser.Parser()

        if not (fromlines or fromstring or fromfile):
            # Can't happen?
            raise SystemExit('Message() called with wrong arguments')

        # fromlines is used for POP3 and fromstring for IMAP; both can be
        # badly corrupted or invalid (spam, MS worms, etc).  fromfile is
        # used for the output of filters, etc, which should be saner.
        if fromlines:
            try:
                parsed = parser.parsestr(os.linesep.join(fromlines))
            except email.errors.MessageError as o:
                parsed = corrupt_message(o, fromlines=fromlines)
            self.__raw = os.linesep.join(fromlines)
        elif fromstring:
            try:
                parsed = parser.parsestr(fromstring)
            except email.errors.MessageError as o:
                parsed = corrupt_message(o, fromstring=fromstring)
            self.__raw = fromstring
        else:
            # fromfile is only used by getmail_maildir, getmail_mbox, and
            # from reading the output of a filter.  __raw stays None here.
            try:
                parsed = parser.parse(fromfile)
            except email.errors.MessageError as o:
                # Shouldn't happen
                parsed = corrupt_message(o, fromstring=fromfile.read())

        self.__msg = parsed
        return_path = parsed['return-path'] or 'unknown'
        self.sender = address_no_brackets(return_path)
示例#46
0
 def __init__(self, path, dependencies=None):
     """Record the wheel's file details and load its METADATA.

     Args:
         path: Path object of the wheel file.
         dependencies: Optional dependency mapping; a fresh dict is used
             when None.

     Raises:
         BadWheel: when none of the candidate METADATA entries exists in
             the archive.
     """
     self.wheel_file = path
     self._filesize = path.stat().st_size
     self._filehash = None
     self._dependencies = {} if dependencies is None else dependencies
     self._parts = list(path.stem.split('-'))
     # Fix up retired tags (noabi->none)
     if self._parts[-2] == 'noabi':
         self._parts[-2] = 'none'
     # Metadata is read eagerly, not lazily, so that a corrupt or invalid
     # wheel is reported at construction time instead of when the
     # metadata is first queried.
     with zipfile.ZipFile(self.open()) as wheel:
         candidates = (
             '{self.package_tag}-{self.package_version_tag}.dist-info/'
             'METADATA'.format(self=self),
             '{self.package_canon}-{self.package_version_tag}.dist-info/'
             'METADATA'.format(self=self),
         )
         for candidate in candidates:
             try:
                 with wheel.open(candidate) as stream:
                     self._metadata = email.parser.BytesParser().parse(stream)
             except KeyError:
                 # Entry not found; try the next candidate name.
                 continue
             break
         else:
             metadata_like = {
                 info.filename
                 for info in wheel.infolist()
                 if info.filename.endswith('METADATA')
             }
             raise BadWheel(
                 'Unable to locate METADATA in %s; attempted: %r; '
                 'possible files: %r' % (
                     self.wheel_file, candidates, metadata_like))
示例#47
0
文件: addrbook.py 项目: SjB/dotfiles
 def index(self, msgs):
     """Index messages in the address book.

     The From/To/Cc/Bcc addresses of every message in ``msgs`` are handed
     to ``self._add``.  Whether or not indexing completes, the database is
     merged and pickled to ``_DBPATH`` and the totals are logged.
     """
     tot_msg = 0
     tot_addr = 0
     try:
         parser = email.parser.Parser()
         for msg in msgs:
             fn = msg.get_filename()
             try:
                 with open(fn, "r") as f:
                     mail = parser.parse(f, True)
             except UnicodeDecodeError:
                 # The file is not valid in the default encoding; retry
                 # as Latin-9 instead of aborting the whole indexing run.
                 with open(fn, "r", encoding="latin9") as f:
                     mail = parser.parse(f, True)
             addrs = []
             for hdr in ("from", "to", "cc", "bcc"):
                 addrs += mail.get_all(hdr, [])
             addrs = email.utils.getaddresses(addrs)
             tot_addr += self._add(addrs)
             tot_msg += 1
             if (tot_msg % 20) == 0:
                 # Lazy %-args: the message is only built when DEBUG is on.
                 logging.debug("Messages: %d; addresses: %d",
                               tot_msg, tot_addr)
     finally:
         # At the end, save the DB
         self._merge_db()
         with open(os.path.expanduser(_DBPATH), "wb") as f:
             pickle.dump(self._db, f, pickle.HIGHEST_PROTOCOL)
         logging.info(
             "Total: indexed %d messages and %d addresses. "
             "%d unique addresses in the address book.",
             tot_msg,
             tot_addr,
             len(self._db),
         )
 def generator():
     """Yield one record per recent gzipped file matched by ``spam_glob``.

     Only files whose ctime is newer than 24 hours ago and whose name has
     a ``.gz`` suffix are read; each yields an ``ns_dict`` built from the
     file's mail headers.
     """
     cutoff = (datetime.now() - timedelta(days=1)).timestamp()
     header_parser = email.parser.BytesHeaderParser()
     # loop through all objects in the quarantine
     for path in glob.iglob(spam_glob):
         ctime = os.stat(path)[stat.ST_CTIME]
         # anything not newer than the cutoff is ignored
         if ctime <= cutoff:
             continue
         # only gzipped files are handled
         if '.gz' not in pathlib.Path(path).suffixes:
             continue
         with gzip.open(path, 'rb') as stream:
             headers = header_parser.parse(stream)
             yield ns_dict({
                 'date': parser.parse(headers['Date']),
                 'to': headers['To'],
                 'frm': headers['From'],
                 'subj': headers['Subject'],
                 'id': path.split("virusmails/")[1],
                 'score': headers['X-Spam-Score'],
                 'xto': headers['X-Envelope-To'],
                 'time': ctime
             })
示例#49
0
文件: message.py 项目: simpkins/amt
    def from_maildir(cls, path):
        """Build an instance from the maildir message file at ``path``.

        The message is parsed with ``cls.msg_parser()`` and its timestamp
        is the file's mtime.  Flags come from the file name: the part
        after the first ``:`` contributes flags when it starts with
        ``2,``, and a parent directory named ``new`` adds ``FLAG_NEW``.
        """
        # Parse the message itself
        parser = cls.msg_parser()
        with open(path, 'rb') as f:
            timestamp = os.fstat(f.fileno()).st_mtime
            msg = parser.parse(f)

        # Load the metadata from the file name
        parent, basename = os.path.split(path)
        _name, _colon, info = basename.partition(':')

        flags = set()
        if info.startswith('2,'):
            # Letter -> name of the class attribute holding the flag.
            letter_flags = (
                ('P', 'FLAG_FORWARDED'),
                ('R', 'FLAG_REPLIED_TO'),
                ('S', 'FLAG_SEEN'),
                ('T', 'FLAG_DELETED'),
                ('D', 'FLAG_DRAFT'),
                ('F', 'FLAG_FLAGGED'),
            )
            for letter, attr in letter_flags:
                if letter in info:
                    flags.add(getattr(cls, attr))

        if os.path.basename(parent) == 'new':
            flags.add(cls.FLAG_NEW)

        return cls(msg, timestamp, flags, set())
示例#50
0
    def test_customize_simple_message_with_recipient_attachment(self):
        """A recipient-level attachment yields a two-part multipart message."""
        recipient = factories.RecipientFactory(
            contact_data={
                'email': '*****@*****.**',
                'custom': 'very simple',
                'attachments': [
                    {
                        'filename': "export.csv",
                        'data': base64.b64encode("col1;col2;col3\nval1;val2;val3\n"),
                        'content-type': 'text/plain',
                        'charset': 'us-ascii',
                    },
                ]
            }
        )

        customizer = MailCustomizer(recipient)
        fullpath = os.path.join(customizer.temp_path, MailCustomizer.make_file_name(recipient.mailing.id, recipient.id))
        # Start from a clean slate so the existence check below is meaningful.
        if os.path.exists(fullpath):
            os.remove(fullpath)

        self.assertFalse(os.path.exists(fullpath))

        customizer._run_customizer()

        self.assertTrue(os.path.exists(fullpath))
        parser = email.parser.Parser()
        # open() in a with-block closes the handle even if parsing fails.
        with open(fullpath, 'rt') as message_file:
            message = parser.parse(message_file, headersonly = False)
        self.assertIsInstance(message, email.message.Message)
        self.assertTrue(message.is_multipart())
        # First part is the mailing body, second part the attachment.
        self.assertEqual(message.get_payload(i=0).get_payload(), 'This is a very simple mailing.')
        self.assertEqual(message.get_payload(i=1).get_payload(), 'col1;col2;col3\nval1;val2;val3\n')
示例#51
0
def generate_single_mbox(conn, listid, year, month, destination):
    """Write one month of a list's non-hidden messages to an mbox file.

    Queries ``conn`` for the messages of ``listid`` dated within the given
    ``year``/``month`` (ordered by date) and writes each one, including its
    unix From line, to ``destination``.  A message that cannot be written
    is reported on stdout and left out.
    """
    curs = conn.cursor()
    first_day = date(year, month, 1)
    last_day = date(year, month, calendar.monthrange(year, month)[1])
    query = "SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date"
    curs.execute(query, {
        'listid': listid,
        'startdate': first_day,
        'enddate': last_day,
    })
    with open(destination, 'w', encoding='utf8') as mbox:
        for _msgid, rawtxt in curs:
            raw_parser = email.parser.BytesParser(policy=email.policy.compat32)
            msg = raw_parser.parse(BytesIO(rawtxt))
            try:
                mbox.write(msg.as_string(unixfrom=True))
            except UnicodeEncodeError:
                print("Not including {0}, unicode error".format(
                    msg['message-id']))
            except Exception as exc:
                print("Not including {0}, exception {1}".format(
                    msg['message-id'], exc))
示例#52
0
文件: filterdaemon.py 项目: pwys/kite
    def run(self):
        """Drain ``events_queue`` forever, indexing new mails by subject.

        For every "create" event the file's Subject header is read,
        cleaned with ``headers.cleanup_subject`` and the file path is
        appended to ``threads_index.data`` under that subject.  Between
        drains the loop sleeps ``EVENTS_QUEUE_PROCESSING_DELAY``.
        """
        while True:
            while events_queue:
                event = events_queue.pop(0)
                if event["type"] != "create":
                    continue
                try:
                    print(event["path"])
                    with open(event["path"], "r") as fd:
                        email_headers = email.parser.HeaderParser().parse(fd)
                        subject = email_headers.get("Subject")
                        print("Subject: %s" % subject)

                        if subject is not None:
                            subject = headers.cleanup_subject(subject)
                            if subject in threads_index.data:
                                threads_index.data[subject].append(event["path"])
                            else:
                                threads_index.data[subject] = [event["path"]]
                except IOError:
                    # Postfix/Dovecot creates temporary files. Ignore them
                    pass

            time.sleep(EVENTS_QUEUE_PROCESSING_DELAY)
示例#53
0
 def msg(self):
     """Parse this entry's file and return the resulting message object."""
     full_path = os.path.join(self.dir_name, self.file_name)
     with open(full_path, 'rb') as stream:
         parsed = parser.parse(stream)
     return parsed
示例#54
0
# Parses each file from the Enron email dataset and produces a tab separated
# From and To email address tuples. Multiple recipients in the To: header are
# written out as multiple lines of output.
import email.parser
import os
import re
import sys

def remove_special_chars(s):
  """Return ``s`` without angle brackets, quotes and spaces."""
  cleaned = re.sub(r"[<>\"' ]", "", s)
  return cleaned

# Only regular files whose name ends with "." are processed.
fname = sys.argv[1]
if os.path.isfile(fname) and fname.endswith("."):
  parser = email.parser.HeaderParser()
  # The context manager closes the file even if header parsing raises.
  # NOTE(review): a str parser fed a file opened in 'rb' only works on
  # Python 2; confirm the target interpreter before running on Python 3.
  with open(fname, 'rb') as fin:
    msg = parser.parse(fin, headersonly=True)
  from_value = msg["From"]
  to_header = msg["To"]
  # A message lacking either header produces no output.
  if from_value is not None and to_header is not None:
    to_values = to_header.replace("\r\n", "").replace("\t", "").split(", ")
    from_value = remove_special_chars(from_value)
    # One output line per recipient.
    for to_value in to_values:
      to_value = remove_special_chars(to_value)
      print("%s\t%s" % (from_value, to_value))
示例#55
0
# Mail-to-Twitter gateway script: reads one message from stdin and, when it
# is not a plain-text message, forwards it by SMTP through localhost.
logging.basicConfig(filename='/var/log/twittermailgate.log',level=logging.DEBUG)


# NOTE(review): this unauthenticated client is replaced immediately below
# and looks redundant - confirm before removing.
api = twitter.Api()


api = twitter.Api(consumer_key='',
                      consumer_secret='',
                      access_token_key='',
                      access_token_secret='')



parser = email.parser.Parser()
e = parser.parse(sys.stdin)

sender = e['From']
myaddy = '*****@*****.**'
message = e.get_payload()

# get_payload() returns a string for a single-part message and a list of
# sub-messages for a multipart one.
if isinstance(message, str):
    logging.debug("Looks like a normal tweet")
else:
    logging.debug("I think this is a media tweet")
    try:
        smtpObj = smtplib.SMTP('localhost')
        try:
            smtpObj.sendmail(myaddy, '@twitpic.com', e.as_string())
            smtpObj.quit()
        finally:
            # Always release the socket, even when sending failed.
            smtpObj.close()
        logging.info("Sent email to TwitPic")
    except Exception as err:
        logging.error("Error: unable to send email :" + str(err))
示例#56
0
文件: pop3_mail.py 项目: amitu/dutils
def sanitize_html(html):
    """Parse ``html`` with the HTMLSanitizer tokenizer and return its XML."""
    sanitizing_parser = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    document = sanitizing_parser.parse(html)
    return document.toxml()
示例#57
0
# -*- coding: utf-8 -*-

from pprint import pprint

# Process e-mail data (email)
# A Japanese translation of the documentation is available

# Parse mail (email.parser)

import email
import email.parser
parser = email.parser.Parser()
# Variant 1: parse straight from an open file with a Parser instance.
with open('email.txt') as f:
    m = parser.parse(f)
    pprint(type(m))
    pprint(m.items())

# Variant 2: read the text first, then parse the string.
with open('email.txt') as f:
    s = f.read()
    m = email.message_from_string(s)
    pprint(m.items())

# Variant 3: the module-level helper; the with-block closes the file once
# the message has been parsed.
with open('email.txt') as f:
    msg = email.message_from_file(f)
pprint(type(msg))
pprint(msg.is_multipart())
pprint(msg.get_payload())
pprint(msg.keys())
pprint(msg.get('From'))
pprint(msg.as_string())
def filter(Action, filelist, outputfile):
    """Score files in one of several ways depending on which Action is passed to it"""
    parser = email.parser.Parser()
    SF = SpamFilter("spam.brain","ham.brain")
    index = 0
    if Action in (MARK_SPAM, MARK_NOT_SPAM):
        if Action == MARK_SPAM:
            brain = SF.spambrain
            SF.spambrain_modified = True
        else:
            brain = SF.hambrain
            SF.hambrain_modified = True
        for i in OPENIter(filelist):
            messages = [start_message(parser.parse(i))]
            try:
                brain.add_sample(wordtokenizer(messages))
            except IOError: print 'x',
            index = index + 1
            if (index % 100) == 0:
                print '.',
                sys.stdout.flush()
            #print i.name
        SF.save()
    elif Action == FILTER_SPAM_LIST:
        msgs, spams, hams, unknowns = 0,0,0,0
        for i in OPENIter(filelist):
            l = list(wordtokenizer([start_message(parser.parse(i))]))
            spamscore = SF.spambrain.get_filescore(iter(l))
            hamscore = SF.hambrain.get_filescore(iter(l))
            msgs += 1
            if spamscore == hamscore:
                type = ' unknown'
                unknowns += 1
            elif spamscore > hamscore:
                type = '    spam'
                spams += 1
            else:
                type = 'not spam'
                hams += 1
            print i.name, type, hamscore,spamscore
        if msgs:
            print "Spam: ", spams*100.0/msgs, "    Not spam: ",hams*100.0/msgs, "    Unknown: ",unknowns*100.0/msgs
    elif Action == FILTER_SPAM_SAVE:
         for i in OPENIter(filelist):
              msg = parser.parse(i)
              l = list(wordtokenizer([start_message(msg)]))
              spamscore = SF.spambrain.get_filescore(iter(l))
              hamscore = SF.hambrain.get_filescore(iter(l))
              if spamscore == hamscore:
                   msg['MarkovBrainSpamStatus'] = "Unknown"
              elif spamscore > hamscore:
                   msg['MarkovBrainSpamStatus'] = "Spam"
              else:
                   msg['MarkovBrainSpamStatus'] = "Not Spam"
              #in normal use cases, this should get redirected to a file from stdout
              if outputfile:
                   saveout = sys.stdout
                   outfile = open(outputfile,'a')
                   sys.stdout = outfile
                   print str(msg)
                   sys.stdout = saveout
              else: print str(msg)
def parseMail(filename):
    """Parse the e-mail file ``filename`` into a dictionary of fields.

    Returns:
        A dict that always has ``from``, ``to``, ``cc`` and ``date``
        (``""`` when the date is absent or not in the expected format).
        ``original_subject``/``subject`` are present when a subject was
        found, and ``message``/``original_text`` when a message text was
        found.
    """
    return_type={}
    # Open the file for parsing; the with-block closes it afterwards.
    with open(filename,"r") as handle:
        mail = parser.parse(handle)
    # Get string containing the sender section of email.
    from_email = obtainFromEmail(mail)
    # Parse the sender section into a list of e-mail addresses.
    return_type["from"] = text_parsing.parse_addresses(from_email)
    # Similarly obtain the string which is in the "To" section of email.
    to_email = obtainToEmail(mail)
    # Get a list of parsed email ids from this parsed section.
    return_type["to"] = text_parsing.parse_addresses(to_email)
    # Get the string representation of the cc section.
    cc_email = obtainCcEmail(mail)
    # Convert it to list of parsed email ids.
    return_type["cc"] = text_parsing.parse_addresses(cc_email)
    # Obtain the date the email was sent.
    date = obtainDateEmail(mail)
    if date is not None:
        # Drop the leading "Weekday," part when there is one.
        comma = date[0].split(",")
        if len(comma) == 2:
            comma = comma[1].strip()
        else:
            comma = comma[0].strip()
        # If there is a valid date then note it down
        provided_date = comma.split(" ")
        # Zero-pad a single-digit day so it has two digits.
        if len(provided_date) > 1 and len(provided_date[1]) == 1:
            provided_date[1] = "0" + provided_date[1]
        add_subtract = 1
        if len(provided_date) > 4 and len(provided_date[4]) > 4 and provided_date[4][0] == "-":
            add_subtract = -1
        try:
            timezone_hours = int(provided_date[4][1:3])
            timezone_minutes = int(provided_date[4][3:5])
        except (IndexError, ValueError):
            # No zone field, or not in +hhmm/-hhmm form: apply no offset.
            timezone_hours = 0
            timezone_minutes = 0
        provided_date = " ".join(provided_date[0:4])
        try:
            datetime_object = datetime.datetime.strptime(provided_date, "%d %b %Y %H:%M:%S")
            # A "+hhmm" zone means local time is ahead of UTC, so the
            # signed offset is subtracted to obtain UTC.
            new_date_utc = datetime_object - add_subtract * datetime.timedelta(hours=timezone_hours, minutes=timezone_minutes)
            print_date = new_date_utc.strftime("%d %b %Y %H:%M:%S")
            return_type["date"] = print_date
        except (ValueError, OverflowError):
            logger.debug("Could not find date in the correct format. Skipping it " + date[0])
            return_type["date"] = ""
    else:
        # Else no valid date
        return_type["date"] = ""
    # Now obtain the subject section of the email.
    subject = obtainSubjectEmail(mail)
    if subject is not None:
        # If there is a valid subject
        # save the original subject for printing it as raw text
        return_type["original_subject"] = subject[0]
        # Run the subject through text_parsing.parse_text and store the
        # result without double quotes.
        subject,extra_info = text_parsing.parse_text(subject[0])
        return_type["subject"] = subject.replace("\"","")
    # Finally obtain the text section of the email.
    message = obtainMessageEmail(mail)
    if message is None:
        return return_type
    # Run the text through text_parsing.parse_message, which returns the
    # processed message and the original text.
    message,original_text = text_parsing.parse_message(message)
    return_type["message"] = message
    return_type["original_text"] = original_text
    # Return the collected fields.
    return return_type