Exemplo n.º 1
0
def extract(email_id, buff_mail, out_dir, categories, target_email):
    """Unpack one raw email into its own directory and build its output row.

    Files written under ``<out_dir>/emails/<email_id>/``:
      * ``<email_id>.eml`` -- the raw message exactly as received,
      * one file per attachment part (any non-multipart part that carries a
        Content-Disposition header),
      * ``<email_id>.txt`` -- the concatenated text/plain payloads.

    Args:
        email_id: identifier used for the directory and file names.
        buff_mail: raw email text; parsed with ``email.message_from_string``.
        out_dir: root output directory.
        categories: passed through unchanged to ``createRow``.
        target_email: passed through unchanged to ``createRow``.

    Returns:
        Whatever ``createRow`` returns for this message.
    """
    _dir = "{}/emails/{}".format(out_dir, email_id)
    mkdirp(_dir)
    # write raw email to new dir
    spit("{}/{}.eml".format(_dir, email_id), buff_mail)
    mail = email.message_from_string(buff_mail)
    attach = []
    msg = ""
    attach_count = counter()

    for part in mail.walk():
        # Collect every text/plain payload into the message body. Note that
        # there is no `continue` here: a text/plain part that also has a
        # Content-Disposition is additionally saved as an attachment below.
        if part.get_content_type() == 'text/plain':
            msg = msg + "\n" + part.get_payload()
        if part.get_content_type() == 'message/delivery-status':
            continue
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        fileName = part.get_filename()
        # Unnamed attachments get a sequential placeholder name.
        fileName = fileName if fileName else "Attach_{}".format(attach_count.next())

        if fileName == 'rtf-body.rtf':
            continue

        payload = part.get_payload(decode=True)
        if payload is None:
            # get_payload(decode=True) returns None for container parts
            # (e.g. an attached message/rfc822). Writing None would raise
            # TypeError and abort the whole email; walk() visits the
            # container's sub-parts on its own, so skip the container.
            continue

        fileName = clean_string(fileName, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_forwardslash'],
            (r' ', '_'),
            (r'&', '_')])

        attach.append(fileName)
        filePath = "{}/{}".format(_dir, fileName)
        # save attachment; `with` closes the handle even if the write fails
        with open(filePath, 'wb') as fp:
            fp.write(payload)

    msg = clean_string(msg, [EXPR_OPTS['fix_utf8']])
    spit("{}/{}.txt".format(_dir, email_id), msg)
    row = createRow(email_id, "emails/{}".format(email_id), target_email, mail, categories, attach, msg)

    return row
Exemplo n.º 2
0
def extract(email_id, buff_mail, out_dir, categories, target_email):
    """Unpack one raw email into its own directory and build its output row.

    Files written under ``<out_dir>/emails/<email_id>/``:
      * ``<email_id>.eml`` -- the raw message exactly as received,
      * one file per attachment part (any non-multipart part that carries a
        Content-Disposition header),
      * ``<email_id>.txt`` -- the concatenated text/plain payloads.

    Args:
        email_id: identifier used for the directory and file names.
        buff_mail: raw email text; parsed with ``email.message_from_string``.
        out_dir: root output directory.
        categories: passed through unchanged to ``createRow``.
        target_email: passed through unchanged to ``createRow``.

    Returns:
        Whatever ``createRow`` returns for this message.
    """
    _dir = "{}/emails/{}".format(out_dir, email_id)
    mkdirp(_dir)
    # write raw email to new dir
    spit("{}/{}.eml".format(_dir, email_id), buff_mail)
    mail = email.message_from_string(buff_mail)
    attach = []
    msg = ""
    attach_count = counter()

    for part in mail.walk():
        # Collect every text/plain payload into the message body. Note that
        # there is no `continue` here: a text/plain part that also has a
        # Content-Disposition is additionally saved as an attachment below.
        if part.get_content_type() == 'text/plain':
            msg = msg + "\n" + part.get_payload()
        if part.get_content_type() == 'message/delivery-status':
            continue
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        fileName = part.get_filename()
        # Unnamed attachments get a sequential placeholder name.
        fileName = fileName if fileName else "Attach_{}".format(
            attach_count.next())

        if fileName == 'rtf-body.rtf':
            continue

        payload = part.get_payload(decode=True)
        if payload is None:
            # get_payload(decode=True) returns None for container parts
            # (e.g. an attached message/rfc822). Writing None would raise
            # TypeError and abort the whole email; walk() visits the
            # container's sub-parts on its own, so skip the container.
            continue

        fileName = clean_string(fileName, [
            EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_forwardslash'], (r' ', '_'),
            (r'&', '_')
        ])

        attach.append(fileName)
        filePath = "{}/{}".format(_dir, fileName)
        # save attachment; `with` closes the handle even if the write fails
        with open(filePath, 'wb') as fp:
            fp.write(payload)

    msg = clean_string(msg, [EXPR_OPTS['fix_utf8']])
    spit("{}/{}.txt".format(_dir, email_id), msg)
    row = createRow(email_id, "emails/{}".format(email_id), target_email, mail,
                    categories, attach, msg)

    return row
Exemplo n.º 3
0
    cat 2006.txt | ./pst/normalize.py  [email protected] demail/emails/[email protected] -a --start 0 --limit 1000
    '''

    parser = argparse.ArgumentParser(
        description=" ... ", 
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=desc)
    parser.add_argument("-a","--header", action='store_true', help="add header to output")
    parser.add_argument("-s","--start", type=int, default=0, help="start at line #")
    parser.add_argument("-l", "--limit", type=int, default=0, help="end at line #")
    parser.add_argument("target_email", help="Target Email")
    parser.add_argument("out_dir", help="Output Directory")
    parser.add_argument("infile", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Input File")
    args = parser.parse_args()
    outfile = "{}/output.csv".format(args.out_dir)
    mkdirp("{}/emails".format(args.out_dir))
    if args.header:
        spit(outfile, email_extract.headerrow() + "\n")

    for i, line in enumerate(skip(args.infile, at_start=args.start)):
        if ((not args.limit == 0) and (i >= args.limit)):
            break;
        try:
            fp = line.strip()
            guid = email_extract.md5(fp)
            category = email_extract.categoryList(fp)
            buff = slurp(fp)

            row = email_extract.extract(guid, buff, args.out_dir, category, args.target_email)
            spit(outfile, row + "\n")
        except Exception as e:
Exemplo n.º 4
0
                        help="start at line #")
    parser.add_argument("-l",
                        "--limit",
                        type=int,
                        default=0,
                        help="end at line #")
    parser.add_argument("target_email", help="Target Email")
    parser.add_argument("out_dir", help="Output Directory")
    parser.add_argument("infile",
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Input File")
    args = parser.parse_args()
    outfile = "{}/output.csv".format(args.out_dir)
    mkdirp("{}/emails".format(args.out_dir))
    if args.header:
        spit(outfile, email_extract.headerrow() + "\n")

    for i, line in enumerate(skip(args.infile, at_start=args.start)):
        if ((not args.limit == 0) and (i >= args.limit)):
            break
        try:
            fp = line.strip()
            guid = email_extract.md5(fp)
            category = email_extract.categoryList(fp)
            buff = slurp(fp)

            row = email_extract.extract(guid, buff, args.out_dir, category,
                                        args.target_email)
            spit(outfile, row + "\n")