示例#1
0
def do_eml(eml, out_txt):
    """Tokenize an email and write its token line to out_txt if dedup() accepts it.

    Each text yielded by eml2str(eml) is whitespace-normalized and passed to
    tokenize(); the result whose first list is longest is kept.

    Parameters:
        eml: the message, handed unchanged to eml2str().
        out_txt: object with a write() method; receives the space-joined
            tokens followed by a newline.

    Returns:
        0 when fewer than 10 entries were kept, otherwise whatever dedup()
        returned (the line is written only when that value is truthy).
    """
    vtokens = []
    tokens = []
    for text in eml2str(eml):
        # Skip parts containing this marker (presumably spam-filter report
        # boilerplate -- TODO confirm).
        if 'pam detection software, running on the system' in text:
            continue
        # Drop double quotes and collapse all whitespace runs to one space.
        t = " ".join(text.replace('"', ' ').split())
        if len(t) > 10:
            try:
                vtok, tok = tokenize(t, wordmap)
                # Keep only the part that produced the most entries.
                if len(vtok) > len(vtokens):
                    vtokens = vtok
                    tokens = tok
            except Exception:
                # A part that fails to tokenize is reported and skipped;
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                eprint(traceback.format_exc())
    if len(vtokens) < 10:
        return 0

    # NOTE(review): the meaning of dedup()'s 2nd/3rd arguments is not visible
    # here; the values are kept exactly as in the original call.
    ok = dedup(vtokens, 7, (len(vtokens) - 10) * 4 / 5)
    if ok:
        out_txt.write(" ".join(tokens) + "\n")

    return ok
示例#2
0
def do_eml(msg):
    """Classify a parsed email and return a short verdict string.

    Each text yielded by eml2str(msg) is whitespace-normalized and passed to
    tokenize(); the result whose first list is longest is scored with
    deepspam_test(). The score and tokens are also appended to the file
    "deepspam.res" on a best-effort basis.

    Parameters:
        msg: the message, handed unchanged to eml2str().

    Returns:
        "toosmall" when fewer than 10 entries were kept, otherwise one of
        "ham", "maybeham", "20ham", "spam", "maybespam", "80spam" or "dunno"
        followed by " N%" where N is the integer-formatted score.
    """
    # Note from the original author: it is better to receive the raw mail as
    # bytes, otherwise UTF-8 characters can get mangled (e.g.
    # sql_0000022480.eml, which mixes HTML entities and UTF-8 in text/plain).
    vtokens = []
    tokens = []
    for text in eml2str(msg):
        # Skip parts containing this marker (presumably spam-filter report
        # boilerplate -- TODO confirm).
        if 'pam detection software, running on the system' in text:
            continue
        # Drop double quotes and collapse all whitespace runs to one space.
        t = " ".join(text.replace('"', ' ').split())
        if len(t) > 10:
            try:
                vtok, tok = tokenize(t, wordmap)
                # Keep only the part that produced the most entries.
                if len(vtok) > len(vtokens):
                    vtokens = vtok
                    tokens = tok
            except Exception:
                # A part that fails to tokenize is reported and skipped;
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                eprint(traceback.format_exc())
    print("NUM of tokens: %d / %d" % (len(vtokens), len(tokens)))
    if len(vtokens) < 10:
        return "toosmall"

    print(" ".join(vtokens))
    res = deepspam_test(vtokens)
    # NOTE(review): the reason for the +0.1 offset is not visible here; the
    # thresholds below suggest res is a 0-100 percentage -- TODO confirm.
    res += 0.1
    print(res)
    try:
        # Context manager guarantees the handle is closed even if write fails.
        with open("deepspam.res", "at") as f:
            f.write("%3d%%:" % (res) + " ".join(tokens) + "\n")
    except Exception as err:
        # Result logging stays best-effort, but the failure is now reported
        # instead of being silently dropped.
        eprint("DEEPSPAM: cannot append to deepspam.res: %s" % (err,))
    if res < 2:
        return "ham %d%%" % (res)
    if res < 10:
        return "maybeham %d%%" % (res)
    if res < 20:
        return "20ham %d%%" % (res)
    if res > 98:
        return "spam %d%%" % (res)
    if res > 90:
        return "maybespam %d%%" % (res)
    if res > 80:
        return "80spam %d%%" % (res)
    return "dunno %d%%" % (res)
示例#3
0
 def header(self, name, val):
     """Record one header line into self.fp and flag greylist rejections.

     Sets self.reject to 1 when the X-Grey-ng header value starts with
     "REJECT". When self.fp is set, writes "name: val\\n" to it as bytes.
     Always returns Milter.CONTINUE.
     """
     if name == "X-Grey-ng" and val[:6] == "REJECT":
         self.reject = 1
     if not self.fp:
         return Milter.CONTINUE
     try:
         # Original author's note: python2, plain utf8. Under py2 surrogates
         # are supported in theory too, but only the unreleased pymilter
         # can handle them anyway.
         line = "%s: %s\n" % (name, val)
         self.fp.write(line.encode())
     except:
         try:
             # Original author's note: python3, special (surrogate-escaped)
             # utf8 that stores 8-bit ascii.
             escaped = "%s: %s\n" % (name, val)
             self.fp.write(
                 escaped.encode(encoding='ascii', errors='surrogateescape'))
         except:
             # Both encodings failed: report it and carry on with the message.
             eprint(
                 "DEEPSPAM: Exception at header(%s) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
                 % (name))
             eprint(traceback.format_exc())
     return Milter.CONTINUE
示例#4
0
    def eom(self):
        """Parse the spooled message, classify it and add an X-deepspam header.

        Returns Milter.ACCEPT in every case; any exception raised while
        parsing or classifying is reported through eprint() and otherwise
        ignored.
        """
        if not self.fp:
            return Milter.ACCEPT
        try:
            # Rewind so the parser sees the message from its first byte.
            self.fp.seek(0)
            print("PARSING %d body chars" % self.bodysize)
            try:
                parsed = email.message_from_binary_file(self.fp)  # python 3.2+
            except:
                parsed = email.message_from_file(self.fp)  # python2
            verdict = do_eml(parsed)
            print("X-deepspam: " + verdict)
            self.addheader("X-deepspam", verdict)
        except:
            eprint(
                "DEEPSPAM: Exception at eom() !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
            eprint(traceback.format_exc())
        # Disabled by the original author:
        #    if self.reject:
        #      self.setreply('541','5.7.1','Sorry, your address is already blocked for sending spam/virus, contact postmaster')
        #      return Milter.REJECT
        return Milter.ACCEPT  # ACCEPT modified message
示例#5
0
def do_eml(eml):
    """Classify an email and return a short verdict string.

    Each text yielded by eml2str(eml) is whitespace-normalized and passed to
    tokenize(); the result whose first list is longest is scored with
    deepspam_test(). The score and tokens are also appended to the file
    "test_res" on a best-effort basis.

    Parameters:
        eml: the message, handed unchanged to eml2str().

    Returns:
        "toosmall" when fewer than 10 entries were kept, otherwise
        "ham-N", "ham-maybe", "spam-N", "spam-maybe" or "dunno", where N is
        the integer-formatted score.
    """
    vtokens = []
    tokens = []
    for text in eml2str(eml):
        # Skip parts containing this marker (presumably spam-filter report
        # boilerplate -- TODO confirm).
        if 'pam detection software, running on the system' in text:
            continue
        # Drop double quotes and collapse all whitespace runs to one space.
        t = " ".join(text.replace('"', ' ').split())
        if len(t) > 10:
            try:
                vtok, tok = tokenize(t, wordmap)
                # Keep only the part that produced the most entries.
                if len(vtok) > len(vtokens):
                    vtokens = vtok
                    tokens = tok
            except Exception:
                # A part that fails to tokenize is reported and skipped;
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                eprint(traceback.format_exc())
    print("NUM of tokens: %d / %d" % (len(vtokens), len(tokens)))
    if len(vtokens) < 10:
        return "toosmall"

    print(" ".join(vtokens))
    res = deepspam_test(vtokens)
    # NOTE(review): the reason for the +0.1 offset is not visible here; the
    # thresholds below suggest res is a 0-100 percentage -- TODO confirm.
    res += 0.1
    print(res)
    try:
        # Context manager guarantees the handle is closed even if write fails.
        with open("test_res", "at") as f:
            f.write("%3d%%:" % (res) + " ".join(tokens) + "\n")
    except Exception as err:
        # Result logging stays best-effort, but the failure is now reported
        # instead of being silently dropped.
        eprint("DEEPSPAM: cannot append to test_res: %s" % (err,))
    if res <= 10:
        return "ham-%d" % (res)
    if res < 20:
        return "ham-maybe"
    if res >= 90:
        return "spam-%d" % (res)
    if res > 80:
        return "spam-maybe"
    return "dunno"