示例#1
0
 def test_loop_read_from_dir(self):
     ''' Run the lexer over every message file found in /tmp/req_messages.
     
         This is not a unit test: it reads whatever files are present in that
         directory and makes no assertions. It only fails if reading a file,
         parsing it as an e-mail or lexing its payload raises an exception.
     '''
     
     import email
     import os
     
     def lex_payload(payload):
         ''' lex the payload from its first 'begin' (case-insensitive) onwards '''
         index = payload.lower().find('begin')
         
         if index < 0:
             print("Cannot find begin")
             return
         
         lexer = IMSLexer()
         # drop anything that precedes the 'begin' keyword
         lexer.input(payload[index:])
         # drain the lexer so that every token is actually produced
         for _token in lexer:
             pass
     
     msg_dir = '/tmp/req_messages'
     for name in os.listdir(msg_dir):
         
         path = '%s/%s'%(msg_dir,name)
         
         if not os.path.isfile(path):
             # a sub-directory cannot be opened as a message file
             continue
         
         print("********************* Try to Parse %s/%s **************************"%(msg_dir,name))
         
         # the with-block closes the file as soon as the message is parsed
         with open(path) as fd:
             msg = email.message_from_file(fd)
         
         if not msg.is_multipart():
             print("it is not multipart")
             lex_payload(msg.get_payload())
         else:
             print("it is multipart")
             
             for part in msg.walk():
                 # if we have a text/plain this a IMS2.0 message so we try to parse it
                 if part.get_content_type() == 'text/plain':
                     lex_payload(part.get_payload())
示例#2
0
    def test_read_from_email(self):
        ''' Read one stored e-mail message and lex its payload, printing each token.
        
            Depends on the file /home/aubert/req_messages/34368614.msg being
            present and makes no assertions; it only fails if reading, e-mail
            parsing or lexing raises an exception.
        '''
        
        import email
        
        def lex_payload(payload):
            ''' lex the payload from its first 'begin' (case-insensitive) onwards '''
            index = payload.lower().find('begin')
            
            if index < 0:
                print("Cannot find begin")
                return
            
            lexer = IMSLexer()
            # drop anything that precedes the 'begin' keyword
            lexer.input(payload[index:])
            for token in lexer:
                print("\nToken = %s"%(token))
        
        msg_dir = '/home/aubert/req_messages'
        
        # the with-block closes the file as soon as the message is parsed
        with open('%s/%s'%(msg_dir,'34368614.msg')) as fd:
            msg = email.message_from_file(fd)
        
        if not msg.is_multipart():
            to_parse = msg.get_payload()
            print("to_parse %s\n"%(to_parse))
            lex_payload(to_parse)
        else:
            print("multipart")
            
            for part in msg.walk():
                # if we have a text/plain this a IMS2.0 message so we try to parse it
                if part.get_content_type() == 'text/plain':
                    lex_payload(part.get_payload())
示例#3
0
 def test_star_expansion(self):
     ''' Check the token stream of a request whose sta_list uses '*' wildcards.
     
         Every token produced by the lexer is compared, position by position,
         with the expected (type, value) pairs. Tokens beyond the expected
         list are ignored; producing fewer tokens than expected is a failure.
     '''
     lexer  = IMSLexer()
     lexer.input("     begin IMS2.0     \nmsg_type data \nmsg_id 54695 ctbto_idc\ne-mail [email protected]     \ntime 2000/11/22 to 2000/11/23\nsta_list *BC,A*T,AD*  \nalert_temp\n    stop")
     
     # NOTE(review): the e-mail address in the input and the expected
     # '*****@*****.**' value look like redaction placeholders - confirm
     # against the original fixture.
     expected = [
         ('BEGIN',     'begin'),
         ('MSGFORMAT', 'IMS2.0'),
         ('NEWLINE',   '\n'),
         ('MSG_TYPE',  'msg_type'),
         ('ID',        'data'),
         ('NEWLINE',   '\n'),
         ('MSG_ID',    'msg_id'),
         ('NUMBER',    (54695, '54695')),
         ('ID',        'ctbto_idc'),
         ('NEWLINE',   '\n'),
         ('EMAIL',     'e-mail'),
         ('ID',        '*****@*****.**'),
         ('NEWLINE',   '\n'),
         ('TIME',      'time'),
         ('DATETIME',  '2000/11/22'),
         ('TO',        'to'),
         ('DATETIME',  '2000/11/23'),
         ('NEWLINE',   '\n'),
         ('STA_LIST',  'sta_list'),
         ('WCID',      '*BC'),
         ('COMMA',     ','),
         ('WCID',      'A*T'),
         ('COMMA',     ','),
         ('WCID',      'AD*'),
         ('NEWLINE',   '\n'),
         ('ID',        'alert_temp'),
         ('NEWLINE',   '\n'),
         ('STOP',      'stop'),
         ('ENDMARKER', None),
     ]
     
     cpt = 0
     for token in lexer:
         if cpt < len(expected):
             expected_type, expected_value = expected[cpt]
             self.assertEqual(token.type, expected_type)
             self.assertEqual(token.value, expected_value)
         
         cpt += 1
     
     # without this check the test would pass if the lexer stopped early
     self.assertTrue(cpt >= len(expected),
                     "lexer produced %d tokens, expected at least %d" % (cpt, len(expected)))
示例#4
0
    def test_lat_lon(self):
        ''' Check the token stream of a request containing lat and lon ranges.
        
            Every token produced by the lexer is compared, position by position,
            with the expected (type, value) pairs. Tokens beyond the expected
            list are ignored; producing fewer tokens than expected is a failure.
        '''
        
        lexer  = IMSLexer()
        lexer.input("     begin IMS2.0     \nmsg_type data \nmsg_id 54695 ctbto_idc\ne-mail [email protected]     \ntime 2000/11/22 to 2001/01/01\nlat -12 to 17\nlon 44 to 66\nalert_temp\n    stop")
        
        # NOTE(review): the e-mail address in the input and the expected
        # '*****@*****.**' value look like redaction placeholders - confirm
        # against the original fixture.
        expected = [
            ('BEGIN',     'begin'),
            ('MSGFORMAT', 'IMS2.0'),
            ('NEWLINE',   '\n'),
            ('MSG_TYPE',  'msg_type'),
            ('ID',        'data'),
            ('NEWLINE',   '\n'),
            ('MSG_ID',    'msg_id'),
            ('NUMBER',    (54695, '54695')),
            ('ID',        'ctbto_idc'),
            ('NEWLINE',   '\n'),
            ('EMAIL',     'e-mail'),
            ('ID',        '*****@*****.**'),
            ('NEWLINE',   '\n'),
            ('TIME',      'time'),
            ('DATETIME',  '2000/11/22'),
            ('TO',        'to'),
            ('DATETIME',  '2001/01/01'),
            ('NEWLINE',   '\n'),
            ('LAT',       'lat'),
            # the sign is a separate MINUS token, the number itself is unsigned
            ('MINUS',     '-'),
            ('NUMBER',    (12,'12')),
            ('TO',        'to'),
            ('NUMBER',    (17,'17')),
            ('NEWLINE',   '\n'),
            ('LON',       'lon'),
            ('NUMBER',    (44,'44')),
            ('TO',        'to'),
            ('NUMBER',    (66,'66')),
            ('NEWLINE',   '\n'),
            ('ID',        'alert_temp'),
            ('NEWLINE',   '\n'),
            ('STOP',      'stop'),
            # trailing end marker, checked the same way as in the sibling tests
            ('ENDMARKER', None),
        ]
        
        cpt = 0
        for token in lexer:
            if cpt < len(expected):
                expected_type, expected_value = expected[cpt]
                self.assertEqual(token.type, expected_type)
                self.assertEqual(token.value, expected_value)
            
            cpt += 1
        
        # without this check the test would pass if the lexer stopped early
        self.assertTrue(cpt >= len(expected),
                        "lexer produced %d tokens, expected at least %d" % (cpt, len(expected)))
示例#5
0
 def test_time_2(self):
     ''' Check the token stream for date-times with non zero-padded fields.
     
         Every token produced by the lexer is compared, position by position,
         with the expected (type, value) pairs. Tokens beyond the expected
         list are ignored; producing fewer tokens than expected is a failure.
     '''
     lexer  = IMSLexer()
     lexer.input("     begin IMS2.0     \nmsg_type data \nmsg_id 54695 ctbto_idc\ne-mail [email protected]     \ntime 1999/02/01 23:1:1.7 to 1999/2/1 2:29:19.76\nsta_list ABC,DEF, FGH  \nalert_temp\n    stop")
     
     # NOTE(review): the e-mail address in the input and the expected
     # '*****@*****.**' value look like redaction placeholders - confirm
     # against the original fixture.
     expected = [
         ('BEGIN',     'begin'),
         ('MSGFORMAT', 'IMS2.0'),
         ('NEWLINE',   '\n'),
         ('MSG_TYPE',  'msg_type'),
         ('ID',        'data'),
         ('NEWLINE',   '\n'),
         ('MSG_ID',    'msg_id'),
         ('NUMBER',    (54695, '54695')),
         ('ID',        'ctbto_idc'),
         ('NEWLINE',   '\n'),
         ('EMAIL',     'e-mail'),
         ('ID',        '*****@*****.**'),
         ('NEWLINE',   '\n'),
         ('TIME',      'time'),
         # date and time of day are expected as one DATETIME token
         ('DATETIME',  '1999/02/01 23:1:1.7'),
         ('TO',        'to'),
         ('DATETIME',  '1999/2/1 2:29:19.76'),
         ('NEWLINE',   '\n'),
         ('STA_LIST',  'sta_list'),
         ('ID',        'ABC'),
         ('COMMA',     ','),
         ('ID',        'DEF'),
         ('COMMA',     ','),
         ('ID',        'FGH'),
         ('NEWLINE',   '\n'),
         ('ID',        'alert_temp'),
         ('NEWLINE',   '\n'),
         ('STOP',      'stop'),
         ('ENDMARKER', None),
     ]
     
     cpt = 0
     for token in lexer:
         if cpt < len(expected):
             expected_type, expected_value = expected[cpt]
             self.assertEqual(token.type, expected_type)
             self.assertEqual(token.value, expected_value)
         
         cpt += 1
     
     # without this check the test would pass if the lexer stopped early
     self.assertTrue(cpt >= len(expected),
                     "lexer produced %d tokens, expected at least %d" % (cpt, len(expected)))
示例#6
0
 def test_help_message(self):
     ''' Check the token stream of a request message containing a help line.
     
         Every token produced by the lexer is compared, position by position,
         with the expected (type, value) pairs. Tokens beyond the expected
         list are ignored; producing fewer tokens than expected is a failure.
     '''
     
     lexer  = IMSLexer()
     lexer.input("begin IMS2.0     \nmsg_type request \nmsg_id 54695 ctbto_idc\ne-mail [email protected]     \nhelp\nstop")
     
     # NOTE(review): the e-mail address in the input and the expected
     # '*****@*****.**' value look like redaction placeholders - confirm
     # against the original fixture.
     expected = [
         ('BEGIN',     'begin'),
         ('MSGFORMAT', 'IMS2.0'),
         ('NEWLINE',   '\n'),
         ('MSG_TYPE',  'msg_type'),
         ('ID',        'request'),
         ('NEWLINE',   '\n'),
         ('MSG_ID',    'msg_id'),
         ('NUMBER',    (54695, '54695')),
         ('ID',        'ctbto_idc'),
         ('NEWLINE',   '\n'),
         ('EMAIL',     'e-mail'),
         ('ID',        '*****@*****.**'),
         ('NEWLINE',   '\n'),
         ('HELP',      'help'),
         ('NEWLINE',   '\n'),
         ('STOP',      'stop'),
         ('ENDMARKER', None),
     ]
     
     cpt = 0
     for token in lexer:
         if cpt < len(expected):
             expected_type, expected_value = expected[cpt]
             self.assertEqual(token.type, expected_type)
             self.assertEqual(token.value, expected_value)
         
         cpt += 1
     
     # without this check the test would pass if the lexer stopped early
     self.assertTrue(cpt >= len(expected),
                     "lexer produced %d tokens, expected at least %d" % (cpt, len(expected)))