示例#1
0
 def jsonEditEntries(self):
     """
         Same as `self.jsonAddEntries` but allows the editing of old entries and the addition
         of new entries.

         Returns `True` when the form validates and all entries are applied, a dict of
         per-field error messages if `self.addData` fails for a new entry, or
         `self.form_error` when form validation fails.

         Side effects: adds/edits `Group` rows via the shared `session`, and deletes any
         existing group of `self.case` that was not present in the submitted form.
     """
     # Groups seen in the submitted form; any case group NOT collected here is
     # treated as removed by the user and deleted at the end.
     groups = []
     
     if self.validate_form(edit2_form()):
         # form is validated, so edit group details
         for i, v in enumerate(self.form_result.itervalues()):   
             for entry in v:            
                 if entry[u'group'] is None: # new, so add it
                     group, error = self.addData(entry)
                     
                     if group is None:
                         # addData failed: report the error keyed to the CSV field of
                         # form row `i` so the client can show it inline.
                         form_error = {}
                         form_error['csv_entry-' + str(i) + '.data'] = \
                         'The data could not be added to the database due to an error: {0}'.format(error)
                         return form_error
             
                     groups.append(group)
                 else:                       # already there, edit it
                     group = entry[u'group']
                     groups.append(group)
                     group.name = entry[u'name']
                     group.description = entry[u'desc']
                     group.program = get_program(entry[u'program'])
                     
                     # keepcsv unchecked means the user uploaded replacement CSV data
                     if entry[u'keepcsv'] == False: # add new csv data
                         group.csv_name = entry[u'data'].filename
                         
                         self.addEntry(group.program, entry['data'].stream, group)
                    
                 session.flush()
         
         # some data might be deleted, loop through all groups, if not in 'groups' then can delete it
         for g in self.case.groups:
             if g not in groups:
                 session.delete(g)
         
         return True
     else:
         return self.form_error
示例#2
0
 def addEntry(self, program, file, group):
     """
         Calls the generator `convert_file()` found in :doc:`converters` on each row of the file, 
         and adds the result to the database. If an exception happens during the converting and
         adding of data, then the session is rolled back and a ``(None, error)`` tuple is
         returned. If the file yields no rows, ``(None, message)`` is returned. Otherwise
         `True` is returned.
         
         .. note::
             This had been optimised to make the adding of data as fast as possible, but
             has been slowed down again by adding search terms. 
             
             **ToDo**: Optimise the adding of search terms. 
     """
     session.flush()
     # Cache of (name, version, source_file) -> Browser.id so each distinct browser
     # is looked up / created only once for the whole file.
     browser_ids = {}        
     
     try:
         entry_ins = Entry.__table__.insert()
         url_ins = URL.__table__.insert()
         count_num = 0  # rows actually inserted
         for d in convert_file(program, file):
             
             browser_name = d.pop('browser_name')
             browser_version = d.pop('browser_version')
             source = d.pop('source_file')
                             
             key = (browser_name, browser_version, source)                
             browser_id = browser_ids.get(key)
             if browser_id is None:
                 browser = Browser.getFilterBy(name=browser_name, version=browser_version,
                                               source=source).first()
                 if browser is None:
                     browser = Browser(*key)
                     session.add(browser)
                     session.flush()
                 browser_id = browser_ids[key] = browser.id
             
             # optimised to make adding data as fast as possible - ignores the ORM
             v = d.pop('access_time')
             if v is not None:
                 d['access_date'] = datetime(v.year, v.month, v.day, 0, 0, 0, 0)
                 d['access_time'] = time(v.hour, v.minute, v.second, v.microsecond)
             else:
                 continue # don't add data without an access time
             v = d.pop('modified_time')
             if v is not None:
                 d['modified_date'] = datetime(v.year, v.month, v.day, 0, 0, 0, 0)
                 d['modified_time'] = time(v.hour, v.minute, v.second, v.microsecond)
             else:
                 d['modified_date'] = None
                 d['modified_time'] = None
             
             entry_result = session.execute(entry_ins.values(browser_id=browser_id, 
                                                             group_id=group.id,
                                                             **d))                
             entry_id = entry_result.last_inserted_ids()[0]
             
             # add URLs
             # BUG FIX: url_id must come from the URL insert's own result. Previously the
             # entry insert's result was re-read here, so url_id was actually entry_id and
             # the wrong URL row was fetched below for search-term extraction.
             url = URL(d['url'])
             url_result = session.execute(url_ins.values(entry_id=entry_id, **url.asDict()))  
             url_id = url_result.last_inserted_ids()[0]
             
             # add search terms
             # TODO: make this optimised like above!
             entry = Entry.get(entry_id)
             url = URL.get(url_id)
             
             opts = config.options('search_engines')
             if url.query is not None and 'search' in url.path:
                 for opt in opts:
                     if opt in url.netloc:
                         # Extract the value of this engine's query parameter, e.g. the
                         # text after 'q=' and before the next '&'.
                         query = url.query.split(config.get('search_engines', opt)+'=')\
                                 [-1].split('&')[0]
                         q_string, terms = SearchTerms.getTerms(urllib.unquote(query))
                         url.search = q_string 
                         for term in terms:
                             t = SearchTerms.getFilterBy(term=term, engine=opt).first()
                             if t is None:
                                 # NOTE(review): section 'search' here differs from
                                 # 'search_engines' used above -- confirm which is intended.
                                 t = SearchTerms(term, opt, config.get('search', opt))
                                 session.add(t)
                             else:
                                 t.occurrence = t.occurrence + 1
                             entry.search_terms.append(t)
                             session.flush()   
             count_num = count_num + 1
         if count_num == 0:
             # we have not added anything, but no exceptions were raised
             return None, "No entries found in the uploaded file"             
         return True  # success path, as the docstring promises (was previously missing)
     except Exception as e:
         session.rollback()            
         return None, e
示例#3
0
    def addDefaultFilters(self):
        """
            Adds the default filters for the timegraph such as filtering by browser type,
            group, work hours, Google searches and local files. Gets called when a new case
            is being set up in `finish_wizard()` in :doc:`caseController`.

            Each filter is a `Filter` with an attached `FilterQuery`; construction and
            session bookkeeping are factored into `self._addFilter`.
        """
        
        # Add filters for the browsers available, unless only one browser, then a filter on 
        # everything is pointless
        browsers = Browser.getAll().group_by(Browser.name).all()
        if len(browsers) > 1:
            for browser in browsers:
                self._addFilter(u''.join(browser.name.lower().split(' ')), browser.name,
                                [(u'Browser', u'name', u'Is', browser.name, None)])
        
        # filters for Google searches
        self._addFilter(u'googlesearch', u'Google searches',
                        [(u'URL Parts', u'query', u'Is not', None, None),
                         (u'URL Parts', u'netloc', u'Is not', None, None),
                         (u'URL Parts', u'path', u'Is not', None, None),
                         (u'URL Parts', u'netloc', u'Contains', u'google', None),
                         (u'URL Parts', u'path', u'Contains', u'search', None),
                         ])

        # filters for local files accessed
        # BUG FIX: .all() returns a list (possibly empty), never None, so the old
        # `is not None` check always passed; test truthiness instead.
        files = URL.getFilterBy(scheme="file").all()
        if files:
            self._addFilter(u'files', u'Local Files',
                            [(u'URL Parts', u'scheme', u'Is', u'file', None)])
        
        # filters for different groups      
        groups = Group.getAll().all()
        if len(groups) > 1:
            for group in groups:
                self._addFilter(u''.join(group.name.lower().split(' ')), group.name,
                                [(u'Group', u'name', u'Is', group.name, None)])
                
        # filters for work hours: access time strictly between 09:00 and 17:00
        five = time(17, 0, 1)   # just after 5pm
        nine = time(8, 59, 59)  # just before 9am
        self._addFilter(u'workhours', u'Work hours',
                        [(u'Entry', u'access_time', u'Less than', five, None),
                         (u'Entry', u'access_time', u'Greater than', nine, None),
                         ])
        
        # filters for adverts 
        self._addFilter(u'adverts', u'Advert URLs',
                        [(u'URL Parts', u'domain', u'Is not', None, None),
                         (u'URL Parts', u'domain', u'Is in list', None, 'advert_domainnames.txt'),
                         ])
        
        # filters for Facebook, MySpace, Bebo, twitter, hi5
        self._addFilter(u'social', u'Social Networking URLs',
                        [(u'URL Parts', u'domain', u'Is not', None, None),
                         (u'URL Parts', u'domain', u'Is in list', None, 'socialmedia.txt'),
                         ])
        
        # filters for email
        # FIXES: removed dead reassignment of five/nine (copy-paste leftover from the
        # work-hours section) and normalised u'Is Not' to u'Is not' to match the
        # comparison name used everywhere else in this method.
        self._addFilter(u'email', u'Web Email',
                        [(u'Entry', u'url', u'Contains', 'mail', None),
                         (u'URL Parts', u'scheme', u'Is not', 'file', None),
                         ])
        
        # filters for news
        self._addFilter(u'news', u'News URLs',
                        [(u'URL Parts', u'hostname', u'Is not', None, None),
                         (u'URL Parts', u'hostname', u'Is in list', None, 'news.txt'),
                         ])

    def _addFilter(self, name, label, elements):
        """
            Helper: create a `Filter` called `name` with display label `label`, attach a
            `FilterQuery` built from `elements` (each a tuple of positional arguments for
            `FilterQuery.add_element()`), then add the filter to the session and flush.
        """
        f = Filter(name, label)
        fq = FilterQuery()
        for element in elements:
            fq.add_element(*element)
        f.query = fq
        session.add(f)
        session.flush()