Пример #1
0
 def get_all_normalize_dom(self, configuration):
     """Return the normalized DOM of every entry, joined with newlines.

     The entries come from ``self._dom_list`` when it is truthy; otherwise
     they are fetched with ``self.get_dom_list(configuration)``. Each entry's
     ``'dom'`` value is passed through ``DomAnalyzer.normalize``.
     """
     # `or` picks the stored list unless it is empty/None, mirroring the
     # original "if not self._dom_list" fallback.
     entries = self._dom_list or self.get_dom_list(configuration)
     return "\n".join([DomAnalyzer.normalize(entry['dom']) for entry in entries])
Пример #2
0
 def get_all_normalize_dom(self, configuration):
     """Build one string holding the normalized form of each DOM entry.

     ``self._dom_list`` is used as the source when it is non-empty; when it
     is empty or ``None`` the source is ``self.get_dom_list(configuration)``.
     The normalized entries are separated by a newline.
     """
     source = self._dom_list
     if not source:
         # nothing stored on the instance: ask get_dom_list for the entries
         source = self.get_dom_list(configuration)

     normalized = []
     for item in source:
         normalized.append(DomAnalyzer.normalize(item['dom']))
     return "\n".join(normalized)
Пример #3
0
    def save_dom(self, state):
        """Write every DOM snapshot of ``state`` to disk, then clear them from the state.

        A directory named after ``state.get_id()`` is created under
        ``self.configuration.get_abs_path('dom')``. A snapshot without an
        iframe path is written directly into that directory; each snapshot
        with an iframe path gets its own numbered sub-directory ('1', '2', ...).
        For every snapshot seven files are written: the raw DOM, the
        normalized DOM, and JSON dumps of the inputs, selects, radios,
        checkboxes and candidate clickables. ``iframe_list.json`` in the state
        directory records which sub-directory number belongs to which iframe
        path and url.

        This is best effort: any exception is logged (with traceback) and
        swallowed, and in that case ``state.clear_dom()`` is not called.

        :param state: object providing ``get_id``, ``get_dom_list``, the
            ``get_*_json`` accessors listed below and ``clear_dom``.
        :return: None
        """
        try:
            # assumes get_id() / get_abs_path('dom') return the same value on
            # every call, so they are looked up once instead of per file
            state_id = state.get_id()
            # make dir for each state
            state_dir = os.path.join(self.configuration.get_abs_path('dom'), state_id)
            if not os.path.isdir(state_dir):
                os.makedirs(state_dir)

            # (file suffix, accessor) for the per-snapshot JSON dumps, in write order
            json_dumps = (
                ('_inputs.txt', state.get_inputs_json),
                ('_selects.txt', state.get_selects_json),
                ('_radios.txt', state.get_radios_json),
                ('_checkboxes.txt', state.get_checkboxes_json),
                ('_clicks.txt', state.get_candidate_clickables_json),
            )

            iframe_key_dict = {'num': 0}
            for stateDom in state.get_dom_list(self.configuration):
                iframe_path = stateDom['iframe_path']
                if iframe_path:
                    iframe_key = ';'.join(iframe_path)
                    # make new dir for iframe
                    iframe_key_dict['num'] += 1
                    iframe_no = str(iframe_key_dict['num'])
                    iframe_key_dict[iframe_no] = {'path': iframe_path, 'url': stateDom['url']}
                    dom_dir = os.path.join(state_dir, iframe_no)
                    if not os.path.isdir(dom_dir):
                        os.makedirs(dom_dir)
                else:
                    iframe_key = None
                    iframe_key_dict['basic'] = {'url': stateDom['url']}
                    dom_dir = state_dir

                with codecs.open(os.path.join(dom_dir, state_id + '.txt'), 'w', encoding='utf-8') as f:
                    f.write(stateDom['dom'])
                with codecs.open(os.path.join(dom_dir, state_id + '_nor.txt'), 'w', encoding='utf-8') as f:
                    f.write(DomAnalyzer.normalize(stateDom['dom']))
                for suffix, get_json in json_dumps:
                    with codecs.open(os.path.join(dom_dir, state_id + suffix), 'w', encoding='utf-8') as f:
                        json.dump(get_json(iframe_key), f, indent=2, sort_keys=True, ensure_ascii=False)

            with codecs.open(os.path.join(state_dir, 'iframe_list.json'), 'w', encoding='utf-8') as f:
                json.dump(iframe_key_dict, f, indent=2, sort_keys=True, ensure_ascii=False)

            # TODO: turn TempFile stateDom into FilePath stateDom
            state.clear_dom()

        except Exception as e:
            # logging.exception keeps the traceback; the message itself is unchanged
            logging.exception(' save dom : %s \t\t__from automata.py save_dom()', str(e))
Пример #4
0
    def save_dom(self, state):
        """Persist all DOM snapshots held by ``state`` and then clear them.

        Layout on disk, rooted at ``self.configuration.get_abs_path('dom')``:

        * ``<state id>/`` holds the files of the snapshot that has no iframe
          path, plus ``iframe_list.json``.
        * ``<state id>/<n>/`` (n = 1, 2, ...) holds the files of the n-th
          snapshot that has an iframe path.

        Per snapshot the files ``<id>.txt`` (raw DOM), ``<id>_nor.txt``
        (normalized DOM) and ``<id>_inputs.txt``, ``<id>_selects.txt``,
        ``<id>_radios.txt``, ``<id>_checkboxes.txt``, ``<id>_clicks.txt``
        (JSON) are written as UTF-8.

        Errors never propagate: any exception is logged together with its
        traceback and the method returns; ``state.clear_dom()`` is only
        reached when everything was written.

        :param state: object providing ``get_id``, ``get_dom_list``, the
            ``get_*_json`` accessors used below and ``clear_dom``.
        :return: None
        """

        def ensure_dir(path):
            # create the directory (with parents) unless it already exists
            if not os.path.isdir(path):
                os.makedirs(path)
            return path

        def open_out(directory, filename):
            # UTF-8 text writer for directory/filename
            return codecs.open(os.path.join(directory, filename), 'w', encoding='utf-8')

        def dump_json(payload, stream):
            # one shared JSON format for every dump made here
            json.dump(payload, stream, indent=2, sort_keys=True, ensure_ascii=False)

        try:
            # assumes get_id() / get_abs_path('dom') are stable across calls,
            # so each is evaluated once rather than once per file
            state_id = state.get_id()
            # make dir for each state
            state_dir = ensure_dir(
                os.path.join(self.configuration.get_abs_path('dom'), state_id))

            iframe_key_dict = {'num': 0}
            for stateDom in state.get_dom_list(self.configuration):
                iframe_path = stateDom['iframe_path']
                iframe_key = ';'.join(iframe_path) if iframe_path else None
                if iframe_path:
                    # make new dir for iframe
                    iframe_key_dict['num'] += 1
                    number = str(iframe_key_dict['num'])
                    iframe_key_dict[number] = {
                        'path': iframe_path,
                        'url': stateDom['url'],
                    }
                    dom_dir = ensure_dir(os.path.join(state_dir, number))
                else:
                    iframe_key_dict['basic'] = {'url': stateDom['url']}
                    dom_dir = state_dir

                with open_out(dom_dir, state_id + '.txt') as f:
                    f.write(stateDom['dom'])
                with open_out(dom_dir, state_id + '_nor.txt') as f:
                    f.write(DomAnalyzer.normalize(stateDom['dom']))
                with open_out(dom_dir, state_id + '_inputs.txt') as f:
                    dump_json(state.get_inputs_json(iframe_key), f)
                with open_out(dom_dir, state_id + '_selects.txt') as f:
                    dump_json(state.get_selects_json(iframe_key), f)
                with open_out(dom_dir, state_id + '_radios.txt') as f:
                    dump_json(state.get_radios_json(iframe_key), f)
                with open_out(dom_dir, state_id + '_checkboxes.txt') as f:
                    dump_json(state.get_checkboxes_json(iframe_key), f)
                with open_out(dom_dir, state_id + '_clicks.txt') as f:
                    dump_json(state.get_candidate_clickables_json(iframe_key), f)

            with open_out(state_dir, 'iframe_list.json') as f:
                dump_json(iframe_key_dict, f)

            # TODO: turn TempFile stateDom into FilePath stateDom
            state.clear_dom()

        except Exception as e:
            # keep the best-effort contract, but record the traceback too
            logging.exception(' save dom : %s \t\t__from automata.py save_dom()',
                              str(e))