Exemplo n.º 1
0
    def __init__(self, configdict, section, produces):
        """
        Initialise the input and build the list of files to read.

        The arguments are forwarded unchanged to ``Input.__init__``.
        ``self.file_path``, ``self.filename_pattern`` and
        ``self.depth_search`` are read but not assigned here
        (presumably set up by the base initialiser - verify).

        :raises Exception: when the resulting file list is empty
        """
        Input.__init__(self, configdict, section, produces)

        # Collect every input file that matches the configured settings
        self.file_list = Util.make_file_list(
            self.file_path,
            None,
            self.filename_pattern,
            self.depth_search)

        message = "file_list=%s" % str(self.file_list)
        log.info(message)

        # Nothing to process: fail right away
        if len(self.file_list) == 0:
            raise Exception('File list is empty!!')

        # Starts out empty
        self.file_list_done = []
Exemplo n.º 2
0
    def __init__(self, configdict, section, produces):
        """
        Initialise the input and build the list of files to read.

        The arguments are forwarded unchanged to ``Input.__init__``.
        ``self.file_path``, ``self.filename_pattern`` and
        ``self.depth_search`` are read but not assigned here
        (presumably set up by the base initialiser - verify).

        :raises Exception: when the resulting file list is empty
        """
        Input.__init__(self, configdict, section, produces)

        # Collect every input file that matches the configured settings
        self.file_list = Util.make_file_list(
            self.file_path,
            None,
            self.filename_pattern,
            self.depth_search)

        message = "file_list=%s" % str(self.file_list)
        log.info(message)

        # Nothing to process: fail right away
        if len(self.file_list) == 0:
            raise Exception('File list is empty!!')

        # No current file yet; the done-list starts out empty
        self.cur_file_path = None
        self.file_list_done = []
Exemplo n.º 3
0
    def __init__(self, configdict, section):
        """
        Initialise the input, declaring ``FORMAT.etree_doc`` as its output.

        Besides calling ``Input.__init__`` this resets the counters
        (``rowcount``, ``feature_count``), sets ``cur_feature_blob``,
        ``db``, ``xlink_db`` and ``buffer`` to ``None``, compiles the
        regular expression used to find local ``xlink:href`` references
        and creates a reusable lxml XML parser.

        :param configdict: forwarded to ``Input.__init__``
        :param section: forwarded to ``Input.__init__``
        """
        Input.__init__(self, configdict, section, produces=FORMAT.etree_doc)
        self.cur_feature_blob = None
        self.rowcount = 0

        # http://www.mkyong.com/regular-expressions/how-to-extract-html-links-with-regular-expression/
        # Matches an xlink:href attribute holding a local "#..." reference,
        # double-quoted, single-quoted or unquoted, case-insensitively.
        # The case-insensitive flag is passed to re.compile() instead of an
        # inline "(?i)" in the middle of the pattern: a global inline flag
        # that is not at the start of the expression is deprecated since
        # Python 3.6 and is an error since Python 3.11.
        self.regex_xlink_href = re.compile(
            "\\s*xlink:href\\s*=\\s*(\"#([^\"]*\")|'#[^']*'|(#[^'\">\\s]+))",
            re.IGNORECASE)

        self.db = None
        self.xlink_db = None
        self.buffer = None
        self.feature_count = 0
        # Reusable XML parser
        self.xml_parser = etree.XMLParser(remove_blank_text=True)
Exemplo n.º 4
0
    def __init__(self, configdict, section):
        """
        Initialise the input, declaring ``FORMAT.etree_doc`` as its output.

        Besides calling ``Input.__init__`` this resets the counters
        (``rowcount``, ``feature_count``), sets ``cur_feature_blob``,
        ``db``, ``xlink_db`` and ``buffer`` to ``None``, compiles the
        regular expression used to find local ``xlink:href`` references
        and creates a reusable lxml XML parser.

        :param configdict: forwarded to ``Input.__init__``
        :param section: forwarded to ``Input.__init__``
        """
        Input.__init__(self, configdict, section, produces=FORMAT.etree_doc)
        self.cur_feature_blob = None
        self.rowcount = 0

        # http://www.mkyong.com/regular-expressions/how-to-extract-html-links-with-regular-expression/
        # Matches an xlink:href attribute holding a local "#..." reference,
        # double-quoted, single-quoted or unquoted, case-insensitively.
        # The case-insensitive flag is passed to re.compile() instead of an
        # inline "(?i)" in the middle of the pattern: a global inline flag
        # that is not at the start of the expression is deprecated since
        # Python 3.6 and is an error since Python 3.11.
        self.regex_xlink_href = re.compile(
            "\\s*xlink:href\\s*=\\s*(\"#([^\"]*\")|'#[^']*'|(#[^'\">\\s]+))",
            re.IGNORECASE)

        self.db = None
        self.xlink_db = None
        self.buffer = None
        self.feature_count = 0
        # Reusable XML parser
        self.xml_parser = etree.XMLParser(remove_blank_text=True)
Exemplo n.º 5
0
    def __init__(self, configdict, section, produces):
        """
        Initialise the input and read the file settings from ``self.cfg``.

        Reads ``file_path``, ``filename_pattern`` (default
        ``*.[gxGX][mM][lL]``) and ``depth_search`` (default ``False``),
        then builds ``self.file_list`` with ``Util.make_file_list`` and
        logs it.

        The arguments are forwarded unchanged to ``Input.__init__``.
        """
        Input.__init__(self, configdict, section, produces)

        # A dir, a file or several comma-separated paths
        self.file_path = self.cfg.get('file_path')

        # glob.glob-style pattern used to select files
        default_pattern = '*.[gxGX][mM][lL]'
        self.filename_pattern = self.cfg.get('filename_pattern', default_pattern)

        # Whether to recurse into directories
        self.depth_search = self.cfg.get_bool('depth_search', False)

        # Resolve the settings above into the list of input files
        self.file_list = Util.make_file_list(
            self.file_path,
            None,
            self.filename_pattern,
            self.depth_search)

        message = "file_list=%s" % str(self.file_list)
        log.info(message)
Exemplo n.º 6
0
    def __init__(self, configdict, section, produces=FORMAT.any):
        """
        Initialise the input and read the URL settings from ``self.cfg``.

        Reads ``url`` and ``parameters``. When ``parameters`` is set it is
        parsed with ``ast.literal_eval`` and URL-encoded into
        ``self.query_string``; otherwise ``self.query_string`` stays
        ``None``.

        :param configdict: forwarded to ``Input.__init__``
        :param section: forwarded to ``Input.__init__``
        :param produces: forwarded to ``Input.__init__``
        """
        Input.__init__(self, configdict, section, produces)

        # url and optional parameters
        self.url = self.cfg.get('url')
        self.parameters = self.cfg.get('parameters')

        # http://docs.python.org/2/howto/urllib2.html
        self.query_string = None
        if self.parameters:
            # http://stackoverflow.com/questions/988228/converting-a-string-to-dictionary
            import ast

            # urlencode lives in urllib.parse on Python 3 and directly in
            # urllib on Python 2; import locally so both keep working.
            try:
                from urllib.parse import urlencode
            except ImportError:
                from urllib import urlencode

            self.parameters = ast.literal_eval(self.parameters)
            self.query_string = urlencode(self.parameters)

        log.info("url=%s" % self.url)
Exemplo n.º 7
0
    def __init__(self, configdict, section, produces):
        """
        Initialise the input and read the file settings from ``self.cfg``.

        Reads ``file_path``, ``filename_pattern`` (default
        ``*.[gxGX][mM][lL]``) and ``depth_search`` (default ``False``),
        then builds ``self.file_list`` with ``Util.make_file_list`` and
        logs it.

        The arguments are forwarded unchanged to ``Input.__init__``.
        """
        Input.__init__(self, configdict, section, produces)

        # A dir, a file or several comma-separated paths
        self.file_path = self.cfg.get('file_path')

        # glob.glob-style pattern used to select files
        default_pattern = '*.[gxGX][mM][lL]'
        self.filename_pattern = self.cfg.get('filename_pattern', default_pattern)

        # Whether to recurse into directories
        self.depth_search = self.cfg.get_bool('depth_search', False)

        # Resolve the settings above into the list of input files
        self.file_list = Util.make_file_list(
            self.file_path, None, self.filename_pattern, self.depth_search)

        message = "file_list=%s" % str(self.file_list)
        log.info(message)
Exemplo n.º 8
0
 def __init__(self, configdict, section):
     """
     Initialise via ``Input.__init__``, declaring both
     ``FORMAT.ogr_feature`` and ``FORMAT.ogr_feature_array`` as the
     produced formats.
     """
     produced_formats = [FORMAT.ogr_feature, FORMAT.ogr_feature_array]
     Input.__init__(self, configdict, section, produces=produced_formats)
Exemplo n.º 9
0
 def __init__(self, configdict, section):
     """
     Initialise via ``Input.__init__``, declaring
     ``FORMAT.xml_line_stream`` as the produced format.
     """
     produced_format = FORMAT.xml_line_stream
     Input.__init__(self, configdict, section, produces=produced_format)
Exemplo n.º 10
0
 def __init__(self, configdict, section, produces):
     """
     Initialise via ``Input.__init__``, forwarding ``configdict`` and
     ``section`` positionally and ``produces`` as a keyword argument.
     """
     Input.__init__(
         self,
         configdict,
         section,
         produces=produces)
Exemplo n.º 11
0
 def __init__(self, configdict, section, produces):
     """
     Initialise via ``Input.__init__``, forwarding ``configdict`` and
     ``section`` positionally and ``produces`` as a keyword argument.
     """
     Input.__init__(
         self,
         configdict,
         section,
         produces=produces)
Exemplo n.º 12
0
    def __init__(self, configdict, section, produces=FORMAT.any):
        """
        Initialise via ``Input.__init__`` and log the url and parameters.

        ``self.url`` and ``self.parameters`` are read but not assigned
        here (presumably set by the base initialiser - verify).
        """
        Input.__init__(self, configdict, section, produces)

        # Report the effective request settings
        message = "url=%s parameters=%s" % (self.url, self.parameters)
        log.info(message)
Exemplo n.º 13
0
 def __init__(self, configdict, section):
     """
     Initialise via ``Input.__init__``, declaring
     ``FORMAT.xml_line_stream`` as the produced format.
     """
     produced_format = FORMAT.xml_line_stream
     Input.__init__(self, configdict, section, produces=produced_format)
Exemplo n.º 14
0
 def __init__(self, configdict, section):
     """
     Initialise via ``Input.__init__``, declaring both
     ``FORMAT.ogr_feature`` and ``FORMAT.ogr_feature_array`` as the
     produced formats.
     """
     produced_formats = [FORMAT.ogr_feature, FORMAT.ogr_feature_array]
     Input.__init__(self, configdict, section, produces=produced_formats)
Exemplo n.º 15
0
    def __init__(self, configdict, section, produces=FORMAT.any):
        """
        Initialise via ``Input.__init__`` and log the url and parameters.

        ``self.url`` and ``self.parameters`` are read but not assigned
        here (presumably set by the base initialiser - verify).
        """
        Input.__init__(self, configdict, section, produces)

        # Report the effective request settings
        message = "url=%s parameters=%s" % (self.url, self.parameters)
        log.info(message)