Пример #1
0
    def replace_tag(self, xpath, new_tag, new_tag_attributes=None):
        """Replace every element matched by *xpath* with a *new_tag*
        element.

        The replacement element receives the text content of the element
        it replaces (as returned by ``text_content()``) and the same
        tail text, so the text that follows the element in the document
        stays in place.

        **Args:**
            *xpath*: standard XPath expression used to query against *html*

            *new_tag*: new element tag name to replace

            *new_tag_attributes*: list of tuples representing attributes
            name|value pairs to add to the new tag.  A ``None`` value is
            stored as an empty string.  When omitted (or empty) the
            attributes of the replaced element are copied over instead

        """
        log.info('Replace element tag XPath: "%s"' % xpath)

        for tag in self.root.xpath(xpath):
            log.debug('Replacing element tag "%s" with "%s"' %
                      (tag.tag, new_tag))
            new_element = lxml.etree.Element(new_tag)
            new_element.text = tag.text_content()
            # lxml detaches an element together with its tail text, so
            # the tail must be carried across explicitly or the text
            # following the replaced element disappears from the tree.
            new_element.tail = tag.tail

            if new_tag_attributes:
                for name, value in new_tag_attributes:
                    new_element.attrib[name] = '' if value is None else value
            else:
                log.debug('Copying over existing attributes: "%s"' %
                          tag.attrib)
                # items() exists on both Python 2 and 3 (iteritems() is
                # Python 2 only).
                for key, value in tag.attrib.items():
                    new_element.attrib[key] = value

            tag.getparent().replace(tag, new_element)
Пример #2
0
def delete_file(filename):
    """Delete a Munger staged or ready file from the server.

    Uses the request referrer value to determine which page initiates
    the request.  The deletion will occur within context of the referring
    page.  For example, delete request on the Uploads page will remove
    a file from the staging directory.

    A request that carries no referrer at all is treated like any other
    non-download referrer: the file is looked up in the staging directory.

    **Args:**
        *filename*: name of the file to delete

    **Returns:**
        a redirect to the ``upload`` or ``download`` endpoint, matching
        the directory the deletion was attempted in

    """
    referrer = flask.request.referrer
    log.debug('File deletion referrer: "%s"' % referrer)

    # The Referer header is optional.  Parse an empty URL when it is
    # missing so the request takes the default branch instead of failing
    # inside urlparse on a None value.
    parsed_referrer_url = urlparse.urlparse(referrer or '')
    log.debug('File deletion referrer path: "%s"' % parsed_referrer_url.path)

    delete_path = baip_munger_ui.app.config['STAGING_DIR']
    route = 'upload'
    if parsed_referrer_url.path == '/munger/download':
        delete_path = baip_munger_ui.app.config['READY_DIR']
        route = 'download'

    # NOTE(review): *filename* is joined to the directory unchecked --
    # confirm the route cannot deliver path separators or "..".
    delete_path = os.path.join(delete_path, filename)
    log.info('Attempting file delete: "%s"' % delete_path)
    remove_files(delete_path)

    return flask.redirect(flask.url_for(route))
Пример #3
0
def delete_file(filename):
    """Delete a Parser staged or ready file from the server.

    Uses the request referrer value to determine which page initiates
    the request.  The deletion will occur within context of the referring
    page.  For example, delete request on the Uploads page will remove
    a file from the staging directory.

    A request that carries no referrer at all is treated like any other
    non-download referrer: the file is looked up in the staging directory.

    **Args:**
        *filename*: name of the file to delete

    **Returns:**
        a redirect to the ``upload`` or ``download`` endpoint, matching
        the directory the deletion was attempted in

    """
    referrer = flask.request.referrer
    log.debug('File deletion referrer: "%s"' % referrer)

    # The Referer header is optional.  Parse an empty URL when it is
    # missing so the request takes the default branch instead of failing
    # inside urlparse on a None value.
    parsed_referrer_url = urlparse.urlparse(referrer or '')
    log.debug('File deletion referrer path: "%s"' %
              parsed_referrer_url.path)

    delete_path = baip_parser_ui.app.config['STAGING_DIR']
    route = 'upload'
    if parsed_referrer_url.path == '/parser/download':
        delete_path = baip_parser_ui.app.config['READY_DIR']
        route = 'download'

    # NOTE(review): *filename* is joined to the directory unchecked --
    # confirm the route cannot deliver path separators or "..".
    delete_path = os.path.join(delete_path, filename)
    log.info('Attempting file delete: "%s"' % delete_path)
    remove_files(delete_path)

    return flask.redirect(flask.url_for(route))
Пример #4
0
def allowed_file(filename, extensions):
    """Check if *filename* has an extension that is acceptable for upload.

    The comparison ignores case, so ``REPORT.HTML`` is accepted when
    ``html`` is listed.  A name without an extension is only accepted
    if the empty string is itself listed in *extensions*.

    **Args:**
        *filename*: the textual representation of the file to upload

        *extensions*: list of accepted filename extension.  For example::

            ['html', 'htm', ...]

    **Returns:**
        Boolean ``True`` if extension is allowed.  ``False`` otherwise.

    """
    # splitext() keeps the leading dot on the extension; drop it before
    # comparing against the dot-less entries in *extensions*.
    extension = os.path.splitext(filename)[1].strip('.').lower()
    accepted = set(ext.lower() for ext in extensions)

    log.debug('File name|valid extensions: "%s"|%s' %
              (filename, extensions))
    is_file_allowed = extension in accepted

    log.info('File "%s" has accepted extension?: %s' %
             (filename, is_file_allowed))

    return is_file_allowed
Пример #5
0
def allowed_file(filename, extensions):
    """Check if *filename* has an extension that is acceptable for upload.

    Matching is case-insensitive: ``DATA.XLSX`` passes when ``xlsx`` is
    listed.  A name without an extension is only accepted if the empty
    string is itself listed in *extensions*.

    **Args:**
        *filename*: the textual representation of the file to upload

        *extensions*: list of accepted filename extension.  For example::

            ['html', 'htm', ...]

    **Returns:**
        Boolean ``True`` if extension is allowed.  ``False`` otherwise.

    """
    log.debug('File name|valid extensions: "%s"|%s' % (filename, extensions))

    # splitext() returns the extension with its leading dot; strip the
    # dot and normalise case on both sides of the comparison.
    extension = os.path.splitext(filename)[1].strip('.').lower()
    is_file_allowed = extension in set(ext.lower() for ext in extensions)

    log.info('File "%s" has accepted extension?: %s' %
             (filename, is_file_allowed))

    return is_file_allowed
Пример #6
0
def download_file(filename):
    """Send *filename* from the Munger ready directory as an attachment.

    **Args:**
        *filename*: name of the file to download

    **Returns:**
        the response produced by :func:`flask.send_from_directory`

    """
    ready_dir = baip_munger_ui.app.config['READY_DIR']
    log.info('Attempting file download: "%s"' %
             os.path.join(ready_dir, filename))

    return flask.send_from_directory(ready_dir, filename, as_attachment=True)
Пример #7
0
def download_file(filename):
    """Send *filename* from the Parser ready directory as an attachment.

    **Args:**
        *filename*: name of the file to download

    **Returns:**
        the response produced by :func:`flask.send_from_directory`

    """
    ready_dir = baip_parser_ui.app.config['READY_DIR']
    log.info('Attempting file download: "%s"' %
             os.path.join(ready_dir, filename))

    return flask.send_from_directory(ready_dir, filename, as_attachment=True)
Пример #8
0
def parse(path='.'):
    """Parse the staged files on POST and render the parse page.

    On a POST request every file listed in the staging directory is run
    through a :class:`baip_parser.Parser`.  When at least one result was
    collected, the results are dumped via
    :class:`baip_parser.ParserDaemon`, the dump file is moved into the
    ready directory and the processed staging files are removed.

    **Args:**
        *path*: handed to the autoindex renderer as its ``path``

    **Returns:**
        the value returned by ``staging_index.render_autoindex``

    """
    app_config = baip_parser_ui.app.config
    status = False

    if flask.request.method == 'POST':
        conf = baip_parser.ParserConfig(app_config['PARSER_CONF_FILE'])
        conf.parse_config()

        staged_files = get_directory_files_list(app_config['STAGING_DIR'])

        results = []
        for staged_file in staged_files:
            log.info('Processing file: %s' % staged_file)
            parser = baip_parser.Parser()
            parser.open(staged_file)
            log.debug('xxx: %s' % conf.cells_to_extract)
            parser.cells_to_extract = conf.cells_to_extract
            parser.skip_sheets = conf.skip_sheets

            results.append(parser.parse_sheets())

        if results:
            parserd = baip_parser.ParserDaemon(pidfile=None, conf=conf)
            dump_file = parserd.dump(results)

            target_file = os.path.join(app_config['READY_DIR'],
                                       os.path.basename(dump_file))
            log.info('Moving "%s" to target "%s"' % (dump_file, target_file))
            shutil.move(dump_file, target_file)

            status = True
            remove_files(staged_files)

    # The page control is enabled only while files remain staged.
    enabled = bool(get_directory_files_list(app_config['STAGING_DIR']))

    return baip_parser_ui.staging_index.render_autoindex(
        path=path,
        template='dashboard/parse.html',
        template_context={
            'enabled': enabled,
            'status': status,
        },
        endpoint='.parse',
    )
Пример #9
0
def upload_file():
    """Stage an uploaded file and redirect back to the upload page.

    On a POST request the ``file`` part of the request is saved into
    the Munger staging directory, provided its extension is listed in
    the ``ALLOWED_EXTENSIONS`` setting.  The stored name is the
    sanitised form of the client-supplied file name.

    **Returns:**
        a redirect to the ``upload`` endpoint

    """
    # Imported from its defining module: the top-level
    # ``werkzeug.secure_filename`` alias was removed in Werkzeug 1.0.
    from werkzeug.utils import secure_filename

    if flask.request.method == 'POST':
        file_storage = flask.request.files['file']

        if file_storage:
            source_file = file_storage.filename
            log_msg = 'File "%s" ' % source_file
            log.info('%s has been selected for upload' % log_msg)

            extensions = baip_munger_ui.app.config['ALLOWED_EXTENSIONS']
            if allowed_file(source_file, extensions):
                # Never trust the client-supplied name as a path.
                filename = secure_filename(source_file)
                target = os.path.join(baip_munger_ui.app.config['STAGING_DIR'],
                                      filename)
                file_storage.save(target)
                log.info('%s uploaded to "%s"' % (log_msg, target))

    return flask.redirect(flask.url_for('upload'))
Пример #10
0
def upload_file():
    """Stage an uploaded file and redirect back to the upload page.

    On a POST request the ``file`` part of the request is saved into
    the Parser staging directory, provided its extension is listed in
    the ``ALLOWED_EXTENSIONS`` setting.  The stored name is the
    sanitised form of the client-supplied file name.

    **Returns:**
        a redirect to the ``upload`` endpoint

    """
    # Imported from its defining module: the top-level
    # ``werkzeug.secure_filename`` alias was removed in Werkzeug 1.0.
    from werkzeug.utils import secure_filename

    if flask.request.method == 'POST':
        file_storage = flask.request.files['file']

        if file_storage:
            source_file = file_storage.filename
            log_msg = 'File "%s" ' % source_file
            log.info('%s has been selected for upload' % log_msg)

            extensions = baip_parser_ui.app.config['ALLOWED_EXTENSIONS']
            if allowed_file(source_file, extensions):
                # Never trust the client-supplied name as a path.
                filename = secure_filename(source_file)
                target = os.path.join(baip_parser_ui.app.config['STAGING_DIR'],
                                      filename)
                file_storage.save(target)
                log.info('%s uploaded to "%s"' % (log_msg, target))

    return flask.redirect(flask.url_for('upload'))
Пример #11
0
    def munge(self, actions, staged_file, munged_file):
        """Munge *staged_file* and deposit to *munged_file*

        The content of *staged_file* is assigned to :attr:`root`, the
        configured actions are applied in a fixed order (tag
        replacement, tag insertion, attribute updates, character
        stripping) and the result of :meth:`dump_root` is written to
        *munged_file*.

        Nothing is written when *staged_file* cannot be read, even if
        :attr:`root` still holds content from an earlier call.

        **Args:**
            *actions*:
                the processing actions as generated by the
                :method:`baip_munger.XpathGen.parse_configuration` method.
                Each known key maps to a list of keyword-argument
                dictionaries for the matching handler method

            *staged_file*:
                absolute path to the HTML file to process

            *munged_file*:
                absolute path of the file the munged HTML is written to

        **Returns:**
            Boolean ``True`` on success.  ``False`` otherwise

        """
        log.info('Munging source file: "%s" ...' % staged_file)

        munge_status = False

        # Track the read explicitly: a root left over from a previous
        # call must not be munged and written out under the new name.
        source_loaded = False
        try:
            with open(staged_file, 'r') as html_fh:
                self.root = html_fh.read()
            source_loaded = True
        except IOError as err:
            log.error(str(err))

        if source_loaded and self.root is not None:
            # (actions key, handler method name), in processing order.
            handlers = (
                ('replace_tags', 'replace_tag'),
                ('insert_tags', 'insert_tag'),
                ('attributes', 'update_element_attribute'),
                ('strip_chars', 'strip_char'),
            )
            for action_key, handler_name in handlers:
                rules = actions.get(action_key)
                if rules is None:
                    continue
                handler = getattr(self, handler_name)
                for rule in rules:
                    handler(**rule)

            log.info('Writing out munged content to "%s"' % munged_file)
            with open(munged_file, 'w') as out_fh:
                out_fh.write(self.dump_root())

            munge_status = True

        log.info('Munge status: %s' % munge_status)

        return munge_status
Пример #12
0
    def strip_char(self, xpath, chars):
        """Strip *chars* from the text of elements found by *xpath*.

        Every matched element and each of its descendants is visited.
        Leading and trailing *chars* are removed from the element text
        and, for elements that have text, from the tail text as well.

        **Args:**
            *xpath*: standard XPath expression used to query against *html*

            *chars*: characters to strip from the element tag text

        """
        log.info('Strip chars XPath expression: "%s"' % xpath)

        for matched in self.root.xpath(xpath):
            for element in matched.iter():
                if element.text is None:
                    # NOTE(review): the tail of a text-less element is
                    # left untouched as well -- confirm this is intended.
                    continue

                log.debug('Stipping "%s" from tag "%s" text: "%s"' %
                          (chars, element.tag, element.text))
                element.text = element.text.strip(chars)
                log.debug('Resultant text: "%s"' % element.text)

                if element.tail is None:
                    continue

                log.debug('Stipping tail text: "%s" from "%s"' %
                          (chars, element.tail))
                element.tail = element.tail.strip(chars)
                log.debug('Resultant tail text: "%s"' % element.tail)
Пример #13
0
    def restart(self):
        """Restart the daemon

        Runs :meth:`stop`, pauses for two seconds and then runs
        :meth:`start`, logging each step along the way.

        .. note::

            TODO - Need better tests around this process.

        """
        prefix = '%s daemon --' % type(self).__name__

        log.info('%s attempting restart ...' % prefix)
        log.info('%s stopping ...' % prefix)
        self.stop()

        # Leave a short gap between the stop and the start.
        time.sleep(2)

        log.info('%s attempting restart ...' % prefix)
        self.start()
Пример #14
0
import os

import flask

import trols_stats
from logga.log import log


# Application object.  Settings come from the file named by the
# TROLSUI_CONF environment variable when it is set, otherwise from the
# packaged ``trols_munger_ui.config`` module.
app = flask.Flask(__name__)
if os.environ.get('TROLSUI_CONF'):
    app.config.from_envvar('TROLSUI_CONF')
else:
    app.config.from_object('trols_munger_ui.config')
# Deliberately imported after ``app`` has been created and configured
# (presumably the views module refers back to ``app`` -- confirm).
import trols_munger_ui.views

# Stats handle read once at import time; stays None when no SHELVE
# setting is configured.
db = None
_shelve_path = app.config.get('SHELVE')
if _shelve_path is not None:
    log.info('SHELVE: %s', _shelve_path)
    session = trols_stats.DBSession(shelve=_shelve_path)
    session.connect()
    try:
        log.info('Reading TROLS stats in memory ...')
        db = session.connection['trols']
        log.info('TROLS stats read OK.')
    finally:
        # Close the session even when the read fails.
        session.close()


def get_db():
    """Return the stats handle bound to the current application context.

    The module-level ``db`` object is attached to the application
    context as ``shelve`` the first time this is called within that
    context; later calls reuse the attached object.

    **Returns:**
        the object stored on the application context (``None`` when
        ``db`` itself is ``None``)

    """
    top = flask._app_ctx_stack.top
    if not hasattr(top, 'shelve'):
        top.shelve = db

    return top.shelve
Пример #15
0
    def insert_tag(self, xpath, new_tag):
        """Insert *new_tag* element tag from *xpath* expression search.

        Workflow is:

            * identify elements from XPath expression
            * group same parent/sequential elements
            * construct new HTML element based on *new_tag*
            * insert into :attr:`root` :mod:`lxml.html` tree

        Each group of matched siblings ends up wrapped in its own
        *new_tag* element, placed where the first element of the group
        used to be.

        **Args:**
            *xpath*: standard XPath expression used to query against
            *html*

            *new_tag*: new element tag name to replace as a string.
            Method will convert to a :mod:`lxml.etree.Element`

        """
        def build_xml(new_tag, tags_to_extend):
            # extend() moves the tags out of their current parent and
            # into the new wrapper element; the wrapper is then
            # serialised and re-parsed into a fresh element.
            new_element = lxml.etree.Element(new_tag)
            new_element.extend(tags_to_extend)
            xml = lxml.etree.XML(lxml.etree.tostring(new_element))

            return xml

        def child_xml_insert(start_index, node_count, parent_element, xml):
            # *start_index* is the original index of the last tag in
            # the group.  With the group already moved out of the
            # parent, the slot of its first tag is node_count - 1
            # positions earlier.
            insert_index = start_index - node_count + 1
            log.info('Child element insert "%s ..." at index: %d' %
                     (lxml.html.tostring(xml), insert_index))
            parent_element.insert(insert_index, xml)

        log.info('Insert element tag XPath: "%s"' % xpath)

        tags = self.root.xpath(xpath)
        current_parent = None
        prev_index = None
        tags_to_extend = []

        for tag in tags:
            parent = tag.getparent()
            index = parent.index(tag)
            log.debug('Index (current): %d' % index)

            if current_parent is None:
                current_parent = parent
                log.debug('Set current parent (initial) %s:"%s"' %
                          (current_parent, current_parent.tag))
                log.debug('Extending tag (parent): %s' %
                          (lxml.html.tostring(tag)))
                tags_to_extend.append(tag)
                prev_index = index
                log.debug('Previous index (parent): %d' % prev_index)
                continue

            if parent == current_parent:
                if index == (prev_index + 1):
                    log.debug('Extending tag: %s' % (lxml.html.tostring(tag)))
                    tags_to_extend.append(tag)
                    prev_index = index
                    log.debug('Previous index (parent match): %d' % prev_index)
                    continue
                else:
                    log.debug('Sequential index interrupted: inserting')
            else:
                log.debug('Parent change: inserting')

            # Flush the finished group into the parent it was collected
            # from.  The same index arithmetic applies whether the group
            # ended because of a gap or because the parent changed; a
            # fixed "prev_index - 1" is only right for groups of two.
            xml = build_xml(new_tag, tags_to_extend)
            child_xml_insert(prev_index, len(tags_to_extend),
                             current_parent, xml)
            if parent != current_parent:
                current_parent = parent
                log.debug('Set current parent %s:"%s"' %
                          (current_parent, current_parent.tag))

            # Reset our control variables.  The index is looked up
            # again because the insert above may have shifted it.
            prev_index = parent.index(tag)
            log.debug('New index after insert: %d' % prev_index)
            del tags_to_extend[:]
            log.debug('Extending tag (pass through): %s' %
                      (lxml.html.tostring(tag)))
            tags_to_extend.append(tag)

        # Insert the laggards (if any).
        if tags_to_extend:
            xml = build_xml(new_tag, tags_to_extend)
            child_xml_insert(prev_index, len(tags_to_extend), current_parent,
                             xml)
Пример #16
0
 def child_xml_insert(start_index, node_count, parent_element, xml):
     """Insert *xml* into *parent_element*, logging the chosen index.

     The insert position is *start_index* moved back by
     ``node_count - 1`` places.

     """
     position = start_index + 1 - node_count
     log.info('Child element insert "%s ..." at index: %d' %
              (lxml.html.tostring(xml), position))
     parent_element.insert(position, xml)
Пример #17
0
 def _exit_handler(self, signal, frame):
     """Log that SIGTERM was intercepted and set the exit event.

     *signal* and *frame* are accepted but not used here.

     """
     log.info('%s SIGTERM intercepted' % ('%s --' % type(self).__name__))
     self.set_exit_event()