def replace_tag(self, xpath, new_tag, new_tag_attributes=None):
    """Replace element tag from *xpath* expression search to *new_tag*.

    Every element matched by *xpath* is swapped for a freshly created
    *new_tag* element that carries the flattened text content of the
    original.  Text that trails the original element (its ``tail``)
    is carried over to the replacement so that it is not dropped
    from the document.

    **Args:**
        *xpath*: standard XPath expression used to query against *html*

        *new_tag*: new element tag name to replace

        *new_tag_attributes*: list of tuples representing attributes
        name|value pairs to add to the new tag.  A ``None`` value is
        written as an empty string.  When omitted (or empty) the
        attributes of the original element are copied across instead

    """
    log.info('Replace element tag XPath: "%s"' % xpath)

    for tag in self.root.xpath(xpath):
        log.debug('Replacing element tag "%s" with "%s"' %
                  (tag.tag, new_tag))
        new_element = lxml.etree.Element(new_tag)
        new_element.text = tag.text_content()

        # lxml keeps trailing text on the element itself: replace()
        # moves the old element *and* its tail out of the tree, so
        # copy the tail across first or the trailing text is lost.
        new_element.tail = tag.tail

        if new_tag_attributes:
            for name, value in new_tag_attributes:
                new_element.attrib[name] = '' if value is None else value
        else:
            log.debug('Copying over existing attributes: "%s"' %
                      tag.attrib)
            for key, value in tag.attrib.items():
                new_element.attrib[key] = value

        tag.getparent().replace(tag, new_element)
def delete_file(filename):
    """Delete a Munger staged or ready file from the server.

    Uses the request referrer value to determine which page initiates
    the request.  The deletion will occur within context of the
    referring page.  For example, delete request on the Uploads page will
    remove a file from the staging directory.

    A request without a referrer is treated like any other
    non-download referrer (staging directory).  A *filename* that
    resolves outside of the selected directory is logged and ignored.

    **Args:**
        *filename*: name of the file to delete

    **Returns:**
        a redirect to the ``upload`` or ``download`` endpoint,
        depending on the referring page

    """
    referrer = flask.request.referrer
    log.debug('File deletion referrer: "%s"' % referrer)

    # The Referer header is optional: parse an empty URL rather than
    # handing None to urlparse.
    parsed_referrer_url = urlparse.urlparse(referrer or '')
    log.debug('File deletion referrer path: "%s"' %
              parsed_referrer_url.path)

    delete_dir = baip_munger_ui.app.config['STAGING_DIR']
    route = 'upload'
    if parsed_referrer_url.path == '/munger/download':
        delete_dir = baip_munger_ui.app.config['READY_DIR']
        route = 'download'

    delete_path = os.path.join(delete_dir, filename)

    # *filename* is request supplied: only delete when the resolved
    # target sits strictly inside the chosen directory ("..", "." and
    # absolute names are refused).
    base_dir = os.path.join(os.path.abspath(delete_dir), '')
    if os.path.abspath(delete_path).startswith(base_dir):
        log.info('Attempting file delete: "%s"' % delete_path)
        remove_files(delete_path)
    else:
        log.error('Refusing file delete outside of "%s": "%s"' %
                  (delete_dir, delete_path))

    return flask.redirect(flask.url_for(route))
def delete_file(filename):
    """Delete a Parser staged or ready file from the server.

    Uses the request referrer value to determine which page initiates
    the request.  The deletion will occur within context of the
    referring page.  For example, delete request on the Uploads page will
    remove a file from the staging directory.

    A request without a referrer is treated like any other
    non-download referrer (staging directory).  A *filename* that
    resolves outside of the selected directory is logged and ignored.

    **Args:**
        *filename*: name of the file to delete

    **Returns:**
        a redirect to the ``upload`` or ``download`` endpoint,
        depending on the referring page

    """
    referrer = flask.request.referrer
    log.debug('File deletion referrer: "%s"' % referrer)

    # The Referer header is optional: parse an empty URL rather than
    # handing None to urlparse.
    parsed_referrer_url = urlparse.urlparse(referrer or '')
    log.debug('File deletion referrer path: "%s"' %
              parsed_referrer_url.path)

    delete_dir = baip_parser_ui.app.config['STAGING_DIR']
    route = 'upload'
    if parsed_referrer_url.path == '/parser/download':
        delete_dir = baip_parser_ui.app.config['READY_DIR']
        route = 'download'

    delete_path = os.path.join(delete_dir, filename)

    # *filename* is request supplied: only delete when the resolved
    # target sits strictly inside the chosen directory ("..", "." and
    # absolute names are refused).
    base_dir = os.path.join(os.path.abspath(delete_dir), '')
    if os.path.abspath(delete_path).startswith(base_dir):
        log.info('Attempting file delete: "%s"' % delete_path)
        remove_files(delete_path)
    else:
        log.error('Refusing file delete outside of "%s": "%s"' %
                  (delete_dir, delete_path))

    return flask.redirect(flask.url_for(route))
def allowed_file(filename, extensions):
    """Check if *filename* has an extension that is acceptable for upload.

    The comparison ignores case, so ``page.HTML`` matches ``html``.
    An empty or ``None`` *filename* is never allowed.

    **Args:**
        *filename*: the textual representation of the file to upload

        *extensions*: list of accepted filename extension.  For example::

            ['html', 'htm', ...]

    **Returns:**
        Boolean ``True`` if extension is allowed.  ``False`` otherwise.

    """
    log.debug('File name|valid extensions: "%s"|%s' %
              (filename, extensions))

    # No file name means no extension to check (and nothing for
    # os.path.splitext to work with).
    extension = ''
    if filename:
        extension = os.path.splitext(filename)[1].strip('.')

    accepted = [item.lower() for item in extensions]
    is_file_allowed = extension.lower() in accepted

    log.info('File "%s" has accepted extension?: %s' %
             (filename, is_file_allowed))

    return is_file_allowed
def download_file(filename):
    """Send *filename* from the Munger ready directory as an attachment.

    **Args:**
        *filename*: name of the file (relative to the configured
        ``READY_DIR``) to send

    **Returns:**
        the response produced by :func:`flask.send_from_directory`

    """
    ready_dir = baip_munger_ui.app.config['READY_DIR']

    # The joined path is only used for the log message; the actual
    # lookup is delegated to Flask.
    log.info('Attempting file download: "%s"' %
             os.path.join(ready_dir, filename))

    response = flask.send_from_directory(ready_dir,
                                         filename,
                                         as_attachment=True)
    return response
def download_file(filename):
    """Send *filename* from the Parser ready directory as an attachment.

    **Args:**
        *filename*: name of the file (relative to the configured
        ``READY_DIR``) to send

    **Returns:**
        the response produced by :func:`flask.send_from_directory`

    """
    ready_dir = baip_parser_ui.app.config['READY_DIR']

    # The joined path is only used for the log message; the actual
    # lookup is delegated to Flask.
    log.info('Attempting file download: "%s"' %
             os.path.join(ready_dir, filename))

    response = flask.send_from_directory(ready_dir,
                                         filename,
                                         as_attachment=True)
    return response
def parse(path='.'):
    """Parse the staged files and render the Parser "parse" page.

    On a ``POST`` request every file listed for the configured
    ``STAGING_DIR`` is opened with a :class:`baip_parser.Parser` that is
    set up from the ``PARSER_CONF_FILE`` configuration.  When at least
    one result was produced, the results are dumped via
    :class:`baip_parser.ParserDaemon`, the dump file is moved into the
    configured ``READY_DIR`` and the processed files are handed to
    ``remove_files``.

    For any request method the page is then rendered through the
    staging auto-index.

    **Args:**
        *path*: passed through as the ``path`` argument of
        ``render_autoindex`` (defaults to ``'.'``)

    **Returns:**
        the value returned by
        ``baip_parser_ui.staging_index.render_autoindex``.  The template
        context carries ``enabled`` (``True`` while the staging
        directory listing is non-empty) and ``status`` (``True`` once a
        dump file has been moved into the ready directory)

    """
    status = False

    if flask.request.method == 'POST':
        # Load the parser settings (cells to extract, sheets to skip).
        config_file = baip_parser_ui.app.config['PARSER_CONF_FILE']
        conf = baip_parser.ParserConfig(config_file)
        conf.parse_config()

        in_dir = baip_parser_ui.app.config['STAGING_DIR']
        files_to_process = get_directory_files_list(in_dir)
        results = []
        # One Parser instance per staged file; each contributes one
        # entry to *results*.
        for file_to_process in files_to_process:
            log.info('Processing file: %s' % file_to_process)
            parser = baip_parser.Parser()
            parser.open(file_to_process)
            log.debug('xxx: %s' % conf.cells_to_extract)
            parser.cells_to_extract = conf.cells_to_extract
            parser.skip_sheets = conf.skip_sheets
            results.append(parser.parse_sheets())

        if len(results):
            # The daemon class is only used here for its dump() helper
            # (no pidfile is supplied).
            parserd = baip_parser.ParserDaemon(pidfile=None, conf=conf)
            tmp_file = parserd.dump(results)

            # Publish the dump under its own base name in READY_DIR.
            ready_dir = baip_parser_ui.app.config['READY_DIR']
            target_file = os.path.join(ready_dir,
                                       os.path.basename(tmp_file))
            log.info('Moving "%s" to target "%s"' %
                     (tmp_file, target_file))
            shutil.move(tmp_file, target_file)
            status = True

            remove_files(files_to_process)

    # The staging directory is listed again here, i.e. after any
    # processing done above.
    enabled = False
    if get_directory_files_list(baip_parser_ui.app.config['STAGING_DIR']):
        enabled = True

    kwargs = {
        'path': path,
        'template': 'dashboard/parse.html',
        'template_context': {
            'enabled': enabled,
            'status': status,
        },
        'endpoint': '.parse',
    }

    return baip_parser_ui.staging_index.render_autoindex(**kwargs)
def upload_file():
    """Store an uploaded file in the Munger staging directory.

    On a ``POST`` request the ``file`` form field is read.  When a file
    was selected and its name carries one of the configured
    ``ALLOWED_EXTENSIONS``, it is saved into ``STAGING_DIR`` under a
    name sanitised by :func:`werkzeug.secure_filename`.  A request
    without a selected file is logged and otherwise ignored.

    **Returns:**
        a redirect to the ``upload`` endpoint (for every request method)

    """
    if flask.request.method == 'POST':
        file_storage = flask.request.files['file']

        # Nothing selected leaves the source name as None, in which
        # case there is nothing to validate or save.
        source_file = file_storage.filename if file_storage else None

        log_msg = 'File "%s" ' % source_file
        log.info('%s has been selected for upload' % log_msg)

        extensions = baip_munger_ui.app.config['ALLOWED_EXTENSIONS']
        if source_file and allowed_file(source_file, extensions):
            filename = werkzeug.secure_filename(source_file)
            target = os.path.join(baip_munger_ui.app.config['STAGING_DIR'],
                                  filename)
            file_storage.save(target)
            log.info('%s uploaded to "%s"' % (log_msg, target))

    return flask.redirect(flask.url_for('upload'))
def upload_file():
    """Store an uploaded file in the Parser staging directory.

    On a ``POST`` request the ``file`` form field is read.  When a file
    was selected and its name carries one of the configured
    ``ALLOWED_EXTENSIONS``, it is saved into ``STAGING_DIR`` under a
    name sanitised by :func:`werkzeug.secure_filename`.  A request
    without a selected file is logged and otherwise ignored.

    **Returns:**
        a redirect to the ``upload`` endpoint (for every request method)

    """
    if flask.request.method == 'POST':
        file_storage = flask.request.files['file']

        # Nothing selected leaves the source name as None, in which
        # case there is nothing to validate or save.
        source_file = file_storage.filename if file_storage else None

        log_msg = 'File "%s" ' % source_file
        log.info('%s has been selected for upload' % log_msg)

        extensions = baip_parser_ui.app.config['ALLOWED_EXTENSIONS']
        if source_file and allowed_file(source_file, extensions):
            filename = werkzeug.secure_filename(source_file)
            target = os.path.join(baip_parser_ui.app.config['STAGING_DIR'],
                                  filename)
            file_storage.save(target)
            log.info('%s uploaded to "%s"' % (log_msg, target))

    return flask.redirect(flask.url_for('upload'))
def munge(self, actions, staged_file, munged_file):
    """Munge *staged_file* and deposit to *munged_file*

    The content of *staged_file* is assigned to :attr:`root` and the
    rule groups found in *actions* are applied in a fixed order:
    ``replace_tags``, ``insert_tags``, ``attributes`` and then
    ``strip_chars``.  The result of :meth:`dump_root` is written to
    *munged_file*.

    If *staged_file* cannot be read the error is logged and nothing is
    processed or written (a :attr:`root` left over from an earlier call
    is never re-used for the new output file).

    **Args:**
        *actions*: the processing actions as generated by the
        :method:`baip_munger.XpathGen.parse_configuration` method

        *staged_file*: absolute path to the HTML file to process

        *munged_file*: absolute path of the file that the munged
        content is written to

    **Returns:**
        Boolean ``True`` on success.  ``False`` otherwise

    """
    log.info('Munging source file: "%s" ...' % staged_file)
    munge_status = False

    try:
        with open(staged_file, 'r') as html_fh:
            self.root = html_fh.read()
    except IOError as err:
        log.error(str(err))
        # Without fresh input there is nothing valid to munge: bail
        # out instead of falling through to whatever root holds.
        log.info('Munge status: %s' % munge_status)
        return munge_status

    if self.root is not None:
        # Order matters: each group works on the tree left behind by
        # the previous one.
        handlers = (
            ('replace_tags', self.replace_tag),
            ('insert_tags', self.insert_tag),
            ('attributes', self.update_element_attribute),
            ('strip_chars', self.strip_char),
        )
        for action_key, handler in handlers:
            rules = actions.get(action_key)
            if rules is not None:
                for rule in rules:
                    handler(**rule)

        log.info('Writing out munged content to "%s"' % munged_file)
        with open(munged_file, 'w') as out_fh:
            out_fh.write(self.dump_root())

        munge_status = True

    log.info('Munge status: %s' % munge_status)

    return munge_status
def strip_char(self, xpath, chars):
    """Strip *chars* from *xpath* expression search.

    For every element matched by *xpath*, the element itself and each
    of its descendants have *chars* stripped from both ends of their
    ``text`` and of their ``tail`` (whenever these are set).

    **Args:**
        *xpath*: standard XPath expression used to query against *html*

        *chars*: characters to strip from the element tag text

    """
    log.info('Strip chars XPath expression: "%s"' % xpath)

    matched_elements = self.root.xpath(xpath)
    for matched in matched_elements:
        for node in matched.iter():
            # Leading text of the node first ...
            if node.text is not None:
                log.debug('Stipping "%s" from tag "%s" text: "%s"' %
                          (chars, node.tag, node.text))
                stripped_text = node.text.strip(chars)
                node.text = stripped_text
                log.debug('Resultant text: "%s"' % node.text)

            # ... then whatever trails its closing tag.
            if node.tail is not None:
                log.debug('Stipping tail text: "%s" from "%s"' %
                          (chars, node.tail))
                stripped_tail = node.tail.strip(chars)
                node.tail = stripped_tail
                log.debug('Resultant tail text: "%s"' % node.tail)
def restart(self):
    """Restart the daemon

    Runs :meth:`stop`, waits two seconds and then runs :meth:`start`,
    logging each step along the way.

    .. note::

        TODO - Need better tests around this process.

    """
    log_msg = '%s daemon --' % type(self).__name__
    restart_notice = '%s attempting restart ...' % log_msg
    stop_notice = '%s stopping ...' % log_msg

    log.info(restart_notice)
    log.info(stop_notice)
    self.stop()

    # Allow some time between restarts.
    time.sleep(2)

    log.info(restart_notice)
    self.start()
import flask

import trols_stats
from logga.log import log

# NOTE(review): ``os`` is used just below but no ``import os`` is visible
# in this chunk -- confirm that it is imported elsewhere in the file.

# Application object.  Configuration comes from the file named by the
# TROLSUI_CONF environment variable when that is set, otherwise from
# the trols_munger_ui.config module.
app = flask.Flask(__name__)
if os.environ.get('TROLSUI_CONF'):
    app.config.from_envvar('TROLSUI_CONF')
else:
    app.config.from_object('trols_munger_ui.config')

# Imported only after ``app`` exists.  Presumably the views module needs
# ``app`` at import time -- confirm before moving this import.
import trols_munger_ui.views

# Module-level handle on the 'trols' entry of the shelve connection.
# Stays None when no SHELVE setting is configured.
db = None
if app.config.get('SHELVE') is not None:
    log.info('SHELVE: %s', app.config.get('SHELVE'))
    session = trols_stats.DBSession(shelve=app.config.get('SHELVE'))
    session.connect()
    log.info('Reading TROLS stats in memory ...')
    db = session.connection['trols']
    log.info('TROLS stats read OK.')
    # NOTE(review): ``db`` is still referenced after the session is
    # closed -- presumably the data is fully loaded by then; confirm.
    session.close()


def get_db():
    """Attach the module-level ``db`` to the current application context.

    If the top of the Flask application context stack has no ``shelve``
    attribute yet, it is set to the module-level ``db`` value.

    NOTE(review): as visible here the function returns ``None``; a
    trailing ``return top.shelve`` may be missing from this view --
    confirm against the full file.

    """
    top = flask._app_ctx_stack.top
    if not hasattr(top, 'shelve'):
        top.shelve = db
def insert_tag(self, xpath, new_tag):
    """Insert *new_tag* element tag from *xpath* expression search.

    Workflow is:

    * identify elements from XPath expression
    * group same parent/sequential elements
    * construct new HTML element based on *new_tag*
    * insert into :att:`root` :mod:`lxml.html` tree

    A group is closed (and its wrapper inserted) as soon as a matched
    element has a different parent from the group, or is not at the
    index directly after the previous match.  The last open group is
    inserted once all matches have been visited.

    **Args:**
        *xpath*: standard XPath expression used to query against *html*

        *new_tag*: new element tag name to replace as a string.  Method
        will convert to a :mod:`lxml.etree.Element`

    """
    def build_xml(new_tag, tags_to_extend):
        # Wrap the grouped tags in a new *new_tag* element and hand
        # back a fresh element parsed from its serialised form.
        # NOTE(review): extend() presumably moves the tags out of
        # their current parent (an lxml element has a single parent);
        # the index arithmetic in child_xml_insert relies on that --
        # confirm.
        new_element = lxml.etree.Element(new_tag)
        new_element.extend(tags_to_extend)
        xml = lxml.etree.XML(lxml.etree.tostring(new_element))

        return xml

    def child_xml_insert(start_index, node_count, parent_element, xml):
        # *start_index* is the index recorded for the last tag of the
        # group and *node_count* the size of the group, so this
        # evaluates to the index that the first tag of a sequential
        # group occupied.
        insert_index = start_index - node_count + 1
        log.info('Child element insert "%s ..." at index: %d' %
                 (lxml.html.tostring(xml), insert_index))
        parent_element.insert(insert_index, xml)

    log.info('Insert element tag XPath: "%s"' % xpath)
    tags = self.root.xpath(xpath)

    # Loop state: parent of the group being collected, index of the
    # most recently collected tag and the collected tags themselves.
    current_parent = None
    prev_index = None
    tags_to_extend = []
    for tag in tags:
        parent = tag.getparent()
        index = parent.index(tag)
        log.debug('Index (current): %d' % index)

        # First match: start the first group.
        if current_parent is None:
            current_parent = parent
            log.debug('Set current parent (initial) %s:"%s"' %
                      (current_parent, current_parent.tag))
            log.debug('Extending tag (parent): %s' %
                      (lxml.html.tostring(tag)))
            tags_to_extend.append(tag)
            prev_index = index
            log.debug('Previous index (parent): %d' % prev_index)
            continue

        if parent == current_parent:
            # Same parent and directly following the previous match:
            # keep growing the current group.
            if index == (prev_index + 1):
                log.debug('Extending tag: %s' % (lxml.html.tostring(tag)))
                tags_to_extend.append(tag)
                prev_index = index
                log.debug('Previous index (parent match): %d' % prev_index)
                continue
            else:
                log.debug('Sequential index interrupted: inserting')
        else:
            log.debug('Parent change: inserting')

        # Only reached when the current group has to be closed.
        xml = build_xml(new_tag, tags_to_extend)
        if parent != current_parent:
            # NOTE(review): this branch inserts at ``prev_index - 1``
            # whereas every other path goes through child_xml_insert
            # (``prev_index - node_count + 1``) -- confirm that the
            # offset is intended.
            current_parent.insert(prev_index - 1, xml)
            current_parent = parent
            log.debug('Set current parent %s:"%s"' %
                      (current_parent, current_parent.tag))
        else:
            child_xml_insert(prev_index,
                             len(tags_to_extend),
                             current_parent,
                             xml)

        # Reset our control variables.
        # The index is looked up again because the insert above may
        # have changed the position of *tag* within its parent.
        prev_index = parent.index(tag)
        log.debug('New index after insert: %d' % prev_index)
        del tags_to_extend[:]
        log.debug('Extending tag (pass through): %s' %
                  (lxml.html.tostring(tag)))
        tags_to_extend.append(tag)

    # Insert the laggards (if any).
    if len(tags_to_extend):
        xml = build_xml(new_tag, tags_to_extend)
        child_xml_insert(prev_index,
                         len(tags_to_extend),
                         current_parent,
                         xml)
def child_xml_insert(start_index, node_count, parent_element, xml):
    """Insert *xml* into *parent_element* and log the chosen position.

    **Args:**
        *start_index*: index the insert position is counted back from

        *node_count*: number of nodes to count back over; the insert
        position is ``start_index - node_count + 1``

        *parent_element*: element that receives *xml* as a child

        *xml*: element to insert

    """
    position = (start_index + 1) - node_count
    rendered = lxml.html.tostring(xml)

    log.info('Child element insert "%s ..." at index: %d' %
             (rendered, position))

    parent_element.insert(position, xml)
def _exit_handler(self, signal, frame):
    """Log the interception and flag the exit event.

    The parameters follow the Python signal handler calling convention;
    neither is used by the handler itself.

    **Args:**
        *signal*: signal number handed to the handler (unused)

        *frame*: stack frame handed to the handler (unused)

    """
    class_name = type(self).__name__
    log.info('%s SIGTERM intercepted' % ('%s --' % class_name))

    self.set_exit_event()