def test_get_in_path():
    """get_in_path returns None (or the supplied default) until the full path exists."""
    config = ns.Namespace()

    # Missing section: fall back to None / explicit default.
    assert ns.get_in_path(config, 'Section.value') is None
    assert ns.get_in_path(config, 'Section.value', '') == ''

    # Section present but leaf missing: same fallbacks.
    config.Section = ns.Namespace()
    assert ns.get_in_path(config, 'Section.value') is None
    assert ns.get_in_path(config, 'Section.value', '') == ''

    # Fully populated path resolves to the stored value.
    config.Section.value = '42'
    assert ns.get_in_path(config, 'Section.value') == '42'

    # Intermediate node replaced by a plain value: lookup fails again.
    config.Section = ''
    assert ns.get_in_path(config, 'Section.value') is None
Пример #2
0
    def __init__(self):
        """Load persisted state when available; otherwise seed and save it."""
        if data.DAO().exists():
            # Persisted data found: hydrate this instance from storage.
            data.DAO().load(self)
        else:
            # First run: start with a single empty namespace and persist it.
            self.namespace = [ns.Namespace()]
            data.DAO(self).save()
Пример #3
0
    def createSubNamespaces(self, path, parent):
        """Create a chain of nested namespaces along *path* under *parent*.

        path   -- iterable of component names, outermost first
        parent -- namespace the first component hangs off, or None to create
                  the first component as a root namespace
        Returns the list of created namespaces, outermost first; each one is
        also registered in self.namespaces keyed by its full name.
        """
        # BUG FIX: the original rebound a local called `namespace` to None and
        # then evaluated `namespace.Namespace(component, None)` on it, which
        # raised AttributeError whenever parent was None. The local also
        # shadowed the `namespace` module; a distinct name fixes both.
        current = parent
        createdNamespaces = []
        for component in path:
            parentNamespace = current
            if parentNamespace is None:
                # no parent yet: create a root-level namespace via the module
                current = namespace.Namespace(component, None)
            else:
                current = parentNamespace.addSubNamespace(component)

            createdNamespaces.append(current)
            self.namespaces[current.fullName] = current

        return createdNamespaces
Пример #4
0
    def test_multi_write_item_selection(self):
        """
        test that namespaces are writable with unnamed multi attributes using
        item selection. Note that for this to work, multi attributes have to be
        first declared. Data is verified using both attribute naming and item
        selection
        """

        # create a namespace
        inamespace = namespace.Namespace()

        # populate the namespace randomly
        for attribute in self._testcase:

            # to enable writes using item selection the multi attribute has to
            # be first declared --- a plain attribute access suffices.
            # FIX: direct getattr replaces the original exec, which executed
            # generated source and broke on values containing quotes.
            getattr(inamespace, attribute)

            for ikey, ivalue in self._testcase[attribute].items():
                # item selection on the namespace, then on the multi attribute
                inamespace[attribute][ikey] = ivalue

        # finally, verify that all data was correctly saved using both item
        # selection and attribute names
        for iattr in self._testcase:

            for ikey, ival in self._testcase[iattr].items():

                for returned in (
                        inamespace[iattr][ikey],           # item selection
                        getattr(inamespace, iattr)[ikey],  # attribute naming
                ):
                    expected = ival

                    # check for equality, tolerating float rounding
                    if isinstance(ival, float):
                        self.assertAlmostEqual(expected, returned)
                    else:
                        self.assertEqual(expected, returned)
Пример #5
0
 def delta(self, other):
     '''return the difference between the data'''
     # accept either a sibling object carrying .data or a raw data structure
     data = other.data if hasattr(other, 'data') else other

     # collect every tracked key that exists locally but whose value
     # disagrees with the one found in `other`
     changed = []
     for key in self.relevant_data_keys:
         if not namespace.in_ns(self.data, key):
             continue
         if namespace.locate_ns_item(data, key) != namespace.locate_ns_item(self.data, key):
             changed.append(namespace.locate_ns_item(self.data, key, building=True))

     # merge the differing items into a fresh namespace and hand it back
     delta = namespace.Namespace()
     for diff in changed:
         delta.update(diff)
     return delta
Пример #6
0
    def get_namespaced_data(self, v, l):
        """Collect per-namespace pod/service reports and optionally log them.

        v -- verbosity flag forwarded to every check helper
        l -- when truthy, each intermediate report is logged via self.logger
        """
        # fetching namespaced data from namespace.py
        import namespace as namespace
        ns = namespace.Namespace(self.logger)
        data = ns.get_ns_data(False, '', l)

        # variables to store data from get_ns_data function from namespace.py
        cluster_pods_list, cluster_svc_list = data[1], data[2]

        # analysing security context from security_context function in modules/process.py
        # NOTE(review): 'RUNA_AS_USER' looks like a typo for 'RUN_AS_USER' but
        # is kept verbatim, since the column names must match process.py.
        data_security_context = k8s.Check.security_context(
            'pods', cluster_pods_list,
            ['NAMESPACE', 'POD', 'CONTAINER_NAME', 'PRIVILEGED_ESC',
             'PRIVILEGED', 'READ_ONLY_FS', 'RUN_AS_NON_ROOT', 'RUNA_AS_USER'],
            v, 'all', l, self.logger)
        if l: self.logger.info(data_security_context)

        # analysing health checks from health_probes function in modules/process.py
        # NOTE(review): 'READINESS_PROPBE' kept verbatim for the same reason.
        data_health_probes = k8s.Check.health_probes(
            'pods', cluster_pods_list,
            ['NAMESPACE', 'POD', 'CONTAINER_NAME', 'READINESS_PROPBE', 'LIVENESS_PROBE'],
            v, 'all', l, self.logger)
        if l: self.logger.info(data_health_probes)

        # analysing limit/requests from resources function in modules/process.py
        data_resources = k8s.Check.resources(
            'pods', cluster_pods_list,
            ['NAMESPACE', 'POD', 'CONTAINER_NAME', 'LIMITS', 'REQUESTS'],
            v, 'all', l, self.logger)
        # BUG FIX: the original passed self.logger as an extra argument to
        # logger.info; logging treats extra args as %-format arguments, which
        # fails since the message carries no format specifiers.
        if l: self.logger.info(data_resources)

        # analysing qos context from qos function in modules/process.py
        data_qos = k8s.Check.qos(
            'pods', cluster_pods_list, ['NAMESPACE', 'POD', 'QoS'],
            v, 'all', l, self.logger)
        if l: self.logger.info(data_qos)

        # analysing image_pull_policy from image_pull_policy function in modules/process.py
        data_image_pull_policy = k8s.Check.image_pull_policy(
            'pods', cluster_pods_list,
            ['DEPLOYMENT', 'CONTAINER_NAME', 'IMAGE', 'IMAGE_PULL_POLICY'],
            v, 'all', l, self.logger)
        if l: self.logger.info(data_image_pull_policy)

        # analysing services from get_service function in modules/process.py
        data_get_service = k8s.Service.get_service(
            'services', cluster_svc_list,
            ['NAMESPACE', 'SERVICE', 'SERVICE_TYPE', 'IP', 'SELECTOR'],
            v, 'all', l, self.logger)
        if l: self.logger.info(data_get_service[0])
Пример #7
0
    def setUp(self):
        """
        define a particular test case
        """

        # set up a particular test with single key entries. These are
        # represented as random dictionaries which hold the attribute in the key
        # and its value in the right term of the dictionary
        self._testcase = mkedict(SingleTestCase._mean,
                                 SingleTestCase._variance)

        # create a namespace
        self._namespace = namespace.Namespace()

        # populate the namespace randomly.
        # FIX: setattr replaces the original exec, which broke on string
        # values containing quotes and round-tripped non-strings through
        # their str() representation (losing float precision)
        for attribute, value in self._testcase.items():
            setattr(self._namespace, attribute, value)
Пример #8
0
    def test_single_write_setattr(self):
        """
        test that namespaces are writable with single key values using
        setattr. Data is verified reading the namespace using item selection,
        attribute naming and getattr
        """

        # create a namespace
        inamespace = namespace.Namespace()

        # populate the namespace with the info in this testcase through the
        # namespace's own setattr service.
        # FIX: the direct call replaces the original exec, which broke on
        # string values containing quotes and stringified non-str values
        for ikey, ival in self._testcase.items():
            inamespace.setattr(ikey, ival)

        # now, verify that the values stored in the namespace are correct
        for ikey, ival in self._testcase.items():

            # access data using item selection, attribute naming and getattr
            for returned in (
                    inamespace[ikey],           # item selection
                    getattr(inamespace, ikey),  # attribute naming
                    inamespace.getattr(ikey),   # the namespace's getattr
            ):
                expected = ival

                # and check for equality, tolerating float rounding
                if isinstance(ival, float):
                    self.assertAlmostEqual(expected, returned)
                else:
                    self.assertEqual(expected, returned)
def test_set_in_path():
    """set_in_path creates every missing intermediate section along the path."""
    config = ns.Namespace()
    ns.set_in_path(config, 'Testing.One.Two.Three.name', '42')
    assert config.Testing.One.Two.Three.name == '42'
def test_del():
    """Deleting an attribute removes it from containment checks."""
    config = ns.Namespace()
    config.attr = ''
    assert 'attr' in config
    del config.attr
    assert 'attr' not in config
def test_set_in_path_section_exists():
    """set_in_path reuses an already-present section instead of replacing it."""
    config = ns.Namespace()
    config.Section = ns.Namespace()
    ns.set_in_path(config, "Section.name", '42')
    assert config.Section.name == '42'
def test_set_in_path_section_is_value():
    """set_in_path refuses to descend through a plain (non-namespace) value."""
    config = ns.Namespace()
    config.Section = ''
    with pytest.raises(ns.ConfigurationError):
        ns.set_in_path(config, "Section.name", '42')
Пример #13
0
    def setUp(self):
        """
        define a particular test case
        """

        # set up a particular test with multiple key entries. The data of the
        # whole test case is stored in two nested dictionaries. The first key is
        # the name of an unnamed multi key atttribute. It contains another
        # dictionary which is indexed by a tuple of strings and/or ints which
        # stores values of different types ---but all being single valued

        # first, _testcase contains the attributes, its values (randomly
        # generated) will be next overriden
        self._testcase = mkedict(MultipleTestCase._mean,
                                 MultipleTestCase._variance)

        for attribute in self._testcase:
            self._testcase[attribute] = dict()

            # compute randomly the number of keys used to index this entry
            # ---note that the following statement forces the key to contain at
            # least one element
            keylength = max(
                1,
                int(
                    random.gauss(MultipleTestCase._mean,
                                 MultipleTestCase._variance)))

            # now, compute a random number of entries for this attribute ---note
            # that the following statement forces at least one entry
            for ientry in range(
                    max(
                        1,
                        int(
                            random.gauss(MultipleTestCase._mean,
                                         MultipleTestCase._variance)))):

                # compute randomly the key of this entry as a tuple
                key = mketuple(keylength, 0, 'int', 'str')

                # checking the length of the keys is pretty important since
                # namespaces do not verify it
                if keylength != len(key):
                    raise KeyError(
                        'Mismatch in the length of the key: %i != %i' %
                        (keylength, len(key)))

                # and write it into the specification of this testcase
                self._testcase[attribute][key] = randval()

        # create a namespace
        self._namespace = namespace.Namespace()

        # populate the namespace randomly.
        # FIX: attribute access plus item assignment replaces the original
        # exec, which broke on string values containing quotes and
        # round-tripped values through their str() representation
        for attribute in self._testcase:
            for ikey, ivalue in self._testcase[attribute].items():
                getattr(self._namespace, attribute)[ikey] = ivalue
Пример #14
0
def check(config: ns.Namespace):
    """Check validity of values in the parsed configuration."""

    # We need at least the Source and Config sections
    for name in ('Source', 'Local'):
        ns.check_section(config, name)

    # Check Source
    src = config.Source
    ns.check_oneof(src, 'type', ('github', 'directory'))
    ns.check_oneof(src, 'srctype', ('xml', 'udl'), 'udl')
    ns.check_encoding(src, 'encoding', 'UTF-8')

    # Set some defaults if needed
    ns.check_default(src, 'srcdir', '')
    ns.check_default(src, 'datadir', '')
    ns.check_default(src, 'cspdir', '')
    ns.check_default(src, 'skip', [])

    # Strip leading slash if present, we don't need it
    # NOTE(review): the conditions compare the whole value with '/', so only a
    # value that is exactly '/' is cleared; a longer path with a leading slash
    # is left untouched despite the comment above -- confirm this is intended.
    if src.srcdir == '/': src.srcdir = src.srcdir[1:]
    if src.datadir == '/': src.datadir = src.datadir[1:]
    if src.cspdir == '/': src.cspdir = src.cspdir[1:]

    # Check Local section
    local = config.Local
    ns.check_notempty(local, 'outfile')
    ns.check_default(local, 'deployment', False)
    ns.check_default(local, 'logdir', '')
    ns.check_default(local, 'loglevel', '')

    # Check CSP configuration
    if src.cspdir:
        csp = ns.check_section(config, 'CSP')
        ns.check_oneof(csp, 'export', ('embed', 'separate', 'none'), 'embed')
        if not csp.export == 'none':
            # Only check these if we are to export CSP files
            # NOTE(review): presumably check_default returns truthy when it had
            # to apply the default (i.e. no [[CSP.parsers]] sections were
            # configured); verify against ns.check_default.
            if ns.check_default(csp, 'parsers', []):
                raise ConfigurationError(
                    "At least one [[CSP.parsers]] section for CSP items should be present."
                )
            for i, parser in enumerate(csp.parsers):
                if not isinstance(parser, ns.Namespace):
                    raise ConfigurationError(
                        f'Parser {i+1} must be a section.')
                ns.check_notempty(parser, 'regex')
                ns.check_notempty(parser, 'app')
                ns.check_notempty(parser, 'item')
                ns.check_oneof(parser, 'nomatch', ('skip', 'error'), 'error')

        # CSP items appear unsupported in deployments, so must be exported separately
        if local.deployment and csp.export == 'embed':
            raise ConfigurationError(
                "When requesting a deployment, CSP export must be 'separate'.")

    # Check optional sections
    if src.type == 'directory':
        ns.check_section(config, 'Directory')
        ns.check_notempty(config.Directory, 'path')
        if 'structure' in config.Directory:
            logging.warning(
                "Warning: setting 'structure' in section Directory no longer used."
            )
            del config.Directory.structure
        # resolve a relative source path against the config file's directory
        if not isabs(config.Directory.path):
            config.Directory.path = join(abspath(config.cfgdir),
                                         config.Directory.path)
    else:
        ns.check_section(config, 'GitHub')
        gh = config.GitHub
        ns.check_notempty(gh, 'user')
        ns.check_notempty(gh, 'repo')
        ns.check_notempty(gh, 'tag')
        ns.check_default(config.GitHub, 'token', '')

    if src.srctype == 'udl':
        # Server needed for conversion to XML
        svr = ns.get_section(config, 'Server')
        if svr is None:
            svr = config.Server = ns.Namespace()
        ns.check_default(svr, 'host', 'localhost')
        ns.check_default(svr, 'port', '52773')
        ns.check_default(svr, 'user', 'SuperUser')
        ns.check_default(svr, 'password', 'SYS')
        ns.check_default(svr, 'namespace', 'USER')
        ns.check_default(svr, 'https', False)
Пример #15
0
# Copyright (C) 2013, Thomas Leonard
# See the README file for details, or visit http://0install.net.

from xml.dom import minidom, XMLNS_NAMESPACE, Node

from zeroinstall import SafeException
from zeroinstall.injector.namespaces import XMLNS_IFACE
from zeroinstall.injector import model

import namespace, formatting

# module-level namespace instance shared by the helpers in this module
ns = namespace.Namespace()

class DuplicateIDException(SafeException):
	"""Error raised when a duplicate ID is encountered (safe to show to users)."""
	pass

def childNodes(parent, namespaceURI = None, localName = None):
	"""Yield the element children of *parent*, optionally filtered.

	namespaceURI -- when given, only elements in that namespace are yielded
	localName -- when given, only elements with that local name are yielded
	"""
	for child in parent.childNodes:
		# skip text nodes, comments and any other non-element children
		if child.nodeType == Node.ELEMENT_NODE:
			ns_ok = namespaceURI is None or child.namespaceURI == namespaceURI
			name_ok = localName is None or child.localName == localName
			if ns_ok and name_ok:
				yield child

# element names treated as dependency-like: requires, restricts and the
# binding element names defined by the model module
requires_names = frozenset(['requires', 'restricts'] + list(model.binding_names))

class Context:
	"""Mutable container for the attributes, dependencies and commands
	collected while processing one implementation."""

	def __init__(self, impl):
		# attribute values keyed by (namespaceURI, localName)
		self.attribs = {}
		# requires, restricts and binding elements all end up here
		self.requires = []
		# <command> elements keyed by (name, version-expr)
		self.commands = {}
Пример #16
0
# Default plotting/processing parameters, grouped by purpose. Leading
# underscores mark entries treated as internal layout constants.
defaults = namespace.Namespace(
    # resolution and shape constants
    _DPI=500,
    _size=(8, 6),
    _shape=(6, 7),
    _obs_height=4,
    _obs_width=4,
    _cbar_height=5,
    _cmap="Spectral_r",

    # constants for specifying gridspec
    _obs_grid={
        'left': 0.15,
        'right': 0.8,
        'wspace': 0.05,
        'top': 0.9,
        'bottom': 0.2
    },
    _mod_grid={
        'left': 0.1,
        'wspace': 0.05,
        'hspace': 0.05,
        'top': 0.9
    },
    _cbar_grid={
        'left': 0.15,
        'right': 0.8 - 0.05,
        'bottom': 0.2,
        'top': 0.4
    },

    # final adjustment to leave room for the title
    _subplot_adjustment={'top': 0.9},

    # specifying font sizes
    _labelfont=12,
    _textfont=10.5,
    _titlefont=14,
    _panel_labelfont=12,
    _mod_labelpad=0.05,
    _ticklabelfont=12,
    font=Formats.globalfont,

    # positions for panel labels
    _panel_labelx=-25,
    _panel_labelx_obs=-17.5,
    _panel_labely=0,
    _panel_labely_obs=0,

    # resolution constants for the plots
    _xres=1001,
    _yres=1001,
    _dx=1.e2,
    _dy=1.e2,
    _h_res=10,  # factor higher res than other arrays
    _decay_threshold=0.005,
    _ctick_fmt='{}',
    _txt_dec_fmt='{:.4f}',
    _paneltext_x=0.02,
    _paneltext_y=0.02,
    _paneltext_xspace=0.15,

    # title: formatted by overpass.info and windspeed.
    # FIX: raw string -- '\c' in a normal literal is an invalid escape
    # sequence (deprecated, a future SyntaxError); the value is unchanged.
    _title=r"%s, wind %s m/s, %s$^{\circ}$",
    _cbarlabel="Xco$_2$ enhancement relative to background",
    _obslabel="Observed Xco$_2$ enhancement",
    _modlabel="Model Xco$_2$ enhancement",
    _xlabel='Distance along wind (km)',
    _ylabel='Distance perpendicular to wind (km)',
    clim=(0.99, 1.01),
    xlim=(-20, 80),
    ylim=(-50, 50),
    x_step=None,
    y_step=20,
    f_plume=0.10,
    f_background=0.01,
    offset=3.e3,
    y_max_positive=50.e3,
    y_min_positive=0.,
    y_max_negative=50.e3,
    y_min_negative=0.,
    direction='y',
    wind_adjustment=0.,
    x_max=75.e3,
    wind_source='Average',
    snr_strong_co2_min=None,
    chi_squared_max=None,
    albedo_min=None,
    albedo_max=None,
    surface_pressure_min=None,
    surface_pressure_max=None,
    smooth=False,
    outcome_flags={1, 2},
    force_winds=None,
    sza_adjustments=True,
    temporal_factors=False,
    uncertainty=False,
    plot_offset=False,
    bias_correction='corrected',
    scatter_plot=False,
    background_thresholds=[0.01],
    plume_thresholds=[0.10, 0.25],
    force_wind=None,
    weighted=False,
    units=Units.output_units,
    stability=None,
    surface_stability=True,
    xco2_min=395,
    xco2_max=405,
    opacity=0.0,
    secondary_sources=[],
    fixed_secondary_sources=[],
    background_average=None,
)
Пример #17
0
class BotParser(object):
    """
    Base class of all parsebots

    It automates the parsing of any text file and the automatic extraction of
    information according to the specification of a database specification file
    """

    # regular epression for recognizing pairs (var, val)
    # -----------------------------------------------------------------------------
    # the following regexp is used by default: first, the user can provide its
    # own regexps (see below) or it can overwrite the current regexp which is
    # distinguished with the special name 'data'
    #
    # the following regexp correctly matches strings with two groups 'varname'
    # and 'value' such as:
    #
    # > Cost     : 359
    # > CPU time : 16.89311
    #
    # since these fields are written into the data namespace (see below) they
    # can be accessed by the user in the database specification file with the
    # format data.Cost and data.'CPU time'
    statregexp = r" >[\t ]*(?P<varname>[a-zA-Z ]+):[ ]+(?P<value>([0-9]+\.[0-9]+|[0-9]+))"
    # NOTE(review): the pattern requires a space before '>' and only matches
    # integer/decimal values; 'varname' also captures the padding spaces
    # before the colon, since the character class includes ' '.

    # logging services
    # -----------------------------------------------------------------------------
    _loglevel = logging.INFO  # default logging level

    # namespaces - a common place to exchange data in the form of single and
    # multi key attributes. The following namespaces are mapped (in the
    # comments) with the type of variables recognized by the dbparser (see
    # dbparser.py)
    #
    # the purpose of every namespace is described below:
    #
    # * namespace: denoted also as the main or sys namespace. It contains sys
    #              information and main variables
    # * data: It contains datavar and filevar
    # * user: this namespace is never used by autobot and it is created only for
    #         user specifics
    # * param: it stores param and dirvar. It is not used by botparser but by
    #          bottester
    # * regexp : it stores the results of processing the contents of a file with
    #            the regexps found in the database specification
    # * snippet: saves the values of variables computed with external Python
    #            code that can be initialized with variables in autobot
    #
    # These namespaces automatically use the different variables (most of them
    # defined in the dbparser) whose purpose is defined below:
    #
    # * sysvar: these are variables computed by autobot with additional info
    #           such as the index of the current file, current date and time and
    #           the name of the file been currently processed
    #
    # * mainvar: these are the flags given to the main script using autobot
    #            (ie., parsebot) These variables can be used to create a template
    #            for the 'output' file
    #
    # * datavar: data processed from the stdout of the executable. These data is
    #            retrieved using the default regular expression
    #
    # * filevar: these variables are given as filenames whose value are the
    #            contents of the file
    #
    # * regexp: regexps are defined separately in the database specification
    #           file and can be used in the specification of database tables to
    #           refer to the various groups that result every time a match is
    #           found
    # * snippet: snippets are also defined separately in the database
    #            specification file and can be used in the specification of
    #            database tables to refer to the different output variables that
    #            are computed by the snippet
    #
    # Importantly, all these variables can be qualified with contexts which have
    # to be regexps. When contexts are used, the final value results of applying
    # the next regexp to the previous value until all contexts have been
    # processed.
    #
    # to make these relationships more apparent, the variables given in the
    # database specification file can be preceded by a prefix that provides
    # information about the namespace they are written to:
    #
    # type of variable   prefix
    # -----------------+-----------
    # sysvar           | 'sys.'
    # datavar          | 'data.'
    # filevar          | 'file.'
    # mainvar          | "main.'
    # -----------------+-----------
    #
    # the case of regexp variables is a bit particular. They have their own
    # statements of the form:
    #
    # regexp <name> <specification-string>
    #
    # where <specification-string> should contain at least one <group> defined
    # with the directive (?P<group>...). This way, any column in the
    # specification of a database can use the format <name>.<group> to refer to
    # the value parsed in group <group> with regexp <name>
    #
    # Likewise, snippets are defined with the syntax:
    #
    # snippet <name>
    #    input-var1 = <autobot-variable>
    #          ...  = ...
    #    input-varn = <autobot-variable>
    #    return output-var1
    #      ...      ...
    #    return output-varn
    #    code <python-file>
    #
    # This way, any column in the specification of a database can use the
    # variables computed by the execution of the <python-file> with the syntax
    # <name>.<output-var>
    #
    # Namespaces are populated with information with the following variable
    # types:
    #
    # namespace   variable type
    # ----------+-----------------
    # namespace | sysvar mainvar
    # data      | datavar filevar
    # user      | --
    # regexp    | regexp
    # snippet   | snippet
    # ----------+-----------------
    #
    # These associations are implemented in the evaluation of dbexpressions
    # -----------------------------------------------------------------------------
    _namespace = namespace.Namespace()  # sysvar, mainvar
    _data = namespace.Namespace()  # datavar, filevar
    _param = namespace.Namespace()  # param, dirvar (to be used in BotTester)
    _user = namespace.Namespace()  # user space
    _regexp = namespace.Namespace()  # regexp
    _snippet = namespace.Namespace()  # snippets of python code
    # NOTE(review): these are class-level attributes, so every BotParser
    # instance (and subclass) shares the same namespace objects.

    # -----------------------------------------------------------------------------
    # _sub
    #
    # substitute in string the ocurrence of every keyword in the namespace used
    # in this instance of BotParser (BotParser._namespace) with its value if it
    # appears preceded by '$' in string and it is a str. Similar to
    # Template.substitute but it also allows the substitution of strings which
    # do not follow the convention of python variable names
    #
    # Of course, other namespaces can be used but _sub is used only to compute
    # the name of the output file so that only static information is used
    # -----------------------------------------------------------------------------
    def _sub(self, string):
        """
        substitute in string the ocurrence of every keyword in the namespace
        used in this instance of BotParser (BotParser._namespace) with its value
        if it appears preceded by '$' in string and it is a str. Similar to
        Template.substitute but it also allows the substitution of strings which
        do not follow the convention of python variable names

        Of course, other namespaces can be used but _sub is used only to compute
        the name of the output file so that only static information is used
        """

        result = string  # initialization

        # now, substitute every ocurrence of every single attribute in
        # namespace with its value only in case the value is a string
        for ikey in [
                jkey for jkey in BotParser._namespace
                if not isinstance(BotParser._namespace[jkey], dict)
        ]:

            # perform the substitution enforcing the type of value to be str.
            # FIX: a literal replace is used instead of re.sub -- the original
            # fed the raw key into a regexp (breaking on keys with regexp
            # metacharacters) and let re interpret backslashes in the
            # replacement value as group references
            result = result.replace('$' + ikey,
                                    str(BotParser._namespace[ikey]))

        # and return the result
        return result

    # -----------------------------------------------------------------------------
    # check_flags
    #
    # check the parameters given
    # -----------------------------------------------------------------------------
    def check_flags(self, txtfile, dbfile, directory):
        """
        check the parameters given
        """

        # verify that all text files are accessible
        for itxtfile in txtfile:

            if not os.access(itxtfile, os.F_OK):
                self._logger.critical("""
 The text file '%s' is not accessible
 Use '--help' for more information
""" % itxtfile)
                raise ValueError(" The text file is not accessible")

        # verify also that the db file is accessible
        if not os.access(dbfile, os.F_OK):
            self._logger.critical("""
 The database specification file does not exist or it resides in an unreachable location
 Use '--help' for more information
""")
            raise ValueError(
                " The database specification file is not accessible")

    # -----------------------------------------------------------------------------
    # show_switches
    #
    # show a somehow beautified view of the current params
    # -----------------------------------------------------------------------------
    def show_switches(self, txtfile, dbfile, directory):
        """
        show a somehow beautified view of the current params
        """

        # NOTE(review): __revision__ and __date__ are sliced with [1:-1] --
        # presumably to strip surrounding '$...$' VCS keyword markers; confirm
        # against the module header where they are defined.
        self._logger.info("""
  %s %s %s
 -----------------------------------------------------------------------------
  * Files                : %s
  * Database             : %s

  * Directory            : %s
 -----------------------------------------------------------------------------"""
                          % (__revision__[1:-1], __date__[1:-1], __version__,
                             txtfile, dbfile, directory))

    # -----------------------------------------------------------------------------
    # setup
    #
    # sets up all the necessary environment. It returns: the directory where the
    # parsed files should be copied and the config dir where additional
    # information (such as the db specification) should be written
    # -----------------------------------------------------------------------------
    def setup(self, directory):
        """
        sets up all the necessary environment. It returns: the directory where
        the parsed files should be copied and the config dir where additional
        information (such as the db specification) should be written
        """
        def _make_subdir(parent, subdir):
            """Create parent/subdir (as an absolute path) and return it.

            Working with absolute paths sidesteps cwd-related surprises.
            """
            target = os.path.abspath(os.path.join(parent, subdir))
            os.mkdir(target)
            return target

        # make sure the target directory exists, creating it on demand
        if not os.access(directory, os.F_OK):
            os.makedirs(directory)
            self._logger.debug(" The directory '%s' has been created!" %
                               directory)

        # parsed results are copied under <directory>/results
        resultsdir = _make_subdir(directory, "results")

        # the db specification and other metadata go under <directory>/config
        configdir = _make_subdir(directory, "config")

        return (resultsdir, configdir)

    # -----------------------------------------------------------------------------
    # bz2
    #
    # compress the contents of the given filename and writes the results to a
    # file with the same name + '.bz2'. If remove is enabled, the original
    # filename is removed
    # -----------------------------------------------------------------------------
    def _bz2(self, filename, remove=False):
        """
        compress the contents of the given filename and writes the results to a
        file with the same name + '.bz2'. If remove is enabled, the
        original filename is removed
        """

        # BUG FIX: the source must be opened in binary mode; the original used
        # text mode 'r', and shutil.copyfileobj then tried to write str chunks
        # into the binary BZ2File, raising TypeError on Python 3
        with open(filename, 'rb') as input:

            # create a bz2file to write compressed data
            with bz2.BZ2File(filename + '.bz2', 'w',
                             compresslevel=9) as output:

                # and just transfer data from one file to the other
                shutil.copyfileobj(input, output)

        # if remove is enabled, remove the original filename
        if (remove):
            os.remove(filename)

    # -----------------------------------------------------------------------------
    # copy_file
    #
    # take the contents of src and put them into the directory given in target
    # with the name specified in dst. If move is given, the file is moved,
    # otherwise, it is copied
    #
    # If compression was requested it compresses the file
    # -----------------------------------------------------------------------------
    def copy_file(self, src, target, dst, move=False):
        """Place the contents of src inside the directory target under the
        name dst. The file is moved when move is True and copied otherwise.

        When compression was requested, the file is compressed first and the
        '.bz2' counterparts are transferred instead.
        """

        # apply bzip2 compression first whenever it was requested
        if self._compress:
            self._logger.debug(" Compressing file '%s'" % src)

            # when moving, the original (uncompressed) file can be dropped
            # right away: only the compressed copy is transferred below. When
            # copying, the original must obviously survive
            self._bz2(src, remove=move)

            # from here on, operate on the compressed variants
            src += '.bz2'
            dst += '.bz2'

        # finally, transfer the (possibly compressed) file to its destination
        destination = os.path.join(target, dst)
        if move:
            shutil.move(src, destination)
        else:
            shutil.copy(src, destination)

    # -----------------------------------------------------------------------------
    # parse_single_file
    #
    # looks for all matches of all regular expressions defined in the database
    # specification in the given text file. The results of all matches are
    # written to the regexp namespace. Also, the data namespace is populated
    # with the results of the matches of the default regexp
    #
    # Also, the textfile is backed up to the resultsdir
    # -----------------------------------------------------------------------------
    def parse_single_file(self, txtfile):
        """looks for all matches of all regular expressions defined in the database
        specification in the given text file. The results of all matches are
        written to the regexp namespace. Also, the data namespace is populated
        with the results of the matches of the default regexp

        Also, the textfile is backed up to the resultsdir
        """
        def _eval_snippet(variable):
            """creates a dbexpression that consists of a snippet and requests its
            evaluation, thus updating the snippet namespace
            """

            # botparser is responsible only for making sure that data necessary
            # to evaluate expressions is available in the corresponding
            # namespaces. Thus, if this snippet is volatile (ie, if any of its
            # output variables has been declared as volatile) then it is not
            # evaluated here. Instead, it should be evaluated when the system
            # is ready for downloading data to the database
            #
            # note: str.split is used instead of the (Python 2 only) helpers
            # in the string module so this code also runs under Python 3
            snippetname = variable.split('.')[0]
            snippet = self._dbspec.get_snippet(snippetname)
            if snippet.get_keyword() == 'volatile':
                return

            # now, in case it is static, it is evaluated never more than once
            if snippetname in BotParser._snippet:
                return

            # otherwise, to evaluate this snippet create an expression with
            # this snippet
            expression = dbexpression.DBExpression(dbparser.SNIPPETNST,
                                                   variable, self._logger,
                                                   self._logfilter)

            # and request its evaluation
            expression.eval_snippet(dbspec=self._dbspec,
                                    sys=BotParser._namespace,
                                    data=BotParser._data,
                                    param=None,
                                    regexp=BotParser._regexp,
                                    snippet=BotParser._snippet,
                                    user=BotParser._user)

        def _eval_filevar(variable):
            """creates a dbexpression that consists of a filevar and requests its
            evaluation, thus updating the data namespace
            """

            # create a dbexpression and require its evaluation updating
            # the data namespace
            dbexpression.DBExpression(
                dbparser.FILENST, variable, self._logger,
                self._logfilter).eval_filevar(data=BotParser._data)

        # default regexp
        # ---------------------------------------------------------------------
        # read all contents of the input file - yep, this might take a lot
        # of memory but the alternative, to process each line separately
        # would not allow to match various lines simultaneously

        # open the file in read mode
        with open(txtfile, "r") as stream:

            # for all matches of the default regexp in the current text file
            for imatch in re.finditer(BotParser.statregexp, stream.read()):

                # and store every match in the data namespace
                BotParser._data[imatch.group('varname').rstrip(' ')] = \
                    imatch.group('value')

        # for all database tables (ie, implicitly ignoring snippets) within the
        # current database specification
        for itable in [
                itable for itable in self._dbspec
                if isinstance(itable, dbparser.DBTable)
        ]:

            # snippets
            # ---------------------------------------------------------------------
            # now, for all snippets mentioned in any column of this table
            for icolumn in [
                    icolumn for icolumn in itable
                    if icolumn.get_vartype() == dbparser.SNIPPETNST
            ]:

                _eval_snippet(icolumn.get_variable())

            # filevars
            # -------------------------------------------------------------------------
            # populate the data namespace with the contents of files (filevars)
            # as specified in the database specification file
            for icolumn in [
                    icolumn for icolumn in itable
                    if icolumn.get_vartype() == dbparser.FILENST
            ]:

                _eval_filevar(icolumn.get_variable())

            # regexps
            # ---------------------------------------------------------------------
            # also, for all regular expressions that start with either a
            # snippet or a file
            for icolumn in [
                    icolumn for icolumn in itable
                    if icolumn.get_vartype() == dbparser.REGEXPNST
            ]:

                # create an expression with this regular expression
                expression = dbexpression.DBExpression(icolumn.get_vartype(),
                                                       icolumn.get_variable(),
                                                       self._logger,
                                                       self._logfilter)

                # retrieve the first context
                head = expression.get_context()[0]

                # and retrieve its prefix and variable name - again, the str
                # method replaces the legacy string module helper
                (prefix, variable) = head.split('.')

                # verify whether this is a snippet ...
                if self._dbspec.get_snippet(prefix):

                    _eval_snippet(head)

                # ... or a file variable
                elif prefix.upper() == dbparser.FILENST:

                    _eval_filevar(variable)

    # -----------------------------------------------------------------------------
    # parse_all_files
    #
    # starts the automated parsing of all text files given in txtfiles. All
    # these files are copied to the results directory given in resultsdir.
    #
    # if prologue/epilogue actions are specified then its __call__ method is
    # invoked before/after parsing every text file.
    # -----------------------------------------------------------------------------
    def parse_all_files(self, txtfiles, resultsdir):
        """
        starts the automated parsing of all text files given in txtfiles. All
        these files are copied to the results directory given in resultsdir.

        if prologue/epilogue actions are specified then its __call__ method is
        invoked before/after parsing every text file.
        """

        # before parsing all the text files, initialize the current file to the
        # empty string. This will enforce the creation of a first database that
        # will contain the results of the parsing
        currdbname = str()

        # processing files
        # -------------------------------------------------------------------------
        # now, process every text file, tracking its position with idx -
        # enumerate replaces the previous manually maintained counter
        for idx, itxtfile in enumerate(txtfiles):

            # namespaces
            # -------------------------------------------------------------------------
            # initialize the contents of the namespaces that hold variables
            # whose value is dependent upon the contents of the current file
            BotParser._namespace.clear()
            BotParser._data.clear()
            BotParser._regexp.clear()
            BotParser._snippet.clear()

            # - main (sys) namespace
            # -------------------------------------------------------------------------
            # initialize the main namespace with the parameters passed to the
            # main script (ie., the parsebot), mainvars. These are given in
            # self._argnamespace. Since the argparser automatically casts type
            # according to their type field, they are all converted into
            # strings here to allow a uniform treatment
            if self._argnamespace:
                for index, value in self._argnamespace.__dict__.items():
                    BotParser._namespace[index] = str(value)

            # also, with the contents of this file. Note that the with
            # statement already closes the stream on exit, so no explicit
            # close() is needed
            with open(itxtfile, "r") as stream:
                BotParser._namespace.stdout = stream.read()

            # and also with the following sys variables
            #
            #   index         - index of this file in the range [0, ...)
            #   filename      - name of this text file
            #   date          - current date
            #   time          - current time
            #   startfullparsedatetime - when the whole parsing started in
            #                            date/time format
            #   startfullparsetime - when the whole parsing started in secs
            #                        from Epoch
            #
            # Note that other fields are added below to register the right
            # timings when every parsing started/ended
            BotParser._namespace.index = idx
            BotParser._namespace.name = os.path.basename(itxtfile)
            BotParser._namespace.date = datetime.datetime.now().strftime(
                "%Y-%m-%d")
            BotParser._namespace.time = datetime.datetime.now().strftime(
                "%H:%M:%S")
            BotParser._namespace.startfullparsedatetime = datetime.datetime.now(
            )
            BotParser._namespace.startfullparsetime = time.time()

            self._logger.info(" Starting the automated parsing of file '%s'" %
                              itxtfile)

            # parsing
            # -------------------------------------------------------------------------
            # execute the prologue in case any was given (note that the run
            # time is computed right now) and register also the exact time when
            # the processing of this file started (including the prologue)
            if self._prologue:
                action = self._prologue(
                    textfile=itxtfile,
                    dbfile=self._dbfile,
                    directory=self._directory,
                    startfullparsetime=BotParser._namespace.startfullparsetime,
                    namespace=BotParser._namespace,
                    data=BotParser._data,
                    user=BotParser._user)
                action(self._logger)

            # now, invoke the automated parsing of this particular text file
            # after recording the exact timings before and after (ie, this do
            # not take the time of the prologue/epilogue into account)
            BotParser._namespace.startparsedatetime = datetime.datetime.now()
            BotParser._namespace.startparsetime = time.time()

            self.parse_single_file(itxtfile)

            BotParser._namespace.endparsedatetime = datetime.datetime.now()
            BotParser._namespace.endparsetime = time.time()

            # now, before processing the next text file, invoke the epilogue in
            # case any was given
            if self._epilogue:
                action = self._epilogue(
                    textfile=itxtfile,
                    dbfile=self._dbfile,
                    directory=self._directory,
                    startparsetime=BotParser._namespace.startparsetime,
                    endparsetime=BotParser._namespace.endparsetime,
                    namespace=BotParser._namespace,
                    data=BotParser._data,
                    user=BotParser._user)
                action(self._logger)

            # and register the exact time when the whole parsing of this file
            # ended including processing the epilogue both in seconds from Epoc
            # (endruntime) and in date/time format (enddatetime)
            BotParser._namespace.endfullparsetime = time.time()
            BotParser._namespace.endfullparsedatetime = datetime.datetime.now()

            # results/
            # -------------------------------------------------------------------------
            # once this file has been processed, copy it (as opposed to move
            # it) to the results directory after applying the substitution
            # specified in the output directive.
            self.copy_file(itxtfile,
                           resultsdir,
                           self._sub(self._output),
                           move=False)

            # in case compression was requested, make sure to remove the
            # compressed file which has been copied with copy_file
            if self._compress:
                os.remove(itxtfile + '.bz2')

            # database
            # -------------------------------------------------------------------------
            # now, write data to the database. Note that we do this after
            # invoking the epilogue so that the user gets a finer control on
            # the data that is about to be inserted into the database

            # First, compute the name of the database
            dbname = self._sub(self._dbname)

            # create a new SQLITE3 database connection
            dbhandler = sqltools.dbaccess(dbname)

            # in case we get a different database
            if dbname != currdbname:

                # create the tables
                for itable in self._dbspec.get_db():
                    dbhandler.create_table(itable)

                # and remember the name of the current database
                currdbname = dbname

            # now, populate the datatase
            self._logger.debug(" Inserting data into '%s'" % currdbname)
            for itable in self._dbspec.get_db():
                self._logger.debug(" Populating '%s'" % itable.get_name())
                dbhandler.insert_data(
                    itable,
                    itable.poll(dbspec=self._dbspec,
                                namespace=BotParser._namespace,
                                data=BotParser._data,
                                param=None,
                                regexp=BotParser._regexp,
                                snippet=BotParser._snippet,
                                user=BotParser._user,
                                logger=self._logger,
                                logfilter=self._logfilter))

            # and close the database
            dbhandler.close()

    # -----------------------------------------------------------------------------
    # wrapup
    #
    # wrapup performing the last operations
    # -----------------------------------------------------------------------------
    def wrapup(self, dbspec, configdir):
        """wrapup performing the last operations
        """

        # preserve the database specification inside the configuration dir
        if isinstance(dbspec, dbtools.DBVerbatim):

            # verbatim specifications are dumped to a file named 'database.db'
            with open(os.path.join(configdir, 'database.db'), 'w') as dbstream:
                dbstream.write(dbspec.data)

        elif isinstance(dbspec, dbtools.DBFile):

            # file-based specifications are just copied, keeping their name
            target = os.path.join(configdir, os.path.basename(dbspec.filename))
            shutil.copy(dbspec.filename, target)

        else:
            raise ValueError(" Incorrect dbspec in wrapup")

    # -----------------------------------------------------------------------------
    # go
    #
    # main service provided by this class. It automates the whole parsing
    # process. It parses the contents of all files specified in txtfile (which
    # is a list of strings) according to the specification given in dbfile. It
    # writes down the results in the database whose name is given in dbname
    #
    # The argnamespace is the Namespace of the parser used (which should be an
    # instance of argparse or None). Other (optional) parameters are:
    #
    # directory - target directory where all output is recorded
    # output - filenames given to the backup copies of the parsed files
    # logger - if a logger is given, autobot uses a child of it. Otherwise, it
    #          creates its own logger
    # logfilter - if the client code uses a logger that requires additional
    #             information, a logging.Filter should be given here
    # prologue - if a class is provided here then __call__ () is automatically
    #            invoked before parsing every text file. This class should
    #            be a subclass of BotAction so that it automatically inherits
    #            all the attributes
    # epilogue - if a class is provided here then __call__ () is automatically
    #            invoked after parsing every text file. This class should be a
    #            subclass of BotAction so that it automatically inherits all
    #            the attributes
    # enter - much like prologue but __call__ is automatically invoked before
    #         parsing the first text file
    # windUp - much like epilogue but __call__ is automatically invoked after
    #          parsing the last text file
    # quiet - if given, some additional information is skipped
    # -----------------------------------------------------------------------------
    def go(self,
           txtfile,
           dbfile,
           dbname="$name.db",
           directory=None,
           compress=False,
           argnamespace=None,
           output="$name",
           logger=None,
           logfilter=None,
           prologue=None,
           epilogue=None,
           enter=None,
           windUp=None,
           quiet=False):
        """
        main service provided by this class. It automates the whole parsing
        process. It parses the contents of all files specified in txtfile (which
        is a list of strings) according to the specification given in dbfile. It
        writes down the results in the database whose name is given in dbname

        The argnamespace is the Namespace of the parser used (which should be an
        instance of argparse or None). Other (optional) parameters are:

        directory - target directory where all output is recorded. If none is
                    given, the current working directory (at call time) is used
        output - filenames given to the backup copies of the parsed files
        logger - if a logger is given, autobot uses a child of it. Otherwise, it
                 creates its own logger
        logfilter - if the client code uses a logger that requires additional
                    information, a logging.Filter should be given here
        prologue - if a class is provided here then __call__ () is automatically
                   invoked before parsing every text file. This class should
                   be a subclass of BotAction so that it automatically inherits
                   all the attributes
        epilogue - if a class is provided here then __call__ () is automatically
                   invoked after parsing every text file. This class should be a
                   subclass of BotAction so that it automatically inherits all
                   the attributes
        enter - much like prologue but __call__ is automatically invoked before
                parsing the first text file
        windUp - much like epilogue but __call__ is automatically invoked after
                 parsing the last text file
        quiet - if given, some additional information is skipped
        """

        # resolve the default directory at call time. Using os.getcwd() as the
        # default value in the signature would freeze the working directory at
        # import time, ignoring any later chdir
        if directory is None:
            directory = os.getcwd()

        # copy the attributes
        (self._txtfile, self._dbfile, self._dbname, self._directory,
         self._compress, self._argnamespace, self._output,
         self._prologue, self._epilogue, self._quiet) = \
         (txtfile, dbfile, dbname, directory,
          compress, argnamespace, output,
          prologue, epilogue, quiet)

        # logger settings - if a logger has been passed, just create a child of
        # it and save the log filter since it might be given to other methods
        # invoked from this class
        self._logfilter = logfilter
        if logger:
            self._logger = logger.getChild('bots.BotParser')

            # in case a filter has been given add it and finally set the log level
            if logfilter:
                self._logger.addFilter(logfilter)

        # otherwise, create a simple logger based on a stream handler
        else:
            self._logger = logging.getLogger(self.__class__.__module__ + '.' +
                                             self.__class__.__name__)
            handler = logging.StreamHandler()
            handler.setLevel(BotParser._loglevel)
            handler.setFormatter(
                logging.Formatter(" %(levelname)-10s:   %(message)s"))
            self._logger.addHandler(handler)

            # not passing a logger does not mean that other loggers do not exist
            # so that make sure that the log messages generated here are not
            # propagated upwards in the logging hierarchy
            self._logger.propagate = False

        self._logger.debug(" Starting automated parsing ...")

        # check that all parameters are valid
        self.check_flags(self._txtfile, self._dbfile, self._directory)

        # and now, create the database specification

        # process the database either as a string with a path to the file to
        # parse or just simply copy the specification in case it was given as a
        # verbatim string or as a file already processed
        # proceed similarly in case of the database specification file
        if type(self._dbfile) is str:
            self._logger.debug(" Parsing the database specification file ...")
            self._dbspec = dbtools.DBFile(self._dbfile)
        elif isinstance(self._dbfile, dbtools.DBVerbatim):
            self._logger.debug(
                " The database was given as a verbatim specification")
            self._dbspec = self._dbfile
            self._dbfile = BotParser.defaultname
        elif isinstance(self._dbfile, dbtools.DBFile):
            self._logger.debug(
                " The database was given as a file already parsed")
            self._dbspec = self._dbfile
            self._dbfile = self._dbfile.filename
        else:
            raise ValueError(" Incorrect specification of the database")

        # and now, unless quiet is enabled, show the flags
        if not self._quiet:

            self.show_switches(self._txtfile, self._dbfile, self._directory)

        # setup the necessary environment and retrieve the directores to be
        # used
        (resultsdir, configdir) = self.setup(self._directory)

        # is the user overriding the definition of the default data regexp?
        for iregexp in self._dbspec.get_regexp():

            # if so, override the current definition and show an info message
            if iregexp.get_name() == 'default':

                # note that the *class* attribute is overridden here:
                # parse_single_file reads BotParser.statregexp, so setting an
                # instance attribute would leave the override without effect
                BotParser.statregexp = iregexp.get_specification()
                self._logger.warning(
                    " The data regexp has been overridden to '%s'" %
                    iregexp.get_specification())

        # in case it is requested to execute an *enter* action do it now
        if enter:
            action = enter(dbfile=self._dbfile,
                           directory=self._directory,
                           namespace=BotParser._namespace,
                           user=BotParser._user)
            action(self._logger)

        # record the start time
        self._starttime = datetime.datetime.now()

        # now, invoke the automated parsing of this particular text file
        self.parse_all_files(self._txtfile, resultsdir)

        # record the end time
        self._endtime = datetime.datetime.now()

        # and wrapup
        self.wrapup(self._dbspec, configdir)

        # before leaving, execute a windup action in case it was requested
        if windUp:
            action = windUp(dbfile=self._dbfile,
                            directory=self._directory,
                            namespace=BotParser._namespace,
                            data=BotParser._data,
                            user=BotParser._user)
            action(self._logger)
Пример #18
0
 def __init__(self):
     # registry of namespaces keyed by fully-qualified name; the root
     # namespace is stored under the empty-string key with no parent.
     # NOTE(review): namespace.Namespace('', None) is assumed to take
     # (name, parent) - confirm against the namespace module
     self.namespaces = {'': namespace.Namespace('', None) }