Пример #1
0
 def __init__(self, engine=DEFAULT_ENGINE, **engine_params):
     """
     Set up the catalogue database on top of the requested engine.

     :param engine:
       identifier passed to the class-level ``get_engine`` lookup, which
       returns the engine class to instantiate
     :param engine_params:
       keyword arguments forwarded unchanged to the engine constructor
     """
     log.logger(__name__).info(
         "initializing Catalogue Database (engine=%s, params %s)",
         engine, engine_params)

     engine_class = self.__class__.get_engine(engine)
     self._engine_class = engine_class
     self._engine = engine_class(**engine_params)

     # NOTE(review): this branch only logs; no reset is performed in
     # this block -- confirm whether that is intended.
     if any(flag in engine_params for flag in ('drop', 'memory')):
         log.logger(__name__).info("reset catalogue data")

     self._cache = collections.defaultdict(dict)
Пример #2
0
 def __init__(self, drop=False, engine=DEFAULT_ENGINE, **engine_params):
     """
     Set up the catalogue database on top of the requested engine.

     :param drop:
       when true, ``recreate`` is called once the engine is ready
     :param engine:
       identifier passed to the class-level ``get_engine`` lookup, which
       returns the engine class to instantiate
     :param engine_params:
       keyword arguments forwarded unchanged to the engine constructor;
       the presence of a ``memory`` key also triggers ``recreate``
     """
     log.logger(__name__).info(
         "initializing Catalogue Database (engine=%s, params %s)",
         engine, engine_params)

     engine_class = self.__class__.get_engine(engine)
     self._engine_class = engine_class
     self._engine = engine_class(**engine_params)

     must_reset = drop or 'memory' in engine_params
     if must_reset:
         log.logger(__name__).info("reset catalogue data")
         self.recreate()
Пример #3
0
    def select(self, grouped_measures, native_scale, target_scale, mus):
        """
        Run the class-level ``do_select`` on the given arguments, log a
        summary of the outcome and return whatever ``do_select`` returned.

        The log message reports the length of the first element of the
        result and the number of groups received.
        """
        selection = self.__class__.do_select(
            grouped_measures, native_scale, target_scale, mus)

        n_selected = len(selection[0])
        n_groups = len(grouped_measures)
        log.logger(__name__).debug(
            "selected %d measures over %d groups on scales %s,%s (%s)",
            n_selected, n_groups, native_scale, target_scale, mus)

        return selection
Пример #4
0
    def group_measures_by_time(self, measures):
        """
        Group `measures` along the time variable.

        Delegates to `group_measures_by_var`, passing the instance's
        `time_distance_fn` and `time_window`, and returns its result.
        """
        distance_fn = self.time_distance_fn
        window = self.time_window
        time_groups = self.group_measures_by_var(measures, distance_fn, window)

        log.logger(__name__).debug(
            "grouping by time returned %d groups", len(time_groups))
        return time_groups
    def set_criteria(self, criteria=None):
        """
        Set the criteria used to filter measures.

        A falsy `criteria` (e.g. None) is replaced by a default
        `Criteria()` instance.

        E.g.
        an_homogeniser = Homogeniser()
        an_homogeniser.set_criteria(
            C(agency__in=a_list_agency) or C(magnitude__gt=4))

        :returns: a 2-tuple with the selected native measures and the
        selected target measures, recomputed after the change
        """
        if not criteria:
            criteria = Criteria()
        self._criteria = criteria

        log.logger(__name__).debug("Changed criteria to %s", self._criteria)

        native = self.selected_native_measures()
        target = self.selected_target_measures()
        return native, target
Пример #6
0
 def group(cls, measures):
     """
     Group the measures using the event source key as aggregator.

     :param measures:
       an iterable of objects exposing an ``event_key`` attribute
     :returns:
       a dictionary mapping each event key to the list of measures that
       share it, in the order they were encountered
     """
     groups = {}
     for m in measures:
         groups.setdefault(m.event_key, []).append(m)

     # Pass the count as a logging argument so that interpolation is
     # deferred to the logging framework, as in the rest of the package.
     log.logger(__name__).info(
         "Measure grouper by source key returned %d groups", len(groups))
     return groups
Пример #7
0
 def load_file(self, filename, importer_module_name, **kwargs):
     """
     Load `filename` by using the ``Importer`` class defined in
     `importer_module_name`.

     :param filename:
       path of the file to import
     :param importer_module_name:
       name of the module providing ``Importer``; a name without dots
       is looked up inside the ``eqcatalogue.importers`` package
     :param kwargs:
       passed unchanged to the ``store`` method of the importer

     The summary returned by ``store`` is logged; nothing is returned.
     """
     if '.' not in importer_module_name:
         importer_module_name = (
             'eqcatalogue.importers.' + importer_module_name)
     module = __import__(importer_module_name, fromlist=['Importer'])

     # ``open`` replaces the Python 2 only ``file`` builtin, and the
     # ``with`` block closes the handle once the import has finished,
     # even when ``store`` raises.
     with open(filename) as stream:
         importer = module.Importer(stream, self)
         summary = importer.store(**kwargs)

     log.logger(__name__).info(summary)
    def harmonise(self, measures, path_finder_cls=FormulaPathFinder,
                    measure_uncertainty=0.0, allow_trivial_conversion=True):
        """
        Harmonise a collection of measures towards `self.target_scale`.

        :param measures:
          the measures to be converted. ``len()`` is called on it, so it
          has to be a sized collection and not a bare iterator
        :param path_finder_cls:
          the class used to find the sequence of formulas
          to apply to get a conversion. It is instantiated with the
          formulas held by this object
        :param measure_uncertainty:
          value passed to every formula application
          (presumably the default error used when a measure has no
          standard error -- confirm against the formula implementation)
        :param allow_trivial_conversion:
          when true, a measure already expressed in the target scale and
          with no formula available is converted through an identity
          formula instead of being reported as unconverted
        :returns:
          a `HarmoniserResult` to which every measure has been appended,
          either together with its converted value or alone when no
          conversion was possible
        """
        outcome = HarmoniserResult()
        finder = path_finder_cls(self._formulas)
        identity_formula = ConversionFormula.make_identity(self.target_scale)

        log.logger(__name__).debug(
            "Start harmonization of %d measures", len(measures))

        for m in measures:
            chain = finder.find_formulas_for(m, self.target_scale)

            if chain:
                # Feed the measure through the chain: the first formula
                # receives the measure, each next one the previous output
                converted = m
                for step in chain:
                    converted = step.apply(converted, measure_uncertainty)
                outcome.append(m, converted)
                log.logger(__name__).debug(
                    "Measure %s harmonised with formulas %s", m, chain)
                continue

            if m.scale == self.target_scale and allow_trivial_conversion:
                trivial = m.convert(
                    m.value, identity_formula, m.standard_error)
                outcome.append(m, trivial)
                log.logger(__name__).debug(
                    "Measure %s harmonised with trivial conversion", m)
                continue

            outcome.append(m)
            log.logger(__name__).debug("Could not convert measure %s", m)

        log.logger(__name__).info(
            "Harmonized %s measures. %s measures not converted",
            len(outcome.converted), len(outcome.unconverted))
        return outcome
Пример #9
0
    def group_measures_by_magnitude_value(self, measures):
        """
        Group `measures` along the magnitude variable.

        Delegates to `group_measures_by_var`, passing the instance's
        `magnitude_distance_fn` and `magnitude_window`, and returns its
        result.

        :raises RuntimeError: when no (truthy) magnitude window is set
        """
        window = self.magnitude_window
        if not window:
            raise RuntimeError("Please provide a magnitude window")

        magnitude_groups = self.group_measures_by_var(
            measures, self.magnitude_distance_fn, self.magnitude_window)

        log.logger(__name__).debug(
            "grouping by magnitude value returned %d groups",
            len(magnitude_groups))

        return magnitude_groups
Пример #10
0
    def select(self, grouped_measures,
               native_scale, target_scale,
               mus):
        """
        Pick, for every group, one native-scale and one target-scale
        measure, preferring the highest rank given by `calculate_rank`.

        A group contributes to the result only if it holds at least one
        usable measure for each of the two scales.

        :param grouped_measures:
          a dictionary whose values are the lists of measures associated
          with an event
        :param native_scale:
          the scale of the measures collected in the first list
        :param target_scale:
          the scale of the measures collected in the second list
        :param mus:
          a missing uncertainty strategy object: it decides which
          measures are discarded and supplies a default standard error
          for the kept measures that have none
        :returns:
          a 2-tuple (native measures, target measures); the two lists
          have the same length and are aligned group by group
        """
        chosen_native = []
        chosen_target = []

        for candidates in grouped_measures.values():
            ranked_native = []
            ranked_target = []

            for candidate in candidates:
                if mus.should_be_discarded(candidate):
                    continue

                # Decide which ranking (if any) this measure belongs to
                if candidate.scale == native_scale:
                    bucket = ranked_native
                elif candidate.scale == target_scale:
                    bucket = ranked_target
                else:
                    bucket = None

                if bucket is not None:
                    bucket.append(
                        (self.calculate_rank(candidate), candidate))

                # Every measure that was not discarded gets a default
                # error when it has none, whatever its scale
                if not candidate.standard_error:
                    candidate.standard_error = mus.get_default(candidate)

            ranked_native.sort(reverse=True)
            ranked_target.sort(reverse=True)

            if not (ranked_native and ranked_target):
                continue
            chosen_native.append(ranked_native[0][1])
            chosen_target.append(ranked_target[0][1])

        log.logger(__name__).debug(
            "selected %d measures over %d groups on scales %s,%s (%s)",
            len(chosen_native), len(grouped_measures),
            native_scale, target_scale, mus)

        return chosen_native, chosen_target
Пример #11
0
def export_measures(measures, filename, header=True, mode="w", **kwargs):
    """
    Export `measures` to `filename` by using the csv module from the
    standard python library.

    :param measures:
      a sequence of mapping-like objects; the ``values()`` of each one
      become a row of the csv
    :param filename:
      path of the file to write
    :param header:
      if true, the first row of the csv is a header built from the
      ``keys()`` of the first measure. No header is written when
      `measures` is empty
    :param mode:
      mode used to open `filename`
    :param kwargs:
      passed as they are to the csv writer constructor
    """
    with open(filename, mode) as csvfile:
        measure_writer = csv.writer(csvfile, **kwargs)

        # An empty export has no first measure to take the header from:
        # skip the header instead of failing with an IndexError.
        if header and len(measures) > 0:
            measure_writer.writerow(measures[0].keys())

        for measure in measures:
            measure_writer.writerow(measure.values())

    # Logging arguments are interpolated lazily by the logging framework
    log.logger(__name__).info(
        "Exported %d measures to %s", len(measures), filename)
    def set_grouper(self, grouper_class, **grouper_args):
        """
        Set the algorithm used to group measures by event.

        :param grouper_class:
          A class that implements the MeasureGrouper protocol.
          See :py:class:`eqcatalogue.grouping` for the current
          admitted values

        Any other parameter is given as input to the constructor of
        `grouper_class`. E.g.::

         from eqcatalogue.grouping import GroupMeasuresByHierarchicalClustering
         an_homogeniser.set_grouper(GroupMeasuresByHierarchicalClustering)

        :returns: the grouped measures, recomputed with the new grouper
        """
        self._grouper = grouper_class(**grouper_args)

        # Pass the grouper as a logging argument: interpolation is
        # deferred to the logging framework and cannot fail on a grouper
        # that happens to be a tuple.
        log.logger(__name__).debug("Changed grouper to %s", self._grouper)
        return self.grouped_measures()
    def set_selector(self, selector_class, **selector_args):
        """
        Set the algorithm used to select a measure among grouped
        measures.

        :param selector_class:
          A class that implements the MeasureSelection protocol.
          See :py:class:`eqcatalogue.selection` for the current
          admitted values

        Any other parameter is given as input to the constructor of
        `selector_class`. E.g.::

          from eqcatalogue.selection import Precise
          an_homogeniser.set_selector(Precise)

        :returns: a 2-tuple with the selected native measures and the
        selected target measures, recomputed with the new selector
        """
        self._selector = selector_class(**selector_args)

        # Pass the selector as a logging argument: interpolation is
        # deferred to the logging framework and cannot fail on a
        # selector that happens to be a tuple.
        log.logger(__name__).debug("Changed selector to %s", self._selector)
        return self.selected_native_measures(), self.selected_target_measures()
Пример #14
0
    def group_measures(self, measures):
        """
        Groups the measures by hierarchical clustering on the value that
        `self._key_fn` computes for each measure (time, according to the
        log message emitted below).

        :param measures:
          an indexable sequence of measures
        :returns:
          a dictionary mapping each cluster label returned by
          ``fclusterdata`` to the list of measures assigned to it
        """
        keys = np.array([self._key_fn(m) for m in measures])
        # fclusterdata works on a 2-D observation matrix: turn the keys
        # into a single-column matrix with one row per measure
        observations = np.reshape(keys, [len(keys), 1])

        clusters = hierarchy.fclusterdata(
            observations, **self._clustering_args)

        grouped = {}
        for i, cluster in enumerate(clusters):
            grouped.setdefault(cluster, []).append(measures[i])

        # Logging arguments are interpolated lazily by the logging framework
        log.logger(__name__).info(
            "Measure grouper by clustering on time returned %d groups",
            len(grouped))
        return grouped
Пример #15
0
    def group_measures(self, measures):
        """
        Group `measures` by sequentially applying a grouping on
        time, space and magnitude (optional) variables.

        The magnitude step runs only when `self.magnitude_window` is
        truthy.

        :returns:
          a dictionary mapping a progressive integer (starting from 0)
          to each resulting group
        """
        # Flatten with a nested comprehension: concatenating through
        # sum(..., []) copies the accumulated list at every step
        groups = [
            space_group
            for time_group in self.group_measures_by_time(measures)
            for space_group in self.group_measures_by_space(time_group)]

        if self.magnitude_window:
            groups = [
                magnitude_group
                for group in groups
                for magnitude_group in
                self.group_measures_by_magnitude_value(group)]

        log.logger(__name__).info(
            "Measure grouper (sequential clustering) returned %d groups",
            len(groups))
        return dict(enumerate(groups))
    def set_missing_uncertainty_strategy(self, mu_strategy_class,
                                         **mu_strategy_args):
        """
        Set the algorithm used to handle situations where uncertainty
        data are missing in a measure.

        :param mu_strategy_class:
          A class that implements the MissingUncertaintyStrategy protocol.
          See :py:class:`eqcatalogue.selection` for the current
          admitted values

        Any other parameter is given as input to the constructor of
        `mu_strategy_class`. E.g.::

          from eqcatalogue.selection import MUSSetDefault
          an_homogeniser.set_missing_uncertainty_strategy(
            MUSSetDefault, default=3)

        :returns: a 2-tuple with the selected native measures and the
        selected target measures, recomputed with the new strategy
        """
        self._mu_strategy = mu_strategy_class(**mu_strategy_args)

        # Report the strategy that has just been set (the selector was
        # logged here by mistake)
        log.logger(__name__).debug(
            "Changed missing uncertainty strategy to %s", self._mu_strategy)
        return self.selected_native_measures(), self.selected_target_measures()
Пример #17
0
events from catalogue saved in the ISF Format
http://www.isc.ac.uk/standards/isf/
"""

import re
import datetime
from sqlalchemy.exc import IntegrityError

from eqcatalogue import models as catalogue

from eqcatalogue.log import logger
from eqcatalogue.importers import BaseImporter
from eqcatalogue.exceptions import ParsingFailure


# Logger for this module, obtained through eqcatalogue.log.logger
LOG = logger(__name__)
# URL of the catalogue web service
# NOTE(review): presumably the ISC endpoint queried for ISF data --
# its use is not visible in this part of the file, confirm
CATALOG_URL = 'http://www.isc.ac.uk/cgi-bin/web-db-v4'

# Single-letter codes mapped to their human readable analysis type
# (presumably the codes found in ISF records -- confirm against the format)
ANALYSIS_TYPES = {'a': 'automatic',
                  'm': 'manual',
                  'g': 'guess'}

# Single-letter codes mapped to their human readable location method
# (presumably the codes found in ISF records -- confirm against the format)
LOCATION_METHODS = {
    'i': 'inversion',
    'p': 'pattern recognition',
    'g': 'ground truth',
    'o': 'other'
}

EVENT_TYPES = {
    'uk': 'unknown',