Example #1
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter,) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        i = 0
        for eid_rlz in smap:  # 30 million events associated in 1 minute!
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='id')  # fast too
        n_unique_events = len(numpy.unique(events['id']))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        self.datastore['events'] = events
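
A note on the pattern above: the parallel results fill a preallocated structured array, which is then sorted in place and checked for duplicate ids. The following standalone sketch illustrates the same steps; events_dt here is a stand-in dtype, not the engine's actual rupture.events_dt:

    import numpy

    TWO32 = 2 ** 32
    # stand-in for rupture.events_dt; the real field layout may differ
    events_dt = numpy.dtype([('id', numpy.uint64), ('rlz', numpy.uint16)])

    pairs = [(3, 0), (1, 1), (2, 0)]  # pretend output of get_eid_rlz
    events = numpy.zeros(len(pairs), events_dt)
    i = 0
    for er in pairs:
        events[i] = er  # a tuple fills one structured record
        i += 1
        if i >= TWO32:  # event ids must fit in 32 bits downstream
            raise ValueError('There are more than %d events!' % i)
    events.sort(order='id')  # in-place sort on the 'id' field
    assert len(numpy.unique(events['id'])) == len(events)  # ids unique
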
Example #2
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter,) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        i = 0
        for eid_rlz in smap:  # 30 million events associated in 1 minute!
            for er in eid_rlz:
                events[i] = er
                i += 1
        events.sort(order=['rlz', 'eid'])  # fast too
        n_unique_events = len(numpy.unique(events['eid']))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        self.datastore['events'] = events
        indices = numpy.zeros((self.R, 2), U32)
        for r, [startstop] in get_indices(events['rlz']).items():
            indices[r] = startstop
        self.datastore.set_attrs('events', indices=indices)
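
get_indices is not shown in these examples; the indices loop above relies on it mapping each distinct value to its (start, stop) runs in the array. A plausible minimal version (an assumption, not the engine's actual code):

    import numpy

    def get_indices(values):
        """Map each distinct value to its runs of (start, stop) indices."""
        indices = {}
        start = 0
        for i in range(1, len(values) + 1):
            if i == len(values) or values[i] != values[start]:
                indices.setdefault(values[start], []).append((start, i))
                start = i
        return indices

    rlzs = numpy.array([0, 0, 0, 1, 1, 2])
    print(get_indices(rlzs))  # {0: [(0, 3)], 1: [(3, 5)], 2: [(5, 6)]}

Because the events were sorted with order=['rlz', 'eid'], every rlz occurs in exactly one contiguous run, which is why the [startstop] destructuring in the loop is safe.
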
Example #3
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.E = len(eids)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        events['eid'] = eids
        self.eid2idx = eid2idx = dict(zip(events['eid'], range(self.E)))
        rgetters = self.get_rupture_getters()

        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter, ) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        for eid_rlz in smap:
            # fast: 30 million events associated in 1 minute
            for eid, rlz in eid_rlz:
                events[eid2idx[eid]]['rlz'] = rlz
        self.datastore['events'] = events  # fast too
        return rgetters
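
The eid2idx dictionary in this version lets the parallel results write each rlz into the right row without any sorting. A tiny illustration, with field names assumed to match rupture.events_dt:

    import numpy

    events_dt = numpy.dtype([('eid', numpy.uint64), ('rlz', numpy.uint16)])
    eids = numpy.array([10, 42, 7], numpy.uint64)
    events = numpy.zeros(len(eids), events_dt)
    events['eid'] = eids
    eid2idx = dict(zip(events['eid'], range(len(eids))))
    for eid, rlz in [(42, 1), (7, 0), (10, 2)]:  # pairs from get_eid_rlz
        events[eid2idx[eid]]['rlz'] = rlz  # records are views: in-place
    print(events)  # rows keep the original eid order, rlz filled in
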
Example #4
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = gen_rgetters(self.datastore)
        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Building assocs event_id -> rlz_id for {:_d} events'
                     ' and {:_d} ruptures'.format(len(events), len(rup_array)))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            # parallel composite array with the associations eid->rlz
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  h5=self.datastore.hdf5)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        # set event year and event ses starting from 1
        itime = int(self.oqparam.investigation_time)
        nses = self.oqparam.ses_per_logic_tree_path
        extra = numpy.zeros(len(events), [('year', U16), ('ses_id', U16)])
        # seed for year and ses_id
        numpy.random.seed(self.oqparam.ses_seed)
        extra['year'] = numpy.random.choice(itime, len(events)) + 1
        extra['ses_id'] = numpy.random.choice(nses, len(events)) + 1
        self.datastore['events'] = util.compose_arrays(events, extra)
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['ruptures']['e0'] = arr[:, 0]
        self.datastore['ruptures']['e1'] = arr[:, 1]
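
The year/ses_id block above draws, reproducibly, a random year in [1, investigation_time] and a SES ordinal in [1, ses_per_logic_tree_path] for each event; util.compose_arrays then glues the extra fields onto the events array. A minimal sketch of the sampling, with made-up parameter values:

    import numpy

    U16 = numpy.uint16
    itime, nses, n_events, ses_seed = 50, 2, 4, 42  # made-up values
    extra = numpy.zeros(n_events, [('year', U16), ('ses_id', U16)])
    numpy.random.seed(ses_seed)  # same seed -> same draws on rerun
    extra['year'] = numpy.random.choice(itime, n_events) + 1   # 1..itime
    extra['ses_id'] = numpy.random.choice(nses, n_events) + 1  # 1..nses
    print(extra)
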
Example #5
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info(
            'Building associations event_id -> rlz_id for %d events'
            ' and %d ruptures', len(events), len(rup_array))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  hdf5path=self.datastore.filename)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        self.datastore['events'] = events
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['eslices'] = arr  # shape (U, 2)
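
Finally, the eslices reshaping: since the events are sorted by rup_id, get_indices (as sketched after Example #2) returns a single (start, stop) run per rup_id, so dropping the list level with [:, 0, :] yields one row per rupture:

    import numpy

    eindices = {7: [(0, 3)], 9: [(3, 4)]}  # rup_id -> [(start, stop)]
    arr = numpy.array(list(eindices.values()))[:, 0, :]
    print(arr)        # [[0 3]
                      #  [3 4]]
    print(arr.shape)  # (U, 2), one (start, stop) row per rupture
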