Example #1
def speigen_range(matrix, retry=True, coerce=True):
    """
    Construct the eigenrange of a potentially sparse matrix.
    """
    if spar.issparse(matrix):
        try:
            emax = spla.eigs(matrix, k=1, which='LR')[0]
        except (spla.ArpackNoConvergence, spla.ArpackError) as e:
            rowsums = np.unique(np.asarray(matrix.sum(axis=1)).flatten())
            if np.allclose(rowsums, np.ones_like(rowsums)):
                emax = np.array([1])
            else:
                Warn('Maximal eigenvalue computation failed to converge'
                     ' and matrix is not row-standardized.')
                raise e
        emin = spla.eigs(matrix, k=1, which='SR')[0]
        if coerce:
            emax = emax.real.astype(float)
            emin = emin.real.astype(float)
    else:
        try:
            eigs = nla.eigvals(matrix)
            emin, emax = eigs.min().astype(float), eigs.max().astype(float)
        except Exception as e:
            Warn('Dense eigenvector computation failed!')
            if retry:
                Warn('Retrying with sparse matrix...')
                spmatrix = spar.csc_matrix(matrix)
                emin, emax = speigen_range(spmatrix)
            else:
                Warn('Bailing...')
                raise e
    return emin, emax
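A minimal usage sketch, assuming the aliases the snippet relies on (numpy as np, numpy.linalg as nla, scipy.sparse as spar, scipy.sparse.linalg as spla, and warnings.warn as Warn):

import numpy as np
import numpy.linalg as nla
import scipy.sparse as spar
import scipy.sparse.linalg as spla
from warnings import warn as Warn

W = spar.random(50, 50, density=0.2, format='csc', random_state=0)
S = (W + W.T) / 2                         # symmetrize so the spectrum is real
emin, emax = speigen_range(S)             # sparse path (ARPACK via spla.eigs)
dmin, dmax = speigen_range(S.toarray())   # dense path (numpy eigvals)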
Example #2
    def _loop_process(self):
        sunrise = self.sunrise_time().replace(tzinfo=None)
        if (sunrise < datetime.now()):
            return Warn("Sunrise has already passed for today. Skipping today")

        for curtain in self._System.Curtains_list():
            try:
                curtain_option = curtain.CurtainOption(self._option_id)
                if (not curtain_option.is_on()): continue

                buffer_time = curtain.buffer_time()
                curtain_buffer_time = 0 if buffer_time is None else buffer_time
                # /10: precision; /2: split the buffer across both sides of sunrise
                buffer_td = timedelta(seconds=curtain_buffer_time / 10 / 2)
                if (curtain.CurtainEvents_for_range(
                        earliest=sunrise - buffer_td,
                        latest=sunrise + buffer_td)):
                    Warn("Event already set for sunrise time.")
                    continue
                    # don't duplicate sunrise

                position = curtain_option.data() if curtain_option.data(
                ) else curtain.length()
                curtain.open(desired_position=position,
                             Options_id=curtain_option.Options_id(),
                             time=sunrise)

            except Exception as error:
                Logger.log_error(error)
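The buffer arithmetic above, isolated: a curtain buffer of N seconds produces an event-matching window of N/10/2 seconds on each side of sunrise. A standalone sketch with hypothetical numbers, standard-library datetime only:

from datetime import datetime, timedelta

sunrise = datetime(2021, 6, 21, 5, 42)
curtain_buffer_time = 1200                           # hypothetical: 1200s buffer
buffer_td = timedelta(seconds=curtain_buffer_time / 10 / 2)
window = (sunrise - buffer_td, sunrise + buffer_td)  # 60s either side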
Example #3
    def __init__(self, Y, X, Delta,
                 n_samples=1000, n_jobs=1,
                 extra_traced_params = None,
                 priors=None,
                 starting_values=None):
        super(Base_MVCM, self).__init__()

        N, p = X.shape
        _N, J = Delta.shape
        self.state = Hashmap(**{'X':X, 'Y':Y, 'Delta':Delta,
                           'N':N, 'J':J, 'p':p })
        self.traced_params = copy.deepcopy(SAMPLERS)
        if extra_traced_params is not None:
            self.traced_params.extend(extra_traced_params)
        hashmaps = [Hashmap(**{k: [] for k in self.traced_params})
                    for _ in range(n_jobs)]
        self.trace = Trace(*hashmaps)

        if priors is None:
            priors = dict()
        if starting_values is None:
            starting_values = dict()

        self._setup_priors(**priors)
        self._setup_starting_values(**starting_values)

        self.cycles = 0
        self.configs = None


        try:
            self.sample(n_samples, n_jobs=n_jobs)
        except (np.linalg.LinAlgError, ValueError) as e:
            Warn('Encountered the following error. '
                 'Model will return for debugging. \n {}'.format(e))
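The list-comprehension form used above matters because sequence repetition aliases a single object; in plain Python:

rows = [{'trace': []}] * 3                 # three references to ONE dict
rows[0]['trace'].append(1)
print(rows[1]['trace'])                    # [1] -- every "copy" mutated
rows = [{'trace': []} for _ in range(3)]   # three independent dicts
rows[0]['trace'].append(1)
print(rows[1]['trace'])                    # []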
Example #4
def _circle(A, B, C, dmetric=dist.euclidean):
    """
    Returns (radius, (center_x, center_y)) of the circle circumscribing the
    triangle ABC.

    Note: this does not assume that A != B != C.
    """
    Ax,Ay = dec.Decimal(A[0]), dec.Decimal(A[1])
    Bx,By = dec.Decimal(B[0]), dec.Decimal(B[1])
    Cx,Cy = dec.Decimal(C[0]), dec.Decimal(C[1])
    if np.array_equal([Ax, Ay], [Bx, By]) or np.array_equal([Bx, By], [Cx, Cy]):
        Warn('Duplicate neighboring point detected!')
        # degenerate triangle: use the same sentinel as the failure branch below
        radii = center_x = center_y = -np.inf
    elif np.allclose(_angle(A, B, C), 0):
        #Warn('angle close to zero')
        # collinear points: treat the segment AB as a diameter
        radii = dmetric(A, B) / 2.
        center_x = float(Ax + Bx) / 2.
        center_y = float(Ay + By) / 2.
    else:
        try:
            D = 2*(Ax*(By - Cy) + Bx*(Cy - Ay) + Cx*(Ay - By))
            center_x = float(((Ax**2 + Ay**2)*(By-Cy) 
                            + (Bx**2 + By**2)*(Cy-Ay) 
                            + (Cx**2 + Cy**2)*(Ay-By)) / D)
            center_y = float(((Ax**2 + Ay**2)*(Cx-Bx) 
                            + (Bx**2 + By**2)*(Ax-Cx) 
                            + (Cx**2 + Cy**2)*(Bx-Ax)) / D)
            radii = np.max([dmetric((center_x, center_y), pt) for pt in [A,B,C]])
        except dec.InvalidOperation:
            center_x = center_y = radii = -np.inf
    return radii, (center_x, center_y)
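A quick numeric check of the circumcenter formula (this assumes the module's _angle helper and the aliases dec = decimal, dist = scipy.spatial.distance are available): the right triangle (0,0), (2,0), (0,2) has circumcenter (1,1) and radius sqrt(2).

import numpy as np

radius, center = _circle((0, 0), (2, 0), (0, 2))
assert np.isclose(radius, np.sqrt(2))
assert np.allclose(center, (1.0, 1.0))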
Example #5
 def sleep_time(self):
     now = datetime.now()
     time_plus_1_second = self._time + timedelta(seconds=1)
     if (time_plus_1_second < now):
         Warn("Event {} is scheduled at a time in the past".format(
             self._id))
     # total_seconds() (not .seconds) so durations past one day are not truncated
     return (self._time - now).total_seconds() if (now < self._time) else .25
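The switch to total_seconds() above avoids a classic timedelta pitfall: .seconds is only the seconds component of the duration, not its full length.

from datetime import timedelta

d = timedelta(days=1, seconds=5)
print(d.seconds)          # 5        (component only; the day is dropped)
print(d.total_seconds())  # 86405.0  (the full duration)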
Example #6
    def reweight(self,
                 k=None,
                 p=None,
                 new_data=None,
                 new_ids=None,
                 inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight
        new_ids     : list
                      a list aligned with new_data that provides the ids for
                      each new observation
        inplace     : bool
                      a flag denoting whether to modify the KNN object 
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """

        if new_data is not None:
            new_data = np.asarray(new_data).reshape(-1, 2)
            data = np.vstack((self.data, new_data)).reshape(-1, 2)
            if new_ids is not None:
                ids = copy.deepcopy(self.id_order)
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        elif (new_data is None) and (new_ids is None):
            # If not, we can use the same kdtree we have
            data = self.kdtree
            ids = self.id_order
        elif (new_data is None) and (new_ids is not None):
            Warn("Remapping ids must be done using w.remap_ids")
            # fall back to the existing tree and ids so the rebuild below still works
            data = self.kdtree
            ids = self.id_order
        if k is None:
            k = self.k
        if p is None:
            p = self.p
        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)
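A usage sketch against the libpysal KNN API this method comes from (assuming libpysal is installed):

import numpy as np
from libpysal.weights import KNN

pts = np.random.random((25, 2))
w = KNN(pts, k=3)
w5 = w.reweight(k=5, inplace=False)   # returns a new 5-neighbor KNN object
w.reweight(k=4)                       # rebuilds w itself in place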
Example #7
 def test_ord(self):
     reg = ML_Error(self.y,
                    self.x,
                    w=self.w,
                    name_y=self.y_name,
                    name_x=self.x_names,
                    name_w='south_q.gal',
                    method='ORD')
     betas = np.array([[6.1492], [4.4024], [1.7784], [-0.3781], [0.4858],
                       [0.2991]])
     Warn('Running higher-tolerance tests in test_ml_error.py')
     np.testing.assert_allclose(reg.betas, betas, RTOL + .0001)
     u = np.array([-5.97649777])
     np.testing.assert_allclose(reg.u[0], u, RTOL)
     predy = np.array([6.92258051])
     np.testing.assert_allclose(reg.predy[0], predy, RTOL)
     n = 1412
     np.testing.assert_allclose(reg.n, n, RTOL)
     k = 5
     np.testing.assert_allclose(reg.k, k, RTOL)
     y = np.array([0.94608274])
     np.testing.assert_allclose(reg.y[0], y, RTOL)
     x = np.array([1., -0.39902838, 0.89645344, 6.85780705, 7.2636377])
     np.testing.assert_allclose(reg.x[0], x, RTOL)
     e = np.array([-4.92843327])
     np.testing.assert_allclose(reg.e_filtered[0], e, RTOL)
     my = 9.5492931620846928
     np.testing.assert_allclose(reg.mean_y, my)
     sy = 7.0388508798387219
     np.testing.assert_allclose(reg.std_y, sy)
     vm = np.array([
         1.06476526, 0.05548248, 0.04544514, 0.00614425, 0.01481356,
         0.001501
     ])
     np.testing.assert_allclose(reg.vm.diagonal(), vm, RTOL * 10)
     sig2 = np.array([[32.40685441]])
     np.testing.assert_allclose(reg.sig2, sig2, RTOL)
     pr2 = 0.3057664820364818
     np.testing.assert_allclose(reg.pr2, pr2)
     std_err = np.array([
         1.03187463, 0.23554719, 0.21317867, 0.07838525, 0.12171098,
         0.038744
     ])
     np.testing.assert_allclose(reg.std_err, std_err, RTOL * 10)
     z_stat = [(5.95927510, 2.5335927e-09), (18.6901829, 5.9508630e-78),
               (8.34216329, 7.2943634e-17), (-4.8232686, 1.4122457e-06),
               (3.99130608, 6.5710407e-05), (7.71923784, 1.1702739e-14)]
     np.testing.assert_allclose(reg.z_stat, z_stat, rtol=RTOL, atol=ATOL)
     logll = -4471.407066887894
     np.testing.assert_allclose(reg.logll, logll, RTOL)
     aic = 8952.8141337757879
     np.testing.assert_allclose(reg.aic, aic, RTOL)
     schwarz = 8979.0779458660545
     np.testing.assert_allclose(reg.schwarz, schwarz, RTOL)
Example #8
def spsolve(A, b):
    """
    Solve the system Ax=b for x, depending on the type of A. The solution vector is equivalent to A^{-1}b

    If a is sparse, the result will be sparse. Otherwise, the result will be dense.
    """
    if spar.issparse(A):
        return spla.spsolve(A, b)
    elif spar.issparse(b):
        Warn('b is sparse, but A is dense. Solving the dense system.')
        return spsolve(A, b.toarray())
    return scla.solve(A, b)
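A dispatch sketch, assuming the aliases used in the snippet (scipy.sparse as spar, scipy.sparse.linalg as spla, scipy.linalg as scla):

import numpy as np
import scipy.sparse as spar
import scipy.sparse.linalg as spla
import scipy.linalg as scla
from warnings import warn as Warn

A = spar.identity(4, format='csc') * 2.0
b = np.ones(4)
# sparse path (spla.spsolve) and dense path (scla.solve) agree
assert np.allclose(spsolve(A, b), spsolve(A.toarray(), b))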
Example #9
def splogdet(matrix):
    """
    compute the log determinant via an appropriate method according to the input.
    """
    redo = False
    if spar.issparse(matrix):
        LU = spla.splu(spar.csc_matrix(matrix))
        ldet = np.sum(np.log(np.abs(LU.U.diagonal())))
    else:
        sgn, ldet = nla.slogdet(matrix)
        if np.isinf(ldet) or sgn == 0:
            Warn('Dense log determinant via numpy.linalg.slogdet() failed!')
            redo = True
        if sgn not in [-1,1]:
            Warn("Drastic loss of precision in numpy.linalg.slogdet()!")
            redo = True
        ldet = sgn*ldet
    if redo:
        Warn("Please pass convert to a sparse weights matrix. Trying sparse determinant...", UserWarning)
        ldet = splogdet(spar.csc_matrix(matrix))
    return ldet
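Both branches agree on a simple positive-definite case (same aliases as the snippet: numpy.linalg as nla, scipy.sparse as spar, scipy.sparse.linalg as spla):

import numpy as np
import numpy.linalg as nla
import scipy.sparse as spar
import scipy.sparse.linalg as spla
from warnings import warn as Warn

M = np.diag([2.0, 3.0, 4.0])                                   # det = 24
assert np.isclose(splogdet(M), np.log(24.0))                   # dense slogdet branch
assert np.isclose(splogdet(spar.csc_matrix(M)), np.log(24.0))  # sparse LU branch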
Example #10
 def _estimate_and_compare(self, method='FULL', RTOL=RTOL):
     reg = ML_Error(self.y, self.x, w=self.w,
                    name_y=self.y_name, name_x=self.x_names,
                    name_w="south_q.gal", method=method)
     betas = np.array([[6.1492], [4.4024], [1.7784], [-0.3781], [0.4858],
                       [0.2991]])
     Warn('Running higher-tolerance tests in test_ml_error.py')
     np.testing.assert_allclose(reg.betas, betas, RTOL + .0001)
     u = np.array([-5.97649777])
     np.testing.assert_allclose(reg.u[0], u, RTOL)
     predy = np.array([6.92258051])
     np.testing.assert_allclose(reg.predy[0], predy, RTOL)
     n = 1412
     np.testing.assert_allclose(reg.n, n, RTOL)
     k = 5
     np.testing.assert_allclose(reg.k, k, RTOL)
     y = np.array([0.94608274])
     np.testing.assert_allclose(reg.y[0], y, RTOL)
     x = np.array([1., -0.39902838, 0.89645344, 6.85780705, 7.2636377])
     np.testing.assert_allclose(reg.x[0], x, RTOL)
     e = np.array([-4.92843327])
     np.testing.assert_allclose(reg.e_filtered[0], e, RTOL)
     my = 9.5492931620846928
     np.testing.assert_allclose(reg.mean_y, my)
     sy = 7.0388508798387219
     np.testing.assert_allclose(reg.std_y, sy)
     vm = np.array([
         1.06476526, 0.05548248, 0.04544514, 0.00614425, 0.01481356,
         0.00143001
     ])
     np.testing.assert_allclose(reg.vm.diagonal(), vm, RTOL)
     sig2 = np.array([[32.40685441]])
     np.testing.assert_allclose(reg.sig2, sig2, RTOL)
     pr2 = 0.3057664820364818
     np.testing.assert_allclose(reg.pr2, pr2)
     std_err = np.array([
         1.03187463, 0.23554719, 0.21317867, 0.07838525, 0.12171098,
         0.03781546
     ])
     np.testing.assert_allclose(reg.std_err, std_err, RTOL)
     z_stat = [(5.9592751097983534, 2.5335926307459251e-09),
               (18.690182928021841, 5.9508619446611137e-78),
               (8.3421632936950338, 7.2943630281051907e-17),
               (-4.8232686291115678, 1.4122456582517099e-06),
               (3.9913060809142995, 6.5710406838016854e-05),
               (7.9088780724028922, 2.5971882547279339e-15)]
     np.testing.assert_allclose(reg.z_stat, z_stat, RTOL, atol=ATOL)
     logll = -4471.407066887894
     np.testing.assert_allclose(reg.logll, logll, RTOL)
     aic = 8952.8141337757879
     np.testing.assert_allclose(reg.aic, aic, RTOL)
     schwarz = 8979.0779458660545
     np.testing.assert_allclose(reg.schwarz, schwarz, RTOL)
Example #11
def covariates(X):
    """
    This

    1. checks whether the lower-level covariate matrix contains a constant
    2. adds a constant to the lower-level covariate matrix if it has none
    """
    if constant_check(X):
        Warn("X array should not contain a constant vector;"
             " constant will be added automatically")
    else:
        X = sphstack(np.ones((X.shape[0], 1)), X)

    return X
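For reference, minimal stand-ins for the constant_check and sphstack helpers the function relies on (hypothetical sketches, not the library's own implementations):

import numpy as np

def constant_check(X):
    # hypothetical: treat any zero-variance column as a constant vector
    return bool(np.isclose(np.asarray(X).std(axis=0), 0).any())

def sphstack(a, b):
    # hypothetical dense-only fallback for the sparse-aware hstack
    return np.hstack((a, b))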
Example #12
File: utils.py Project: knaaptime/spenc
def check_weights(W, X=None, transform=None):
    """
    Check that the provided weights matrix and the X matrix are conformal.
    Further, check that the spatial weights are fully connected. 
    """
    if X is not None:
        assert W.shape[0] == X.shape[0], \
            "W does not have the same number of samples as X"
    graph = sp.csc_matrix(W)
    graph.eliminate_zeros()
    components, labels = csg.connected_components(graph)
    if components > 1:
        Warn('Spatial affinity matrix is disconnected, and has {} subcomponents. '
             'This will certainly affect the solution output.'.format(components))
    return W
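The disconnection check in isolation: scipy's connected_components returns the number of graph components, so any value above 1 triggers the warning. A small sketch using the same sp/csg aliases:

import numpy as np
import scipy.sparse as sp
from scipy.sparse import csgraph as csg
from warnings import warn as Warn

# two disjoint 2-node blocks -> connected_components reports 2
W = sp.csr_matrix(np.array([[0, 1, 0, 0],
                            [1, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 0]]))
check_weights(W)   # warns: disconnected, with 2 subcomponents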
Example #13
def _winsor_unc(design, lower=.25, upper=.75):
    """
    This winsorizes vote shares to a given percentile.
    """
    try:
        from scipy.stats.mstats import winsorize
    except ImportError:
        Warn('Cannot import scipy.stats.mstats.winsorize, censoring instead.',
                stacklevel=2)
        return _censor_unc(design, lower=lower, upper=1-upper)
    # WARNING: the winsorize function here is a little counterintuitive in that
    #          it requires the upper limit to be stated as "from the right,"
    #          so it should be less than .5, just like "lower"
    design['vote_share'] = np.asarray(winsorize(design.vote_share, 
                                                limits=(lower, 1-upper)))
    return design
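The "from the right" convention the comment warns about, in a standalone sketch: both limits are tail fractions, so upper=.75 becomes a right-tail limit of .25.

import numpy as np
from scipy.stats.mstats import winsorize

x = np.array([0.01, 0.2, 0.4, 0.6, 0.8, 0.99])
clipped = winsorize(x, limits=(0.25, 0.25))   # clip 25% from EACH tail
print(np.asarray(clipped))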
Example #14
    def __init__(self, Y, X, W, M, Delta,
                 n_samples=1000, n_jobs=1,
                 extra_traced_params = None,
                 priors=None,
                 starting_values=None,
                 configs=None,
                 truncation=None):
        super(Base_Generic, self).__init__()

        N, p = X.shape
        _N, J = Delta.shape
        self.state = Hashmap(**{'X':X, 'Y':Y, 'M':M, 'W':W, 'Delta':Delta,
                           'N':N, 'J':J, 'p':p })
        self.traced_params = copy.deepcopy(SAMPLERS)
        if extra_traced_params is not None:
            self.traced_params.extend(extra_traced_params)
        hashmaps = [{k: [] for k in self.traced_params} for _ in range(n_jobs)]
        self.trace = Trace(*hashmaps)

        if priors is None:
            priors = dict()
        if starting_values is None:
            starting_values = dict()
        if configs is None:
            configs = dict()
        if truncation is None:
            truncation = dict()

        self._setup_priors(**priors)
        self._setup_configs(**configs)
        self._setup_truncation(**truncation)
        self._setup_starting_values(**starting_values)

        ## Covariance, computing the starting values
        self.state.Psi_1 = ind_covariance
        self.state.Psi_1i = ind_covariance
        self.state.Psi_2 = ind_covariance
        self.state.Psi_2i = ind_covariance

        self.cycles = 0

        if n_samples > 0:
            try:
                self.sample(n_samples, n_jobs=n_jobs)
            except (np.linalg.LinAlgError, ValueError) as e:
                Warn('Encountered the following error. '
                     'Model will return for debugging. \n {}'.format(e))
Example #15
def weights(W, M, transform):
    """
    This tries to row-standardize the given PySAL spatial weights objects.
    It warns if the objects do not support transformation.

    """
    try:
        if M is not None:
            M.transform = 'r'
        if W is not None:
            W.transform = 'r'
    except AttributeError:
        Warn("Weights objects do not support transformation. Proceeding without transforming weights.", UserWarning)
    if (M is not None) and (W is not None) and (M.n > W.n):
        raise AssertionError('M (n={}) is larger than W (n={}).'.format(M.n, W.n))
    return W,M
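A usage sketch with libpysal lattice weights (assuming libpysal is installed); after transform = 'r', each row of weights sums to one:

import numpy as np
from libpysal.weights import lat2W
from warnings import warn as Warn

W = lat2W(5, 5)   # 25 observations on a 5x5 rook lattice
M = lat2W(3, 3)   # 9 upper-level regions
W, M = weights(W, M, transform='r')
assert all(np.isclose(sum(ws), 1.0) for ws in W.weights.values())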
Example #16
def CreateCSR(X, n_jobs = 1):
	"""
	[Added 10/10/2018] [Edited 13/10/2018]
	Much much faster than Scipy. In fact, HyperLearn uses less memory,
	by noticing indices >= 0, hence unsigned ints are used.

	Likewise, parallelisation is seen possible with Numba with n_jobs.
	Notice, an error message will be provided if 20% of the data is only zeros.
	It needs to be more than 20% zeros for CSR Matrix to shine.
	"""
	n,p = X.shape
	rowCount = getDtype(p, n)

	rowCount, nnz = determine_nnz(X, rowCount)

	if nnz/(n*p) > 0.8:
		Warn("Created sparse matrix has just under 20% zeros. Not a good idea to sparsify the matrix.")

	temp = getDtype(nnz, 1)

	f = create_csr_cache if n_jobs == 1 else create_csr_parallel
	return f(X, rowCount, nnz, temp)
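The 20%-zeros rule of thumb, restated with plain numpy/scipy (a sketch of the heuristic, not HyperLearn's implementation):

import numpy as np
import scipy.sparse as spar

X = np.random.rand(200, 100)
X[X < 0.6] = 0                          # roughly 60% zeros
density = np.count_nonzero(X) / X.size  # fraction of nonzero entries
if density > 0.8:
    print('fewer than 20% zeros; CSR will likely cost more than it saves')
else:
    X_csr = spar.csr_matrix(X)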
Example #17
    def ESPG_retrieve():
        if not os.path.isfile('./' + espg_file):

            # ESPG XML data URL
            espg_url = 'http://epsg.io/' + espg_file
            urlreq = URL.Request(
                espg_url,
                data=None,
                headers={
                    'User-Agent':
                    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
                })

            # Try to download the XML file, then save it on success
            try:
                payload = URL.urlopen(urlreq).read().decode('utf-8')
            except Exception:
                Warn("Warning: Could not download ESPG metadata")
            else:
                with open(espg_file, 'w') as espg_of:
                    espg_of.write(payload)
Example #18
 def sleep(self, sleep_amount: int = 0) -> None:
     with self._condition:
         if (sleep_amount):
             self._condition.wait(sleep_amount)
         else:
             Warn(f"Thread: {self.name} has been indefinitely put to sleep")
             self._condition.wait()
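For reference, the threading.Condition semantics the method relies on: wait(timeout) returns False if the timeout elapsed without a notify, and wait() with no timeout blocks until notified.

import threading

cond = threading.Condition()
with cond:
    woke = cond.wait(timeout=0.25)   # False here: nobody notified in time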
Example #19
    def __init__(
        self,
        elex_frame,
        holdout=None,
        threshold=.95,
        share_pattern='_share',
        turnout_col='turnout',
        year_col='year',
    ):
        """
        Construct a Seats-Votes object for a given election.

        Arguments
        ---------
        elex_frame  :   dataframe
                        dataframe containing the elections to be analyzed
        holdout     :   string (default: first non-turnout column matching `share_pattern`)
                        party name to consider as the `holdout` party, against which
                        all log contrasts are constructed
        threshold   :   float (default: .95)
                        threshold beyond which all elections are considered uncontested.
        share_pattern:  string (default: `_share`)
                        pattern denoting how all the vote share columns in the dataframe
                        are named. By default, all columns with `_share` in their name
                        are matched using dataframe.filter(like=share_pattern)
        turnout_col :   string
                        name of the column containing turnout information
        year_col    :   string
                        name of the column containing the year of each election

        """
        share_frame = elex_frame.filter(like=share_pattern)
        if share_frame.empty:
            raise KeyError(
                "no columns in the input dataframe "
                "were found that match pattern: {}".format(share_pattern))
        turnout = elex_frame.get(turnout_col)
        if threshold < .5:
            Warn(
                'Threshold is an upper, not lower bound. Converting to upper bound.'
            )
            threshold = 1 - threshold
        if holdout is None:
            holdout = elex_frame.columns[1]
        if isinstance(holdout, int):
            self._holdout_idx = holdout
        else:
            self._holdout_idx = list(elex_frame.columns).index(holdout)
        self._data = elex_frame
        self._share_cols = share_frame.columns.tolist()
        self._turnout_col = turnout_col
        self.turnout = turnout
        self.shares = share_frame
        self.N, self.P = share_frame.shape
        filtered = prep.filter_uncontested(self.shares, threshold)
        self.uncontested = elex_frame.drop(filtered.index, inplace=False)
        self.contested = elex_frame.drop(self.uncontested.index, inplace=False)
        self.n_uncontested = self.uncontested.shape[0]
        self._uncontested_threshold = threshold
        unc_d = (self.uncontested[self._share_cols].values >
                 self._uncontested_threshold).sum(axis=0)
        self._uncontested_p = unc_d / self.n_uncontested
        self.patterns = prep.extract_patterns(self.contested)
        contrasts = []
        hyperweights = []
        n_contested = 0
        for pattern in self.patterns:
            contrast = prep.make_log_contrasts(pattern.contests,
                                               holdout=holdout,
                                               votecols=self._share_cols +
                                               ['turnout'])
            contrasts.append(contrast)
            hyperweights.append(contrast.shape[0] / self.N)
            n_contested += contrast.shape[0]
        assert n_contested + self.n_uncontested == self.N, "missing/double-counting!"
        self.n_contested = n_contested
        self.contrasts = contrasts
        self._frac_contested = n_contested / self.N
        self.hyperweights = hyperweights
        self._max_size, self._argmax_size = np.max(self.P), np.argmax(self.P)
        self._has_been_fit = False
Example #20
import unittest as ut
import numpy as np
import scipy.sparse as spar
from warnings import filterwarnings, warn as Warn

filterwarnings('ignore', category=spar.SparseEfficiencyWarning)

# spu: the sparse-utilities module under test (assumed imported elsewhere in the suite)
ALL_FUNCS = [f for f, v in spu.__dict__.items()
             if callable(v) and not f.startswith('_')]
COVERAGE = ['spinv', 'splogdet', 'spisfinite', 'spmin', 'spfill_diagonal',
            'spmax', 'spbroadcast', 'sphstack', 'spmultiply', 'spdot']

NOT_COVERED = set(ALL_FUNCS).difference(COVERAGE)

if len(NOT_COVERED) > 0:
    Warn('The following functions in {} are not covered:\n'
         '{}'.format(spu.__file__, NOT_COVERED))


class Test_Sparse_Utils(ut.TestCase):
    def setUp(self):
        np.random.seed(8879)

        self.n = 20
        self.dense0 = np.random.randint(2, size=(self.n, self.n))
        self.d0td0 = self.dense0.T.dot(self.dense0)
        self.dense1 = np.eye(self.n)
        self.sparse0 = spar.csc_matrix(self.dense0)
        self.s0ts0 = self.sparse0.T.dot(self.sparse0)
        self.sparse1 = spar.csc_matrix(spar.identity(self.n))

    def test_inv(self):
Example #21
args = argparser.parse_args()

# Formatting defaults to pretty format
#
if args.formatting is None:
    args.formatting = 'pretty'

################################################################

if len(sys.argv) <= 1:
    argparser.print_help()
    exit(0)

if not args.inputfile.endswith('.jp2'):
    Warn("Warning: Not a valid JPEG2000 file suffix")

if args.outputformat is None:
    raise ValueError("Error: No output format or file specified")

elif args.outputformat not in ('json', 'xml', 'tfw', 'worldfile', 'info'):
    raise ValueError("Error: Not a valid output format")

################################################################
#
# JPEG2000 CHECK STRINGS

# Look for these jp2 strings in file header
#
jp2_header_str = ['ftypjp2', 'jp2 jpx', 'jp2', 'jp2h', 'jp2 J2P1']
Example #22
    def simulate_attainment_gap(self, t=-1, year=None, Xhyp=None, predict=False, q=[5,50,95],
                                 n_sim_batches=1000, sim_batch_size=None, 
                                 best_target=None, return_all=False, **optimize_kws
                                 ):
        """
        Estimate the attainment gap through simulation. Given a target vote share `best_target`,
        find the q'th quantiles (5,50,95 by default) of (.5 - minV) where minV is the smallest vote
        share in the batch (of size `sim_batch_size`) where the party still retains a majority of the
        house. If this simulation is centered at the "optimal" attainment gap value from `optimal_attainment_gap`, 
        this should estimate percentile bounds on the smallest attainment gaps at that vote share. 

        For example, if best_target = .5, then this means `n_sim_batches` of simulations would be conducted
        where the average vote share over the entire batch was .5. Over these batches (each one of size `sim_batch_size`),
        all realizations where the party wins a majority are retained. Then, the minimum average vote share in these
        batches is computed and stored. 

        After all these minima are computed, the qth quantiles of these minima are returned.
        They represent the typical minimum vote share required by the party to win a majority.
        `best_target`, then, simply represents a target for the search space. It should
        be small enough that the party occasionally wins very small majorities, but large enough that 
        they win at least one majority per `sim_batch_size`. 
        
        Arguments
        ----------
        t, year, Xhyp, predict (refer to self.simulate_elections)
        q       :   iterable
                    quantiles to use to summarize the minima
        n_sim_batches:  int
                        number of batches with which to simulate minima
        sim_batch_size: int
                        number of elections to simulate within each batch
        best_target:    float
                        vote share to center the batches
        **optimize_kws: keyword argument dictionary
                        passed to self.optimal_attainment_gap if no target 
                        is provided. 
        """
        if year is None:
            year = self._years[t]
        elif year is not None:
            t = self._years.tolist().index(year)
        if sim_batch_size is None:
            sim_batch_size = n_sim_batches // 10
        if best_target is None:
            best_target = .5 - self.optimal_attainment_gap(t=t, year=year, Xhyp=Xhyp,
                                                           predict=predict, q=[50],
                                                           **optimize_kws)
        agaps = []
        weights = 1/self.models[t].model.weights
        retry = 0
        for _ in tqdm(range(n_sim_batches), 
                      desc='simulating with target={}'.format(best_target)):
            batch = self.simulate_elections(target_v=best_target, t=t, predict=predict,
                                            Xhyp=Xhyp, n_sims=sim_batch_size, fix=False)
            majorities = np.asarray([((sim > .5).mean() > .5) for sim in batch])
            if not majorities.any():
                retry += 1
                continue
            candidate = np.average(batch[majorities], weights=weights, axis=1).min()
            agaps.append(candidate)
        if retry > 0:
            Warn('no majorities found in {} simulation batches! Configuration is: '
                 '\n\t target: \t{} '
                 '\n\t Xhyp is None: \t{}'
                 '\n\t batch_size: \t{}'
                 '\n\t n_batches: \t{}'
                 ''.format(retry, best_target, Xhyp is None, 
                           sim_batch_size, n_sim_batches))
        if not return_all:
            return np.percentile(.5 - np.asarray(agaps), q=q)
        else:
            return .5 - np.asarray(agaps)
Example #23
    def get_swing_ratio(self, n_sims=1000, t=-1,
                                  Xhyp=None,
                                  predict=False, use_sim_swing=True):
        """
        Generic method to compute either predictive or counterfactual elections.

        See also: predict, counterfactual

        Arguments
        ---------
        n_sims      :   int
                        number of simulations to conduct
        t           :   int
                        the target year to use for the counterfactual simulations
        swing       :   float
                        arbitrary shift in vote means (fixed to None by this method)
        Xhyp        :   (n,k)
                        artificial data to use in the simulation
        target_v    :   float
                        target mean vote share to peg the simulations to (fixed to .5
                        by this method)
        fix         :   bool
                        flag to denote whether each simulation is pegged exactly to
                        `target_v` (fixed to False by this method)
        predict     :   bool
                        whether or not to use the predictive distribution or the counterfactual distribution
        use_sim_swing:  bool
                        whether to use the instantaneous change observed in simulations around the observed seatshare/voteshare point, or to use the aggregate slope of the seats-votes curve over all simulations as the swing ratio
        """
        ### Simulated elections
        simulations = self.simulate_elections(n_sims=n_sims, t=t,
                                              swing=None, Xhyp=Xhyp,
                                              target_v=.5, fix=False, predict=predict)
        turnout = 1/self.models[t].model.weights
        ref_voteshares = np.average(simulations, weights=turnout, axis=1)
        grand_ref_voteshare = ref_voteshares.mean()

        ref_seatshares = (simulations > .5).mean(axis=1)
        grand_ref_seatshare = ref_seatshares.mean()

        # unpacked as tuples so that we can reuse the extraction method elsewhere
        obs_turnout, *rest = self._extract_election(t=t)
        obs_voteshares, obs_party_voteshares, *rest = rest
        obs_seats, obs_party_seatshares = rest

        ## Swing Around Median
        party_voteshares = np.hstack((ref_voteshares.reshape(-1,1),
                                      1-ref_voteshares.reshape(-1,1)))
        party_seatshares = np.hstack((ref_seatshares.reshape(-1,1),
                                      1-ref_seatshares.reshape(-1,1)))

        swing_near_median = est.swing_about_pivot(party_seatshares,
                                                  party_voteshares,
                                            np.ones_like(obs_party_voteshares)*.5)

        ## Swing near observed voteshare
        shift_simulations = simulations + (obs_party_voteshares[0] - .5)
        shift_ref_voteshares = np.average(shift_simulations,
                                          weights=turnout, axis=1)
        shift_ref_seatshares = (shift_simulations > .5).mean(axis=1)

        shift_party_voteshares = np.hstack((shift_ref_voteshares.reshape(-1,1),
                                      1-shift_ref_voteshares.reshape(-1,1)))
        shift_party_seatshares = np.hstack((shift_ref_seatshares.reshape(-1,1),
                                      1-shift_ref_seatshares.reshape(-1,1)))

        swing_at_observed = est.swing_about_pivot(shift_party_seatshares,
                                                  shift_party_voteshares,
                                                  obs_party_voteshares)
        ## Sanity Check
        if not np.isfinite(swing_near_median).all():
            Warn('The computation returned an infinite swing ratio. Returning for'
                 ' debugging purposes...', stacklevel=2)
            return party_seatshares, party_voteshares, obs_party_voteshares
        elif not np.isfinite(swing_at_observed).all():
            Warn('The computation returned an infinite swing ratio. Returning for'
                 ' debugging purposes...', stacklevel=2)
            return (shift_party_seatshares, shift_party_voteshares, obs_party_voteshares)
        median_conints = est.intervals(party_seatshares, party_voteshares)
        observed_conints = est.intervals(shift_party_seatshares,
                                         shift_party_voteshares)
        swing_lm, swing_lm_resid = est.swing_slope(shift_party_seatshares,
                                                   shift_party_voteshares)

        self._swing_ratios_emp = swing_at_observed[0]
        self._swing_ratios_med = swing_near_median[0]
        self._swing_ratios_lm = swing_lm.mean()  # pool the parties in a two-party system
        self._swing_CIs = observed_conints
        self._swing_CIs_med = median_conints

        self._use_sim_swing = use_sim_swing

        return swing_at_observed[0] if use_sim_swing else swing_lm