Example No. 1
    def fillcontinents(self,color=0.8):
        """
 Fill continents.

 color - color to fill continents (default gray).
        """
        # get current axes instance.
        ax = pylab.gca()
        # define corners of map domain.
        p1 = (self.llcrnrx,self.llcrnry); p2 = (self.urcrnrx,self.urcrnry)
        p3 = (self.llcrnrx,self.urcrnry); p4 = (self.urcrnrx,self.llcrnry)
        for x,y in self.coastpolygons:
            xa = pylab.array(x,'f')
            ya = pylab.array(y,'f')
            # clip to map domain.
            xa = pylab.clip(xa, self.xmin, self.xmax)
            ya = pylab.clip(ya, self.ymin, self.ymax)
            # check to see if all four corners of the domain are in the
            # polygon (if so, don't draw, since it would fill the whole map).
            delx = 10; dely = 10
            if self.projection in ['cyl']:
                delx = 0.1
                dely = 0.1
            test1 = pylab.fabs(xa-self.xmax) < delx
            test2 = pylab.fabs(xa-self.xmin) < delx
            test3 = pylab.fabs(ya-self.ymax) < dely
            test4 = pylab.fabs(ya-self.ymin) < dely
            hasp1 = sum(test1*test3)
            hasp2 = sum(test2*test3)
            hasp4 = sum(test2*test4)
            hasp3 = sum(test1*test4)
            if not hasp1 or not hasp2 or not hasp3 or not hasp4:
                xy = zip(xa.tolist(),ya.tolist())
                poly = Polygon(xy,facecolor=color,edgecolor=color,linewidth=0)
                ax.add_patch(poly)
Example No. 2
 def mu_age_derivative_potential(mu_age=mu_age,
                                 increasing_a0=pl.clip(parameters['increasing']['age_start']-ages[0], 0, len(ages)),
                                 increasing_a1=pl.clip(parameters['increasing']['age_end']-ages[0], 0, len(ages)),
                                 decreasing_a0=pl.clip(parameters['decreasing']['age_start']-ages[0], 0, len(ages)),
                                 decreasing_a1=pl.clip(parameters['decreasing']['age_end']-ages[0], 0, len(ages))):
     mu_prime = pl.diff(mu_age)
     inc_violation = mu_prime[increasing_a0:increasing_a1].clip(-pl.inf, 0.).sum()
     dec_violation = mu_prime[decreasing_a0:decreasing_a1].clip(0., pl.inf).sum()
     return -1.e12 * (inc_violation**2 + dec_violation**2)
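The potential penalizes slopes with the wrong sign inside each window: clip isolates the offending part of the finite difference, and the squared sum is scaled into a very large negative log-potential. A minimal sketch of the same computation, assuming pl is pylab (which re-exports numpy) and using a made-up rate curve:

import numpy as np

mu_age = np.array([0.1, 0.2, 0.15, 0.3])      # dips at index 2
mu_prime = np.diff(mu_age)                    # [ 0.1, -0.05, 0.15]
# keep only the negative slopes inside an "increasing" window [0, 3)
inc_violation = mu_prime[0:3].clip(-np.inf, 0.).sum()   # -0.05
print(-1.e12 * inc_violation**2)              # -2.5e+09, a heavy penalty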
Example No. 3
 def mu_age(unconstrained_mu_age=unconstrained_mu_age,
            value=parameters['level_value']['value'],
            age_before=pl.clip(parameters['level_value']['age_before']-ages[0], 0, len(ages)),
            age_after=pl.clip(parameters['level_value']['age_after']-ages[0], 0, len(ages)),
            lower=parameters['level_bounds']['lower'],
            upper=parameters['level_bounds']['upper']):
     mu_age = unconstrained_mu_age.copy()
     mu_age[:age_before] = value
     if age_after < len(mu_age)-1:
         mu_age[(age_after+1):] = value
     return mu_age.clip(lower, upper)
Example No. 4
 def mu_age(unconstrained_mu_age=unconstrained_mu_age,
            value=parameters['level_value']['value'],
            age_before=pl.clip(
                parameters['level_value']['age_before'] - ages[0], 0,
                len(ages)),
            age_after=pl.clip(
                parameters['level_value']['age_after'] - ages[0], 0,
                len(ages)),
            lower=parameters['level_bounds']['lower'],
            upper=parameters['level_bounds']['upper']):
     mu_age = unconstrained_mu_age.copy()
     mu_age[:age_before] = value
     if age_after < len(mu_age) - 1:
         mu_age[(age_after + 1):] = value
     return mu_age.clip(lower, upper)
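The pl.clip calls in the default arguments turn raw ages into safe slice bounds on the age grid, so out-of-range parameters cannot index past the array. A small illustration with hypothetical values:

import pylab as pl

ages = pl.arange(0, 101)                  # model age grid
age_before = 150                          # parameter outside the grid
print(pl.clip(age_before - ages[0], 0, len(ages)))   # 101, clamped to the end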
Example No. 5
def scale_mtx(M, normalize=False, dbscale=False, norm=False, bels=False):
    """
    ::

        Perform mutually-orthogonal scaling operations, otherwise return identity:
          normalize [False]
          dbscale   [False]
          norm      [False]
          bels      [False]
    """
    if not (normalize or dbscale or norm or bels):
        return M
    else:
        X = M.copy()  # don't alter the original
        if norm:
            nz_idx = (X * X).sum(1) > 0
            X[nz_idx] = (X[nz_idx].T / np.sqrt(
                (X[nz_idx] * X[nz_idx]).sum(1))).T
        if normalize:
            X = X - np.min(X)
            X = X / np.max(X)
        if dbscale or bels:
            X = P.log10(P.clip(X, 0.0001, X.max()))
            if dbscale:
                X = 20 * X
    return X
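A hypothetical call, assuming scale_mtx as defined above with pylab imported as P and numpy as np: dbscale=True maps the matrix to 20*log10 after flooring it at 1e-4, so no value can fall below -80 dB.

import numpy as np

M = np.abs(np.random.randn(4, 8))
X = scale_mtx(M, dbscale=True)
print(X.min(), X.max())    # dB values; anything under 1e-4 is floored at -80 dB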
Example No. 6
def plot_mtx(mtx=None, title=None, newfig=False, cbar=True, **kwargs):
    """
    ::

        static method for plotting a matrix as a time-frequency distribution (audio features)
    """
    if mtx is None or not isinstance(mtx, np.ndarray):
        raise ValueError('First argument, mtx, must be an array')
    if newfig: P.figure()
    dbscale = kwargs.pop('dbscale', False) 
    bels = kwargs.pop('bels',False)
    norm = kwargs.pop('norm',False)
    normalize = kwargs.pop('normalize',False)
    origin=kwargs.pop('origin','lower')
    aspect=kwargs.pop('aspect','auto')
    interpolation=kwargs.pop('interpolation','nearest')
    cmap=kwargs.pop('cmap',P.cm.gray_r)
    clip=-100.
    X = scale_mtx(mtx, normalize=normalize, dbscale=dbscale, norm=norm, bels=bels)
    i_min, i_max = np.where(X.mean(1))[0][[0,-1]]
    X = X[i_min:i_max+1].copy()
    if dbscale or bels:
        if bels: clip/=10.
        P.imshow(P.clip(X,clip,0),origin=origin, aspect=aspect, interpolation=interpolation, cmap=cmap, **kwargs)
    else:
        P.imshow(X,origin=origin, aspect=aspect, interpolation=interpolation, cmap=cmap, **kwargs)
    if title:
        P.title(title,fontsize=16)
    if cbar:
        P.colorbar()
    P.yticks(np.arange(0,i_max+1-i_min,3),pc_labels[i_min:i_max+1:3],fontsize=14)
    P.xlabel('Tactus', fontsize=14)
    P.ylabel('MIDI Pitch', fontsize=14)
    P.grid()
Example No. 7
    def mu_interval(mu_age=mu_age,
                    theta=theta,
                    age_mid=pl.array(age_mid, dtype=int),
                    age_width=pl.array(age_width, dtype=float)):

        return mu_age.take(pl.clip(age_mid, ages[0], ages[-1]) -
                           ages[0]) + theta * age_width
Example No. 8
    def feature_plot(M,
                     normalize=False,
                     dbscale=False,
                     norm=False,
                     title_string=None,
                     interp='nearest',
                     bels=False):
        """
        ::

            static method for plotting a matrix as a time-frequency distribution (audio features)
        """
        X = adb.feature_scale(M, normalize, dbscale, norm, bels)
        pylab.figure()
        clip = -100.
        if dbscale or bels:
            if bels: clip /= 10.
            pylab.imshow(pylab.clip(X, clip, 0),
                         origin='lower',
                         aspect='auto',
                         interpolation=interp)
        else:
            pylab.imshow(X,
                         origin='lower',
                         aspect='auto',
                         interpolation=interp)
        if title_string:
            pylab.title(title_string)
        pylab.colorbar()
Example No. 9
    def threshold(self, ax, data):
        data = N.asarray(data)
        mu, sd = data.mean(), data.std()

        data = clip(data, mu - 3*sd, mu + 3*sd)
        data = blur_image(data, 5) 


        orig = data.copy()

        f = fft.fft2(data)
        f[0:5,:] = 0
        f[:,0:5] = 0
        data = N.abs(fft.ifft2(f))


        print "mu, std =", orig.mean(), orig.std()

        """
        mu, sigma = data.mean(), data.std()
        mask = data > mu + 2*sigma
        """
        #im = ax.imshow(data, origin="lower", interpolation='nearest')
        im = ax.imshow(orig, origin="lower", interpolation='nearest')
        self.set_callbacks([("button_release_event", self.imclick, ())])
        self.f.colorbar(im, fraction = 0.08)
        return data
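The 3-sigma clamp at the top is a standard outlier guard before smoothing; a quick standalone check (numpy imported as N, as in the snippet):

import numpy as N

data = N.zeros(100)
data[0] = 100.0                                   # one large spike
mu, sd = data.mean(), data.std()                  # mu = 1.0, sd ~ 9.95
print(N.clip(data, mu - 3*sd, mu + 3*sd).max())   # ~30.85, the spike is clamped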
Example No. 10
 def mu_age_derivative_potential(
     mu_age=mu_age,
     increasing_a0=pl.clip(parameters['increasing']['age_start'] - ages[0],
                           0, len(ages)),
     increasing_a1=pl.clip(parameters['increasing']['age_end'] - ages[0], 0,
                           len(ages)),
     decreasing_a0=pl.clip(parameters['decreasing']['age_start'] - ages[0],
                           0, len(ages)),
     decreasing_a1=pl.clip(parameters['decreasing']['age_end'] - ages[0], 0,
                           len(ages))):
     mu_prime = pl.diff(mu_age)
     inc_violation = mu_prime[increasing_a0:increasing_a1].clip(
         -pl.inf, 0.).sum()
     dec_violation = mu_prime[decreasing_a0:decreasing_a1].clip(
         0., pl.inf).sum()
     return -1.e12 * (inc_violation**2 + dec_violation**2)
Example No. 11
def density_plot ( x, D ):
    """Plot the density D along with a confidence region"""
    # TODO: pass parameters through (e.g. color, axes, ...)
    fx = D(x)
    x_ = pl.concatenate ( (x, x[::-1]) )
    fx_ = pl.clip(pl.concatenate ( (fx+D.c,fx[::-1]-D.c) ), 0, pl.inf )
    pl.fill ( x_, fx_, edgecolor=[.5]*3, facecolor=[.8]*3 )
    pl.plot ( x, fx, color=[0]*3 )
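The concatenate/clip pair builds one closed polygon for pl.fill: the upper envelope fx+D.c left to right, then the lower envelope fx-D.c right to left, floored at zero so the band never shows a negative density. A sketch with a hypothetical density object (the class below is a stand-in; the function only requires something callable with a half-width attribute c):

import pylab as pl

class GaussianDensity(object):
    c = 0.05                                 # half-width of the band
    def __call__(self, x):
        return pl.exp(-x**2 / 2) / pl.sqrt(2 * pl.pi)

x = pl.linspace(-3, 3, 101)
density_plot(x, GaussianDensity())
pl.show()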
Example No. 12
def interpolateLin(y,x,xNew):
    """
    linear interpolation of y[x] onto y[xNew]
    Linearly extrapolates if outside range
    """
    xInd = M.clip(M.searchsorted(x,xNew)-1,0,len(x)-2)
    xFract = (xNew-x[xInd])/(x[xInd+1]-x[xInd])
    return y[xInd]+xFract*(y[xInd+1]-y[xInd])
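The clip keeps the searchsorted index inside a valid segment, which is exactly what makes the endpoints extrapolate instead of raising an IndexError. For example, assuming M is pylab (a numpy re-export) as in the snippet:

import pylab as M

x = M.array([0.0, 1.0, 2.0])
y = M.array([0.0, 10.0, 20.0])
print(interpolateLin(y, x, M.array([-0.5, 0.5, 2.5])))
# [-5.  5. 25.] -- points outside [0, 2] are linearly extrapolated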
Example No. 13
def my_hor_to_eq(az, el, lat, lsts):
    dec = arcsin(sin(el) * sin(lat) + cos(el) * cos(lat) * cos(az))
    argument = (sin(el) - sin(lat) * sin(dec)) / (cos(lat) * cos(dec))
    argument = clip(argument, -1.0, 1.0)
    H = arccos(argument)
    flag = sin(az) > 0
    H[flag] = 2.0*pi - H[flag]
    ra = lsts - H
    ra %= 2*pi
    return ra,dec
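The clip to [-1, 1] protects arccos: floating-point rounding in the trigonometric identity can push the argument marginally outside its domain and produce NaN. A one-line demonstration:

from numpy import arccos, clip

argument = 1.0000000000000002              # hypothetical rounding overshoot
print(arccos(clip(argument, -1.0, 1.0)))   # 0.0 instead of nan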
Example No. 14
def sub_mean(x, N):
    N = int(N)
    L = len(x)
    y = pl.zeros_like(x)
    ii = pl.arange(-N, N + 1)
    k = 1.0 / len(ii) # 1 / (2 * N + 1)
    for n in range(L):
        iii = pl.clip(ii + n, 0, L - 1)
        s = k * sum(x[iii])
        y[n] = x[n] - s
    print(n, x[n], iii[0], iii[-1], s)
    return y
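Here clip implements edge handling for a centered moving average: indices that would fall outside the signal are clamped to the first or last sample, which biases the estimate near the edges. Assuming pl is pylab:

import pylab as pl

x = pl.arange(10, dtype=float)
print(sub_mean(x, 2)[:3])   # [-0.6 -0.2  0. ]; edge values are biased
                            # because out-of-range indices repeat the endpoint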
Example No. 15
def hor_to_eq(az, el, lat, lst):
    dec = arcsin(sin(el) * sin(lat) + cos(el) * cos(lat) * cos(az))
    argument = (sin(el) - sin(lat) * sin(dec)) / (cos(lat) * cos(dec))
    argument = pylab.clip(argument, -1.0, 1.0)
    H = arccos(argument)
    flag = sin(az) > 0
    if type(flag) is ndarray:
        H[flag] = 2.0 * pi - H[flag]
    elif flag:
        H = 2.0 * pi - H
    ra = lst - H
    ra %= 2 * pi
    return ra, dec
Example No. 16
def interpolateLinLog(y,x,xNew):
    """
    linear interpolation in LOG space of y[x] onto y[xNew]
    Linearly extrapolates if outside range
    """
    logx = M.log(x)
    logy = M.log(y)
    logxNew = M.log(xNew)
    
    logxInd = M.clip(M.searchsorted(logx,logxNew)-1,0,len(logx)-2)
    logxFract = (logxNew-logx[logxInd])/(logx[logxInd+1]-logx[logxInd])

    return M.exp(logy[logxInd]+logxFract*(logy[logxInd+1]-logy[logxInd]))
Example No. 17
    def imshow(self, ax, data):
        mu, sd = data.mean(), data.std()
        data = clip(data, mu - 3*sd, mu + 3*sd)

        #data = signal.detrend(signal.detrend(data, axis=0), axis=1)
        #data -= data.min()
        #data = signal.spline_filter(data)

        cmap = self.get_cmap()
        im = ax.imshow(data, origin="lower", cmap=cmap, interpolation='nearest')
        self.set_callbacks([("button_release_event", self.imclick, ())])
        self.f.colorbar(im, fraction = 0.08)

        return data
Example No. 18
    def fillcontinents(self,color=0.8):
        """
 Fill continents.

 color - color to fill continents (default gray).
        """
        # get current axes instance.
        ax = pylab.gca()
        # define corners of map domain.
        p1 = (self.llcrnrx,self.llcrnry); p2 = (self.urcrnrx,self.urcrnry)
        p3 = (self.llcrnrx,self.urcrnry); p4 = (self.urcrnrx,self.llcrnry)
        for x,y in self.coastpolygons:
            xa = pylab.array(x,'f')
            ya = pylab.array(y,'f')
            # clip to map domain.
            xa = pylab.clip(xa, self.xmin, self.xmax)
            ya = pylab.clip(ya, self.ymin, self.ymax)
            # check to see if all four corners of the domain are in the
            # polygon (if so, don't draw, since it would fill the whole map).
            delx = 10; dely = 10
            if self.projection in ['cyl']:
                delx = 0.1
                dely = 0.1
            test1 = pylab.fabs(xa-self.xmax) < delx
            test2 = pylab.fabs(xa-self.xmin) < delx
            test3 = pylab.fabs(ya-self.ymax) < dely
            test4 = pylab.fabs(ya-self.ymin) < dely
            hasp1 = sum(test1*test3)
            hasp2 = sum(test2*test3)
            hasp4 = sum(test2*test4)
            hasp3 = sum(test1*test4)
            if not hasp1 or not hasp2 or not hasp3 or not hasp4:
                xy = zip(xa.tolist(),ya.tolist())
                poly = Polygon(xy,facecolor=color,edgecolor=color,linewidth=0)
                ax.add_patch(poly)
        # set axes limits to fit map region.
        self.set_axes_limits()
Example No. 19
    def make_range_frame (self):

        rx = self.axes.get_xlim()
        ry = self.axes.get_ylim()
        px = pl.prctile ( self.x )
        py = pl.prctile ( self.y )

        if self.trim:
            if px[2]-px[0]>1.5*(px[3]-px[1]):
                px[0] = self.x[self.x>px[2]-1.5*(px[3]-px[1])].min()
            if px[4]-px[2]>1.5*(px[3]-px[1]):
                px[4] = self.x[self.x<px[2]+1.5*(px[3]-px[1])].max()

        x = px-rx[0]
        x /= rx[1]-rx[0]
        y = py-ry[0]
        y /= ry[1]-ry[0]
        ex = .003
        ey = .003
        xline = [
                [(x[0],0),(x[1],0)],
                [(x[1],ey),(x[2]-ex,ey)],
                [(x[2]+ex,ey),(x[3],ey)],
                [(x[3],0),(x[4],0)]
                ]
        yline = [
                [(0,y[0]),(0,y[1])],
                [(ex,y[1]),(ex,y[2]-ey)],
                [(ex,y[2]+ey),(ex,y[3])],
                [(0,y[3]),(0,y[4])]
                ]
        widths = [1,1,1,1]
        range_lines = LineCollection(
                segments=pl.clip(xline+yline,0,1),
                linewidths=widths+widths,
                colors=[[0]*3]*2*len(widths) )
        range_lines.set_transform ( self.axes.transAxes )
        range_lines.set_zorder(10)

        self.axes.get_xaxis().tick_bottom()
        self.axes.get_yaxis().tick_left()
        self.axes.set_xticks(px)
        self.axes.set_yticks(py)
        self.axes.tick_params ( width=0 )

        return range_lines
Example No. 20
def feature_plot(M, normalize=False, dbscale=False, norm=False, title_string=None, interp='nearest', bels=False, nofig=False,**kwargs):
    """
    ::

        static method for plotting a matrix as a time-frequency distribution (audio features)
    """
    X = feature_scale(M, normalize, dbscale, norm, bels)
    if not nofig: P.figure()
    clip=-100.
    if dbscale or bels:
        if bels: clip/=10.
        P.imshow(P.clip(X,clip,0),origin='lower',aspect='auto', interpolation=interp, **kwargs)
    else:
        P.imshow(X,origin='lower',aspect='auto', interpolation=interp, **kwargs)
    if title_string:
        P.title(title_string)
    P.colorbar()
Example No. 21
    def feature_plot(M, normalize=False, dbscale=False, norm=False, title_string=None, interp="nearest", bels=False):
        """
        ::

            static method for plotting a matrix as a time-frequency distribution (audio features)
        """
        X = adb.feature_scale(M, normalize, dbscale, norm, bels)
        pylab.figure()
        clip = -100.0
        if dbscale or bels:
            if bels:
                clip /= 10.0
            pylab.imshow(pylab.clip(X, clip, 0), origin="lower", aspect="auto", interpolation=interp)
        else:
            pylab.imshow(X, origin="lower", aspect="auto", interpolation=interp)
        if title_string:
            pylab.title(title_string)
        pylab.colorbar()
Example No. 22
def feature_plot(M,
                 normalize=False,
                 dbscale=False,
                 norm=False,
                 ttl=None,
                 interp='nearest',
                 bels=False,
                 nofig=False,
                 x_lbl='',
                 y_lbl='',
                 cbar=False,
                 save_image_as=None,
                 **kwargs):
    """
    Static method for plotting a matrix as a time-frequency distribution (audio features)
    """
    X = feature_scale(M, normalize, dbscale, norm, bels)
    if not nofig: plt.figure()
    clip = -100.
    if dbscale or bels:
        if bels: clip /= 10.
        plt.imshow(P.clip(X, clip, 0),
                   origin='lower',
                   aspect='auto',
                   interpolation=interp,
                   **kwargs)
    else:
        plt.imshow(X,
                   origin='lower',
                   aspect='auto',
                   interpolation=interp,
                   **kwargs)
    if ttl:
        plt.title(ttl)
    if x_lbl:
        plt.xlabel(x_lbl)
    if y_lbl:
        plt.ylabel(y_lbl)
    if cbar:
        plt.colorbar()
    if save_image_as is not None and not os.path.exists(save_image_as):  # full path!
        plt.savefig(save_image_as)
Example No. 23
    def _mfcc(self): 
        """
        ::

            DCT of the Log magnitude CQFT 
        """
        fp = self._check_feature_params()
        if not self._cqft():
            return False
        self._make_dct()
        AA = P.log10(P.clip(self.CQFT,0.0001,self.CQFT.max()))
        self.MFCC = P.dot(self.DCT, AA)
        self._have_mfcc=True
        if self.verbosity:
            print "Extracted MFCC: lcoef=%d, ncoef=%d, intensified=%d" %(self.lcoef, self.ncoef, self.intensify)
        n=self.ncoef
        l=self.lcoef
        self.X=self.MFCC[l:l+n,:]
        return True
Example No. 24
def feature_scale(M, normalize=False, dbscale=False, norm=False, bels=False):
    """
    Perform mutually-orthogonal scaling operations, otherwise return identity:
    normalize [False]
    dbscale   [False]
    norm      [False]
    bels      [False]
    """
    if not (normalize or dbscale or norm or bels):
        return M
    else:
        X = M.copy()  # don't alter the original
        if norm:
            X = X / P.tile(P.sqrt((X * X).sum(0)), (X.shape[0], 1))
        if normalize:
            X = _normalize(X)
        if dbscale or bels:
            X = P.log10(P.clip(X, 0.0001, X.max()))
            if dbscale:
                X = 20 * X
    return X
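A hypothetical call, assuming pylab is imported as P (as feature_scale requires): norm=True rescales every column to unit L2 norm, independently of the other flags.

import pylab as P

M = P.rand(3, 5) + 0.1
X = feature_scale(M, norm=True)
print(P.sqrt((X * X).sum(0)))   # ~[1. 1. 1. 1. 1.]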
Example No. 25
def feature_scale(M, normalize=False, dbscale=False, norm=False, bels=False):
    """
    ::

        Perform mutually-orthogonal scaling operations, otherwise return identity:
          normalize [False]
          dbscale   [False]
          norm      [False]
          bels      [False]
    """
    if not (normalize or dbscale or norm or bels):
        return M
    else:
        X = M.copy() # don't alter the original
        if norm:
            X = X / P.tile(P.sqrt((X*X).sum(0)),(X.shape[0],1))
        if normalize:
            X = _normalize(X)
        if dbscale or bels:
            X = P.log10(P.clip(X,0.0001,X.max()))
            if dbscale:                
                X = 20*X
    return X
Example No. 26
def luminancecode ( x, basecolor, **kwargs ):
    """Create a code for the values in x

    :Parameters:
        *x*
            values to be coded
        *basecolor*
            basic color that should be mixed with white for lower values

    :Optional Keyword Arguments:
        *vmin*
            minimum of color scale (default: min(x))
        *vmax*
            maximum of color scale (default: max(x))
        *mincol*
            minimum color concentration (default: 0.1)
    """
    vmin = float(kwargs.setdefault ( 'vmin', min(x) ))
    vmax = float(kwargs.setdefault ( 'vmax', max(x) ))
    mincol = float(kwargs.setdefault('mincol', 0.1 ))

    ratios = pl.clip(((vmax-x)/(vmax-vmin)),0,1e8)/mincol

    return [cmix('w',basecolor,r) for r in ratios]
Example No. 27
    def forward(self, ys):
        """Forward propagate activations. This updates the internal
        state for a subsequent call to `backward` and returns the output
        activations."""
        n = len(ys)
        # inputs, zs = [None]*n,[None]*n
        zs = [None] * n

        for i in range(n):
            # inputs[i] = concatenate([ones(1), ys[i]])
            # print self.W2[:,0]

            temp = dot(self.W2[:,1:], ys[i]) + self.W2[:,0]
            # print 'yss', ys[i].shape, self.W2[:,1:].shape, temp.shape
            # temp = dot(self.W2, inputs[i])
            # print temp - dot(self.W2[:,1:], ys[i]) - self.W2[:,0]
            # print inputs[i].shape, self.W2.shape, temp.shape
            # print self.W2[i], i, n
            # temp = dot(self.W2[:,1:], ys[i]) + self.W2[:,0]
            temp = exp(clip(temp,-100,100))
            temp /= sum(temp)
            zs[i] = temp
        # self.state = (inputs,zs)
        return zs
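The clip before exp is the usual overflow guard in a softmax layer: without it a single large logit turns the numerator into inf and the normalized output into NaN. A standalone sketch:

from numpy import array, exp, clip

logits = array([1000.0, 0.0, -1000.0])
z = exp(clip(logits, -100, 100))
z /= z.sum()
print(z)    # ~[1.0, 3.7e-44, 1.4e-87] instead of [nan, nan, nan]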
Example No. 28
    def forward(self, ys):
        """Forward propagate activations. This updates the internal
        state for a subsequent call to `backward` and returns the output
        activations."""
        n = len(ys)
        # inputs, zs = [None]*n,[None]*n
        zs = [None] * n

        for i in range(n):
            # inputs[i] = concatenate([ones(1), ys[i]])
            # print self.W2[:,0]

            temp = dot(self.W2[:, 1:], ys[i]) + self.W2[:, 0]
            # print 'yss', ys[i].shape, self.W2[:,1:].shape, temp.shape
            # temp = dot(self.W2, inputs[i])
            # print temp - dot(self.W2[:,1:], ys[i]) - self.W2[:,0]
            # print inputs[i].shape, self.W2.shape, temp.shape
            # print self.W2[i], i, n
            # temp = dot(self.W2[:,1:], ys[i]) + self.W2[:,0]
            temp = exp(clip(temp, -100, 100))
            temp /= sum(temp)
            zs[i] = temp
        # self.state = (inputs,zs)
        return zs
Example No. 29
def scale_mtx(M, normalize=False, dbscale=False, norm=False, bels=False):
    """
    ::

        Perform mutually-orthogonal scaling operations, otherwise return identity:
          normalize [False]
          dbscale   [False]
          norm      [False]
          bels      [False]
    """
    if not (normalize or dbscale or norm or bels):
        return M
    else:
        X = M.copy() # don't alter the original
        if norm:
            nz_idx = (X*X).sum(1) > 0
            X[nz_idx] = (X[nz_idx].T / np.sqrt((X[nz_idx]*X[nz_idx]).sum(1))).T
        if normalize:
            X=X-np.min(X)
            X=X/np.max(X)
        if dbscale or bels:
            X = P.log10(P.clip(X,0.0001,X.max()))
            if dbscale:                
                X = 20*X
    return X
Example No. 30
    def _process_segment(self, page, filename, page_id, file_id):
        if self.parameter['parallel'] < 2:
            LOG.info("INPUT FILE %s ", filename)
        raw = ocrolib.read_image_gray(filename)

        flat = raw
        #flat = np.array(binImg)
        # estimate skew angle and rotate
        if self.parameter['maxskew'] > 0:
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Skew Angle")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            ma = self.parameter['maxskew']
            ms = int(2 * self.parameter['maxskew'] *
                     self.parameter['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat,
                                        angle,
                                        mode='constant',
                                        reshape=0)
            flat = amax(flat) - flat
        else:
            angle = 0

        # self.write_angles_to_pageXML(base,angle)
        # estimate low and high thresholds
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
        # rescale the image to get the gray scale image
        if self.parameter['parallel'] < 2:
            LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        deskewed = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                 (filename, lo, hi, angle))
        if self.parameter['parallel'] < 2:
            LOG.info("Writing")
        #ocrolib.write_image_binary(base+".ds.png", deskewed)

        #TODO: Need some clarification as the results affect the following pre-processing steps.
        #orientation = -angle
        #orientation = 180 - ((180 - orientation) % 360)
        page.set_orientation(angle)

        file_path = self.workspace.save_image_file(bin_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path, comment="deskewed"))
Example No. 31
File: data.py Project: peterhm/gbd
    def _input_data_from_gbd_json(dm, covs):
        """ translate input data"""
        import dismod3

        # remove any rows with 'ignore' columns set to 1
        dm['data'] = [d for d in dm['data'] if not (d.get('Ignore') or d.get('ignore'))]

        # remove any data with type-specific heterogeneity set to Unusable
        if 'global_priors' in dm['params']:
            for t in dm['params']['global_priors']['heterogeneity']:
                if dm['params']['global_priors']['heterogeneity'][t] == 'Unusable':
                    print('%s has heterogeneity unusable, dropping %d rows' % (t, len([d for d in dm['data'] if d['data_type'] == t + ' data'])))
                    dm['data'] = [d for d in dm['data'] if d['data_type'] != t + ' data']

        input_data = {}
        for field in 'effective_sample_size age_start age_end year_start year_end'.split():
            input_data[field] = []
            for row in dm['data']:
                val = row.get(field, '')
                if val == '':
                    val = pl.nan
                input_data[field].append(float(val))

        input_data['sex'] = []
        for row in dm['data']:
            input_data['sex'].append(row['sex'])

            # replace sex 'all' with sex 'total'
            if input_data['sex'][-1] == 'all':
                input_data['sex'][-1] = 'total'
                
            assert input_data['sex'][-1] != ''

        new_type_name = {'incidence data':'i', 'prevalence data': 'p', 'remission data': 'r', 'excess-mortality data': 'f',
                         'prevalence x excess-mortality data': 'pf', 'all-cause mortality data': 'm_all', 'relative-risk data': 'rr',
                         'duration data': 'X', 'smr data': 'smr', 'cause-specific mortality data': 'csmr', 'mortality data': 'm_with'}
        input_data['data_type'] = [new_type_name[row['data_type']] for row in dm['data']]

        for field in 'value standard_error lower_ci upper_ci'.split():
            input_data[field] = []
            for row in dm['data']:
                val = row.get(field, '')
                if val == '':
                    val = pl.nan
                else:
                    val = float(val) / float(row.get('units', '1').replace(',', ''))
                input_data[field].append(val)

        input_data['area'] = []
        for row in dm['data']:
            val = row.get('country_iso3_code', '')
            if val == '' or val == 'all':
                val = dismod3.utils.clean(row['gbd_region'])
            input_data['area'].append(val)

            assert input_data['area'][-1] != ''

        input_data['age_weights'] = [';'.join(['%.4f'%w for w in row.get('age_weights', [])]) for row in dm['data']]  # store age_weights as semi-colon delimited text, since Pandas doesn't like arrays in arrays and doesn't save comma-separated fields correctly

        # add selected covariates
        if 'covariates' in dm['params']:
            for level in ['Country_level', 'Study_level']:
                for cv in dm['params']['covariates'].get(level, []):
                    if dm['params']['covariates'][level][cv]['rate']['value']:
                        input_data['x_%s'%cv] = []
                        for row in dm['data']:
                            if level == 'Country_level':
                                if row['data_type'] == 'all-cause mortality data':
                                    input_data['x_%s'%cv].append(0.)  # don't bother to merge covariates into all-cause mortality data
                                elif row['region'] == 'all':
                                    input_data['x_%s'%cv].append(0.)  # don't bother to merge covariates into regional data
                                    
                                elif row.get('country_iso3_code'):
                                    iso3 = row['country_iso3_code']

                                    # special case for countries that CODEm does not report on
                                    if 'ASDR' in cv:
                                        if iso3 in ['HKG', 'MAC']:
                                            iso3 = 'TWN'  # TODO: average over CHN, PRK, TWN
                                        if iso3 in ['PRI', 'BMU']:
                                            iso3 = 'CUB' # TODO: average over caribbean countries
                                        
                                    input_data['x_%s'%cv].append(
                                        covs[cv][iso3, row['sex'],
                                                 pl.clip((row['year_start']+row['year_end'])/2, 1980., 2012.)]
                                        )
                                else:
                                    # handle regional data
                                    df = covs[(covs['region'] == dismod3.utils.clean(row['gbd_region']))&
                                              (covs.index.get_level_values(1)==row['sex'])&
                                              (covs.index.get_level_values(2)==pl.clip((row['year_start']+row['year_end'])/2, 1980., 2012.))]
                                    #input_data['x_%s'%cv].append(
                                    #    (df[cv]*df['pop']).sum() / df['pop'].sum()
                                    #    )
                                    input_data['x_%s'%cv].append(0.) # TODO: remove regional data
                            elif level == 'Study_level':
                                input_data['x_%s'%cv].append(float(row.get(dismod3.utils.clean(cv), '') or 0.))
                    # also include column of input data for 'z_%s'%cv if it is requested
                    if dm['params']['covariates'][level][cv]['error']['value']:
                        input_data['z_%s'%cv] = [float(row.get(dismod3.utils.clean(cv), '') or 0.) for row in dm['data']]

        input_data = pandas.DataFrame(input_data)


        # replace age_end 1 with age_end 0, correcting a common mistake in data entry
        i = (input_data['age_start']==0) & (input_data['age_end']==1)
        if i.sum() > 0:
            print('WARNING: correcting age_end in %d rows that have age_start == 0, age_end == 1 (old format uses "demographic" notation)' % i.sum())
            input_data['age_end'][i] = 0

        # replace triple underscores with single underscore, a problem with consistency in the spacing in "North Africa / Middle East"
        input_data['area'] = [a.replace('___', '_') for a in input_data['area']]

        # print checks of data
        for i, row in input_data.T.iteritems():
            if pl.isnan(row['value']):
                print('WARNING: value in row %d is missing' % i)
        input_data = input_data[~pl.isnan(input_data['value'])]

        return input_data
Example No. 32
def figure3 ( ):
    w,h = 25,8.5
    fig = pl.figure ( figsize=(fullwidth,h*fullwidth/w) )

    # a,b,c,d = place_axes ( fig, 1.5,2, [9,9,5,5],[6]*4,
    #         [True]*2+[False]*2, [1.8,1.8,.5,.5], (w,h) )
    a,b,c = place_axes ( fig, 1.5,2, [9,9,5],[6]*3,
            [True]*2+[False], [1.8,1.8,.5], (w,h) )
    d = fig.add_axes ( [10,10,1,1] )
    a.text ( .05, laby, r"\textbf{a}", transform=a.transAxes )
    b.text ( .05, laby, r"\textbf{b}", transform=b.transAxes )
    c.text ( .05, laby, r"\textbf{c}", transform=c.transAxes )
    d.text ( .05, laby, r"\textbf{d}", transform=d.transAxes )
    M = results['model_w_hist']

    # Figures 3 A,B
    for condition in plotinfo['conditions']:
        condition = int ( condition )
        print "c",condition
        d_ = data.getsummary ( condition )
        # x = pl.mgrid[0:plotinfo['xmax']:100j]
        x = pl.mgrid[0:30:100j]
        # if len(data.th_features)>0:
        #     x = threshold.u_v ( x, results['model_w_hist'].nu )

        wfit  = results['model_w_hist'].w[plotinfo['indices'][condition]]
        w0fit = results['model_nohist'].w[plotinfo['indices'][condition]]
        pfit  = results['model_w_hist'].pi
        p0fit  = results['model_nohist'].pi
        x_ = threshold.u_v ( x, results['model_w_hist'].nu )
        x0 = threshold.u_v ( x, results['model_nohist'].nu )

        col = plotinfo['colors'][condition]
        pmf = 0.5*(pfit[1]+pfit[2]*model.logistic ( wfit[0]+wfit[1]*x_ )) + \
                0.5*(1-(pfit[1]+pfit[2]*model.logistic ( wfit[0]-wfit[1]*x_ )))
        p0f = 0.5*(p0fit[1]+p0fit[2]*model.logistic ( w0fit[0]+w0fit[1]*x0 )) + \
                0.5*(1-(p0fit[1]+p0fit[2]*model.logistic ( w0fit[0]-w0fit[1]*x0 )))
        print(p0fit)
        perror = (1-p0f-(1-pmf))/(1-p0f)

        a.plot ( x, pmf, color = col )
        a.plot ( x, p0f, color = col, linestyle='--' )
        b.plot ( x, pl.clip(perror,0,1e5), color = col )

    a.yaxis.set_major_formatter ( prcformatter )
    a.xaxis.set_major_formatter ( myformatter )
    a.set_xticks ( (0,10,20,30) )

    pl.setp ( (a,b), xlabel='Stimulus intensity' )
    a.set_ylabel ( 'Probability correct [\%]' )
    b.set_ylabel ( 'Error rate exp. [\%]' )
    b.set_xticks ( (0,10,20,30) )
    b.yaxis.set_major_locator ( tckr ( density=2, figure=fig, which=1 ) )
    b.yaxis.set_major_formatter ( prcformatter )
    b.xaxis.set_major_formatter ( myformatter )
    if observer in ['KP','sim_KP','sim_KP_nh']:
        b.set_ylim ( 0, .35 )
    if observer in ['pk']:
        pl.setp ( (a,b), xlim=(-.1,30.1) )

    # figure 3 C
    textfile.write ( "Figure 3C:\n" )
    z0 = 0
    C = statistics.EvaluationCollector ( M )
    ewh = C(results['model_w_hist'])
    enh = C(results['model_nohist'])
    hf0 = M.hf0
    # perm = results['permutation_wh']
    # # TODO: These indices have to be adapted to the revised collector
    # thresholds_wh = pl.array([C.get_thres ( perm[i,13+hf0:13+2*hf0], perm[i,12+hf0], perm[i,9:12], p=0.75 ) \
    #         for i in xrange ( 2000 )])
    # perm = results['permutation_nh']
    # thresholds_nh = pl.array([C.get_thres ( perm[i,13+hf0:13+2*hf0], perm[i,12+hf0], perm[i,9:12], p=0.75 ) \
    #         for i in xrange ( 2000 )])
    if thlev == .75:
        thind = 11
    elif thlev == .85:
        thind = 10+hf0
    else:
        raise ValueError

    for condition in range ( 1, M.hf0 ):
        s_wh = results['permutation_wh'][:,thind+condition]
        s_nh = results['permutation_nh'][:,thind+condition]
        # s_wh = thresholds_wh[:,condition]
        # s_nh = thresholds_nh[:,condition]
        s_ratio = s_wh/s_nh
        s_ratio_obs = ewh[thind+condition]/enh[thind+condition]
        # s_ratio_obs = results['model_w_hist'].w[condition]/results['model_nohist'].w[condition]
        z = (s_ratio_obs-pl.mean(s_ratio))/pl.std(s_ratio)
        cpe = pl.mean ( s_ratio < s_ratio_obs )
        ci = pl.prctile ( s_ratio, (2.5,97.5) )
        if z < z0 and ci[1]-ci[0] > 0:
            c0 = condition
            s_ratio_ = s_ratio
            s_ratio_obs_ = s_ratio_obs
            ci_ = ci
        textfile.write (
                "Condition %d\n  th75_ratio = %g\n  cpe = %g\n  percentiles of Null-Distribution: %g, %g\n" % \
                        (condition,s_ratio_obs,cpe,ci[0],ci[1]) )
    try:
        print "Using condition %d for figure 3C" % (c0,)
    except:
        c0 = 1
        s_ratio_ = s_ratio
        s_ratio_obs_ = s_ratio_obs
        ci_ = ci

    hist,bins = pl.histogram ( s_ratio_ )
    c.bar ( bins[:-1], hist, pl.diff ( bins ),
        edgecolor=graphics.histogram_color, facecolor=graphics.histogram_color )
    yrange = c.get_ylim ()
    # c.plot ( [1]*2, yrange, 'k:' )
    if s_ratio_obs<ci_[0]:
        c.plot ( [s_ratio_obs_]*2, (yrange[0],yrange[0]+0.85*(yrange[1]-yrange[0])), linewidth=2,
                color=graphics.observed_color )
        c.plot ( [s_ratio_obs_], [yrange[0]+0.95*(yrange[1]-yrange[0])], '*', color=graphics.observed_color )
    else:
        c.plot ( [s_ratio_obs_]*2, yrange, linewidth=2, color=graphics.observed_color )
    c.plot ( [ci_[0]]*2, yrange, color=graphics.C95_color )
    c.plot ( [ci_[1]]*2, yrange, color=graphics.C95_color )
    c.set_ylim ( *yrange )
    c.set_xlabel ( r'Threshold ratio' )
    c.xaxis.set_major_formatter ( myformatter )
    c.xaxis.set_major_formatter ( myformatter )
    # c.text ( .7, 0.7, r"$\frac{\theta_\mathrm{h}}{\theta_0}$",
    #         transform=c.transAxes )
    # c.set_xlim ( trimmed_hlim ( s_ratio_, s_ratio_obs_ ) )
    # c.xaxis.set_major_locator ( tckr ( density=0.4, figure=fig, which=0 ) )
    c.set_xlim ( .99, 1.01 )
    # c.xaxis.set_ticks ( (.95,1) )
    # c.set_xlim ( .85, 1.05 )
    c.xaxis.set_ticks ( (.99,1.,1.01) )


    # figure 3 D
    l_wh  = 0.5*results['permutation_wh'][:,[9,10]].sum(1)
    l_nh  = 0.5*results['permutation_nh'][:,[9,10]].sum(1)
    l_ratio = l_wh-l_nh
    l_ratio_obs = results['model_w_hist'].pi[[0,1]].sum()-results['model_nohist'].pi[[0,1]].sum()
    cpe = pl.mean ( l_ratio < l_ratio_obs )
    ci = pl.prctile ( l_ratio, (2.5,97.5) )
    textfile.write (
        "Figure 3D:\n  lapse_ratio = %g\n  cpe = %g\n  percentiles of Null-distribution: %g, %g\n  lapse_rate (w hist) = %g\n  lapse_rate (no hist) = %g\n" % \
                (l_ratio_obs,cpe,ci[0],ci[1],results['model_w_hist'].pi[[0,1]].sum(),results['model_nohist'].pi[[0,1]].sum()) )

    d = graphics.prepare_axes ( d, haveon=('bottom',) )
    # hist,bins = pl.histogram ( l_ratio )
    hist,bins = pl.histogram ( l_ratio, bins=good_lapse_bins ( l_ratio ) )
    # hist,bins = pl.histogram ( l_ratio, bins=pl.mgrid[-.0001:.0001:20j] )
    d.bar ( bins[:-1], hist, pl.diff(bins),
        edgecolor=graphics.histogram_color, facecolor=graphics.histogram_color, zorder=0 )
    yrange = d.get_ylim ()
    # d.plot ( [1]*2, yrange, 'k:' )
    if l_ratio_obs < ci[0] or l_ratio_obs > ci[1]:
        d.plot ( [l_ratio_obs]*2, [yrange[0], yrange[0]+0.85*(yrange[1]-yrange[0])],
                linewidth=2, color=graphics.observed_color)
        d.plot ( [l_ratio_obs], [yrange[0]+0.95*(yrange[1]-yrange[0])], '*', color=graphics.observed_color)
    else:
        print "lrobs",l_ratio_obs
        d.plot ( [l_ratio_obs]*2, yrange, color=graphics.observed_color, zorder=2)
    d.plot ([ci[0]]*2, yrange, color=graphics.C95_color, zorder=1 )
    d.plot ([ci[1]]*2, yrange, color=graphics.C95_color, zorder=1 )
    d.set_ylim ( yrange )

    d.set_xlabel ( r'Asymptote difference' )
    # d.text ( .7, 0.7, r"$\frac{\lambda_\mathrm{h}}{\lambda_0}$",
    #         transform=d.transAxes )
    # d.set_xlim ( trimmed_hlim ( l_ratio, l_ratio_obs, (0,5) ) )
    d.set_xlim ( -.003, .001 )
    d.xaxis.set_major_locator ( tckr ( density=0.4, figure=fig, which=0 ) )
    d.xaxis.set_ticks ( (-.002,0) )
    # d.set_xlim ( (.75, 1.25) )
    d.xaxis.set_major_formatter ( myformatter )

    a.set_ylim ( .49, 1.01 )

    pl.savefig ( "figures/%s3.pdf" % ( figname, ) )
    pl.savefig ( "figures/%s3.eps" % ( figname, ) )
Example No. 33
 def do_fontsize(k):
     return float(clip(max_text_size*sqrt(data[k]),
                       min_text_size, max_text_size))
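A hypothetical driver for the expression above, with made-up globals (max_text_size, min_text_size, data are assumptions): the clip pins sqrt-scaled sizes into a readable range.

from numpy import clip, sqrt

max_text_size, min_text_size = 30.0, 8.0
data = {'a': 0.04, 'b': 1.5}
for k in sorted(data):
    print(k, float(clip(max_text_size*sqrt(data[k]), min_text_size, max_text_size)))
# a -> 8.0 (floored), b -> 30.0 (capped)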
Example No. 34
 def mu_interval(mu_age=mu_age,
                 theta=theta,
                 age_mid=pl.array(age_mid, dtype=int),
                 age_width=pl.array(age_width, dtype=float)):
     
     return mu_age.take(pl.clip(age_mid, ages[0], ages[-1]) - ages[0]) + theta*age_width
Example No. 35
def log_add(x, y):
    return where(
        abs(x - y) > 10, maximum(x, y),
        log(exp(clip(x - y, -20, 20)) + 1) + y)
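This is a numerically stable log(exp(x) + exp(y)): when the inputs differ by more than 10 the larger term dominates (to within about 5e-5), and otherwise the clipped difference keeps exp() in range. It behaves like numpy.logaddexp up to that shortcut:

from numpy import where, abs, maximum, log, exp, clip, array

x = array([0.0, 500.0])
y = array([1.0, 0.0])
print(log_add(x, y))    # ~[1.3133, 500.0]; naive log(exp(500)) would overflow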
Example No. 36
def consistent(model,
               reference_area='all',
               reference_sex='total',
               reference_year='all',
               priors={},
               zero_re=True,
               rate_type='neg_binom'):
    """ Generate PyMC objects for consistent model of epidemological data
    
    :Parameters:
      - `model` : data.ModelData
      - `reference_area, reference_sex, reference_year` : the node of
        the model to fit consistently
      - `priors` : dictionary, with keys for data types for lists of
        priors on age patterns
      - `zero_re` : boolean, change one stoch from each set of
        siblings in area hierarchy to a 'sum to zero' deterministic
      - `rate_type` : str or dict, optional. One of 'beta_binom',
        'binom', 'log_normal_model', 'neg_binom',
        'neg_binom_lower_bound_model', 'neg_binom_model',
        'normal_model', 'offset_log_normal', or 'poisson', optionally
        as a dict, with keys i, r, f, p, m_with

    :Results:
      - Returns dict of dicts of PyMC objects, including 'i', 'p',
        'r', 'f', the covariate adjusted predicted values for each row
        of data
    
    .. note::
      - dict priors can contain keys (t, 'mu') and (t, 'sigma') to
        tell the consistent model about the priors on levels for the
        age-specific rate of type t (these are arrays for mean and
        standard deviation a priori for mu_age[t])
      - it can also contain dicts keyed by t alone to insert empirical
        priors on the fixed effects and random effects

    """
    # TODO: refactor the way priors are handled
    # current approach is much more complicated than necessary
    for t in 'i r pf p rr f'.split():
        if t in priors:
            model.parameters[t]['random_effects'].update(
                priors[t]['random_effects'])
            model.parameters[t]['fixed_effects'].update(
                priors[t]['fixed_effects'])

    # if rate_type is a string, make it into a dict
    if type(rate_type) == str:
        rate_type = dict(i=rate_type,
                         r=rate_type,
                         f=rate_type,
                         p=rate_type,
                         m_with=rate_type)

    if 'm_with' not in rate_type.keys():
        rate_type['m_with'] = 'neg_binom'
    if 'i' not in rate_type.keys():
        rate_type['i'] = 'neg_binom'
    if 'r' not in rate_type.keys():
        rate_type['r'] = 'neg_binom'
    if 'f' not in rate_type.keys():
        rate_type['f'] = 'neg_binom'

    rate = {}
    ages = model.parameters['ages']

    for t in 'irf':
        rate[t] = age_specific_rate(
            model,
            t,
            reference_area,
            reference_sex,
            reference_year,
            mu_age=None,
            mu_age_parent=priors.get((t, 'mu')),
            sigma_age_parent=priors.get((t, 'sigma')),
            zero_re=zero_re,
            rate_type=rate_type[t]
        )[t]  # age_specific_rate()[t] is to create proper nesting of dict

        # set initial values from data
        if t in priors:
            if isinstance(priors[t], mc.Node):
                initial = priors[t].value
            else:
                initial = pl.array(priors[t])
        else:
            initial = rate[t]['mu_age'].value.copy()
            df = model.get_data(t)
            if len(df.index) > 0:
                mean_data = df.groupby(['age_start',
                                        'age_end']).mean().delevel()
                for i, row in mean_data.T.iteritems():
                    start = row['age_start'] - rate[t]['ages'][0]
                    end = row['age_end'] - rate[t]['ages'][0]
                    initial[start:end] = row['value']

        for i, k in enumerate(rate[t]['knots']):
            rate[t]['gamma'][i].value = pl.log(initial[k -
                                                       rate[t]['ages'][0]] +
                                               1.e-9)

    m_all = .01 * pl.ones(101)
    df = model.get_data('m_all')
    if len(df.index) == 0:
        print('WARNING: all-cause mortality data not found, using m_all = .01')
    else:
        mean_mortality = df.groupby(['age_start', 'age_end']).mean().delevel()

        knots = []
        for i, row in mean_mortality.T.iteritems():
            knots.append(
                int(pl.clip((row['age_start'] + row['age_end'] + 1.) / 2., 0, 100)))

            m_all[knots[-1]] = row['value']

        # extend knots as constant beyond endpoints
        knots = sorted(knots)
        m_all[0] = m_all[knots[0]]
        m_all[100] = m_all[knots[-1]]

        knots.insert(0, 0)
        knots.append(100)

        m_all = scipy.interpolate.interp1d(knots, m_all[knots],
                                           kind='linear')(pl.arange(101))
    m_all = m_all[ages]

    logit_C0 = mc.Uniform('logit_C0', -15, 15, value=-10.)

    # use Runge-Kutta 4 ODE solver
    import dismod_ode

    N = len(m_all)
    num_step = 10  # double until it works
    ages = pl.array(ages, dtype=float)
    fun = dismod_ode.ode_function(num_step, ages, m_all)

    @mc.deterministic
    def mu_age_p(logit_C0=logit_C0,
                 i=rate['i']['mu_age'],
                 r=rate['r']['mu_age'],
                 f=rate['f']['mu_age']):

        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)

        x = pl.hstack((i, r, f, 1 - C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[pl.isnan(p)] = 0.
        return p

    p = age_specific_rate(model,
                          'p',
                          reference_area,
                          reference_sex,
                          reference_year,
                          mu_age_p,
                          mu_age_parent=priors.get(('p', 'mu')),
                          sigma_age_parent=priors.get(('p', 'sigma')),
                          zero_re=zero_re,
                          rate_type=rate_type['p'])['p']

    @mc.deterministic
    def mu_age_pf(p=p['mu_age'], f=rate['f']['mu_age']):
        return p * f

    pf = age_specific_rate(model,
                           'pf',
                           reference_area,
                           reference_sex,
                           reference_year,
                           mu_age_pf,
                           mu_age_parent=priors.get(('pf', 'mu')),
                           sigma_age_parent=priors.get(('pf', 'sigma')),
                           lower_bound='csmr',
                           include_covariates=False,
                           zero_re=zero_re)['pf']

    @mc.deterministic
    def mu_age_m(pf=pf['mu_age'], m_all=m_all):
        return (m_all - pf).clip(1.e-6, 1.e6)

    rate['m'] = age_specific_rate(model,
                                  'm_wo',
                                  reference_area,
                                  reference_sex,
                                  reference_year,
                                  mu_age_m,
                                  None,
                                  None,
                                  include_covariates=False,
                                  zero_re=zero_re)['m_wo']

    @mc.deterministic
    def mu_age_rr(m=rate['m']['mu_age'], f=rate['f']['mu_age']):
        return (m + f) / m

    rr = age_specific_rate(model,
                           'rr',
                           reference_area,
                           reference_sex,
                           reference_year,
                           mu_age_rr,
                           mu_age_parent=priors.get(('rr', 'mu')),
                           sigma_age_parent=priors.get(('rr', 'sigma')),
                           rate_type='log_normal',
                           include_covariates=False,
                           zero_re=zero_re)['rr']

    @mc.deterministic
    def mu_age_smr(m=rate['m']['mu_age'], f=rate['f']['mu_age'], m_all=m_all):
        return (m + f) / m_all

    smr = age_specific_rate(model,
                            'smr',
                            reference_area,
                            reference_sex,
                            reference_year,
                            mu_age_smr,
                            mu_age_parent=priors.get(('smr', 'mu')),
                            sigma_age_parent=priors.get(('smr', 'sigma')),
                            rate_type='log_normal',
                            include_covariates=False,
                            zero_re=zero_re)['smr']

    @mc.deterministic
    def mu_age_m_with(m=rate['m']['mu_age'], f=rate['f']['mu_age']):
        return m + f

    m_with = age_specific_rate(model,
                               'm_with',
                               reference_area,
                               reference_sex,
                               reference_year,
                               mu_age_m_with,
                               mu_age_parent=priors.get(('m_with', 'mu')),
                               sigma_age_parent=priors.get(
                                   ('m_with', 'sigma')),
                               include_covariates=False,
                               zero_re=zero_re,
                               rate_type=rate_type['m_with'])['m_with']

    # duration = E[time in bin C]
    @mc.deterministic
    def mu_age_X(r=rate['r']['mu_age'],
                 m=rate['m']['mu_age'],
                 f=rate['f']['mu_age']):
        hazard = r + m + f
        pr_not_exit = pl.exp(-hazard)
        X = pl.empty(len(hazard))
        X[-1] = 1 / hazard[-1]
        for i in reversed(range(len(X) - 1)):
            X[i] = pr_not_exit[i] * (X[i + 1] + 1) + 1 / hazard[i] * (
                1 - pr_not_exit[i]) - pr_not_exit[i]
        return X

    X = age_specific_rate(model,
                          'X',
                          reference_area,
                          reference_sex,
                          reference_year,
                          mu_age_X,
                          mu_age_parent=priors.get(('X', 'mu')),
                          sigma_age_parent=priors.get(('X', 'sigma')),
                          rate_type='normal',
                          include_covariates=True,
                          zero_re=zero_re)['X']

    vars = rate
    vars.update(logit_C0=logit_C0,
                p=p,
                pf=pf,
                rr=rr,
                smr=smr,
                m_with=m_with,
                X=X)
    return vars
Example No. 37
def sumouter(us, vs, lo=-1.0, hi=1.0, out=None):
    result = out if out is not None else zeros((len(us[0]), len(vs[0])))
    for u, v in zip(us, vs):
        result += outer(clip(u, lo, hi), v)
    return result
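A quick check, assuming sumouter above is in a namespace with numpy's names (the snippet uses bare zeros/clip/outer, as in a pylab-style star import). The clip bounds each input vector before the outer-product accumulation, the usual saturation step in ocropy-style gradient sums:

from numpy import array, zeros, clip, outer

us = [array([0.5, -2.0])]
vs = [array([1.0, 3.0, -1.0])]
print(sumouter(us, vs))   # clip(u, -1, 1) -> [0.5, -1.0]; result is 2x3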
Example No. 38
def sumprod(us, vs, lo=-1.0, hi=1.0, out=None):
    assert len(us[0]) == len(vs[0])
    result = out if out is not None else zeros(len(us[0]))
    for u, v in zip(us, vs):
        result += clip(u, lo, hi) * v
    return result
Example No. 39
    def process(self):
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            img = self.workspace.resolve_image_as_pil(fname)

            print_info("# %s" % (fname))
            raw = ocrolib.read_image_gray(img.filename)

            self.dshow(raw, "input")

            # perform image normalization
            image = raw - amin(raw)
            if amax(image) == amin(image):
                print_info("# image is empty: %s" % (fname))
                return
            image /= amax(image)

            if not self.parameter['nocheck']:
                check = self.check_page(amax(image) - image)
                if check is not None:
                    print_error(fname + " SKIPPED. " + check +
                                " (use -n to disable this check)")
                    return

            # check whether the image is already effectively binarized
            if self.parameter['gray']:
                extreme = 0
            else:
                extreme = (np.sum(image < 0.05) +
                           np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
            if extreme > 0.95:
                comment = "no-normalization"
                flat = image
            else:
                comment = ""
                # if not, we need to flatten it by estimating the local whitelevel
                print_info("flattening")
                m = interpolation.zoom(image, self.parameter['zoom'])
                m = filters.percentile_filter(m,
                                              self.parameter['perc'],
                                              size=(self.parameter['range'],
                                                    2))
                m = filters.percentile_filter(m,
                                              self.parameter['perc'],
                                              size=(2,
                                                    self.parameter['range']))
                m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
                if self.parameter['debug'] > 0:
                    clf()
                    imshow(m, vmin=0, vmax=1)
                    ginput(1, self.parameter['debug'])
                w, h = minimum(array(image.shape), array(m.shape))
                flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
                if self.parameter['debug'] > 0:
                    clf()
                    imshow(flat, vmin=0, vmax=1)
                    ginput(1, self.parameter['debug'])

            # estimate low and high thresholds
            print_info("estimating thresholds")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if self.parameter['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = self.parameter['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if self.parameter['debug'] > 0:
                    imshow(v)
                    ginput(1, self.parameter['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
            hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
            # rescale the image to get the gray scale image
            print_info("rescaling")
            flat -= lo
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if self.parameter['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            binarized = 1 * (flat > self.parameter['threshold'])

            # output the normalized grayscale and the thresholded images
            # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
            print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
            print_info("writing")
            if self.parameter['debug'] > 0 or self.parameter['show']:
                clf()
                gray()
                imshow(binarized)
                ginput(1, max(0.1, self.parameter['debug']))
            base, _ = ocrolib.allsplitext(img.filename)
            ocrolib.write_image_binary(base + ".bin.png", binarized)
            # ocrolib.write_image_gray(base +".nrm.png", flat)
            # print("########### File path : ", base+".nrm.png")
            # write_to_xml(base+".bin.png")
            # return base+".bin.png"

            ID = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype="image/png",
                                    url=base + ".bin.png",
                                    local_filename='%s/%s' %
                                    (self.output_file_grp, ID),
                                    content=to_xml(pcgts).encode('utf-8'))
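
The percentile-filter flattening above is the core of ocropy-style normalization: estimate the local white level of the page at reduced resolution, then divide it out. A self-contained sketch of that step, with illustrative parameter values standing in for the processor's configuration:

import numpy as np
from scipy.ndimage import filters, interpolation

def flatten_whitelevel(image, zoom=0.5, perc=80, rng=20):
    # estimate the page background at reduced resolution with generous
    # percentile filters along rows, then columns, and scale back up
    m = interpolation.zoom(image, zoom)
    m = filters.percentile_filter(m, perc, size=(rng, 2))
    m = filters.percentile_filter(m, perc, size=(2, rng))
    m = interpolation.zoom(m, 1.0 / zoom)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    # subtract the background; values near the estimated white level map to 1
    return np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

flat = flatten_whitelevel(np.random.rand(200, 200))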
Exemplo n.º 42
Arquivo: data.py Projeto: aflaxman/gbd
    def _input_data_from_gbd_json(dm, covs):
        """ translate input data"""
        import dismod3

        # remove any rows with 'ignore' columns set to 1
        dm["data"] = [d for d in dm["data"] if not (d.get("Ignore") or d.get("ignore"))]

        # remove any data with type-specific heterogeneity set to Unusable
        if "global_priors" in dm["params"]:
            for t in dm["params"]["global_priors"]["heterogeneity"]:
                if dm["params"]["global_priors"]["heterogeneity"][t] == "Unusable":
                    print "%s has heterogeneity unusable, dropping %d rows" % (
                        t,
                        len([d for d in dm["data"] if d["data_type"] == t + " data"]),
                    )
                    dm["data"] = [d for d in dm["data"] if d["data_type"] != t + " data"]

        input_data = {}
        for field in "effective_sample_size age_start age_end year_start year_end".split():
            input_data[field] = []
            for row in dm["data"]:
                val = row.get(field, "")
                if val == "":
                    val = pl.nan
                input_data[field].append(float(val))

        input_data["sex"] = []
        for row in dm["data"]:
            input_data["sex"].append(row["sex"])

            # replace sex 'all' with sex 'total'
            if input_data["sex"][-1] == "all":
                input_data["sex"][-1] = "total"

            assert input_data["sex"][-1] != ""

        new_type_name = {
            "incidence data": "i",
            "prevalence data": "p",
            "remission data": "r",
            "excess-mortality data": "f",
            "prevalence x excess-mortality data": "pf",
            "all-cause mortality data": "m_all",
            "relative-risk data": "rr",
            "duration data": "X",
            "smr data": "smr",
            "cause-specific mortality data": "csmr",
            "mortality data": "m_with",
        }
        input_data["data_type"] = [new_type_name[row["data_type"]] for row in dm["data"]]

        for field in "value standard_error lower_ci upper_ci".split():
            input_data[field] = []
            for row in dm["data"]:
                val = row.get(field, "")
                if val == "":
                    val = pl.nan
                else:
                    val = float(val) / float(row.get("units", "1").replace(",", ""))
                input_data[field].append(val)

        input_data["area"] = []
        for row in dm["data"]:
            val = row.get("country_iso3_code", "")
            if val == "" or val == "all":
                val = dismod3.utils.clean(row["gbd_region"])
            input_data["area"].append(val)

            assert input_data["area"][-1] != ""

        input_data["age_weights"] = [
            ";".join(["%.4f" % w for w in row.get("age_weights", [])]) for row in dm["data"]
        ]  # store age_weights as semi-colon delimited text, since Pandas doesn't like arrays in arrays and doesn't save comma-separated fields correctly

        # add selected covariates
        if "covariates" in dm["params"]:
            for level in ["Country_level", "Study_level"]:
                for cv in dm["params"]["covariates"].get(level, []):
                    if dm["params"]["covariates"][level][cv]["rate"]["value"]:
                        input_data["x_%s" % cv] = []
                        for row in dm["data"]:
                            if level == "Country_level":
                                if row["data_type"] == "all-cause mortality data":
                                    input_data["x_%s" % cv].append(
                                        0.0
                                    )  # don't bother to merge covariates into all-cause mortality data
                                elif row["region"] == "all":
                                    input_data["x_%s" % cv].append(
                                        0.0
                                    )  # don't bother to merge covariates into regional data

                                elif row.get("country_iso3_code"):
                                    iso3 = row["country_iso3_code"]

                                    # special case for countries that CODEm does not report on
                                    if "ASDR" in cv:
                                        if iso3 in ["HKG", "MAC"]:
                                            iso3 = "TWN"  # TODO: average over CHN, PRK, TWN
                                        if iso3 in ["PRI", "BMU"]:
                                            iso3 = "CUB"  # TODO: average over caribbean countries

                                    input_data["x_%s" % cv].append(
                                        covs[cv][
                                            iso3,
                                            row["sex"],
                                            pl.clip((row["year_start"] + row["year_end"]) / 2, 1980.0, 2012.0),
                                        ]
                                    )
                                else:
                                    # handle regional data
                                    df = covs[
                                        (covs["region"] == dismod3.utils.clean(row["gbd_region"]))
                                        & (covs.index.get_level_values(1) == row["sex"])
                                        & (
                                            covs.index.get_level_values(2)
                                            == pl.clip((row["year_start"] + row["year_end"]) / 2, 1980.0, 2012.0)
                                        )
                                    ]
                                    # input_data['x_%s'%cv].append(
                                    #    (df[cv]*df['pop']).sum() / df['pop'].sum()
                                    #    )
                                    input_data["x_%s" % cv].append(0.0)  # TODO: remove regional data
                            elif level == "Study_level":
                                input_data["x_%s" % cv].append(float(row.get(dismod3.utils.clean(cv), "") or 0.0))
                    # also include column of input data for 'z_%s'%cv if it is requested
                    if dm["params"]["covariates"][level][cv]["error"]["value"]:
                        input_data["z_%s" % cv] = [
                            float(row.get(dismod3.utils.clean(cv), "") or 0.0) for row in dm["data"]
                        ]

        input_data = pandas.DataFrame(input_data)

        # replace age_end 1 with age_end 0, correcting a common mistake in data entry
        i = (input_data["age_start"] == 0) & (input_data["age_end"] == 1)
        if i.sum() > 0:
            print 'WARNING: correcting age_end in %d rows that have age_start == 0, age_end == 1 (old format uses "demographic" notation)' % i.sum()
            input_data["age_end"][i] = 0

        # replace triple underscores with single underscore, a problem with consistency in the spacing in "North Africa / Middle East"
        input_data["area"] = [a.replace("___", "_") for a in input_data["area"]]

        # print checks of data
        for i, row in input_data.T.iteritems():
            if pl.isnan(row["value"]):
                print "WARNING: value in row %d is missing" % i
        input_data = input_data[~pl.isnan(input_data["value"])]

        return input_data
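
The per-field loops above share one parsing rule worth isolating: empty cells become NaN, and values are normalized by the row's "units" denominator, which may contain thousands separators. A self-contained sketch of that rule (the sample row is made up):

def parse_value(row, field):
    # empty cells become NaN; otherwise divide by the row's units,
    # stripping thousands separators such as "100,000"
    val = row.get(field, "")
    if val == "":
        return float("nan")
    return float(val) / float(row.get("units", "1").replace(",", ""))

row = {"value": "25", "units": "100,000"}
print(parse_value(row, "value"))           # 0.00025, i.e. 25 per 100,000
print(parse_value(row, "standard_error"))  # nan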
Exemplo n.º 43
from matplotlib.toolkits.basemap import Basemap, cm
import pylab, copy
from matplotlib import rcParams

# make tick labels smaller
rcParams['xtick.labelsize'] = 9
rcParams['ytick.labelsize'] = 9

# plot rainfall from NWS using special precipitation
# colormap used by the NWS, and included in basemap.

# NetCDFFile is not imported in the original snippet; basemap-era examples
# typically got it from Scientific.IO.NetCDF.
nc = NetCDFFile('nws_precip_conus_20061222.nc')
# data from http://www.srh.noaa.gov/rfcshare/precip_analysis_new.php
prcpvar = nc.variables['amountofprecip']
data = 0.01 * prcpvar[:]
data = pylab.clip(data, 0, 10000)
latcorners = nc.variables['lat'][:]
loncorners = -nc.variables['lon'][:]
plottitle = prcpvar.long_name + ' for period ending ' + prcpvar.dateofdata
print data.min(), data.max()
print latcorners
print loncorners
print plottitle
print data.shape
lon_0 = -nc.variables['true_lon'].getValue()
lat_0 = nc.variables['true_lat'].getValue()
# create polar stereographic Basemap instance.
m = Basemap(projection='stere',lon_0=lon_0,lat_0=90.,lat_ts=lat_0,\
            llcrnrlat=latcorners[0],urcrnrlat=latcorners[2],\
            llcrnrlon=loncorners[0],urcrnrlon=loncorners[2],\
            rsphere=6371200.,resolution='l',area_thresh=10000)
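
The snippet breaks off after constructing the map. A hedged sketch of the usual continuation, modeled on the standard basemap precipitation example; the contour levels, the cm.s3pcolors colormap name, and the makegrid-based projection grid are assumptions, not part of this snippet:

ny, nx = data.shape
lons, lats = m.makegrid(nx, ny)          # lat/lon of an evenly spaced grid
x, y = m(lons, lats)                     # convert to map projection coordinates
clevs = [0, 1, 2.5, 5, 7.5, 10, 15, 20, 30, 40, 50, 70, 100]
cs = m.contourf(x, y, data, clevs, cmap=cm.s3pcolors)
m.drawcoastlines()
m.drawstates()
m.drawcountries()
pylab.colorbar(cs)
pylab.title(plottitle)
pylab.show()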
Exemplo n.º 44
def one_compartment_ode(S, t, h_b, h_m):
    # piecewise-constant functions of time implemented as arrays
    t = int(pl.clip(t, 0, len(h_b)-1))
    return (h_b[t]-h_m[t])*S
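
A minimal sketch of integrating this right-hand side with scipy's odeint, assuming yearly birth and mortality hazard arrays (the numbers are illustrative):

import pylab as pl
from scipy.integrate import odeint

h_b = pl.array([0.02, 0.02, 0.01])   # toy birth hazard per year
h_m = pl.array([0.01, 0.03, 0.01])   # toy mortality hazard per year
t = pl.arange(0.0, 3.0, 0.1)
S = odeint(one_compartment_ode, 1.0, t, args=(h_b, h_m))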
Exemplo n.º 45
    def process(self):
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            img = self.workspace.resolve_image_as_pil(fname)
            param = self.parameter
            base, _ = ocrolib.allsplitext(fname)
            #basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]

            if param['parallel'] < 2:
                print_info("=== %s " % (fname))
            raw = ocrolib.read_image_gray(img.filename)

            flat = raw
            #flat = np.array(binImg)
            # estimate skew angle and rotate
            if param['maxskew'] > 0:
                if param['parallel'] < 2:
                    print_info("estimating skew angle")
                d0, d1 = flat.shape
                o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
                flat = amax(flat) - flat
                flat -= amin(flat)
                est = flat[o0:d0 - o0, o1:d1 - o1]
                ma = param['maxskew']
                ms = int(2 * param['maxskew'] * param['skewsteps'])
                angle = self.estimate_skew_angle(est,
                                                 linspace(-ma, ma, ms + 1))
                flat = interpolation.rotate(flat,
                                            angle,
                                            mode='constant',
                                            reshape=0)
                flat = amax(flat) - flat
            else:
                angle = 0

            # self.write_angles_to_pageXML(base,angle)
            # estimate low and high thresholds
            if param['parallel'] < 2:
                print_info("estimating thresholds")
            d0, d1 = flat.shape
            o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if param['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = param['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if param['debug'] > 0:
                    imshow(v)
                    ginput(1, param['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), param['lo'])
            hi = stats.scoreatpercentile(est.ravel(), param['hi'])
            # rescale the image to get the gray scale image
            if param['parallel'] < 2:
                print_info("rescaling")
            flat -= lo
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if param['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, param['debug'])
            deskewed = 1 * (flat > param['threshold'])

            # output the normalized grayscale and the thresholded images
            print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                       (pcgts.get_Page().imageFilename, lo, hi, angle))
            if param['parallel'] < 2:
                print_info("writing")
            ocrolib.write_image_binary(base + ".ds.png", deskewed)

            orientation = -angle
            orientation = 180 - (180 - orientation) % 360
            pcgts.get_Page().set_orientation(orientation)

            ID = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype="image/png",
                                    url=base + ".ds.png",
                                    local_filename='%s/%s' %
                                    (self.output_file_grp, ID),
                                    content=to_xml(pcgts).encode('utf-8'))
Exemplo n.º 46
def mu_interval(mu_age=mu_age, age_mid=pl.array(age_mid, dtype=int)):
    return mu_age.take(pl.clip(age_mid, ages[0], ages[-1]) - ages[0])
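
mu_interval evaluates the modeled age pattern at each interval midpoint, clipping midpoints into the modeled age range first. A standalone sketch with made-up values:

import pylab as pl

ages = pl.arange(0, 101)                     # modeled ages 0..100
mu_age = 0.001 * ages                        # toy age pattern
age_mid = pl.array([2, 40, 150], dtype=int)  # one midpoint beyond the range
idx = pl.clip(age_mid, ages[0], ages[-1]) - ages[0]
print(mu_age.take(idx))                      # midpoint 150 is clipped to age 100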
Exemplo n.º 47
    def run(self, fpath, job):
        param = self.param
        base, _ = ocrolib.allsplitext(fpath)
        basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]

        if param['parallel'] < 2:
            print_info("=== %s %-3d" % (fpath, job))
        raw = ocrolib.read_image_gray(fpath)

        flat = raw
        # estimate skew angle and rotate
        if param['maxskew'] > 0:
            if param['parallel'] < 2:
                print_info("estimating skew angle")
            d0, d1 = flat.shape
            o0, o1 = int(param['bignore']*d0), int(param['bignore']*d1)
            flat = amax(flat)-flat
            flat -= amin(flat)
            est = flat[o0:d0-o0, o1:d1-o1]
            ma = param['maxskew']
            ms = int(2*param['maxskew']*param['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms+1))
            flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
            flat = amax(flat)-flat
        else:
            angle = 0

        # estimate low and high thresholds
        if param['parallel'] < 2:
            print_info("estimating thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(param['bignore']*d0), int(param['bignore']*d1)
        est = flat[o0:d0-o0, o1:d1-o1]
        if param['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = param['escale']
            v = est-filters.gaussian_filter(est, e*20.0)
            v = filters.gaussian_filter(v**2, e*20.0)**0.5
            v = (v > 0.3*amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e*50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e*50))))
            if param['debug'] > 0:
                imshow(v)
                ginput(1, param['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), param['lo'])
        hi = stats.scoreatpercentile(est.ravel(), param['hi'])
        # rescale the image to get the gray scale image
        if param['parallel'] < 2:
            print_info("rescaling")
        flat -= lo
        flat /= (hi-lo)
        flat = clip(flat, 0, 1)
        if param['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, param['debug'])
        deskewed = 1*(flat > param['threshold'])

        # output the normalized grayscale and the thresholded images
        print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (basefile, lo, hi, angle))
        if param['parallel'] < 2:
            print_info("writing")
        ocrolib.write_image_binary(base+".ds.png", deskewed)
        return base+".ds.png"
Exemplo n.º 48
    def _process_segment(self, page, filename, page_id, file_id):
        raw = ocrolib.read_image_gray(filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s" % (page_id))
            return
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
            # use page_id: input_file is not in scope here, and the original
            # `or` bound only the first operand of the concatenation
            LOG.error(page_id + " SKIPPED. " + check +
                      " (use -n to disable this check)")
                return

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            LOG.info("Flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
        LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
        LOG.info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))
        #base, _ = ocrolib.allsplitext(filename)
        #ocrolib.write_image_binary(base + ".bin.png", binarized)
        # ocrolib.write_image_gray(base +".nrm.png", flat)
        # print("########### File path : ", base+".nrm.png")
        # write_to_xml(base+".bin.png")
        # return base+".bin.png"

        bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
        bin_image = ocrolib.array2pil(bin_array)

        file_path = self.workspace.save_image_file(bin_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path, comment="binarized"))
Exemplo n.º 49
def do_fontsize(k):
    return float(clip(max_text_size * sqrt(data[k]),
                      min_text_size, max_text_size))
Exemplo n.º 50
    def process(self):
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
            page = pcgts.get_Page()

            # why does it save the image ??
            page_image, page_xywh, _ = self.workspace.image_from_page(
                page, page_id)

            if self.parameter['parallel'] < 2:
                LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)
            raw = ocrolib.read_image_gray(page_image.filename)

            flat = raw
            #flat = np.array(binImg)
            # estimate skew angle and rotate
            if self.parameter['maxskew'] > 0:
                if self.parameter['parallel'] < 2:
                    LOG.info("Estimating Skew Angle")
                d0, d1 = flat.shape
                o0, o1 = int(self.parameter['bignore'] * d0), int(
                    self.parameter['bignore'] * d1)
                flat = amax(flat) - flat
                flat -= amin(flat)
                est = flat[o0:d0 - o0, o1:d1 - o1]
                ma = self.parameter['maxskew']
                ms = int(2 * self.parameter['maxskew'] *
                         self.parameter['skewsteps'])
                angle = self.estimate_skew_angle(est,
                                                 linspace(-ma, ma, ms + 1))
                flat = interpolation.rotate(flat,
                                            angle,
                                            mode='constant',
                                            reshape=0)
                flat = amax(flat) - flat
            else:
                angle = 0

            # self.write_angles_to_pageXML(base,angle)
            # estimate low and high thresholds
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Thresholds")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if self.parameter['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = self.parameter['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if self.parameter['debug'] > 0:
                    imshow(v)
                    ginput(1, self.parameter['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
            hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
            # rescale the image to get the gray scale image
            if self.parameter['parallel'] < 2:
                LOG.info("Rescaling")
            flat -= lo
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if self.parameter['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            deskewed = 1 * (flat > self.parameter['threshold'])

            # output the normalized grayscale and the thresholded images
            LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                     (pcgts.get_Page().imageFilename, lo, hi, angle))
            if self.parameter['parallel'] < 2:
                LOG.info("Writing")
            #ocrolib.write_image_binary(base+".ds.png", deskewed)

            #TODO: Need some clarification as the results affect the following pre-processing steps.
            #orientation = -angle
            #orientation = 180 - ((180 - orientation) % 360)
            pcgts.get_Page().set_orientation(angle)
            #print(orientation, angle)

            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.output_file_grp, n)

            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts).encode('utf-8'))
Exemplo n.º 51
from matplotlib.toolkits.basemap import Basemap, cm
import pylab, copy
from matplotlib import rcParams

# make tick labels smaller
rcParams['xtick.labelsize']=9
rcParams['ytick.labelsize']=9

# plot rainfall from NWS using special precipitation
# colormap used by the NWS, and included in basemap.

# NetCDFFile is not imported in the original snippet; basemap-era examples
# typically got it from Scientific.IO.NetCDF.
nc = NetCDFFile('nws_precip_conus_20061222.nc')
# data from http://www.srh.noaa.gov/rfcshare/precip_analysis_new.php
prcpvar = nc.variables['amountofprecip']
data = 0.01*prcpvar[:]
data = pylab.clip(data,0,10000)
latcorners = nc.variables['lat'][:]
loncorners = -nc.variables['lon'][:]
plottitle = prcpvar.long_name+' for period ending '+prcpvar.dateofdata
print data.min(), data.max()
print latcorners
print loncorners
print plottitle
print data.shape
lon_0 = -nc.variables['true_lon'].getValue()
lat_0 = nc.variables['true_lat'].getValue()
# create polar stereographic Basemap instance.
m = Basemap(projection='stere',lon_0=lon_0,lat_0=90.,lat_ts=lat_0,\
            llcrnrlat=latcorners[0],urcrnrlat=latcorners[2],\
            llcrnrlon=loncorners[0],urcrnrlon=loncorners[2],\
            rsphere=6371200.,resolution='l',area_thresh=10000)
Exemplo n.º 52
def mu_interval(mu_age=mu_age,
                age_mid=pl.array(age_mid, dtype=int)):
    return mu_age.take(pl.clip(age_mid, ages[0], ages[-1]) - ages[0])
Exemplo n.º 53
Arquivo: ism.py Projeto: peterhm/gbd
def consistent(
    model,
    reference_area="all",
    reference_sex="total",
    reference_year="all",
    priors={},
    zero_re=True,
    rate_type="neg_binom",
):
    """ Generate PyMC objects for consistent model of epidemological data
    
    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `root_area, root_sex, root_year` : the node of the model to
        fit consistently
      - `priors` : dictionary, with keys for data types for lists of
        priors on age patterns
      - `zero_re` : boolean, change one stoch from each set of
        siblings in area hierarchy to a 'sum to zero' deterministic
      - `rate_type` : str or dict, optional. One of 'beta_binom',
        'binom', 'log_normal_model', 'neg_binom',
        'neg_binom_lower_bound_model', 'neg_binom_model',
        'normal_model', 'offest_log_normal', or 'poisson', optionally
        as a dict, with keys i, r, f, p, m_with

    :Results:
      - Returns dict of dicts of PyMC objects, including 'i', 'p',
        'r', 'f', the covariate adjusted predicted values for each row
        of data
    
    .. note::
      - dict priors can contain keys (t, 'mu') and (t, 'sigma') to
        tell the consistent model about the priors on levels for the
        age-specific rate of type t (these are arrays giving the a
        priori mean and standard deviation for mu_age[t])
      - it can also contain dicts keyed by t alone to insert empirical
        priors on the fixed effects and random effects

    """
    # TODO: refactor the way priors are handled
    # current approach is much more complicated than necessary
    for t in "i r pf p rr f".split():
        if t in priors:
            model.parameters[t]["random_effects"].update(priors[t]["random_effects"])
            model.parameters[t]["fixed_effects"].update(priors[t]["fixed_effects"])

    # if rate_type is a string, make it into a dict
    if type(rate_type) == str:
        rate_type = dict(i=rate_type, r=rate_type, f=rate_type, p=rate_type, m_with=rate_type)

    if "m_with" not in rate_type.keys():
        rate_type["m_with"] = "neg_binom"
    if "i" not in rate_type.keys():
        rate_type["i"] = "neg_binom"
    if "r" not in rate_type.keys():
        rate_type["r"] = "neg_binom"
    if "f" not in rate_type.keys():
        rate_type["f"] = "neg_binom"

    rate = {}
    ages = model.parameters["ages"]

    for t in "irf":
        rate[t] = age_specific_rate(
            model,
            t,
            reference_area,
            reference_sex,
            reference_year,
            mu_age=None,
            mu_age_parent=priors.get((t, "mu")),
            sigma_age_parent=priors.get((t, "sigma")),
            zero_re=zero_re,
            rate_type=rate_type[t],
        )[
            t
        ]  # age_specific_rate()[t] is to create proper nesting of dict

        # set initial values from data
        if t in priors:
            if isinstance(priors[t], mc.Node):
                initial = priors[t].value
            else:
                initial = pl.array(priors[t])
        else:
            initial = rate[t]["mu_age"].value.copy()
            df = model.get_data(t)
            if len(df.index) > 0:
                mean_data = df.groupby(["age_start", "age_end"]).mean().delevel()
                for i, row in mean_data.T.iteritems():
                    start = row["age_start"] - rate[t]["ages"][0]
                    end = row["age_end"] - rate[t]["ages"][0]
                    initial[start:end] = row["value"]

        for i, k in enumerate(rate[t]["knots"]):
            rate[t]["gamma"][i].value = pl.log(initial[k - rate[t]["ages"][0]] + 1.0e-9)

    m_all = 0.01 * pl.ones(101)
    df = model.get_data("m_all")
    if len(df.index) == 0:
        print "WARNING: all-cause mortality data not found, using m_all = .01"
    else:
        mean_mortality = df.groupby(["age_start", "age_end"]).mean().delevel()

        knots = []
        for i, row in mean_mortality.T.iteritems():
            knots.append(pl.clip((row["age_start"] + row["age_end"] + 1.0) / 2.0, 0, 100))

            m_all[knots[-1]] = row["value"]

        # extend knots as constant beyond endpoints
        knots = sorted(knots)
        m_all[0] = m_all[knots[0]]
        m_all[100] = m_all[knots[-1]]

        knots.insert(0, 0)
        knots.append(100)

        m_all = scipy.interpolate.interp1d(knots, m_all[knots], kind="linear")(pl.arange(101))
    m_all = m_all[ages]

    logit_C0 = mc.Uniform("logit_C0", -15, 15, value=-10.0)

    # use Runge-Kutta 4 ODE solver
    import dismod_ode

    N = len(m_all)
    num_step = 10  # double until it works
    ages = pl.array(ages, dtype=float)
    fun = dismod_ode.ode_function(num_step, ages, m_all)

    @mc.deterministic
    def mu_age_p(logit_C0=logit_C0, i=rate["i"]["mu_age"], r=rate["r"]["mu_age"], f=rate["f"]["mu_age"]):

        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)

        x = pl.hstack((i, r, f, 1 - C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[pl.isnan(p)] = 0.0
        return p

    p = age_specific_rate(
        model,
        "p",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_p,
        mu_age_parent=priors.get(("p", "mu")),
        sigma_age_parent=priors.get(("p", "sigma")),
        zero_re=zero_re,
        rate_type=rate_type["p"],
    )["p"]

    @mc.deterministic
    def mu_age_pf(p=p["mu_age"], f=rate["f"]["mu_age"]):
        return p * f

    pf = age_specific_rate(
        model,
        "pf",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_pf,
        mu_age_parent=priors.get(("pf", "mu")),
        sigma_age_parent=priors.get(("pf", "sigma")),
        lower_bound="csmr",
        include_covariates=False,
        zero_re=zero_re,
    )["pf"]

    @mc.deterministic
    def mu_age_m(pf=pf["mu_age"], m_all=m_all):
        return (m_all - pf).clip(1.0e-6, 1.0e6)

    rate["m"] = age_specific_rate(
        model,
        "m_wo",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_m,
        None,
        None,
        include_covariates=False,
        zero_re=zero_re,
    )["m_wo"]

    @mc.deterministic
    def mu_age_rr(m=rate["m"]["mu_age"], f=rate["f"]["mu_age"]):
        return (m + f) / m

    rr = age_specific_rate(
        model,
        "rr",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_rr,
        mu_age_parent=priors.get(("rr", "mu")),
        sigma_age_parent=priors.get(("rr", "sigma")),
        rate_type="log_normal",
        include_covariates=False,
        zero_re=zero_re,
    )["rr"]

    @mc.deterministic
    def mu_age_smr(m=rate["m"]["mu_age"], f=rate["f"]["mu_age"], m_all=m_all):
        return (m + f) / m_all

    smr = age_specific_rate(
        model,
        "smr",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_smr,
        mu_age_parent=priors.get(("smr", "mu")),
        sigma_age_parent=priors.get(("smr", "sigma")),
        rate_type="log_normal",
        include_covariates=False,
        zero_re=zero_re,
    )["smr"]

    @mc.deterministic
    def mu_age_m_with(m=rate["m"]["mu_age"], f=rate["f"]["mu_age"]):
        return m + f

    m_with = age_specific_rate(
        model,
        "m_with",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_m_with,
        mu_age_parent=priors.get(("m_with", "mu")),
        sigma_age_parent=priors.get(("m_with", "sigma")),
        include_covariates=False,
        zero_re=zero_re,
        rate_type=rate_type["m_with"],
    )["m_with"]

    # duration = E[time in bin C]
    @mc.deterministic
    def mu_age_X(r=rate["r"]["mu_age"], m=rate["m"]["mu_age"], f=rate["f"]["mu_age"]):
        hazard = r + m + f
        pr_not_exit = pl.exp(-hazard)
        X = pl.empty(len(hazard))
        X[-1] = 1 / hazard[-1]
        for i in reversed(range(len(X) - 1)):
            X[i] = pr_not_exit[i] * (X[i + 1] + 1) + 1 / hazard[i] * (1 - pr_not_exit[i]) - pr_not_exit[i]
        return X

    X = age_specific_rate(
        model,
        "X",
        reference_area,
        reference_sex,
        reference_year,
        mu_age_X,
        mu_age_parent=priors.get(("X", "mu")),
        sigma_age_parent=priors.get(("X", "sigma")),
        rate_type="normal",
        include_covariates=True,
        zero_re=zero_re,
    )["X"]

    vars = rate
    vars.update(logit_C0=logit_C0, p=p, pf=pf, rr=rr, smr=smr, m_with=m_with, X=X)
    return vars
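
When remission is fast (the r.min() > 5.99 branch of mu_age_p), the ODE is skipped: at equilibrium, inflow i balances outflow (r + m + f), so prevalence is approximately i / (r + m + f). A toy check of that shortcut (values illustrative):

import pylab as pl

i = pl.array([0.10, 0.20])   # incidence
r = pl.array([6.0, 8.0])     # remission fast enough to trigger the shortcut
m = pl.array([0.01, 0.02])   # other-cause mortality
f = pl.array([0.00, 0.01])   # excess mortality
print(i / (r + m + f))       # equilibrium prevalence, as in mu_age_p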
Exemplo n.º 54
def one_compartment_ode(S, t, h_b, h_m):
    # piecewise-constant functions of time implemented as arrays
    t = int(pl.clip(t, 0, len(h_b) - 1))
    return (h_b[t] - h_m[t]) * S
Exemplo n.º 55
def binarize_image(job):
    image_object, i = job
    raw = read_image_gray(image_object)
    image = raw - amin(raw)
    if amax(image) == amin(image):
        return  # Image is empty
    image /= amax(image)
    check = check_page(amax(image) - image)
    if check is not None:
        return
    if args.gray:
        extreme = 0
    else:
        extreme = (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(
            image.shape)

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        m = interpolation.zoom(image, args.zoom)
        m = filters.percentile_filter(m, args.perc, size=(args.range, 2))
        m = filters.percentile_filter(m, args.perc, size=(2, args.range))
        m = interpolation.zoom(m, 1.0 / args.zoom)
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    if args.maxskew > 0:
        d0, d1 = flat.shape
        o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args.maxskew
        ms = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    d0, d1 = flat.shape
    o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]

    if args.escale > 0:
        e = args.escale
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binary = 1 * (flat > args.threshold)
    return (binary, flat)
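
Every binarizer in this collection ends with the same lo/hi rescaling: estimate black and white levels as percentiles of a border-trimmed region, rescale into [0, 1], and threshold. A minimal standalone version of that core, with illustrative parameter values rather than any project's defaults:

import numpy as np
from scipy import stats

def normalize_and_binarize(flat, bignore=0.1, lo_pct=5, hi_pct=90, threshold=0.5):
    # trim a border before estimating the black (lo) and white (hi) levels
    d0, d1 = flat.shape
    o0, o1 = int(bignore * d0), int(bignore * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    lo = stats.scoreatpercentile(est.ravel(), lo_pct)
    hi = stats.scoreatpercentile(est.ravel(), hi_pct)
    flat = np.clip((flat - lo) / (hi - lo), 0, 1)   # grayscale in [0, 1]
    return 1 * (flat > threshold)                   # binary 0/1 image

binary = normalize_and_binarize(np.random.rand(100, 100))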
Exemplo n.º 56
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):

        raw = ocrolib.pil2array(page_image)
        flat = raw.astype("float64")

        # estimate skew angle and rotate
        if self.parameter['maxskew'] > 0:
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Skew Angle")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            ma = self.parameter['maxskew']
            ms = int(2 * self.parameter['maxskew'] *
                     self.parameter['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat,
                                        angle,
                                        mode='constant',
                                        reshape=0)
            flat = amax(flat) - flat
        else:
            angle = 0

        # self.write_angles_to_pageXML(base,angle)
        # estimate low and high thresholds
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        if self.parameter['parallel'] < 2:
            LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        deskewed = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        #LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %(lo, hi, angle))

        #TODO: Need some clarification as the results affect the following pre-processing steps.
        #orientation = -angle
        #orientation = 180 - ((180 - orientation) % 360)

        if angle is None:  # FIXME: quick fix to prevent angle of "none"
            angle = 0

        page.set_orientation(angle)

        page_xywh['features'] += ',deskewed'
        bin_array = array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B')
        page_image = ocrolib.array2pil(bin_array)

        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)
        file_path = self.workspace.save_image_file(page_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))
Exemplo n.º 57
0
    def _input_data_from_gbd_json(dm, covs):
        """ translate input data"""
        import dismod3

        # remove any rows with 'ignore' columns set to 1
        dm['data'] = [
            d for d in dm['data'] if not (d.get('Ignore') or d.get('ignore'))
        ]

        # remove any data with type-specific heterogeneity set to Unusable
        if 'global_priors' in dm['params']:
            for t in dm['params']['global_priors']['heterogeneity']:
                if dm['params']['global_priors']['heterogeneity'][
                        t] == 'Unusable':
                    print '%s has heterogeneity unusable, dropping %d rows' % (
                        t,
                        len([
                            d for d in dm['data']
                            if d['data_type'] == t + ' data'
                        ]))
                    dm['data'] = [
                        d for d in dm['data'] if d['data_type'] != t + ' data'
                    ]

        input_data = {}
        for field in 'effective_sample_size age_start age_end year_start year_end'.split(
        ):
            input_data[field] = []
            for row in dm['data']:
                val = row.get(field, '')
                if val == '':
                    val = pl.nan
                input_data[field].append(float(val))

        input_data['sex'] = []
        for row in dm['data']:
            input_data['sex'].append(row['sex'])

            # replace sex 'all' with sex 'total'
            if input_data['sex'][-1] == 'all':
                input_data['sex'][-1] = 'total'

            assert input_data['sex'][-1] != ''

        new_type_name = {
            'incidence data': 'i',
            'prevalence data': 'p',
            'remission data': 'r',
            'excess-mortality data': 'f',
            'prevalence x excess-mortality data': 'pf',
            'all-cause mortality data': 'm_all',
            'relative-risk data': 'rr',
            'duration data': 'X',
            'smr data': 'smr',
            'cause-specific mortality data': 'csmr',
            'mortality data': 'm_with'
        }
        input_data['data_type'] = [
            new_type_name[row['data_type']] for row in dm['data']
        ]

        for field in 'value standard_error lower_ci upper_ci'.split():
            input_data[field] = []
            for row in dm['data']:
                val = row.get(field, '')
                if val == '':
                    val = pl.nan
                else:
                    val = float(val) / float(
                        row.get('units', '1').replace(',', ''))
                input_data[field].append(val)

        input_data['area'] = []
        for row in dm['data']:
            val = row.get('country_iso3_code', '')
            if val == '' or val == 'all':
                val = dismod3.utils.clean(row['gbd_region'])
            input_data['area'].append(val)

            assert input_data['area'][-1] != ''

        input_data['age_weights'] = [
            ';'.join(['%.4f' % w for w in row.get('age_weights', [])])
            for row in dm['data']
        ]  # store age_weights as semi-colon delimited text, since Pandas doesn't like arrays in arrays and doesn't save comma-separated fields correctly

        # add selected covariates
        if 'covariates' in dm['params']:
            for level in ['Country_level', 'Study_level']:
                for cv in dm['params']['covariates'].get(level, []):
                    if dm['params']['covariates'][level][cv]['rate']['value']:
                        input_data['x_%s' % cv] = []
                        for row in dm['data']:
                            if level == 'Country_level':
                                if row['data_type'] == 'all-cause mortality data':
                                    input_data['x_%s' % cv].append(
                                        0.
                                    )  # don't bother to merge covariates into all-cause mortality data
                                elif row['region'] == 'all':
                                    input_data['x_%s' % cv].append(
                                        0.
                                     )  # don't bother to merge covariates into regional data

                                elif row.get('country_iso3_code'):
                                    iso3 = row['country_iso3_code']

                                    # special case for countries that CODEm does not report on
                                    if 'ASDR' in cv:
                                        if iso3 in ['HKG', 'MAC']:
                                            iso3 = 'TWN'  # TODO: average over CHN, PRK, TWN
                                        if iso3 in ['PRI', 'BMU']:
                                            iso3 = 'CUB'  # TODO: average over caribbean countries

                                    input_data['x_%s' % cv].append(
                                        covs[cv][iso3, row['sex'],
                                                 pl.clip((row['year_start'] +
                                                          row['year_end']) /
                                                         2, 1980., 2012.)])
                                else:
                                    # handle regional data
                                    df = covs[(covs['region'] == dismod3.utils.
                                               clean(row['gbd_region']))
                                              & (covs.index.get_level_values(1)
                                                 == row['sex']) &
                                              (covs.index.get_level_values(2)
                                               == pl.clip(
                                                   (row['year_start'] +
                                                    row['year_end']) / 2,
                                                   1980., 2012.))]
                                    #input_data['x_%s'%cv].append(
                                    #    (df[cv]*df['pop']).sum() / df['pop'].sum()
                                    #    )
                                    input_data['x_%s' % cv].append(
                                        0.)  # TODO: remove regional data
                            elif level == 'Study_level':
                                input_data['x_%s' % cv].append(
                                    float(
                                        row.get(dismod3.utils.clean(cv), '')
                                        or 0.))
                    # also include column of input data for 'z_%s'%cv if it is requested
                    if dm['params']['covariates'][level][cv]['error']['value']:
                        input_data['z_%s' % cv] = [
                            float(row.get(dismod3.utils.clean(cv), '') or 0.)
                            for row in dm['data']
                        ]

        input_data = pandas.DataFrame(input_data)

        # replace age_end 1 with age_end 0, correcting a common mistake in data entry
        i = (input_data['age_start'] == 0) & (input_data['age_end'] == 1)
        if i.sum() > 0:
            print 'WARNING: correcting age_end in %d rows that have age_start == 0, age_end == 1 (old format uses "demographic" notation)' % i.sum(
            )
            input_data['age_end'][i] = 0

        # replace triple underscores with single underscore, a problem with consistency in the spacing in "North Africa / Middle East"
        input_data['area'] = [
            a.replace('___', '_') for a in input_data['area']
        ]

        # print checks of data
        for i, row in input_data.T.iteritems():
            if pl.isnan(row['value']):
                print 'WARNING: value in row %d is missing' % i
        input_data = input_data[~pl.isnan(input_data['value'])]

        return input_data