Example #1
import ctypes
import numpy

# loadGlmLib is the glmnet_python helper that loads the compiled glmnet
# fortran shared library; adjust this import to match your package layout
from loadGlmLib import loadGlmLib


def elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty,
          nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr,
          maxit, family):

    # load shared fortran library
    glmlib = loadGlmLib()

    # pre-process data
    ybar = numpy.dot(numpy.transpose(y), weights)
    ybar = ybar / sum(weights)
    nulldev = (y - ybar)**2 * weights
    # ka
    lst = ['covariance', 'naive']
    ka = [i for i in range(len(lst)) if lst[i] == gtype]
    if len(ka) == 0:
        raise ValueError('unrecognized type for ka')
    else:
        ka = ka[0] + 1  # convert from 0-based to 1-based index for fortran
    # offset
    if len(offset) == 0:
        offset = y * 0
        is_offset = False
    else:
        is_offset = True

    # remove offset from y
    y = y - offset

    # now convert types and allocate memory before calling
    # glmnet fortran library
    ######################################
    # --------- PROCESS INPUTS -----------
    ######################################
    # force inputs into fortran order and into the correct numpy datatype
    copyFlag = False
    x = x.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    irs = irs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    pcs = pcs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    y = y.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    weights = weights.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    jd = jd.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    vp = vp.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    cl = cl.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    ulam = ulam.astype(dtype=numpy.float64, order='F', copy=copyFlag)

    ######################################
    # --------- ALLOCATE OUTPUTS ---------
    ######################################
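    # the fortran routine writes into preallocated buffers sized for the worst
    # case (nlam lambda values, nx variables); the actual number of fitted
    # lambdas comes back in lmu and the arrays are trimmed afterwards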
    # lmu
    lmu = -1
    lmu_r = ctypes.c_int(lmu)
    # a0
    a0 = numpy.zeros([nlam], dtype=numpy.float64)
    a0 = a0.astype(dtype=numpy.float64, order='F', copy=False)
    a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ca
    ca = numpy.zeros([nx, nlam], dtype=numpy.float64)
    ca = ca.astype(dtype=numpy.float64, order='F', copy=False)
    ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ia
    ia = -1 * numpy.ones([nx], dtype=numpy.int32)
    ia = ia.astype(dtype=numpy.int32, order='F', copy=False)
    ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # nin
    nin = -1 * numpy.ones([nlam], dtype=numpy.int32)
    nin = nin.astype(dtype=numpy.int32, order='F', copy=False)
    nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # rsq
    rsq = -1 * numpy.ones([nlam], dtype=numpy.float64)
    rsq = rsq.astype(dtype=numpy.float64, order='F', copy=False)
    rsq_r = rsq.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # alm
    alm = -1 * numpy.ones([nlam], dtype=numpy.float64)
    alm = alm.astype(dtype=numpy.float64, order='F', copy=False)
    alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # nlp
    nlp = -1
    nlp_r = ctypes.c_int(nlp)
    # jerr
    jerr = -1
    jerr_r = ctypes.c_int(jerr)

    #  ###################################
    #   main glmnet fortran caller
    #  ###################################
    if is_sparse:
        # sparse elnet
        glmlib.spelnet_(
            ctypes.byref(ctypes.c_int(ka)),
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(len(weights))),
            ctypes.byref(ctypes.c_int(nvars)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            pcs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            irs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(lmu_r), a0_r,
            ca_r, ia_r, nin_r, rsq_r, alm_r, ctypes.byref(nlp_r),
            ctypes.byref(jerr_r))
    else:
        # call fortran elnet routine
        glmlib.elnet_(ctypes.byref(ctypes.c_int(ka)),
                      ctypes.byref(ctypes.c_double(parm)),
                      ctypes.byref(ctypes.c_int(len(weights))),
                      ctypes.byref(ctypes.c_int(nvars)),
                      x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                      y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                      weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                      jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
                      vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                      cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                      ctypes.byref(ctypes.c_int(ne)),
                      ctypes.byref(ctypes.c_int(nx)),
                      ctypes.byref(ctypes.c_int(nlam)),
                      ctypes.byref(ctypes.c_double(flmin)),
                      ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                      ctypes.byref(ctypes.c_double(thresh)),
                      ctypes.byref(ctypes.c_int(isd)),
                      ctypes.byref(ctypes.c_int(intr)),
                      ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(lmu_r),
                      a0_r, ca_r, ia_r, nin_r, rsq_r, alm_r,
                      ctypes.byref(nlp_r), ctypes.byref(jerr_r))

    #  ###################################
    #   post process results
    #  ###################################

    # check for error
    if jerr_r.value > 0:
        raise ValueError("Fatal glmnet error in library call: error code = "
                         + str(jerr_r.value))
    elif jerr_r.value < 0:
        print("Warning: Non-fatal error in glmnet library call: error code = "
              + str(jerr_r.value))
        print("Check results for accuracy. Partial or no results returned.")

    # clip output to correct sizes
    lmu = lmu_r.value
    a0 = a0[0:lmu]
    ca = ca[0:nx, 0:lmu]
    ia = ia[0:nx]
    nin = nin[0:lmu]
    rsq = rsq[0:lmu]
    alm = alm[0:lmu]

    # ninmax
    ninmax = max(nin)
    # fix first value of alm (the fortran code effectively returns +inf there):
    # extrapolate log-linearly, log(alm[0]) = 2*log(alm[1]) - log(alm[2])
    if lempty:
        t1 = numpy.log(alm[1])
        t2 = numpy.log(alm[2])
        alm[0] = numpy.exp(2 * t1 - t2)
    # create return fit dictionary
    if ninmax > 0:
        ca = ca[0:ninmax, :]
        df = numpy.sum(numpy.absolute(ca) > 0, axis=0)
        ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
        oja = numpy.argsort(ja)
        ja1 = ja[oja]
        beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
        beta[ja1, :] = ca[oja, :]
    else:
        beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
        df = numpy.zeros([1, lmu], dtype=numpy.float64)

    fit = dict()
    fit['a0'] = a0
    fit['beta'] = beta
    fit['dev'] = rsq
    fit['nulldev'] = nulldev
    fit['df'] = df
    fit['lambdau'] = alm
    fit['npasses'] = nlp_r.value
    fit['jerr'] = jerr_r.value
    fit['dim'] = numpy.array([nvars, lmu], dtype=numpy.integer)
    fit['offset'] = is_offset
    fit['class'] = 'elnet'

    #  ###################################
    #   return to caller
    #  ###################################

    return fit
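
A minimal usage sketch (assumptions: the public glmnet_python API with its top-level glmnet() entry point, and made-up toy data). glmnet() validates the inputs, fills in weights, penalty factors and box constraints, and then dispatches to elnet() above for the Gaussian family:

import numpy
from glmnet_python import glmnet

# toy regression problem; sizes and parameter values are illustrative only
numpy.random.seed(0)
x = numpy.random.rand(100, 10)
y = numpy.random.rand(100, 1)

# family='gaussian' is the path that ends up in elnet()
fit = glmnet(x=x.copy(), y=y.copy(), family='gaussian', alpha=0.5, nlambda=20)
print(fit['lambdau'].shape, fit['beta'].shape)
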
Example #2
def fishnet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd,
            vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit,
            family):

    # load shared fortran library
    glmlib = loadGlmLib()

    if numpy.any(y < 0):
        raise ValueError('negative responses not permitted for Poisson family')

    if len(offset) == 0:
        offset = y * 0
        is_offset = False
    else:
        is_offset = True

    # now convert types and allocate memory before calling
    # glmnet fortran library
    ######################################
    # --------- PROCESS INPUTS -----------
    ######################################
    # force inputs into fortran order and numpy float64
    copyFlag = False
    x = x.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    irs = irs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    pcs = pcs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    y = y.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    weights = weights.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    offset = offset.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    jd = jd.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    vp = vp.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    cl = cl.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    ulam = ulam.astype(dtype=numpy.float64, order='F', copy=copyFlag)

    ######################################
    # --------- ALLOCATE OUTPUTS ---------
    ######################################
    # lmu
    lmu = -1
    lmu_r = ctypes.c_int(lmu)
    # a0
    a0 = numpy.zeros([nlam], dtype=numpy.float64)
    a0 = a0.astype(dtype=numpy.float64, order='F', copy=False)
    a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ca
    ca = numpy.zeros([nx, nlam], dtype=numpy.float64)
    ca = ca.astype(dtype=numpy.float64, order='F', copy=False)
    ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ia
    ia = -1 * numpy.ones([nx], dtype=numpy.int32)
    ia = ia.astype(dtype=numpy.int32, order='F', copy=False)
    ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # nin
    nin = -1 * numpy.ones([nlam], dtype=numpy.int32)
    nin = nin.astype(dtype=numpy.int32, order='F', copy=False)
    nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # dev
    dev = -1 * numpy.ones([nlam], dtype=numpy.float64)
    dev = dev.astype(dtype=numpy.float64, order='F', copy=False)
    dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # alm
    alm = -1 * numpy.ones([nlam], dtype=numpy.float64)
    alm = alm.astype(dtype=numpy.float64, order='F', copy=False)
    alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # nlp
    nlp = -1
    nlp_r = ctypes.c_int(nlp)
    # jerr
    jerr = -1
    jerr_r = ctypes.c_int(jerr)
    # dev0
    dev0 = -1
    dev0_r = ctypes.c_double(dev0)

    #  ###################################
    #   main glmnet fortran caller
    #  ###################################
    if is_sparse:
        # sparse fishnet
        glmlib.spfishnet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            pcs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            irs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(lmu_r), a0_r,
            ca_r, ia_r, nin_r, ctypes.byref(dev0_r), dev_r, alm_r,
            ctypes.byref(nlp_r), ctypes.byref(jerr_r))
    else:
        glmlib.fishnet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(lmu_r), a0_r,
            ca_r, ia_r, nin_r, ctypes.byref(dev0_r), dev_r, alm_r,
            ctypes.byref(nlp_r), ctypes.byref(jerr_r))

    #  ###################################
    #   post process results
    #  ###################################

    # check for error
    if jerr_r.value > 0:
        raise ValueError("Fatal glmnet error in library call: error code = "
                         + str(jerr_r.value))
    elif jerr_r.value < 0:
        print("Warning: Non-fatal error in glmnet library call: error code = "
              + str(jerr_r.value))
        print("Check results for accuracy. Partial or no results returned.")

    # clip output to correct sizes
    lmu = lmu_r.value
    a0 = a0[0:lmu]
    ca = ca[0:nx, 0:lmu]
    ia = ia[0:nx]
    nin = nin[0:lmu]
    dev = dev[0:lmu]
    alm = alm[0:lmu]

    # ninmax
    ninmax = max(nin)
    # fix first value of alm (from inf to correct value)
    if ulam[0] == 0.0:
        t1 = numpy.log(alm[1])
        t2 = numpy.log(alm[2])
        alm[0] = numpy.exp(2 * t1 - t2)
    # create return fit dictionary
    dd = numpy.array([nvars, lmu], dtype=numpy.integer)
    if ninmax > 0:
        ca = ca[0:ninmax, :]
        df = numpy.sum(numpy.absolute(ca) > 0, axis=0)
        ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
        oja = numpy.argsort(ja)
        ja1 = ja[oja]
        beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
        beta[ja1, :] = ca[oja, :]
    else:
        beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
        df = numpy.zeros([1, lmu], dtype=numpy.float64)

    fit = dict()
    fit['a0'] = a0
    fit['beta'] = beta
    fit['dev'] = dev
    fit['nulldev'] = dev0_r.value
    fit['df'] = df
    fit['lambdau'] = alm
    fit['npasses'] = nlp_r.value
    fit['jerr'] = jerr_r.value
    fit['dim'] = dd
    fit['offset'] = is_offset
    fit['class'] = 'fishnet'

    #  ###################################
    #   return to caller
    #  ###################################

    return fit
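
A minimal usage sketch for the Poisson path (same assumptions: the top-level glmnet() entry point and made-up count data). fishnet() is reached with family='poisson' and rejects negative responses before calling the fortran code:

import numpy
from glmnet_python import glmnet

# toy count data; values are illustrative only
numpy.random.seed(1)
x = numpy.random.rand(50, 5)
y = numpy.random.poisson(lam=2.0, size=(50, 1)) * 1.0

fit = glmnet(x=x.copy(), y=y.copy(), family='poisson')
print(fit['class'], fit['dim'])
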
Example #3
def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd,
           vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit, kopt,
           family):

    # load shared fortran library
    glmlib = loadGlmLib()

    #
    noo = y.shape[0]
    if len(y.shape) > 1:
        nc = y.shape[1]
    else:
        nc = 1

    if (noo != nobs):
        raise ValueError(
            'x and y have different number of rows in call to glmnet')

    if nc == 1:
        classes, sy = numpy.unique(y, return_inverse=True)
        nc = len(classes)
        indexes = numpy.eye(nc, nc)
        y = indexes[sy, :]
    else:
        classes = numpy.arange(nc) + 1  # 1:nc
    #
    if family == 'binomial':
        if nc > 2:
            raise ValueError(
                'More than two classes in y. use multinomial family instead')
        else:
            nc = 1
            y = y[:, [1, 0]]
    #
    # indices of observations kept after dropping zero weights; an empty
    # array means no subsetting was done
    t = numpy.empty([0], dtype=numpy.integer)
    if (len(weights) != 0):
        t = weights > 0
        if ~numpy.all(t):
            t = numpy.reshape(t, (len(y), ))
            y = y[t, :]
            x = x[t, :]
            weights = weights[t]
            nobs = numpy.sum(t)
        else:
            t = numpy.empty([0], dtype=numpy.integer)
        #
        if len(y.shape) == 1:
            mv = len(y)
            ny = 1
        else:
            mv, ny = y.shape

        y = y * numpy.tile(weights, (1, ny))

    #
    if len(offset) == 0:
        offset = y * 0
        is_offset = False
    else:
        if len(t) != 0:
            offset = offset[t, :]
        do = offset.shape
        if do[0] != nobs:
            raise ValueError(
                'offset should have the same number of values as observations in binomial/multinomial call to glmnet'
            )
        if nc == 1:
            if do[1] == 1:
                offset = numpy.column_stack((offset, -offset))
            if do[1] > 2:
                raise ValueError(
                    'offset should have 1 or 2 columns in binomial call to glmnet'
                )
        if (family == 'multinomial') and (do[1] != nc):
            raise ValueError(
                'offset should have same shape as y in multinomial call to glmnet'
            )
        is_offset = True

    # now convert types and allocate memory before calling
    # glmnet fortran library
    ######################################
    # --------- PROCESS INPUTS -----------
    ######################################
    # force inputs into fortran order and numpy float64
    copyFlag = False
    x = x.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    irs = irs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    pcs = pcs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    y = y.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    weights = weights.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    offset = offset.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    jd = jd.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    vp = vp.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    cl = cl.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    ulam = ulam.astype(dtype=numpy.float64, order='F', copy=copyFlag)

    ######################################
    # --------- ALLOCATE OUTPUTS ---------
    ######################################
    # lmu
    lmu = -1
    lmu_r = ctypes.c_int(lmu)
    # a0, ca
    if nc == 1:
        a0 = numpy.zeros([nlam], dtype=numpy.float64)
        ca = numpy.zeros([nx, nlam], dtype=numpy.float64)
    else:
        a0 = numpy.zeros([nc, nlam], dtype=numpy.float64)
        ca = numpy.zeros([nx, nc, nlam], dtype=numpy.float64)
    # a0
    a0 = a0.astype(dtype=numpy.float64, order='F', copy=False)
    a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ca
    ca = ca.astype(dtype=numpy.float64, order='F', copy=False)
    ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ia
    ia = -1 * numpy.ones([nx], dtype=numpy.int32)
    ia = ia.astype(dtype=numpy.int32, order='F', copy=False)
    ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # nin
    nin = -1 * numpy.ones([nlam], dtype=numpy.int32)
    nin = nin.astype(dtype=numpy.int32, order='F', copy=False)
    nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # dev
    dev = -1 * numpy.ones([nlam], dtype=numpy.float64)
    dev = dev.astype(dtype=numpy.float64, order='F', copy=False)
    dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # alm
    alm = -1 * numpy.ones([nlam], dtype=numpy.float64)
    alm = alm.astype(dtype=numpy.float64, order='F', copy=False)
    alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # nlp
    nlp = -1
    nlp_r = ctypes.c_int(nlp)
    # jerr
    jerr = -1
    jerr_r = ctypes.c_int(jerr)
    # dev0
    dev0 = -1
    dev0_r = ctypes.c_double(dev0)

    #  ###################################
    #   main glmnet fortran caller
    #  ###################################
    if is_sparse:
        # sparse lognet
        glmlib.splognet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)), ctypes.byref(ctypes.c_int(nc)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            pcs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            irs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)),
            ctypes.byref(ctypes.c_int(kopt)), ctypes.byref(lmu_r), a0_r,
            ca_r, ia_r, nin_r, ctypes.byref(dev0_r), dev_r, alm_r,
            ctypes.byref(nlp_r), ctypes.byref(jerr_r))
    else:
        # call fortran lognet routine
        glmlib.lognet_(ctypes.byref(ctypes.c_double(parm)),
                       ctypes.byref(ctypes.c_int(nobs)),
                       ctypes.byref(ctypes.c_int(nvars)),
                       ctypes.byref(ctypes.c_int(nc)),
                       x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
                       vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       ctypes.byref(ctypes.c_int(ne)),
                       ctypes.byref(ctypes.c_int(nx)),
                       ctypes.byref(ctypes.c_int(nlam)),
                       ctypes.byref(ctypes.c_double(flmin)),
                       ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       ctypes.byref(ctypes.c_double(thresh)),
                       ctypes.byref(ctypes.c_int(isd)),
                       ctypes.byref(ctypes.c_int(intr)),
                       ctypes.byref(ctypes.c_int(maxit)),
                       ctypes.byref(ctypes.c_int(kopt)),
                       ctypes.byref(lmu_r), a0_r, ca_r, ia_r, nin_r,
                       ctypes.byref(dev0_r), dev_r, alm_r, ctypes.byref(nlp_r),
                       ctypes.byref(jerr_r))

    #  ###################################
    #   post process results
    #  ###################################

    # check for error
    if jerr_r.value > 0:
        raise ValueError("Fatal glmnet error in library call: error code = "
                         + str(jerr_r.value))
    elif jerr_r.value < 0:
        print("Warning: Non-fatal error in glmnet library call: error code = "
              + str(jerr_r.value))
        print("Check results for accuracy. Partial or no results returned.")

    # clip output to correct sizes
    lmu = lmu_r.value
    if nc == 1:
        a0 = a0[0:lmu]
        ca = ca[0:nx, 0:lmu]
    else:
        a0 = a0[0:nc, 0:lmu]
        ca = ca[0:nx, 0:nc, 0:lmu]
    ia = ia[0:nx]
    nin = nin[0:lmu]
    dev = dev[0:lmu]
    alm = alm[0:lmu]

    # ninmax
    ninmax = max(nin)
    # fix first value of alm (from inf to correct value)
    if ulam[0] == 0.0:
        t1 = numpy.log(alm[1])
        t2 = numpy.log(alm[2])
        alm[0] = numpy.exp(2 * t1 - t2)
    # create return fit dictionary

    if family == 'multinomial':
        a0 = a0 - numpy.tile(numpy.mean(a0), (nc, 1))
        dfmat = a0.copy()
        dd = numpy.array([nvars, lmu], dtype=numpy.integer)
        beta_list = list()
        if ninmax > 0:
            # TODO: is the reshape here done right?
            ca = numpy.reshape(ca, (nx, nc, lmu))
            ca = ca[0:ninmax, :, :]
            ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
            oja = numpy.argsort(ja)
            ja1 = ja[oja]
            df = numpy.any(numpy.absolute(ca) > 0, axis=1)
            df = numpy.sum(df, axis=0)  # per-lambda count of active variables
            df = numpy.reshape(df, (1, df.size))
            for k in range(0, nc):
                ca1 = numpy.reshape(ca[:, k, :], (ninmax, lmu))
                cak = ca1[oja, :]
                dfmat[k, :] = numpy.sum(numpy.absolute(cak) > 0, axis=0)
                beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
                beta[ja1, :] = cak
                beta_list.append(beta)
        else:
            for k in range(0, nc):
                dfmat[k, :] = numpy.zeros([1, lmu], dtype=numpy.float64)
                beta_list.append(numpy.zeros([nvars, lmu],
                                             dtype=numpy.float64))
            #
            df = numpy.zeros([1, lmu], dtype=numpy.float64)
        #
        if kopt == 2:
            grouped = True
        else:
            grouped = False
        #
        fit = dict()
        fit['a0'] = a0
        fit['label'] = classes
        fit['beta'] = beta_list
        fit['dev'] = dev
        fit['nulldev'] = dev0_r.value
        fit['dfmat'] = dfmat
        fit['df'] = df
        fit['lambdau'] = alm
        fit['npasses'] = nlp_r.value
        fit['jerr'] = jerr_r.value
        fit['dim'] = dd
        fit['grouped'] = grouped
        fit['offset'] = is_offset
        fit['class'] = 'multnet'
    else:
        dd = numpy.array([nvars, lmu], dtype=numpy.integer)
        if ninmax > 0:
            ca = ca[0:ninmax, :]
            df = numpy.sum(numpy.absolute(ca) > 0, axis=0)
            ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
            oja = numpy.argsort(ja)
            ja1 = ja[oja]
            beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
            beta[ja1, :] = ca[oja, :]
        else:
            beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
            df = numpy.zeros([1, lmu], dtype=numpy.float64)
        #
        fit = dict()
        fit['a0'] = a0
        fit['label'] = classes
        fit['beta'] = beta
        fit['dev'] = dev
        fit['nulldev'] = dev0_r.value
        fit['df'] = df
        fit['lambdau'] = alm
        fit['npasses'] = nlp_r.value
        fit['jerr'] = jerr_r.value
        fit['dim'] = dd
        fit['offset'] = is_offset
        fit['class'] = 'lognet'

    #  ###################################
    #   return to caller
    #  ###################################

    return fit
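
A minimal usage sketch for the binomial path (same assumptions: the top-level glmnet() entry point and made-up labels). lognet() one-hot encodes y internally, so a plain 0/1 column is enough on the caller side:

import numpy
from glmnet_python import glmnet

# toy two-class problem; values are illustrative only
numpy.random.seed(2)
x = numpy.random.rand(80, 6)
y = (numpy.random.rand(80, 1) > 0.5) * 1.0

fit = glmnet(x=x.copy(), y=y.copy(), family='binomial')
print(fit['class'], fit['label'])
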
Example #4
def mrelnet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd,
            vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, jsd, intr, maxit,
            family):

    # load shared fortran library
    glmlib = loadGlmLib()

    #
    nr = y.shape[1]
    wym = wtmean(y, weights)
    wym = numpy.reshape(wym, (1, wym.size))
    yt2 = (y - numpy.tile(wym, (y.shape[0], 1)))**2
    nulldev = numpy.sum(wtmean(yt2, weights) * numpy.sum(weights))

    if len(offset) == 0:
        offset = y * 0
        is_offset = False
    else:
        if offset.shape != y.shape:
            raise ValueError('Offset must match dimension of y')
        is_offset = True
    #
    y = y - offset
    # now convert types and allocate memory before calling
    # glmnet fortran library
    ######################################
    # --------- PROCESS INPUTS -----------
    ######################################
    # force inputs into fortran order and numpy float64
    copyFlag = False
    x = x.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    irs = irs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    pcs = pcs.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    y = y.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    weights = weights.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    jd = jd.astype(dtype=numpy.int32, order='F', copy=copyFlag)
    vp = vp.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    cl = cl.astype(dtype=numpy.float64, order='F', copy=copyFlag)
    ulam = ulam.astype(dtype=numpy.float64, order='F', copy=copyFlag)

    ######################################
    # --------- ALLOCATE OUTPUTS ---------
    ######################################
    # lmu
    lmu = -1
    lmu_r = ctypes.c_int(lmu)
    # a0
    a0 = numpy.zeros([nr, nlam], dtype=numpy.float64)
    a0 = a0.astype(dtype=numpy.float64, order='F', copy=False)
    a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ca
    ca = numpy.zeros([nx, nr, nlam], dtype=numpy.float64)
    ca = ca.astype(dtype=numpy.float64, order='F', copy=False)
    ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ia
    ia = -1 * numpy.ones([nx], dtype=numpy.int32)
    ia = ia.astype(dtype=numpy.int32, order='F', copy=False)
    ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # nin
    nin = -1 * numpy.ones([nlam], dtype=numpy.int32)
    nin = nin.astype(dtype=numpy.int32, order='F', copy=False)
    nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # rsq
    rsq = -1 * numpy.ones([nlam], dtype=numpy.float64)
    rsq = rsq.astype(dtype=numpy.float64, order='F', copy=False)
    rsq_r = rsq.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # alm
    alm = -1 * numpy.ones([nlam], dtype=numpy.float64)
    alm = alm.astype(dtype=numpy.float64, order='F', copy=False)
    alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # nlp
    nlp = -1
    nlp_r = ctypes.c_int(nlp)
    # jerr
    jerr = -1
    jerr_r = ctypes.c_int(jerr)

    #  ###################################
    #   main glmnet fortran caller
    #  ###################################
    if is_sparse:
        # sparse multi-response elnet
        glmlib.multspelnet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)), ctypes.byref(ctypes.c_int(nr)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            pcs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            irs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(jsd)),
            ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(lmu_r), a0_r,
            ca_r, ia_r, nin_r, rsq_r, alm_r, ctypes.byref(nlp_r),
            ctypes.byref(jerr_r))
    else:
        # call fortran multelnet routine
        glmlib.multelnet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)), ctypes.byref(ctypes.c_int(nr)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(jsd)),
            ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(lmu_r), a0_r,
            ca_r, ia_r, nin_r, rsq_r, alm_r, ctypes.byref(nlp_r),
            ctypes.byref(jerr_r))

    #  ###################################
    #   post process results
    #  ###################################

    # check for error
    if jerr_r.value > 0:
        raise ValueError("Fatal glmnet error in library call: error code = "
                         + str(jerr_r.value))
    elif jerr_r.value < 0:
        print("Warning: Non-fatal error in glmnet library call: error code = "
              + str(jerr_r.value))
        print("Check results for accuracy. Partial or no results returned.")

    # clip output to correct sizes
    lmu = lmu_r.value
    a0 = a0[0:nr, 0:lmu]
    ca = ca[0:nx, 0:nr, 0:lmu]
    ia = ia[0:nx]
    nin = nin[0:lmu]
    rsq = rsq[0:lmu]
    alm = alm[0:lmu]

    # ninmax
    ninmax = max(nin)
    # fix first value of alm (from inf to correct value)
    if ulam[0] == 0.0:
        t1 = numpy.log(alm[1])
        t2 = numpy.log(alm[2])
        alm[0] = numpy.exp(2 * t1 - t2)
    # create return fit dictionary
    if nr > 1:
        dfmat = a0.copy()
        dd = numpy.array([nvars, lmu], dtype=numpy.integer)
        beta_list = list()
        if ninmax > 0:
            # TODO: is the reshape here done right?
            ca = numpy.reshape(ca, (nx, nr, lmu))
            ca = ca[0:ninmax, :, :]
            ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
            oja = numpy.argsort(ja)
            ja1 = ja[oja]
            df = numpy.any(numpy.absolute(ca) > 0, axis=1)
            df = numpy.sum(df, axis=0)
            df = numpy.reshape(df, (1, df.size))
            for k in range(0, nr):
                ca1 = numpy.reshape(ca[:, k, :], (ninmax, lmu))
                cak = ca1[oja, :]
                dfmat[k, :] = numpy.sum(numpy.absolute(cak) > 0, axis=0)
                beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
                beta[ja1, :] = cak
                beta_list.append(beta)
        else:
            for k in range(0, nr):
                dfmat[k, :] = numpy.zeros([1, lmu], dtype=numpy.float64)
                beta_list.append(numpy.zeros([nvars, lmu],
                                             dtype=numpy.float64))
            #
            df = numpy.zeros([1, lmu], dtype=numpy.float64)
        #
        fit = dict()
        fit['beta'] = beta_list
        fit['dfmat'] = dfmat
    else:
        dd = numpy.array([nvars, lmu], dtype=numpy.integer)
        if ninmax > 0:
            ca = ca[0:ninmax, :]
            df = numpy.sum(numpy.absolute(ca) > 0, axis=0)
            ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
            oja = numpy.argsort(ja)
            ja1 = ja[oja]
            beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
            beta[ja1, :] = ca[oja, :]
        else:
            beta = numpy.zeros([nvars, lmu], dtype=numpy.float64)
            df = numpy.zeros([1, lmu], dtype=numpy.float64)
        #
        fit = dict()
        fit['beta'] = beta

    fit['a0'] = a0
    fit['dev'] = rsq
    fit['nulldev'] = nulldev
    fit['df'] = df
    fit['lambdau'] = alm
    fit['npasses'] = nlp_r.value
    fit['jerr'] = jerr_r.value
    fit['dim'] = dd
    fit['offset'] = is_offset
    fit['class'] = 'mrelnet'

    #  ###################################
    #   return to caller
    #  ###################################

    return fit
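
A minimal usage sketch for the multi-response Gaussian path (same assumptions: the top-level glmnet() entry point and made-up data). With a multi-column y and family='mgaussian' the call is routed to mrelnet(), and fit['beta'] comes back as a list with one coefficient matrix per response:

import numpy
from glmnet_python import glmnet

# toy two-response regression; values are illustrative only
numpy.random.seed(3)
x = numpy.random.rand(60, 8)
y = numpy.random.rand(60, 2)

fit = glmnet(x=x.copy(), y=y.copy(), family='mgaussian')
print(fit['class'], len(fit['beta']))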