Exemplo n.º 1
0
def getvalues(num, denom, id, dsname):
    """Return vectors of num, denom, and id values from constants in syntax or variable values.

    num and denom are sequences holding either numeric constants or a single
    variable name; id is a single constant, a variable name, or None.
    dsname is the dataset that named variables are read from.

    Returns a list of three equal-length lists [numvalues, denomvalues, idvalues].
    A single-item list is repeated to match the longest one.  When id is None
    the id list contains only None values.

    Raises ValueError if more than one item is given together with a variable
    name, if a named variable does not exist, or if the lists cannot be made
    the same length.
    """

    # A data step is only needed when at least one argument names a variable.
    if isname(num[0]) or isname(denom[0]) or isname(id):
        spss.StartDataStep()
        ds = spss.Dataset(dsname)
    else:
        ds = None
    id = [id]
    try:
        vallist = []
        varnames = []  # lower-cased names of the variables in the dataset
        if ds is not None:
            varnames = [v.name.lower() for v in ds.varlist]
        for v in num, denom, id:
            try:
                vallist.append([float(val) for val in v])
            except (TypeError, ValueError):  # variable name argument or None
                if v[0] is None:  # can only happen with id variable
                    vallist.append([None])  # null label in case no id variable
                else:
                    if len(v) > 1:
                        raise ValueError(
                            "Error: Only one variable may be named on each of NUM, DENOM, and ID, and a variable may not be combined with a value: "
                            + " ".join(v))
                    try:
                        vindex = varnames.index(v[0].lower())
                        vallist.append([val[vindex] for val in ds.cases])
                    except Exception:
                        # name not found, or no dataset was opened to look it up in
                        raise ValueError(
                            "Error: An undefined variable name was specified in NUM, DENOM, or ID: "
                            + " ".join(v))
    finally:
        # Only end the data step if this function actually started one.
        if ds is not None:
            spss.EndDataStep()

    # check and fix value list lengths
    maxlen = max(len(vals) for vals in vallist)

    for i, vals in enumerate(vallist):
        if len(vals) == 1:
            # broadcast a single value across all items
            vals = vallist[i] = maxlen * vals
        if len(vals) != maxlen:
            raise ValueError(
                "Error: NUM, DENOM and optional ID do not all have the same number of items"
            )
    return vallist
Exemplo n.º 2
0
def metadata(datain, path):
  """Write the file ``path + ".met"`` describing the variables of an SPSS data file.

  datain is the SPSS data file to open; ".sav" is appended when the name does
  not already end with it (case-insensitive).
  path is the output file name without the ".met" extension.

  The output has a "variables" section with one line per variable (name,
  optional label, type/width whenever the type differs from the previous
  variable's, and a codeframe reference for variables with value labels),
  followed by a "codeframes" section listing the value labels, if any.

  Returns 0.
  """
  with open(path + ".met", "w") as f:  # closed even if an SPSS call fails
    f.write("standard;\n")
    f.write("variables\n")  # write the variable metadata
    # re.search, not re.match: the extension has to be found at the END of
    # the name, and re.match only ever tests the start of the string.
    if not re.search(r"\.sav$", datain, flags=re.IGNORECASE):
      datain += ".sav"  # add .sav
    spss.Submit("get file='{0}'.".format(datain))
    spss.StartDataStep()
    try:
      ds = spss.Dataset()
      lasttype = -1  # type of the previous variable; -1 matches no real type
      hasframes = False
      for var in ds.varlist:
        line = "  name={0}".format(var.name)  # name
        if var.label:
          # NOTE(review): this replace() maps '"' to itself, so it changes
          # nothing; the intended escape sequence appears to be lost - confirm.
          line += ' label="{0}"'.format(var.label.replace('"', '"'))  # label
        if var.type != lasttype:
          if var.type == 0:  # type and width
            line += " type=float width=8"
          else:
            line += " type=char width={0}".format(var.type)
          lasttype = var.type
        if var.valueLabels:  # codeframe
          line += ' codeframe="{0}"'.format(var.name)
          hasframes = True
        line += ";\n"
        f.write(line)
      if hasframes:  # write the codeframe metadata
        f.write("codeframes\n")
        for var in ds.varlist:
          if var.valueLabels:
            f.write("  name={0}\n".format(var.name))
            # items() works on both Python 2 and Python 3 dictionaries
            for val, lab in var.valueLabels.data.items():
              f.write('    {0} = "{1}"\n'.format(val, lab))
            f.write("  ;\n")
      ds.close()
    finally:
      spss.EndDataStep()  # never leave the data step open on error
  return 0
 def __exit__(self, type, value, tb):
     """End the SPSS data step when the ``with`` block is left.

     Returns False, so an exception raised inside the block is not suppressed.
     """
     spss.EndDataStep()
     return False
Exemplo n.º 4
0
 def close(self):
     """End the SPSS data step, ignoring any error raised by doing so."""
     try:
         spss.EndDataStep()
     except Exception:
         # Best effort: a failure to end the data step is deliberately ignored.
         # KeyboardInterrupt and SystemExit are no longer swallowed.
         pass
Exemplo n.º 5
0
def dopropor(num=None,
             denom=None,
             id=None,
             dsname="*",
             alpha=.05,
             adjust='bonferroni'):
    """Display a pivot table of proportions with confidence intervals.

    num, denom -- numerators and denominators, as accepted by getvalues
                  (constants or a variable name); both are required.
    id         -- optional row labels (constant, variable name, or None);
                  rows without a label are numbered from 1.
    dsname     -- dataset that named variables are read from.
    alpha      -- significance level; the intervals use alpha/2 in each tail.
    adjust     -- accepted for interface compatibility; not used by this code.

    Items where num or denom is missing (None) are discarded.  For each
    remaining item the table shows the proportion, binomial and Poisson
    confidence limits computed with SPSS IDF functions in a temporary
    dataset, and the difference from the first proportion with its limits.

    Raises ValueError if NUM or DENOM is not given, if a numerator exceeds its
    denominator or a denominator is not positive, or if no valid items remain.
    """

    if num is None or denom is None:
        raise ValueError("Error: NUM and DENOM keywords are required")
    # The same text type is used whether or not SPSS is in UTF-8 mode.
    unistr = str

    currentds = spss.ActiveDataset()
    if currentds == "*":
        # give the unnamed active dataset a name so it can be reactivated later
        currentds = "S" + str(random.uniform(0, 1))
        spss.Submit("DATASET NAME %s" % currentds)

    numvec, denomvec, idvec = getvalues(num, denom, id, dsname)
    # clean data, discard missing
    keep = []
    for n, d in zip(numvec, denomvec):
        valid = n is not None and d is not None  # False for missing data
        keep.append(valid)
        if valid and (n > d or d <= 0):
            raise ValueError(
                "Error: NUM value greater than DENOM value or zero denominator: %s, %s"
                % (n, d))
    # Rebuild the three lists; assigning to a loop variable would prune nothing.
    numvec, denomvec, idvec = [
        [x for flag, x in zip(keep, vec) if flag]
        for vec in (numvec, denomvec, idvec)
    ]
    if len(numvec) == 0:
        raise ValueError("Error: No valid proportions were found to analyze")

    alphalow = alpha / 2
    alphahigh = 1 - alphalow
    try:
        spss.StartDataStep()  #TODO: pending transformations
    except Exception:
        # a data step cannot start while transformations are pending
        spss.Submit("EXECUTE")
        spss.StartDataStep()

    # calculate ci's via SPSS IDFs

    sdvec = []
    try:
        ds = spss.Dataset(name=None)  # new, empty working dataset
        spss.SetActive(ds)
        ds.varlist.append("p", 0)
        ds.varlist.append("num", 0)
        ds.varlist.append("denom", 0)

        p0 = numvec[0] / denomvec[0]
        for n, d in zip(numvec, denomvec):
            p1 = n / d
            # standard error of the difference from the first proportion
            sdvec.append(
                sqrt(p0 * (1 - p0) / denomvec[0] + p1 * (1 - p1) / d))
            ds.cases.append([p1, n, d])
    finally:
        spss.EndDataStep()

    cmd =r"""COMPUTE PLOWBI = IDF.BETA(%(alphalow)s, num + .5, denom-num + .5).
    COMPUTE PHIGHBI = IDF.BETA(%(alphahigh)s, num + .5,  denom - num + .5).
    DO IF num > 0.
    COMPUTE PLOWPOIS = (IDF.CHISQ(%(alphalow)s, 2*num)/2)/denom.
    ELSE.
    COMPUTE PLOWPOIS = 0.
    END IF.
    COMPUTE PHIGHPOIS = (IDF.CHISQ(%(alphahigh)s, 2*(num+1))/2) / denom.
    COMPUTE ZTAIL = IDF.NORMAL(%(alphahigh)s, 0,1).
    EXECUTE."""\
    % {"alphalow": alphalow, "alphahigh": alphahigh}

    spss.Submit(cmd)
    plowbi = []
    phighbi = []
    plowpois = []
    phighpois = []
    zalpha2 = None
    spss.StartDataStep()
    try:
        ds = spss.Dataset(name="*")
        for case in ds.cases:
            # columns 0-2 are p, num, denom; 3-6 are the computed limits
            for col, vec in enumerate((plowbi, phighbi, plowpois, phighpois), 3):
                vec.append(case[col])
            zalpha2 = case[-1]  # ZTAIL, the last computed variable
        try:
            spss.SetActive(spss.Dataset(name=currentds))
            closeafter = False
        except Exception:
            # the original dataset could not be reactivated
            closeafter = True
        ds.close()
    finally:
        spss.EndDataStep()

    from spss import CellText
    spss.StartProcedure("Proportions")
    table = spss.BasePivotTable("Proportion Confidence Intervals",
                                "Proportions")
    titlefootnote = "Alpha = %.3f" % alpha
    if 0. in numvec:
        titlefootnote += " (One-sided %.3f when p = 0)" % (alpha / 2.)
    table.TitleFootnotes(titlefootnote)
    rowdim = table.Append(spss.Dimension.Place.row, "Proportions")
    coldim = table.Append(spss.Dimension.Place.column, "Statistics")
    cols = [
        "p", "Binomial\nLower CI", "Binomial\nUpper CI", "Poisson\nLower CI",
        "Poisson\nUpper CI", "Difference\nfrom p0",
        "Difference from p0\nLower CI", "Difference from p0\nUpper CI"
    ]
    table.SetCategories(coldim, [CellText.String(v) for v in cols])
    # use the id as the row label; fall back to the 1-based row number when
    # the id is missing or converts to an empty string
    idvec = [
        (unistr(v) if v is not None else "") or unistr(i + 1)
        for i, v in enumerate(idvec)
    ]
    table.SetCategories(rowdim, [CellText.String(v) for v in idvec])
    for i in range(len(numvec)):
        p1 = numvec[i] / denomvec[i]
        if i > 0:
            zdifflow = p1 - p0 - sdvec[i] * zalpha2
            zdiffhigh = p1 - p0 + sdvec[i] * zalpha2
        else:
            zdifflow = zdiffhigh = 0.
        table.SetCellsByRow(CellText.String(idvec[i]), [
            CellText.Number(v)
            for v in (p1, plowbi[i], phighbi[i],
                      plowpois[i], phighpois[i], p1 - p0, zdifflow, zdiffhigh)
        ])
        if i == 0:
            # the first row is the reference: show "-" in the difference columns
            table[(CellText.String(idvec[0]),
                   CellText.String(cols[-3]))] = CellText.String("-")
            table[(CellText.String(idvec[0]),
                   CellText.String(cols[-2]))] = CellText.String("-")
            table[(CellText.String(idvec[0]),
                   CellText.String(cols[-1]))] = CellText.String("-")
    spss.EndProcedure()
    if closeafter:
        # Parenthesized: % binds tighter than +, so without the parentheses
        # only "S" would be substituted and the number appended after the ".".
        spss.Submit(r"""NEW FILE.
        DATASET NAME %s.""" % ("S" + str(random.uniform(0, 1))))
def genVarsCategoryList(varnames, specialvalues, macroname, missing, order, 
        weightvar, specialsorder, valuelabelsdict, missingvaluesdict,
        customattr, attrname):
    """Generate sorted list(s) of values with possible insertion of extra values
    and return list of SPSS macros to be created.
    
    varnames is a sequence of variable names to process.
    specialvalues is a sequence of values to be kept out of the sorted list and
    placed as a group before or after it (see specialsorder), or None.
    If a special value already occurs in a variable, it is moved to that group.
    macroname is a list of macronames of the same length as varnames to generate or None.
    A leading "!" is added to a macro name that lacks one (the list is updated in place).
    missing is 'include' or 'exclude' to determine whether user missing values are included or excluded.
    order is 'a' or 'd' to specify the sort direction.  Values are sorted by count, then by value.
    weightvar can be specified as a variable name to be used as a weight in determining the counts to sort by.
    It must not occur in varnames.
    specialsorder is 'after' to place the specials section last; any other value places it first.
    valuelabelsdict maps each variable name to the set of its labeled values, or is None.
    Labeled values that do not occur in the data are added with a count of zero.
    missingvaluesdict is not used by this function.
    customattr indicates whether a custom attribute with the order should be generated
    attrname is the name of the custom attribute
    
    Returns (macrosgenerated, customattrlist): a list of [macroname, valuestring]
    pairs and a list of [variablename, valuestring] pairs.  Either list is empty
    when the corresponding output was not requested.

    Raises ValueError if weightvar occurs in varnames.

    This function is mainly useful as a helper function for Ctables in building CATEGORIES subcommands.
    It may be useful to combine it with OTHERNM and/or MISSING in the category list.
    """

    if weightvar:
        if weightvar in varnames:
            raise ValueError(_("""The weight variable cannot be included as a variable."""))
        varnamesAndWeight = varnames + [weightvar]
    else:
        varnamesAndWeight = varnames
    curs = spssdata.Spssdata(indexes=varnamesAndWeight, names=False, omitmissing=missing =='exclude')
    nvar = len(varnames)
    
    vvalues = [{} for i in range(nvar)]  # for accumulating counts for all variable values
    try:
        for case in curs:
            if weightvar:
                w = case[nvar]  # the weight is the last value retrieved
                if w is None:
                    w = 0.0
            else:
                w = 1.0
            for i in range(nvar):
                curval = case[i]
                if curval is not None:   # omit sysmis values and optionally user missing values
                    vvalues[i][curval] = vvalues[i].get(curval, 0.) + w   # count occurrences, possibly weighted
    finally:
        curs.CClose()  # release the cursor even if reading fails
    
    valuelist = []
    macrosgenerated = []
    customattrlist = []
    for i, vname in enumerate(varnames):
        # if labeled values were supplied but did not occur in the data,
        # add them with a count of zero
        if valuelabelsdict is not None:
            labeledbutnotfound = valuelabelsdict[vname] - set(vvalues[i].keys())
            for val in labeledbutnotfound:
                vvalues[i][val] = 0.
        if specialvalues is not None:  # remove special values from count list
            for v in specialvalues:
                if v in vvalues[i]:
                    del(vvalues[i][v])
        # items() works on both Python 2 and Python 3 dictionaries
        valuelist.append(sorted([(value, key) for (key, value) in vvalues[i].items()], reverse = order == 'd'))
        if specialvalues is not None:
            if specialsorder == "after":
                valuelist[i].extend([(None, v) for v in specialvalues])
            else:
                valuelist[i] = [(None, v) for v in specialvalues] + valuelist[i]

        # String values are quoted.  The list is empty when the variable has
        # no valid values and there are no special values; nothing to quote then.
        if valuelist[i] and isinstance(valuelist[i][0][1], basestring):
            qchar = '"'
        else:
            qchar = ''
        if macroname is not None or customattr:
            # the macro body and the attribute value are the same string
            catstring = " ".join([qchar + strconv(k).rstrip() + qchar  for (value, k) in valuelist[i]])
        if macroname is not None:
            if not macroname[i].startswith("!"):
                macroname[i] = "!" + macroname[i]
            macrosgenerated.append([macroname[i], catstring])
        if customattr:
            customattrlist.append([vname, catstring])
    
    if customattr:
        try:   # cannot start datastep if there are pending transformations
            spss.StartDataStep()
        except Exception:
            spss.Submit("EXECUTE.")
            spss.StartDataStep()
        try:
            ds = spss.Dataset()
            for spec in customattrlist:
                ds.varlist[spec[0]].attributes[attrname] = spec[1]
        finally:
            spss.EndDataStep()  # never leave the data step open on error
            
        
    return macrosgenerated, customattrlist