def getvalues(num, denom, id, dsname):
    """Return vectors of NUM, DENOM, and ID values.

    Each argument holds either constants taken from the syntax or the name
    of a variable whose case values are read from dataset *dsname*.

    num, denom -- sequences containing numeric constants or one variable name
    id -- a single constant, a variable name, or None if no id was given
    dsname -- name of the dataset from which variable values are read

    Returns a list of three lists of equal length: the num values, the denom
    values, and the id values.  A list holding a single item is repeated to
    match the longest list.

    Raises ValueError if a variable name is combined with other items, if a
    named variable is not found, or if the lists differ in length.
    """

    # A data step is needed only if at least one argument names a variable.
    needdata = isname(num[0]) or isname(denom[0]) or isname(id)
    idlist = [id]
    vallist = []
    dsvarnames = []   # lower-cased names of the variables in the dataset
    ds = None
    started = False
    try:
        if needdata:
            spss.StartDataStep()
            started = True
            ds = spss.Dataset(dsname)
            dsvarnames = [v.name.lower() for v in ds.varlist]
        for v in num, denom, idlist:
            try:
                vallist.append([float(val) for val in v])
                continue
            except (TypeError, ValueError):
                pass   # variable name argument or None

            if v[0] is None:    # can only happen with id variable
                vallist.append([None])   # null label in case no id variable
                continue
            if len(v) > 1:
                raise ValueError(
                    "Error: Only one variable may be named on each of NUM, DENOM, and ID, and a variable may not be combined with a value: "
                    + " ".join(v))
            try:
                vindex = dsvarnames.index(v[0].lower())
            except (ValueError, AttributeError):
                raise ValueError(
                    "Error: An undefined variable name was specified in NUM, DENOM, or ID: "
                    + " ".join(v))
            vallist.append([case[vindex] for case in ds.cases])
    finally:
        # End the data step only if this function actually started one.
        if started:
            spss.EndDataStep()

    # check and fix value list lengths
    maxlen = max(len(vals) for vals in vallist)
    for i, vals in enumerate(vallist):
        if len(vals) == 1:
            vals = vallist[i] = maxlen * vals
        if len(vals) != maxlen:
            raise ValueError(
                "Error: NUM, DENOM and optional ID do not all have the same number of items"
            )
    return vallist
def metadata(datain, path):
    """Write a metadata file describing the variables of an SPSS data file.

    datain -- name of the .sav file to open; ".sav" is appended if the name
              does not already end with it (case-insensitive)
    path -- output path without extension; the file written is path + ".met"

    The output starts with a "standard;" line, then a "variables" section with
    one line per variable (name, optional label, type and width, optional
    codeframe reference), then, if any variable has value labels, a
    "codeframes" section listing each labelled value.

    Returns 0.
    """
    with open(path + ".met", "w") as f:    # open the metadata file
        f.write("standard;\n")
        f.write("variables\n")    # write the variable metadata

        # Test the suffix directly: re.match anchors at the start of the
        # string, so a pattern of r"\.sav$" never recognizes "name.sav".
        if not datain.lower().endswith(".sav"):
            datain += ".sav"    # add .sav
        spss.Submit("get file='{0}'.".format(datain))

        spss.StartDataStep()
        try:
            ds = spss.Dataset()
            lasttype = -1    # type of the previous variable written
            frames = 0    # set to 1 once any variable has value labels
            for var in ds.varlist:
                line = " name={0}".format(var.name)    # name
                if var.label:
                    # NOTE(review): this replace maps '"' to itself and so has
                    # no effect; the intended escape is unknown - confirm.
                    line += ' label="{0}"'.format(var.label.replace('"', '"'))    # label
                # type and width are written only when they differ from
                # those of the preceding variable
                if var.type != lasttype:
                    if var.type == 0:
                        line += " type=float width=8"
                    else:
                        line += " type=char width={0}".format(var.type)
                    lasttype = var.type
                if var.valueLabels:    # codeframe
                    line += ' codeframe="{0}"'.format(var.name)
                    frames = 1
                line += ";\n"
                f.write(line)

            if frames:    # write the codeframe metadata
                f.write("codeframes\n")
                for var in ds.varlist:
                    if var.valueLabels:
                        f.write(" name={0}\n".format(var.name))
                        # items() works on both Python 2 and Python 3 dicts
                        for val, lab in var.valueLabels.data.items():
                            f.write(' {0} = "{1}"\n'.format(val, lab))
                        f.write(" ;\n")
            ds.close()
        finally:
            # always leave the data step, even if writing fails
            spss.EndDataStep()
    return 0
def __exit__(self, type, value, tb):
    """Context-manager exit: end the SPSS data step.

    The exception details (type, value, tb) are not examined.  False is
    returned, so an exception raised inside the ``with`` block propagates.
    """
    spss.EndDataStep()
    # A false return value tells Python not to suppress the exception.
    return False
def close(self):
    """End the SPSS data step, ignoring any error raised by doing so."""
    try:
        spss.EndDataStep()
    except Exception:
        # Deliberate best-effort cleanup: a failure to end the data step is
        # ignored.  Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        pass
def dopropor(num=None, denom=None, id=None, dsname="*", alpha=.05, adjust='bonferroni'):
    """Display a pivot table of confidence intervals for proportions.

    num, denom -- numerators and denominators, given as constants or as a
                  variable name (resolved by getvalues); both are required
    id -- optional labels for the proportions: a constant, a variable name,
          or None, in which case rows are labelled by their position
    dsname -- dataset from which variable values are read
    alpha -- significance level; limits use alpha/2 and 1 - alpha/2
    adjust -- accepted for compatibility; not used by this function

    Items whose num or denom is missing (None) are discarded.  For the rest,
    a scratch dataset is created and SPSS computes limits with IDF.BETA
    (binomial columns), IDF.CHISQ (Poisson columns) and IDF.NORMAL (critical
    value for the difference from the first proportion, p0).  The results
    are shown in the "Proportion Confidence Intervals" pivot table.

    Raises ValueError if NUM or DENOM is not given, if a num exceeds its
    denom or a denom is not positive, or if no valid items remain.
    """
    if num is None or denom is None:
        raise ValueError("Error: NUM and DENOM keywords are required")
    unistr = str

    # Make sure the active dataset has a name so that it can be reactivated
    # after the scratch dataset has been created.
    currentds = spss.ActiveDataset()
    if currentds == "*":
        currentds = "S" + str(random.uniform(0, 1))
        spss.Submit("DATASET NAME %s" % currentds)

    numvec, denomvec, idvec = getvalues(num, denom, id, dsname)

    # clean data, discard missing
    keep = []
    for n, d in zip(numvec, denomvec):
        valid = n is not None and d is not None    # missing data
        if valid and (n > d or d <= 0):
            raise ValueError(
                "Error: NUM value greater than DENOM value or zero denominator: %s, %s"
                % (n, d))
        keep.append(valid)
    # Rebuild each vector; rebinding a loop variable would prune nothing.
    numvec, denomvec, idvec = [
        [x for flag, x in zip(keep, vec) if flag]
        for vec in (numvec, denomvec, idvec)
    ]
    if not numvec:
        raise ValueError("Error: No valid proportions were found to analyze")

    alphalow = alpha / 2
    alphahigh = 1 - alphalow

    try:
        spss.StartDataStep()    # TODO: pending transformations
    except Exception:
        # a data step cannot start while transformations are pending
        spss.Submit("EXECUTE")
        spss.StartDataStep()

    # calculate ci's via SPSS IDFs
    p0 = numvec[0] / denomvec[0]
    sdvec = []    # sd of the difference between each proportion and p0
    try:
        ds = spss.Dataset(name=None)    # new scratch dataset
        spss.SetActive(ds)
        ds.varlist.append("p", 0)
        ds.varlist.append("num", 0)
        ds.varlist.append("denom", 0)
        for n, d in zip(numvec, denomvec):
            p1 = n / d
            sdvec.append(
                sqrt(p0 * (1 - p0) / denomvec[0] + p1 * (1 - p1) / d))
            ds.cases.append([p1, n, d])
    finally:
        spss.EndDataStep()

    # One command per line: the command terminator must end its line.
    cmd = "\n".join([
        "COMPUTE PLOWBI = IDF.BETA(%(alphalow)s, num + .5, denom-num + .5).",
        "COMPUTE PHIGHBI = IDF.BETA(%(alphahigh)s, num + .5, denom - num + .5).",
        "DO IF num > 0.",
        "COMPUTE PLOWPOIS = (IDF.CHISQ(%(alphalow)s, 2*num)/2)/denom.",
        "ELSE.",
        "COMPUTE PLOWPOIS = 0.",
        "END IF.",
        "COMPUTE PHIGHPOIS = (IDF.CHISQ(%(alphahigh)s, 2*(num+1))/2) / denom.",
        "COMPUTE ZTAIL = IDF.NORMAL(%(alphahigh)s, 0,1).",
        "EXECUTE.",
    ]) % {"alphalow": alphalow, "alphahigh": alphahigh}
    spss.Submit(cmd)

    # Read back the computed limits.  Variables 0-2 are p, num and denom;
    # 3-6 are the four limits; the last one is the normal critical value.
    plowbi = []
    phighbi = []
    plowpois = []
    phighpois = []
    spss.StartDataStep()
    try:
        ds = spss.Dataset(name="*")
        for case in ds.cases:
            plowbi.append(case[3])
            phighbi.append(case[4])
            plowpois.append(case[5])
            phighpois.append(case[6])
            zalpha2 = case[-1]
        try:
            spss.SetActive(spss.Dataset(name=currentds))
            closeafter = False
        except Exception:
            # the original dataset could not be reactivated
            closeafter = True
        ds.close()
    finally:
        spss.EndDataStep()

    from spss import CellText
    spss.StartProcedure("Proportions")
    try:
        table = spss.BasePivotTable("Proportion Confidence Intervals",
                                    "Proportions")
        titlefootnote = "Alpha = %.3f" % alpha
        if 0. in numvec:
            titlefootnote += " (One-sided %.3f when p = 0)" % (alpha / 2.)
        table.TitleFootnotes(titlefootnote)
        rowdim = table.Append(spss.Dimension.Place.row, "Proportions")
        coldim = table.Append(spss.Dimension.Place.column, "Statistics")
        cols = [
            "p", "Binomial\nLower CI", "Binomial\nUpper CI",
            "Poisson\nLower CI", "Poisson\nUpper CI", "Difference\nfrom p0",
            "Difference from p0\nLower CI", "Difference from p0\nUpper CI"
        ]
        table.SetCategories(coldim, [CellText.String(v) for v in cols])

        # Row labels: the id value, or the 1-based position when the id is
        # missing or converts to an empty string.
        labels = []
        for i, v in enumerate(idvec):
            label = unistr(v) if v is not None else ""
            labels.append(label or unistr(i + 1))
        table.SetCategories(rowdim, [CellText.String(v) for v in labels])

        for i, (n, d) in enumerate(zip(numvec, denomvec)):
            p1 = n / d
            if i > 0:
                zdifflow = p1 - p0 - sdvec[i] * zalpha2
                zdiffhigh = p1 - p0 + sdvec[i] * zalpha2
            else:
                zdifflow = zdiffhigh = 0.
            table.SetCellsByRow(CellText.String(labels[i]), [
                CellText.Number(v)
                for v in (p1, plowbi[i], phighbi[i], plowpois[i],
                          phighpois[i], p1 - p0, zdifflow, zdiffhigh)
            ])
            if i == 0:
                # the first row is p0 itself: no difference statistics
                for col in cols[-3:]:
                    table[(CellText.String(labels[0]),
                           CellText.String(col))] = CellText.String("-")
    finally:
        spss.EndProcedure()

    if closeafter:
        # Parenthesized so the whole generated name is substituted for %s.
        newname = "S" + str(random.uniform(0, 1))
        spss.Submit(["NEW FILE.", "DATASET NAME %s." % newname])
def genVarsCategoryList(varnames, specialvalues, macroname, missing, order, weightvar,
        specialsorder, valuelabelsdict, missingvaluesdict, customattr, attrname):
    """Generate value lists sorted by count and return macro and attribute specifications.

    varnames is a sequence of variable names to process.
    specialvalues is a sequence of values, or None.  Special values are removed
    from the counted values and placed as a group before or after them.
    macroname is a list of macro names of the same length as varnames, or None.
    A name not starting with "!" is given that prefix, in place, in the list.
    missing is 'exclude' to have the data cursor omit missing values; any other
    value includes them.
    order is 'd' to sort by descending count; any other value sorts ascending.
    weightvar can be specified as a variable name to be used as a weight in
    determining the counts to sort by.  It must not occur in varnames.  A missing
    weight counts as zero.
    specialsorder is "after" to place the special values last; otherwise they
    are placed first.
    valuelabelsdict is None or maps each variable name to a set of values; any
    of those values not found in the data is added with a count of zero.
    missingvaluesdict is not used by this function.
    customattr indicates whether a custom attribute holding the ordered values
    should be set on each variable.
    attrname is the name of the custom attribute.

    Returns (macrosgenerated, customattrlist).  macrosgenerated is a list of
    [macroname, valuestring] pairs (empty if macroname is None) and
    customattrlist is a list of [variablename, valuestring] pairs (empty unless
    customattr).  A value string is the blank-separated ordered values, each
    enclosed in double quotes when the first value is a string.

    Raises ValueError if weightvar is one of varnames.

    This function is mainly useful as a helper function for Ctables in building
    CATEGORIES subcommands.  It may be useful to combine it with OTHERNM and/or
    MISSING in the category list.
    """

    if weightvar:
        if weightvar in varnames:
            raise ValueError(_("""The weight variable cannot be included as a variable."""))
        varnamesAndWeight = varnames + [weightvar]
    else:
        varnamesAndWeight = varnames

    try:
        stringtypes = basestring    # Python 2
    except NameError:
        stringtypes = str           # Python 3

    nvar = len(varnames)
    vvalues = [{} for unused in range(nvar)]  # for accumulating counts for all variable values
    curs = spssdata.Spssdata(indexes=varnamesAndWeight, names=False,
        omitmissing=missing == 'exclude')
    try:
        for case in curs:
            if weightvar:
                w = case[nvar]   # the weight follows the nvar analysis variables
                if w is None:
                    w = 0.0
            else:
                w = 1.0
            # zip stops after the nvar analysis variables
            for counts, curval in zip(vvalues, case):
                if curval is not None:   # omit sysmis values and optionally user missing values
                    counts[curval] = counts.get(curval, 0.) + w  # count occurrences, possibly weighted
    finally:
        curs.CClose()   # release the cursor even if reading fails

    macrosgenerated = []
    customattrlist = []
    for i, vname in enumerate(varnames):
        counts = vvalues[i]
        # if labeled values were supplied but did not occur in the data,
        # add them with a count of zero
        if valuelabelsdict is not None:
            for val in valuelabelsdict[vname] - set(counts):
                counts[val] = 0.
        if specialvalues is not None:  # remove special values from count list
            for v in specialvalues:
                counts.pop(v, None)

        # sort by count, with ties broken by value
        ordered = sorted([(cnt, key) for (key, cnt) in counts.items()],
            reverse=order == 'd')
        if specialvalues is not None:
            specials = [(None, v) for v in specialvalues]
            if specialsorder == "after":
                ordered = ordered + specials
            else:
                ordered = specials + ordered

        # Quote the values when the first one is a string.  A variable with
        # no values at all yields an empty value string.
        if ordered and isinstance(ordered[0][1], stringtypes):
            qchar = '"'
        else:
            qchar = ''
        valuestr = " ".join([qchar + strconv(k).rstrip() + qchar for (cnt, k) in ordered])

        if macroname is not None:
            if not macroname[i].startswith("!"):
                macroname[i] = "!" + macroname[i]
            macrosgenerated.append([macroname[i], valuestr])
        if customattr:
            customattrlist.append([vname, valuestr])

    if customattr:
        try:   # cannot start datastep if there are pending transformations
            spss.StartDataStep()
        except Exception:
            spss.Submit("EXECUTE.")
            spss.StartDataStep()
        try:
            ds = spss.Dataset()
            for name, valuestr in customattrlist:
                ds.varlist[name].attributes[attrname] = valuestr
        finally:
            spss.EndDataStep()

    return macrosgenerated, customattrlist