def dolabels(variables=None, varpattern=None, lblvars=None, lblpattern=None,
             execute=True, varsperpass=20, syntax=None):
    """Execute STATS VALLBLS FROMDATA

    variables, varpattern -- names and/or a pattern selecting the variables to label.
    lblvars, lblpattern -- names and/or a pattern selecting the string variables
        that supply the labels.  Either a single label variable is given, or
        exactly one per variable to label.
    execute -- when true, the generated label syntax is submitted.
    varsperpass -- number of variables handed to Mkvls.doaggr per pass (>= 1).
    syntax -- optional file specification; when given, the generated syntax is
        also written there via writesyntax.

    The active dataset must have a dataset name because it is re-activated
    around every pass.  ValueError is raised for every invalid specification."""

    # range() below would reject a zero step with an unhelpful message and
    # would silently do nothing for a negative one.
    if varsperpass < 1:
        raise ValueError(_("VARSPERPASS must be a positive integer"))

    try:
        vardict = spssaux.VariableDict(caseless=True)
    except Exception as err:
        # Keep the original diagnosis but do not swallow KeyboardInterrupt /
        # SystemExit, and keep the real cause attached for debugging.
        raise ValueError(_("""This command requires a newer version the spssaux module. \n It can be obtained from the SPSS Community website (www.ibm.com/developerworks/spssdevcentral)""")) from err

    varstolabel = resolve(vardict, _("variables to label"), variables,
                          varpattern, stringonly=False)
    labelvars = resolve(vardict, _("label variables"), lblvars,
                        lblpattern, stringonly=True)
    if len(varstolabel) == 0 or len(labelvars) == 0:
        raise ValueError(_("""No variables to label or no labelling variables were specified. If a pattern was used, it may not have matched any variables."""))
    if len(labelvars) > 1 and len(labelvars) != len(varstolabel):
        raise ValueError(_("The number of label variables is different from the number of variables to label"))
    # VariableType 0 means numeric; label text has to come from strings.
    if any(vardict[name].VariableType == 0 for name in labelvars):
        raise ValueError(_("""The label variables must all have type string"""))

    dsname = spss.ActiveDataset()
    if dsname == "*":
        raise ValueError(_("""The active dataset must have a dataset name in order to use this procedure"""))
    if syntax:
        syntax = FileHandles().resolve(syntax.replace("\\", "/"))

    mkvl = Mkvls(varstolabel, labelvars, varsperpass, execute, syntax, vardict)
    activate = """DATASET ACTIVATE %s""" % dsname
    for start in range(0, len(varstolabel), varsperpass):
        # each pass starts from the user's dataset
        spss.Submit(activate)
        mkvl.doaggr(start)
    spss.Submit(activate)

    labelsyntax = mkvl.dolabels()
    if labelsyntax and execute:
        spss.Submit(labelsyntax)
    mkvl.report(labelsyntax)
    if labelsyntax and syntax:
        writesyntax(labelsyntax, syntax, mkvl)
def doactions(filespec=None, conflict="noname", currentactivedsn=None):
    """Execute command

    currentactivedsn -- if given, assign this session dataset name to the
        active dataset and record it in the datafile attribute named by the
        module-level customdsattr.
    filespec -- if given, open this data file and, when it carries a name in
        the customdsattr attribute, assign that name as the session dataset name.
    conflict -- what to do when the file's stored name is already in use in
        this session: "override" takes the name away from the other dataset;
        any other value leaves the newly opened dataset unnamed.

    Raises ValueError when neither action is requested or when
    currentactivedsn is already used by a different dataset."""

    # debugging
    # makes debug apply only to the current thread
    try:
        import wingdbstub
        if wingdbstub.debugger is not None:
            import time
            wingdbstub.debugger.StopDebug()
            time.sleep(1)
            wingdbstub.debugger.StartDebug()
            import _thread
            wingdbstub.debugger.SetDebugThreads({_thread.get_ident(): 1}, default_policy=0)
            # for V19 use
            ## ###SpssClient._heartBeat(False)
    except Exception:
        # the debugger stub is optional; run normally without it
        pass

    if filespec is None and currentactivedsn is None:
        raise ValueError(_("No actions were specified for this command"))

    activeds = spss.ActiveDataset().lower()
    alldatasets = getAllDatasetNames()
    assigned = None  # lower-cased name given to the active dataset by this call

    if currentactivedsn is not None:
        wanted = currentactivedsn.lower()
        if wanted != activeds and wanted in alldatasets:
            raise ValueError(_("""The dataset name to be assigned is already in use for another dataset: %s""")
                             % currentactivedsn)
        # The template was previously submitted without being formatted, so
        # SPSS received the literal %(...)s placeholders.
        spss.Submit("""DATASET NAME %(currentactivedsn)s.
DATAFILE ATTRIBUTE ATTRIBUTE=%(customdsattr)s(%(currentactivedsn)s)."""
                    % {"currentactivedsn": currentactivedsn,
                       "customdsattr": customdsattr})
        # The active dataset now has a name; without this update the branch
        # below would immediately replace it with a random one.
        assigned = wanted
        activeds = wanted

    if filespec is None:
        return

    # The unnamed active dataset might be empty, but we preserve it in case it isn't
    if activeds == "*":
        import random
        spss.Submit("""DATASET NAME %s.""" % ("D" + str(random.uniform(.1, 1.))))
    spss.Submit("""GET FILE="%s". """ % filespec)

    thedsn = spss.GetDataFileAttributes(customdsattr)
    if len(thedsn) == 0:
        print(_("The data file does not contain a permanent dataset name. No session dataset name has been assigned."))
        return

    permanent = thedsn[0]
    in_use = permanent.lower() in alldatasets or permanent.lower() == assigned
    if in_use:
        if conflict != "override":
            print(_("The permanent dataset name is already in use in this session. No session dataset name has been assigned."))
            # honour the message: leave the new dataset unnamed
            return
        print(_("The dataset name has been removed from an already open dataset: %s") % permanent)
    spss.Submit("""DATASET NAME %s.""" % permanent)
def _wkappa_warning(text):
    """Show text in a one-cell Warnings pivot table of the Weighted Kappa procedure"""
    spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
    try:
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        table[(CellText.String("1"), )] = CellText.String(text)
    finally:
        spss.EndProcedure()


def _wkappa_hidden_warning(omstag, text):
    """Show a warning table while OMS keeps the procedure's Notes table out of the Viewer"""
    spss.Submit("""OMS /SELECT TABLES
/IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag)
    try:
        _wkappa_warning(text)
    finally:
        spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag)


def _wkappa_open_cursor():
    """Open a read cursor, asking for isBinary=False when the spss module accepts it"""
    try:
        return spss.Cursor(isBinary=False)
    except Exception:
        return spss.Cursor()


def _wkappa_filter_variable(show_text):
    """Return SHOW FILTER text without a trailing "(FILTER)" marker.

    str.strip("(FILTER)") removes a *set of characters* from both ends, which
    also eats leading or trailing F, I, L, T, E, R letters of the variable
    name itself; only the marker suffix is removed here.
    NOTE(review): the "name (FILTER)" layout is inferred from the original
    strip() argument -- confirm against SHOW FILTER output."""
    marker = "(FILTER)"
    text = show_text.strip()
    if text.endswith(marker):
        text = text[:-len(marker)]
    return text.strip()


def _wkappa_try_submit(syntax):
    """Submit cleanup syntax, ignoring failures so the remaining cleanup still runs"""
    try:
        spss.Submit(syntax)
    except Exception:
        pass


def weightedkappaextension(variables, wttype=1, cilevel=95):
    """Display weighted kappa for two rating variables in a pivot table.

    variables -- variable specification, expanded by expandvarnames; exactly
        two existing variables are required.
    wttype -- 1 for linear weights, 2 for quadratic weights; anything else is
        reset to 1 with a warning.
    cilevel -- confidence level in percent; values outside 50..99.999 are
        clamped with a warning.

    Invalid input and unusable data (fewer than two complete cases, a rater
    with constant ratings, non-integer ratings, ratings below 1) are reported
    in a Warnings table instead of raising.  All work is done on a hidden copy
    of the active dataset with any case filter applied and split files turned
    off; temporary datasets and files are removed afterwards.

    NOTE: when a weight variable is in effect it is rounded in place in the
    active dataset before the working copy is made."""

    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        _wkappa_warning(_("""An invalid variable has been specified. This command is not executed."""))
        return
    if len(varnames) != 2:
        _wkappa_warning(_("""Exactly two variables must be specified. This command is not executed."""))
        return

    # Built only after validation: indexing varnames[1] up front raised
    # IndexError for a single variable instead of showing the warning above.
    caption = varnames[0] + _(" vs. ") + varnames[1]
    varlist = varnames[0] + ' ' + varnames[1]

    warntext = []
    if cilevel < 50:
        warntext.append(_("CILEVEL cannot be less than 50%. It has been set to 50%."))
        cilevel = 50
    if cilevel > 99.999:
        warntext.append(_("CILEVEL cannot be greater than 99.999%. It has been set to 99.999%."))
        cilevel = 99.999
    if cilevel == int(cilevel):
        cilevel = int(cilevel)  # prints as "95" rather than "95.0" in labels
    if wttype not in (1, 2):
        warntext.append(_("WTTYPE must be 1 or 2. It has been set to 1."))
        wttype = 1

    def tempname(prefix):
        return prefix + str(random.uniform(.1, 1))

    tmpvar1, tmpvar2, tmpvar3, tmpvar4, tmpvar5, tmpvar6 = [tempname("V") for unused in range(6)]
    tmpdata1, tmpdata2 = tempname("D"), tempname("D")
    omstag1, omstag2, omstag3, omstag4, omstag5, omstag6 = [tempname("T") for unused in range(6)]
    tmpfile1, tmpfile2 = tempname("F"), tempname("F")
    lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
    upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel

    # Cleanup bookkeeping: explicit flags instead of re-deriving the program
    # state from locals that may not exist yet when an error occurs.
    activeds = None
    tmpfile1_made = False
    results_started = False

    try:
        spss.Submit("PRESERVE.")
        spss.Submit("""CD "%s".""" % tempfile.gettempdir())
        wtvar = spss.GetWeightVar()
        if wtvar is not None:
            spss.Submit("COMPUTE %s=RND(%s)." % (wtvar, wtvar))
            spss.Submit("EXECUTE.")
        maxloops = 2 * spss.GetCaseCount()
        spss.Submit("""SET PRINTBACK=OFF MPRINT=OFF MXLOOPS=%s.""" % maxloops)
        activeds = spss.ActiveDataset()
        if activeds == "*":
            activeds = tempname("D")
            spss.Submit("DATASET NAME %s" % activeds)

        spss.Submit("DATASET COPY %s WINDOW=HIDDEN." % tmpdata1)
        spss.Submit("DATASET ACTIVATE %s WINDOW=ASIS." % tmpdata1)

        filt = spssaux.GetSHOW("FILTER", olang="english")
        if filt != "No case filter is in effect":
            # make the filter permanent in the working copy
            spss.Submit("SELECT IF %s." % _wkappa_filter_variable(filt))
            spss.Submit("EXECUTE.")
            spss.Submit("USE ALL.")
        if spssaux.getDatasetInfo(Info="SplitFile") != "":
            warntext.append(_("This procedure ignores split file status."))
            spss.Submit("SPLIT FILE OFF.")

        # keep complete cases only
        spss.Submit("COUNT %s=%s (MISSING)." % (tmpvar1, varlist))
        spss.Submit("SELECT IF %s=0." % tmpvar1)
        spss.Submit("EXECUTE.")
        validn = spss.GetCaseCount()
        if validn < 2:
            _wkappa_hidden_warning(omstag1, _("""There are too few complete cases. This command is not executed."""))
            return

        spss.Submit("""AGGREGATE /OUTFILE=* MODE=ADDVARIABLES
/%s=SD(%s)
/%s=SD(%s).""" % (tmpvar2, varnames[0], tmpvar3, varnames[1]))
        cur = _wkappa_open_cursor()
        try:
            datarow = cur.fetchone()
        finally:
            cur.close()
        sd1, sd2 = datarow[-2], datarow[-1]
        if min(sd1, sd2) == 0:
            _wkappa_hidden_warning(omstag1, _("""All ratings are the same for at least one rater. This command is not executed."""))
            return

        if warntext:
            _wkappa_hidden_warning(omstag1, " \n".join(warntext))

        spss.Submit("DELETE VARIABLES %s %s." % (tmpvar2, tmpvar3))
        tmpfile1_made = True
        spss.Submit("""AGGREGATE /OUTFILE=%s
/BREAK=%s
/%s=N.""" % (tmpfile1, varlist, tmpvar4))

        spss.Submit("""OMS /SELECT ALL EXCEPT=WARNINGS
/IF COMMANDS=['Variables to Cases']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag2)
        spss.Submit("VARSTOCASES /MAKE %s FROM %s." % (tmpvar5, varlist))
        spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag2)

        # distinct rating values across both raters (last variable after restructuring)
        cur = _wkappa_open_cursor()
        try:
            cats = set(row[-1] for row in iter(cur.fetchone, None))
        finally:
            cur.close()

        if any(item != round(item) for item in cats):
            _wkappa_hidden_warning(omstag1, _("""Some ratings are not integers. This command is not executed."""))
            return
        if min(cats) < 1.0:
            _wkappa_hidden_warning(omstag1, _("""Some ratings are less than 1. This command is not executed."""))
            return

        results_started = True
        spss.Submit("""AGGREGATE /OUTFILE=%s
/BREAK=%s
/%s=N.""" % (tmpfile2, tmpvar5, tmpvar6))
        spss.Submit("DATASET DECLARE %s WINDOW=HIDDEN." % tmpdata2)
        spss.Submit("""OMS /SELECT ALL EXCEPT=WARNINGS
/IF COMMANDS=['Matrix']
/DESTINATION VIEWER=NO
/TAG='"%s"'.""" % omstag3)
        spss.Submit(r"""MATRIX.
GET x /FILE=%s /VARIABLES=%s %s.
GET ratecats /FILE=%s /VARIABLES=%s.
COMPUTE size=MMAX(ratecats).
COMPUTE y=MAKE(size,size,0).
LOOP i=1 to NROW(y).
+ LOOP j=1 to NCOL(y).
+ LOOP k=1 to NROW(x).
+ DO IF (x(k,1)=i and x(k,2)=j).
+ COMPUTE y(i,j)=x(k,3).
+ END IF.
+ END LOOP.
+ END LOOP.
END LOOP.
COMPUTE wttype=%s.
COMPUTE wt=MAKE(NROW(y),NCOL(y),0).
LOOP i=1 to NROW(y).
+ LOOP j=1 to NCOL(y).
+ DO IF wttype=1.
+ COMPUTE wt(i,j)=1-(ABS(i-j)/(size-1)).
+ ELSE IF wttype=2.
+ COMPUTE wt(i,j)=1-((i-j)/(NROW(y)-1))**2.
+ END IF.
+ END LOOP.
END LOOP.
COMPUTE n=MSUM(y).
COMPUTE prop=y/n.
COMPUTE p_i=RSUM(prop).
COMPUTE p_j=CSUM(prop).
COMPUTE w_i=(wt*T(p_j))*MAKE(1,size,1).
COMPUTE w_j=MAKE(size,1,1)*(T(p_i)*wt).
COMPUTE po=MSUM(wt&*prop).
COMPUTE pe=MSUM(MDIAG(p_i)*wt*MDIAG(p_j)).
COMPUTE kstat=(po-pe)/(1-pe).
COMPUTE var0=(T(p_i)*((wt-(w_i+w_j))&**2)*T(p_j)-pe**2)/(n*(1-pe)**2).
DO IF var0>=0.
+ COMPUTE ase0=SQRT(var0).
ELSE.
+ COMPUTE ase0=-1.
END IF.
DO IF ase0>0.
+ COMPUTE z=kstat/ase0.
+ COMPUTE sig=1-CHICDF(z**2,1).
ELSE.
+ COMPUTE z=-1.
+ COMPUTE sig=-1.
END IF.
COMPUTE var1=(MSUM((prop&*((wt-(w_i+w_j)&*(1-kstat))&**2)))-(kstat-pe*(1-kstat))**2)/(n*(1-pe)**2).
DO IF var1>=0.
+ COMPUTE ase1=SQRT(var1).
ELSE.
+ COMPUTE ase1=-1.
END IF.
SAVE {wttype,kstat,ase1,z,sig,ase0} /OUTFILE=%s /VARIABLES=wttype,kstat,ase1,z,sig,ase0.
END MATRIX.""" % (tmpfile1, varlist, tmpvar4, tmpfile2, tmpvar5, wttype, tmpdata2))
        spss.Submit("""OMSEND TAG=['"%s"'].""" % omstag3)

        spss.Submit("DATASET ACTIVATE %s WINDOW=ASIS." % tmpdata2)
        # -1 is the MATRIX program's marker for "not computable"
        spss.Submit(r"""DO IF ase0=-1.
+ RECODE z sig (-1=SYSMIS).
END IF.
EXECUTE.
DELETE VARIABLES ase0.
RECODE ase1 (-1=SYSMIS).
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase1.""" % cilevel)
        spss.Submit("COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase1." % cilevel)
        spss.Submit("FORMATS kstat ase1 z sig lower upper (F11.3).")
        if wttype == 1:
            weightlabel = _("""Linear""")
        else:
            weightlabel = _("""Quadratic""")
        for name, label in (
                ("kstat", _("""Kappa""")),
                ("ase1", _("""Asymptotic Standard Error""")),
                ("z", _("""Z""")),
                ("sig", _("""P Value""")),
                ("lower", lowlabel),
                ("upper", upplabel),
                ("wttype", weightlabel)):
            spss.Submit("VARIABLE LABELS %s %s." % (name, _smartquote(label)))
        spss.Submit("EXECUTE.")

        spss.Submit("""OMS /SELECT TABLES
/IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag4)
        spss.Submit("""OMS /SELECT TEXTS
/IF COMMANDS=['Weighted Kappa'] LABELS=['Active Dataset']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag5)
        if warntext:
            # a warning table was already shown for this procedure, so hide
            # the second heading
            spss.Submit("""OMS /SELECT HEADINGS
/IF COMMANDS=['Weighted Kappa']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag6)
        try:
            cur = _wkappa_open_cursor()
            try:
                data = cur.fetchone()
            finally:
                cur.close()
            spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
            try:
                table = spss.BasePivotTable(_("Weighted Kappa"), "Kappa", caption=caption)
                # variable 0 is wttype (its label names the weighting);
                # variables 1-6 are kstat, ase1, z, sig, lower, upper
                table.SimplePivotTable(
                    rowdim=_("Weighting"),
                    rowlabels=[CellText.String(spss.GetVariableLabel(0))],
                    coldim="",
                    collabels=[spss.GetVariableLabel(i) for i in range(1, 7)],
                    cells=list(data[1:7]))
            finally:
                spss.EndProcedure()
        finally:
            if warntext:
                spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag6)
    finally:
        cleanup = ["DATASET CLOSE %s." % tmpdata1]
        if activeds is not None:
            cleanup.append("DATASET ACTIVATE %s WINDOW=ASIS." % activeds)
        if results_started:
            cleanup.append("""OMSEND TAG=['"%s"' '"%s"'].""" % (omstag4, omstag5))
            cleanup.append("DATASET CLOSE %s." % tmpdata2)
            cleanup.append("ERASE FILE=%s." % tmpfile2)
        if tmpfile1_made:
            cleanup.append("ERASE FILE=%s." % tmpfile1)
        # best effort, step by step: one failing step must not skip the rest
        for syntax in cleanup:
            _wkappa_try_submit(syntax)
        spss.Submit("RESTORE.")
def _fkappa_warning(text):
    """Show text in a one-cell Warnings pivot table of the Fleiss Kappa procedure"""
    spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
    try:
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        table[(CellText.String("1"), )] = CellText.String(text)
    finally:
        spss.EndProcedure()


def _fkappa_hidden_warning(omstag, text):
    """Show a warning table while OMS keeps the procedure's Notes table out of the Viewer"""
    spss.Submit("""OMS /SELECT TABLES
/IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag)
    try:
        _fkappa_warning(text)
    finally:
        spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag)


def _fkappa_open_cursor():
    """Open a read cursor, asking for isBinary=False when the spss module accepts it"""
    try:
        return spss.Cursor(isBinary=False)
    except Exception:
        return spss.Cursor()


def _fkappa_filter_variable(show_text):
    """Return SHOW FILTER text without a trailing "(FILTER)" marker.

    str.strip("(FILTER)") removes a *set of characters* from both ends, which
    also eats leading or trailing F, I, L, T, E, R letters of the variable
    name itself; only the marker suffix is removed here.
    NOTE(review): the "name (FILTER)" layout is inferred from the original
    strip() argument -- confirm against SHOW FILTER output."""
    marker = "(FILTER)"
    text = show_text.strip()
    if text.endswith(marker):
        text = text[:-len(marker)]
    return text.strip()


def _fkappa_try_submit(syntax):
    """Submit cleanup syntax, ignoring failures so the remaining cleanup still runs"""
    try:
        spss.Submit(syntax)
    except Exception:
        pass


def fleisskappaextension(variables, cilevel=95):
    """Display Fleiss' kappa, overall and per rating category, in pivot tables.

    variables -- variable specification, expanded by expandvarnames; at least
        two existing variables (one per rater) are required.
    cilevel -- confidence level in percent; values outside 50..99.999 are
        clamped with a warning.

    Invalid input and unusable data (too few complete cases, fewer than two
    distinct ratings) are reported in a Warnings table instead of raising.
    All work is done on a hidden copy of the active dataset with any case
    filter applied and split files turned off; temporary datasets and files
    are removed afterwards.

    NOTE: when a weight variable is in effect it is rounded in place in the
    active dataset before the working copy is made, and each case is written
    to the work file once per unit of weight."""

    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        _fkappa_warning(_("""An invalid variable has been specified. This command is not executed."""))
        return
    if len(varnames) < 2:
        _fkappa_warning(_("""At least two variables must be specified. This command is not executed."""))
        return

    warntext = []
    if cilevel < 50:
        warntext.append(_("CILEVEL cannot be less than 50%. It has been reset to 50%."))
        cilevel = 50
    if cilevel > 99.999:
        warntext.append(_("CILEVEL cannot be greater than 99.999%. It has been reset to 99.999%."))
        cilevel = 99.999
    if cilevel == int(cilevel):
        cilevel = int(cilevel)  # prints as "95" rather than "95.0" in labels
    varlist = " ".join(varnames)

    def tempname(prefix):
        return prefix + str(random.uniform(.1, 1))

    tmpvar1, tmpvar2, tmpvar3 = [tempname("V") for unused in range(3)]
    tmpfile1, tmpfile2 = tempname("F"), tempname("F")
    tmpdata1, tmpdata2, tmpdata3 = [tempname("D") for unused in range(3)]
    omstag1, omstag2, omstag3, omstag4, omstag5, omstag6 = [tempname("T") for unused in range(6)]
    lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
    upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel

    # Cleanup bookkeeping: explicit flags instead of re-deriving the program
    # state from locals that may not exist yet when an error occurs.
    activeds = None
    tmpfile1_made = False
    results_started = False

    try:
        spss.Submit("PRESERVE.")
        spss.Submit("""CD "%s".""" % tempfile.gettempdir())
        wtvar = spss.GetWeightVar()
        if wtvar is not None:
            spss.Submit("COMPUTE %s=RND(%s)." % (wtvar, wtvar))
            spss.Submit("EXECUTE.")
            wtdn = GetWeightSum(varnames)
        else:
            wtdn = spss.GetCaseCount()
        maxloops = wtdn + 1
        spss.Submit("""SET PRINTBACK=OFF MPRINT=OFF OATTRS=ENG MXLOOPS=%s.""" % maxloops)
        activeds = spss.ActiveDataset()
        if activeds == "*":
            activeds = tempname("D")
            spss.Submit("DATASET NAME %s" % activeds)

        spss.Submit("DATASET COPY %s WINDOW=HIDDEN." % tmpdata1)
        spss.Submit("DATASET ACTIVATE %s WINDOW=ASIS." % tmpdata1)

        filt = spssaux.GetSHOW("FILTER", olang="english")
        if filt != "No case filter is in effect":
            # make the filter permanent in the working copy
            spss.Submit("SELECT IF %s." % _fkappa_filter_variable(filt))
            spss.Submit("EXECUTE.")
            spss.Submit("USE ALL.")
        if spssaux.getDatasetInfo(Info="SplitFile") != "":
            warntext.append(_("This command ignores split file status."))
            spss.Submit("SPLIT FILE OFF.")

        # keep complete cases only; afterwards user-missing codes count as ratings
        spss.Submit("COUNT %s=%s (MISSING)." % (tmpvar1, varlist))
        spss.Submit("SELECT IF %s=0." % tmpvar1)
        spss.Submit("""EXECUTE.
MISSING VALUES ALL ().""")
        validn = spss.GetCaseCount()

        tmpfile1_made = True
        if wtvar is None:
            spss.Submit("SAVE OUTFILE=%s." % tmpfile1)
        else:
            # write every case once per unit of (rounded) weight
            spss.Submit("""DO IF %s >= 1.
+ LOOP #i=1 TO %s.
XSAVE OUTFILE=%s /KEEP=%s /DROP=%s.
+ END LOOP.
END IF.
EXECUTE.""" % (wtvar, wtvar, tmpfile1, varlist, wtvar))

        spss.Submit("""OMS /SELECT ALL EXCEPT=WARNINGS
/IF COMMANDS=['Variables to Cases']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag1)
        spss.Submit("VARSTOCASES /MAKE %s FROM %s." % (tmpvar2, varlist))
        spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag1)

        # distinct rating values across all raters (last variable after restructuring)
        cur = _fkappa_open_cursor()
        try:
            cats = set(row[-1] for row in iter(cur.fetchone, None))
        finally:
            cur.close()
        ncats = len(cats)
        nraters = len(varnames)
        neededn = max(ncats, nraters)

        if validn < neededn:
            _fkappa_hidden_warning(omstag2, _("""There are too few complete cases. This command is not executed."""))
            return
        if ncats < 2:
            _fkappa_hidden_warning(omstag2, _("""All ratings are the same. This command is not executed."""))
            return

        if warntext:
            _fkappa_hidden_warning(omstag2, " \n".join(warntext))

        results_started = True
        spss.Submit("""AGGREGATE /OUTFILE=%s
/BREAK=%s
/%s=N.""" % (tmpfile2, tmpvar2, tmpvar3))
        spss.Submit("DATASET DECLARE %s WINDOW=HIDDEN." % tmpdata2)
        spss.Submit("DATASET DECLARE %s WINDOW=HIDDEN." % tmpdata3)
        spss.Submit("""OMS /SELECT ALL EXCEPT=WARNINGS
/IF COMMANDS=['Matrix']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag3)
        spss.Submit(r"""MATRIX.
GET x /FILE=%s /VARIABLES=%s.
GET ratecats /FILE=%s /VARIABLES=%s.
COMPUTE n=NROW(x).
COMPUTE c=NROW(ratecats).
COMPUTE y=MAKE(n,c,0).
LOOP i=1 to n.
+ LOOP j=1 to NCOL(x).
+ LOOP k=1 to c.
+ DO IF x(i,j)=ratecats(k).
+ COMPUTE y(i,k)=y(i,k)+1.
+ END IF.
+ END LOOP.
+ END LOOP.
END LOOP.
COMPUTE k=NCOL(x).
COMPUTE pe=MSUM((CSUM(y)/MSUM(y))&**2).
COMPUTE pa=MSSQ(y)/(NROW(y)*k*(k-1))-(1/(k-1)).
COMPUTE kstat=(pa-pe)/(1-pe).
COMPUTE cp=(CSSQ(y)-CSUM(y))&/((k-1)&*CSUM(y)).
COMPUTE pj=CSUM(y)/MSUM(y).
COMPUTE one=MAKE(1,NCOL(pj),1).
COMPUTE qj=one-pj.
COMPUTE kj=(cp-pj)&/qj.
COMPUTE num=2*((pj*t(qj))**2-MSUM(pj&*qj&*(qj-pj))).
COMPUTE den=n*k*(k-1)*((pj*t(qj))**2).
COMPUTE ase=SQRT(num/den).
COMPUTE z=kstat/ase.
COMPUTE sig=1-CHICDF(z**2,1).
SAVE {kstat,ase,z,sig} /OUTFILE=%s /VARIABLES=kstat,ase,z,sig.
COMPUTE asej=MAKE(1,c,SQRT(2/(n*k*(k-1)))).
COMPUTE zj=kj&/asej.
COMPUTE sigj=one-CHICDF(zj&**2,1).
SAVE {ratecats,t(cp),t(kj),t(asej),t(zj),t(sigj)} /OUTFILE=%s /VARIABLES=category,cp,kstat,ase,z,sig.
END MATRIX.""" % (tmpfile1, varlist, tmpfile2, tmpvar2, tmpdata2, tmpdata3))
        spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag3)

        shared_labels = (
            ("kstat", _("""Kappa""")),
            ("ase", _("""Asymptotic Standard Error""")),
            ("z", _("""Z""")),
            ("sig", _("""P Value""")),
            ("lower", lowlabel),
            ("upper", upplabel))

        # overall statistics: one case holding kstat, ase, z, sig, lower, upper
        spss.Submit("DATASET ACTIVATE %s WINDOW=ASIS." % tmpdata2)
        spss.Submit("COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase." % cilevel)
        spss.Submit("COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase." % cilevel)
        spss.Submit("FORMATS kstat ase z sig lower upper (F11.3).")
        for name, label in shared_labels:
            spss.Submit("VARIABLE LABELS %s %s." % (name, _smartquote(label)))
        spss.Submit("EXECUTE.")
        cur = _fkappa_open_cursor()
        try:
            data1 = cur.fetchone()
        finally:
            cur.close()
        collabels1 = [spss.GetVariableLabel(i) for i in range(6)]
        celldata1 = list(data1[0:6])

        # per-category statistics: one case per rating category
        spss.Submit("DATASET ACTIVATE %s WINDOW=ASIS." % tmpdata3)
        spss.Submit("COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase." % cilevel)
        spss.Submit("COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase." % cilevel)
        spss.Submit("FORMATS category (F10.0) cp kstat ase z sig lower upper (F11.3).")
        category_labels = (
            ("category", _("""Rating Category""")),
            ("cp", _("""Conditional Probability"""))) + shared_labels
        for name, label in category_labels:
            spss.Submit("VARIABLE LABELS %s %s." % (name, _smartquote(label)))
        spss.Submit("EXECUTE.")

        spss.Submit("""OMS /SELECT TABLES
/IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag4)
        spss.Submit("""OMS /SELECT TEXTS
/IF COMMANDS=['Fleiss Kappa'] LABELS=['Active Dataset']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag5)
        if warntext:
            # a warning table was already shown for this procedure, so hide
            # the second heading
            spss.Submit("""OMS /SELECT HEADINGS
/IF COMMANDS=['Fleiss Kappa']
/DESTINATION VIEWER=NO
/TAG = '"%s"'.""" % omstag6)
        try:
            rlabels = []
            data2 = []
            cur = _fkappa_open_cursor()
            try:
                for datarow in iter(cur.fetchone, None):
                    rlabels.append(datarow[0])
                    # SimplePivotTable takes the cells as one flat list, row by row
                    data2.extend(datarow[1:])
            finally:
                cur.close()

            spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
            try:
                table1 = spss.BasePivotTable(_("Overall Kappa"), "Overall Kappa")
                # rowdim is a plain empty string: _("") would return the
                # message catalog header when a translation is active
                table1.SimplePivotTable(rowdim="",
                                        rowlabels=[CellText.String("Overall")],
                                        coldim="",
                                        collabels=collabels1,
                                        cells=celldata1)
                if any(item != round(item) for item in rlabels):
                    caption = _("Non-integer rating category values are truncated for presentation.")
                else:
                    caption = ""
                table2 = spss.BasePivotTable(
                    _("Kappas for Individual Categories"),
                    _("Individual Category Kappa Statistics"),
                    caption=caption)
                rowlabels = [CellText.String("{:>9.0f}".format(value)) for value in rlabels]
                # variable 0 is the category itself; 1-7 are the statistics
                collabels = [spss.GetVariableLabel(i) for i in range(1, 8)]
                table2.SimplePivotTable(rowdim=_(" Rating Category"),
                                        rowlabels=rowlabels,
                                        coldim="",
                                        collabels=collabels,
                                        cells=data2)
            finally:
                spss.EndProcedure()
        finally:
            if warntext:
                spss.Submit("""OMSEND TAG = ['"%s"'].""" % omstag6)
    finally:
        cleanup = ["DATASET CLOSE %s." % tmpdata1]
        if activeds is not None:
            cleanup.append("DATASET ACTIVATE %s WINDOW=ASIS." % activeds)
        if results_started:
            cleanup.append("""OMSEND TAG=['"%s"' '"%s"'].""" % (omstag4, omstag5))
            cleanup.append("DATASET CLOSE %s." % tmpdata2)
            cleanup.append("DATASET CLOSE %s." % tmpdata3)
            cleanup.append("ERASE FILE=%s." % tmpfile2)
        if tmpfile1_made:
            cleanup.append("ERASE FILE=%s." % tmpfile1)
        # best effort, step by step: one failing step must not skip the rest
        for syntax in cleanup:
            _fkappa_try_submit(syntax)
        spss.Submit("RESTORE.")
def buildspec(dims, dss, catvars, totvars, encoding, finalweight):
    """create raking specification and return control variable list and totals list

    dims is a list of dimension variables, categories, and totals
    dss, catvars, and totvars are alternative ways of specifying the same information
    dss is a list of dataset names, catvars a list of category variable names, and
    totvars a list of the corresponding control totals

    Each non-empty item of dims has the form [variable, cat1, total1, cat2, total2, ...].
    encoding is used to decode a variable name that is not already a str, for messages.
    finalweight is the name for the weight variable; it must not already exist.

    Returns (ctlvars, ctltotals): the control variable names and, in the same
    order, one dict per variable mapping float category to float total.
    The active dataset is given a generated name if it has none, and it is
    made active again after any control-totals dataset has been read.
    ValueError is raised for invalid specifications."""

    vardict = spssaux.VariableDict()
    if finalweight in vardict:
        raise ValueError(_("FINALWEIGHT cannot specify an existing variable name"))
    ctlvars = []
    ctltotals = []
    activedsname = spss.ActiveDataset()
    if activedsname == "*":  # unnamed
        activedsname = "D" + str(random.uniform(.1, 1))
        spss.Submit("DATASET NAME %s" % activedsname)

    # specifications given inline as variable / category / total lists
    for dim in dims:
        if not dim:
            continue
        varname = dim[0]
        if isinstance(varname, str):
            vvname = varname
        else:
            vvname = str(varname, encoding)
        if varname not in vardict:
            raise ValueError(_("A control total variable does not exist: %s") % vvname)
        if vardict[varname].VariableType != 0:
            raise ValueError(_("A nonnumeric variable was specified for a control dimension: %s") % vvname)
        # a name followed by category/total pairs always has odd length >= 3
        if len(dim) == 1 or len(dim) % 2 == 0:
            raise ValueError(_("An invalid set of values and totals was found for a control dimension: %s")
                             % " ".join(str(item) for item in dim))
        ctlvars.append(varname)
        try:
            # category totals can be numerical expressions
            # convert to a value after insuring that all numbers are floats
            # NOTE(security): eval executes arbitrary Python found in the
            # command specification; acceptable only for trusted syntax.
            ctltotals.append(dict((float(category), float(eval(decimalize(total))))
                                  for category, total in zip(dim[1::2], dim[2::2])))
        except Exception as err:
            raise ValueError(_("""Invalid category or category total for variable: %s""") % vvname) from err

    # specifications given as datasets holding category and total variables
    for i, ds in enumerate(dss):
        catvar = catvars[i]
        totvar = totvars[i]
        if not any([ds, catvar, totvar]):
            continue
        if ds and (catvar is None or totvar is None):
            raise ValueError(_("""A dataset was specified without the category or totals variable names: %s""") % ds)
        try:
            # error conditions include nonexistant dataset and variables and
            # type problems; they propagate to the caller unchanged
            spss.Submit("DATASET ACTIVATE %s" % ds)
            curs = spssdata.Spssdata([catvar, totvar], names=False)
            try:
                dta = curs.fetchall()
            finally:
                # close explicitly so the DATASET ACTIVATE below is never
                # submitted while a data cursor is still open
                curs.CClose()
            ctlvars.append(catvar)
            # A dataset value might be simply numeric or a string expression
            # NOTE(security): same eval caveat as above.
            ctltotals.append(dict((float(category), float(eval(decimalize(total))))
                                  for category, total in dta))
        finally:
            # always return to the dataset being raked
            spss.Submit("DATASET ACTIVATE %s" % activedsname)

    if not ctlvars:
        raise ValueError(_("""No raking specifications were given"""))

    # check for duplicate control variables (names compare caselessly)
    seen = set()
    duplicates = set()
    for name in ctlvars:
        lowered = name.lower()
        if lowered in seen:
            duplicates.add(lowered)
        seen.add(lowered)
    if duplicates:
        raise ValueError(_("""Duplicate control variables were specified: %s""")
                         % ", ".join(sorted(duplicates)))

    return ctlvars, ctltotals
def docorr(variables, withvars=None, clevel=95, method="fisher",
        include=False, exclude=False, listwise=False, pairwise=False):
    """Calculate confidence intervals for correlations based on CORRELATION output

    variables is the list of variable names to correlate.
    withvars is an optional list of WITH variable names.
    clevel is the confidence level as a percentage.
    method "bootstrap" runs BOOTSTRAP followed by CORRELATIONS and returns;
    any other value uses the matrix output of CORRELATIONS together with
    the ci function.
    include/exclude select the treatment of user missing values and
    listwise/pairwise the deletion method.  Each pair is mutually exclusive;
    the defaults are EXCLUDE and PAIRWISE.

    Raises ValueError if the active dataset is unnamed or if contradictory
    options are given.  If the CORRELATIONS command fails, the function
    returns without further processing."""

    activeds = spss.ActiveDataset()
    if activeds == "*":
        raise ValueError(
            _("""The active dataset must have a dataset name to use this procedure"""))
    if listwise and pairwise:
        raise ValueError(
            _("""Cannot specify both listwise and pairwise deletion"""))
    missing = "LISTWISE" if listwise else "PAIRWISE"
    if include and exclude:
        raise ValueError(
            _("""Cannot specify both include and exclude missing values"""))
    inclusion = "INCLUDE" if include else "EXCLUDE"

    allvars = " ".join(variables)
    if withvars:
        withlist = " ".join(withvars)
        allvars2 = allvars + " " + withlist
        allvarswith = allvars + " WITH " + withlist
    else:
        allvars2 = allvars
        allvarswith = allvars

    # values substituted into the command templates below
    subs = {
        "allvars2": allvars2,
        "allvarswith": allvarswith,
        "clevel": clevel,
        "missing": missing,
        "inclusion": inclusion,
    }

    if method == "bootstrap":
        # Each command starts on its own line so that its terminating
        # period is the last character on a line.
        spss.Submit(r"""PRESERVE.
SET RNG=MT.
BOOTSTRAP /VARIABLES INPUT = %(allvars2)s
    /CRITERIA CILEVEL=%(clevel)s CITYPE=PERCENTILE NSAMPLES=1000.
CORRELATIONS /VARIABLES = %(allvarswith)s
    /PRINT=NOSIG
    /MISSING=%(missing)s %(inclusion)s.
RESTORE.""" % subs)
        return

    # regular CIs
    # random names for the matrix dataset and the OMS request
    subs["dsname"] = "D" + str(random.uniform(.05, 1.))
    subs["omstag"] = "O" + str(random.uniform(.05, 1.))

    # run CORRELATIONS with MATRIX output.
    # Validation of variable list requirements is handled
    # by CORRELATIONS.
    try:
        spss.Submit(r"""oms /select all except = warnings/destination viewer=no
    /tag = "%(omstag)s".
dataset declare %(dsname)s.
correlations /variables = %(allvars2)s
    /missing=%(missing)s %(inclusion)s
    /matrix=out(%(dsname)s).
""" % subs)
    except spss.SpssError:
        # CORRELATIONS failed: nothing to process (omsend still runs below)
        return
    finally:
        spss.Submit("""omsend tag=%(omstag)s""" % subs)

    spss.Submit("dataset activate %(dsname)s." % subs)
    spss.Submit("""select if ROWTYPE_ eq "N" or ROWTYPE_ eq "CORR".""")
    spss.Submit("""sort cases by VARNAME_.""")

    # dictionary of variable names in matrix dataset
    matnames = {spss.GetVariableName(i): i
        for i in range(spss.GetVariableCount())}
    # Variables before ROWTYPE_ are passed on as split variables, the one
    # after it holds the row's variable name, and the remainder are the
    # matrix columns.
    rowtypeloc = matnames["ROWTYPE_"]
    uppervariables = {v.upper() for v in variables}
    # NOTE(review): stats is accumulated but is neither returned nor displayed
    # within this function as it stands - confirm against the intended reporting code.
    stats = []
    curs = spssdata.Spssdata()
    try:
        for case in curs:
            rowtype = case.ROWTYPE_.rstrip()
            if rowtype == "N":
                # remember the most recent N row; this must happen before the
                # screen below so that N rows that do not match are still captured
                ns = case[rowtypeloc + 2:]
            # screen out rows for any WITH variables
            if case[rowtypeloc + 1].upper().rstrip() not in uppervariables:
                continue
            if rowtype == "CORR":
                corrs = case[rowtypeloc + 2:]
                dta = cidata(splitvars=case[0:rowtypeloc],
                    variable=case[rowtypeloc + 1], ns=ns, corrs=corrs,
                    cis=ci(ns, corrs, clevel / 100.))
                stats.append(dta)
    finally:
        # release the data cursor even if processing a case fails
        curs.CClose()
def dopropor(num=None, denom=None, id=None, dsname="*", alpha=.05, adjust='bonferroni'):
    """Display confidence intervals for a set of proportions in a pivot table.

    num, denom, id, and dsname are passed to getvalues, which returns the
    numerators, denominators, and identifiers to analyze.  num and denom
    are required.
    alpha is the two-sided significance level used for the intervals.
    adjust is accepted but is not used by this function.

    Cases where the numerator or denominator is missing are discarded.
    The first remaining proportion, p0, is the reference against which the
    differences in the last three columns are calculated.

    Raises ValueError if num or denom is not given, if a numerator exceeds
    its denominator, if a denominator is not positive, or if no valid
    proportions remain."""

    if num is None or denom is None:
        raise ValueError("Error: NUM and DENOM keywords are required")

    currentds = spss.ActiveDataset()
    if currentds == "*":
        # name the active dataset so that it can be reactivated at the end
        currentds = "S" + str(random.uniform(0, 1))
        spss.Submit("DATASET NAME %s" % currentds)

    numvec, denomvec, idvec = getvalues(num, denom, id, dsname)

    # clean data, discard missing
    keep = []
    for i, numer in enumerate(numvec):
        den = denomvec[i]
        valid = numer is not None and den is not None
        keep.append(valid)
        if valid and (numer > den or den <= 0):
            raise ValueError(
                "Error: NUM value greater than DENOM value or zero denominator: %s, %s"
                % (numer, den))
    # prune missing values: each list has to be rebuilt explicitly, since
    # assigning to a loop variable would leave the lists untouched
    numvec = [x for flag, x in zip(keep, numvec) if flag]
    denomvec = [x for flag, x in zip(keep, denomvec) if flag]
    idvec = [x for flag, x in zip(keep, idvec) if flag]
    if not numvec:
        raise ValueError("Error: No valid proportions were found to analyze")

    alphalow = alpha / 2
    alphahigh = 1 - alphalow

    try:
        spss.StartDataStep()
    except Exception:
        # presumably pending transformations blocked the data step
        # (per the original TODO): run them and try again
        spss.Submit("EXECUTE")
        spss.StartDataStep()

    # calculate ci's via SPSS IDFs: build a scratch dataset with one case
    # per proportion
    ds = spss.Dataset(name=None)
    spss.SetActive(ds)
    ds.varlist.append("p", 0)
    ds.varlist.append("num", 0)
    ds.varlist.append("denom", 0)
    p0 = numvec[0] / denomvec[0]
    sdvec = []
    for numer, den in zip(numvec, denomvec):
        p1 = numer / den
        # standard deviation of the difference between p1 and p0
        sdvec.append(
            sqrt(p0 * (1 - p0) / denomvec[0] + p1 * (1 - p1) / den))
        ds.cases.append([p1, numer, den])
    spss.EndDataStep()

    # Each command starts on its own line so that its terminating period
    # is the last character on a line.
    cmd = r"""COMPUTE PLOWBI = IDF.BETA(%(alphalow)s, num + .5, denom-num + .5).
COMPUTE PHIGHBI = IDF.BETA(%(alphahigh)s, num + .5, denom - num + .5).
DO IF num > 0.
COMPUTE PLOWPOIS = (IDF.CHISQ(%(alphalow)s, 2*num)/2)/denom.
ELSE.
COMPUTE PLOWPOIS = 0.
END IF.
COMPUTE PHIGHPOIS = (IDF.CHISQ(%(alphahigh)s, 2*(num+1))/2) / denom.
COMPUTE ZTAIL = IDF.NORMAL(%(alphahigh)s, 0,1).
EXECUTE.""" % {"alphalow": alphalow, "alphahigh": alphahigh}
    spss.Submit(cmd)

    # retrieve the computed limits: the new variables follow p, num, and
    # denom, so they start at index 3
    plowbi = []
    phighbi = []
    plowpois = []
    phighpois = []
    spss.StartDataStep()
    ds = spss.Dataset(name="*")
    for case in ds.cases:
        for column, values in enumerate(
                (plowbi, phighbi, plowpois, phighpois), start=3):
            values.append(case[column])
    # ZTAIL is the last variable and has the same value in every case
    zalpha2 = case[-1]
    try:
        spss.SetActive(spss.Dataset(name=currentds))
        closeafter = False
    except Exception:
        # the original dataset could not be reactivated
        closeafter = True
    ds.close()
    spss.EndDataStep()

    from spss import CellText
    spss.StartProcedure("Proportions")
    table = spss.BasePivotTable("Proportion Confidence Intervals", "Proportions")
    titlefootnote = "Alpha = %.3f" % alpha
    if 0. in numvec:
        titlefootnote += " (One-sided %.3f when p = 0)" % (alpha / 2.)
    table.TitleFootnotes(titlefootnote)
    rowdim = table.Append(spss.Dimension.Place.row, "Proportions")
    coldim = table.Append(spss.Dimension.Place.column, "Statistics")
    cols = [
        "p",
        "Binomial\nLower CI",
        "Binomial\nUpper CI",
        "Poisson\nLower CI",
        "Poisson\nUpper CI",
        "Difference\nfrom p0",
        "Difference from p0\nLower CI",
        "Difference from p0\nUpper CI",
    ]
    table.SetCategories(coldim, [CellText.String(v) for v in cols])

    # row labels: a missing or empty identifier is replaced by the row number
    labels = []
    for i, v in enumerate(idvec):
        label = "" if v is None else str(v)
        labels.append(label or str(i + 1))
    idvec = labels
    table.SetCategories(rowdim, [CellText.String(v) for v in idvec])

    for i, (numer, den) in enumerate(zip(numvec, denomvec)):
        p1 = numer / den
        if i > 0:
            zdifflow = p1 - p0 - sdvec[i] * zalpha2
            zdiffhigh = p1 - p0 + sdvec[i] * zalpha2
        else:
            zdifflow = zdiffhigh = 0.
        table.SetCellsByRow(CellText.String(idvec[i]), [
            CellText.Number(v)
            for v in (p1, plowbi[i], phighbi[i], plowpois[i], phighpois[i],
                p1 - p0, zdifflow, zdiffhigh)
        ])
        if i == 0:
            # the reference row has no difference statistics
            for col in cols[-3:]:
                table[(CellText.String(idvec[0]),
                    CellText.String(col))] = CellText.String("-")
    spss.EndProcedure()

    if closeafter:
        # The parentheses matter: % binds more tightly than +, so without
        # them the random suffix would follow the command terminator.
        spss.Submit(r"""NEW FILE.
DATASET NAME %s.""" % ("S" + str(random.uniform(0, 1))))