def getsplitinfo():
    """Report the split-file state of the active dataset.

    Returns a pair (split variable names, split mode).  The mode is
    "layered" when the SHOW SPLIT text starts with "layer" and "separate"
    otherwise.  When there are no split variables the pair is ([], None)."""

    names = spss.GetSplitVariableNames()
    if not names:
        return [], None

    # the SHOW text is requested with olang="english" so the prefix test
    # below is made against English output
    shown = spssaux.getShow("split", olang="english").lower()
    mode = "layered" if shown.startswith("layer") else "separate"
    return names, mode
def dooptbinex(target, binvars, suffix=["_bin"], minsize=10,
    alpha=.05, overwrite=False, syntaxoutfile=None, contintervals=10,
    treetable=True, recodetable=True, execute=True):
    """Execute STATS OPTBINEX command

    For each variable in binvars a TREE command (CHAID, MAXDEPTH=1) is run
    against target.  The split values found in the resulting tree table are
    passed to getrules, and the lines it returns are written to a syntax
    file that defines a new variable named <variable><suffix>.

    target - dependent variable name substituted into the TREE command
    binvars - names of the variables to bin
    suffix - sequence of strings; joined and appended to each input name
    minsize - MINCHILDSIZE value for TREE
    alpha - ALPHASPLIT value for CHAID
    overwrite - if False, existing variables with an output name are an error
    syntaxoutfile - file to receive the generated syntax; if None a
        temporary file is used and removed before returning
    contintervals - INTERVALS value for CHAID
    treetable - whether the tree table is also shown in the Viewer
    recodetable - whether to display the "Variable Binning" pivot table
    execute - whether to run the generated syntax with INSERT

    Raises ValueError if the Decision Trees option is reported as
    unlicensed, if there is nothing to do, if a new name is longer than 64
    characters, or if a new name would overwrite an existing variable."""

    # Check that TREES procedure is licensed if possible
    treesavailable = True
    try:    # no client in external mode
        SpssClient.StartClient()
        treesavailable = SpssClient.IsOptionAvailable(SpssClient.LicenseOption.TREEVIEW)
    except:
        # deliberately best effort: if the client cannot be queried the
        # option is assumed to be available
        pass
    finally:
        SpssClient.StopClient()
    if not treesavailable:
        raise ValueError(_("""Error: This command requires the Decision Trees option, which is not licensed."""))

    if syntaxoutfile is None and not execute:
        raise ValueError(_("""No syntax output was specified, and execution was not requested.
There is nothing to do."""))
    suffix = "".join(suffix)   #tokenlist may come through with mult elements
    outnames = [v + suffix for v in binvars]
    toolong = [v for v in outnames if len(v) > 64]  # wrong metric for Unicode
    if toolong:
        raise ValueError(_("""The following new names exceed 64 bytes.
Please choose a shorter suffix or rename the input variables:\n%s""") % " ".join(toolong))
    vardict = spssaux.VariableDict()
    if not overwrite:
        existingnames = set(v.lower() for v in vardict.variables)
        onames = existingnames.intersection(set(v.lower() for v in outnames))
        if onames:
            raise ValueError(_("""Error: The following variables would be overwritten:\n%s""")\
                % " ".join(onames))

    xmlws = "x" + str(random.uniform(.1,1))
    visible = "yes" if treetable else "no"
    tempdir = tempfile.gettempdir()
    rulesfilespec = os.path.join(tempdir, "F" + str(random.uniform(.1,1)))

    treetemplate = """TREE %(target)s BY %(indvar)s    
/TREE DISPLAY=NONE /PRINT TREETABLE /GAIN SUMMARYTABLE=NO
/RULES OUTFILE="%(rulesfilespec)s"
/GROWTHLIMIT MAXDEPTH=1 MINPARENTSIZE=10 MINCHILDSIZE=%(minsize)s
/CHAID ALPHASPLIT=%(alpha)s INTERVALS=%(contintervals)s.
"""
    # explicit substitution values; "indvar" is filled in per variable
    treeparams = {"target": target, "rulesfilespec": rulesfilespec,
        "minsize": minsize, "alpha": alpha, "contintervals": contintervals}

    # the OMS start/end commands do not depend on the variable being binned
    # set OMS to use language-invariant text
    omsstart = """set oattrs=eng.
    oms select tables /if subtypes='TreeTable'
    /destination format=oxml xmlworkspace="%(xmlws)s" viewer=%(visible)s
    /tag = "%(xmlws)s".
    """ % {"xmlws": xmlws, "visible": visible}
    omsend = """omsend tag="%(xmlws)s".""" % {"xmlws": xmlws}

    oattrs = spssaux.getShow("OATTRS")  # remembered so it can be restored
    if syntaxoutfile is None:
        insertfile = os.path.join(tempdir, "F" + str(random.uniform(.1,1)))
    else:
        insertfile = syntaxoutfile
    insertfilef = open(insertfile, "wb")

    # run TREES for each independent variable and accumulate resulting tranformations

    empty = True
    failedvars = []
    outvars = []
    outlabels = []
    try:
        try:
            for indvar in binvars:
                treeparams["indvar"] = indvar
                spss.Submit(omsstart)
                try:
                    spss.Submit(treetemplate % treeparams)
                finally:
                    # always end the OMS request, even if TREE failed, so it
                    # does not stay active after this function exits
                    spss.Submit(omsend)
                labels = spss.EvaluateXPath(xmlws, "/", 
                    """//pivotTable//group[@text_eng="Primary Independent Variable"]/category[@text_eng="Split Values"]/cell/@text""")
                if not labels:
                    failedvars.append(indvar)
                    continue
                empty = False
                outputname = indvar + suffix
                if recodetable:
                    outvars.append(outputname)
                    outlabels.append(labels)
                definitions = getrules(rulesfilespec, outputname, labels, 
                    vardict[indvar].VariableLabel)  # also removes temporary file
                insertfilef.writelines([line + "\n" for line in definitions])
        finally:
            insertfilef.close()
            try:
                spss.Submit("SET OATTRS=%s" % oattrs)  # restore setting
            finally:
                # The workspace does not exist if no OMS request completed
                # (e.g. binvars is empty or the first command failed); a
                # failure to delete it must not hide the real error.
                try:
                    spss.DeleteXPathHandle(xmlws)
                except spss.SpssError:
                    pass
        if execute and not empty:
            spss.Submit("""INSERT FILE="%s".""" % insertfile)
    finally:
        # the temporary syntax file is removed on failure as well as success
        if syntaxoutfile is None and os.path.exists(insertfile):
            os.remove(insertfile)

    if failedvars or recodetable:
        from spss import CellText
        StartProcedure(_("Extended Optimal Binning"), "STATSOPTBINEX")
        try:
            if failedvars:
                wtable = spss.BasePivotTable("Warnings ", "Warnings")
                wtable.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
                rowLabel = CellText.String("1")
                wtable[(rowLabel,)] = \
                CellText.String(_("""These variables could not be binned.  New variables were not created for them.\n%s""") % " ".join(failedvars))

            if recodetable and not empty:
                table = spss.BasePivotTable(_("Variable Binning"), "OPTBIN")
                table.Append(spss.Dimension.Place.row, _("Variable"))
                table.Append(spss.Dimension.Place.row, _("Value"))
                table.Append(spss.Dimension.Place.column, _("Definition"), 
                    hideName=True)
                reccoldef = CellText.String("Definition")
                # outvars and outlabels are appended to in lockstep above
                for outvar, labels in zip(outvars, outlabels):
                    var = CellText.String(outvar)
                    for j, val in enumerate(labels):
                        rec = CellText.String(j)
                        recval = CellText.String(val)
                        table[(var, rec, reccoldef)] = recval
        finally:
            # close the procedure even if building a table failed
            spss.EndProcedure()
def arguments(name, dir, mode, proc):
    """Validate the run arguments and read the run's setup file.

    name -- run name; letters, digits, underscore, blank and hyphen only
    dir  -- optional directory (may be quoted); when empty the SPSS current
            working directory is used
    mode -- optional, "debug" or "test" (case-insensitive)
    proc -- procedure name; "combine" skips the datain file-name checks and
            makes a dataout statement mandatory

    The setup text is read from <path>.set, where path is the directory
    joined with the run name.  The "datain" and "dataout" statements are
    extracted from it; a relative dataout is prefixed with the SPSS current
    working directory and normalized.

    Returns [path, datain, dataout, setup] on success.  On any problem the
    message is printed and None is returned; a setup file that cannot be
    opened is reported as "Missing setup." like an empty one.
    """
    msg = None
    path = datain = dataout = setup = cwd = None
    validpath = r"^[~\.\\/:\w -]+$"  # characters accepted in file/directory names

    if not name:  # name
        msg = "Missing run name."
    else:
        name = name.strip()
        if not re.match(r"^[\w -]+$", name):  # check name
            msg = "Invalid name."

    if not msg:
        cwd = spssaux.getShow("DIRECTORY")  # spss current working directory
        if not dir:
            path = os.path.join(cwd, name)
        else:  # specified directory
            dir = dir.strip()
            unquoted = _arguments_unquote(dir)
            if unquoted is not None:
                dir = unquoted
            if not re.match(validpath, dir):  # check dir
                msg = "Invalid directory."
            else:
                path = os.path.join(os.path.normpath(dir), name)

    if not msg and mode:  # mode
        mode = mode.strip()
        if not re.match(r"^(debug|test)$", mode, flags=re.IGNORECASE):
            msg = "Mode error."

    if not msg:  # get the setup
        try:
            with open(path + ".set", "r") as f:
                setup = f.read()
        except (IOError, OSError):
            # report an unreadable setup file the same way as every other
            # problem instead of letting the exception escape
            setup = ""
        if not setup:
            msg = "Missing setup."

    if not msg:  # datain
        m = re.search(r"(^|;)\s*datain\s+([^;]+);", setup,
                      flags=re.IGNORECASE)
        if not m:
            msg = "Missing datain."
        else:
            datain = m.group(2).strip()
            if proc != "combine":
                # keep only the first file name: a leading quoted string,
                # otherwise everything up to the first blank
                unquoted = _arguments_unquote(datain, whole=False)
                if unquoted is not None:
                    datain = unquoted
                else:
                    m = re.match(r"^([^ ]+)", datain)
                    if m:
                        datain = m.group(1).strip()
                if not re.match(validpath, datain):  # check name
                    msg = "Invalid datain."

    if not msg:  # dataout
        m = re.search(r"(^|;)\s*dataout\s+([^;]+);",
                      setup,
                      flags=re.IGNORECASE)
        if not m:
            dataout = None  # no dataout
            if proc == "combine":
                msg = "Missing dataout."
        else:
            dataout = m.group(2).strip()
            m = re.match(r"^(.+)\s+(all|con|concat|concatenate)$",
                         dataout,
                         flags=re.IGNORECASE)  # remove all or concatenate
            if m:
                dataout = m.group(1).strip()
            unquoted = _arguments_unquote(dataout)
            if unquoted is not None:
                dataout = unquoted
            if not re.match(validpath, dataout):  # check name
                msg = "Invalid dataout."
            else:
                if sys.platform.startswith("win"):  # windows
                    m = re.match(r"^([a-zA-Z]:|[\\/])", dataout)
                else:  # linux
                    m = re.match("^(~|/)", dataout)
                if not m:  # not full path
                    dataout = os.path.join(
                        cwd, dataout)  # prefix the current working directory
                dataout = os.path.normpath(dataout)  # normalize the path

    if msg:
        print(msg)  # print the error message
        return None
    return [path, datain, dataout, setup]


def _arguments_unquote(text, whole=True):
    """Return the stripped content of a leading "..." or '...' in text.

    With whole=True the quoted string must span all of text.  Returns None
    when text does not start with such a quoted string."""
    end = "$" if whole else ""
    for pattern in (r'^"([^"]+)"', r"^'([^']+)'"):
        m = re.match(pattern + end, text)
        if m:
            return m.group(1).strip()
    return None
# Example #4
# 0
def putdata(name=None,
            dir=None,
            mode=None,
            dataout=None,
            impl=None,
            mult=None):
    """Validate the arguments, run "putdata" through srclib and submit its output.

    name    -- run name (word characters and hyphens, starting with a word
               character)
    dir     -- optional directory, quoted or bare; when empty the SPSS
               current working directory is used
    mode    -- optional, "debug" or "test" (case-insensitive)
    dataout -- required output path, quoted or bare; a relative path is
               prefixed with the SPSS current working directory
    impl    -- optional, "all" or a number
    mult    -- optional, "all" or a number

    After a successful srclib.execute call the text of <dataout>.out is
    submitted to SPSS; unless a mode is given, <dataout>.imp and
    <dataout>.out are then removed.

    Nothing is returned.  Any problem is reported by printing a message.  A
    bare (unquoted) dir or dataout containing characters outside
    ~ . \\ / : word characters and hyphen is rejected rather than being
    cut off at the first such character.
    """
    msg = None
    path = None
    dirgiven = bool(dir)

    if not name:  # name
        msg = "Missing run name."
    else:
        m = re.match(r"^\s*(\w[\w\-]*)\s*$", name)
        if m:
            name = m.group(1)
        else:
            msg = "Run name error."

    if not msg and dirgiven:  # specified directory
        dir = _putdata_unquote(dir)
        if dir is None:
            msg = "Invalid directory."

    if not msg:
        cwd = spssaux.getShow("DIRECTORY")  # spss current working directory
        if dirgiven:
            path = os.path.join(os.path.normpath(dir), name)
        else:
            path = os.path.join(cwd, name)

    if not msg and mode:  # mode
        m = re.match(r"^\s*(debug|test)\s*$", mode, flags=re.IGNORECASE)
        if m:
            mode = m.group(1).lower()
        else:
            msg = "Mode error."

    if not msg:  # dataout
        if not dataout:
            msg = "Missing dataout."
        else:
            dataout = _putdata_unquote(dataout)
            if dataout is None:
                msg = "Invalid dataout."
            else:
                if sys.platform.startswith("win"):  # windows
                    m = re.match(r"^([a-zA-Z]:|[\\/])", dataout)
                else:  # linux
                    m = re.match("^(~|/)", dataout)
                if not m:  # not full path
                    dataout = os.path.join(
                        cwd, dataout)  # prefix the current working directory
                dataout = os.path.normpath(dataout)  # normalize the path

    if not msg and impl:  # impl
        m = re.match(r"^\s*(all|\d+)\s*$", impl, flags=re.IGNORECASE)
        if m:
            impl = m.group(1).lower()
        else:
            msg = "Invalid implicate."

    if not msg and mult:  # mult
        m = re.match(r"^\s*(all|\d+)\s*$", mult, flags=re.IGNORECASE)
        if m:
            mult = m.group(1).lower()
        else:
            msg = "Invalid multiple."

    if not msg:
        args = ["/dataout={0}".format(dataout)]  # execute putdata
        if impl:
            args.append("/impl={0}".format(impl))
        if mult:
            args.append("/mult={0}".format(mult))
        rc = srclib.execute("putdata", path, mode, args)
        if rc != 0:
            msg = "Abnormal termination of putdata"
        elif not os.path.exists(dataout + ".out"):  # write the imputed data
            msg = "Missing " + dataout + ".out file"
        else:
            with open(dataout + ".out", "r") as f:
                cmd = f.read()
            spss.Submit(cmd)  # execute the command
            if not mode:
                os.remove(dataout + ".imp")
                os.remove(dataout + ".out")

    if msg:
        print(msg)  # print the error message


def _putdata_unquote(text):
    """Return the path held in text, or None if it has invalid characters.

    Accepts a double-quoted or single-quoted path (blanks allowed inside the
    quotes) or a bare path without blanks; surrounding whitespace is
    ignored.  All three forms must match the whole of text."""
    m = re.match(r'^\s*"([~\.\\/:\w\- ]*)"\s*$', text)  # double quotes
    if not m:
        m = re.match(r"^\s*'([~\.\\/:\w\- ]*)'\s*$", text)  # single quotes
    if m:
        return m.group(1).strip()
    m = re.match(r"^\s*([~\.\\/:\w\-]*)\s*$", text)  # no quotes
    if m:
        return m.group(1)
    return None