Exemplo n.º 1
0
    def bar(self, var: str, title: str = '', label: str = '') -> object:
        """
        Draw a vertical bar chart of a character column using PROC SGPLOT.

        Use the contents method to check column types before calling this.

        :param var: the CHAR variable (column) to chart
        :param title: optional chart title; no title statement is emitted when empty
        :param label: optional LegendLABEL= value for the vbar statement
        :return: None when the code is only printed (nosub) or the output is displayed;
                 the submit result when the session is in batch mode
        """
        parts = [
            "proc sgplot data=%s.%s%s" % (self.libref, self.table,
                                          self._dsopts()),
            ";\n\tvbar " + var,
        ]
        if len(label) > 0:
            parts.append(" / LegendLABEL='" + label + "'")
        parts.append(";\n")
        if len(title) > 0:
            parts.append('\ttitle "' + title + '";\n')
        parts.append('run;\ntitle;')
        code = ''.join(parts)

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self._is_valid()
        if not result:
            # HTML is switched on around the submit and put back afterwards
            saved_html = self.HTML
            self.HTML = 1
            result = self.sas._io.submit(code)
            self.HTML = saved_html

        if self.sas.batch:
            return result
        DISPLAY(HTML(result['LST']))
Exemplo n.º 2
0
    def columnInfo(self):
        """
        Show the Variables section of PROC CONTENTS for this table.

        :return: the result of ``_returnPD`` (with its 'Type' column right-stripped) when
                 results is 'PANDAS'; the submit result in batch mode; otherwise None
                 after printing or displaying the listing
        """
        code = "proc contents data=%s.%s %s;ods select Variables;run;" % (
            self.libref, self.table, self._dsopts())

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        if self.results.upper() == 'PANDAS':
            pandas_code = "proc contents data=%s.%s %s ;ods output Variables=work._variables ;run;" % (
                self.libref, self.table, self._dsopts())
            frame = self._returnPD(pandas_code, '_variables')
            frame['Type'] = frame['Type'].str.rstrip()
            return frame

        result = self._is_valid()
        use_html = self.HTML
        if not result:
            if use_html:
                result = self.sas._io.submit(code)
            else:
                result = self.sas._io.submit(code, "text")

        if self.sas.batch:
            return result
        if use_html:
            DISPLAY(HTML(result['LST']))
        else:
            print(result['LST'])
Exemplo n.º 3
0
    def hist(self, var: str, title: str = '', label: str = '') -> object:
        """
        Draw a histogram (scale=count) with a density overlay for a numeric column.

        Use the contents method to check column types before calling this.

        :param var: the NUMERIC variable (column) to plot
        :param title: optional chart title; no title statement is emitted when empty
        :param label: optional LegendLABEL= value for the histogram statement
        :return: None when the code is only printed (nosub) or the output is displayed;
                 the submit result when the session is in batch mode
        """
        parts = [
            "proc sgplot data=%s.%s%s" % (self.libref, self.table,
                                          self._dsopts()),
            ";\n\thistogram " + var + " / scale=count",
        ]
        if len(label) > 0:
            parts.append(" LegendLABEL='" + label + "'")
        parts.append(";\n")
        if len(title) > 0:
            parts.append('\ttitle "' + title + '";\n')
        parts.append("\tdensity " + var + ';\nrun;\ntitle;')
        code = ''.join(parts)

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self._is_valid()
        if not result:
            # HTML is switched on around the submit and put back afterwards
            saved_html = self.HTML
            self.HTML = 1
            result = self.sas._io.submit(code)
            self.HTML = saved_html

        if self.sas.batch:
            return result
        DISPLAY(HTML(result['LST']))
Exemplo n.º 4
0
    def top(self,
            var: str,
            n: int = 10,
            order: str = 'freq',
            title: str = '') -> object:
        """
        Return the most commonly occurring items (levels) of a column.

        PROC FREQ writes the counts to tmpFreqOut and the first ``n`` rows are shown.

        :param var: the CHAR variable (column) to count
        :param n: how many rows of the frequency table to show (defaults to 10)
        :param order: value passed to the PROC FREQ order= option (defaults to 'freq')
        :param title: optional title for the printed table
        :return: the result of ``_returnPD`` when results is 'PANDAS'; the submit result
                 in batch mode; otherwise None after printing or displaying the listing
        """
        freq_step = "\n\ttables %s / out=tmpFreqOut;" % var + "\nrun;"

        code = "proc freq data=%s.%s %s order=%s noprint;" % (
            self.libref, self.table, self._dsopts(), order) + freq_step
        if len(title) > 0:
            code += '\ttitle "' + title + '";\n'
        code += "proc print data=tmpFreqOut(obs=%s); \nrun;" % n + 'title;'

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self._is_valid()
        if self.results.upper() == 'PANDAS':
            pandas_code = "proc freq data=%s.%s%s order=%s noprint;" % (
                self.libref, self.table, self._dsopts(), order)
            pandas_code += freq_step
            pandas_code += "\ndata tmpFreqOut; set tmpFreqOut(obs=%s); run;" % n
            return self._returnPD(pandas_code, 'tmpFreqOut')

        use_html = self.HTML
        if not result:
            if use_html:
                result = self.sas._io.submit(code)
            else:
                result = self.sas._io.submit(code, "text")

        if self.sas.batch:
            return result
        if use_html:
            DISPLAY(HTML(result['LST']))
        else:
            print(result['LST'])
Exemplo n.º 5
0
    def heatmap(self,
                x: str,
                y: str,
                options: str = '',
                title: str = '',
                label: str = '') -> object:
        """
        Draw a heat map of two columns using the PROC SGPLOT heatmap statement.

        Documentation link: http://support.sas.com/documentation/cdl/en/grstatproc/67909/HTML/default/viewer.htm#n0w12m4cn1j5c6n12ak64u1rys4w.htm

        :param x: x variable
        :param y: y variable
        :param options: display options (string) placed after the '/' of the heatmap statement
        :param title: graph title; no title statement is emitted when empty
        :param label: optional LegendLABEL= value, emitted as a heatmap statement option
        :return: None when the code is only printed (nosub) or the output is displayed;
                 the submit result when the session is in batch mode
        """
        code = "proc sgplot data=%s.%s %s;" % (self.libref, self.table,
                                               self._dsopts())
        code += "\n\theatmap x=%s y=%s" % (x, y)

        # Everything after the '/' must sit inside the heatmap statement, so the
        # statement is terminated exactly once, after all options are appended.
        stmt_options = []
        if len(options):
            stmt_options.append(options)
        if len(label) > 0:
            stmt_options.append("LegendLABEL='" + label + "'")
        if stmt_options:
            code += " / " + " ".join(stmt_options)
        code += ";\n"

        if len(title) > 0:
            code += "\ttitle '%s';\n" % title
        code += "run;\ntitle;"

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        ll = self._is_valid()
        if not ll:
            # HTML is switched on around the submit and put back afterwards
            html = self.HTML
            self.HTML = 1
            ll = self.sas._io.submit(code)
            self.HTML = html
        if not self.sas.batch:
            DISPLAY(HTML(ll['LST']))
        else:
            return ll
Exemplo n.º 6
0
    def score(self,
              file: str = '',
              code: str = '',
              out: 'SASdata' = None) -> 'SASdata':
        """
        Run model score code against this table in a data step.

        The data step reads this table and writes to ``out`` (or back onto this table
        when ``out`` is None). When ``file`` is non-empty it is %included; otherwise the
        ``code`` string is inserted into the step.

        :param file: a file reference to the SAS score code
        :param code: a string of valid SAS score code, used only when ``file`` is empty
        :param out: object whose libref/table name the output; defaults to updating in place
        :return: None when the code is only printed (nosub) or the output is displayed;
                 the submit result when the session is in batch mode
        """
        target = self if out is None else out
        outTable = target.table
        outLibref = target.libref

        score_code = code
        steps = [
            "data %s.%s%s;" % (outLibref, outTable, self._dsopts()),
            "set %s.%s%s;" % (self.libref, self.table, self._dsopts()),
        ]
        if len(file) > 0:
            steps.append('%%include "%s";' % file)
        else:
            steps.append("%s;" % score_code)
        steps.append("run;")
        code = ''.join(steps)

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return None

        result = self._is_valid()
        if not result:
            # HTML is switched on around the submit and put back afterwards
            saved_html = self.HTML
            self.HTML = 1
            result = self.sas._io.submit(code)
            self.HTML = saved_html

        if self.sas.batch:
            return result
        DISPLAY(HTML(result['LST']))
Exemplo n.º 7
0
    def contents(self):
        """
        Show PROC CONTENTS output for this table.

        :return: the result of ``_returnPD`` for the Attributes, EngineHost, Variables and
                 Sortedby ODS tables when results is 'PANDAS'; the submit result in batch
                 mode; otherwise None after printing or displaying the listing
        """
        code = "proc contents data=%s.%s%s;run;" % (self.libref, self.table,
                                                    self._dsopts())

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self._is_valid()
        if self.results.upper() == 'PANDAS':
            ods_tables = ''.join([
                "ods output Attributes=work._attributes;",
                "ods output EngineHost=work._EngineHost;",
                "ods output Variables=work._Variables;",
                "ods output Sortedby=work._Sortedby;",
            ])
            pandas_code = "proc contents data=%s.%s %s ;" % (
                self.libref, self.table, self._dsopts())
            pandas_code += ods_tables + "run;"
            return self._returnPD(
                pandas_code,
                ['_attributes', '_EngineHost', '_Variables', '_Sortedby'])

        use_html = self.HTML
        if not result:
            if use_html:
                result = self.sas._io.submit(code)
            else:
                result = self.sas._io.submit(code, "text")

        if self.sas.batch:
            return result
        if use_html:
            DISPLAY(HTML(result['LST']))
        else:
            print(result['LST'])
Exemplo n.º 8
0
    def scatter(self, x: str, y: list, title: str = '') -> object:
        """
        Plot one scatter statement per y column against a shared x column.

        :param x: the x axis variable; generally a time or continuous variable
        :param y: the y axis variable(s); a single column name or a list of column names
        :param title: optional chart title; no title statement is emitted when empty
        :return: None when the code is only printed (nosub) or the output is displayed;
                 the submit result when the session is in batch mode
        """
        code = "proc sgplot data=%s.%s%s;\n" % (self.libref, self.table,
                                                self._dsopts())
        if len(title) > 0:
            code += '\ttitle "' + title + '";\n'

        # anything that is not a list is treated as a single column
        columns = y if isinstance(y, list) else [y]
        code += ''.join(
            "\tscatter x=" + x + " y=" + column + ";\n" for column in columns)
        code += 'run;\ntitle;'

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self._is_valid()
        if not result:
            # HTML is switched on around the submit and put back afterwards
            saved_html = self.HTML
            self.HTML = 1
            result = self.sas._io.submit(code)
            self.HTML = saved_html

        if self.sas.batch:
            return result
        DISPLAY(HTML(result['LST']))
Exemplo n.º 9
0
    def means(self):
        """
        Show summary statistics (PROC MEANS) for the table.

        The data set options string is split at the first ';\\n\\tformat' so that the
        part before it stays on the PROC statement and the remainder follows it.

        :return: the result of ``_returnPD`` for the Summary ODS table when results is
                 'PANDAS'; the submit result in batch mode; otherwise None after printing
                 or displaying the listing
        """
        stats = "stackodsoutput n nmiss median mean std min p25 p50 p75 max;"
        ds_part, marker, remainder = self._dsopts().partition(';\n\tformat')
        trailing = marker + remainder

        code = "proc means data=%s.%s%s %s" % (self.libref, self.table,
                                               ds_part, stats)
        code += trailing + "run;"

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self._is_valid()

        if self.results.upper() == 'PANDAS':
            pandas_code = "proc means data=%s.%s %s %s %s ods output Summary=work._summary; run;" % (
                self.libref, self.table, ds_part, stats, trailing)
            return self._returnPD(pandas_code, '_summary')

        use_html = self.HTML
        if not result:
            if use_html:
                result = self.sas._io.submit(code)
            else:
                result = self.sas._io.submit(code, "text")

        if self.sas.batch:
            return result
        if use_html:
            DISPLAY(HTML(result['LST']))
        else:
            print(result['LST'])
Exemplo n.º 10
0
    def head(self, obs=5):
        """
        Show the first rows of the table.

        A copy of the table's data set options is made with ``obs`` set to the requested
        row count; the stored options are left untouched.

        :param obs: the number of rows to display. The default is 5
        :return: the result of ``_returnPD`` when results is 'PANDAS'; the submit result
                 in batch mode; otherwise None after printing or displaying the listing
        """
        topts = dict(self.dsopts, obs=obs)
        code = "proc print data=%s.%s%s;run;" % (self.libref, self.table,
                                                 self.sas._dsopts(topts))

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        if self.results.upper() == 'PANDAS':
            pandas_code = "data _head ; set %s.%s %s; run;" % (
                self.libref, self.table, self.sas._dsopts(topts))
            return self._returnPD(pandas_code, '_head')

        result = self._is_valid()
        use_html = self.HTML
        if not result:
            if use_html:
                result = self.sas._io.submit(code)
            else:
                result = self.sas._io.submit(code, "text")

        if self.sas.batch:
            return result
        if use_html:
            DISPLAY(HTML(result['LST']))
        else:
            print(result['LST'])
Exemplo n.º 11
0
    def execute_table(self, _output_type, **kwargs: dict) -> 'SASresults':
        """
        Build and run a PROC TABULATE step.

        You must specify an output type of 'HTML', 'text', or 'Pandas'; any other value
        builds the code but produces no output. Convenience wrappers exist, see:
            .text_table()
            .table()
            .to_dataframe()

        :param _output_type: style of output to use
        :param left: the query for the left side of the table (taken from kwargs)
        :param top: the query for the top of the table (taken from kwargs, optional)
        :return: the result of ``to_nested_dataframe`` for 'Pandas'; the submit result for
                 'HTML' in batch mode; otherwise None
        :raises ValueError: for 'HTML' output outside batch mode, when the log check
                            reports a failure
        """
        left = kwargs.pop('left', None)
        top = kwargs.pop('top', None)

        # both sides contribute their class and analysis variables
        gathered = {'classes': set(), 'vars': set()}
        left._gather(gathered)
        if top:
            top._gather(gathered)

        if top:
            table = '%s, %s' % (str(left), str(top))
        else:
            table = str(left)

        proc_kwargs = {
            'cls': ' '.join(gathered['classes']),
            'var': ' '.join(gathered['vars']),
            'table': table,
        }

        # whatever is left in kwargs is passed through (currently only 'where' is allowed)
        proc_kwargs.update(kwargs)

        # The statements are validated through SASProcCommons even though the
        # submit itself is done here rather than through that class.
        required_options = {'cls', 'var', 'table'}
        allowed_options = {'cls', 'var', 'table', 'where'}
        verifiedKwargs = SASProcCommons._stmt_check(self, required_options,
                                                    allowed_options,
                                                    proc_kwargs)

        # pandas output is read back from the out= data set
        out_clause = " out=temptab" if _output_type == 'Pandas' else ""
        code = "proc tabulate data=%s.%s %s%s;\n" % (
            self.data.libref, self.data.table, self.data._dsopts(),
            out_clause)

        # 'cls' is spelled 'class' in the generated statement
        code += ''.join(
            "  %s %s;\n" % ('class' if keyword == 'cls' else keyword, value)
            for keyword, value in verifiedKwargs.items())
        code += "run;"

        # nosub mode: show the generated SAS instead of running it
        if self.sas.nosub:
            print(code)
            return

        result = self.data._is_valid()

        if _output_type == 'HTML':
            if not result:
                saved_html = self.data.HTML
                self.data.HTML = 1
                result = self.sas._io.submit(code)
                self.data.HTML = saved_html
            if self.sas.batch:
                return result
            DISPLAY(HTML(result['LST']))
            check, errorMsg = self.data._checkLogForError(result['LOG'])
            if not check:
                raise ValueError("Internal code execution failed: " +
                                 errorMsg)

        elif _output_type == 'text':
            if not result:
                saved_html = self.data.HTML
                self.data.HTML = 1
                result = self.sas._io.submit(code, 'text')
                self.data.HTML = saved_html
            print(result['LST'])
            return

        elif _output_type == 'Pandas':
            return self.to_nested_dataframe(code)
Exemplo n.º 12
0
    def tail(self, obs=5):
        """
        Display the last n rows of a table.

        The row count is obtained by submitting a PROC SQL count query and reading the
        ``lastobs=<count> tom`` marker back from the log. That query is always submitted,
        even in nosub mode, so ``self.sas.nosub`` is temporarily forced to False; it is
        restored in a ``finally`` so a failed submit or an unparsable log cannot leave
        the session with nosub switched off.

        :param obs: the number of rows of the table that you want to display. The default is 5
        :return: the result of ``_returnPD`` when results is 'PANDAS'; the submit result
                 in batch mode; otherwise None after printing or displaying the listing
        :raises ValueError: if no integer row count can be parsed from the log
        """
        code = "proc sql;select count(*) format best32. into :lastobs from " + self.libref + '.' + self.table + self._dsopts(
        ) + ";%put lastobs=&lastobs tom;quit;"

        nosub = self.sas.nosub
        self.sas.nosub = False
        try:
            le = self._is_valid()
            if not le:
                ll = self.sas.submit(code, "text")

                # the last 'lastobs=' in the log is the resolved %put line, not the
                # echoed source statement
                lastobs = ll['LOG'].rpartition("lastobs=")
                lastobs = lastobs[2].partition(" tom")
                lastobs = int(lastobs[0])
            else:
                lastobs = obs

            # clamp so tables shorter than obs start at the first row
            firstobs = lastobs - (obs - 1)
            if firstobs < 1:
                firstobs = 1

            topts = dict(self.dsopts)
            topts['obs'] = lastobs
            topts['firstobs'] = firstobs

            code = "proc print data=" + self.libref + '.' + self.table + self.sas._dsopts(
                topts) + ";run;"
        finally:
            # always hand the session back in the mode the caller had set
            self.sas.nosub = nosub

        if self.sas.nosub:
            print(code)
            return

        if self.results.upper() == 'PANDAS':
            code = "data _tail ; set %s.%s %s; run;" % (
                self.libref, self.table, self.sas._dsopts(topts))
            return self._returnPD(code, '_tail')
        else:
            if self.HTML:
                if not le:
                    ll = self.sas._io.submit(code)
                else:
                    ll = le
                if not self.sas.batch:
                    DISPLAY(HTML(ll['LST']))
                else:
                    return ll
            else:
                if not le:
                    ll = self.sas._io.submit(code, "text")
                else:
                    ll = le
                if not self.sas.batch:
                    print(ll['LST'])
                else:
                    return ll