Exemplo n.º 1
0
 def getMenuInfo(self, entity, dataHandler):
     """Return the menu entries offered for ``entity``.

     A single "Download as File" entry is returned when ``dataFrameMisc``
     reports ``entity`` as a PySpark or pandas DataFrame; any other entity
     yields an empty list. ``dataHandler`` is accepted but not consulted.
     """
     isDataFrame = (dataFrameMisc.isPySparkDataFrame(entity)
                    or dataFrameMisc.isPandasDataFrame(entity))
     if not isDataFrame:
         return []

     downloadEntry = {"categoryId": "Download", "title": "Download as File", "icon": "fa-download", "id": "downloadFile"}
     return [downloadEntry]
Exemplo n.º 2
0
def getDataHandler(options, entity):
    """Pick the data handler matching the kind of ``entity``.

    Returns a ``PySparkDataFrameDataHandler`` or a
    ``PandasDataFrameDataHandler`` built from ``(options, entity)``, or
    ``None`` when ``entity`` is neither kind of DataFrame.
    """
    handler = None
    if dataFrameMisc.isPySparkDataFrame(entity):
        handler = PySparkDataFrameDataHandler(options, entity)
    elif dataFrameMisc.isPandasDataFrame(entity):
        handler = PandasDataFrameDataHandler(options, entity)
    return handler
Exemplo n.º 3
0
 def getMenuInfo(self, entity):
     """Return the download menu for ``entity``.

     The list holds one "Download as File" entry when ``entity`` is a
     PySpark or pandas DataFrame according to ``dataFrameMisc``; otherwise
     it is empty.
     """
     downloadable = dataFrameMisc.isPySparkDataFrame(entity) or dataFrameMisc.isPandasDataFrame(entity)
     return [
         {"categoryId": "Download", "title": "Download as File", "icon": "fa-download", "id": "downloadFile"}
     ] if downloadable else []
 def getMenuInfo(self, entity, dataHandler):
     """Return the "Table" menu entries available for an entity.

     When ``dataHandler`` is not ``None`` its ``entity`` attribute replaces
     the ``entity`` argument. DataFrames (PySpark or pandas) get a single
     "DataFrame Table" entry, a GraphFrame gets "Graph Vertices" and
     "Graph Edges" entries, and anything else gets an empty list.
     """
     def tableEntry(title, icon, entryId):
         # All entries share the "Table" category; key order is kept stable.
         return {
             "categoryId": "Table",
             "title": title,
             "icon": icon,
             "id": entryId
         }

     target = entity if dataHandler is None else dataHandler.entity

     if dataFrameMisc.isPySparkDataFrame(target) or dataFrameMisc.isPandasDataFrame(target):
         return [tableEntry("DataFrame Table", "fa-table", "dataframe")]

     if dataFrameMisc.fqName(target) == "graphframes.graphframe.GraphFrame":
         return [
             tableEntry("Graph Vertices", "fa-location-arrow", "vertices"),
             tableEntry("Graph Edges", "fa-link", "edges"),
         ]

     return []
Exemplo n.º 5
0
def getDataHandler(options, entity):
    """Return the data handler appropriate for ``entity``, or ``None``.

    PySpark and pandas DataFrames get their dedicated handler classes; a
    ``StreamingDataAdapter`` is asked for its own display handler via
    ``getDisplayDataHandler``. Any other entity yields ``None``.
    """
    if dataFrameMisc.isPySparkDataFrame(entity):
        return PySparkDataFrameDataHandler(options, entity)

    if dataFrameMisc.isPandasDataFrame(entity):
        return PandasDataFrameDataHandler(options, entity)

    if isinstance(entity, StreamingDataAdapter):
        return entity.getDisplayDataHandler(options, entity)

    return None
Exemplo n.º 6
0
def getDataHandler(options, entity):
    """Select a data handler based on the kind of ``entity``.

    Checks are made in this order: PySpark DataFrame, pandas DataFrame,
    dict or array of dicts (``JSONDataHandler``), then
    ``StreamingDataAdapter`` (which supplies its own display handler).
    ``None`` is returned when nothing matches.
    """
    if dataFrameMisc.isPySparkDataFrame(entity):
        return PySparkDataFrameDataHandler(options, entity)

    if dataFrameMisc.isPandasDataFrame(entity):
        return PandasDataFrameDataHandler(options, entity)

    looksLikeJson = isinstance(entity, dict) or isArrayOfDict(entity)
    if looksLikeJson:
        return JSONDataHandler(options, entity)

    if isinstance(entity, StreamingDataAdapter):
        return entity.getDisplayDataHandler(options, entity)

    return None
Exemplo n.º 7
0
 def getMenuInfo(self, entity, dataHandler):
     """Build the list of "Table" menu entries for an entity.

     If ``dataHandler`` is given, the entity it carries is used instead of
     the ``entity`` argument. A PySpark or pandas DataFrame produces one
     "DataFrame Table" entry; a GraphFrame produces a vertices entry and an
     edges entry; everything else produces an empty list.
     """
     if dataHandler is not None:
         entity = dataHandler.entity

     menu = []
     if dataFrameMisc.isPySparkDataFrame(entity) or dataFrameMisc.isPandasDataFrame(entity):
         menu.append(
             {"categoryId": "Table", "title": "DataFrame Table", "icon": "fa-table", "id": "dataframe"}
         )
     elif dataFrameMisc.fqName(entity) == "graphframes.graphframe.GraphFrame":
         menu.append(
             {"categoryId": "Table", "title": "Graph Vertices", "icon": "fa-location-arrow", "id": "vertices"}
         )
         menu.append(
             {"categoryId": "Table", "title": "Graph Edges", "icon": "fa-link", "id": "edges"}
         )
     return menu
Exemplo n.º 8
0
 def doRender(self, handlerId):
     """Render ``self.entity`` as a table, or emit a fallback message.

     For a GraphFrame the edges are rendered when ``handlerId`` is
     ``"edges"`` and the vertices otherwise. DataFrames (PySpark or pandas)
     are passed to the ``dataframeTable.html`` template wrapped in a
     ``PandasDataFrameAdapter``; any other object produces an
     "Unable to display object" snippet.
     """
     target = self.entity
     if dataFrameMisc.fqName(target) == "graphframes.graphframe.GraphFrame":
         # Narrow the graph down to the one DataFrame that was requested.
         target = target.edges if handlerId == "edges" else target.vertices

     renderable = dataFrameMisc.isPySparkDataFrame(target) or dataFrameMisc.isPandasDataFrame(target)
     if renderable:
         adapter = PandasDataFrameAdapter(target)
         self._addHTMLTemplate('dataframeTable.html', entity=adapter)
         return

     self._addHTML("""
         <b>Unable to display object</b>
     """
     )
Exemplo n.º 9
0
   def doRender(self, handlerId):
       """Render ``self.entity`` as a table, honouring column options.

       For a GraphFrame the edges are rendered when ``handlerId`` is
       ``"edges"`` and the vertices otherwise. For a PySpark or pandas
       DataFrame, ``self.hideColumns`` and ``self.showColumns`` are set from
       the comma-separated options of the same name (each name stripped; the
       falsy option value itself is stored when the option is unset or
       empty), and the ``dataframeTable.html`` template is added. Any other
       object produces an "Unable to display object" snippet.
       """
       target = self.entity
       if dataFrameMisc.fqName(target) == "graphframes.graphframe.GraphFrame":
           # Narrow the graph down to the one DataFrame that was requested.
           target = target.edges if handlerId == "edges" else target.vertices

       renderable = dataFrameMisc.isPySparkDataFrame(target) or dataFrameMisc.isPandasDataFrame(target)
       if renderable:
           hiddenSpec = self.options.get("hideColumns")
           self.hideColumns = hiddenSpec and [name.strip() for name in hiddenSpec.split(",")]

           shownSpec = self.options.get("showColumns")
           self.showColumns = shownSpec and [name.strip() for name in shownSpec.split(",")]

           adapter = PandasDataFrameAdapter(target)
           noSchema = self.options.get("table_noschema", "false")
           self._addHTMLTemplate('dataframeTable.html', entity=adapter, table_noschema=noSchema)
           return

       self._addHTML("""
           <b>Unable to display object {}</b>
       """.format(target)
       )
Exemplo n.º 10
0
 def getMenuInfo(self, entity):
     """Return the "Chart" menu entries available for ``entity``.

     PySpark and pandas DataFrames get six entries (bar, line, scatter,
     pie, map, histogram), in that order; any other entity gets an empty
     list.
     """
     chartable = dataFrameMisc.isPySparkDataFrame(entity) or dataFrameMisc.isPandasDataFrame(entity)
     if not chartable:
         return []

     # (title, icon, id) for each chart, in display order.
     chartSpecs = (
         ("Bar Chart", "fa-bar-chart", "barChart"),
         ("Line Chart", "fa-line-chart", "lineChart"),
         ("Scatter Plot", "fa-circle", "scatterPlot"),
         ("Pie Chart", "fa-pie-chart", "pieChart"),
         ("Map", "fa-globe", "mapChart"),
         ("Histogram", "fa-table", "histogram"),
     )
     return [
         {"categoryId": "Chart", "title": title, "icon": icon, "id": chartId}
         for title, icon, chartId in chartSpecs
     ]
Exemplo n.º 11
0
    def doRender(self, handlerId):
        """Render ``self.entity`` as a table, or emit a fallback message.

        For a GraphFrame the edges are rendered when ``handlerId`` is
        ``"edges"`` and the vertices otherwise. For a PySpark or pandas
        DataFrame the ``dataframeTable.html`` template is added with:

        * ``entity``: the DataFrame wrapped in ``PandasDataFrameAdapter``;
        * ``table_noschema``: the option of that name (default ``"false"``);
        * ``table_hidecols``: a dict mapping each name listed in the
          comma-separated ``hideColumns`` option to ``1`` (empty when the
          option is absent).

        Any other object produces an "Unable to display object" snippet.
        """
        entity = self.entity
        if dataFrameMisc.fqName(entity) == "graphframes.graphframe.GraphFrame":
            # Narrow the graph down to the one DataFrame that was requested.
            entity = entity.edges if handlerId == "edges" else entity.vertices

        if dataFrameMisc.isPySparkDataFrame(
                entity) or dataFrameMisc.isPandasDataFrame(entity):
            hcmap = {}
            if "hideColumns" in self.options:
                # Read the same key that was just tested. The previous code
                # read "showColumns" here, which raised AttributeError when
                # only "hideColumns" was set and hid the wrong columns
                # otherwise.
                hcarr = self.options.get("hideColumns").split(",")
                hcmap = {name: 1 for name in hcarr}
            self._addHTMLTemplate('dataframeTable.html',
                                  entity=PandasDataFrameAdapter(entity),
                                  table_noschema=self.options.get(
                                      "table_noschema", "false"),
                                  table_hidecols=hcmap)
            return

        self._addHTML("""
            <b>Unable to display object {}</b>
        """.format(entity))
Exemplo n.º 12
0
    def stats_table(self, field):
        """Build an HTML table of statistics for the column ``field``.

        Resets and fills ``self.summary_stats``, ``self.quantiles`` and
        ``self.frequents`` as a side effect, then returns the markup.

        * PySpark DataFrame: summary values come from ``describe(field)``,
          quantiles from ``approxQuantile`` (only when
          ``Environment.sparkVersion == 2``) and frequent items from
          ``stat.freqItems``.
        * Otherwise: if ``self.df`` is not a pandas DataFrame it is replaced
          by ``self.data_handler.entity``; if the result is a pandas
          DataFrame, all three lists are filled from ``describe`` and
          ``value_counts``. If it is still not a pandas DataFrame the lists
          stay empty.

        At most five frequent values are kept. The "count" value is
        formatted without decimals, every other number with two.

        :param field: name of the column to summarise.
        :return: HTML string containing a ``stats-table`` table.
        """
        self.summary_stats = []
        self.quantiles = []
        self.frequents = []

        summary_labels = ['count', 'mean', 'std', 'min', 'max']
        quantile_labels = ['2%', '9%', '25%', '50%', '75%', '91%', '98%']
        quantile_probs = [.02, .09, .25, .50, .75, .91, .98]
        max_frequents = 5

        if isPySparkDataFrame(self.df):
            # Collect once: every collect() call re-runs the Spark job, and
            # the previous code called it on each loop iteration.
            stat_rows = self.df.describe(field).collect()
            for i, label in enumerate(summary_labels):
                fmt = "{:.0f}" if i == 0 else "{:.2f}"
                self.summary_stats.append((label, fmt.format(float(stat_rows[i][1]))))

            if Environment.sparkVersion == 2:
                quants = self.df.approxQuantile(field, quantile_probs, 0.1)
                for label, q in zip(quantile_labels, quants):
                    self.quantiles.append((label + "ile", "{:.2f}".format(q)))

            freqdf = self.df.stat.freqItems([field], 0.1)
            freqlist = freqdf.collect()[0][field + '_freqItems']
            self.frequents.extend(str(item) for item in freqlist[:max_frequents])
        else:
            if not isPandasDataFrame(self.df):
                self.df = self.data_handler.entity
            if isPandasDataFrame(self.df):
                statsdf = self.df[field].describe(quantile_probs)

                # Always index by label: integer keys on a label-indexed
                # Series rely on a deprecated positional fallback.
                for i, label in enumerate(summary_labels):
                    fmt = "{:.0f}" if i == 0 else "{:.2f}"
                    self.summary_stats.append((label, fmt.format(statsdf[label])))

                for label in quantile_labels:
                    self.quantiles.append((label + "ile", "{:.2f}".format(statsdf[label])))

                freqseries = self.df[field].value_counts()
                self.frequents.extend(str(ix) for ix in freqseries.index[:max_frequents])

        summaryname = '<br>'.join(s[0] for s in self.summary_stats)
        summaryvalue = '<br>'.join(s[1] for s in self.summary_stats)
        quantname = '<br>'.join(q[0] for q in self.quantiles)
        quantvalue = '<br>'.join(q[1] for q in self.quantiles)
        freqvalue = '<br>'.join(self.frequents)

        # NOTE(review): the quantile columns are shown only when
        # Environment.sparkVersion == 2, even on the pandas path where
        # quantiles are always computed -- confirm this is intended.
        show_quantiles = Environment.sparkVersion == 2

        table = """
            <table class="stats-table">
                <thead>
                    <tr>"""
        table += "<th colspan='2'>Summary</th> <th colspan='2'>Quantiles</th> <th>Frequents</th>" if show_quantiles else "<th colspan='2'>Summary</th> <th>Frequents</th>"
        table += """
                    </tr>
                </thead>
                <tbody>
                    <tr>"""
        table += "<td>{}</td> <td>{}</td> <td>{}</td> <td>{}</td> <td>{}</td>" if show_quantiles else "<td>{}</td> <td>{}</td> <td>{}</td>"
        table += """
                    </tr>
                </tbody>
            </table>"""

        if show_quantiles:
            return table.format(summaryname, summaryvalue, quantname, quantvalue, freqvalue)
        return table.format(summaryname, summaryvalue, freqvalue)
Exemplo n.º 13
0
def createDataframeAdapter(entity):
    """Return an adapter-compatible object for a DataFrame ``entity``.

    A pandas DataFrame is wrapped in ``PandasDataFrameAdapter``; a PySpark
    DataFrame is returned unchanged.

    :raises ValueError: if ``entity`` is neither kind of DataFrame.
    """
    if dataFrameMisc.isPandasDataFrame(entity):
        return PandasDataFrameAdapter(entity)

    if not dataFrameMisc.isPySparkDataFrame(entity):
        raise ValueError("Invalid argument")

    return entity