def __init__(self, dataset_url):
    """Bind the object to ``dataset_url`` and build its helper objects.

    ``dataset_url`` is stored as given and also passed to the ``Query`` and
    ``Time`` constructors.
    """
    url = dataset_url
    self.dataset_url = url
    self.query = Query(url)
    self._time = Time(url)
    # Index is handed the object itself rather than the URL, so it is
    # created last, after the other attributes have been assigned.
    self._index = Index(self)
class BatFrame(object):
    """Lazy, DataFrame-like view over a remote dataset.

    A frame stores the dataset URL together with a ``Query`` object.
    Indexing, ``head`` and ``sort`` work on a copy of the frame and leave the
    receiver untouched. The query itself is only executed by ``rows``,
    ``toPandas`` and ``__repr__``; ``columns``, ``shape`` and ``__repr__``
    additionally issue an HTTP GET against the dataset URL.
    """

    # Maximum number of rows rendered by ``__repr__``.
    _REPR_MAX_ROWS = 40

    def __init__(self, dataset_url):
        """Bind the frame to ``dataset_url`` and create its helper objects."""
        self.dataset_url = dataset_url
        self.query = Query(dataset_url)
        self._time = Time(dataset_url)
        # Index receives the frame itself, so it is created last.
        self._index = Index(self)

    def __getitem__(self, val):
        """Select from the frame; the result depends on the type of ``val``.

        * ``str``    -- a ``Column`` for that name, with this frame's query
          merged into the column's query.
        * ``Query``  -- a copy of the frame with ``val`` merged into its query.
        * ``slice``  -- a copy with ``start`` set as OFFSET and ``stop`` set
          as LIMIT (each only when not ``None``); ``step`` is ignored.
        * ``list``   -- a copy with every entry added to the SELECT clause,
          wrapped in double quotes.
        * ``Column`` -- a copy with ``(val.execution_name)`` added as a WHERE
          condition.

        Raises:
            TypeError: if ``val`` is none of the types above.
        """
        if isinstance(val, str):
            col = Column(val, self.dataset_url)
            col.query.mergeQuery(self.query)
            return col

        if isinstance(val, Query):
            bf = self.copy()
            bf.query.mergeQuery(val)
            return bf

        if isinstance(val, slice):
            bf = self.copy()
            if val.start is not None:
                bf.query.setOFFSET(val.start)
            if val.stop is not None:
                # NOTE(review): ``stop`` is passed through unchanged. If LIMIT
                # is a row count (as in SQL), ``frame[a:b]`` yields ``b`` rows
                # rather than ``b - a`` -- confirm against Query.setLIMIT.
                bf.query.setLIMIT(val.stop)
            return bf

        if isinstance(val, list):
            bf = self.copy()
            for name in val:
                bf.query.addSELECT('"{}"'.format(name))
            return bf

        if isinstance(val, Column):
            bf = self.copy()
            bf.query.addWHERE("({})".format(val.execution_name))
            return bf

        # Previously this fell through and returned None, which only failed
        # later with an unrelated-looking error on the caller's side.
        raise TypeError(
            "unsupported key type for BatFrame: {}".format(type(val).__name__))

    @property
    def columns(self):
        """Return the decoded JSON body of ``GET <dataset_url>/columns``."""
        return requests.get(self.dataset_url + '/columns').json()

    @property
    def rows(self):
        """Return the ``"_rowName"`` entry of the query result.

        The query is executed on a copy of the frame with ``format="soa"``.
        """
        bf = self.copy()
        result = bf.query.executeQuery(format="soa")
        return result["_rowName"]

    @property
    def time(self):
        """Return a copy of the time helper with this frame's query merged in."""
        copy_time = self._time.copy()
        # Fixed: this used to read ``self.Query`` (never assigned, so it
        # raised AttributeError) and returned mergeQuery()'s result instead of
        # the helper. Now follows the merge-then-return pattern of
        # ``__getitem__``.
        copy_time.query.mergeQuery(self.query)
        return copy_time

    @property
    def ix(self):
        """Return a copy of the index helper."""
        return self._index.copy()

    def copy(self):
        """Return a new frame on the same dataset with a copy of the query."""
        bf = BatFrame(self.dataset_url)
        bf.query = self.query.copy()
        return bf

    def toPandas(self):
        """Execute the query and return the result as a pandas DataFrame.

        The query runs with ``format="aos"``. An empty result gives an empty
        DataFrame; otherwise the records are indexed by ``"_rowName"``.
        """
        result = self.query.executeQuery(format="aos")
        if len(result) == 0:
            return pd.DataFrame()
        return pd.DataFrame.from_records(result, index="_rowName")

    def head(self, num_rows=5):
        """Return a copy of the frame with LIMIT set to ``num_rows``."""
        bf = self.copy()
        bf.query.setLIMIT(num_rows)
        return bf

    def query(self, query):
        """Placeholder; always raises ``NotImplementedError``.

        ``__init__`` assigns an instance attribute with the same name, so on
        an initialised frame ``frame.query`` is the ``Query`` object and this
        function is only reachable through the class (``BatFrame.query``).
        """
        raise NotImplementedError()

    def sort(self, value, ascending=True):
        """Return a copy of the frame ordered by one or more columns.

        Args:
            value: a column name or a list of column names.
            ascending: a single flag applied to every column, or a list with
                one flag per entry in ``value``.

        Raises:
            RuntimeError: if ``value`` and ``ascending`` differ in length
                after both have been normalised to lists.
        """
        if not isinstance(value, list):
            value = [value]
        if not isinstance(ascending, list):
            ascending = [ascending] * len(value)
        if len(value) != len(ascending):
            raise RuntimeError("len(value) != len(ascending)")

        bf = self.copy()
        for by, asc in zip(value, ascending):
            direction = "ASC" if asc else "DESC"
            bf.query.addORDERBY('"{}" {}'.format(by, direction))
        return bf

    @property
    def shape(self):
        """Return ``(rowCount, valueCount)`` from the dataset's ``status``.

        The values come from the JSON body of ``GET <dataset_url>``.
        """
        # Only the URL is needed here, so no frame copy is made.
        status = requests.get(self.dataset_url).json()['status']
        return (status['rowCount'], status['valueCount'])

    def __repr__(self):
        """Return a text preview of at most ``_REPR_MAX_ROWS`` rows.

        When the dataset status reports more rows than are shown, a trailing
        ``"<rowCount> rows"`` line is appended. The text is returned rather
        than printed, so ``repr()`` and ``str()`` have no stdout side effect.
        """
        preview = self.copy()
        preview.query.setLIMIT(self._REPR_MAX_ROWS)
        text = str(preview.toPandas())

        response = requests.get(self.dataset_url).json()
        try:
            row_count = response['status']['rowCount']
        except (KeyError, TypeError):
            # The status block is optional for display purposes; without it
            # the preview is simply shown with no total.
            row_count = None

        if row_count is not None and row_count > self._REPR_MAX_ROWS:
            text += "\n{} rows".format(row_count)
        return text