def elastic_query(self, query: str) -> Dict[str, Any]:
    """
    Send a SQL statement to the Elasticsearch SQL HTTP endpoint.

    The statement is first passed through ``self.sanitize_query`` and then
    POSTed to ``/{self.sql_path}/``. ``fetch_size`` is added to the request
    body only when ``self.fetch_size`` is not ``None``.

    :param query: SQL text to execute
    :return: whatever the transport layer returns for the request
    :raises exceptions.OperationalError: on a connection error
    :raises exceptions.ProgrammingError: when the request is rejected, or
        when the returned body contains an ``"error"`` entry
    """
    body = {"query": self.sanitize_query(query)}
    if self.fetch_size is not None:
        body["fetch_size"] = self.fetch_size
    endpoint = f"/{self.sql_path}/"

    try:
        result = self.es.transport.perform_request("POST", endpoint, body=body)
    except es_exceptions.ConnectionError:
        raise exceptions.OperationalError(f"Error connecting to Elasticsearch")
    except es_exceptions.RequestError as err:
        raise exceptions.ProgrammingError(
            f"Error ({err.error}): {err.info['error']['reason']}"
        )

    # Opendistro reports errors with HTTP status 200, so the body itself
    # has to be inspected.
    if "error" not in result:
        return result
    raise exceptions.ProgrammingError(
        f"({result['error']['reason']}): {result['error']['details']}"
    )
def get_array_type_columns(self, table_name: str) -> "Cursor":
    """
    Sample one document from an index and list its array-valued columns.

    A single record is fetched from ``table_name`` and every field whose
    value is a list is reported. This is useful since arrays are not
    supported by ES SQL. For a list whose first element is an object, each
    key of that first element is reported as ``<column>.<key>``; any other
    list (including an empty one) is reported under the column name itself.
    Every reported name is followed by its ``.keyword`` variant.

    The names are stored as one-element tuples in ``self._results`` and
    ``self.description`` is set to a single ``name`` string column.

    :param table_name: index to sample
    :return: ``self``
    :raises exceptions.OperationalError: on a connection error
    :raises exceptions.ProgrammingError: when the index is not found
    :raises exceptions.DataError: when the search response does not have
        the expected ``hits`` structure
    """
    try:
        response = self.es.search(index=table_name, size=1)
    except es_exceptions.ConnectionError as e:
        raise exceptions.OperationalError(
            f"Error connecting to {self.url}: {e.info}"
        ) from e
    except es_exceptions.NotFoundError as e:
        raise exceptions.ProgrammingError(
            f"Error ({e.error}): {e.info['error']['reason']}"
        ) from e

    try:
        hits = response["hits"]
        total = hits["total"]
        # Elasticsearch 7+ reports the total as an object with a "value"
        # entry; older servers (or rest_total_hits_as_int) report a bare
        # integer, which must not be subscripted.
        hit_count = total if isinstance(total, int) else total["value"]
        source = {} if hit_count == 0 else hits["hits"][0]["_source"]
    except (KeyError, IndexError) as e:
        raise exceptions.DataError(
            f"Error inferring array type columns {self.url}: {e}"
        ) from e

    array_columns: List[Tuple[Any, ...]] = []
    for col_name, value in source.items():
        # Only lists (ES arrays) are of interest
        if not isinstance(value, list):
            continue
        if value and isinstance(value[0], dict):
            # Array of objects: report every key of the first element
            # instead of the column itself
            for in_col_name in value[0]:
                array_columns.append((f"{col_name}.{in_col_name}",))
                array_columns.append((f"{col_name}.{in_col_name}.keyword",))
            continue
        array_columns.append((col_name,))
        array_columns.append((f"{col_name}.keyword",))

    self.description = [
        CursorDescriptionRow("name", Type.STRING, None, None, None, None, None)
    ]
    self._results = array_columns
    return self
def _show_columns(self, table_name):
    """
    Emulate ``SHOW COLUMNS FROM`` for a single table.

    Runs ``SHOW TABLES LIKE {table_name}`` through ``self.elastic_query``
    and expects the result to be keyed by table name, with the column
    definitions under ``mappings._doc.properties``. Each property that
    has a truthy ``type`` entry becomes a ``[column, type]`` row.

    The rows are stored in ``self._results`` and ``self.description`` is
    set to two string columns, ``column`` and ``mapping``.

    :param table_name: table (index) whose columns are listed
    :return: ``self``
    :raises exceptions.ProgrammingError: when ``table_name`` is not a key
        of the query result
    """
    response = self.elastic_query(f"SHOW TABLES LIKE {table_name}")
    if table_name not in response:
        raise exceptions.ProgrammingError(f"Table {table_name} not found")

    properties = response[table_name]["mappings"]["_doc"]["properties"]
    typed_columns = (
        (name, spec.get("type")) for name, spec in properties.items()
    )
    # Properties without a type are left out
    column_rows = [[name, mapping] for name, mapping in typed_columns if mapping]

    self.description = [
        ("column", Type.STRING, None, None, None, None, None),
        ("mapping", Type.STRING, None, None, None, None, None),
    ]
    self._results = column_rows
    return self
def elastic_query(self, query: str, csv=False):
    """
    Send a SQL statement to the Elasticsearch SQL HTTP endpoint.

    ``self.description`` is reset to ``None`` before the request. The
    statement is passed through ``self.sanitize_query`` and POSTed to
    ``/{self.sql_path}/``, with ``?format=csv`` appended when ``csv`` is
    truthy.

    :param query: SQL text to execute
    :param csv: request the CSV format instead of the default one
    :return: whatever the transport layer returns for the request
    :raises exceptions.OperationalError: on a connection error
    :raises exceptions.ProgrammingError: when the request is rejected
    """
    self.description = None
    body = {"query": self.sanitize_query(query)}
    endpoint = f"/{self.sql_path}/?format=csv" if csv else f"/{self.sql_path}/"

    try:
        return self.es.transport.perform_request("POST", endpoint, body=body)
    except es_exceptions.ConnectionError as err:
        raise exceptions.OperationalError(
            f"Error connecting to {self.url}: {err.info}"
        )
    except es_exceptions.RequestError as err:
        raise exceptions.ProgrammingError(
            f"Error ({err.error}): {err.info['error']['reason']}"
        )