def fetch_from_vertica_to_df(self, data_set, query, block_size=10000):
    """Run ``query`` and store its result on ``data_set`` as a MetadataDataFrame.

    Nothing happens when the 'query' attribute already stored for
    ``data_set`` equals ``query``. Otherwise the query is executed over a
    connection opened with ``connect(self.connection_details)``, every
    fetched value is converted with ``str``, and the 'data', 'query',
    'columns' and 'built_features' attributes of ``data_set`` are
    overwritten.

    Args:
        data_set: Key passed to ``get_data_set_attribute`` and
            ``set_data_set_attribute``.
        query: SQL text to execute; column names are derived from it with
            ``get_column_names_from_sql_query``.
        block_size: Number of rows requested per ``fetchmany`` call.

    Raises:
        ValueError: If the query produced no rows.
    """
    if self.get_data_set_attribute(data_set, 'query') == query:
        return

    data = []
    connection = connect(self.connection_details)
    try:
        cursor = connection.cursor()
        try:
            # Single-argument print(...) is valid on both Python 2 and 3;
            # the double space keeps the historical log text unchanged.
            print('Executing  %s Query...' % (data_set,))
            print(query)
            columns = get_column_names_from_sql_query(query)
            cursor.execute(query)
            while True:
                rows = cursor.fetchmany(block_size)
                # DB-API: an empty sequence means the result is exhausted.
                if not rows:
                    break
                data.extend([str(ele) for ele in row] for row in rows)
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query or the fetch fails.
        connection.close()

    df = MetadataDataFrame(
        data=data,
        columns=columns,
        meta_info={
            'query': query,
            'built_features': [],
            'aggregate_values': {},
            'columns': columns,
        },
    )
    if len(df) == 0:
        raise ValueError('SQL result in empty fetch!!')

    self.set_data_set_attribute(data_set, 'data', df)
    self.set_data_set_attribute(data_set, 'query', query)
    self.set_data_set_attribute(data_set, 'columns', columns)
    self.set_data_set_attribute(data_set, 'built_features', [])
def fetch_from_vertica_to_df(self, data_set, query, block_size=100000):
    """Run ``query`` and copy its result into the local table for ``data_set``.

    Nothing happens when the 'query' meta data already stored for
    ``data_set`` equals ``query``. Otherwise a table is created with
    ``self.create_table``, the query is executed over a connection opened
    with ``connect(self.connection_details)``, and the rows are inserted in
    batches (every value converted with ``str``) before ``self.db`` is
    committed. On success the table is passed to
    ``split_table_into_features`` and the 'query', 'columns'
    (comma-joined) and 'built_features' meta data are overwritten.

    Args:
        data_set: Key passed to the table and meta-data helpers.
        query: SQL text to execute; column names are derived from it with
            ``get_column_names_from_sql_query``.
        block_size: Number of rows requested per ``fetchmany`` call.

    Raises:
        ValueError: If the table holds no rows after the fetch.
    """
    if self.get_meta_data(data_set, 'query') == query:
        return

    connection = connect(self.connection_details)
    try:
        cursor_remote = connection.cursor()
        try:
            # Single-argument print(...) is valid on both Python 2 and 3;
            # the double space keeps the historical log text unchanged.
            print('Executing  %s Query...' % (data_set,))
            print(query)
            columns = get_column_names_from_sql_query(query)
            self.create_table(data_set, columns)
            cursor_remote.execute(query)
            while True:
                fetched = cursor_remote.fetchmany(block_size)
                # Never hand an empty batch to the insert helper (happens
                # when the result is empty or a multiple of block_size).
                if not fetched:
                    break
                rows = [tuple(str(ele) for ele in row) for row in fetched]
                self.insert_rows_to_table(data_set, columns, rows)
                # A short batch means the result set is exhausted.
                if len(rows) < block_size:
                    break
            self.db.commit()
        finally:
            cursor_remote.close()
    finally:
        # Release the connection even when the query or an insert fails.
        connection.close()

    if self.get_number_of_rows_in_table(data_set) == 0:
        raise ValueError('SQL result in empty fetch!!')

    self.split_table_into_features(data_set)
    self.set_meta_data(data_set, 'query', query)
    self.set_meta_data(data_set, 'columns', ','.join(columns))
    self.set_meta_data(data_set, 'built_features', '')
def fetch_from_vertica_to_df(self, data_set, query, block_size=10000):
    """Fetch the result of ``query`` into a MetadataDataFrame for ``data_set``.

    The call is a no-op when ``get_data_set_attribute(data_set, 'query')``
    already equals ``query``. Otherwise the query is executed through
    ``connect(self.connection_details)``, all fetched values are turned
    into strings, and the 'data', 'query', 'columns' and 'built_features'
    attributes of ``data_set`` are replaced.

    Args:
        data_set: Key passed to ``get_data_set_attribute`` and
            ``set_data_set_attribute``.
        query: SQL text to run; ``get_column_names_from_sql_query`` derives
            the column names from it.
        block_size: Number of rows requested per ``fetchmany`` call.

    Raises:
        ValueError: If the query produced no rows.
    """
    cached_query = self.get_data_set_attribute(data_set, 'query')
    if cached_query == query:
        return

    records = []
    connection = connect(self.connection_details)
    try:
        cursor = connection.cursor()
        try:
            # Single-argument print(...) is valid on both Python 2 and 3;
            # the double space keeps the historical log text unchanged.
            print('Executing  %s Query...' % (data_set,))
            print(query)
            columns = get_column_names_from_sql_query(query)
            cursor.execute(query)
            batch = cursor.fetchmany(block_size)
            # DB-API: fetchmany returns an empty sequence once exhausted.
            while batch:
                for row in batch:
                    records.append([str(ele) for ele in row])
                batch = cursor.fetchmany(block_size)
        finally:
            cursor.close()
    finally:
        # Close the connection on both the success and the error path.
        connection.close()

    meta_info = {
        'query': query,
        'built_features': [],
        'aggregate_values': {},
        'columns': columns,
    }
    df = MetadataDataFrame(data=records, columns=columns, meta_info=meta_info)
    if len(df) == 0:
        raise ValueError('SQL result in empty fetch!!')

    self.set_data_set_attribute(data_set, 'data', df)
    self.set_data_set_attribute(data_set, 'query', query)
    self.set_data_set_attribute(data_set, 'columns', columns)
    self.set_data_set_attribute(data_set, 'built_features', [])
def fetch_from_vertica_to_df(self, data_set, query, block_size=100000):
    """Load the result of ``query`` into the local table backing ``data_set``.

    The call is a no-op when ``get_meta_data(data_set, 'query')`` already
    equals ``query``. Otherwise ``self.create_table`` is called, the query
    is executed through ``connect(self.connection_details)``, and the rows
    are written with ``self.insert_rows_to_table`` in batches of
    ``block_size`` (values converted with ``str``), followed by
    ``self.db.commit()``. Afterwards ``split_table_into_features`` is
    called and the 'query', 'columns' (comma-joined) and 'built_features'
    meta data are replaced.

    Args:
        data_set: Key passed to the table and meta-data helpers.
        query: SQL text to run; ``get_column_names_from_sql_query`` derives
            the column names from it.
        block_size: Number of rows requested per ``fetchmany`` call.

    Raises:
        ValueError: If the table holds no rows after the fetch.
    """
    stored_query = self.get_meta_data(data_set, 'query')
    if stored_query == query:
        return

    connection = connect(self.connection_details)
    try:
        cursor_remote = connection.cursor()
        try:
            # Single-argument print(...) is valid on both Python 2 and 3;
            # the double space keeps the historical log text unchanged.
            print('Executing  %s Query...' % (data_set,))
            print(query)
            columns = get_column_names_from_sql_query(query)
            self.create_table(data_set, columns)
            cursor_remote.execute(query)
            exhausted = False
            while not exhausted:
                batch = cursor_remote.fetchmany(block_size)
                # A short (or empty) batch marks the end of the result set.
                exhausted = len(batch) < block_size
                if batch:
                    # Skip the insert helper entirely for an empty batch.
                    rows = [tuple(str(ele) for ele in row) for row in batch]
                    self.insert_rows_to_table(data_set, columns, rows)
                else:
                    exhausted = True
            self.db.commit()
        finally:
            cursor_remote.close()
    finally:
        # Close the connection on both the success and the error path.
        connection.close()

    if self.get_number_of_rows_in_table(data_set) == 0:
        raise ValueError('SQL result in empty fetch!!')

    self.split_table_into_features(data_set)
    self.set_meta_data(data_set, 'query', query)
    self.set_meta_data(data_set, 'columns', ','.join(columns))
    self.set_meta_data(data_set, 'built_features', '')