def build_sqlalchemy_columns( self, fields, base_query_tables=None, custom_columns=None): ''' Returns an ordered dict of sqlalchemy.sql.schema.Column objects, associated with the sqlalchemy.sql.schema.Table definitions, which are bound to the sqlalchemy.engine.Engine: "Connects a Pool and Dialect together to provide a source of database connectivity and behavior." @param fields - field definitions, from the resource schema @param bridge - a reports.utils.sqlalchemy_bridge.Bridge @param base_query_tables - if specified, the fields for these tables will be available as part of the base query, so the column definitions become simpler, and do not need to be joined in. @param manual_includes - columns to include even if the field visibility is not set ''' DEBUG_BUILD_COLUMNS = False or logger.isEnabledFor(logging.DEBUG) base_query_tables = base_query_tables or [] custom_columns = custom_columns or [] try: columns = OrderedDict() for field in fields: key = field['key'] if key in custom_columns: if DEBUG_BUILD_COLUMNS: logger.info( 'custom field: %r, %r', key,custom_columns[key]) columns[key] = custom_columns[key].label(key) continue if DEBUG_BUILD_COLUMNS: logger.info('build column: %r, %r', field['key'], field) field_name = field.get('field', None) if not field_name: field_name = field['key'] field_table = field.get('table', None) if not field_table and DEBUG_BUILD_COLUMNS: logger.info( 'field: %r, val: %r, skipping field because there is no ' '"field_table" value set',key,field) continue if DEBUG_BUILD_COLUMNS: logger.info( 'field: %r, field_table: %r', field['key'], field_table ) if field_table in base_query_tables: # simple case: table.fields already selected in the base query: # just need to specify them if field_name in get_tables()[field_table].c: col = get_tables()[field_table].c[field_name] else: raise Exception( 'field: %r, not found in table: %r' % (field_name, field_table)) col = col.label(key) columns[key] = col elif field.get('linked_field_value_field', None): link_table = field['table'] link_table_def = get_tables()[link_table] linked_field_parent = field['linked_field_parent'] link_field = linked_field_parent + '_id' join_args = { 'link_table': link_table, 'link_field': link_field, 'parent_table': linked_field_parent } if field['linked_field_type'] != 'fields.ListField': join_stmt = select([link_table_def.c[field_name]]).\ where(text('{link_table}.{link_field}=' '{parent_table}.{link_field}'.format(**join_args))) if field.get('linked_field_meta_field', None): # TODO: test - the linked meta field is the "datacolumn type" linked_field_meta_field = field['linked_field_meta_field'] meta_field_obj = MetaHash.objects.get( key=field['key'], scope=field['scope']) meta_table_def = get_tables()['metahash'] join_stmt.join(meta_table_def, link_table_def.c[linked_field_meta_field]== getattr(meta_field_obj,'pk') ) join_stmt = join_stmt.label(key) columns[key] = join_stmt elif field['linked_field_type'] == 'fields.ListField': join_stmt = select([link_table_def.c[field_name]]).\ where(text('{link_table}.{link_field}=' '{parent_table}.{link_field}'.format(**join_args))) if field.get('linked_field_meta_field', None): # TODO: test - the linked meta field is the "datacolumn type" linked_field_meta_field = field['linked_field_meta_field'] meta_field_obj = MetaHash.objects.get( key=field['key'], scope=field['scope']) meta_table_def = get_tables()['metahash'] join_stmt.join(meta_table_def, link_table_def.c[linked_field_meta_field]== getattr(meta_field_obj,'pk') ) ordinal_field = field.get('ordinal_field', None) if ordinal_field: join_stmt = join_stmt.order_by(link_table_def.c[ordinal_field]) join_stmt = join_stmt.alias('a') stmt2 = select([func.array_to_string( func.array_agg(column(field_name)), LIST_DELIMITER_SQL_ARRAY)]) stmt2 = stmt2.select_from(join_stmt).label(key) columns[key] = stmt2 else: if DEBUG_BUILD_COLUMNS: logger.info( 'field is not in the base tables %r, nor in a linked field, ' 'and is not custom: %s', base_query_tables, key) if DEBUG_BUILD_COLUMNS: logger.info('columns: %r', columns.keys()) return columns except Exception, e: logger.exception('on build sqlalchemy columns') raise e
def _concat(*args): ''' Use as a replacement for sqlalchemy.sql.functions.concat - "concat" is not available in postgresql 8.4 ''' return func.array_to_string(array([x for x in args]),'')
def analytics_query(self, start, end, locations=None, library=None): """Build a database query that fetches rows of analytics data. This method uses low-level SQLAlchemy code to do all calculations and data conversations in the database. It's modeled after Work.to_search_documents, which generates a large JSON document entirely in the database. :return: An iterator of results, each of which can be written directly to a CSV file. """ clauses = [ CirculationEvent.start >= start, CirculationEvent.start < end, ] if locations: event_types = [ CirculationEvent.CM_CHECKOUT, CirculationEvent.CM_FULFILL, CirculationEvent.OPEN_BOOK, ] locations = locations.strip().split(",") clauses += [ CirculationEvent.type.in_(event_types), CirculationEvent.location.in_(locations), ] if library: clauses += [CirculationEvent.library == library] # Build the primary query. This is a query against the # CirculationEvent table and a few other tables joined against # it. This makes up the bulk of the data. events_alias = ( select( [ func.to_char(CirculationEvent.start, "YYYY-MM-DD HH24:MI:SS").label( "start" ), CirculationEvent.type.label("event_type"), Identifier.identifier, Identifier.type.label("identifier_type"), Edition.sort_title, Edition.sort_author, case( [(Work.fiction == True, literal_column("'fiction'"))], else_=literal_column("'nonfiction'"), ).label("fiction"), Work.id.label("work_id"), Work.audience, Edition.publisher, Edition.imprint, Edition.language, CirculationEvent.location, ], ) .select_from( join( CirculationEvent, LicensePool, CirculationEvent.license_pool_id == LicensePool.id, ) .join(Identifier, LicensePool.identifier_id == Identifier.id) .join(Work, Work.id == LicensePool.work_id) .join(Edition, Work.presentation_edition_id == Edition.id) ) .where(and_(*clauses)) .order_by(CirculationEvent.start.asc()) .alias("events_alias") ) # A subquery can hook into the main query by referencing its # 'work_id' field in its WHERE clause. work_id_column = literal_column( events_alias.name + "." + events_alias.c.work_id.name ) # This subquery gets the names of a Work's genres as a single # comma-separated string. # # This Alias selects some number of rows, each containing one # string column (Genre.name). Genres with higher affinities with # this work go first. genres_alias = ( select([Genre.name.label("genre_name")]) .select_from(join(WorkGenre, Genre, WorkGenre.genre_id == Genre.id)) .where(WorkGenre.work_id == work_id_column) .order_by(WorkGenre.affinity.desc(), Genre.name) .alias("genres_subquery") ) # Use array_agg() to consolidate the rows into one row -- this # gives us a single value, an array of strings, for each # Work. Then use array_to_string to convert the array into a # single comma-separated string. genres = select( [func.array_to_string(func.array_agg(genres_alias.c.genre_name), ",")] ).select_from(genres_alias) # This subquery gets the a Work's target age as a single string. # # This Alias selects two fields: the lower and upper bounds of # the Work's target age. This reuses code originally written # for Work.to_search_documents(). target_age = Work.target_age_query(work_id_column).alias("target_age_subquery") # Concatenate the lower and upper bounds with a dash in the # middle. If both lower and upper bound are empty, just give # the empty string. This simulates the behavior of # Work.target_age_string. target_age_string = select( [ case( [ ( or_(target_age.c.lower != None, target_age.c.upper != None), func.concat(target_age.c.lower, "-", target_age.c.upper), ) ], else_=literal_column("''"), ) ] ).select_from(target_age) # Build the main query out of the subqueries. events = events_alias.c query = select( [ events.start, events.event_type, events.identifier, events.identifier_type, events.sort_title, events.sort_author, events.fiction, events.audience, events.publisher, events.imprint, events.language, target_age_string.label("target_age"), genres.label("genres"), events.location, ] ).select_from(events_alias) return query