def translate_web_query_to_dc_query(self, data):
    '''
    Build the DocumentCloud search string for a set of web search
    parameters.

    The DocumentCloud API uses the Apache Lucene syntax. The term stored in
    ``self.dc_query`` (e.g. 'projectid:1542-city-of-new-orleans-contracts')
    is always added so the search is restricted to our project.

    :param data: The query parameters. The keys read here are \
    'search_input', 'vendor', 'department' and 'officer'.
    :type data: dict
    :returns: string. The query string ready for the DocumentCloud API.
    '''
    builder = QueryBuilder()
    builder.add_text(data['search_input'])

    # self.dc_query is written as "<field>:<value>".
    project_filter = self.dc_query.split(':')
    builder.add_term(project_filter[0], project_filter[1])

    for field in ('vendor', 'department'):
        selected = data[field]
        if selected == "":
            # An empty string means no value was supplied for this field.
            continue
        builder.add_term(field, selected.upper())

    officer = data['officer']
    if len(officer) > 0:
        officers = [officer]
        log.debug('Officers: %s', officers)

        # The officer is searched for via the vendor it translates to.
        vendor = self.translate_officer_to_vendor(officers[0])
        builder.add_term("vendor", vendor.upper())

    return builder.get_query()
def get_contracts(self, offset=0, limit=None):
    '''
    Query the database for contracts in reverse chronological order
    (newest ``dateadded`` first).

    Specify the number of recent contracts with offset and limit values.

    :param offset: The number of pages to offset the database query. It \
    is multiplied by ``self.pagelength`` to get the row offset.
    :type offset: int
    :param limit: The number of records to return. Passed to the query \
    unchanged.
    :type limit: int
    :returns: The contracts that matched the query, after being passed \
    through ``self.translate_to_doc_cloud_form``.
    '''
    # Convert the page offset into a row offset.
    row_offset = offset * self.pagelength

    try:
        contracts = (
            SESSION.query(Contract)
            .order_by(Contract.dateadded.desc())
            .offset(row_offset)
            .limit(limit)
            .all()
        )
    finally:
        # Close the session even when the query raises, so a failed
        # query does not leave it open.
        SESSION.close()

    contracts = self.translate_to_doc_cloud_form(contracts)

    log.debug('Contracts: %s', contracts)

    return contracts
def __init__(self):
    '''
    Set the page length, the DocumentCloud project query term and the
    DocumentCloud client used by this object.
    '''
    # Lucene-style term ("<field>:<value>") identifying our project.
    self.dc_query = 'projectid:1542-city-of-new-orleans-contracts'

    # Number of results per page. DocumentCloud API default is 10.
    self.pagelength = 10

    self.document_cloud_client = DocumentCloud()

    log.debug('%d documents', self.pagelength)
def get_search_page(self, request):
    '''
    Gets the data necessary for the search page (/contracts/search/).

    :param request: The search parameters supplied by the user.
    :type request: dict
    :returns: tuple. Two dicts: one for newly gathered data, and the \
    other an altered version of the incoming search parameters.
    '''
    # Extract search parameters (text input and dropdown selections)
    data = self.parse_query_string(request)

    log.debug('User search parameters: %s', data)

    # Transform query parameters into string for DocumentCloud API.
    search_term = self.translate_web_query_to_dc_query(data)

    # Get a list of contracts by querying our project on DocCloud:
    documents = self.query_document_cloud(
        search_term, page=data['current_page'])

    # TODO: A second search?
    number_of_documents = self.find_number_of_documents(search_term)

    log.debug('%d documents', number_of_documents)
    log.debug('%d documents', len(documents))

    # Integer ceiling division: a partially filled last page counts as a
    # page, but an exact multiple of the page length does not add an empty
    # extra page. Using // also keeps the result an int under true
    # division. Always report at least one page, even with no results.
    number_of_pages = max(
        1, -(-number_of_documents // self.pagelength))

    log.debug('Found {:,} documents across {:,} pages'.format(
        number_of_documents, number_of_pages))

    # NOTE(review): '%-d' (day of month without zero padding) is a
    # platform-specific strftime extension - confirm it is supported
    # wherever this runs.
    updated_date = time.strftime("%b. %-d, %Y")

    # Correct for AP Style, which spells these months out in full.
    # NOTE(review): AP Style also uses 'Sept.' where '%b' gives 'Sep.' -
    # confirm whether that should be corrected here as well.
    ap_style_months = (
        ('Mar.', 'March'),
        ('Apr.', 'April'),
        ('May.', 'May'),
        ('Jun.', 'June'),
        ('Jul.', 'July'),
    )
    for abbreviated, spelled_out in ap_style_months:
        updated_date = updated_date.replace(abbreviated, spelled_out)

    output_data = {
        'current_page': data['current_page'],
        'departments': self.get_departments(),
        'documents': documents,
        'number_of_documents': number_of_documents,
        'number_of_pages': number_of_pages,
        'officers': self.get_officers(),
        'results_language': ResultsLanguage(
            data, number_of_documents).main(),
        'search_input': data['search_input'],
        'updated_date': updated_date,
        'vendors': self.get_vendors()
    }

    return output_data, data
def query_document_cloud(self, search_term, page=1):
    '''
    Queries the DocumentCloud API for one page of search results.

    This is its own method so that queries can be cached via @memoize to
    speed things up.

    :param search_term: The query term to run against DocumentCloud API.
    :type search_term: string
    :param page: The page to receive in return. Useful for pagination. \
    Default: 1.
    :type page: int
    :returns: The output of the DocumentCloud client's document search \
    for the requested page.
    '''
    per_page = self.pagelength

    log.debug('DocumentCloud search: %s', search_term)
    log.debug('Showing %d results per page, page %d', per_page, page)

    search = self.document_cloud_client.documents.search
    results = search(search_term, page=page, per_page=per_page)

    log.debug('Found documents: %s', results)

    return results