def index_files_of_interest(self, doc, package_dict):
    """Index "interesting" file names (executables under /usr/bin) for a
    package into the xapian document currently bound to ``self.indexer``.

    :param doc: the xapian document being built (not used directly; terms
        are added through ``self.indexer``, which was bound to it earlier).
    :param package_dict: package metadata dict; must contain ``name``,
        ``branch`` and pre-fetched ``file_data``.
    """
    log.info("index_files_of_interest start")
    name = package_dict['name']
    branch = package_dict['branch']
    # mdapi calls the master branch 'rawhide'.
    if branch == 'master':
        branch = 'rawhide'
    # NOTE(review): the live mdapi call is commented out, so `branch` is
    # currently dead; file data is pre-fetched into package_dict upstream.
    #url = "/".join([self.mdapi_url, branch, "files", name])
    #data = self._call_api(url)
    data = package_dict['file_data']
    if data.get('files') is not None:
        for entry in data['files']:
            # NOTE(review): 'filenames' looks like a '/'-joined list, but
            # components produced by split('/') will not begin with '/',
            # so the startswith('/usr/bin') test below may never fire --
            # TODO confirm against the actual mdapi payload format.
            filenames = entry['filenames'].split('/')
            for filename in filenames:
                if filename.startswith('/usr/bin'):
                    # index executables with the exact-match marker term
                    log.info("indexing exe file %s"
                             % os.path.basename(filename))
                    exe_name = filter_search_string(
                        os.path.basename(filename))
                    self.indexer.index_text_without_positions(
                        "EX__%s__EX" % exe_name)
    else:
        # Fix: logging's warn() is a deprecated alias of warning().
        log.warning("Failed to get file list for %r" % name)
        return
    log.info("index_files_of_interest end")
def _get_xapian_data(self): xapian_dir = '/var/cache/fedoracommunity/packages/xapian/search' if not os.path.exists(xapian_dir): NO_XAP = '__no_xapian_available__' keys = ['icon', 'summary'] dumb_data = dict([(key, NO_XAP) for key in keys]) return dumb_data import xapian from fedoracommunity.search.utils import filter_search_string package_name = filter_search_string(self.name) search_db = xapian.Database(xapian_dir) enquire = xapian.Enquire(search_db) qp = xapian.QueryParser() qp.set_database(search_db) search_string = "Ex__%s__EX" % package_name query = qp.parse_query(search_string) enquire.set_query(query) matches = enquire.get_mset(0, 1) if len(matches) == 0: return None result = json.loads(matches[0].document.get_data()) return result
def search_packages(self, start_row=None, rows_per_page=None, order=-1,
                    sort_col=None, filters=None, **params):
    """Run a package search and return ``(estimated_count, rows)``.

    :param filters: dict whose ``'search'`` key holds the (url-quoted)
        query string.  Defaults to an empty dict.
    :returns: tuple of the estimated match count and a list of result
        dicts with matches highlighted; ``(0, [])`` for an empty query.
    """
    # Fix: avoid a mutable default argument (shared across calls).
    if filters is None:
        filters = {}
    search_string = filters.get('search')
    # short circuit for empty string
    if not search_string:
        return (0, [])
    search_string = urllib.unquote_plus(search_string)
    # Keep the raw terms for highlighting before filtering mangles them.
    unfiltered_search_terms = [
        t.strip() for t in search_string.split(' ') if t.strip()]
    search_string = utils.filter_search_string(search_string)
    phrase = '"%s"' % search_string
    # add exact matches
    search_terms = search_string.split(' ')
    search_terms = [t.strip() for t in search_terms if t.strip()]
    for term in search_terms:
        search_string += " EX__%s__EX" % term
    # add phrase match
    search_string += " OR %s" % phrase
    if len(search_terms) > 1:
        # add near phrase match (phrases that are near each other)
        search_string += " OR (%s)" % ' NEAR '.join(search_terms)
    # Add partial/wildcard matches
    search_string += " OR (%s)" % ' OR '.join([
        "*%s*" % term for term in search_terms])
    matches = self.do_search(search_string, start_row, rows_per_page,
                             order, sort_col)
    count = matches.get_matches_estimated()
    rows = []
    for m in matches:
        result = json.loads(m.document.get_data())
        # mark matches in <span class="match">
        self._highlight_matches(result, unfiltered_search_terms)
        rows.append(result)
    return (count, rows)
def _get_old_document(self, package_name):
    """Return the previously-indexed xapian document whose stored name
    matches *package_name* exactly, or ``None`` when absent."""
    filtered = utils.filter_search_string(package_name)
    query = "%s EX__%s__EX" % (filtered, filtered)
    for hit in self._xapian_connector().do_search(query, 0, 10):
        stored = json.loads(hit.document.get_data())
        if stored['name'] == package_name:
            return hit.document
    return None
def _get_old_document(self, package_name):
    """Locate the existing indexed document for *package_name*.

    Scans up to ten candidate matches and returns the first whose stored
    JSON payload has an exactly-matching name; ``None`` otherwise.
    """
    term = utils.filter_search_string(package_name)
    candidates = self._xapian_connector().do_search(
        "%s EX__%s__EX" % (term, term), 0, 10)
    for candidate in candidates:
        payload = json.loads(candidate.document.get_data())
        if payload['name'] == package_name:
            return candidate.document
    return None
def search_packages(self, start_row=None, rows_per_page=None, order=-1,
                    sort_col=None, filters=None, **params):
    """Search the package index.

    :param filters: dict carrying the query under the ``'search'`` key;
        defaults to an empty dict.
    :returns: ``(estimated_count, rows)``; ``(0, [])`` for empty queries.
    """
    # Fix: mutable default argument replaced with the None sentinel.
    if filters is None:
        filters = {}
    search_string = filters.get('search')
    # short circuit for empty string
    if not search_string:
        return (0, [])
    search_string = urllib.unquote_plus(search_string)
    # Preserve the user's raw terms for highlighting later on.
    unfiltered_search_terms = [
        t.strip() for t in search_string.split(' ') if t.strip()]
    search_string = utils.filter_search_string(search_string)
    phrase = '"%s"' % search_string
    # add exact matches
    search_terms = search_string.split(' ')
    search_terms = [t.strip() for t in search_terms if t.strip()]
    for term in search_terms:
        search_string += " EX__%s__EX" % term
    # add phrase match
    search_string += " OR %s" % phrase
    if len(search_terms) > 1:
        # add near phrase match (phrases that are near each other)
        search_string += " OR (%s)" % ' NEAR '.join(search_terms)
    # Add partial/wildcard matches
    search_string += " OR (%s)" % ' OR '.join(
        ["*%s*" % term for term in search_terms])
    matches = self.do_search(search_string, start_row, rows_per_page,
                             order, sort_col)
    count = matches.get_matches_estimated()
    rows = []
    for m in matches:
        result = json.loads(m.document.get_data())
        # mark matches in <span class="match">
        self._highlight_matches(result, unfiltered_search_terms)
        rows.append(result)
    return (count, rows)
def get_package_info(self, package_name):
    """Return the stored info dict for *package_name*, matching either
    the base package name or one of its sub-package names; ``None`` when
    nothing matches."""
    term = utils.filter_search_string(package_name)
    matches = self.do_search("%s EX__%s__EX" % (term, term), 0, 10)
    if len(matches) == 0:
        return None
    # Sometimes (rarely), the first match is not the one we actually want,
    # so scan all candidates.
    for match in matches:
        info = json.loads(match.document.get_data())
        is_base = info['name'] == package_name
        if is_base or any(sub['name'] == package_name
                          for sub in info['sub_pkgs']):
            return info
    return None
def get_package_info(self, package_name):
    """Fetch indexed metadata for *package_name*.

    Accepts a match when the stored name equals *package_name*, or when
    any entry in its ``sub_pkgs`` list does.  Returns ``None`` otherwise.
    """
    search_name = utils.filter_search_string(package_name)
    hits = self.do_search(
        "%s EX__%s__EX" % (search_name, search_name), 0, 10)
    if len(hits) == 0:
        return None
    # Sometimes (rarely), the first match is not the one we actually want.
    for hit in hits:
        record = json.loads(hit.document.get_data())
        if record['name'] == package_name:
            return record
        subnames = [sp['name'] for sp in record['sub_pkgs']]
        if package_name in subnames:
            return record
    return None
def get_latest_builds(self, package_name):
    """Return an :class:`OrderedDict` of per-distribution build data for
    *package_name*, collapsing consecutive duplicate dist names from
    ``distmappings.tags``; ``None`` when the package is not indexed."""
    enquire = xapian.Enquire(self._versionmap_db)
    parser = xapian.QueryParser()
    parser.set_database(self._versionmap_db)
    parser.add_boolean_prefix('key', 'XA')
    term = utils.filter_search_string(package_name)
    enquire.set_query(parser.parse_query('key:%s' % term))
    mset = enquire.get_mset(0, 1)
    if len(mset) == 0:
        return None
    results = json.loads(mset[0].document.get_data())
    latest_builds = OrderedDict()
    previous = ""
    for dist in distmappings.tags:
        distname = dist['name']
        # Skip repeats of the dist we just emitted; only keep dists
        # actually present in the stored results.
        if previous != distname and distname in results:
            latest_builds[distname] = results[distname]
        previous = distname
    return latest_builds
def get_latest_builds(self, package_name):
    """Look up the newest build per distribution for *package_name*.

    Queries the version-map database by the boolean ``key`` prefix and
    orders output by ``distmappings.tags``, de-duplicating adjacent
    entries that share a dist name.  Returns ``None`` on no match.
    """
    enquire = xapian.Enquire(self._versionmap_db)
    qp = xapian.QueryParser()
    qp.set_database(self._versionmap_db)
    qp.add_boolean_prefix('key', 'XA')
    filtered = utils.filter_search_string(package_name)
    enquire.set_query(qp.parse_query('key:%s' % filtered))
    hits = enquire.get_mset(0, 1)
    if len(hits) == 0:
        return None
    per_dist = json.loads(hits[0].document.get_data())
    builds = OrderedDict()
    seen_last = ""
    for tag in distmappings.tags:
        name = tag['name']
        if name != seen_last and name in per_dist:
            builds[name] = per_dist[name]
        seen_last = name
    return builds
def _create_document(self, package, old_doc=None):
    """Build and commit a xapian document for *package*.

    Indexes weighted name/owner/summary/description terms plus terms for
    each sub-package and its files, stores the package dict as JSON in
    the document data, then adds the document and removes *old_doc*.

    :param package: package metadata dict (mutated: the ``'package'``
        keys are deleted and ``'icon'`` fields may be rewritten).
    :param old_doc: the previously-indexed xapian document to delete
        after the replacement is added, or None on first indexing.
    """
    log.info("_create_document start")
    doc = xapian.Document()
    self.indexer.set_document(doc)
    filtered_name = filter_search_string(package['name'])
    filtered_summary = filter_search_string(package['summary'])
    filtered_description = filter_search_string(package['description'])
    filtered_owner = filter_search_string(package['devel_owner'])
    # Exact-match marker terms for name and owner, boosted (wdf_inc=10).
    self.indexer.index_text_without_positions(
        'EX__' + filtered_name + '__EX', 10, '')
    self.indexer.index_text_without_positions(
        'EX__' + filtered_owner + '__EX', 10, '')
    # Repeat-index the name (and its underscore-separated parts) 20 times
    # to weight name matches heavily relative to summary/description.
    name_parts = filtered_name.split('_')
    for i in range(20):
        if len(name_parts) > 1:
            for part in name_parts:
                self.indexer.index_text_without_positions(part)
        self.indexer.index_text_without_positions(filtered_name, 10, '')
    # Summary gets a 4x weight; description is indexed once.
    for i in range(4):
        self.indexer.index_text_without_positions(filtered_summary)
    self.indexer.index_text_without_positions(filtered_description)
    self.index_files_of_interest(doc, package)
    for sub_package in package['sub_pkgs']:
        filtered_sub_package_name = filter_search_string(
            sub_package['name'])
        log.info(" indexing subpackage %s" % sub_package['name'])
        self.indexer.index_text_without_positions(
            filtered_sub_package_name)
        self.indexer.index_text_without_positions(
            'EX__' + filtered_sub_package_name + '__EX', 10, '')
        self.index_files_of_interest(doc, sub_package)
        # Set special sub-package icon if appstream has one
        sub_package['icon'] = self.icon_cache.get(sub_package['name'],
                                                  self.default_icon)
        # If the parent has a dull icon, give it ours!
        if sub_package['icon'] != self.default_icon \
                and package['icon'] == self.default_icon:
            package['icon'] = sub_package['icon']
        # remove anything we don't want to store
        del sub_package['package']
    # @@: Right now we're only indexing the first part of the
    # provides/requires, and not boolean comparison or version
    # for requires in package.requires:
    #     print requires[0]
    #     doc.fields.append(xappy.Field('requires', requires[0]))
    # for provides in package.provides:
    #     doc.fields.append(xappy.Field('provides', provides[0]))
    # remove anything we don't want to store and then store data in
    # json format
    del package['package']
    doc.set_data(json.dumps(package))
    # It seems that xapian db.replace_document still creates a new
    # document. In order to avoid duplicating the document we are
    # using add_document and then delete the old document.
    self.db.add_document(doc)
    if old_doc is not None:
        self.db.delete_document(old_doc.get_docid())
    self.db.commit()
    log.info("_create_document end")