def create_document(record_id, repo, **kwargs):
    """
    Builds the full text search document for a single record.
    It should be called in add_record_to_index method.

    Args:
        record_id: Record identifier. Stored in the 'record_id' field and
            used as the second half of the document id.
        repo: Repository name. Stored in the 'repo' field and used as the
            first half of the document id.
        **kwargs: Field name -> value pairs to index. Every pair becomes a
            text field holding script_variant.romanize_word(value). The
            keys 'given_name', 'family_name', 'full_name',
            'alternate_names', 'home_street', 'home_city', 'home_state',
            'home_postal_code', 'home_neighborhood' and 'home_country' are
            read unconditionally, so a missing one raises KeyError.
    Returns:
        appengine_search.Document whose doc_id is '<repo>:<record_id>'.
    """
    name_keys = ('given_name', 'family_name', 'full_name', 'alternate_names')
    location_keys = (
        'home_street', 'home_city', 'home_state',
        'home_postal_code', 'home_neighborhood', 'home_country')

    doc_id = repo + ':' + record_id

    # Identification fields always come first.
    fields = [
        appengine_search.TextField(name='repo', value=repo),
        appengine_search.TextField(name='record_id', value=record_id),
    ]

    # One field per keyword argument, romanized by
    # script_variant.romanize_word.
    fields.extend(
        appengine_search.TextField(
            name=field_name,
            value=script_variant.romanize_word(field_value))
        for field_name, field_value in kwargs.items())

    # Extra name fields produced by create_jp_name_fields
    # (romanization by Japanese name dictionary).
    name_values = dict((key, kwargs[key]) for key in name_keys)
    fields.extend(create_jp_name_fields(**name_values))

    # Extra location fields produced by create_jp_location_fields
    # (romanization by Japanese location dictionary).
    location_values = dict((key, kwargs[key]) for key in location_keys)
    fields.extend(create_jp_location_fields(**location_values))

    return appengine_search.Document(doc_id=doc_id, fields=fields)
def make_or_regexp(query_txt):
    """
    Builds a compiled, case-insensitive regular expression for OR search.

    Args:
        query_txt: Search query; words are separated by single spaces.
    Returns:
        Compiled pattern of the form
            query_word | query_word | ...
        where every alternative is a regex-escaped, non-empty item yielded
        by script_variant.romanize_word for one of the query words.
    """
    # Phase 1: gather every romanized variant of every query word.
    variants = []
    for query_word in query_txt.split(' '):
        variants += script_variant.romanize_word(query_word)

    # Phase 2: drop empty variants, escape the rest so they match
    # literally, and OR them together.
    escaped_variants = [re.escape(variant) for variant in variants if variant]
    pattern = '|'.join(escaped_variants)
    return re.compile(pattern, re.IGNORECASE)
def create_romanized_query_txt(query_txt):
    """
    Applies romanization to each word in query_txt.

    Each space-separated word is expanded through
    script_variant.romanize_word; its variants are individually passed
    through enclose_in_double_quotes and joined with ' OR '. The per-word
    groups are then joined with ',' and the whole string is passed through
    enclose_in_parenthesis.

    Args:
        query_txt: Search query
    Returns:
        script varianted query_txt
    """
    per_word_clauses = []
    for query_word in query_txt.split(' '):
        quoted_variants = [
            enclose_in_double_quotes(variant)
            for variant in script_variant.romanize_word(query_word)
        ]
        per_word_clauses.append(' OR '.join(quoted_variants))

    return enclose_in_parenthesis(','.join(per_word_clauses))
def create_romanized_query_txt(query_txt):
    """
    Applies romanization to each word in query_txt.

    For every space-separated word, the variants returned by
    script_variant.romanize_word are each wrapped by
    enclose_in_double_quotes and combined with ' OR '. The resulting
    groups are combined with ',' and the result is wrapped by
    enclose_in_parenthesis.

    Args:
        query_txt: Search query
    Returns:
        script varianted query_txt
    """
    # NOTE(review): a function with this same name and the same logic is
    # defined earlier in the visible source; the later definition is the
    # one bound at import time. Confirm whether both are needed.
    or_groups = [
        ' OR '.join(
            map(enclose_in_double_quotes,
                script_variant.romanize_word(token)))
        for token in query_txt.split(' ')
    ]
    joined_groups = ','.join(or_groups)
    return enclose_in_parenthesis(joined_groups)