Пример #1
0
    def get_field_words(self, geo):
        '''
        Collect word windows from selected fields of a record.

        Every name in ``self.word_fields`` becomes a key of the result. When
        ``geo`` has an attribute of that name, its value is normalised to a
        list and each item is stringified and passed to ``str_windows`` with
        window sizes 1, 2 and 3; everything ``str_windows`` yields is
        appended to that field's list.

        :param geo: record object whose attributes are inspected.
        :returns: dict mapping field name -> list of collected windows.
            Fields that ``geo`` does not have map to an empty list.
        '''
        words = {}
        for field in self.word_fields:
            collected = words[field] = []
            if not hasattr(geo, field):
                continue

            # The value can be a string, a list of single words, or a list
            # of paragraphs; anything that is not a list is treated as a
            # single item.  isinstance() (rather than an exact type
            # comparison) keeps list subclasses from being wrapped and
            # stringified as a whole.
            values = getattr(geo, field)
            if not isinstance(values, list):
                values = [values]

            for value in values:
                text = str(value)  # the same text is reused for every window size
                for n in range(1, 4):  # window sizes 1, 2, 3
                    collected.extend(str_windows(text, n))

        return words
Пример #2
0
    def get_field_words(self, geo):
        '''
        Gather word windows for each configured field of ``geo``.

        The result has one entry per name in ``self.word_fields``. If ``geo``
        carries an attribute with that name, the attribute value is turned
        into a list (a non-list value becomes a one-item list), each item is
        converted with ``str()`` and handed to ``str_windows`` for n = 1, 2, 3.
        The items produced by ``str_windows`` are accumulated in order.

        :param geo: record object to read the fields from.
        :returns: dict of field name -> list of windows; an absent field
            yields an empty list.
        '''
        words = {}
        for field in self.word_fields:
            words[field] = []
            if not hasattr(geo, field):
                continue

            # can be a string, a list of single words, or a list of paragraphs
            field_words = getattr(geo, field)

            # isinstance() also accepts list subclasses, which an exact
            # type comparison would wrap and stringify as one big item.
            if not isinstance(field_words, list):
                field_words = [field_words]

            for item in field_words:
                as_text = str(item)
                for size in (1, 2, 3):
                    words[field].extend(str_windows(as_text, size))

        return words
Пример #3
0
def get_field_words(geo):
    '''
    Collect word windows from the 'title', 'description' and 'summary'
    fields of a record.

    For each of those fields that ``geo`` has, the value is normalised to a
    list and every item at least ``n`` long (per ``len()``) is passed to
    ``str_windows`` for n = 1, 2, 3, together with the separator pattern
    ``[-_\\s]+``. Items shorter than ``n`` are skipped for that window size.

    When the ``DEBUG`` environment variable is set, skipped items and empty
    field lists are reported through ``warn``.

    :param geo: record object whose attributes are inspected.
    :returns: dict mapping each of the three field names to the list of
        collected windows (empty when the field is absent or empty).
    '''
    debug = 'DEBUG' in os.environ
    words = {}  # k=field, v=[w1, w2, w3, ...] (w's can be "windows")
    word_fields = ['title', 'description', 'summary']
    for field in word_fields:
        words[field] = []
        if not hasattr(geo, field):
            continue

        # can be a string, a list of single words, or a list of paragraphs
        field_words = getattr(geo, field)
        if not isinstance(field_words, list):
            field_words = [field_words]

        if not field_words:
            if debug:
                warn("does this ever happen?")
            continue

        for wl in field_words:
            for n in range(1, 4):  # gives 1,2,3
                if len(wl) >= n:
                    # Raw string: '\s' in a plain literal is an invalid
                    # escape sequence and triggers a warning on modern Python.
                    words[field].extend(str_windows(wl, n, r'[-_\s]+'))
                elif debug:
                    warn("skipping %s(%d): len(wl)=%d" % (field, n, len(wl)))

    return words