Пример #1
0
 def add_words(self, filename):
     """Read terms from a file, one per line, and add each distinct one.

     Every line is passed through ``clean_and_qualify_term``; lines for
     which it returns a falsy value are counted as skipped.  Remaining
     terms are de-duplicated by their ``slugify`` value, so a term whose
     slug was already seen during this call is also counted as skipped.
     Before each new term is handed to ``add`` the method sleeps for
     ``config.add_word_interval`` (passed straight to ``time.sleep``).

     A summary line with the number of added and skipped terms is printed
     at the end.

     Args:
         filename: Path of the file to read; opened with the default mode.
     """
     print("Adding words from {}...".format(filename))
     added = set()
     # Only the number of skipped lines is reported, so a counter suffices.
     skipped = 0
     with open(filename) as f:
         # Iterate the file lazily instead of loading every line up front.
         for line in f:
             term = clean_and_qualify_term(line)
             if not term:
                 skipped += 1
                 continue
             slug = slugify(term)
             if slug in added:
                 skipped += 1
                 continue
             time.sleep(config.add_word_interval)
             added.add(slug)
             # NOTE(review): `add` is looked up as a module-level name, not
             # as a method on self -- confirm this is intended.
             add(term)
     # Call form of print works on both Python 2 and Python 3.
     print("Added {} terms, skipped {}".format(len(added), skipped))
Пример #2
0
def add_words(bucket, key):
    """Queue every distinct qualifying word of a stored object for search.

    The object is fetched with ``config.s3.Object(bucket, key).get()`` and
    its ``'Body'`` is read and split into lines.  Each line is passed
    through ``clean_and_qualify_term``; lines for which it returns a falsy
    value are counted as skipped.  Remaining terms are de-duplicated by
    their ``hashslug`` value, and for each new one a message of the form
    ``{'word': term, 'hashslug': slug}`` is written to ``'search'`` via
    ``tasks.write_message``.

    A summary line with the number of added and skipped terms is printed
    at the end.

    Args:
        bucket: Bucket identifier passed to ``config.s3.Object``.
        key: Object key passed to ``config.s3.Object``.
    """
    # NOTE(review): presumably a boto3 S3 resource -- confirm against config.
    contents = config.s3.Object(bucket, key).get()
    words = contents['Body'].read().splitlines()
    added = set()
    # Only the number of skipped lines is reported, so a counter suffices.
    skipped = 0
    for line in words:
        term = clean_and_qualify_term(line)
        if not term:
            skipped += 1
            continue
        slug = hashslug(term)
        if slug in added:
            skipped += 1
            continue
        added.add(slug)
        tasks.write_message('search', {'word': term, 'hashslug': slug})
    # Call form of print works on both Python 2 and Python 3.
    print("Added {} terms, skipped {}".format(len(added), skipped))
Пример #3
0
def add_words(bucket, key):
    """Queue every distinct qualifying word of a stored object for search.

    The object is fetched with ``config.s3.Object(bucket, key).get()`` and
    its ``'Body'`` is read and split into lines.  Each line is passed
    through ``clean_and_qualify_term``; lines for which it returns a falsy
    value are counted as skipped.  Remaining terms are de-duplicated by
    their ``hashslug`` value, and for each new one a message of the form
    ``{'word': term, 'hashslug': slug}`` is written to ``'search'`` via
    ``tasks.write_message``.

    A summary line with the number of added and skipped terms is printed
    at the end.

    Args:
        bucket: Bucket identifier passed to ``config.s3.Object``.
        key: Object key passed to ``config.s3.Object``.
    """
    # NOTE(review): presumably a boto3 S3 resource -- confirm against config.
    contents = config.s3.Object(bucket, key).get()
    words = contents['Body'].read().splitlines()
    added = set()
    # Only the number of skipped lines is reported, so a counter suffices.
    skipped = 0
    for line in words:
        term = clean_and_qualify_term(line)
        if not term:
            skipped += 1
            continue
        slug = hashslug(term)
        if slug in added:
            skipped += 1
            continue
        added.add(slug)
        tasks.write_message('search', {'word': term, 'hashslug': slug})
    # Call form of print works on both Python 2 and Python 3.
    print("Added {} terms, skipped {}".format(len(added), skipped))
Пример #4
0
 def add_words(self, filename):
     """Read terms from a file, one per line, and add each distinct one.

     Every line is passed through ``clean_and_qualify_term``; lines for
     which it returns a falsy value are counted as skipped.  Remaining
     terms are de-duplicated by their ``slugify`` value, so a term whose
     slug was already seen during this call is also counted as skipped.
     Before each new term is handed to ``add`` the method sleeps for
     ``config.add_word_interval`` (passed straight to ``time.sleep``).

     A summary line with the number of added and skipped terms is printed
     at the end.

     Args:
         filename: Path of the file to read; opened with the default mode.
     """
     print("Adding words from {}...".format(filename))
     added = set()
     # Only the number of skipped lines is reported, so a counter suffices.
     skipped = 0
     with open(filename) as f:
         # Iterate the file lazily instead of loading every line up front.
         for line in f:
             term = clean_and_qualify_term(line)
             if not term:
                 skipped += 1
                 continue
             slug = slugify(term)
             if slug in added:
                 skipped += 1
                 continue
             time.sleep(config.add_word_interval)
             added.add(slug)
             # NOTE(review): `add` is looked up as a module-level name, not
             # as a method on self -- confirm this is intended.
             add(term)
     # Call form of print works on both Python 2 and Python 3.
     print("Added {} terms, skipped {}".format(len(added), skipped))
Пример #5
0
def test_disqualify():
    """Check that every line of the disqualified-words fixture is rejected.

    Each line of the UTF-8 fixture file is passed to
    ``clean_and_qualify_term``; the test fails on the first line for which
    a truthy value comes back.
    """
    fixture_path = "serapis/tests/data/words_disqualified.txt"
    failure_template = "Word '{}' falsely marked as valid"
    with codecs.open(fixture_path, "r", "utf-8") as fixture:
        for line in fixture.readlines():
            # The message is only formatted when the assertion fails.
            assert not clean_and_qualify_term(line), failure_template.format(
                line.strip())
Пример #6
0
def test_disqualify():
    """Ensure no entry of the disqualified-words fixture passes as valid.

    Reads the fixture as UTF-8 and asserts, line by line, that
    ``clean_and_qualify_term`` returns a falsy value.
    """
    source = "serapis/tests/data/words_disqualified.txt"
    with codecs.open(source, 'r', 'utf-8') as stream:
        entries = stream.readlines()
        for entry in entries:
            # The stripped entry is reported so the failing word is readable.
            assert not clean_and_qualify_term(entry), (
                "Word '{}' falsely marked as valid".format(entry.strip()))