예제 #1
0
def create_named_list(lexicon, num_questions, word_length, is_range,
                      questions, name):
    """Validate and save a NamedList unless it would have no questions.

    When ``num_questions`` equals 0 a debug message is logged and nothing
    is created. Otherwise a NamedList is built from the arguments, checked
    with ``full_clean()`` and then saved. Returns None in both cases.
    """
    if num_questions == 0:
        logger.debug(">> Not creating empty list " + name)
        return

    # Map the snake_case arguments onto the model's camelCase field names.
    fields = {
        'lexicon': lexicon,
        'numQuestions': num_questions,
        'wordLength': word_length,
        'isRange': is_range,
        'questions': questions,
        'name': name,
    }
    named_list = NamedList(**fields)
    named_list.full_clean()
    named_list.save()
예제 #2
0
def create_common_words_list(lname, friendly_name):
    """Build and save an OWL2 NamedList from a file of common words.

    Reads ``COMMON_WORDS_DIR + lname`` (one word per line), looks up each
    word's alphagram in Redis under the key
    ``'<alphagram>:<OWL2_LEX_INDEX>'`` and stores the integer values found
    there as the list's questions.

    Blank lines, alphagrams that have no Redis entry, and repeated
    alphagrams (anagrams in the input) are skipped, so a trailing newline
    or an unknown word no longer aborts the whole import.

    Args:
        lname: File name, appended to ``COMMON_WORDS_DIR``.
        friendly_name: Value stored as the NamedList's ``name``.
    """
    # The context manager closes the file even if reading fails.
    with open(COMMON_WORDS_DIR + lname) as f:
        words = [line.strip() for line in f.read().split('\n')]

    # Deduplicate alphagrams while keeping first-seen order, so anagrams in
    # the input do not produce the same question twice.
    alphs = []
    seen = set()
    for word in words:
        if not word:
            continue
        alpha = alphagrammize(word)
        if alpha not in seen:
            seen.add(alpha)
            alphs.append(alpha)

    r = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT,
                    db=settings.REDIS_ALPHAGRAMS_DB)
    pipe = r.pipeline()
    for alpha in alphs:
        pipe.get('%s:%s' % (alpha, OWL2_LEX_INDEX))

    # A missing key comes back as None; int(None) would raise TypeError.
    pks = [int(pk) for pk in pipe.execute() if pk is not None]

    nl = NamedList(lexicon=Lexicon.objects.get(lexiconName='OWL2'),
                   numQuestions=len(pks),
                   wordLength=0,
                   isRange=False,
                   questions=json.dumps(pks),
                   name=friendly_name)

    nl.save()
예제 #3
0
def create_common_words_list(lname, friendly_name):
    """Build and save an OWL2 NamedList from a file of common words.

    Reads ``COMMON_WORDS_DIR + lname`` (one word per line), converts the
    words to a set of alphagrams and selects the ``probability_pk`` of
    every matching ``base_alphagram`` row whose ``lexicon_id`` is
    ``OWL2_LEX_INDEX``. Those values become the list's questions.

    The alphagrams are passed as query parameters rather than being
    interpolated into the SQL text, which also makes single-word and empty
    inputs valid (an empty input simply yields a list with no questions).

    Args:
        lname: File name, appended to ``COMMON_WORDS_DIR``.
        friendly_name: Value stored as the NamedList's ``name``.
    """
    # The context manager closes the file even if reading fails.
    with open(COMMON_WORDS_DIR + lname) as f:
        words = [line.strip() for line in f.read().split('\n')]

    # Skip blank lines (e.g. the one produced by a trailing newline).
    alphs = sorted(set(alphagrammize(word) for word in words if word))

    # Query in batches so long word files stay below backend limits on the
    # number of bound parameters per statement.
    batch_size = 500
    pks = []
    cursor = connection.cursor()
    for start in range(0, len(alphs), batch_size):
        batch = alphs[start:start + batch_size]
        placeholders = ', '.join(['%s'] * len(batch))
        cursor.execute(
            'SELECT probability_pk FROM base_alphagram '
            'WHERE lexicon_id = %s AND alphagram IN (' + placeholders + ')',
            [OWL2_LEX_INDEX] + batch
        )
        pks.extend(row[0] for row in cursor.fetchall())

    nl = NamedList(lexicon=Lexicon.objects.get(lexiconName='OWL2'),
                   numQuestions=len(pks),
                   wordLength=0,
                   isRange=False,
                   questions=json.dumps(pks),
                   name=friendly_name)

    nl.save()
예제 #4
0
def create_common_words_list(lname, friendly_name):
    """Build and save an OWL2 NamedList from a file of common words.

    Reads ``COMMON_WORDS_DIR + lname`` (one word per line), converts the
    words to a set of alphagrams and selects the ``probability_pk`` of
    every matching ``base_alphagram`` row whose ``lexicon_id`` is
    ``OWL2_LEX_INDEX``. Those values become the list's questions.

    The alphagrams are passed as query parameters rather than being
    interpolated into the SQL text, which also makes single-word and empty
    inputs valid (an empty input simply yields a list with no questions).

    Args:
        lname: File name, appended to ``COMMON_WORDS_DIR``.
        friendly_name: Value stored as the NamedList's ``name``.
    """
    # The context manager closes the file even if reading fails.
    with open(COMMON_WORDS_DIR + lname) as f:
        words = [line.strip() for line in f.read().split('\n')]

    # Skip blank lines (e.g. the one produced by a trailing newline).
    alphs = sorted(set(alphagrammize(word) for word in words if word))

    # Query in batches so long word files stay below backend limits on the
    # number of bound parameters per statement.
    batch_size = 500
    pks = []
    cursor = connection.cursor()
    for start in range(0, len(alphs), batch_size):
        batch = alphs[start:start + batch_size]
        placeholders = ', '.join(['%s'] * len(batch))
        cursor.execute(
            'SELECT probability_pk FROM base_alphagram '
            'WHERE lexicon_id = %s AND alphagram IN (' + placeholders + ')',
            [OWL2_LEX_INDEX] + batch
        )
        pks.extend(row[0] for row in cursor.fetchall())

    nl = NamedList(lexicon=Lexicon.objects.get(lexiconName='OWL2'),
                   numQuestions=len(pks),
                   wordLength=0,
                   isRange=False,
                   questions=json.dumps(pks),
                   name=friendly_name)

    nl.save()
예제 #5
0
def create_named_list(lexicon, num_questions, word_length, is_range, questions,
                      name):
    """Persist a validated NamedList, skipping lists with zero questions.

    A ``num_questions`` of 0 only logs a debug message. Any other value
    builds the NamedList from the arguments, runs ``full_clean()`` on it
    and saves it. Nothing is returned.
    """
    if num_questions == 0:
        logger.debug(">> Not creating empty list " + name)
        return

    # Translate the function's argument names to the model's field names.
    model_kwargs = dict(
        lexicon=lexicon,
        numQuestions=num_questions,
        wordLength=word_length,
        isRange=is_range,
        questions=questions,
        name=name,
    )
    new_list = NamedList(**model_kwargs)
    new_list.full_clean()
    new_list.save()
예제 #6
0
def create_wl_lists(i, lex):
    """Create word lists for words with length `i`.

    Saves the following NamedList rows for lexicon `lex`, in this order:

    * ``'The <friendly>'``: a range list from the first to the last
      probability pk of this length.
    * For lengths 7 and 8: one range list per ``LIST_GRANULARITY``
      probabilities, named ``'<friendly> (<from> to <to>)'``.
    * For lengths 4 to 8: ``'JQXZ <friendly>'``, alphagrams containing any
      of J, Q, X or Z.
    * For length 7 / 8: alphagrams with at least 4 / 5 vowels.
    * For lexicon ``'CSW12'``: words whose symbols equal ``'#+'`` and words
      whose symbols contain ``'#'``.
    * For lexicon ``'America'``: words whose symbols equal ``'+'``.

    Args:
        i: Word length; must be a key of ``lex.lengthCounts`` (as a
            string) and of ``friendlyNumberMap``.
        lex: Lexicon instance; ``lengthCounts``, ``pk`` and ``lexiconName``
            are read from it.
    """

    def save_list(name, questions, num_questions, is_range=False):
        # Every list shares the lexicon and word length; only the name and
        # the question payload differ.
        NamedList(lexicon=lex,
                  numQuestions=num_questions,
                  wordLength=i,
                  isRange=is_range,
                  questions=json.dumps(questions),
                  name=name).save()

    def save_pk_list(name, pks):
        # Explicit (non-range) list: one question per pk.
        save_list(name, pks, len(pks))

    length_counts = json.loads(lex.lengthCounts)
    num_for_this_length = length_counts[str(i)]
    min_prob_pk = alphProbToProbPK(1, lex.pk, i)
    max_prob_pk = alphProbToProbPK(num_for_this_length, lex.pk, i)
    friendly = friendlyNumberMap[i]

    save_list('The ' + friendly, [min_prob_pk, max_prob_pk],
              num_for_this_length, is_range=True)

    if 7 <= i <= 8:
        # create 'every x' list
        for p in range(1, num_for_this_length + 1, LIST_GRANULARITY):
            # The final chunk may be shorter than LIST_GRANULARITY.
            last = min(p + LIST_GRANULARITY - 1, num_for_this_length)
            min_p = alphProbToProbPK(p, lex.pk, i)
            max_p = alphProbToProbPK(last, lex.pk, i)
            save_list('%s (%s to %s)' % (friendly, p, last),
                      [min_p, max_p], max_p - min_p + 1, is_range=True)

    # Progress output; written so it prints the same text on Python 2 and 3.
    print('JQXZ %s' % i)
    if 4 <= i <= 8:
        save_pk_list(
            'JQXZ ' + friendly,
            get_pks_by_condition(
                min_prob_pk, max_prob_pk,
                lambda a: re.search(r'[JQXZ]', a)))

    if i == 7:
        # 4+ vowel 7s
        save_pk_list(
            'Sevens with 4 or more vowels',
            get_pks_by_condition(
                min_prob_pk, max_prob_pk,
                lambda a: len(re.findall(r'[AEIOU]', a)) >= 4))

    if i == 8:
        # 5+ vowel 8s
        save_pk_list(
            'Eights with 5 or more vowels',
            get_pks_by_condition(
                min_prob_pk, max_prob_pk,
                lambda a: len(re.findall(r'[AEIOU]', a)) >= 5))

    if lex.lexiconName == 'CSW12':
        save_pk_list(
            'CSW12 ' + friendly + ' not in CSW07',
            get_pks_by_word_condition(
                min_prob_pk, max_prob_pk,
                lambda w: w.get('symbols') == '#+'))

        # `symbols` may be absent or None; fall back to '' so the
        # membership test cannot raise TypeError.
        save_pk_list(
            'CSW12 ' + friendly + ' not in OWL2',
            get_pks_by_word_condition(
                min_prob_pk, max_prob_pk,
                lambda w: '#' in (w.get('symbols') or '')))

    if lex.lexiconName == 'America':
        save_pk_list(
            'America ' + friendly + ' not in OWL2',
            get_pks_by_word_condition(
                min_prob_pk, max_prob_pk,
                lambda w: w.get('symbols') == '+'))
예제 #7
0
def create_wl_lists(i, lex):
    """Create word lists for words with length `i`.

    Saves the following NamedList rows for lexicon `lex`, in this order:

    * ``'The <friendly>'``: a range list from the first to the last
      probability pk of this length.
    * For lengths 7 and 8: one range list per ``LIST_GRANULARITY``
      probabilities, named ``'<friendly> (<from> to <to>)'``.
    * For lengths 4 to 8: ``'JQXZ <friendly>'``, alphagrams containing any
      of J, Q, X or Z.
    * For length 7 / 8: alphagrams with at least 4 / 5 vowels.
    * For lexicon ``'CSW12'``: words whose symbols equal ``'#+'`` and words
      whose symbols contain ``'#'``.

    Args:
        i: Word length; must be a key of ``lex.lengthCounts`` (as a
            string) and of ``friendlyNumberMap``.
        lex: Lexicon instance; ``lengthCounts``, ``pk`` and ``lexiconName``
            are read from it.
    """

    def store(name, questions, num_questions, is_range=False):
        # Every list shares the lexicon and word length; only the name and
        # the question payload differ.
        NamedList(lexicon=lex,
                  numQuestions=num_questions,
                  wordLength=i,
                  isRange=is_range,
                  questions=json.dumps(questions),
                  name=name).save()

    def store_pks(name, pks):
        # Explicit (non-range) list: one question per pk.
        store(name, pks, len(pks))

    length_counts = json.loads(lex.lengthCounts)
    total = length_counts[str(i)]
    first_pk = alphProbToProbPK(1, lex.pk, i)
    last_pk = alphProbToProbPK(total, lex.pk, i)
    friendly = friendlyNumberMap[i]

    store('The ' + friendly, [first_pk, last_pk], total, is_range=True)

    if 7 <= i <= 8:
        # create 'every x' list
        for p in range(1, total + 1, LIST_GRANULARITY):
            # The final chunk may be shorter than LIST_GRANULARITY.
            upper = min(p + LIST_GRANULARITY - 1, total)
            min_p = alphProbToProbPK(p, lex.pk, i)
            max_p = alphProbToProbPK(upper, lex.pk, i)
            store('%s (%s to %s)' % (friendly, p, upper),
                  [min_p, max_p], max_p - min_p + 1, is_range=True)

    # Progress output; written so it prints the same text on Python 2 and 3.
    print('JQXZ %s' % i)
    if 4 <= i <= 8:
        store_pks(
            'JQXZ ' + friendly,
            get_pks_by_condition(
                first_pk, last_pk,
                lambda a: re.search(r'[JQXZ]', a)))

    if i == 7:
        # 4+ vowel 7s
        store_pks(
            'Sevens with 4 or more vowels',
            get_pks_by_condition(
                first_pk, last_pk,
                lambda a: len(re.findall(r'[AEIOU]', a)) >= 4))

    if i == 8:
        # 5+ vowel 8s
        store_pks(
            'Eights with 5 or more vowels',
            get_pks_by_condition(
                first_pk, last_pk,
                lambda a: len(re.findall(r'[AEIOU]', a)) >= 5))

    if lex.lexiconName == 'CSW12':
        store_pks(
            'CSW12 ' + friendly + ' not in CSW07',
            get_pks_by_word_condition(
                first_pk, last_pk,
                lambda w: w.get('symbols') == '#+'))

        # `symbols` may be absent or None; fall back to '' so the
        # membership test cannot raise TypeError.
        store_pks(
            'CSW12 ' + friendly + ' not in OWL2',
            get_pks_by_word_condition(
                first_pk, last_pk,
                lambda w: '#' in (w.get('symbols') or '')))