Пример #1
0
def _read_last_total(cursor, query):
    """Run *query* and return the first column of its last row as a float.

    Raises RuntimeError when the query yields no rows, so a missing total is
    reported where it is read rather than as an unbound local later on.
    """
    rows = cursor.execute(query).fetchall()
    if not rows:
        raise RuntimeError("No rows returned by %r" % query)
    return float(rows[-1][0])


def add_metric(database_id,
               dataset,
               analysis,
               force_import=False,
               *args,
               **kwargs):
    """Compute and store a coherence value for every ordered topic pair.

    Looks up the analysis called ``analysis`` in the dataset called
    ``dataset``, gets or creates the ``PairwiseTopicMetric`` row named
    ``metric_name`` for it, and then writes one
    ``PairwiseTopicMetricValue`` per ordered pair of topics (including each
    topic paired with itself).

    ``database_id`` is the database alias used for every ORM call.
    ``kwargs['counts']`` is passed to ``sqlite3.connect``; that database must
    contain a ``total_counts`` table with ``words`` and ``cooccurrences``
    columns.  ``*args`` is accepted but not used.

    Raises RuntimeError if the metric already exists for this analysis and
    ``force_import`` is false, or if ``total_counts`` has no rows.  Raises
    KeyError if ``counts`` is not supplied.

    NOTE: ``metric_name`` is not defined in this function; it is presumably
    a module-level name -- confirm against the rest of the file.
    """
    analysis = Analysis.objects.using(database_id).get(dataset__name=dataset,
                                                       name=analysis)
    try:
        metric = PairwiseTopicMetric.objects.using(database_id).get(
            name=metric_name, analysis=analysis)
    except PairwiseTopicMetric.DoesNotExist:
        metric = PairwiseTopicMetric(name=metric_name, analysis=analysis)
        # Save explicitly to the same database the lookup above used.
        metric.save(using=database_id)
    else:
        if not force_import:
            raise RuntimeError("%s is already in the database for this"
                               " analysis" % metric_name)
        # NOTE(review): with force_import the existing metric is reused and
        # new values are written without removing any that already exist --
        # confirm whether old values should be cleared first.

    conn = sqlite3.connect(kwargs['counts'])
    try:
        c = conn.cursor()
        for pragma in ('PRAGMA temp_store=MEMORY',
                       'PRAGMA synchronous=OFF',
                       'PRAGMA cache_size=2000000',
                       'PRAGMA journal_mode=OFF',
                       'PRAGMA locking_mode=EXCLUSIVE'):
            c.execute(pragma)

        total_words = _read_last_total(
            c, "select words from total_counts")
        total_cooccurrences = _read_last_total(
            c, "select cooccurrences from total_counts")

        topics = list(analysis.topics.all().order_by('number'))

        # Fetch each topic's top words once instead of once per pair.
        num_words = 10
        topicwords = [topic_words(topic, num_words) for topic in topics]

        for topic1, topic1_words in zip(topics, topicwords):
            # Single-argument print form: same output on Python 2 and 3.
            print(topic1)
            for topic2, topic2_words in zip(topics, topicwords):
                print('  %s' % (topic2,))
                coherence = pairwise_coherence(topic1_words, topic2_words, c,
                                               total_words,
                                               total_cooccurrences)
                PairwiseTopicMetricValue.objects.using(database_id).create(
                    topic1=topic1, topic2=topic2, metric=metric,
                    value=coherence)
    finally:
        # Always release the SQLite handle, even if a computation fails.
        conn.close()