Пример #1
0
def add_metric(dataset, analysis, force_import=False, *args, **kwargs):
    """Store a document-based correlation value for each ordered topic pair.

    The analysis called ``analysis`` in the dataset called ``dataset`` is
    looked up, the ``PairwiseTopicMetric`` named by ``metric_name`` (a name
    defined outside this function) is fetched or created for it, and one
    ``PairwiseTopicMetricValue`` is created for every ordered pair of the
    analysis' topics, including each topic paired with itself.

    Args:
        dataset: name of the dataset that owns the analysis.
        analysis: name of the analysis.
        force_import: if false, an already existing metric is an error; if
            true, new values are added under the existing metric (nothing
            is deleted here).
        *args, **kwargs: accepted but not used by this function.

    Raises:
        RuntimeError: the metric already exists and ``force_import`` is
            false.
        IndexError: the dataset has no documents.
    """
    analysis_obj = Analysis.objects.get(dataset__name=dataset, name=analysis)

    try:
        metric = PairwiseTopicMetric.objects.get(
            name=metric_name, analysis=analysis_obj)
    except PairwiseTopicMetric.DoesNotExist:
        metric = PairwiseTopicMetric(name=metric_name, analysis=analysis_obj)
        metric.save()
    else:
        if not force_import:
            raise RuntimeError(
                "%s is already in the database for this analysis"
                % metric_name)

    # Vector length: one more than the id of the document with the highest
    # primary key in this dataset.
    newest_document = Document.objects.filter(
        dataset=analysis_obj.dataset).order_by('-pk')[0]
    num_docs = newest_document.id + 1
    topics = list(analysis_obj.topic_set.all().order_by('number'))

    # Build every topic's vector once, before the quadratic pairing below.
    vectors = [document_topic_vector(topic, num_docs) for topic in topics]

    for first_topic, first_vals in zip(topics, vectors):
        for second_topic, second_vals in zip(topics, vectors):
            # pmcc: presumably Pearson's product-moment correlation
            # coefficient -- confirm against its definition.
            coefficient = pmcc(first_vals, second_vals)
            if isnan(coefficient):
                # NaN results are reported and not stored.
                print("Error computing metric between {0} and {1}".format(
                    first_topic, second_topic))
                continue
            PairwiseTopicMetricValue.objects.create(
                topic1=first_topic, topic2=second_topic, metric=metric,
                value=coefficient)
        # Commit once per first topic, i.e. after each full row of pairs.
        transaction.commit()
Пример #2
0
def add_metric(dataset, analysis, force_import=False, *args, **kwargs):
    """Store a word-based correlation value for each ordered topic pair.

    The analysis called ``analysis`` in the dataset called ``dataset`` is
    looked up, the ``PairwiseTopicMetric`` named by ``metric_name`` (a name
    defined outside this function) is fetched or created for it, and one
    ``PairwiseTopicMetricValue`` is created for every ordered pair of the
    analysis' topics, including each topic paired with itself.

    Args:
        dataset: name of the dataset that owns the analysis.
        analysis: name of the analysis.
        force_import: if false, an already existing metric is an error; if
            true, new values are added under the existing metric (nothing
            is deleted here).
        *args, **kwargs: accepted but not used by this function.

    Raises:
        RuntimeError: the metric already exists and ``force_import`` is
            false.
        IndexError: there are no ``Word`` rows at all.
    """
    analysis_obj = Analysis.objects.get(dataset__name=dataset, name=analysis)

    try:
        metric = PairwiseTopicMetric.objects.get(
            name=metric_name, analysis=analysis_obj)
    except PairwiseTopicMetric.DoesNotExist:
        metric = PairwiseTopicMetric(name=metric_name, analysis=analysis_obj)
        metric.save()
    else:
        if not force_import:
            raise RuntimeError(
                "%s is already in the database for this analysis"
                % metric_name)

    # Vector length: one more than the id of the word with the highest
    # primary key (over all words, not only this dataset's).
    newest_word = Word.objects.order_by('-pk')[0]
    num_words = newest_word.id + 1
    topics = list(analysis_obj.topic_set.all().order_by('number'))

    # Build every topic's vector once, before the quadratic pairing below.
    vectors = [topic_word_vector(topic, num_words) for topic in topics]

    for first_topic, first_vals in zip(topics, vectors):
        for second_topic, second_vals in zip(topics, vectors):
            # pmcc: presumably Pearson's product-moment correlation
            # coefficient -- confirm against its definition.
            PairwiseTopicMetricValue.objects.create(
                topic1=first_topic, topic2=second_topic, metric=metric,
                value=pmcc(first_vals, second_vals))

    # A single commit once every pair has been written.
    transaction.commit()
Пример #3
0
def add_metric(database_id,
               dataset,
               analysis,
               force_import=False,
               *args,
               **kwargs):
    """Store a pairwise coherence value for every ordered pair of topics.

    Looks up the analysis named ``analysis`` in the dataset named
    ``dataset`` on the database alias ``database_id``, fetches or creates
    the ``PairwiseTopicMetric`` named by ``metric_name`` (a name defined
    outside this function), and creates one ``PairwiseTopicMetricValue``
    per ordered topic pair, including each topic paired with itself.

    Args:
        database_id: database alias used for every ORM call.
        dataset: name of the dataset that owns the analysis.
        analysis: name of the analysis.
        force_import: if false, an already existing metric is an error; if
            true, new values are added under the existing metric (nothing
            is deleted here).
        **kwargs: must contain ``counts``, the path of the SQLite database
            that holds the ``total_counts`` table.

    Raises:
        RuntimeError: the metric already exists and ``force_import`` is
            false.
        KeyError: ``counts`` was not supplied in ``kwargs``.
    """
    analysis = Analysis.objects.using(database_id).get(dataset__name=dataset,
                                                       name=analysis)
    try:
        metric = PairwiseTopicMetric.objects.using(database_id).get(
            name=metric_name, analysis=analysis)
        if not force_import:
            raise RuntimeError("%s is already in the database for this"
                               " analysis" % metric_name)
    except PairwiseTopicMetric.DoesNotExist:
        metric = PairwiseTopicMetric(name=metric_name, analysis=analysis)
        # Pin the write to the same alias as every other query in this
        # function instead of leaving the target to Django's routing.
        metric.save(using=database_id)

    conn = sqlite3.connect(kwargs['counts'])
    try:
        c = conn.cursor()
        c.execute('PRAGMA temp_store=MEMORY')
        c.execute('PRAGMA synchronous=OFF')
        c.execute('PRAGMA cache_size=2000000')
        c.execute('PRAGMA journal_mode=OFF')
        c.execute('PRAGMA locking_mode=EXCLUSIVE')

        # If total_counts holds several rows, the last one wins. If it is
        # empty, the totals are never bound and the first
        # pairwise_coherence call below fails with a NameError.
        c.execute("select words from total_counts")
        for row in c:
            total_words = float(row[0])
        c.execute("select cooccurrences from total_counts")
        for row in c:
            total_cooccurrences = float(row[0])

        topics = list(analysis.topics.all().order_by('number'))

        # topic_words(topic, 10): presumably the topic's top 10 words --
        # confirm against the helper's definition.
        num_words = 10
        topicwords = [topic_words(topic, num_words) for topic in topics]

        for i, topic1 in enumerate(topics):
            # Progress output: outer topic, then each inner topic indented.
            print(topic1)
            topic1_words = topicwords[i]
            for j, topic2 in enumerate(topics):
                print('  {0}'.format(topic2))
                topic2_words = topicwords[j]
                coherence = pairwise_coherence(topic1_words, topic2_words, c,
                                               total_words,
                                               total_cooccurrences)
                PairwiseTopicMetricValue.objects.using(database_id).create(
                    topic1=topic1, topic2=topic2, metric=metric,
                    value=coherence)
    finally:
        # With locking_mode=EXCLUSIVE the counts file stays locked for as
        # long as the connection is open, so always release it.
        conn.close()
Пример #4
0
def add_metric(database_id, dataset, analysis, force_import=False, *args, **kwargs):
    """Store a pairwise coherence value for every ordered pair of topics.

    Looks up the analysis named ``analysis`` in the dataset named
    ``dataset`` on the database alias ``database_id``, fetches or creates
    the ``PairwiseTopicMetric`` named by ``metric_name`` (a name defined
    outside this function), and creates one ``PairwiseTopicMetricValue``
    per ordered topic pair, including each topic paired with itself.

    Args:
        database_id: database alias used for every ORM call.
        dataset: name of the dataset that owns the analysis.
        analysis: name of the analysis.
        force_import: if false, an already existing metric is an error; if
            true, new values are added under the existing metric (nothing
            is deleted here).
        **kwargs: must contain ``counts``, the path of the SQLite database
            that holds the ``total_counts`` table.

    Raises:
        RuntimeError: the metric already exists and ``force_import`` is
            false.
        KeyError: ``counts`` was not supplied in ``kwargs``.
    """
    analysis = Analysis.objects.using(database_id).get(dataset__name=dataset, name=analysis)
    try:
        metric = PairwiseTopicMetric.objects.using(database_id).get(name=metric_name,
                analysis=analysis)
        if not force_import:
            raise RuntimeError("%s is already in the database for this"
                    " analysis" % metric_name)
    except PairwiseTopicMetric.DoesNotExist:
        metric = PairwiseTopicMetric(name=metric_name, analysis=analysis)
        # Pin the write to the same alias as every other query in this
        # function instead of leaving the target to Django's routing.
        metric.save(using=database_id)

    conn = sqlite3.connect(kwargs['counts'])
    try:
        c = conn.cursor()
        c.execute('PRAGMA temp_store=MEMORY')
        c.execute('PRAGMA synchronous=OFF')
        c.execute('PRAGMA cache_size=2000000')
        c.execute('PRAGMA journal_mode=OFF')
        c.execute('PRAGMA locking_mode=EXCLUSIVE')

        # If total_counts holds several rows, the last one wins. If it is
        # empty, the totals are never bound and the first
        # pairwise_coherence call below fails with a NameError.
        c.execute("select words from total_counts")
        for row in c:
            total_words = float(row[0])
        c.execute("select cooccurrences from total_counts")
        for row in c:
            total_cooccurrences = float(row[0])

        topics = list(analysis.topics.all().order_by('number'))

        # topic_words(topic, 10): presumably the topic's top 10 words --
        # confirm against the helper's definition.
        num_words = 10
        topicwords = [topic_words(topic, num_words) for topic in topics]

        for i, topic1 in enumerate(topics):
            # Progress output: outer topic, then each inner topic indented.
            print(topic1)
            topic1_words = topicwords[i]
            for j, topic2 in enumerate(topics):
                print('  {0}'.format(topic2))
                topic2_words = topicwords[j]
                coherence = pairwise_coherence(topic1_words, topic2_words, c,
                        total_words, total_cooccurrences)
                PairwiseTopicMetricValue.objects.using(database_id).create(topic1=topic1,
                        topic2=topic2, metric=metric, value=coherence)
    finally:
        # With locking_mode=EXCLUSIVE the counts file stays locked for as
        # long as the connection is open, so always release it.
        conn.close()