Python TopicModeler.get_document_topics示例

def getEvidenceByTopic(request):
    """Return collection, bookmarked and personal evidence for one topic.

    Expects a POST request whose JSON body carries ``collection_id``,
    ``topic_id`` and ``user_id``.  The JSON response has three keys:

    * ``evidence`` -- at most 500 ``Evidence`` rows whose related
      ``EvidenceTopic`` was created by ``collection_id`` and has ``topic_id``
      as primary topic, ordered by descending primary-topic probability.
    * ``evidenceBookmarks`` -- all ``EvidenceBookmark`` rows of ``user_id``.
    * ``evidencePersonal`` -- evidence created by ``user_id`` whose abstract
      is longer than 50 characters and whose highest-scoring topic, as
      reported by ``TopicModeler.get_document_topics``, equals ``topic_id``.

    Any other HTTP method receives an empty 405 response instead of falling
    through and returning ``None`` (which Django rejects as an invalid view
    result).
    """
    if request.method != 'POST':
        return HttpResponse(status=405)

    data = json.loads(request.body)
    print(data)
    collection_id = data['collection_id']
    topic_id = data['topic_id']
    user_id = data['user_id']

    evidence = Evidence.objects.filter(Q(evidencetopic__created_by=collection_id)&Q(evidencetopic__primary_topic=topic_id)).order_by('-evidencetopic__primary_topic_prob').distinct()[:500]
    evidenceBookmarks = EvidenceBookmark.objects.filter(user_id=user_id)
    evidencePersonal = Evidence.objects.filter(Q(created_by=user_id))

    # flattenSerializedJson is defined elsewhere; presumably it turns Django's
    # serializer output into a JSON list of plain records -- the code below
    # relies on each record exposing an 'abstract' key.
    evidence_json = flattenSerializedJson(serializers.serialize('json', evidence))
    evidenceBookmark_json = flattenSerializedJson(serializers.serialize('json', evidenceBookmarks))
    evidencePersonal_json = flattenSerializedJson(serializers.serialize('json', evidencePersonal))

    output = {}
    output['evidencePersonal'] = []
    # The collection name is the same for every record, so it is looked up at
    # most once, and only when some abstract is long enough to be modelled.
    collection_name = None
    for e in json.loads(evidencePersonal_json):
        if len(e['abstract']) <= 50:
            continue
        if collection_name is None:
            collection_name = Collection.objects.get(collection_id=collection_id).collection_name
        topic_dist, _primary_topic_terms = TopicModeler.get_document_topics(e['abstract'], collection_name)
        # An empty distribution has no primary topic; max() would raise
        # ValueError on it, so such records are simply not matched.
        if len(topic_dist) == 0:
            continue
        # Entries are indexed as (topic, score); pick the highest score.
        primary_topic_tuple = max(topic_dist, key=lambda x: x[1])
        if primary_topic_tuple[0] == topic_id:
            output['evidencePersonal'].append(e)
    output['evidence'] = json.loads(evidence_json)
    output['evidenceBookmarks'] = json.loads(evidenceBookmark_json)

    return HttpResponse(json.dumps(output), status=status.HTTP_200_OK)

示例#2

显示文件

文件： views_service.py 项目： tacitia/ThoughtFlow

def searchEvidenceByTitle(request):
    """Search evidence by title and rank the hits by edit distance.

    POST: the JSON body must carry ``collection_id``, ``title``,
    ``result_limit``, ``include_personal`` and ``user_id``.  The title is
    split on single spaces and every term must occur (case-insensitively) in
    a candidate's title.  Candidates are evidence created by the collection
    and, when ``include_personal`` is truthy, evidence created by the user.
    Each hit gets a ``dist`` field (``edit_distance`` between the requested
    title and its own title); the ``result_limit`` closest hits are returned
    as a JSON list.  Each returned hit also gets a ``topic`` field: the
    stored ``EvidenceTopic.primary_topic`` if one exists, otherwise the
    highest-scoring topic inferred from the abstract when it is longer than
    50 characters, otherwise ``-1``.

    GET: NOTE(review): this branch uses a hard-coded collection id and title
    and looks like a debugging endpoint -- confirm before relying on it.  It
    returns the 20 closest titles in collection 13.

    Any other HTTP method receives an empty 405 response instead of falling
    through and returning ``None``.
    """
    # Python 2 has reduce as a builtin; importing it keeps this portable.
    from functools import reduce

    if request.method == 'POST':
        data = json.loads(request.body)
        collection_id = data['collection_id']
        title = data['title']
        result_limit = data['result_limit']
        include_personal = data['include_personal']
        user_id = data['user_id']
        # DONE: we can alternatively change this to treat given title as a series of separated terms
        title_terms = title.split(' ')
        print(title_terms)
        # Build the "all terms present" condition once and reuse it for both
        # the collection query and the optional personal query.
        title_filter = reduce(lambda x, y: x & y, [Q(title__icontains=word) for word in title_terms])
        evidence = Evidence.objects.filter(Q(created_by=collection_id) & title_filter)
        if include_personal:
            personal_evidence = Evidence.objects.filter(Q(created_by=user_id) & title_filter)
            evidence = chain(evidence, personal_evidence)
        serialized_json = serializers.serialize('json', evidence)
        evidence_json = flattenSerializedJson(serialized_json)
        evidence = json.loads(evidence_json)
        pprint.pprint(evidence)
        for e in evidence:
            e['dist'] = edit_distance(title, e['title'])
        print('result limit')
        print(result_limit)
        evidence = sorted(evidence, key=lambda e: e['dist'])[:result_limit]

        # Looked up lazily and at most once: it is only needed when some hit
        # has no stored topic and must be run through the topic model.
        collection_name = None
        for e in evidence:
            e['topic'] = -1
            try:
                e['topic'] = EvidenceTopic.objects.get(evidence=e['id']).primary_topic
            except ObjectDoesNotExist:
                topic_dist = []
                if len(e['abstract']) > 50:
                    if collection_name is None:
                        collection_name = Collection.objects.get(collection_id=collection_id).collection_name
                    topic_dist, _primary_topic_terms = TopicModeler.get_document_topics(e['abstract'], collection_name)
                # max() raises ValueError on an empty distribution, so an
                # empty result is treated like a too-short abstract.
                if len(topic_dist) > 0:
                    primary_topic_tuple = max(topic_dist, key=lambda x: x[1])
                    e['topic'] = primary_topic_tuple[0]
                else:
                    print('warning: evidence with no topic')
        return HttpResponse(json.dumps(evidence), status=status.HTTP_200_OK)

    elif request.method == 'GET':
        collection_id = 13
        title = 'UpSet: Visualization of Intersecting Sets'
        evidence = Evidence.objects.filter(created_by=collection_id)
        serialized_json = serializers.serialize('json', evidence)
        evidence_json = flattenSerializedJson(serialized_json)
        evidence = json.loads(evidence_json)
        for e in evidence:
            e['dist'] = edit_distance(title, e['title'])
        evidence = sorted(evidence, key=lambda e: e['dist'])
        return HttpResponse(json.dumps(evidence[:20]), status=status.HTTP_200_OK)

    return HttpResponse(status=405)

示例#3

显示文件

文件： views_backup.py 项目： tacitia/ThoughtFlow

def getEvidenceRecommendation(request):
    """Recommend evidence related to a piece of text.

    Expects a POST request whose JSON body carries ``collectionId`` (anything
    ``int()`` accepts) and ``text``.  The text is passed to
    ``TopicModeler.get_document_topics`` together with the collection's name.
    The JSON response has two keys:

    * ``topics`` -- a one-element list holding the primary topic's ``terms``
      and its ``prob`` (the second item of the highest-scoring topic entry,
      or ``0`` when the topic distribution is empty).
    * ``evidence`` -- the first 100 results of
      ``getEvidenceRecommendationWithinTopics`` for that distribution.

    Any other HTTP method receives an empty 405 response instead of falling
    through and returning ``None`` (which Django rejects as an invalid view
    result).
    """
    if request.method != 'POST':
        return HttpResponse(status=405)

    data = json.loads(request.body)
    collection_id = int(data['collectionId'])
    name = Collection.objects.get(collection_id=collection_id).collection_name

    topic_dist, primary_topic_terms = TopicModeler.get_document_topics(data['text'], name)
    if len(topic_dist) > 0:
        # Entries are indexed as (topic, score); pick the highest score.
        primary_topic_tuple = max(topic_dist, key=lambda x: x[1])
    else:
        # No topics at all: report a zero probability rather than failing.
        primary_topic_tuple = ('', 0)

    # NOTE: getEvidenceRecommendationAcrossTopics(topic_dist, name) was an
    # alternative call previously left commented out here.
    output = {
        'topics': [{
            'terms': primary_topic_terms,
            'prob': primary_topic_tuple[1],
        }],
        'evidence': getEvidenceRecommendationWithinTopics(topic_dist, name, collection_id)[:100],
    }

    return HttpResponse(json.dumps(output), status=status.HTTP_200_OK)