Exemplo n.º 1
0
def tv_cloud(request):
    """Generate word cloud data using the termvector approach.

    Returns word cloud data for a single document word cloud (based on a single
    document id) or starts the generation of a multiple document word cloud
    (based on a saved query, optionally restricted to a date range for a
    timeline cloud).

    For multiple document word clouds, a celery task generates the cloud data.

    The following GET parameters are read from ``request``:

    * ``queryID`` -- id of the saved query; used to select query-specific
      stopwords and, when no ``record_id`` is given, to load the query.
    * ``min_length`` -- integer forwarded as ``min_length`` to the cloud
      generators (default 2). A value that cannot be parsed as an integer
      falls back to the default instead of raising.
    * ``stopwords`` -- ``"1"`` to apply the user's (and query's) stopwords.
    * ``stopwords_default`` -- ``"1"`` to also apply the stopwords that have
      neither a user nor a query; only honoured when ``stopwords`` is ``"1"``.
    * ``stems`` -- ``"1"`` sets the ``stems`` flag passed to the generators.
    * ``record_id`` -- when present, a single document cloud is built.
    * ``idf_timeframe`` -- forwarded to ``normalize_cloud`` or the task.
    * ``is_timeline`` / ``date_range`` -- when ``is_timeline`` is non-empty,
      ``date_range`` is converted with ``daterange2dates`` and passed to the
      task.

    Returns:
        The result of ``json_response_message``: for a single document the
        payload is ``{'result': <normalized cloud>}``, for a query it is
        ``{'task': <celery task id>}``.
    """
    default_min_length = 2

    if settings.DEBUG:
        # stderr.write() emits the same text as the Python 2-only
        # ``print >> stderr`` form but also works on Python 3.
        stderr.write("termvector cloud()\n")
    logger.info('services/cloud/ - termvector word cloud')
    logger.info('services/cloud/ - user: {}'.format(request.user.username))

    # Retrieve the cloud settings
    query_id = request.GET.get('queryID')
    try:
        min_length = int(request.GET.get('min_length', default_min_length))
    except ValueError:
        # Client-supplied value; an empty or non-numeric string must not
        # bubble up as an unhandled exception.
        logger.info('services/cloud/ - invalid min_length, using default')
        min_length = default_min_length
    use_stopwords = request.GET.get('stopwords') == "1"
    use_default_stopwords = request.GET.get('stopwords_default') == "1"
    stems = request.GET.get('stems') == "1"

    # Retrieve the stopwords
    stopwords = []
    if use_stopwords:
        # Stopwords of this user that are not tied to a query
        stopwords_user = list(StopWord.objects
                              .filter(user=request.user)
                              .filter(query=None)
                              .values_list('word', flat=True))

        # Stopwords of this user for the given query
        stopwords_query = []
        if query_id:
            stopwords_query = list(StopWord.objects
                                   .filter(user=request.user)
                                   .filter(query__id=query_id)
                                   .values_list('word', flat=True))

        # Stopwords without user and without query
        stopwords_default = []
        if use_default_stopwords:
            stopwords_default = list(StopWord.objects
                                     .filter(user=None)
                                     .filter(query=None)
                                     .values_list('word', flat=True))

        stopwords = stopwords_user + stopwords_query + stopwords_default

    record_id = request.GET.get('record_id')
    logger.info('services/cloud/ - record_id: {}'.format(record_id))

    idf_timeframe = request.GET.get('idf_timeframe')

    if record_id:
        # Cloud for a single document: computed synchronously
        t_vector = single_document_word_cloud(settings.ES_INDEX,
                                              settings.ES_DOCTYPE,
                                              record_id,
                                              min_length,
                                              stopwords,
                                              stems)
        normalized = normalize_cloud(t_vector['result'], idf_timeframe)
        return json_response_message('ok', 'Word cloud generated',
                                     {'result': normalized})

    # Cloud for a query: delegated to a celery task
    logger.info('services/cloud/ - multiple document word cloud')

    query = get_object_or_404(Query, pk=query_id)
    params = query.get_query_dict()

    # If we're creating a timeline cloud, set the min/max dates
    # NOTE(review): any non-empty ``is_timeline`` value enables this, unlike
    # the other flags which require "1" -- confirm this is intended.
    date_range = None
    if request.GET.get('is_timeline'):
        date_range = daterange2dates(request.GET.get('date_range'))

    task = generate_tv_cloud.delay(params, min_length, stopwords, date_range,
                                   stems, idf_timeframe)
    logger.info('services/cloud/ - Celery task id: {}'.format(task.id))

    return json_response_message('ok', '', {'task': task.id})
Exemplo n.º 2
0
def tv_cloud(request):
    """Generate word cloud data using the termvector approach.

    Returns word cloud data for a single document word cloud (based on a single
    document id) or starts the generation of a multiple document word cloud
    (either based on a list of document ids (i.e., timeline burst cloud) or on
    the search parameters of the request).

    For multiple document word clouds, a celery task generates the cloud data.

    The following request parameters are read:

    * search parameters -- extracted with ``get_search_parameters`` from
      ``request.REQUEST`` and passed to the task.
    * ``ids`` -- comma separated document ids (read from ``request.REQUEST``).
      Exactly one id yields a single document cloud; several ids are passed
      to the task as a list.
    * ``queryID`` (GET) -- used to select query-specific stopwords.
    * ``min_length`` (GET) -- integer forwarded as ``min_length`` to the cloud
      generators (default 2). A value that cannot be parsed as an integer
      falls back to the default instead of raising.
    * ``stopwords`` (GET) -- ``"1"`` to apply the user's (and query's)
      stopwords.
    * ``stopwords_default`` (GET) -- ``"1"`` to also apply the stopwords that
      have neither a user nor a query; only honoured when ``stopwords`` is
      ``"1"``.
    * ``stems`` (GET) -- ``"1"`` sets the ``stems`` flag passed on.

    Returns:
        The result of ``json_response_message``: for a single document the
        payload is the value returned by ``single_document_word_cloud``,
        otherwise it is ``{'task': <celery task id>}``.
    """
    default_min_length = 2

    if settings.DEBUG:
        # stderr.write() emits the same text as the Python 2-only
        # ``print >> stderr`` form but also works on Python 3.
        stderr.write("termvector cloud()\n")
    logger.info('services/cloud/ - termvector word cloud')
    logger.info('services/cloud/ - user: {}'.format(request.user.username))

    # NOTE(review): request.REQUEST is deprecated in newer Django releases
    # (and later removed) -- confirm the Django version before upgrading.
    params = get_search_parameters(request.REQUEST)

    ids = request.REQUEST.get('ids')
    query_id = request.GET.get('queryID')
    try:
        min_length = int(request.GET.get('min_length', default_min_length))
    except ValueError:
        # Client-supplied value; an empty or non-numeric string must not
        # bubble up as an unhandled exception.
        logger.info('services/cloud/ - invalid min_length, using default')
        min_length = default_min_length
    use_stopwords = request.GET.get('stopwords') == "1"
    use_default_stopwords = request.GET.get('stopwords_default') == "1"
    stems = request.GET.get('stems') == "1"

    # Retrieve the stopwords
    stopwords = []
    if use_stopwords:
        # Stopwords of this user that are not tied to a query
        stopwords_user = list(StopWord.objects
                              .filter(user=request.user)
                              .filter(query=None)
                              .values_list('word', flat=True))

        # Stopwords of this user for the given query
        stopwords_query = []
        if query_id:
            stopwords_query = list(StopWord.objects
                                   .filter(user=request.user)
                                   .filter(query__id=query_id)
                                   .values_list('word', flat=True))

        # Stopwords without user and without query
        stopwords_default = []
        if use_default_stopwords:
            stopwords_default = list(StopWord.objects
                                     .filter(user=None)
                                     .filter(query=None)
                                     .values_list('word', flat=True))

        stopwords = stopwords_user + stopwords_query + stopwords_default

    # Cloud by ids
    if ids:
        ids = ids.split(',')

        if len(ids) == 1:
            # Word cloud for single document: computed synchronously
            logger.info('services/cloud/ - single document word cloud')
            t_vector = single_document_word_cloud(settings.ES_INDEX,
                                                  settings.ES_DOCTYPE,
                                                  ids[0],
                                                  min_length,
                                                  stopwords,
                                                  stems)
            return json_response_message('ok', 'Word cloud generated',
                                         t_vector)

    # Cloud by queryID or multiple ids: delegated to a celery task.
    # ``ids`` is either None/empty (no ids given) or a list of several ids.
    logger.info('services/cloud/ - multiple document word cloud')

    task = generate_tv_cloud.delay(params, min_length, stopwords, ids, stems)
    logger.info('services/cloud/ - Celery task id: {}'.format(task.id))

    return json_response_message('ok', '', {'task': task.id})
Exemplo n.º 3
0
def tv_cloud(request):
    """Generate word cloud data using the termvector approach.

    Returns word cloud data for a single document word cloud (based on a single
    document id) or starts the generation of a multiple document word cloud
    (based on a saved query, optionally restricted to a date range for a
    timeline cloud).

    For multiple document word clouds, a celery task generates the cloud data.

    The following GET parameters are read from ``request``:

    * ``queryID`` -- id of the saved query; used to select query-specific
      stopwords and, when no ``record_id`` is given, to load the query.
    * ``min_length`` -- integer forwarded as ``min_length`` to the cloud
      generators (default 2). A value that cannot be parsed as an integer
      falls back to the default instead of raising.
    * ``stopwords`` -- ``"1"`` to apply the user's (and query's) stopwords.
    * ``stopwords_default`` -- ``"1"`` to also apply the stopwords that have
      neither a user nor a query; only honoured when ``stopwords`` is ``"1"``.
    * ``stems`` -- ``"1"`` sets the ``stems`` flag passed to the generators.
    * ``record_id`` -- when present, a single document cloud is built.
    * ``idf_timeframe`` -- forwarded to ``normalize_cloud`` or the task.
    * ``is_timeline`` / ``date_range`` -- when ``is_timeline`` is non-empty,
      ``date_range`` is converted with ``daterange2dates`` and passed to the
      task.

    Returns:
        The result of ``json_response_message``: for a single document the
        payload is ``{'result': <normalized cloud>}``, for a query it is
        ``{'task': <celery task id>}``.
    """
    fallback_min_length = 2

    if settings.DEBUG:
        # ``print >> stderr`` only works as intended on Python 2; writing to
        # the stream directly produces the same output on Python 2 and 3.
        stderr.write("termvector cloud()\n")
    logger.info('services/cloud/ - termvector word cloud')
    logger.info('services/cloud/ - user: {}'.format(request.user.username))

    # Retrieve the cloud settings
    query_id = request.GET.get('queryID')
    try:
        min_length = int(request.GET.get('min_length', fallback_min_length))
    except ValueError:
        # The value comes straight from the query string; do not let a
        # malformed number abort the request with an unhandled exception.
        logger.info('services/cloud/ - invalid min_length, using default')
        min_length = fallback_min_length
    use_stopwords = request.GET.get('stopwords') == "1"
    use_default_stopwords = request.GET.get('stopwords_default') == "1"
    stems = request.GET.get('stems') == "1"

    # Retrieve the stopwords
    stopwords = []
    if use_stopwords:
        # Stopwords of this user that are not tied to a query
        stopwords_user = list(
            StopWord.objects.filter(user=request.user).filter(
                query=None).values_list('word', flat=True))

        # Stopwords of this user for the given query
        stopwords_query = []
        if query_id:
            stopwords_query = list(
                StopWord.objects.filter(user=request.user).filter(
                    query__id=query_id).values_list('word', flat=True))

        # Stopwords without user and without query
        stopwords_default = []
        if use_default_stopwords:
            stopwords_default = list(
                StopWord.objects.filter(user=None).filter(
                    query=None).values_list('word', flat=True))

        stopwords = stopwords_user + stopwords_query + stopwords_default

    record_id = request.GET.get('record_id')
    logger.info('services/cloud/ - record_id: {}'.format(record_id))

    idf_timeframe = request.GET.get('idf_timeframe')

    if record_id:
        # Cloud for a single document: computed synchronously
        t_vector = single_document_word_cloud(settings.ES_INDEX,
                                              settings.ES_DOCTYPE, record_id,
                                              min_length, stopwords, stems)
        normalized = normalize_cloud(t_vector['result'], idf_timeframe)
        return json_response_message('ok', 'Word cloud generated',
                                     {'result': normalized})

    # Cloud for a query: delegated to a celery task
    logger.info('services/cloud/ - multiple document word cloud')

    query = get_object_or_404(Query, pk=query_id)
    params = query.get_query_dict()

    # If we're creating a timeline cloud, set the min/max dates
    # NOTE(review): any non-empty ``is_timeline`` value enables this, unlike
    # the other flags which require "1" -- confirm this is intended.
    date_range = None
    if request.GET.get('is_timeline'):
        date_range = daterange2dates(request.GET.get('date_range'))

    task = generate_tv_cloud.delay(params, min_length, stopwords,
                                   date_range, stems, idf_timeframe)
    logger.info('services/cloud/ - Celery task id: {}'.format(task.id))

    return json_response_message('ok', '', {'task': task.id})