Example #1
def test_daterange2dates():
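    # Full default range as defined by TEXCAVATOR_DATE_RANGE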
    assert_equals(daterange2dates(TEXCAVATOR_DATE_RANGE),
                  {'lower': '1850-01-01', 'upper': '1990-12-31'})

    # First date after second date
    assert_equals(daterange2dates("19901231,18500101"),
                  {'lower': '1850-01-01', 'upper': '1990-12-31'})

    # Input single date
    assert_equals(daterange2dates("19901231"),
                  {'lower': '1850-01-01', 'upper': '1990-12-31'})

    # Empty input
    assert_equals(daterange2dates(""),
                  {'lower': '1850-01-01', 'upper': '1990-12-31'})
Example #2
    def handle(self, *args, **options):
        query_size = 100000
        n_repetitions = 10

        if len(args) > 0:
            query_size = int(args[0])
        if len(args) > 1:
            n_repetitions = int(args[1])

        response_times = []

        for repetition in range(n_repetitions):
            # select random documents
            document_set = DocID.objects.order_by('?')[0:query_size]
            doc_ids = [doc.doc_id for doc in document_set]

            aggr_resp = multiple_document_word_cloud(settings.ES_INDEX,
                                                     settings.ES_DOCTYPE,
                                                     None,
                                                     daterange2dates(''),
                                                     [],
                                                     [],
                                                     doc_ids)
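            # 'took' is the query time in milliseconds reported by Elasticsearch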
            response_times.append(int(aggr_resp.get('took')))
            self.stdout.write(str(aggr_resp.get('took')))
            self.stdout.flush()

        avg = sum(response_times) / float(len(response_times))
        print 'Average response time for aggregating over {num} documents: ' \
              '{avg} milliseconds'.format(num=query_size, avg=avg)
Example #3
    def handle(self, *args, **options):
        query_size = 100000
        n_repetitions = 10

        if len(args) > 0:
            query_size = int(args[0])
        if len(args) > 1:
            n_repetitions = int(args[1])

        response_times = []

        for repetition in range(n_repetitions):
            # select random documents
            document_set = DocID.objects.order_by('?')[0:query_size]
            doc_ids = [doc.doc_id for doc in document_set]

            aggr_resp = multiple_document_word_cloud(settings.ES_INDEX,
                                                     settings.ES_DOCTYPE, None,
                                                     daterange2dates(''), [],
                                                     [], doc_ids)
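            # 'took' is the query time in milliseconds reported by Elasticsearch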
            response_times.append(int(aggr_resp.get('took')))
            self.stdout.write(str(aggr_resp.get('took')))
            self.stdout.flush()

        avg = sum(response_times) / float(len(response_times))
        print 'Average response time for aggregating over {num} documents: ' \
              '{avg} milliseconds'.format(num=query_size, avg=avg)
Example #4
def user_login(request):
    username = request.POST.get('username')
    password = request.POST.get('password')
    next_url = request.POST.get('next_url')

    user = authenticate(username=username, password=password)

    if user is not None:
        if user.is_active:
            login(request, user)

            # TODO: are these date_limits really necessary?
            date_limits = daterange2dates('')
            dates = [date_limits['lower'], date_limits['upper']]
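            # convert the 'YYYY-MM-DD' strings to integers of the form YYYYMMDD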
            daterange = [int(d.replace('-', '')) for d in dates]

            params = {
                "user_id": user.id,
                "user_name": user.username,
                "daterange": daterange,
                # TODO: what is timestamp used for? Is it really necessary?
                "timestamp": TIMESTAMP,
                "next_url": next_url
            }

            return json_response_message('SUCCESS', '', params)
        else:
            return json_response_message(
                'ERROR', 'Account disabled.\n'
                'Please contact the system '
                'administrator.')

    return json_response_message('ERROR', 'Oops, that is not correct!')
Example #5
def user_login(request):
    username = request.POST.get('username')
    password = request.POST.get('password')
    next_url = request.POST.get('next_url')

    user = authenticate(username=username, password=password)

    if user is not None:
        if user.is_active:
            login(request, user)

            # TODO: are these date_limits really necessary?
            date_limits = daterange2dates('')
            dates = [date_limits['lower'], date_limits['upper']]
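            # convert the 'YYYY-MM-DD' strings to integers of the form YYYYMMDD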
            daterange = [int(d.replace('-', '')) for d in dates]

            params = {
                "user_id": user.id,
                "user_name": user.username,
                "daterange": daterange,
                # TODO: what is timestamp used for? Is it really necessary?
                "timestamp": TIMESTAMP,
                "next_url": next_url
            }

            return json_response_message('SUCCESS', '', params)
        else:
            return json_response_message('ERROR', 'Account disabled.\n'
                                         'Please contact the system '
                                         'administrator.')

    return json_response_message('ERROR', 'Oops, that is not correct!')
Example #6
def test_daterange2dates():
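    # Full default range as defined by TEXCAVATOR_DATE_RANGE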
    assert_equals(daterange2dates(TEXCAVATOR_DATE_RANGE), {
        'lower': '1850-01-01',
        'upper': '1990-12-31'
    })

    # First date after second date
    assert_equals(daterange2dates("19901231,18500101"), {
        'lower': '1850-01-01',
        'upper': '1990-12-31'
    })

    # Input single date
    assert_equals(daterange2dates("19901231"), {
        'lower': '1850-01-01',
        'upper': '1990-12-31'
    })

    # Empty input
    assert_equals(daterange2dates(""), {
        'lower': '1850-01-01',
        'upper': '1990-12-31'
    })
Example #7
def index(request):
    """Render main page."""
    date_limits = daterange2dates('')
    dates = [date_limits['lower'], date_limits['upper']]
    daterange = [int(d.replace('-', '')) for d in dates]

    data = {
        "PROJECT_NAME": settings.PROJECT_NAME,
        "SRU_DATE_LIMITS": daterange,
        "QUERY_DATA_DOWNLOAD_ALLOW": settings.QUERY_DATA_DOWNLOAD_ALLOW,
        "ES_INDEX": settings.ES_INDEX,
        "ILPS_LOGGING": settings.ILPS_LOGGING
    }

    return render_to_response('index.html', data, RequestContext(request))
Example #8
def index(request):
    """Render main page."""
    date_limits = daterange2dates('')
    dates = [date_limits['lower'], date_limits['upper']]
    daterange = [int(d.replace('-', '')) for d in dates]

    data = {
        "PROJECT_NAME": settings.PROJECT_NAME,
        "SRU_DATE_LIMITS": daterange,
        "QUERY_DATA_DOWNLOAD_ALLOW": settings.QUERY_DATA_DOWNLOAD_ALLOW,
        "ES_INDEX": settings.ES_INDEX,
        "ILPS_LOGGING": settings.ILPS_LOGGING
    }

    return render_to_response('index.html', data, RequestContext(request))
Example #9
    def handle(self, *args, **options):
        print 'Emptying table...'
        DayStatistic.objects.all().delete()

        date_range_str = settings.TEXCAVATOR_DATE_RANGE
        dates = daterange2dates(date_range_str)

        year_lower = datetime.strptime(dates['lower'], '%Y-%m-%d').date().year
        year_upper = datetime.strptime(dates['upper'], '%Y-%m-%d').date().year

        if len(args) > 0:
            year_lower = int(args[0])
        if len(args) > 1:
            year_upper = int(args[1])

        print 'Gathering statistics from %s until %s.' \
            % (year_lower, year_upper)

        agg_name = 'daystatistic'

        for year in range(year_lower, year_upper+1):
            date_range = {
                'lower': '{y}-01-01'.format(y=year),
                'upper': '{y}-12-31'.format(y=year)
            }

            print year

            results = day_statistics(settings.ES_INDEX,
                                     settings.ES_DOCTYPE,
                                     date_range,
                                     agg_name)

            if results:
                # save results to database
                agg_data = results['aggregations'][agg_name]['buckets']

                for date in agg_data:
                    try:
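                        # bucket keys are ISO date strings; store one document count per day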
                        d = datetime.strptime(date['key_as_string'],
                                              '%Y-%m-%dT00:00:00.000Z').date()
                        DayStatistic.objects.create(date=str(d),
                                                    count=date['doc_count'])
                    except DatabaseError as exc:
                        msg = "Database Error: %s" % exc
                        if settings.DEBUG:
                            print msg
Example #10
    def handle(self, *args, **options):
        if QueryTerm.objects.all().count() == 0:
            print 'No query terms stored in the database. Please run ' \
                  '\'python manage.py gatherqueryterms\' first.'
            sys.exit(1)

        query_size = 10
        n_repetitions = 10

        if len(args) > 0:
            query_size = int(args[0])
        if len(args) > 1:
            n_repetitions = int(args[1])

        response_times = []
        es_wall_clock = []

        for repetition in range(n_repetitions):
            # generate a random weighted query: each term gets a Lucene-style boost (term^boost) between 1 and 40
            query_terms = QueryTerm.objects.order_by('?')[0:query_size]

            query_list = [
                '{}^{}'.format(t.term, randint(1, 40)) for t in query_terms
            ]
            q = ' OR '.join(query_list)

            t1 = time.time()
            valid_q, result = do_search(settings.ES_INDEX, settings.ES_DOCTYPE,
                                        q, 0, 20, daterange2dates(''), [], [])
            t2 = time.time()

            if not valid_q:
                print 'Invalid query: {}'.format(q)
            else:
                es_wall_clock.append((t2 - t1) * 1000)
                response_times.append(int(result.get('took')))
                self.stdout.write(str(result.get('took')))
                self.stdout.flush()

        avg = sum(response_times) / float(len(response_times))
        avg_wall_clock = float(sum(es_wall_clock) / len(es_wall_clock))
        print 'Average response time for queries of size {}: {} milliseconds'. \
              format(query_size, avg)
        print 'Average wall clock time for queries of size {}: {} ' \
              'milliseconds'.format(query_size, avg_wall_clock)
Example #11
    def handle(self, *args, **options):
        print 'Emptying table...'
        DayStatistic.objects.all().delete()

        date_range_str = settings.TEXCAVATOR_DATE_RANGE
        dates = daterange2dates(date_range_str)

        year_lower = datetime.strptime(dates['lower'], '%Y-%m-%d').date().year
        year_upper = datetime.strptime(dates['upper'], '%Y-%m-%d').date().year

        if len(args) > 0:
            year_lower = int(args[0])
        if len(args) > 1:
            year_upper = int(args[1])

        print 'Gathering statistics from %s until %s.' \
            % (year_lower, year_upper)

        agg_name = 'daystatistic'

        for year in range(year_lower, year_upper + 1):
            date_range = {
                'lower': '{y}-01-01'.format(y=year),
                'upper': '{y}-12-31'.format(y=year)
            }

            print year

            results = day_statistics(settings.ES_INDEX, settings.ES_DOCTYPE,
                                     date_range, agg_name)

            if results:
                # save results to database
                agg_data = results['aggregations'][agg_name]['buckets']

                for date in agg_data:
                    try:
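                        # bucket keys are ISO date strings; store one document count per day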
                        d = datetime.strptime(date['key_as_string'],
                                              '%Y-%m-%dT00:00:00.000Z').date()
                        DayStatistic.objects.create(date=str(d),
                                                    count=date['doc_count'])
                    except DatabaseError as exc:
                        msg = "Database Error: %s" % exc
                        if settings.DEBUG:
                            print msg