def test_dumps_articles_api():
    """
    Test the Articles JSON API: /articles/<version>/

    Warning: use with caution, it is resource intensive for the remote server.

    Fetches the first page, then follows each page's 'next' bookmark for at
    most 600 further pages, printing the result of orm_new_webresource() for
    every article found. Stops early as soon as a page carries no bookmark.

    :return: None
    """
    import urllib  # Python 2 API: urllib.urlopen is used below

    _VERSION = "v04"
    max_pages = 600  # raise this value to test more pages
    env = 'offline'  # switch to online if want to play it remotely

    print("Running test_articles")

    base_url = ENV[env]['_SERVICE'] + "/articles/" + _VERSION + "/"

    # test_integrity() hands back an object exposing 'next' and 'articles';
    # presumably it validates and decodes the raw body -- confirm in its source.
    first = test_integrity(get_curling(base_url))

    bookmark = first['next']
    print(bookmark)
    for i in range(max_pages):
        print(i)
        if not bookmark:
            # No further page to follow: the dump is complete.
            print('Articles finished')
            return None

        response = test_integrity(urllib.urlopen(bookmark).read())
        for a in response['articles']:
            print(orm_new_webresource(a))

        bookmark = response['next']
        # Single pre-formatted argument prints the same on Python 2 and 3.
        print("%s %s" % (i, bookmark))


def download_ids_generator(self, kind, results=(), bookmark='start'):
    """
    Fetch and collect all the resources' ids in the datastore.

    Requests ``/datastore/<kind>?index=`` page after page, following the
    'next' bookmark of each response until a response carries an empty one.
    All pages are fetched before the function returns.

    :param kind: kind of the entities
    :param results: iterable of already collected ids (yielded first)
    :param bookmark: bookmark to fetch different datastore's pages;
        'start' asks for the first page
    :return: an iterator over ``results`` followed by the ids of every page

    # USAGE
        for uuid in self.download_ids_generator(kind):
            print(uuid)
    """
    import itertools

    # Iterative on purpose: one Python frame per page would hit the
    # interpreter recursion limit on a datastore with many pages.
    pages = []
    while True:
        raw = get_curling(
            ENV[test_env]['_SERVICE'] + '/datastore/' + kind + '?index=',
            {'token': CLIENT_TOKEN,
             'bookmark': bookmark if bookmark != 'start' else ''})
        page = json.loads(raw)

        pages.append(page['articles'])
        bookmark = page['next']
        if not bookmark:
            break

    return itertools.chain(results, itertools.chain.from_iterable(pages))


def bulk_updated():
    """
    Call the server's 'correctentries' endpoint over the stored entries in
    steps of 500.

    Requests ``/datastore/correctentries`` on the 'online' service once per
    offset, sending the client token and ``skip`` = 0, 500, ..., 20500, and
    prints each offset after its request returns.

    Works with server-side chunk (kept as found; the excerpt is incomplete):

        if self.request.get('token') == _CLIENT_TOKEN:
        if name == 'correctentries' and self.request.get('skip'):
            from articlesjsonapi import memcache_webresource_query
            from datastore.models import WebResource

            query = WebResource.query()

            for q in query.fetch(500, offset=int(self.request.get('skip'))):
                except AttributeError:
                    setattr(q, 'in_graph', False)

                    if '' in q.url:
                        setattr(q, 'type_of', 'fb')
                    elif '' in q.url:
                        setattr(q, 'type_of', 'tweet')
                    if '' in q.url:
                        setattr(q, 'type_of', 'paper')
                    elif q.title == '' and q.abstract == '':
                        if q.url.endswith(('jpg', 'jpeg', 'png', 'mp3', 'mp4', 'm4v')):
                            setattr(q, 'type_of', 'media')
                        elif q.url.endswith('pdf'):
                            setattr(q, 'type_of', 'pdf')
                            setattr(q, 'type_of', 'link')
                        setattr(q, 'type_of', 'feed')

                print q.to_dict()

    :return: None
    """
    chunk = 500          # same step as the fetch(500, ...) in the excerpt above
    last_offset = 20500  # highest 'skip' value that is requested

    for skip in range(0, last_offset + 1, chunk):
        # The response body is not used here; presumably the request itself
        # triggers the server-side correction -- confirm against the handler.
        get_curling(
            ENV['online']['_SERVICE'] + '/datastore/correctentries',
            {
                'token': CLIENT_TOKEN,
                'skip': skip,
            })
        print(skip)


def get_resource(*args):
    """
    GET a resource from the datastore

    Expects exactly two positional arguments, in this order:

    :param kind: the entity kind to get ('webresource' or 'indexer')
    :param uuid: the entity uuid
    :return: whatever get_curling() returns for the request; presumably a
        JSON representation of the resource -- confirm against get_curling
    :raises ValueError: if ``kind`` is not an allowed kind, or if the number
        of positional arguments is not two
    """
    kind, uuid = args

    allowed_kinds = ('webresource', 'indexer')
    if kind not in allowed_kinds:
        # Reject unknown kinds before any request is sent.
        raise ValueError('get_resource: wrong kind in arguments')

    return get_curling(
        ENV[test_env]['_SERVICE'] + '/datastore/' + kind,
        {
            'token': CLIENT_TOKEN,
            'retrieve': uuid,
        })