Exemplo n.º 1
0
def save_bare_tags(page_size=5000):
    """Copy every MODM tag into the Django ``Tag`` table, one page at a time.

    Tags are read from ``MODMTag`` (sorted by ``'-_id'``) in slices of at most
    ``page_size`` items. Each slice is converted into unsaved ``Tag`` objects
    with ``system=False`` and written with a single ``bulk_create`` call
    inside its own ``transaction.atomic()`` block. Progress, timings and the
    final row counts are printed.

    :param page_size: maximum number of tags read and inserted per page.
    """
    func_name = sys._getframe().f_code.co_name
    print('Starting {}...'.format(func_name))
    count = 0
    start = datetime.now()
    total = MODMTag.find().count()

    while count < total:
        # Remember where this page begins so the progress message is correct
        # even when the last page holds fewer than ``page_size`` tags.
        page_start = count
        with transaction.atomic():
            tags = [
                Tag(_id=modm_tag._id, lower=modm_tag.lower, system=False)
                for modm_tag in
                MODMTag.find().sort('-_id')[page_start:page_start + page_size]
            ]
            if not tags:
                # Nothing came back (the source shrank after ``total`` was
                # computed, or ``page_size`` is not positive). ``count`` can
                # no longer advance, so stop instead of looping forever.
                break
            count += len(tags)

            then = datetime.now()
            print('Saving tags {} through {}...'.format(page_start, count))
            woot = Tag.objects.bulk_create(tags)
            now = datetime.now()
            print('Done with {} tags in {} seconds...'.format(
                len(woot), (now - then).total_seconds()))

            # Drop the references to this page before collecting garbage so
            # the page's objects can be reclaimed before the next one loads.
            tags = None
            woot = None
            trash = gc.collect()
            print('Took out {} trashes'.format(trash))

    print('MODM Tags: {}'.format(total))
    print('django Tags: {}'.format(Tag.objects.all().count()))
    print('Done with {} in {} seconds...'.format(
        func_name, (datetime.now() - start).total_seconds()))
Exemplo n.º 2
0
def save_bare_tags(page_size=5000):
    """Migrate all MODM tags to Django ``Tag`` rows in pages.

    Each pass of the outer loop reads one slice of ``MODMTag`` (sorted by
    ``'-_id'``), builds the matching unsaved ``Tag`` objects with
    ``system=False``, and inserts them with one ``bulk_create`` call inside a
    ``transaction.atomic()`` block. Progress and timing information is
    printed along the way, followed by the source and destination counts.

    :param page_size: maximum number of tags handled per page.
    """
    own_name = sys._getframe().f_code.co_name
    print('Starting {}...'.format(own_name))
    count = 0
    start = datetime.now()
    total = MODMTag.find().count()

    while count < total:
        first = count  # index of the first tag on this page, used for logging
        with transaction.atomic():
            page = MODMTag.find().sort('-_id')[first:first + page_size]
            tags = []
            for modm_tag in page:
                tags.append(
                    Tag(_id=modm_tag._id, lower=modm_tag.lower, system=False))
            if not tags:
                # An empty page means ``count`` cannot advance any further
                # (source shrank, or ``page_size`` is not positive); bail out
                # rather than spin forever.
                break
            count += len(tags)

            then = datetime.now()
            # Report the real range; the final page may be shorter than
            # ``page_size``.
            print('Saving tags {} through {}...'.format(first, count))
            woot = Tag.objects.bulk_create(tags)
            now = datetime.now()
            print('Done with {} tags in {} seconds...'.format(
                len(woot), (now - then).total_seconds()))

            # Release this page's objects before forcing a collection.
            tags = None
            woot = None
            trash = gc.collect()
            print('Took out {} trashes'.format(trash))

    print('MODM Tags: {}'.format(total))
    print('django Tags: {}'.format(Tag.objects.all().count()))
    print('Done with {} in {} seconds...'.format(
        own_name, (datetime.now() - start).total_seconds()))
Exemplo n.º 3
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.

    Conferences whose ``is_meeting`` attribute is explicitly ``False`` are
    skipped; every other conference is processed.

    :param kwargs: accepted but not used by this function.
    :return: ``{'submissions': [...]}`` where the list holds the result of
        ``_render_conference_node`` for each matching node, sorted by
        ``'dateCreated'`` in descending order.
    """
    submissions = []
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        if hasattr(conf, 'is_meeting') and conf.is_meeting is False:
            # Skip only this conference. Breaking out here would silently
            # drop every conference that follows the first non-meeting one.
            continue
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) & Q('is_public', 'eq', True)
            & Q('is_deleted', 'ne', True))
        # A set removes any duplicate nodes before rendering and counting.
        projects = set(nodes)

        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))
        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
    submissions.sort(key=lambda submission: submission['dateCreated'],
                     reverse=True)
    return {'submissions': submissions}
Exemplo n.º 4
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.

    :param kwargs: accepted but not used by this function.
    :return: ``{'submissions': [...]}`` where the list holds the result of
        ``_render_conference_node`` for each matching node, sorted by
        ``'dateCreated'`` in descending order.
    """
    submissions = []
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        # A set removes any duplicate nodes before rendering and counting.
        projects = set(nodes)

        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))
        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
        # NOTE: a trailing "skip when below settings.CONFERENCE_MIN_COUNT"
        # check used to sit here; as the last statement of the loop body it
        # had no effect, so it was removed.
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
Exemplo n.º 5
0
def conference_view(**kwargs):
    """Build the meeting overview together with every rendered submission.

    For each conference, the nodes reachable through tags whose ``_id``
    matches the conference endpoint (``iexact``) are collected, keeping only
    truthy, public, non-deleted nodes. Every such node is rendered into the
    submissions list. A conference is listed under ``'meetings'`` only when
    its node count is not below ``settings.CONFERENCE_MIN_COUNT``.

    :param kwargs: accepted but not used by this function.
    :return: dict with ``'meetings'`` (sorted by ``'count'``, descending) and
        ``'submissions'`` (sorted by ``'dateCreated'``, descending).
    """
    submissions = []
    meetings = []
    for conference in Conference.find():
        # Go through the tag first and then its nodes rather than issuing a
        # single Node query.
        visible_nodes = set()
        for tag in Tag.find(Q('_id', 'iexact', conference.endpoint)):
            for tagged in tag.node__tagged:
                if tagged and tagged.is_public and not tagged.is_deleted:
                    visible_nodes.add(tagged)

        submissions.extend(
            _render_conference_node(tagged, position, conference)
            for position, tagged in enumerate(visible_nodes)
        )

        submission_count = len(visible_nodes)
        if submission_count < settings.CONFERENCE_MIN_COUNT:
            # Too few submissions: the nodes were still rendered above, but
            # the conference itself is left out of the meetings list.
            continue
        meeting_entry = {
            'name': conference.name,
            'active': conference.active,
            'url': web_url_for('conference_results', meeting=conference.endpoint),
            'count': submission_count,
        }
        meetings.append(meeting_entry)

    submissions.sort(key=lambda item: item['dateCreated'], reverse=True)
    meetings.sort(key=lambda item: item['count'], reverse=True)

    return {'meetings': meetings, 'submissions': submissions}
Exemplo n.º 6
0
def conference_submissions(**kwargs):
    """Recount the submissions of every meeting and cache the totals.

    For each conference returned by ``Conference.find`` with
    ``is_meeting != False``, the distinct public, non-deleted nodes carrying
    a non-system tag whose name matches the conference endpoint are counted
    and stored in ``num_submissions``. All conferences are then written back
    with one ``bulk_update`` restricted to that field.

    :param kwargs: accepted but not used by this function.
    :return: ``{'success': True}``.
    """
    meeting_confs = Conference.find(Q('is_meeting', 'ne', False))
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for meeting in meeting_confs:
        # Resolve the tag primary keys first, then filter nodes by them,
        # instead of doing a single Node query.
        tag_filter = (
            Q('system', 'eq', False)
            & Q('name', 'iexact', meeting.endpoint.lower())
        )
        tag_pks = Tag.find(tag_filter).values_list('pk', flat=True)

        node_filter = (
            Q('tags', 'in', tag_pks)
            & Q('is_public', 'eq', True)
            & Q('is_deleted', 'ne', True)
        )
        matching_nodes = Node.find(node_filter).include('guids')

        # Going through a set makes each node count once.
        distinct_nodes = set(list(matching_nodes))
        # Cache the number of submissions
        meeting.num_submissions = len(distinct_nodes)
    bulk_update(meeting_confs, update_fields=['num_submissions'])
    return {'success': True}
Exemplo n.º 7
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.

    :param kwargs: accepted but not used by this function.
    :return: ``{'submissions': [...]}`` where the list holds the result of
        ``_render_conference_node`` for each matching node, sorted by
        ``'dateCreated'`` in descending order.
    """
    submissions = []
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = set()
        for tag in Tag.find(Q('lower', 'eq', conf.endpoint.lower())):
            # The set keeps a node tagged more than once from being counted
            # or rendered twice.
            projects.update(
                tag.node__tagged.find(
                    Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)))

        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))
        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
        # NOTE: a trailing "skip when below settings.CONFERENCE_MIN_COUNT"
        # check used to sit here; as the last statement of the loop body it
        # had no effect, so it was removed.
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
Exemplo n.º 8
0
def do_migration():
    """Set ``lower`` on every tag to its lower-cased ``_id`` and save it.

    Each tag returned by ``Tag.find()`` is logged at info level, updated, and
    saved with ``force=True``.
    """
    for tag in Tag.find():
        message = 'Migrating tag {!r}'.format(tag)
        logger.info(message)
        lowered_id = tag._id.lower()
        tag.lower = lowered_id
        tag.save(force=True)
Exemplo n.º 9
0
def do_migration():
    """Backfill the ``lower`` field of all tags from their ``_id``.

    Walks every tag from ``Tag.find()``, logs it at info level, stores the
    lower-cased ``_id`` in ``lower`` and saves the tag with ``force=True``.
    """
    all_tags = Tag.find()
    for current in all_tags:
        logger.info('Migrating tag {!r}'.format(current))
        # The lower-cased value is derived from the tag's ``_id``.
        current.lower = current._id.lower()
        current.save(force=True)