def save_bare_tags(page_size=5000):
    """Copy every MODM tag into the Django ``Tag`` table, one page at a time.

    Tags are read from ``MODMTag`` in descending ``_id`` order and inserted
    with ``Tag.objects.bulk_create``. Each page is fetched and saved inside
    its own ``transaction.atomic()`` block, and progress/timing information
    is printed to stdout.

    :param page_size: maximum number of tags fetched and inserted per batch.
    """
    func_name = sys._getframe().f_code.co_name
    print('Starting {}...'.format(func_name))
    count = 0
    start = datetime.now()
    total = MODMTag.find().count()

    while count < total:
        with transaction.atomic():
            page = MODMTag.find().sort('-_id')[count:count + page_size]
            tags = [
                Tag(_id=modm_tag._id, lower=modm_tag.lower, system=False)
                for modm_tag in page
            ]
            if not tags:
                # The source returned nothing for this offset (it shrank after
                # ``total`` was read); stop instead of re-requesting the same
                # empty page forever.
                break

            then = datetime.now()
            # Report the real bounds of this batch; the final page is usually
            # shorter than ``page_size``, so ``count - page_size`` is wrong.
            print('Saving tags {} through {}...'.format(
                count, count + len(tags)))
            saved = Tag.objects.bulk_create(tags)
            now = datetime.now()
            print('Done with {} tags in {} seconds...'.format(
                len(saved), (now - then).total_seconds()))
            count += len(tags)

            # Drop references to the batch before collecting garbage.
            page = None
            tags = None
            saved = None
            trash = gc.collect()
            print('Took out {} trashes'.format(trash))

    print('MODM Tags: {}'.format(total))
    print('django Tags: {}'.format(Tag.objects.all().count()))
    print('Done with {} in {} seconds...'.format(
        func_name, (datetime.now() - start).total_seconds()))
def save_bare_tags(page_size=5000):
    """Migrate all MODM tags into Django ``Tag`` rows in pages.

    Pages of ``MODMTag`` records (sorted by ``_id`` descending) are converted
    to ``Tag`` instances with ``system=False`` and written with
    ``Tag.objects.bulk_create``, one ``transaction.atomic()`` block per page.
    Progress, timings and final counts are printed to stdout.

    :param page_size: maximum number of tags fetched and inserted per batch.
    """
    func_name = sys._getframe().f_code.co_name
    print('Starting {}...'.format(func_name))
    count = 0
    start = datetime.now()
    total = MODMTag.find().count()

    while count < total:
        with transaction.atomic():
            batch_start = count
            tags = []
            for modm_tag in MODMTag.find().sort('-_id')[count:count + page_size]:
                tags.append(
                    Tag(_id=modm_tag._id, lower=modm_tag.lower, system=False))
                count += 1

            if count == batch_start:
                # Nothing came back for this offset (the source shrank after
                # ``total`` was read); bail out rather than loop forever.
                break

            then = datetime.now()
            # Use the actual batch bounds: the last page is usually shorter
            # than ``page_size``, so ``count - page_size`` would be wrong.
            print('Saving tags {} through {}...'.format(batch_start, count))
            woot = Tag.objects.bulk_create(tags)
            now = datetime.now()
            print('Done with {} tags in {} seconds...'.format(
                len(woot), (now - then).total_seconds()))

            # Release the batch before asking the collector to run.
            tags = None
            woot = None
            trash = gc.collect()
            print('Took out {} trashes'.format(trash))

    print('MODM Tags: {}'.format(total))
    print('django Tags: {}'.format(Tag.objects.all().count()))
    print('Done with {} in {} seconds...'.format(
        func_name, (datetime.now() - start).total_seconds()))
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    For each conference that is not explicitly flagged ``is_meeting = False``,
    the public, non-deleted nodes tagged with the conference endpoint are
    rendered via ``_render_conference_node``. The number of such nodes is
    cached on ``Conference.num_submissions`` and saved.

    :param kwargs: accepted for the caller's benefit; not used here.
    :return: ``{'submissions': [...]}`` sorted by ``dateCreated``, newest first.
    """
    submissions = []
    # TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        # Skip non-meetings, but keep iterating: stopping here would silently
        # drop every conference that happens to come after the first one.
        if hasattr(conf, 'is_meeting') and conf.is_meeting is False:
            continue

        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        projects = set(nodes)

        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))

        num_submissions = len(projects)
        # Cache the number of submissions
        conf.num_submissions = num_submissions
        conf.save()
        # NOTE(review): nothing follows this guard in the loop body, so it
        # currently has no effect; kept pending a decision on its intent.
        if num_submissions < settings.CONFERENCE_MIN_COUNT:
            continue

    submissions.sort(
        key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    Every conference is visited; the public, non-deleted nodes tagged with its
    endpoint are rendered via ``_render_conference_node`` and the node count
    is cached on ``Conference.num_submissions`` and saved.

    :param kwargs: accepted but not used here.
    :return: ``{'submissions': [...]}`` sorted by ``dateCreated``, newest first.
    """
    rendered = []
    # TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        # For efficiency, resolve the matching tag keys first and then query
        # nodes by those keys instead of doing a single Node query.
        endpoint_tag = Q('lower', 'eq', conf.endpoint.lower())
        tags = Tag.find(endpoint_tag).get_keys()
        visible_tagged = (
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        projects = set(Node.find(visible_tagged))

        rendered.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects)
        )

        # Cache the number of submissions
        num_submissions = len(projects)
        conf.num_submissions = num_submissions
        conf.save()
        # NOTE(review): this guard is the last statement of the loop body, so
        # it does not currently skip anything.
        if num_submissions < settings.CONFERENCE_MIN_COUNT:
            continue

    newest_first = sorted(
        rendered,
        key=lambda submission: submission['dateCreated'],
        reverse=True,
    )
    return {'submissions': newest_first}
def conference_view(**kwargs):
    """Build the meeting list and the combined submission list.

    For each conference, the public, non-deleted nodes reachable through tags
    whose ``_id`` case-insensitively matches the conference endpoint are
    rendered via ``_render_conference_node``. A conference is listed under
    ``meetings`` only when it has at least ``settings.CONFERENCE_MIN_COUNT``
    such nodes; its submissions are included either way.

    :param kwargs: accepted but not used here.
    :return: ``{'meetings': [...], 'submissions': [...]}`` with meetings
        sorted by ``count`` and submissions by ``dateCreated``, both
        descending.
    """
    meetings = []
    submissions = []
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = {
            node
            for tag in Tag.find(Q('_id', 'iexact', conf.endpoint))
            for node in tag.node__tagged
            if node and node.is_public and not node.is_deleted
        }

        submissions.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects)
        )

        num_submissions = len(projects)
        if num_submissions >= settings.CONFERENCE_MIN_COUNT:
            meetings.append({
                'name': conf.name,
                'active': conf.active,
                'url': web_url_for('conference_results', meeting=conf.endpoint),
                'count': num_submissions,
            })

    submissions = sorted(
        submissions,
        key=lambda submission: submission['dateCreated'],
        reverse=True,
    )
    meetings = sorted(
        meetings,
        key=lambda meeting: meeting['count'],
        reverse=True,
    )
    return {'meetings': meetings, 'submissions': submissions}
def conference_submissions(**kwargs):
    """Recalculate and cache the submission count for each meeting.

    For every conference whose ``is_meeting`` is not ``False``, the distinct
    public, non-deleted nodes carrying a non-system tag whose name matches the
    conference endpoint (case-insensitively) are counted and the count is
    stored in ``Conference.num_submissions``. All counts are written in one
    ``bulk_update`` call.

    :param kwargs: accepted but not used here.
    :return: ``{'success': True}``.
    """
    conferences = Conference.find(Q('is_meeting', 'ne', False))
    # TODO: Revisit this loop, there has to be a way to optimize it
    for conf in conferences:
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        endpoint_tags = (
            Q('system', 'eq', False) &
            Q('name', 'iexact', conf.endpoint.lower())
        )
        tags = Tag.find(endpoint_tags).values_list('pk', flat=True)

        visible_tagged = (
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        nodes = Node.find(visible_tagged).include('guids')

        # Cache the number of submissions
        conf.num_submissions = len(set(nodes))

    bulk_update(conferences, update_fields=['num_submissions'])
    return {'success': True}
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    Every conference is visited; for each tag whose ``lower`` equals the
    lower-cased conference endpoint, the tag's public, non-deleted nodes are
    collected and rendered via ``_render_conference_node``. The node count is
    cached on ``Conference.num_submissions`` and saved.

    :param kwargs: accepted but not used here.
    :return: ``{'submissions': [...]}`` sorted by ``dateCreated``, newest first.
    """
    rendered = []
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = set()
        for tag in Tag.find(Q('lower', 'eq', conf.endpoint.lower())):
            projects.update(
                tag.node__tagged.find(
                    Q('is_public', 'eq', True) &
                    Q('is_deleted', 'eq', False)
                )
            )

        rendered.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects)
        )

        # Cache the number of submissions
        num_submissions = len(projects)
        conf.num_submissions = num_submissions
        conf.save()
        # NOTE(review): this guard is the last statement of the loop body, so
        # it does not currently skip anything.
        if num_submissions < settings.CONFERENCE_MIN_COUNT:
            continue

    newest_first = sorted(
        rendered,
        key=lambda submission: submission['dateCreated'],
        reverse=True,
    )
    return {'submissions': newest_first}
def do_migration():
    """Populate ``lower`` on every tag with the lower-cased ``_id``.

    Each tag returned by ``Tag.find()`` is logged, updated and saved with
    ``force=True``.
    """
    for tag in Tag.find():
        message = 'Migrating tag {!r}'.format(tag)
        logger.info(message)
        lowered_id = tag._id.lower()
        tag.lower = lowered_id
        tag.save(force=True)
def do_migration():
    """Set each tag's ``lower`` field to the lower-cased form of its ``_id``.

    Iterates over ``Tag.find()``, logs the tag being migrated, assigns the
    new value and saves the tag with ``force=True``.
    """
    all_tags = Tag.find()
    for current in all_tags:
        logger.info('Migrating tag {!r}'.format(current))
        current.lower = current._id.lower()
        current.save(force=True)