def save_organization_info(session, org_dict):
    ''' Store one organization's details in the datastore session.

        session: the datastore session to query and write through.
        org_dict: organization fields; must include a 'name' key.

        Return the Organization instance, either newly created or the
        existing row updated in place. Raise ValueError for a name that
        is_safe_name() rejects; an Error row describing the rejection is
        committed before the exception is raised.
    '''
    org_name = org_dict['name']

    if not is_safe_name(org_name):
        # Persist the failure right away so it is kept even though we raise.
        session.add(Error(
            error='ValueError: Bad organization name: "%(name)s"' % org_dict,
            time=datetime.now()
        ))
        session.commit()
        raise ValueError('Bad organization name: "%(name)s"' % org_dict)

    # Find the organization already stored under this name, if any.
    name_matches = Organization.name == org_name
    organization = session.query(Organization).filter(name_matches).first()

    if organization:
        # Known organization: stamp it, mark it as a keeper, then overwrite
        # its fields with the incoming values.
        organization.last_updated = time()
        organization.keep = True

        for field, value in org_dict.items():
            setattr(organization, field, value)

        # Flush existing object, to prevent a sqlalchemy.orm.exc.StaleDataError.
        session.flush()
        return organization

    # Unknown organization: add it to the session. It is neither flushed nor
    # committed here.
    organization = Organization(**org_dict)
    session.add(organization)
    return organization
def main(org_name=None, org_sources=None):
    ''' Refresh every organization listed in the sources file.

        org_name: when given, only the organization with exactly this name
            is refreshed, and unlisted organizations are not pruned.
        org_sources: where to read organizations from; falls back to
            ORG_SOURCES_FILENAME when empty.

        An exception raised while refreshing an organization propagates out
        of main() without the end-of-iteration commit for that organization.
    '''
    source_path = org_sources or ORG_SOURCES_FILENAME

    # Names encountered during this run.
    fresh_names = set()

    # (model, column holding the owning organization's name), in the order
    # the rows get flagged keep=False.
    owned_by_org = (
        (Event, Event.organization_name),
        (Story, Story.organization_name),
        (Project, Project.organization_name),
        (Organization, Organization.name),
    )

    # Models swept for keep=False rows, in deletion order.
    sweep_order = (Event, Story, Issue, Project, Organization)

    # Fetch the organizations and randomize their order in place.
    candidates = get_organizations(source_path)
    shuffle(candidates)

    if org_name:
        candidates = [entry for entry in candidates if entry['name'] == org_name]

    for org_info in candidates:
        name = org_info['name']

        if not is_safe_name(name):
            # Log the rejected name to the database and skip this entry.
            db.session.add(Error(
                error=unicode('ValueError: Bad organization name: "%s"' % name),
                time=datetime.now()
            ))
            db.session.commit()
            continue

        # Look the organization up by name; the result itself is not used.
        _ = db.session.query(Organization).filter(
            Organization.name == name).first()
        fresh_names.add(name)

        # Flag everything belonging to this organization as disposable; the
        # save_* calls below are expected to flag what they touch as kept.
        for model, owner_column in owned_by_org:
            db.session.execute(
                db.update(model, values={'keep': False}).where(owner_column == name))
        db.session.commit()

        # Empty lat longs are okay: store them as None.
        for coordinate in ('latitude', 'longitude'):
            if coordinate in org_info and not org_info[coordinate]:
                org_info[coordinate] = None

        organization = save_organization_info(db.session, org_info)
        fresh_names.add(organization.name)
        db.session.flush()

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)
                db.session.flush()

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            for proj_dict in get_projects(organization):
                save_project_info(db.session, proj_dict)
            db.session.flush()

        if organization.events_url:
            # NOTE(review): a missing key is only logged; events are still
            # requested below. Confirm that is intended.
            if not meetup_key:
                logging.error("No Meetup.com key set.")

            if 'meetup.com' in organization.events_url:
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                    db.session.flush()
                else:
                    logging.error("%s does not have a valid events url" % organization.name)
            else:
                logging.error("Only Meetup.com events work right now.")

        # Issues are saved and flushed first, then their labels.
        logging.info("Gathering all of %s's open GitHub issues." % organization.name)
        issues = get_issues(organization.name)
        for issue in issues:
            save_issue(db.session, issue)
        db.session.flush()
        for issue in issues:
            save_labels(db.session, issue)

        # Commit what was gathered before sweeping.
        db.session.commit()

        # Sweep out every row still flagged keep=False.
        for model in sweep_order:
            db.session.query(model).filter(model.keep == False).delete()  # noqa: E712
        db.session.commit()

        # Final commit before moving on to the next organization.
        db.session.commit()

    # Prune organizations that were not seen, unless a single one was asked for.
    if not org_name:
        for bad_org in db.session.query(Organization):
            if bad_org.name not in fresh_names:
                # Per the original note, dependent rows are expected to
                # cascade from this delete.
                db.session.execute(
                    db.delete(Organization).where(Organization.name == bad_org.name))
                db.session.commit()
def main(org_name=None, org_sources=None):
    ''' Update all organizations from the sources file, or just one of them.

        org_name: optional exact organization name; restricts the run to that
            organization and disables the pruning step at the end.
        org_sources: optional sources location; ORG_SOURCES_FILENAME is used
            when this is empty.

        Errors raised while an organization is being updated are not caught
        here; they leave main() before that organization's final commit.
    '''
    session = db.session

    def flag_as_stale(owner_name):
        # Set keep=False on the organization row and on the events, stories
        # and projects that carry its name.
        session.execute(db.update(Event, values={'keep': False})
                        .where(Event.organization_name == owner_name))
        session.execute(db.update(Story, values={'keep': False})
                        .where(Story.organization_name == owner_name))
        session.execute(db.update(Project, values={'keep': False})
                        .where(Project.organization_name == owner_name))
        session.execute(db.update(Organization, values={'keep': False})
                        .where(Organization.name == owner_name))

    def sweep_stale():
        # Delete every row whose keep flag is still False, issues before
        # projects and the organization table last.
        for model in (Event, Story, Issue, Project, Organization):
            session.query(model).filter(model.keep == False).delete()  # noqa: E712

    def blank_to_none(info, key):
        # A present-but-empty coordinate is stored as None.
        if key in info and not info[key]:
            info[key] = None

    org_sources = org_sources or ORG_SOURCES_FILENAME

    # Every organization name handled in this run.
    organization_names = set()

    # Load the organization list and shuffle it in place.
    listed = get_organizations(org_sources)
    shuffle(listed)
    if org_name:
        listed = [item for item in listed if item['name'] == org_name]

    for org_info in listed:
        if not is_safe_name(org_info['name']):
            # Record the bad name as an Error row, commit it, and move on.
            message = unicode('ValueError: Bad organization name: "%s"' % org_info['name'])
            session.add(Error(error=message, time=datetime.now()))
            session.commit()
            continue

        # Query for an existing row with this name; the result is not used.
        by_name = Organization.name == org_info['name']
        _ = session.query(Organization).filter(by_name).first()
        organization_names.add(org_info['name'])

        # Start by flagging everything for deletion, and commit those flags.
        flag_as_stale(org_info['name'])
        session.commit()

        # Empty lat longs are okay.
        blank_to_none(org_info, 'latitude')
        blank_to_none(org_info, 'longitude')

        organization = save_organization_info(session, org_info)
        organization_names.add(organization.name)
        session.flush()

        # Stories
        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(session, story_info)
                session.flush()

        # Projects
        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_dict in projects:
                save_project_info(session, proj_dict)
            session.flush()

        # Events
        events_url = organization.events_url
        if events_url:
            if not meetup_key:
                # Only logged; the checks below still run without a key.
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            else:
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if not identifier:
                    logging.error("%s does not have a valid events url" % organization.name)
                else:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(session, event)
                    session.flush()

        # Issues, then (after a flush) their labels.
        logging.info("Gathering all of %s's open GitHub issues." % organization.name)
        issues = get_issues(organization.name)
        for issue in issues:
            save_issue(session, issue)
        session.flush()
        for issue in issues:
            save_labels(session, issue)

        # Commit everything gathered for this organization.
        session.commit()

        # Remove whatever is still flagged keep=False, and commit the removal.
        sweep_stale()
        session.commit()

        # Final commit before moving on to the next organization.
        session.commit()

    if org_name:
        # A single organization was requested: no pruning.
        return

    # Prune organizations that did not appear in this run. The original note
    # says the remaining deletions cascade from the organization row.
    for bad_org in session.query(Organization):
        if bad_org.name in organization_names:
            continue
        session.execute(
            db.delete(Organization).where(Organization.name == bad_org.name))
        session.commit()
def main(org_name=None, org_sources=None):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
            is updated, and unlisted organizations are not deleted afterwards.
        org_sources: passed through unchanged to get_organizations().

        Each organization is committed on its own at the end of its
        iteration. An exception raised while updating an organization is not
        caught: it leaves main() before that organization is committed.
    '''
    # Keep a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations(org_sources)
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        name = org_info['name']

        if not is_safe_name(name):
            # Record the rejected name and carry on with the next entry.
            error_dict = {
                "error": 'ValueError: Bad organization name: "%s"' % name,
                "time": datetime.now()
            }
            db.session.add(Error(**error_dict))
            db.session.commit()
            continue

        organization_names.add(name)

        # Mark everything in this organization for deletion at first.
        #
        # These are Core UPDATE statements: they change the rows in the
        # database but not ORM objects already loaded into the session. The
        # Organization is therefore deliberately NOT queried before this
        # point. A pre-loaded instance would keep its old keep=True in
        # memory, the later "keep = True" assignment would not register as a
        # change, and the row would stay keep=False and be swept below.
        db.session.execute(db.update(Event, values={'keep': False})
                           .where(Event.organization_name == name))
        db.session.execute(db.update(Story, values={'keep': False})
                           .where(Story.organization_name == name))
        db.session.execute(db.update(Project, values={'keep': False})
                           .where(Project.organization_name == name))
        db.session.execute(db.update(Organization, values={'keep': False})
                           .where(Organization.name == name))

        # Empty lat longs are okay.
        for coordinate in ('latitude', 'longitude'):
            if coordinate in org_info and not org_info[coordinate]:
                org_info[coordinate] = None

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_info in projects:
                save_project_info(db.session, proj_info)

        if organization.events_url:
            # NOTE(review): a missing key is only logged; events are still
            # requested below. Confirm that is intended.
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            else:
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                else:
                    logging.error("%s does not have a valid events url" % organization.name)

        # Get issues for all of the projects. labels is indexed in step with
        # issues, so a shorter labels list still raises IndexError.
        logging.info("Gathering all of %s's open GitHub issues." % organization.name)
        issues, labels = get_issues(organization.name)
        for index, issue in enumerate(issues):
            save_issue_info(db.session, issue, labels[index])

        # Remove everything marked for deletion.
        #
        # The condition must be a SQL expression. "not Model.keep" is
        # evaluated by Python into a plain boolean before SQLAlchemy sees
        # it, so it never selects the keep=False rows. Issues go before
        # projects, matching the other revisions of this function;
        # presumably issues reference projects, TODO confirm.
        for model in (Event, Story, Issue, Project, Organization):
            db.session.query(model).filter(model.keep == False).delete()  # noqa: E712

        # Commit and move on to the next organization.
        db.session.commit()

    # Stop right here if an org name was specified.
    if org_name:
        return

    # Delete any organization not found on this round, together with the
    # events, stories and projects that carry its name.
    for bad_org in db.session.query(Organization):
        if bad_org.name in organization_names:
            continue

        db.session.execute(db.delete(Event).where(Event.organization_name == bad_org.name))
        db.session.execute(db.delete(Story).where(Story.organization_name == bad_org.name))
        db.session.execute(db.delete(Project).where(Project.organization_name == bad_org.name))
        db.session.execute(db.delete(Organization).where(Organization.name == bad_org.name))

    db.session.commit()
def main(org_name=None, org_sources=None):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
            is updated, and unlisted organizations are not deleted afterwards.
        org_sources: passed through unchanged to get_organizations().

        Each organization is committed on its own at the end of its
        iteration. An exception raised while updating an organization is not
        caught: it leaves main() before that organization is committed.
    '''
    # Keep a set of fresh organization names.
    organization_names = set()

    # (model, column holding the owning organization's name) pairs whose
    # rows are flagged keep=False before an organization is refreshed.
    flagged_first = (
        (Event, Event.organization_name),
        (Story, Story.organization_name),
        (Project, Project.organization_name),
        (Organization, Organization.name),
    )

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations(org_sources)
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        name = org_info['name']

        if not is_safe_name(name):
            # Record the rejected name and carry on with the next entry.
            error_dict = {
                "error": 'ValueError: Bad organization name: "%s"' % name,
                "time": datetime.now()
            }
            db.session.add(Error(**error_dict))
            db.session.commit()
            continue

        organization_names.add(name)

        # Mark everything in this organization for deletion at first.
        #
        # db.update() statements run through session.execute() change the
        # database rows only; ORM objects already in the session keep their
        # old attribute values. The Organization is therefore not loaded
        # before this point. An instance loaded earlier would still read
        # keep=True, the "keep = True" assignment made while saving would
        # not count as a change, and the row would stay keep=False and be
        # deleted by the sweep below.
        for model, owner_column in flagged_first:
            db.session.execute(
                db.update(model, values={'keep': False}).where(owner_column == name))

        # Empty lat longs are okay.
        if 'latitude' in org_info and not org_info['latitude']:
            org_info['latitude'] = None
        if 'longitude' in org_info and not org_info['longitude']:
            org_info['longitude'] = None

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_info in projects:
                save_project_info(db.session, proj_info)

        if organization.events_url:
            # NOTE(review): a missing key is only logged; events are still
            # requested below. Confirm that is intended.
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            else:
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(
                    organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                else:
                    logging.error("%s does not have a valid events url" % organization.name)

        # Get issues for all of the projects. labels is indexed in step with
        # issues, so a shorter labels list still raises IndexError.
        logging.info("Gathering all of %s's open GitHub issues." % organization.name)
        issues, labels = get_issues(organization.name)
        for index, issue in enumerate(issues):
            save_issue_info(db.session, issue, labels[index])

        # Remove everything marked for deletion.
        #
        # The filter has to be the SQL comparison "keep == False". Writing
        # "not Model.keep" lets Python reduce it to a plain boolean before
        # SQLAlchemy sees it, so the keep=False rows are never selected.
        # Issues are removed before projects, matching the other revisions
        # of this function; presumably issues reference projects, TODO
        # confirm.
        db.session.query(Event).filter(Event.keep == False).delete()  # noqa: E712
        db.session.query(Story).filter(Story.keep == False).delete()  # noqa: E712
        db.session.query(Issue).filter(Issue.keep == False).delete()  # noqa: E712
        db.session.query(Project).filter(Project.keep == False).delete()  # noqa: E712
        db.session.query(Organization).filter(
            Organization.keep == False).delete()  # noqa: E712

        # Commit and move on to the next organization.
        db.session.commit()

    # Stop right here if an org name was specified.
    if org_name:
        return

    # Delete any organization not found on this round, together with the
    # events, stories and projects that carry its name.
    for bad_org in db.session.query(Organization):
        if bad_org.name in organization_names:
            continue

        db.session.execute(
            db.delete(Event).where(Event.organization_name == bad_org.name))
        db.session.execute(
            db.delete(Story).where(Story.organization_name == bad_org.name))
        db.session.execute(
            db.delete(Project).where(
                Project.organization_name == bad_org.name))
        db.session.execute(
            db.delete(Organization).where(Organization.name == bad_org.name))

    db.session.commit()
def main(org_name=None, minimum_age=3*3600):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
            is updated (regardless of how recently it was updated), and
            unlisted organizations are not deleted afterwards.
        minimum_age: seconds that must have passed since an existing
            organization's last_updated before it is updated again.

        Each updated organization is committed on its own at the end of its
        iteration. An exception raised while updating an organization is not
        caught: it leaves main() before that organization is committed.
    '''
    # Set a single cutoff timestamp for orgs we'll look at.
    maximum_updated = time() - minimum_age

    # Keep a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations()
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        name = org_info['name']

        if not is_safe_name(name):
            # Record the rejected name and carry on with the next entry.
            error_dict = {
                "error": 'ValueError: Bad organization name: "%s"' % name,
                "time": datetime.now()
            }
            db.session.add(Error(**error_dict))
            db.session.commit()
            continue

        name_matches = Organization.name == name
        existing_org = db.session.query(Organization).filter(name_matches).first()

        # Recorded before the age check, so that skipped organizations are
        # not deleted as "not found" at the end.
        organization_names.add(name)

        if existing_org and not org_name:
            if existing_org.last_updated > maximum_updated:
                # Skip this organization, it's been updated too recently.
                logging.info("Skipping update for {0}".format(name.encode('utf8')))
                continue

        # Mark everything in this organization for deletion at first.
        db.session.execute(db.update(Event, values={'keep': False})
                           .where(Event.organization_name == name))
        db.session.execute(db.update(Story, values={'keep': False})
                           .where(Story.organization_name == name))
        db.session.execute(db.update(Project, values={'keep': False})
                           .where(Project.organization_name == name))
        db.session.execute(db.update(Organization, values={'keep': False})
                           .where(Organization.name == name))

        # The statements above change the database rows only; existing_org
        # is already loaded and would still read keep=True. Setting keep to
        # True again while saving would then not count as a change, leaving
        # the row keep=False for the sweep below. Expiring the session makes
        # the next access reload current values.
        db.session.expire_all()

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_info in projects:
                save_project_info(db.session, proj_info)

        if organization.events_url:
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            else:
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                else:
                    logging.error("%s does not have a valid events url" % organization.name)

        # Get issues for all of the projects
        logging.info("Gathering all of %s's project's issues." % organization.name)
        issues = get_issues(organization.name)
        for issue_info in issues:
            save_issue_info(db.session, issue_info)

        # Remove everything marked for deletion.
        #
        # The filter has to be the SQL comparison "keep == False". Writing
        # "not Model.keep" lets Python reduce it to a plain boolean before
        # SQLAlchemy sees it, so the keep=False rows are never selected.
        # Issues are removed before projects; presumably issues reference
        # projects, TODO confirm.
        for model in (Event, Story, Issue, Project, Organization):
            db.session.query(model).filter(model.keep == False).delete()  # noqa: E712

        # Commit and move on to the next organization.
        db.session.commit()

    # Stop right here if an org name was specified.
    if org_name:
        return

    # Delete any organization not found on this round, together with the
    # events, stories and projects that carry its name.
    for bad_org in db.session.query(Organization):
        if bad_org.name in organization_names:
            continue

        db.session.execute(db.delete(Event).where(Event.organization_name == bad_org.name))
        db.session.execute(db.delete(Story).where(Story.organization_name == bad_org.name))
        db.session.execute(db.delete(Project).where(Project.organization_name == bad_org.name))
        db.session.execute(db.delete(Organization).where(Organization.name == bad_org.name))

    db.session.commit()