示例#1
0
文件: run_update.py 项目: maya/cfapi
def save_organization_info(session, org_dict):
    ''' Store one organization's details in the given datastore session.

        An unsafe name is recorded as an Error row, committed, and then
        reported to the caller as a ValueError. Otherwise the organization
        with a matching name is updated in place, or a new Organization is
        added to the session when no match exists.

        Return the new or updated Organization instance.
    '''
    # Guard clause: refuse unsafe names, but leave a committed trace first.
    if not is_safe_name(org_dict['name']):
        session.add(Error(
            error='ValueError: Bad organization name: "%(name)s"' % org_dict,
            time=datetime.now()))
        session.commit()
        raise ValueError('Bad organization name: "%(name)s"' % org_dict)

    # Look for an organization already stored under this name.
    same_name = Organization.name == org_dict['name']
    stored_org = session.query(Organization).filter(same_name).first()

    if stored_org:
        # Stamp the stored organization and flag it as one to keep.
        stored_org.last_updated = time()
        stored_org.keep = True

        # Copy every incoming field over the stored values.
        for field in org_dict:
            setattr(stored_org, field, org_dict[field])

        # Flush now, to prevent a sqlalchemy.orm.exc.StaleDataError.
        session.flush()
        return stored_org

    # Nothing stored yet: add a brand new organization. It is neither
    # flushed nor committed here.
    fresh_org = Organization(**org_dict)
    session.add(fresh_org)
    return fresh_org
示例#2
0
def main(org_name=None, org_sources=None):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
        is updated, and the final pruning of orphaned organizations is
        skipped.

        org_sources: handed to get_organizations(); falls back to
        ORG_SOURCES_FILENAME when empty.

        Organizations with unsafe names are recorded as Error rows and
        skipped. Any exception raised while updating an organization
        propagates out of main(); nothing is rolled back here, so work
        committed before the failure stays committed.
    '''
    # Fall back to the default sources when none were given.
    org_sources = org_sources or ORG_SOURCES_FILENAME

    # Collect a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations(org_sources)
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        fresh_name = org_info['name']

        if not is_safe_name(fresh_name):
            error_dict = {
                "error":
                unicode('ValueError: Bad organization name: "%s"' %
                        fresh_name),
                "time":
                datetime.now()
            }
            db.session.add(Error(**error_dict))
            # Commit the error right away, then skip this organization.
            db.session.commit()
            continue

        organization_names.add(fresh_name)

        # Mark everything associated with this organization for deletion at
        # first. Rows that are saved again below are presumably flagged
        # keep=True by the save_* helpers -- confirm against those helpers.
        stale_marks = (
            (Event, Event.organization_name),
            (Story, Story.organization_name),
            (Project, Project.organization_name),
            (Organization, Organization.name),
        )
        for model, name_column in stale_marks:
            db.session.execute(
                db.update(model, values={'keep': False}).where(
                    name_column == fresh_name))
        # Commit the false keeps.
        db.session.commit()

        # Empty lat longs are okay: normalize any falsy value to None.
        for coordinate in ('latitude', 'longitude'):
            if coordinate in org_info and not org_info[coordinate]:
                org_info[coordinate] = None

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)
        # Flush the organization.
        db.session.flush()

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." %
                         organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)
                # Flush the stories.
                db.session.flush()

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." %
                         organization.name)
            projects = get_projects(organization)
            for proj_dict in projects:
                save_project_info(db.session, proj_dict)
            # Flush the projects.
            db.session.flush()

        if organization.events_url:
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            elif meetup_key:
                # Only fetch when a key is configured; previously a missing
                # key was logged as an error but the fetch was attempted
                # anyway.
                logging.info("Gathering all of %s's events." %
                             organization.name)
                identifier = get_event_group_identifier(
                    organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization,
                                                   identifier):
                        save_event_info(db.session, event)
                    # Flush the events.
                    db.session.flush()
                else:
                    logging.error("%s does not have a valid events url" %
                                  organization.name)

        # Get issues for all of the projects.
        logging.info("Gathering all of %s's open GitHub issues." %
                     organization.name)
        issues = get_issues(organization.name)
        for issue in issues:
            save_issue(db.session, issue)

        # Flush the issues before their labels are saved.
        db.session.flush()
        for issue in issues:
            save_labels(db.session, issue)

        # Commit everything gathered for this organization.
        db.session.commit()

        # Remove everything still marked for deletion. The comparison must
        # be spelled `== False` so that SQLAlchemy builds a SQL condition.
        for model in (Event, Story, Issue, Project, Organization):
            db.session.query(model).filter(
                model.keep == False).delete()  # noqa: E712
        # Commit the deletions, then move on to the next organization.
        db.session.commit()

    # Prune orphaned organizations if no organization name was passed.
    if not org_name:
        for bad_org in db.session.query(Organization):
            if bad_org.name in organization_names:
                continue

            # Delete the orphaned organization; per the original note, all
            # other deletions will cascade.
            db.session.execute(
                db.delete(Organization).where(
                    Organization.name == bad_org.name))
            # Commit each orphan deletion separately.
            db.session.commit()
示例#3
0
def main(org_name=None, org_sources=None):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
        is updated, and the final pruning of orphaned organizations is
        skipped.

        org_sources: handed to get_organizations(); falls back to
        ORG_SOURCES_FILENAME when empty.

        Organizations with unsafe names are recorded as Error rows and
        skipped. Any exception raised while updating an organization
        propagates out of main(); nothing is rolled back here, so work
        committed before the failure stays committed.
    '''
    # Fall back to the default sources when none were given.
    org_sources = org_sources or ORG_SOURCES_FILENAME

    # Collect a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations(org_sources)
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        fresh_name = org_info['name']

        if not is_safe_name(fresh_name):
            error_dict = {
                "error": unicode('ValueError: Bad organization name: "%s"' % fresh_name),
                "time": datetime.now()
            }
            db.session.add(Error(**error_dict))
            # Commit the error right away, then skip this organization.
            db.session.commit()
            continue

        organization_names.add(fresh_name)

        # Mark everything associated with this organization for deletion at
        # first. Rows that are saved again below are presumably flagged
        # keep=True by the save_* helpers -- confirm against those helpers.
        stale_marks = (
            (Event, Event.organization_name),
            (Story, Story.organization_name),
            (Project, Project.organization_name),
            (Organization, Organization.name),
        )
        for model, name_column in stale_marks:
            db.session.execute(db.update(model, values={'keep': False}).where(name_column == fresh_name))
        # Commit the false keeps.
        db.session.commit()

        # Empty lat longs are okay: normalize any falsy value to None.
        for coordinate in ('latitude', 'longitude'):
            if coordinate in org_info and not org_info[coordinate]:
                org_info[coordinate] = None

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)
        # Flush the organization.
        db.session.flush()

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)
                # Flush the stories.
                db.session.flush()

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_dict in projects:
                save_project_info(db.session, proj_dict)
            # Flush the projects.
            db.session.flush()

        if organization.events_url:
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            elif meetup_key:
                # Only fetch when a key is configured; previously a missing
                # key was logged as an error but the fetch was attempted
                # anyway.
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                    # Flush the events.
                    db.session.flush()
                else:
                    logging.error("%s does not have a valid events url" % organization.name)

        # Get issues for all of the projects.
        logging.info("Gathering all of %s's open GitHub issues." % organization.name)
        issues = get_issues(organization.name)
        for issue in issues:
            save_issue(db.session, issue)

        # Flush the issues before their labels are saved.
        db.session.flush()
        for issue in issues:
            save_labels(db.session, issue)

        # Commit everything gathered for this organization.
        db.session.commit()

        # Remove everything still marked for deletion. The comparison must
        # be spelled `== False` so that SQLAlchemy builds a SQL condition.
        for model in (Event, Story, Issue, Project, Organization):
            db.session.query(model).filter(model.keep == False).delete()  # noqa: E712
        # Commit the deletions, then move on to the next organization.
        db.session.commit()

    # Prune orphaned organizations if no organization name was passed.
    if not org_name:
        for bad_org in db.session.query(Organization):
            if bad_org.name in organization_names:
                continue

            # Delete the orphaned organization; per the original note, all
            # other deletions will cascade.
            db.session.execute(db.delete(Organization).where(Organization.name == bad_org.name))
            # Commit each orphan deletion separately.
            db.session.commit()
示例#4
0
def main(org_name=None, org_sources=None):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
        is updated, and the final removal of organizations not found on
        this round is skipped.

        org_sources: handed unchanged to get_organizations().

        Organizations with unsafe names are recorded as Error rows and
        skipped. Each organization's changes are committed once, after all
        of its stories, projects, events and issues have been saved; an
        exception propagates out of main() before that commit is reached.
    '''
    # Keep a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations(org_sources)
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        fresh_name = org_info['name']

        if not is_safe_name(fresh_name):
            error_dict = {
                "error": 'ValueError: Bad organization name: "%s"' % fresh_name,
                "time": datetime.now()
            }
            db.session.add(Error(**error_dict))
            db.session.commit()
            continue

        organization_names.add(fresh_name)

        # Mark everything in this organization for deletion at first. Rows
        # that are saved again below are presumably flagged keep=True by
        # the save_* helpers -- confirm against those helpers.
        stale_marks = (
            (Event, Event.organization_name),
            (Story, Story.organization_name),
            (Project, Project.organization_name),
            (Organization, Organization.name),
        )
        for model, name_column in stale_marks:
            db.session.execute(db.update(model, values={'keep': False}).where(name_column == fresh_name))

        # Empty lat longs are okay: normalize any falsy value to None.
        for coordinate in ('latitude', 'longitude'):
            if coordinate in org_info and not org_info[coordinate]:
                org_info[coordinate] = None

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_info in projects:
                save_project_info(db.session, proj_info)

        if organization.events_url:
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            elif meetup_key:
                # Only fetch when a key is configured; previously a missing
                # key was logged as an error but the fetch was attempted
                # anyway.
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                else:
                    logging.error("%s does not have a valid events url" % organization.name)

        # Get issues for all of the projects. get_issues() returns the
        # issues and their labels as two parallel sequences.
        logging.info("Gathering all of %s's open GitHub issues." % organization.name)
        issues, labels = get_issues(organization.name)
        for index, issue in enumerate(issues):
            save_issue_info(db.session, issue, labels[index])

        # Remove everything marked for deletion. Python's `not` cannot be
        # turned into SQL, so `filter(not Model.keep)` never produced a
        # `keep = false` condition; the comparison has to be `== False`.
        for model in (Event, Story, Project, Issue, Organization):
            db.session.query(model).filter(model.keep == False).delete()  # noqa: E712

        # Commit and move on to the next organization.
        db.session.commit()

    # Stop right here if an org name was specified.
    if org_name:
        return

    # Delete any organization not found on this round.
    for bad_org in db.session.query(Organization):
        if bad_org.name in organization_names:
            continue

        db.session.execute(db.delete(Event).where(Event.organization_name == bad_org.name))
        db.session.execute(db.delete(Story).where(Story.organization_name == bad_org.name))
        db.session.execute(db.delete(Project).where(Project.organization_name == bad_org.name))
        db.session.execute(db.delete(Organization).where(Organization.name == bad_org.name))
        db.session.commit()
示例#5
0
def main(org_name=None, org_sources=None):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
        is updated, and the final removal of organizations not found on
        this round is skipped.

        org_sources: handed unchanged to get_organizations().

        Organizations with unsafe names are recorded as Error rows and
        skipped. Each organization's changes are committed once, after all
        of its stories, projects, events and issues have been saved; an
        exception propagates out of main() before that commit is reached.
    '''
    # Keep a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations(org_sources)
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        fresh_name = org_info['name']

        if not is_safe_name(fresh_name):
            error_dict = {
                "error":
                'ValueError: Bad organization name: "%s"' % fresh_name,
                "time":
                datetime.now()
            }
            db.session.add(Error(**error_dict))
            db.session.commit()
            continue

        organization_names.add(fresh_name)

        # Mark everything in this organization for deletion at first. Rows
        # that are saved again below are presumably flagged keep=True by
        # the save_* helpers -- confirm against those helpers.
        stale_marks = (
            (Event, Event.organization_name),
            (Story, Story.organization_name),
            (Project, Project.organization_name),
            (Organization, Organization.name),
        )
        for model, name_column in stale_marks:
            db.session.execute(
                db.update(model, values={'keep': False}).where(
                    name_column == fresh_name))

        # Empty lat longs are okay: normalize any falsy value to None.
        for coordinate in ('latitude', 'longitude'):
            if coordinate in org_info and not org_info[coordinate]:
                org_info[coordinate] = None

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." %
                         organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." %
                         organization.name)
            projects = get_projects(organization)
            for proj_info in projects:
                save_project_info(db.session, proj_info)

        if organization.events_url:
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            if 'meetup.com' not in organization.events_url:
                logging.error("Only Meetup.com events work right now.")
            elif meetup_key:
                # Only fetch when a key is configured; previously a missing
                # key was logged as an error but the fetch was attempted
                # anyway.
                logging.info("Gathering all of %s's events." %
                             organization.name)
                identifier = get_event_group_identifier(
                    organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization,
                                                   identifier):
                        save_event_info(db.session, event)
                else:
                    logging.error("%s does not have a valid events url" %
                                  organization.name)

        # Get issues for all of the projects. get_issues() returns the
        # issues and their labels as two parallel sequences.
        logging.info("Gathering all of %s's open GitHub issues." %
                     organization.name)
        issues, labels = get_issues(organization.name)
        for index, issue in enumerate(issues):
            save_issue_info(db.session, issue, labels[index])

        # Remove everything marked for deletion. Python's `not` cannot be
        # turned into SQL, so `filter(not Model.keep)` never produced a
        # `keep = false` condition; the comparison has to be `== False`.
        for model in (Event, Story, Project, Issue, Organization):
            db.session.query(model).filter(
                model.keep == False).delete()  # noqa: E712

        # Commit and move on to the next organization.
        db.session.commit()

    # Stop right here if an org name was specified.
    if org_name:
        return

    # Delete any organization not found on this round.
    for bad_org in db.session.query(Organization):
        if bad_org.name in organization_names:
            continue

        db.session.execute(
            db.delete(Event).where(Event.organization_name == bad_org.name))
        db.session.execute(
            db.delete(Story).where(Story.organization_name == bad_org.name))
        db.session.execute(
            db.delete(Project).where(
                Project.organization_name == bad_org.name))
        db.session.execute(
            db.delete(Organization).where(Organization.name == bad_org.name))
        db.session.commit()
示例#6
0
def main(org_name=None, minimum_age=3*3600):
    ''' Run update over all organizations. Optionally, update just one.

        org_name: when given, only the organization with exactly this name
        is updated, the age check is bypassed, and the final removal of
        organizations not found on this round is skipped.

        minimum_age: minimum age, in seconds, that an existing
        organization's last update must have before it is updated again.

        Organizations with unsafe names are recorded as Error rows and
        skipped. Each organization's changes are committed once, after all
        of its stories, projects, events and issues have been saved; an
        exception propagates out of main() before that commit is reached.
    '''
    # Set a single cutoff timestamp for orgs we'll look at.
    maximum_updated = time() - minimum_age

    # Keep a set of fresh organization names.
    organization_names = set()

    # Retrieve all organizations and shuffle the list in place.
    orgs_info = get_organizations()
    shuffle(orgs_info)

    if org_name:
        orgs_info = [org for org in orgs_info if org['name'] == org_name]

    # Iterate over organizations and projects, saving them to db.session.
    for org_info in orgs_info:
        fresh_name = org_info['name']

        if not is_safe_name(fresh_name):
            error_dict = {
                "error": 'ValueError: Bad organization name: "%s"' % fresh_name,
                "time": datetime.now()
            }
            db.session.add(Error(**error_dict))
            db.session.commit()
            continue

        name_matches = Organization.name == fresh_name
        existing_org = db.session.query(Organization).filter(name_matches).first()

        # Record the name before the age check, so that a skipped
        # organization is not deleted as "not found" at the end.
        organization_names.add(fresh_name)

        if existing_org and not org_name:
            if existing_org.last_updated > maximum_updated:
                # Skip this organization, it's been updated too recently.
                logging.info("Skipping update for {0}".format(fresh_name.encode('utf8')))
                continue

        # Mark everything in this organization for deletion at first. Rows
        # that are saved again below are presumably flagged keep=True by
        # the save_* helpers -- confirm against those helpers.
        stale_marks = (
            (Event, Event.organization_name),
            (Story, Story.organization_name),
            (Project, Project.organization_name),
            (Organization, Organization.name),
        )
        for model, name_column in stale_marks:
            db.session.execute(db.update(model, values={'keep': False}).where(name_column == fresh_name))

        organization = save_organization_info(db.session, org_info)
        organization_names.add(organization.name)

        if organization.rss or organization.website:
            logging.info("Gathering all of %s's stories." % organization.name)
            stories = get_stories(organization)
            if stories:
                for story_info in stories:
                    save_story_info(db.session, story_info)

        if organization.projects_list_url:
            logging.info("Gathering all of %s's projects." % organization.name)
            projects = get_projects(organization)
            for proj_info in projects:
                save_project_info(db.session, proj_info)

        if organization.events_url:
            if not meetup_key:
                logging.error("No Meetup.com key set.")
            else:
                logging.info("Gathering all of %s's events." % organization.name)
                identifier = get_event_group_identifier(organization.events_url)
                if identifier:
                    for event in get_meetup_events(organization, identifier):
                        save_event_info(db.session, event)
                else:
                    logging.error("%s does not have a valid events url" % organization.name)

        # Get issues for all of the projects.
        logging.info("Gathering all of %s's project's issues." % organization.name)
        issues = get_issues(organization.name)
        for issue_info in issues:
            save_issue_info(db.session, issue_info)

        # Remove everything marked for deletion. Python's `not` cannot be
        # turned into SQL, so `filter(not Model.keep)` never produced a
        # `keep = false` condition; the comparison has to be `== False`.
        for model in (Event, Story, Project, Issue, Organization):
            db.session.query(model).filter(model.keep == False).delete()  # noqa: E712

        # Commit and move on to the next organization.
        db.session.commit()

    # Stop right here if an org name was specified.
    if org_name:
        return

    # Delete any organization not found on this round.
    for bad_org in db.session.query(Organization):
        if bad_org.name in organization_names:
            continue

        db.session.execute(db.delete(Event).where(Event.organization_name == bad_org.name))
        db.session.execute(db.delete(Story).where(Story.organization_name == bad_org.name))
        db.session.execute(db.delete(Project).where(Project.organization_name == bad_org.name))
        db.session.execute(db.delete(Organization).where(Organization.name == bad_org.name))
        db.session.commit()