Пример #1
0
    def handle(self, *args, **options):
        """Load subaward data for the requested broker submissions.

        Keys read from ``options``:

        * ``test`` -- when truthy, a ``PhonyCursor`` stands in for a real
          cursor on the ``data_broker`` connection.
        * ``update_all`` -- when truthy, every ``SubmissionAttributes`` row
          whose ``broker_submission_id`` is not null is processed.
        * ``submission_id`` -- iterable of broker submission ids to process
          when ``update_all`` is falsy.
        * ``awards_touched`` -- forwarded unchanged to ``load_subawards``.

        Returns ``None`` in all cases; if the broker cursor cannot be
        obtained the problem is logged and the method returns early.
        """
        # Grab the data broker database connection (or a stand-in for tests)
        if options['test']:
            db_cursor = PhonyCursor()
        else:
            try:
                db_cursor = connections['data_broker'].cursor()
            except Exception as err:
                logger.critical(
                    'Could not connect to database. Is DATA_BROKER_DATABASE_URL set?'
                )
                # Log the error text itself; print() would return None and
                # the logger would only ever record "None".
                logger.critical(str(err))
                return

        if options["update_all"]:
            submissions_to_update = SubmissionAttributes.objects.exclude(
                broker_submission_id__isnull=True)
        else:
            submissions_to_update = []
            for submission_id in options['submission_id']:
                sub = SubmissionAttributes.objects.filter(
                    broker_submission_id=submission_id).first()
                if sub:
                    submissions_to_update.append(sub)
                else:
                    # Name the specific id that could not be found.
                    logger.critical(
                        "Submission {} not found in datastore".format(
                            submission_id))

        # NOTE: load errors are not caught here, so a failing submission
        # aborts the command and ``failed_submissions`` is never populated.
        failed_submissions = []
        success_submissions = []
        for submission in submissions_to_update:
            logger.info("Loading subaward data for submission {}".format(
                submission.broker_submission_id))
            load_subawards(submission,
                           awards_touched=options["awards_touched"],
                           db_cursor=db_cursor)
            success_submissions.append(submission.broker_submission_id)

        logger.info("Successfully loaded: {}".format(success_submissions))
        logger.info("Failed to load: {}".format(failed_submissions))
    def handle_loading(self, db_cursor, *args, **options):
        """Load a single broker submission: Files A, B and C, then subawards.

        Args:
            db_cursor: cursor used for the broker queries. When it is a
                ``PhonyCursor`` the File C rows are read from its
                ``db_responses`` mapping instead of the database.
            **options: ``submission_id`` (sequence; only the first element
                is used) and ``nosubawards`` (truthy skips the subaward
                load) are read.

        Raises:
            CommandError: if the broker returns zero rows, or more than one
                row, for the submission id.
        """
        def signal_handler(signum, frame):
            # Flag the transaction for rollback, then abort via an exception.
            transaction.set_rollback(True)
            raise Exception('Received interrupt signal. Aborting...')

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        submission_id = options['submission_id'][0]

        logger.info(
            'Getting submission {} from broker...'.format(submission_id))
        db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s',
                          [submission_id])

        submission_data = dictfetchall(db_cursor)
        logger.info(
            'Finished getting submission {} from broker'.format(submission_id))

        if len(submission_data) == 0:
            raise CommandError('Could not find submission with id ' +
                               str(submission_id))
        elif len(submission_data) > 1:
            raise CommandError('Found multiple submissions with id ' +
                               str(submission_id))

        # Work on a copy so the fetched row itself is left untouched.
        submission_data = submission_data[0].copy()
        # We use broker_submission_id, submission_id is our own PK
        broker_submission_id = submission_data.pop('submission_id')
        submission_attributes = get_submission_attributes(
            broker_submission_id, submission_data)

        logger.info('Getting File A data')
        db_cursor.execute(
            'SELECT * FROM certified_appropriation WHERE submission_id = %s',
            [submission_id])
        appropriation_data = dictfetchall(db_cursor)
        logger.info(
            'Acquired File A (appropriation) data for {}, there are {} rows.'
            .format(submission_id, len(appropriation_data)))
        logger.info('Loading File A data')
        start_time = datetime.now()
        load_file_a(submission_attributes, appropriation_data, db_cursor)
        logger.info(
            'Finished loading File A data, took {}'.format(datetime.now() -
                                                           start_time))

        logger.info('Getting File B data')
        prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor)
        logger.info(
            'Acquired File B (program activity object class) data for {}, '
            'there are {} rows.'.format(submission_id,
                                        len(prg_act_obj_cls_data)))
        logger.info('Loading File B data')
        start_time = datetime.now()
        load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)
        logger.info(
            'Finished loading File B data, took {}'.format(datetime.now() -
                                                           start_time))

        logger.info('Getting File C data')
        # we dont have sub-tier agency info, so we'll do our best
        # to match them to the more specific award records
        if isinstance(db_cursor, PhonyCursor):  # spoofed data for test
            # The canned responses are looked up by the fully formatted
            # query text, so that key is built exactly as before.
            award_financial_query = 'SELECT * FROM certified_award_financial WHERE submission_id = {0}'.\
                format(submission_id)
            award_financial_frame = pd.DataFrame(
                db_cursor.db_responses[award_financial_query])
        else:  # real data
            # Bind the id as a query parameter, like the other queries in
            # this method, instead of formatting it into the SQL text.
            award_financial_frame = pd.read_sql(
                'SELECT * FROM certified_award_financial WHERE submission_id = %s',
                connections['data_broker'],
                params=[submission_id])
        logger.info(
            'Acquired File C (award financial) data for {}, there are {} rows.'
            .format(submission_id, award_financial_frame.shape[0]))
        logger.info('Loading File C data')
        start_time = datetime.now()
        awards_touched = load_file_c(submission_attributes, db_cursor,
                                     award_financial_frame)
        logger.info(
            'Finished loading File C data, took {}'.format(datetime.now() -
                                                           start_time))

        if not options['nosubawards']:
            try:
                start_time = datetime.now()
                logger.info('Loading subaward data...')
                load_subawards(submission_attributes, awards_touched,
                               db_cursor)
                logger.info('Finshed loading subaward data, took {}'.format(
                    datetime.now() - start_time))
            except Exception:
                # Subaward loading stays best-effort, but keep the traceback
                # so the failure can be diagnosed from the log.
                logger.warning("Error loading subawards for this submission",
                               exc_info=True)
        else:
            logger.info('Skipping subawards due to flags...')

        # Once all the files have been processed, run any global cleanup/post-load tasks.
        # Cleanup not specific to this submission is run in the `.handle` method
        logger.info('Successfully loaded broker submission {}.'.format(
            submission_id))
Пример #3
0
    def handle_loading(self, db_cursor, *args, **options):
        """Load one broker submission: Files A, B, D2, D1, C, then subawards.

        Args:
            db_cursor: cursor used for the broker queries. When it is a
                ``PhonyCursor`` the File C rows are read from its
                ``db_responses`` mapping instead of the database.
            **options: only ``submission_id`` is read (a sequence whose
                first element is used).

        Returns:
            None. If the broker returns zero rows, or more than one row,
            for the submission id, an error is logged and nothing is loaded.
        """
        # Grab the submission id
        submission_id = options['submission_id'][0]

        # Verify the ID exists in the database, and grab the data
        db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s',
                          [submission_id])
        submission_data = dictfetchall(db_cursor)

        if len(submission_data) == 0:
            logger.error('Could not find submission with id ' +
                         str(submission_id))
            return
        elif len(submission_data) > 1:
            logger.error('Found multiple submissions with id ' +
                         str(submission_id))
            return

        # We have a single submission, which is what we want
        submission_data = submission_data[0]
        # Removed from the row to avoid collisions with the newer PK system
        broker_submission_id = submission_data.pop('submission_id')
        submission_attributes = get_submission_attributes(
            broker_submission_id, submission_data)

        # Move on, and grab file A data
        db_cursor.execute(
            'SELECT * FROM appropriation WHERE submission_id = %s',
            [submission_id])
        appropriation_data = dictfetchall(db_cursor)
        logger.info('Acquired appropriation data for {}, there are {} rows.'
                    .format(submission_id, len(appropriation_data)))
        load_file_a(submission_attributes, appropriation_data, db_cursor)

        # Let's get File B information
        prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor)
        logger.info(
            'Acquired program activity object class data for {}, there are {} rows.'
            .format(submission_id, len(prg_act_obj_cls_data)))
        load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)

        # File D2
        db_cursor.execute(
            'SELECT * FROM award_financial_assistance WHERE submission_id = %s',
            [submission_id])
        award_financial_assistance_data = dictfetchall(db_cursor)
        logger.info(
            'Acquired award financial assistance data for {}, there are {} rows.'
            .format(submission_id, len(award_financial_assistance_data)))
        load_base.load_file_d2(submission_attributes,
                               award_financial_assistance_data, db_cursor)

        # File D1
        db_cursor.execute(
            'SELECT * FROM award_procurement WHERE submission_id = %s',
            [submission_id])
        procurement_data = dictfetchall(db_cursor)
        logger.info(
            'Acquired award procurement data for {}, there are {} rows.'
            .format(submission_id, len(procurement_data)))
        load_base.load_file_d1(submission_attributes, procurement_data,
                               db_cursor)

        # Let's get File C information
        # Note: we load File C last, because the D1 and D2 files have the awarding
        # agency top tier (CGAC) and sub tier data needed to look up/create
        # the most specific possible corresponding award. When looking up/
        # creating awards for File C, we dont have sub-tier agency info, so
        # we'll do our best to match them to the more specific award records
        # already created by the D file load

        award_financial_query = 'SELECT * FROM award_financial WHERE submission_id = %s'
        if isinstance(db_cursor, PhonyCursor):  # spoofed data for test
            # Canned responses are keyed by the un-interpolated query text.
            award_financial_frame = pd.DataFrame(
                db_cursor.db_responses[award_financial_query])
        else:  # real data
            # Bind the id as a query parameter rather than %-interpolating
            # it into the SQL string.
            award_financial_frame = pd.read_sql(
                award_financial_query,
                connections['data_broker'],
                params=[submission_id])
        logger.info(
            'Acquired award financial data for {}, there are {} rows.'.format(
                submission_id, award_financial_frame.shape[0]))
        load_file_c(submission_attributes, db_cursor, award_financial_frame)

        # Once all the files have been processed, run any global
        # cleanup/post-load tasks.
        # 1. Load subawards
        try:
            load_subawards(submission_attributes, db_cursor)
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt /
            # SystemExit, and the traceback is kept in the log.
            logger.warning("Error loading subawards for this submission",
                           exc_info=True)
    def handle_loading(self, db_cursor, *args, **options):
        """Load a single broker submission: Files A, B and C, then subawards.

        Args:
            db_cursor: cursor used for the broker queries. When it is a
                ``PhonyCursor`` the File C rows are read from its
                ``db_responses`` mapping instead of the database.
            **options: ``submission_id`` (sequence; only the first element
                is used) and ``nosubawards`` (truthy skips the subaward
                load) are read.

        Raises:
            CommandError: if the broker returns zero rows, or more than one
                row, for the submission id.
        """

        def signal_handler(signum, frame):
            # Flag the transaction for rollback, then abort via an exception.
            transaction.set_rollback(True)
            raise Exception('Received interrupt signal. Aborting...')

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        submission_id = options['submission_id'][0]

        logger.info('Getting submission {} from broker...'.format(submission_id))
        db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id])

        submission_data = dictfetchall(db_cursor)
        logger.info('Finished getting submission {} from broker'.format(submission_id))

        if len(submission_data) == 0:
            raise CommandError('Could not find submission with id ' + str(submission_id))
        elif len(submission_data) > 1:
            raise CommandError('Found multiple submissions with id ' + str(submission_id))

        # Work on a copy so the fetched row itself is left untouched.
        submission_data = submission_data[0].copy()
        # We use broker_submission_id, submission_id is our own PK
        broker_submission_id = submission_data.pop('submission_id')
        submission_attributes = get_submission_attributes(broker_submission_id, submission_data)

        logger.info('Getting File A data')
        db_cursor.execute('SELECT * FROM certified_appropriation WHERE submission_id = %s', [submission_id])
        appropriation_data = dictfetchall(db_cursor)
        logger.info('Acquired File A (appropriation) data for {}, there are {} rows.'.format(
            submission_id, len(appropriation_data)))
        logger.info('Loading File A data')
        start_time = datetime.now()
        load_file_a(submission_attributes, appropriation_data, db_cursor)
        logger.info('Finished loading File A data, took {}'.format(datetime.now() - start_time))

        logger.info('Getting File B data')
        prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor)
        logger.info('Acquired File B (program activity object class) data for {}, there are {} rows.'.format(
            submission_id, len(prg_act_obj_cls_data)))
        logger.info('Loading File B data')
        start_time = datetime.now()
        load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)
        logger.info('Finished loading File B data, took {}'.format(datetime.now() - start_time))

        logger.info('Getting File C data')
        # we dont have sub-tier agency info, so we'll do our best
        # to match them to the more specific award records
        if isinstance(db_cursor, PhonyCursor):  # spoofed data for test
            # The canned responses are looked up by the fully formatted
            # query text, so that key is built exactly as before.
            award_financial_query = 'SELECT * FROM certified_award_financial WHERE submission_id = {0}'.\
                format(submission_id)
            award_financial_frame = pd.DataFrame(db_cursor.db_responses[award_financial_query])
        else:  # real data
            # Bind the id as a query parameter, like the other queries in
            # this method, instead of formatting it into the SQL text.
            award_financial_frame = pd.read_sql(
                'SELECT * FROM certified_award_financial WHERE submission_id = %s',
                connections['data_broker'],
                params=[submission_id])
        logger.info('Acquired File C (award financial) data for {}, there are {} rows.'
                    .format(submission_id, award_financial_frame.shape[0]))
        logger.info('Loading File C data')
        start_time = datetime.now()
        awards_touched = load_file_c(submission_attributes, db_cursor, award_financial_frame)
        logger.info('Finished loading File C data, took {}'.format(datetime.now() - start_time))

        if not options['nosubawards']:
            try:
                start_time = datetime.now()
                logger.info('Loading subaward data...')
                load_subawards(submission_attributes, awards_touched, db_cursor)
                logger.info('Finshed loading subaward data, took {}'.format(datetime.now() - start_time))
            except Exception:
                # Subaward loading stays best-effort, but keep the traceback
                # so the failure can be diagnosed from the log.
                logger.warning("Error loading subawards for this submission", exc_info=True)
        else:
            logger.info('Skipping subawards due to flags...')

        # Once all the files have been processed, run any global cleanup/post-load tasks.
        # Cleanup not specific to this submission is run in the `.handle` method
        logger.info('Successfully loaded broker submission {}.'.format(submission_id))