示例#1
0
    def _start_process(self, start_timeperiod, end_timeperiod, arguments):
        """Spawn the Sqoop shell command covering [start_timeperiod, end_timeperiod).

        Converts both synergy timeperiods into Sqoop-formatted timestamps,
        resolves the sink path for this process, and launches the command
        via psutil.Popen, remembering the handle in self.cli_process.
        Any failure is logged; nothing is raised to the caller.
        """
        try:
            # translate synergy timeperiods into Sqoop's timestamp format
            slice_start = time_helper.synergy_to_datetime(QUALIFIER_HOURLY, start_timeperiod) \
                                     .strftime(SqoopDriver.SQOOP_DATE_FORMAT)
            slice_end = time_helper.synergy_to_datetime(QUALIFIER_HOURLY, end_timeperiod) \
                                   .strftime(SqoopDriver.SQOOP_DATE_FORMAT)

            sink_path = ProcessContext.get_sink(self.process_name)

            self.logger.info('start: %s {' % self.process_name)
            command_line = [settings['bash_shell'],
                            settings['sqoop_command'],
                            str(slice_start),
                            str(slice_end),
                            sink_path + '/' + start_timeperiod]
            self.cli_process = psutil.Popen(command_line,
                                            close_fds=True,
                                            cwd=settings['process_cwd'],
                                            stdin=PIPE,
                                            stdout=PIPE,
                                            stderr=PIPE)
            self.logger.info('Started %s with pid = %r' % (self.process_name, self.cli_process.pid))
        except Exception:
            self.logger.error('Exception on starting: %s' % self.process_name, exc_info=True)
        finally:
            self.logger.info('}')
示例#2
0
    def _skip_the_node(self, node):
        """Used during _get_next_node calculations.
        Returns True when the given node should be _skipped_."""
        # case 1: the node's own processing is already finished
        if node.time_record.get_state() in (TimeTableEntry.STATE_SKIPPED,
                                            TimeTableEntry.STATE_PROCESSED):
            return True

        # case 2: daily leaf node - keep retrying this time_period while it is
        # still within the life-support window; afterwards, skip once the
        # failure count exceeds the retry limit
        if node.process_name == self.process_daily and not node.children:
            creation_time = time_helper.synergy_to_datetime(node.process_name, node.timestamp)
            if datetime.utcnow() - creation_time < timedelta(hours=LIFE_SUPPORT_HOURS):
                return False
            return node.time_record.get_number_of_failures() > MAX_NUMBER_OF_RETRIES

        # case 3: process_daily/process_monthly/process_yearly with children.
        # The parent is skippable only if every child is "spoiled" (skipped, or
        # retried out). A child with a None time_record, or one still within
        # retry budget and not skipped, keeps the parent worth processing.
        for child in node.children.values():
            record = child.time_record
            if record is None:
                return False
            if record.get_number_of_failures() <= MAX_NUMBER_OF_RETRIES \
                    and record.get_state() != TimeTableEntry.STATE_SKIPPED:
                return False
        return True
示例#3
0
    def test_catching_up_time_build_tree(self):
        """Verify the tree catches up after the start timeperiod is wound back
        and then the actual timeperiod is wound forward."""
        hours_shift = 5 * 24

        # wind the configured start 5 days back and rebuild
        shifted_start = wind_the_time(QUALIFIER_HOURLY, self.initial_synergy_start_time, -hours_shift)
        settings['synergy_start_timeperiod'] = shifted_start
        self.tree.build_tree()
        self._perform_assertions(shifted_start, hours_shift / 24)

        # wind the "actual" timeperiod 5 days forward and rebuild again
        shifted_actual = wind_the_time(QUALIFIER_HOURLY, self.initial_synergy_start_time, hours_shift)
        time_helper.actual_timeperiod = \
            wind_actual_timeperiod(time_helper.synergy_to_datetime(QUALIFIER_HOURLY, shifted_actual))
        self.tree.build_tree()
        self._perform_assertions(shifted_start, 2 * hours_shift / 24)
示例#4
0
    def fire_managed_worker(self, *args):
        """requests vertical aggregator (hourly site, daily variant, etc) to start up"""
        try:
            process_name, scheduler_entry_obj = args[0], args[1]
            self.logger.info('%s {' % process_name)

            timetable_record = self.timetable.get_next_job_record(process_name)
            pipeline = self.pipelines[scheduler_entry_obj.state_machine_name]

            if not ProcessContext.run_on_active_timeperiod(scheduler_entry_obj.process_name):
                # this process must not run on the still-active timeperiod:
                # wait until the timeperiod has fully elapsed plus a 5-minute lag
                time_qualifier = ProcessContext.get_time_qualifier(process_name)
                next_timeperiod = time_helper.increment_timeperiod(time_qualifier, timetable_record.timeperiod)
                trigger_at = time_helper.synergy_to_datetime(time_qualifier, next_timeperiod) \
                             + timedelta(minutes=LAG_5_MINUTES)

                if datetime.utcnow() <= trigger_at:
                    self.logger.info('Timetable record %s for timeperiod %s will not be triggered until %s.'
                                     % (timetable_record.document['_id'],
                                        timetable_record.timeperiod,
                                        trigger_at.strftime('%Y-%m-%d %H:%M:%S')))
                    return

            # dispatch to the pipeline method matching the process type
            process_type = ProcessContext.get_process_type(scheduler_entry_obj.process_name)
            if process_type == TYPE_BLOCKING_DEPENDENCIES:
                pipeline.manage_pipeline_with_blocking_dependencies(process_name, timetable_record)
            elif process_type == TYPE_BLOCKING_CHILDREN:
                pipeline.manage_pipeline_with_blocking_children(process_name, timetable_record)
            elif process_type == TYPE_MANAGED:
                pipeline.manage_pipeline_for_process(process_name, timetable_record)

        except (AMQPError, IOError) as e:
            # broken broker connection: reset publishers so they reconnect
            self.logger.error('AMQPError: %s' % str(e), exc_info=True)
            self.publishers.reset_all(suppress_logging=True)
        except Exception as e:
            self.logger.error('Exception: %s' % str(e), exc_info=True)
        finally:
            self.logger.info('}')
示例#5
0
                      QUALIFIER_YEARLY]

        expected = [datetime(year=2010, month=12, day=31, hour=23, minute=12, second=34),
                    datetime(year=2010, month=12, day=31, hour=23, minute=00, second=0),
                    datetime(year=2010, month=12, day=31, hour=00, minute=00, second=0),
                    datetime(year=2010, month=12, day=01, hour=00, minute=00, second=0),
                    datetime(year=2010, month=01, day=01, hour=00, minute=00, second=0)]

        params = ['20101231231234',
                  '2010123123',
                  '2010123100',
                  '2010120000',
                  '2010000000']

        for i in range(5):
            assert time_helper.synergy_to_datetime(qualifiers[i], params[i]) == expected[i]

    def test_increment_time(self):
        stamps = ['2011010100', '2011010112', '2011010123']
        expected = ['2011010101', '2011010113', '2011010200']
        for i in range(3):
            assert time_helper.increment_timeperiod(QUALIFIER_HOURLY, stamps[i]) == expected[i]

        stamps = ['2011010100', '2011013100', '2010123100']
        expected = ['2011010200', '2011020100', '2011010100']
        for i in range(3):
            assert time_helper.increment_timeperiod(QUALIFIER_DAILY, stamps[i]) == expected[i]

        stamps = ['2011010000', '2011120000', '2011100000']
        expected = ['2011020000', '2012010000', '2011110000']
        for i in range(3):
                     process_context.PROCESS_SITE_YEARLY]

        expected = [datetime(year = 2010, month = 12, day = 31, hour = 23, minute = 12, second = 34),
                    datetime(year = 2010, month = 12, day = 31, hour = 23, minute = 00, second = 0),
                    datetime(year = 2010, month = 12, day = 31, hour = 00, minute = 00, second = 0),
                    datetime(year = 2010, month = 12, day = 01, hour = 00, minute = 00, second = 0),
                    datetime(year = 2010, month = 01, day = 01, hour = 00, minute = 00, second = 0),
        ]
        params =   ['20101231231234',
                    '2010123123',
                    '2010123100',
                    '2010120000',
                    '2010000000']

        for i in range(5):
            assert time_helper.synergy_to_datetime(processes[i], params[i]) == expected[i]


    def test_increment_time(self):
        stamps = ['2011010100', '2011010112', '2011010123']
        expected = ['2011010101', '2011010113', '2011010200']
        for i in range(3):
            assert time_helper.increment_time(process_context.PROCESS_SITE_HOURLY, stamps[i]) == expected[i]
        
        stamps = ['2011010100', '2011013100', '2010123100']
        expected = ['2011010200', '2011020100', '2011010100']
        for i in range(3):
            assert time_helper.increment_time(process_context.PROCESS_SITE_DAILY, stamps[i]) == expected[i]
                                                      
        stamps = ['2011010000', '2011120000', '2011100000']
        expected = ['2011020000', '2012010000', '2011110000']