Exemplo n.º 1
0
 def execute_entries(self, print_order=False):
     """
     Iterate through the ordered config entries and run each one.

     :param print_order: if True, only log/print the computed step order
         and return 0 without executing any entry.
     :return: 0 when only printing the order, otherwise the aggregate
         result of log_summary(update_all_configs()).
     """
     self.order_entries()
     if print_order:
         self.log_message("Printing order of steps",status='start',step='execute entries')
         self.print_the_order()
         self.log_message("Printing order of steps",status='complete')
         return 0
     # per-entry return values, keyed by entry name, for the final summary
     self.ent_result = {}
     start_time = RawDataUtilities.get_now()
     self.log_message("Now preparing to run config entries",status='start',step='execute entries')
     # self.entries appears to hold buckets of entries grouped by type
     for i in xrange(len(self.entries)):
         if len(self.entries[i]) > 0:
             # NOTE(review): 'congig_file_reader' looks like a typo for
             # 'config_file_reader'; left unchanged in case log consumers
             # match on it -- confirm before fixing.
             self.log_message('processing all configs of type: '+str(self.entries[i][0].get_entry_type())+", now..",status='running',name='congig_file_reader')
         for ent in self.entries[i]:
             if self.do_run(ent):
                 self.log_message(' About to run: '+ent.name, name=ent.name)
                 print "" if ent.name == 'kicker' else '... running '+ent.name
                 ent.execute_entry()
                 # cumulative seconds elapsed since the batch started
                 self.total_time = RawDataUtilities.get_diff(start_time,seconds=True)
                 self.ent_result[ent.name] = ent.return_val
                 # stamp the entry with run-date + elapsed time as its new
                 # last_processed value (string form)
                 ent.updates['last_processed'] = RawDataUtilities.string_from_date(RawDataUtilities.add_seconds_to_date(self.run_date, self.total_time))
             else:
                 if ent.no_run_reason:
                     print "NOT RUNNING: "+ent.name + " ("+ent.no_run_reason+")"

     self.log_message("All entries have run",status='complete')
     total_ret = self.log_summary(self.update_all_configs())
     return total_ret
Exemplo n.º 2
0
    def execute_connection(self):
        """
        Strip records dated before a computed cut-off date off a CSV file.

        parameters needed:
        file to strip (strip_file): assumed to be found in the working_directory
        date field to use (date_field): most common date formats can be used; recommended is MM/DD/YYYY e.g. 01/01/2015
        strip parameters (strip_criteria): two comma-separated elements, Unit and Date Unit (Day, Month, Year),
            e.g. strip_criteria = 6,Month (default is 1,Month)

        By default the cut-off is relative to the run date (defaults to today unless set otherwise),
        but can be relative to the "last_processed" date by passing "use_last_processed = true".

        Another option is to simply pass the date to use for stripping: strip_date = 01/01/2015.

        Anything occurring before (not equal to) that date will be pulled off of the file;
        the stripped-off records are saved under the connector name passed in the parameters.

        :return: 0 on completion.
        """
        filename = self.param_dict['strip_file']
        date_field = self.param_dict['date_field']
        units, date_unit = self.param_dict.get('strip_criteria',
                                               '1,Month').split(",")
        # BUG FIX: the original compared the *bound method* `.upper` (no
        # call parentheses) to "TRUE", which is always False, so the
        # use_last_processed option could never take effect.
        use_last = self.param_dict.get(
            "use_last_processed", "false").upper() == "TRUE"
        strip_date = self.entry.last_processed if use_last else RawDataUtilities.date_from_string(
            self.param_dict.get(
                "strip_date",
                RawDataUtilities.string_from_date(self.entry.today_dt)))

        match_date = self.get_match_date(units, date_unit, strip_date)

        file_in = os.path.join(self.entry.working_directory, filename)
        with open(file_in) as csvfile:
            reader = csv.DictReader(csvfile)
            self.setup_csv_temp_writer(self.get_temp_csv_name(),
                                       reader.fieldnames)
            for row in reader:
                compare_date = RawDataUtilities.date_from_string(
                    row[date_field])
                # subtract the record's date from the match date;
                # e.g. match 10/1/2014, record 09/1/2014 -> diff < 0,
                # so records strictly before the cut-off are written out
                diff = RawDataUtilities.get_diff(match_date, compare_date)
                if diff < 0:
                    self.write_temp_rec(row)
            self.close_temp_csv()
        return 0
Exemplo n.º 3
0
    def execute_connection(self):
        """
        Strip records dated before a computed cut-off date off a CSV file.

        parameters needed:
        file to strip (strip_file): assumed to be found in the working_directory
        date field to use (date_field): most common date formats can be used; recommended is MM/DD/YYYY e.g. 01/01/2015
        strip parameters (strip_criteria): two comma-separated elements, Unit and Date Unit (Day, Month, Year),
            e.g. strip_criteria = 6,Month (default is 1,Month)

        By default the cut-off is relative to the run date (defaults to today unless set otherwise),
        but can be relative to the "last_processed" date by passing "use_last_processed = true".

        Another option is to simply pass the date to use for stripping: strip_date = 01/01/2015.

        Anything occurring before (not equal to) that date will be pulled off of the file;
        the stripped-off records are saved under the connector name passed in the parameters.

        :return: 0 on completion.
        """
        filename = self.param_dict["strip_file"]
        date_field = self.param_dict["date_field"]
        units, date_unit = self.param_dict.get("strip_criteria", "1,Month").split(",")
        # BUG FIX: `.upper` without parentheses compared the bound method
        # object to "TRUE" (always False); the option never activated.
        use_last = self.param_dict.get("use_last_processed", "false").upper() == "TRUE"
        strip_date = (
            self.entry.last_processed
            if use_last
            else RawDataUtilities.date_from_string(
                self.param_dict.get("strip_date", RawDataUtilities.string_from_date(self.entry.today_dt))
            )
        )

        match_date = self.get_match_date(units, date_unit, strip_date)

        file_in = os.path.join(self.entry.working_directory, filename)
        with open(file_in) as csvfile:
            reader = csv.DictReader(csvfile)
            self.setup_csv_temp_writer(self.get_temp_csv_name(), reader.fieldnames)
            for row in reader:
                compare_date = RawDataUtilities.date_from_string(row[date_field])
                # subtract the record's date from the match date;
                # e.g. match 10/1/2014, record 09/1/2014 -> diff < 0,
                # so records strictly before the cut-off are written out
                diff = RawDataUtilities.get_diff(match_date, compare_date)
                if diff < 0:
                    self.write_temp_rec(row)
            self.close_temp_csv()
        return 0
    def get_min_max(self,csv_in, date_field):
        """
        Scan a CSV file and return its max date, min date and header list.

        :param csv_in: path to the CSV file to scan.
        :param date_field: name of the column holding the date value.
        :return: tuple (max_d, min_d, header); header is the first row's
            keys plus 'max_<date_field>' and 'min_<date_field>'. All three
            are None/None/[] when the file has no data rows.
        """
        max_field = "max_"+date_field
        min_field = "min_"+date_field
        max_d = None
        min_d = None
        header = []
        with open(csv_in) as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                if not header:
                    # NOTE(review): row.keys() is a plain list under
                    # Python 2 (this file uses print/xrange elsewhere);
                    # under Python 3, .append() on a keys view would fail.
                    header = row.keys()
                    header.append(max_field)
                    header.append(min_field)
                rec_date_str = row[date_field]

                rec_date = RawDataUtilities.date_from_string(rec_date_str)

                if not max_d:
                    max_d = rec_date

                # NOTE(review): max_d is replaced whenever the row's date
                # is newer than (max_d - 1 minute), so dates slightly OLDER
                # than the current max also replace it -- confirm this
                # 1-minute tolerance is intentional rather than `> 0`.
                adelta = max_d - rec_date

                if adelta < datetime.timedelta(minutes=1):
                    max_d = rec_date

                if not min_d:
                    min_d = rec_date

                # same 1-minute tolerance applied to the minimum
                adelta = rec_date - min_d

                if adelta < datetime.timedelta(minutes=1):
                    min_d = rec_date
        return max_d, min_d, header
    def get_min_max(self, csv_in, date_field):
        """
        Scan a CSV file and report its newest date, oldest date and header.

        :param csv_in: path to the CSV file to scan.
        :param date_field: column name holding the date value.
        :return: (max_d, min_d, header) where header is the first row's
            keys plus 'max_<date_field>' and 'min_<date_field>'; returns
            (None, None, []) for a file with no data rows.
        """
        tolerance = datetime.timedelta(minutes=1)
        newest = None
        oldest = None
        columns = []
        with open(csv_in) as handle:
            for record in csv.DictReader(handle):
                if not columns:
                    columns = record.keys()
                    columns.append("max_" + date_field)
                    columns.append("min_" + date_field)

                record_date = RawDataUtilities.date_from_string(record[date_field])

                if not newest:
                    newest = record_date
                # replace the running max when this date is within the
                # 1-minute tolerance of it (or newer)
                if newest - record_date < tolerance:
                    newest = record_date

                if not oldest:
                    oldest = record_date
                # mirror-image tolerance check for the running min
                if record_date - oldest < tolerance:
                    oldest = record_date
        return newest, oldest, columns
Exemplo n.º 6
0
 def get_value(self, option, option_val):
     """
     Resolve a configuration option's value.

     Password entries must have names ending with "password"; their values
     are decrypted by the framework. All other values pass through as-is.
     """
     if not option.endswith('password'):
         return option_val
     return RawDataUtilities.decrypt_password(option_val)
    def execute_processor(self):
        """
        Rewrite the input CSV to the temp writer with min/max date columns.

        Reads 'in_file' and 'date_field' from param_dict, determines the
        max and min dates present in the file, then emits every record
        augmented with 'max_<date_field>' and 'min_<date_field>' columns.
        :return: 0 on completion.
        """
        filename = self.param_dict['in_file']
        date_field = self.param_dict['date_field']

        max_field = "max_" + date_field
        min_field = "min_" + date_field

        csv_in = os.path.join(self.entry.working_directory, filename)

        max_d, min_d, header = self.get_min_max(csv_in, date_field)

        # NOTE(review): the loop below re-derives max_d/min_d/header even
        # though get_min_max() above just computed them from the same file.
        # It looks redundant, but the 1-minute-tolerance update rule is not
        # a pure max/min, so removing it could change results -- confirm
        # before simplifying.
        with open(csv_in) as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                if not header:
                    header = row.keys()
                    header.append(max_field)
                    header.append(min_field)
                rec_date_str = row[date_field]

                rec_date = RawDataUtilities.date_from_string(rec_date_str)

                if not max_d:
                    max_d = rec_date

                adelta = max_d - rec_date

                if adelta < datetime.timedelta(minutes=1):
                    max_d = rec_date

                if not min_d:
                    min_d = rec_date

                adelta = rec_date - min_d

                if adelta < datetime.timedelta(minutes=1):
                    min_d = rec_date

        self.setup_csv_temp_writer(self.get_temp_csv_name(),
                                   header,
                                   write_header=True)

        # second pass: emit every row plus the min/max columns; row keys
        # are copied after, so a column name collision would overwrite them
        with open(csv_in) as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                out_dict = {}
                out_dict[max_field] = max_d
                out_dict[min_field] = min_d
                for key in row.keys():
                    out_dict[key] = row[key]
                self.write_temp_rec(out_dict)

        self.close_temp_csv()

        return 0
Exemplo n.º 8
0
    def get_conver_date(self, units, date_unit, in_date):
        """
        Subtract the given number of date units from the passed-in date.

        example:
        ___UNITS____:___DATE_UNIT___:____IN_DATE___:___RESULT___
            6           Day            1/10/2014       1/4/2014
            1           Month         10/23/2013       9/23/2013
            1           Year           7/04/2012       7/04/2011

        :param units: number of units to go back (sign is ignored).
        :param date_unit: 'Day', 'Month' or anything else for Year.
        :param in_date: the date to offset.
        :return: the offset date.
        """
        # normalise to a negative offset so we always move backwards
        delta = -abs(int(units))
        unit = date_unit.upper()
        if unit == "MONTH":
            return RawDataUtilities.monthdelta(in_date, delta)
        if unit == "DAY":
            return RawDataUtilities.daydelta(in_date, delta)
        # anything that is not Month/Day is treated as Year
        return RawDataUtilities.yeardelta(in_date, delta)
Exemplo n.º 9
0
    def get_conver_date(self,units, date_unit, in_date):
        """
        From the passed-in date, subtract the number of units of date_unit.

        example:
        ___UNITS____:___DATE_UNIT___:____IN_DATE___:___RESULT___
            6           Day            1/10/2014       1/4/2014
            1           Month         10/23/2013       9/23/2013
            1           Year           7/04/2012       7/04/2011

        :param units: number of units to go back (string or int; sign ignored).
        :param date_unit: 'Day', 'Month', or anything else for Year.
        :param in_date: the date to offset.
        :return: the offset date.
        """
        delta = int(units)
        # always subtract: force the offset negative
        if delta > 0:
            delta = delta * -1
        if "MONTH" == date_unit.upper():
            return RawDataUtilities.monthdelta(in_date, delta)
        elif "DAY" == date_unit.upper():
            return RawDataUtilities.daydelta(in_date, delta)
        else:
            # any unrecognised unit falls through to Year
            return RawDataUtilities.yeardelta(in_date, delta)
Exemplo n.º 10
0
    def execute_processor(self):
        """
        Rewrite the input CSV to the temp writer with min/max date columns.

        Reads 'in_file' and 'date_field' from param_dict, determines the
        max and min dates present in the file, then emits every record
        augmented with 'max_<date_field>' and 'min_<date_field>' columns.
        :return: 0 on completion.
        """
        in_name = self.param_dict['in_file']
        date_field = self.param_dict['date_field']

        max_field = "max_" + date_field
        min_field = "min_" + date_field

        csv_in = os.path.join(self.entry.working_directory, in_name)

        max_d, min_d, header = self.get_min_max(csv_in, date_field)

        # first pass: fold every row into the running max/min using the
        # same 1-minute tolerance rule as get_min_max (kept as-is because
        # the tolerance update is order-dependent)
        tolerance = datetime.timedelta(minutes=1)
        with open(csv_in) as csvfile:
            for row in csv.DictReader(csvfile):
                if not header:
                    header = row.keys()
                    header.append(max_field)
                    header.append(min_field)

                rec_date = RawDataUtilities.date_from_string(row[date_field])

                if not max_d:
                    max_d = rec_date
                if max_d - rec_date < tolerance:
                    max_d = rec_date

                if not min_d:
                    min_d = rec_date
                if rec_date - min_d < tolerance:
                    min_d = rec_date

        self.setup_csv_temp_writer(self.get_temp_csv_name(), header, write_header=True)

        # second pass: write each record with the min/max columns added;
        # update() lets a same-named row column overwrite them, matching
        # the original copy order
        with open(csv_in) as csvfile:
            for row in csv.DictReader(csvfile):
                out_rec = {max_field: max_d, min_field: min_d}
                out_rec.update(row)
                self.write_temp_rec(out_rec)

        self.close_temp_csv()

        return 0
Exemplo n.º 11
0
 def get_instance(self):
     """
     Import and instantiate this entry's configured implementation class.

     On success the instance is stored on self.instance and True is
     returned; on any failure the error is logged (with traceback) and
     False is returned so the caller can continue.
     """
     try:
         self.log_message('Will try to import: '+self.src_implementation,step='execute entries',status='running',name=self.name)
         class_ = RawDataUtilities.get_class(self.src_implementation)
         # hand the implementation its name and a back-reference to us
         self.options['name'] = self.name
         self.options['entry'] = self
         self.instance = class_(self.options)
     except Exception as err1:
         self.log_message('problem instantiating class: ' + str(err1) + " Will continue execution",log_level=self.log_error(),name=self.name,status='error')
         traceback.print_exc()
         return False
     return True
Exemplo n.º 12
0
 def convert_value(self, attr_v, convert):
     """
     Render an attribute value according to a conversion hint.

     datetimes are formatted with `convert` as the format string and
     quoted; convert == "str" quotes the stringified value; string inputs
     are parsed as int (convert == "int") or float (anything else); all
     other values pass through unchanged.
     """
     if isinstance(attr_v, datetime):
         formatted = RawDataUtilities.string_from_date(attr_v, str_format=convert)
         return "'" + formatted + "'"
     if convert == "str":
         return "'" + str(attr_v) + "'"
     if not isinstance(attr_v, str):
         # non-string, non-datetime values need no conversion
         return attr_v
     return int(attr_v) if convert == "int" else float(attr_v)
Exemplo n.º 13
0
 def run_now(self,run_date=None):
     """
     Determine if this entry should be run today.

     Returns False (and records no_run_reason) when the previous run
     failed; otherwise compares the time since last_processed against the
     entry's configured frequency.
     """
     if self.last_run != 'success':
         self.no_run_reason = "Last Run Ended In Failure"
         return False
     # measure from the supplied run date when given, else from today
     diff_date = run_date if run_date else self.today_dt
     freq = self.frequencies[self.run_frequency]
     num_since_last_run = RawDataUtilities.get_diff(self.last_processed,diff_date)
     # negative freq means "always run"; first runs always qualify
     return bool(num_since_last_run >= freq or self.first_run or freq < 0)
Exemplo n.º 14
0
 def convert_value(self, attr_v, convert):
     """
     Convert an attribute value for output based on a conversion hint.

     datetime values are formatted using `convert` and quoted; 'str'
     quotes the stringified value; string inputs become int ('int') or
     float (otherwise); everything else is returned untouched.
     """
     if isinstance(attr_v, datetime):
         formatted = RawDataUtilities.string_from_date(attr_v, str_format=convert)
         return "'"+formatted+"'"
     if convert == 'str':
         return "'"+str(attr_v)+"'"
     if isinstance(attr_v,str):
         if convert == 'int':
             return int(attr_v)
         # anything other than 'int' is assumed to be a float conversion
         return float(attr_v)
     # non-string, non-datetime: no conversion performed
     return attr_v
Exemplo n.º 15
0
 def __init__(self, params):
     """
     Constructor

     :param params: configuration dict; recognised keys are 'run_date'
         (date string), 'entry_name', and (commented out) 'uuid'.
     """
     Base_Type.__init__(self)
     if 'run_date' in params:
         self.run_date = RawDataUtilities.date_from_string(params['run_date'])

     # NOTE(review): presumably the name of a single entry to run, or None
     # to run everything -- inferred from the key name; confirm in callers.
     self.entry_name = params.get('entry_name',None)
     self.total_time = 0.0

     # only indexes 0 and 1 are (re)initialised here, so self.entries must
     # already exist (class attribute or set by Base_Type) -- verify.
     self.entries[0] = []
     self.entries[1] = []

     #self.set_uuid(params.get('uuid','---none---'))
     self.log_message("Initialization Complete", log_type='main', status='running', step='load configs',name='config_file_reader',log_level=self.log_info())
Exemplo n.º 16
0
 def run_now(self, run_date=None):
     """
     Determine if this entry should be run today.

     A failed previous run blocks execution (and records the reason);
     otherwise the elapsed time since last_processed is compared against
     the entry's configured run frequency.
     """
     if self.last_run == 'success':
         # prefer an explicit run date; fall back to today's date
         target = run_date or self.today_dt
         freq = self.frequencies[self.run_frequency]
         elapsed = RawDataUtilities.get_diff(self.last_processed, target)
         # first runs and negative ("always") frequencies always qualify
         if self.first_run or freq < 0 or elapsed >= freq:
             return True
     else:
         self.no_run_reason = "Last Run Ended In Failure"
     return False
Exemplo n.º 17
0
 def clone_entry(self, c_entry):
     """
     Build a shallow 'kicker' clone of an entry.

     The kicker is a synthetic entry whose only job is to start the
     process: no dependencies, runs every time, last run successful.
     """
     kicker = copy.copy(c_entry)
     # overwrite the copied identity/behaviour with kicker defaults
     kicker.name = 'kicker'
     kicker.out_filename = 'kicker.csv'
     kicker.file_write = "Append"
     kicker.description = 'An entry designed to just start the process'
     kicker.dependencies = []
     kicker.run_frequency = 'Every'
     kicker.last_run = 'success'
     kicker.source_file = None
     kicker.temp_results = 'None'
     kicker.return_val = 0
     kicker.last_processed = RawDataUtilities.get_now()
     kicker.order = 0
     # create the implementation instance and cross-link it with the entry
     instance = create_entry('kicker', c_entry.get_entry_type())
     instance.entry = kicker
     kicker.instance = instance
     return kicker
Exemplo n.º 18
0
    def get_instance(self):
        """
        Import and instantiate this entry's configured implementation class.

        :return: True when self.instance was created successfully; False
            when instantiation failed (the error is logged with traceback
            and execution is allowed to continue).
        """
        try:
            self.log_message('Will try to import: ' + self.src_implementation,
                             step='execute entries',
                             status='running',
                             name=self.name)
            class_ = RawDataUtilities.get_class(self.src_implementation)

            # hand the implementation its name and a back-reference to us
            self.options['name'] = self.name
            self.options['entry'] = self
            self.instance = class_(self.options)
            return True
        except Exception as err1:
            # deliberate best-effort: log and report failure, do not raise
            self.log_message('problem instantiating class: ' + str(err1) +
                             " Will continue execution",
                             log_level=self.log_error(),
                             name=self.name,
                             status='error')
            traceback.print_exc()
            return False
Exemplo n.º 19
0
    def load_filters(self):
        """
        Parse filter criteria from param_dict into self.crit_dict.

        Parameter names look like '<criterion>_<field>' (e.g. 'gt_amount')
        where criterion is one of equal/gt/lt/gte/lte/in. A field ending
        in '_dt' marks the value as a date string; 'in' values are
        comma-separated lists; everything else goes through self.convert.
        """
        handlers = {
            'equal': self._equal,
            'gt': self._gt,
            'lt': self._lt,
            'gte': self._gte,
            'lte': self._lte,
            'in': self._in
        }

        self.crit_dict = {}
        for param in self.param_dict.keys():
            criterion = param[:param.find('_')]
            if criterion not in handlers:
                continue

            field = param[len(criterion) + 1:]
            is_date = field.endswith("_dt")
            if is_date:
                field = field[:-3]
            value = self.param_dict[param]

            if is_date:
                value = RawDataUtilities.date_from_string(value)
            elif criterion == 'in':
                # keep the raw element when convert() yields a falsy value
                value = [self.convert(m) if self.convert(m) else m
                         for m in value.split(",")]
            else:
                value = self.convert(value)

            self.crit_dict[param] = {
                'field': field,
                'criterion': criterion,
                'value': value,
                'function': handlers[criterion]
            }
Exemplo n.º 20
0
 def load_filters(self):
     """
     Parse filter criteria from param_dict into self.crit_dict.

     Parameter names look like '<criterion>_<field>' (e.g. 'gt_amount')
     where criterion is one of equal/gt/lt/gte/lte/in. A field ending in
     '_dt' marks the value as a date string; 'in' values are
     comma-separated lists; everything else goes through self.convert.
     """
     crit_list = {'equal':self._equal,'gt':self._gt,'lt':self._lt,'gte':self._gte,'lte':self._lte,'in':self._in}

     self.crit_dict = {}
     for param in self.param_dict.keys():
         # text before the first underscore selects the criterion; params
         # without a recognised prefix are silently skipped
         criterion = param[:param.find('_')]
         if criterion in crit_list.keys():
             field = param[len(criterion)+1:]
             dt = False
             if field.endswith("_dt"):
                 # '_dt' suffix means the value is a date string
                 dt = True
                 field = field[:-3]
             value = self.param_dict[param]

             if dt:
                 value = RawDataUtilities.date_from_string(value)
             else:
                 if criterion == 'in':
                     mlist = value.split(",")
                     # keep the raw element when convert() is falsy
                     value = [self.convert(m) if self.convert(m) else m for m in mlist]
                 else:
                     value = self.convert(value)

             self.crit_dict[param] = {'field':field,'criterion':criterion,'value':value,'function':crit_list[criterion]}
Exemplo n.º 21
0
            else RawDataUtilities.date_from_string(
                self.param_dict.get("strip_date", RawDataUtilities.string_from_date(self.entry.today_dt))
            )
        )

        match_date = self.get_match_date(units, date_unit, strip_date)

        file_in = os.path.join(self.entry.working_directory, filename)
        with open(file_in) as csvfile:
            reader = csv.DictReader(csvfile)
            self.setup_csv_temp_writer(self.get_temp_csv_name(), reader.fieldnames)
            for row in reader:
                compare_date = RawDataUtilities.date_from_string(row[date_field])
                # should subtract the date on the file from match date
                # Example any dates less than 10/1/2014
                # Compare date = 09/1/2014
                # Difference is less than 0
                diff = RawDataUtilities.get_diff(match_date, compare_date)
                if diff < 0:
                    self.write_temp_rec(row)
            self.close_temp_csv()
        return 0


if __name__ == "__main__":
    """
    allow to run outside of framework
    """
    # Ad-hoc smoke test: build a bare helper and print a 10-year match
    # date. Python 2 print statement; FileDateStrip and RawDataUtilities
    # are expected to be defined/imported earlier in this file.
    fs = FileDateStrip({"name": "helper"})
    print fs.get_match_date(10, "year", RawDataUtilities.get_today_date())
Exemplo n.º 22
0
 def __init__(self, params):
     """
     Constructor

     :param params: dict of entry settings. Required keys: run_date, name,
         source_file, description, src_implementation, working_directory.
         Optional: dependencies, successors, connection_type,
         last_processed, num_run, out_filename, run_frequency,
         temp_results, file_write, last_run, instantiate, non_std.
     """
     Base_Type.__init__(self)
     ## class variables
     self.updates={}
     self.return_val = None
     self.description = None
     self.connection_type = None
     self.last_processed = None
     self.num_run = 0
     self.message = None
     self.no_run_reason = 'Schedule Not Ready'

     ### valid options... Default is Append
     # Append - Add records to the end
     # Overlay - replace the current with the new
     # New - Add a new file with the run number on the end Most current result has the name with no number
     self.file_write=None

     ## Valid Options... Default is Temp CSV file
     # Dataframe
     # ListList
     # DictionaryList
     # TempCSV
     # None
     self.temp_results=None
     self.out_filename = None
     self.src_implementation = None
     self.run_frequency = None

     ## location where results are written
     self.working_directory = None

     self.options = {}
     self.today_dt = None
     self.source_file = None
     self.instance = None
     self.temp_csv = None
     self.csv = None

     ## required fields
     self.today_dt = params['run_date']
     self.name = params['name']
     self.source_file = params['source_file']
     self.description = params['description']
     self.src_implementation = params['src_implementation']
     self.working_directory = params['working_directory']
     ## optional fields with defaults
     # NOTE(review): `== None` should be `is None` (PEP 8); behaviour kept.
     self.dependencies = [] if (params.get('dependencies', None)) == None else params['dependencies'].split(",")
     # every entry implicitly depends on the synthetic 'kicker' entry
     self.dependencies.append('kicker')
     self.successors= [] if (params.get('successors', None)) == None else params['successors'].split(",")
     self.connection_type = params.get('connection_type','none')

     #  if it hasn't run before it will be empty
     self.first_run = False
     if params.get('last_processed',None):
         self.last_processed = params['last_processed']
     else:
         self.last_processed = RawDataUtilities.string_from_date(self.today_dt)
         self.first_run = True

     # last_processed is normalised from string form to a date object
     self.last_processed = RawDataUtilities.date_from_string(self.last_processed)

     self.num_run = int(params.get('num_run',0))
     self.out_filename = params.get('out_filename',self.name)
     self.run_frequency = params.get('run_frequency','Daily')
     self.temp_results = params.get('temp_results','TempCSV')
     self.file_write = params.get('file_write','Append')
     #self.entry_type = params.get('entry_type', 'Connector')
     self.last_run = params.get('last_run','success')
     self.instantiate_instance = True if params.get("instantiate","true") == "true" else False
     ## parameters to pass down to the entry implementation
     self.options = params.get('non_std',{})

     # pending persistence updates applied after the entry runs
     self.updates['last_processed'] = RawDataUtilities.string_from_date(self.today_dt)
     self.updates['num_run'] = str(self.num_run + 1)

     self.order = 0
     self.ready = True
     if self.instantiate_instance and self.get_instance():
         self.log_message("Initialization Complete (success): "+self.name, log_type='entry', status='running', step='load configs',name='config_entry',log_level=self.log_info())
     else:
         if self.instantiate_instance:
             self.ready = False
             self.log_message("Initialization Complete (failure): "+self.name, log_type='entry', status='running', step='load configs',name='config_entry',log_level=self.log_info())
Exemplo n.º 23
0
                "strip_date",
                RawDataUtilities.string_from_date(self.entry.today_dt)))

        match_date = self.get_match_date(units, date_unit, strip_date)

        file_in = os.path.join(self.entry.working_directory, filename)
        with open(file_in) as csvfile:
            reader = csv.DictReader(csvfile)
            self.setup_csv_temp_writer(self.get_temp_csv_name(),
                                       reader.fieldnames)
            for row in reader:
                compare_date = RawDataUtilities.date_from_string(
                    row[date_field])
                # should subtract the date on the file from match date
                # Example any dates less than 10/1/2014
                # Compare date = 09/1/2014
                # Difference is less than 0
                diff = RawDataUtilities.get_diff(match_date, compare_date)
                if diff < 0:
                    self.write_temp_rec(row)
            self.close_temp_csv()
        return 0


if __name__ == "__main__":
    """
    allow to run outside of framework
    """
    # Ad-hoc smoke test: build a bare helper and print a 10-year match
    # date. Python 2 print statement; FileDateStrip and RawDataUtilities
    # are expected to be defined/imported earlier in this file.
    fs = FileDateStrip({"name": "helper"})
    print fs.get_match_date(10, "year", RawDataUtilities.get_today_date())
Exemplo n.º 24
0
    def __init__(self, params):
        """
        Constructor

        :param params: dict of entry settings. Required keys: run_date,
            name, source_file, description, src_implementation,
            working_directory. Optional: dependencies, successors,
            connection_type, last_processed, num_run, out_filename,
            run_frequency, temp_results, file_write, last_run,
            instantiate, non_std.
        """
        Base_Type.__init__(self)
        ## class variables
        self.updates = {}
        self.return_val = None
        self.description = None
        self.connection_type = None
        self.last_processed = None
        self.num_run = 0
        self.message = None
        self.no_run_reason = 'Schedule Not Ready'

        ### valid options... Default is Append
        # Append - Add records to the end
        # Overlay - replace the current with the new
        # New - Add a new file with the run number on the end Most current result has the name with no number
        self.file_write = None

        ## Valid Options... Default is Temp CSV file
        # Dataframe
        # ListList
        # DictionaryList
        # TempCSV
        # None
        self.temp_results = None
        self.out_filename = None
        self.src_implementation = None
        self.run_frequency = None

        ## location where results are written
        self.working_directory = None

        self.options = {}
        self.today_dt = None
        self.source_file = None
        self.instance = None
        self.temp_csv = None
        self.csv = None

        ## required fields
        self.today_dt = params['run_date']
        self.name = params['name']
        self.source_file = params['source_file']
        self.description = params['description']
        self.src_implementation = params['src_implementation']
        self.working_directory = params['working_directory']
        ## optional fields with defaults
        # NOTE(review): `== None` should be `is None` (PEP 8); behaviour kept.
        self.dependencies = [] if (params.get(
            'dependencies',
            None)) == None else params['dependencies'].split(",")
        # every entry implicitly depends on the synthetic 'kicker' entry
        self.dependencies.append('kicker')
        self.successors = [] if (params.get(
            'successors', None)) == None else params['successors'].split(",")
        self.connection_type = params.get('connection_type', 'none')

        #  if it hasn't run before it will be empty
        self.first_run = False
        if params.get('last_processed', None):
            self.last_processed = params['last_processed']
        else:
            self.last_processed = RawDataUtilities.string_from_date(
                self.today_dt)
            self.first_run = True

        # last_processed is normalised from string form to a date object
        self.last_processed = RawDataUtilities.date_from_string(
            self.last_processed)

        self.num_run = int(params.get('num_run', 0))
        self.out_filename = params.get('out_filename', self.name)
        self.run_frequency = params.get('run_frequency', 'Daily')
        self.temp_results = params.get('temp_results', 'TempCSV')
        self.file_write = params.get('file_write', 'Append')
        #self.entry_type = params.get('entry_type', 'Connector')
        self.last_run = params.get('last_run', 'success')
        self.instantiate_instance = True if params.get(
            "instantiate", "true") == "true" else False
        ## parameters to pass down to the entry implementation
        self.options = params.get('non_std', {})

        # pending persistence updates applied after the entry runs
        self.updates['last_processed'] = RawDataUtilities.string_from_date(
            self.today_dt)
        self.updates['num_run'] = str(self.num_run + 1)

        self.order = 0
        self.ready = True
        if self.instantiate_instance and self.get_instance():
            self.log_message("Initialization Complete (success): " + self.name,
                             log_type='entry',
                             status='running',
                             step='load configs',
                             name='config_entry',
                             log_level=self.log_info())
        else:
            if self.instantiate_instance:
                self.ready = False
                self.log_message("Initialization Complete (failure): " +
                                 self.name,
                                 log_type='entry',
                                 status='running',
                                 step='load configs',
                                 name='config_entry',
                                 log_level=self.log_info())
Exemplo n.º 25
0
class ConfigEntry(Base_Type):
    """
    An entry in the configuration file that will need to be processed.

    Each frequency maps to an approximate number of days; the scheduler
    compares the difference between the last run date and today's date
    against that number to decide when to run.  These are designed to be
    approximates: the scheduler package looks daily to see if the entry
    needs to be run, so it will likely be off on any given day, but for
    the most part that is okay.

    03/20/2015 - Added 'Never' so that some entries can be there purely
    to run as successors.

    See ConfigFileReader for more info on dependencies and successors.
    """
    ## Approximate days between runs per frequency.  A negative value
    ## ("Every") means run on every scheduler pass; 'Never' is
    ## effectively infinite.
    frequencies = {
        'Never': 999999,
        'Daily': 1,
        'Weekly': 7,
        'Monthly': 31,
        'Quarterly': 92,
        'Annual': 360,
        "Every": -1,
        "Fifteen": RawDataUtilities.fifteen_mins(),
        'Hour': RawDataUtilities.one_hour()
    }
    ## file_write option -> mode handed to open() when writing results.
    file_actions = {'Append': 'a', 'Overlay': 'w', 'New': 'w'}

    def __init__(self, params):
        """
        Constructor.

        :param params: dict assembled by ConfigFileReader.process_config.
            Required keys: run_date, name, source_file, description,
            src_implementation, working_directory.  All other keys are
            optional and fall back to the defaults below; non-standard
            options arrive pre-bundled under 'non_std'.
        """
        Base_Type.__init__(self)
        ## class variables
        self.updates = {}        # values persisted back to the config file after a run
        self.return_val = None   # result of the last execution (0 == success)
        self.description = None
        self.connection_type = None
        self.last_processed = None
        self.num_run = 0
        self.message = None
        self.no_run_reason = 'Schedule Not Ready'

        ### valid options... Default is Append
        # Append - Add records to the end
        # Overlay - replace the current with the new
        # New - Add a new file with the run number on the end Most current result has the name with no number
        self.file_write = None

        ## Valid Options... Default is Temp CSV file
        # Dataframe
        # ListList
        # DictionaryList
        # TempCSV
        # None
        self.temp_results = None
        self.out_filename = None
        self.src_implementation = None
        self.run_frequency = None

        ## location where results are written
        self.working_directory = None

        self.options = {}
        self.today_dt = None
        self.source_file = None
        self.instance = None
        self.temp_csv = None
        self.csv = None

        ## required fields
        self.today_dt = params['run_date']
        self.name = params['name']
        self.source_file = params['source_file']
        self.description = params['description']
        self.src_implementation = params['src_implementation']
        self.working_directory = params['working_directory']

        ## optional fields with defaults
        # every entry implicitly depends on the synthetic 'kicker' entry
        deps = params.get('dependencies')
        self.dependencies = [] if deps is None else deps.split(",")
        self.dependencies.append('kicker')
        succ = params.get('successors')
        self.successors = [] if succ is None else succ.split(",")
        self.connection_type = params.get('connection_type', 'none')

        #  if it hasn't run before, last_processed will be empty
        self.first_run = False
        if params.get('last_processed'):
            self.last_processed = params['last_processed']
        else:
            self.last_processed = RawDataUtilities.string_from_date(
                self.today_dt)
            self.first_run = True

        self.last_processed = RawDataUtilities.date_from_string(
            self.last_processed)

        self.num_run = int(params.get('num_run', 0))
        self.out_filename = params.get('out_filename', self.name)
        self.run_frequency = params.get('run_frequency', 'Daily')
        self.temp_results = params.get('temp_results', 'TempCSV')
        self.file_write = params.get('file_write', 'Append')
        self.last_run = params.get('last_run', 'success')
        self.instantiate_instance = params.get("instantiate", "true") == "true"
        ## parameters to pass down to the entry implementation
        self.options = params.get('non_std', {})

        # values written back to the config file once the run completes
        self.updates['last_processed'] = RawDataUtilities.string_from_date(
            self.today_dt)
        self.updates['num_run'] = str(self.num_run + 1)

        self.order = 0
        self.ready = True
        if self.instantiate_instance and self.get_instance():
            self.log_message("Initialization Complete (success): " + self.name,
                             log_type='entry',
                             status='running',
                             step='load configs',
                             name='config_entry',
                             log_level=self.log_info())
        else:
            if self.instantiate_instance:
                # instantiation was requested but failed; mark the entry
                # unusable so the reader skips it
                self.ready = False
                self.log_message("Initialization Complete (failure): " +
                                 self.name,
                                 log_type='entry',
                                 status='running',
                                 step='load configs',
                                 name='config_entry',
                                 log_level=self.log_info())

    def run_now(self, run_date=None):
        """
        Determine if this entry should be run today.

        Returns True when the last run succeeded and either the entry has
        never run, its frequency is negative ("Every"), or enough days
        have elapsed since last_processed.
        """
        if self.last_run == 'success':
            diff_date = run_date if run_date else self.today_dt
            freq = self.frequencies[self.run_frequency]
            num_since_last_run = RawDataUtilities.get_diff(
                self.last_processed, diff_date)
            if num_since_last_run >= freq or self.first_run or freq < 0:
                return True
        else:
            self.no_run_reason = "Last Run Ended In Failure"
        return False

    def get_instance(self):
        """
        Import and instantiate the src_implementation class, passing it the
        non-standard options plus this entry.  Returns True on success.
        """
        try:
            self.log_message('Will try to import: ' + self.src_implementation,
                             step='execute entries',
                             status='running',
                             name=self.name)
            class_ = RawDataUtilities.get_class(self.src_implementation)

            self.options['name'] = self.name
            self.options['entry'] = self
            self.instance = class_(self.options)
            return True
        except Exception as err1:
            self.log_message('problem instantiating class: ' + str(err1) +
                             " Will continue execution",
                             log_level=self.log_error(),
                             name=self.name,
                             status='error')
            traceback.print_exc()
            return False

    def execute_entry(self):
        """
        Run this entry and return its result code (0 == success).
        """
        self.run_entry()

        return self.return_val

    def run_entry(self):
        """
        This will call the connector or processor execution function.

        That function will return results back to this entry to process as
        a new output or append to the existing.

        The instance could run correctly but still fail if the entry can
        not handle the results returned.
        """
        self.return_val = self.instance.run_execution()
        rslts = self.instance.return_results

        try:
            if self.return_val == 0:
                action = self.file_actions[self.file_write]

                if self.csv_exist():
                    if self.file_write == 'Overlay':
                        self.csv_exist(delete=True)
                else:
                    # nothing to append to yet, so fall back to a fresh write
                    if action == 'a':
                        action = 'w'

                if self.temp_results == 'TempCSV':
                    self.return_val = self.processCSV(rslts, action)
                elif self.temp_results == 'ListList':
                    self.return_val = self.processListOfLists(rslts, action)
                elif self.temp_results == 'DictionaryList':
                    # BUGFIX: was misspelled 'DictrionaryList', so the
                    # documented DictionaryList option never matched and
                    # silently fell through to the no-op branch below
                    self.return_val = self.processDictionaryList(rslts, action)
                elif self.temp_results == 'Dataframe':
                    ### process the DataFrame
                    self.return_val = self.processDataFrame(rslts, action)
                else:
                    ## do nothing because the temp_results is equal to None or something else
                    self.log_message(
                        'nothing to do because temp_results is: ' +
                        str(self.temp_results))
                    self.return_val = 0

                if self.file_write == 'New' and self.temp_results != 'None':
                    '''Here we are making sure that results are written to a standard name (essentially making a copy)'''
                    new_path = self.get_csv_path()
                    self.file_write = 'Overlay'
                    self.csv = None
                    csv_path = self.get_csv_path()
                    self.csv_exist(delete=True)
                    shutil.copy(new_path, csv_path)

        except Exception as err1:
            self.log_message('problem processing entry results: \"' +
                             str(err1) + "\", Will continue execution",
                             log_level=self.log_error(),
                             name=self.name,
                             status='error')
            self.instance.set_message(traceback.format_exc())
            traceback.print_exc()
            self.return_val = 1

    def csv_exist(self, delete=False):
        # check (and optionally delete) the final output csv
        return self.does_file_exist(self.get_csv_name(),
                                    self.working_directory, delete)

    def tempcsv_exist(self):
        # check the temporary csv without deleting it
        return self.does_file_exist(self.temp_csv,
                                    self.working_directory,
                                    delete=False)

    def get_entry_type(self):
        # delegated to the instantiated implementation
        return self.instance.get_type()

    def processDataFrame(self, results, action):
        """
        If it is a dataframe, write it out to a temporary csv file and
        then process it like it was like that all along.

        Until I find a better way (or have more time), this is the way
        it'll be done.
        """
        results.to_csv(self.get_temp_csv_name(), index_label="Idx")
        return self.processCSV(results, action)

    def processDictionaryList(self, results, action):
        """
        Structure should be [{Header1:Row1_1,Header2:Row1_2},{Header1:Row2_1,Header2:Row2_2}]
        see: https://docs.python.org/2/library/csv.html#csv.DictWriter

        If you want it to be in a specific order you must set csv_header
        to a value in the connection, otherwise it'll be pulled from
        dictionary.keys() of the first occurrence.
        """
        # BUGFIX: referenced self.conn_instance, an attribute never set
        # anywhere in this class; the connector object lives in
        # self.instance (NOTE(review): assumes the connector exposes
        # csv_header -- confirm against connector implementations)
        if not self.instance.csv_header:
            self.instance.csv_header = results[0].keys()

        with open(self.get_csv_path(), action) as f:
            writer = csv.DictWriter(f,
                                    lineterminator='\n',
                                    fieldnames=self.instance.csv_header)
            if action != 'a':
                writer.writeheader()
            writer.writerows(results)

        return 0

    def processCSV(self, results, action):
        """
        Assumption is that the results is the filename.
        """
        self.log_message('processing temp CSV')
        if action == 'w':
            # just rename the temp file because it has already been deleted at this point
            os.rename(self.get_temp_csv_name(), self.get_csv_path())
        else:
            with open(self.get_csv_path(), action) as f:
                writer = csv.writer(f, lineterminator='\n')
                tempfile = open(self.get_temp_csv_name(), 'r')
                reader = csv.reader(tempfile)
                next(reader)  # skip the header row when appending
                for row in reader:
                    writer.writerow(row)
                tempfile.close()
            self.does_file_exist(self.temp_csv,
                                 self.working_directory,
                                 delete=True)
        return 0

    def processListOfLists(self, results, action):
        """
        Assumes that the first entry is the headers.
        Will delete on appends, so this could result in a data loss if you
        don't add the header -- seriously, why wouldn't you add the header,
        it's python, so it is easy!

        Structure should be [[header1, header2],[row1_1, row1_2],[row2_1, row2_2]]
        """
        with open(self.get_csv_path(), action) as f:
            writer = csv.writer(f, lineterminator='\n')
            if action == 'a':
                # remove the header recs
                results.pop(0)
            writer.writerows(results)
        return 0

    def get_temp_csv_name(self):
        """
        Full path of the per-entry temporary csv; on first access any
        leftover temp file from a prior run is deleted.
        """
        if self.temp_csv is None:
            self.temp_csv = self.name + "__TEMP.csv"
            self.does_file_exist(
                self.temp_csv, self.working_directory, delete=True
            )  # when getting the temporary name for the first time.. delete it if it exists
        return os.path.join(self.working_directory, self.temp_csv)

    def get_csv_path(self):
        """Full path of the final output csv."""
        return os.path.join(self.working_directory, self.get_csv_name())

    def get_csv_name(self):
        """
        Output csv filename; file_write == 'New' appends the run number so
        each run gets a distinct file.
        """
        if self.csv is None:
            suffix = ""
            if self.file_write == 'New':
                suffix = "_" + self.updates['num_run']
            self.csv = self.out_filename + suffix + ".csv"

        return self.csv
Exemplo n.º 26
0
class ConfigFileReader(Base_Type):
    """
    The config file reader reads in the config files found in the directories passed
    
    For each entry in a config file, it will create a ConfigEntry instance, which then can run the Processor or Connector
    and handle the output
    
    Here is the idea behind dependencies and successors
    If an entry has a dependency.. then if that dependency is also running as part of the schedule, the entry will run after that.. 
       if the dependency is not scheduled.. then the job will run as if it did not have any dependencies
       Successors are only used if you want to chain some entries together, if a config entry doesn't exist.. the 
         non-std options will be passed to that successor (in this case the successor name will need to be the src_implementation
         
        RIGHT NOW... SUCCESSORS have not been implemented yet.. may never be..
        
    THere can be more than one dependency and successor
    If you have a connector and a processor - you do not need dependencies (they are run separately by default)
       successors and dependencies only work within connectors and within processors
    """
    entries = {}
    
    source_files = {}
    entry_dict = {}
    run_date = RawDataUtilities.get_now()
    entry_types = {Base_Type.CONNECTOR:0,Base_Type.PROCESSOR:1}
    no_run_reasons = {0:"A Dependency is still in a failed status",1:"A dependency last run is still before this instance's last run",
                      2:"A dependency did not run because of dependency chain failure",3:"A Dependency Failed",
                      4:"Dependency does not exist in schedule"}
    
    ##
    #  These options determine basic functionality
    # Changing behavior would requre the changing of how these options are used
    # standard options do not get passed down to the instantiation directly
    #   - but they are accessible through the entry.. which is accessible
    standard_options = ['description',
                        'connection_type',
                        'last_processed',
                        'num_run',
                        'out_filename',
                        'src_implementation',
                        'run_frequency',
                        'temp_results', 
                        'working_directory',
                        'dependencies',
                        'successors',
                        'file_write',
        #                'entry_type',
                        'last_run'] 
    
    # update the updates dictionary to be used
    USED = '$$$$$   USED   $$$$$'

    def __init__(self, params):
        """
        Constructor
        """
        Base_Type.__init__(self)
        if 'run_date' in params:
            self.run_date = RawDataUtilities.date_from_string(params['run_date'])
        
        self.entry_name = params.get('entry_name',None)
        self.total_time = 0.0
        
        self.entries[0] = []
        self.entries[1] = []
        
        #self.set_uuid(params.get('uuid','---none---'))
        self.log_message("Initialization Complete", log_type='main', status='running', step='load configs',name='config_file_reader',log_level=self.log_info())
    
    def execute_entries(self, print_order=False):
        """
        iterate through entries and run them
        """
        self.order_entries()
        if print_order:
            self.log_message("Printing order of steps",status='start',step='execute entries')
            self.print_the_order()
            self.log_message("Printing order of steps",status='complete')
            return 0
        self.ent_result = {}
        start_time = RawDataUtilities.get_now()
        self.log_message("Now preparing to run config entries",status='start',step='execute entries')
        for i in xrange(len(self.entries)):
            if len(self.entries[i]) > 0:
                self.log_message('processing all configs of type: '+str(self.entries[i][0].get_entry_type())+", now..",status='running',name='congig_file_reader')
            for ent in self.entries[i]:
                if self.do_run(ent):
                    self.log_message(' About to run: '+ent.name, name=ent.name)
                    print "" if ent.name == 'kicker' else '... running '+ent.name
                    ent.execute_entry()
                    self.total_time = RawDataUtilities.get_diff(start_time,seconds=True)
                    self.ent_result[ent.name] = ent.return_val
                    ent.updates['last_processed'] = RawDataUtilities.string_from_date(RawDataUtilities.add_seconds_to_date(self.run_date, self.total_time))
                else:
                    if ent.no_run_reason:
                        print "NOT RUNNING: "+ent.name + " ("+ent.no_run_reason+")"
                    
        
        self.log_message("All entries have run",status='complete')
        total_ret = self.log_summary(self.update_all_configs())
        return total_ret
    
    def print_the_order(self):
        num = 0
        for i in range(len(self.entries)):
            kicker = self.entries[i][0]
            print '\nRun Order of '+kicker.instance.log_type+"s: \n"
            for x in range(len(self.entries[i])):
                ent = self.entries[i][x]
                if x > 0 and self.do_run(ent, pre_run=True):                    
                    num = num + 1
                    print "("+str(num)+") " + str(ent.order) + ": Name: "+ent.name
        print '\n'
    
    def do_run(self, _entry, pre_run = False):
        """
        supports the regular framework run and a single entry run from the command line
        """
        if _entry.name == 'zip_working_append1':
            pass
        if self.entry_name:
            return _entry.name == self.entry_name
        if _entry.run_now(self.run_date):
            for dep in _entry.dependencies:
                ## check if a prior run failed or didn't run because of a previous failure
                if self.dep_error(_entry, dep, pre_run):      
                    return False
            return True
                             
        #return _entry.run_now(self.run_date)
        return False
    
    def dep_error(self, _entry, _dep, pre_run=False):
        """
        This section of code is designed to determine if the entry should not run based on the results of a dependency
        As of 4/7/2015 there are 3 reasons why an entry can not run because of a dependency problem
        1) The last run of a dependency ended in "failure"
        2) The current run of the dependency failed
        3) The dependency has not ran since the last run of the entry (ensures not re-running a duplicate through)
         --- this is not a concern if everything is set to "New" in your chain
        """
        #for ent in _entries: # processors
        #    if _dep == ent.name:
        if _dep == 'kicker':
            return False
        ent = self.entry_dict.get(_dep,{'entry':None})['entry']
        if not ent:
            _entry.no_run_reason = self.no_run_reasons[4]
            return True
        
        if ent.return_val == None:
            if pre_run:
                if ent.last_run == 'failure':
                    _entry.no_run_reason = self.no_run_reasons[0]
                    return True
                if not self.do_run(self.entry_dict[_dep]['entry'],pre_run=True):
                    _entry.no_run_reason = "A Dependency Is Not Running"
                    return True
                else:
                    return False
                
            if ent.last_run == 'failure':
                self.log_message("Not running entry ("+_entry.name+"): because of a prior failure of dependency: "+_dep)
                _entry.no_run_reason = self.no_run_reasons[0]
                return True
            elif not _entry.first_run and _entry.last_processed >= ent.last_processed:
                self.log_message("No running entry ("+_entry.name+"): because dependency has not run since last run: "+_dep)
                _entry.no_run_reason = self.no_run_reasons[1]
                return True
            elif ent.no_run_reason in self.no_run_reasons.values():
                self.log_message("Not running entry ("+_entry.name+"): because of a failure in the dependency chain: " +_dep)
                _entry.no_run_reason = self.no_run_reasons[2]
                return True
                
        if ent.return_val > 0:
            self.log_message("Not running entry ("+_entry.name+"): because of a dependency failed: "+_dep)
            _entry.no_run_reason = self.no_run_reasons[3]
            return True
        return False
    
    def log_summary(self, no_run_list):
        """
        Wrap up the log entry and summarize why entries did not run if they were found
        """
        self.log_message('Entries not run' ,step='summary',status='start',name='config_file_reader')
        for name in no_run_list.keys():
            self.log_message('Did not run: '+name+', '+no_run_list[name],status='running')
        
        ret_total = 0
        for x in xrange(2):
            for ent in self.entries[x]:
                ret_total = ret_total + 0 if ent.return_val == None else ent.return_val
        self.log_message('Summary Complete, Run Time = ('+str(self.total_time)+')',status='complete')
        return ret_total
    
    def order_entries(self):
        """
        Go through the connectors and processors and if there is more than 1.. we'll create a kicker to start off that 
          group of entries and then order the rest
        """
        for key in self.entries.keys():
            entry_set = self.entries[key]
            if len(entry_set) > 0:
                sorted_set = self.order_entry_set(entry_set)
                self.entries[key] = sorted_set
        
    def order_entry_set(self, entry_set):
        """
        Create the kicker which will start all of the execution
        
        Then look for entries that have more than 1 dependency (all will have kicker as dependency.. except well .. kicker
        If they don't have more than 1, put that in the no_dep dictionary with their order number (should be 1) .. kicker will be 0
        
        Now loop through the dependency list removing each until the all dependencies have been identified and placed in the schedule
        then adding to the dependency dictionary..  this will go into a loop if 2 entries depend on each other.. If you do this.. you are a doofus!
        """
        kicker = self.clone_entry(entry_set[0])
        ret_set = []
        ret_set.append(kicker)
        no_dep = {}
        no_dep[kicker.name] = kicker.order
        
        dep = []
        _all = []
        for c_entry in entry_set:
            c_entry.order = 1
            _all.append(c_entry.name)
            if len(c_entry.dependencies) > 1:
                dep.append(c_entry)
            else:
                no_dep[c_entry.name] = c_entry.order
                ret_set.append(c_entry)
        
        while len(dep) > 0:
            for c_entry in dep:
                dl = copy.copy(c_entry.dependencies)
                for d in c_entry.dependencies:
                    if d == 'kicker':
                        """ the kicker of everything """
                        dl.remove(d)
                    elif not d in _all:
                        """ not scheduled today """
                        dl.remove(d)
                    elif d in no_dep:
                        """ found the dependency in the schedule.. make sure the order is at least 1 more than that """
                        dl.remove(d)
                        if not c_entry.order > no_dep[d]:
                            c_entry.order = no_dep[d] + 1

                if len(dl) == 0:
                    no_dep[c_entry.name] = c_entry.order
                    ret_set.append(c_entry)
                    dep.remove(c_entry)
                    
                    
        return sorted(ret_set, key=lambda c_entry: c_entry.order)
                
    def clone_entry(self, c_entry):
        kicker = copy.copy(c_entry)
        kicker.name = 'kicker'
        kicker.out_filename = 'kicker.csv'
        kicker.file_write = "Append"
        kicker.description = 'An entry designed to just start the process'
        instance = create_entry('kicker',c_entry.get_entry_type())
        instance.entry = kicker
        kicker.instance = instance
        kicker.dependencies = []
        kicker.run_frequency = 'Every'
        kicker.last_run = 'success'
        kicker.source_file = None
        kicker.temp_results = 'None'
        kicker.return_val = 0
        kicker.last_processed = RawDataUtilities.get_now()
        kicker.order = 0
        return kicker
         
    def process_config(self, filename):
        """
        the config file reader can process multiple config files
        """
        
        self.log_message("processing config file: "+filename)
        parser = SafeConfigParser()
        parser.optionxform = str
        parser.read(filename)
        self.source_files[filename] = parser
        
        sections = parser.sections()
        for section in sections:
            
            options = parser.options(section)
            params = {}
            non_std = {}
            for option in options:
                ## any option that ends with the word "password" will be encrypted and will automatically be decrypted upon
                ## processing        
                if option in self.standard_options:
                    params[option] = self.get_value(option, parser.get(section, option))
                else:
                    non_std[option] =  self.get_value(option, parser.get(section, option))

            params['non_std'] = non_std
            params['source_file'] = filename
            params['name']=section
            params['run_date']=self.run_date
            c_entry = ConfigEntry(params)
            if c_entry.ready: 
                entry_num = c_entry.get_entry_type()
                self.entries[self.entry_types[entry_num]].append(c_entry)
                self.entry_dict[section] = {'source':filename,'entry':c_entry}
                self.log_message("Loaded Config Entry: "+section)
            else:
                self.log_message("Failed to load config entry: "+section)

        return self.entries
    
    def get_value(self, option, option_val):
        """
        If this is a password entry it must end with "password", then the framework will decrypt it
        otherwise it assumes there was no decryption needed
        """
        if option.endswith('password'):
            return RawDataUtilities.decrypt_password(option_val)
        return option_val
    
    def update_all_configs(self):
        self.log_message("Update Entries",status='start',step='update configs')
        all_updates = {}
        no_run = {}
        for key in self.entries.keys():
            entry_list = self.entries[key]
            for c_entry in entry_list:
                if c_entry.name in self.ent_result and not c_entry.name == 'kicker':
                    if self.ent_result[c_entry.name] == 0:
                        all_updates[c_entry.name] = c_entry.updates
                        all_updates[c_entry.name]['last_run'] = 'success'
                    else:
                        msg = ' (Failure) Not updating: '+c_entry.name
                        if c_entry.instance.message:
                            msg = msg + ", Message: ("+c_entry.instance.message+")"
                        self.log_message(msg,name=c_entry.name,status='error',log_level=self.log_error())
                        all_updates[c_entry.name] = {'last_run':'failure'}
                else:
                    if not c_entry.name == 'kicker':
                        no_run[c_entry.name] = c_entry.no_run_reason
        
        for name in all_updates.keys():
            self.log_message('Updating config file for: '+name,name=name, status='running')
            self.update_config(name, all_updates[name], self.source_files[self.entry_dict[name]['source']])
            
        ## write out updates for each config file
        for sf in self.source_files:
            parser = self.source_files[sf]
            with open(sf, 'w') as configfile:
                parser.write(configfile)
        
        self.log_message('All config files updated',status='complete')
        return no_run
            
    
    def update_config(self, entry_name, entry_updates, parser): 

        for key in entry_updates.keys():
            parser.set(entry_name, key, entry_updates[key])