def __init__(self):
    """Set up the naming, logging and PID-file attributes of the importer.

    Reads ``self.email_subject`` and ``self.email_recipients``; neither is
    assigned here, so they must already be available on the instance
    (presumably as class attributes of the concrete importer -- confirm).
    """
    super(BaseImporter, self).__init__()

    # Identify the concrete importer by its class name and by the last
    # dotted component of the module that defines it.
    self.class_name = self.__class__.__name__
    self.module_name = self.__module__.rsplit('.', 1)[-1]

    # The logger is keyed on the module name and writes under the
    # configured logging directory.
    self.log_path = LOGGING_DIRECTORY
    self.log = set_up_logger(
        self.module_name,
        self.log_path,
        self.email_subject,
        email_recipients=self.email_recipients,
    )

    # PID file path lives in the temp directory, named after the module.
    self.pid_file_path = os.path.join(TMP_DIRECTORY, self.module_name)
def reload_indexp(working_dir, cycle):
    """Download the file for *cycle* and reload ``TABLE_NAME`` from it.

    Steps, in order:

    1. Expand ``working_dir`` (``~`` is honoured), create it if missing and
       set up the ``indexp_importer`` logger there.
    2. Download ``DOWNLOAD_URL.format(cycle)`` to ``LOCAL_FILE`` inside
       ``working_dir``.
    3. Insert ``cycle`` into ``fec_indexp_out_of_date_cycles``, empty
       ``TABLE_NAME``, bulk-load the downloaded CSV with ``COPY``, and set
       the ``cycle`` column on every row.
    4. Execute the statements in ``SQL_POSTLOAD_FILE`` and empty
       ``fec_indexp_out_of_date_cycles`` again.

    Any exception raised during steps 2-4 is logged and re-raised.
    Failures in step 1 propagate unlogged, because no logger exists yet.

    NOTE(review): ``cycle`` is interpolated straight into SQL text; confirm
    it only ever comes from a trusted source.
    """

    def execute_file(filename):
        """Execute every ';'-terminated statement found in *filename*.

        Lines whose first two characters are ``--`` are dropped before
        the text is split into statements.
        """
        with open(filename, 'r') as sql_file:
            contents = " ".join(
                [line for line in sql_file if line[0:2] != '--'])

        # Split on semi-colon. The last element is whatever trails the
        # final ';' (normally whitespace) and is discarded.
        statements = contents.split(';')[:-1]

        for statement in statements:
            log.info("Executing %s" % statement)
            c.execute(statement)

    # Done outside the ``try``: the handler below needs ``log`` to exist.
    # Previously a failure here reached ``log.error`` with ``log`` unbound
    # and masked the real error.
    working_dir = os.path.expanduser(working_dir)
    if not os.path.isdir(working_dir):
        os.makedirs(working_dir)

    log = set_up_logger('indexp_importer', working_dir, 'IndExp Importer Fail')

    try:
        local_file_path = os.path.join(working_dir, LOCAL_FILE)
        source_url = DOWNLOAD_URL.format(cycle)

        log.info("downloading %s to %s..." % (source_url, local_file_path))
        urllib.urlretrieve(source_url, local_file_path)

        log.info("uploading to table %s..." % TABLE_NAME)

        c = connection.cursor()

        # The cycle is recorded before the reload and removed after it --
        # presumably a marker that the table is mid-reload; confirm.
        c.execute("insert into fec_indexp_out_of_date_cycles (cycle) values ({})".format(cycle))
        c.execute("DELETE FROM %s" % TABLE_NAME)

        copy_sql = (
            "COPY %s (candidate_id, candidate_name, spender_id, spender_name, "
            "election_type, candidate_state, candidate_district, candidate_office, "
            "candidate_party, amount, date, aggregate_amount, support_oppose, "
            "purpose, payee, filing_number, amendment, transaction_id, "
            "image_number, received_date, prev_file_num) "
            "FROM STDIN CSV HEADER"
        ) % TABLE_NAME

        # Close the downloaded file as soon as the bulk load finishes.
        with open(local_file_path, 'r') as csv_file:
            c.copy_expert(copy_sql, csv_file)

        c.execute("update {} set cycle = {}".format(TABLE_NAME, cycle))

        execute_file(SQL_POSTLOAD_FILE)

        c.execute("delete from fec_indexp_out_of_date_cycles")

        log.info("Import Succeeded.")
    except Exception as e:
        log.error(e)
        raise
def __init__(self):
    """Initialise identity, logger and PID-file path for this importer.

    ``self.email_subject`` and ``self.email_recipients`` are read but not
    assigned here, so they must already exist on the instance (presumably
    declared on the concrete importer class -- confirm).
    """
    super(BaseImporter, self).__init__()

    # Name of the concrete class, and the final dotted component of the
    # module it was defined in.
    self.class_name = self.__class__.__name__
    self.module_name = self.__module__.rsplit('.', 1)[-1]

    # Logging goes to the directory configured in settings.
    self.log_path = settings.LOGGING_DIRECTORY
    self.log = set_up_logger(
        self.module_name,
        self.log_path,
        self.email_subject,
        email_recipients=self.email_recipients,
    )

    # PID file path: <settings.TMP_DIRECTORY>/<module name>.
    self.pid_file_path = os.path.join(
        settings.TMP_DIRECTORY, self.module_name)
def __init__(self, processing_dir, cycle, skip_download=False):
    """Prepare an importer for one cycle.

    Args:
        processing_dir: Working directory; a leading ``~`` is expanded.
        cycle: Cycle identifier. Its ``str()`` form is the key used to
            look up this cycle's entry in ``configs_by_cycle``.
        skip_download: Stored as ``self.skip_download``; not otherwise
            used in this constructor.

    Raises:
        Whatever the ``configs_by_cycle`` mapping raises for a cycle it
        has no entry for (``KeyError`` if it is a plain dict).
    """
    self.processing_dir = os.path.expanduser(processing_dir)

    # Load the per-cycle configuration table first, then select the
    # entry for the requested cycle.
    self.configs_by_cycle = self._get_configs_by_cycle()
    self.cycle = cycle
    cycle_key = str(self.cycle)
    self.cycle_config = self.configs_by_cycle[cycle_key]

    self.skip_download = skip_download

    self.log = set_up_logger(
        'fec_importer',
        self.processing_dir,
        'Unhappy FEC Importer',
    )
class ReconcilerService(object):
    """Parse a subject name and search the matching reconciler for it.

    The entity type chooses both the name cleaver used for parsing and the
    reconciler used for searching:

    * ``'politician'`` -> ``PoliticianNameCleaver`` / ``PoliticianReconciler``
    * ``'individual'`` -> ``IndividualNameCleaver`` / ``IndividualReconciler``
    * anything else    -> ``OrganizationNameCleaver`` / ``OrganizationReconciler``
    """

    # One logger shared by every instance; created when the class body runs.
    log = set_up_logger(
        'ReconcilerService',
        settings.LOGGING_DIRECTORY,
        'Unhappy Reconciler',
        email_recipients=settings.LOGGING_EMAIL['recipients'])

    def __init__(self, subject_name, entity_type, *args, **kwargs):
        """Store the subject to reconcile.

        Args:
            subject_name: Raw name handed to the name cleaver.
            entity_type: ``'politician'``, ``'individual'``, or any other
                value (treated as an organization).
            *args: Accepted and ignored.
            **kwargs: Only ``properties`` is read; it is stored as
                ``self.subject_properties`` (``None`` when absent).
        """
        self.subject_name = subject_name
        self.entity_type = entity_type
        self.subject_properties = kwargs.get('properties')

        self.log.info("Initializing ReconcilerService")

    def start(self, limit=None):
        """Parse the subject name and return the reconciler's search result.

        Args:
            limit: Accepted for interface compatibility; not used by this
                method.

        Returns:
            ``[]`` when the name could not be parsed, otherwise whatever
            ``self.reconciler.search`` returns.
        """
        if self.entity_type == 'politician':
            cleaver_class = PoliticianNameCleaver
            reconciler_class = PoliticianReconciler
        elif self.entity_type == 'individual':
            cleaver_class = IndividualNameCleaver
            reconciler_class = IndividualReconciler
        else:
            cleaver_class = OrganizationNameCleaver
            reconciler_class = OrganizationReconciler

        # The cleaver's class name is reported in the log line; this
        # assumes each class's __name__ matches the name it is imported
        # under here.
        self.log.info(
            u"Trying name: {} with type: {} on {}".format(
                self.subject_name, self.entity_type,
                cleaver_class.__name__))

        subject_name_obj = self.try_name_cleaver_flavor(cleaver_class)

        # The reconciler is instantiated even when parsing failed, so
        # ``self.reconciler`` is always set after ``start`` has run.
        self.reconciler = reconciler_class()

        if not subject_name_obj:
            return []

        # This service instance itself is passed as the first positional
        # argument to ``search``.
        return self.reconciler.search(
            self, subject_name_obj,
            subject_properties=self.subject_properties)

    def try_name_cleaver_flavor(self, cleaver_class):
        """Parse ``self.subject_name`` with *cleaver_class*.

        Returns:
            The parsed name object, or ``None`` when the cleaver reports a
            processing failure or raises. Failures are logged at debug
            level and never propagate; parsing is best-effort.
        """
        try:
            subject_name = cleaver_class(self.subject_name).parse()

            # skip if we didn't get the right kind of result
            if cleaver_class.name_processing_failed(subject_name):
                self.log.debug(
                    'We didn\'t get a parsed name object back from %s.'
                    % cleaver_class.__name__)
                return None
        except Exception as e:
            self.log.debug('Encountered an exception during name parsing.')
            self.log.debug(e)
            return None

        return subject_name
def reload_indexp(working_dir, cycle):
    """Download the file for *cycle* and reload ``TABLE_NAME`` from it.

    Steps, in order:

    1. Expand ``working_dir`` (``~`` is honoured), create it if missing and
       set up the ``indexp_importer`` logger there.
    2. Download ``DOWNLOAD_URL.format(cycle)`` to ``LOCAL_FILE`` inside
       ``working_dir``.
    3. Insert ``cycle`` into ``fec_indexp_out_of_date_cycles``, empty
       ``TABLE_NAME``, bulk-load the downloaded CSV with ``COPY`` (the
       COPY statement passes ``NULL ' '``), and set the ``cycle`` column
       on every row.
    4. Execute the statements in ``SQL_POSTLOAD_FILE`` and empty
       ``fec_indexp_out_of_date_cycles`` again.

    Any exception raised during steps 2-4 is logged and re-raised.
    Failures in step 1 propagate unlogged, because no logger exists yet.

    NOTE(review): ``cycle`` is interpolated straight into SQL text; confirm
    it only ever comes from a trusted source.
    """

    def execute_file(filename):
        """Execute every ';'-terminated statement found in *filename*.

        Lines whose first two characters are ``--`` are dropped before
        the text is split into statements.
        """
        with open(filename, 'r') as sql_file:
            contents = " ".join(
                [line for line in sql_file if line[0:2] != '--'])

        # Split on semi-colon. The last element is whatever trails the
        # final ';' (normally whitespace) and is discarded.
        statements = contents.split(';')[:-1]

        for statement in statements:
            log.info("Executing %s" % statement)
            c.execute(statement)

    # Done outside the ``try``: the handler below needs ``log`` to exist.
    # Previously a failure here reached ``log.error`` with ``log`` unbound
    # and masked the real error.
    working_dir = os.path.expanduser(working_dir)
    if not os.path.isdir(working_dir):
        os.makedirs(working_dir)

    log = set_up_logger('indexp_importer', working_dir,
                        'IndExp Importer Fail')

    try:
        local_file_path = os.path.join(working_dir, LOCAL_FILE)
        source_url = DOWNLOAD_URL.format(cycle)

        log.info("downloading %s to %s..." % (source_url, local_file_path))
        urllib.urlretrieve(source_url, local_file_path)

        log.info("uploading to table %s..." % TABLE_NAME)

        c = connection.cursor()

        # The cycle is recorded before the reload and removed after it --
        # presumably a marker that the table is mid-reload; confirm.
        c.execute(
            "insert into fec_indexp_out_of_date_cycles (cycle) values ({})".
            format(cycle))
        c.execute("DELETE FROM %s" % TABLE_NAME)

        copy_sql = (
            "COPY %s (candidate_id, candidate_name, spender_id, spender_name, "
            "election_type, candidate_state, candidate_district, candidate_office, "
            "candidate_party, amount, date, aggregate_amount, support_oppose, "
            "purpose, payee, filing_number, amendment, transaction_id, "
            "image_number, received_date, prev_file_num) "
            "FROM STDIN CSV HEADER NULL ' ' "
        ) % TABLE_NAME

        # Close the downloaded file as soon as the bulk load finishes.
        with open(local_file_path, 'r') as csv_file:
            c.copy_expert(copy_sql, csv_file)

        c.execute("update {} set cycle = {}".format(TABLE_NAME, cycle))

        execute_file(SQL_POSTLOAD_FILE)

        c.execute("delete from fec_indexp_out_of_date_cycles")

        log.info("Import Succeeded.")
    except Exception as e:
        log.error(e)
        raise
def __init__(self, processing_dir, config=FEC_CONFIG):
    """Bind the importer to a working directory and a configuration.

    Args:
        processing_dir: Working directory; a leading ``~`` is expanded.
        config: Configuration object stored as ``self.FEC_CONFIG``.
            Defaults to the module-level ``FEC_CONFIG``.
    """
    self.processing_dir = os.path.expanduser(processing_dir)
    self.FEC_CONFIG = config

    # The logger is pointed at the (expanded) processing directory.
    self.log = set_up_logger(
        'fec_importer',
        self.processing_dir,
        'Unhappy FEC Importer',
    )