def _process_data(self, data, processor_id, reference_mapping):
    """Extract one value from ``data`` and post-process it when possible.

    ``reference_mapping[processor_id]`` is handed to
    ``self._get_from_model_data`` as ``attrib`` to pull the raw value out
    of ``data``.  When ``self._processors`` holds a truthy entry under
    ``processor_id``, the result of its ``process`` method is returned;
    otherwise the raw extracted value is returned unchanged.
    """
    logger.debug('processor_id=%s' % (processor_id,))
    raw_value = self._get_from_model_data(
        data, attrib=reference_mapping[processor_id])
    handler = self._processors.get(processor_id)
    if not handler:
        # No processor registered for this id: pass the value through.
        return raw_value
    return handler.process(raw_value)
def _process_data(self, data, processor_id, reference_mapping):
    """Extract one value from ``data`` and post-process it when possible.

    ``reference_mapping[processor_id]`` is handed to
    ``self._get_from_model_data`` as ``attrib`` to pull the raw value out
    of ``data``.  When ``self._processors`` holds a truthy entry under
    ``processor_id``, the result of its ``process`` method is returned;
    otherwise the raw extracted value is returned unchanged.
    """
    logger.debug('processor_id=%s' % (processor_id,))
    raw_value = self._get_from_model_data(
        data, attrib=reference_mapping[processor_id])
    handler = self._processors.get(processor_id)
    if not handler:
        # No processor registered for this id: pass the value through.
        return raw_value
    return handler.process(raw_value)
def get_data_from(self, url):
    """Fetch ``url`` with HTTP Basic authentication and sanitize the body.

    Returns an empty list without touching the network when ``url`` is
    falsy.  Otherwise the request is sent with an ``Authorization`` header
    built from ``self._auth_string`` and the response body is returned
    after being passed through ``self._data_sanitizer.clean``.

    Errors raised by ``urllib2`` while opening or reading the URL are not
    caught here and propagate to the caller.
    """
    logger.debug(url)
    if not url:
        return []
    request = urllib2.Request(url)
    request.add_header("Authorization", "Basic %s" % self._auth_string)
    result = urllib2.urlopen(request)
    try:
        logger.debug("Result found:")
        payload = result.read()
    finally:
        # Always release the connection, even when reading fails; the
        # original left the response handle to the garbage collector.
        result.close()
    return self._data_sanitizer.clean(payload)
def extract_model_data_from(self, data):
    """Build the id fields and data fields of one model from ``data``.

    Every key of ``self._model_data_mapping`` and of
    ``self._model_id_mapping`` is resolved through ``self._process_data``.
    The id dictionary is then updated with ``self._model_default``, so a
    default wins over an extracted id that shares its key.

    As a side effect, a ``None`` ``self._model_default`` is replaced by an
    empty dict.

    Returns a ``(model_ids, model_data)`` tuple of dictionaries.
    """
    if self._model_default is None:
        self._model_default = {}
    logger.debug(
        "id_mapping: %s \n data_mapping: %s, \n, default: %s, processors:%s"
        % (self._model_id_mapping, self._model_data_mapping,
           self._model_default, self._processors))

    # Data fields are resolved before id fields, as in the original.
    model_data = {}
    for field in self._model_data_mapping:
        model_data[field] = self._process_data(
            data, processor_id=field,
            reference_mapping=self._model_data_mapping)

    model_ids = {}
    for field in self._model_id_mapping:
        model_ids[field] = self._process_data(
            data, processor_id=field,
            reference_mapping=self._model_id_mapping)
    model_ids.update(self._model_default)

    logger.debug("Model_info: \n %s \n %s" % (model_ids, model_data))
    return model_ids, model_data
def _clean_for_reading_as_json(self, data):
    """Strip known noise from a raw payload before it is parsed as JSON.

    Every string in ``self._discard_values`` is removed from ``data``.
    Then, in order: "self" becomes "this", a space directly before a colon
    is dropped, newlines are removed, and every backslash-u sequence is
    removed (the characters that followed it are kept).  Finally the
    result is stripped of surrounding whitespace and returned.
    """
    logger.debug("cleaning data")
    logger.debug('Discard%s', self._discard_values)
    for discard_value in self._discard_values:
        data = data.replace(discard_value, '')
    # The backslash is escaped explicitly: a bare '\u' is an incomplete
    # unicode escape (a SyntaxError on Python 3 and in unicode literals),
    # while '\\u' is the same two characters on every interpreter.
    replacements = (
        ("self", "this"),
        (" :", ":"),
        ('\n', ''),
        ('\\u', ''),
    )
    for needle, substitute in replacements:
        data = data.replace(needle, substitute)
    return data.strip()
def _clean_for_reading_as_json(self, data):
    """Strip known noise from a raw payload before it is parsed as JSON.

    Every string in ``self._discard_values`` is removed from ``data``.
    Then, in order: "self" becomes "this", a space directly before a colon
    is dropped, newlines are removed, and every backslash-u sequence is
    removed (the characters that followed it are kept).  Finally the
    result is stripped of surrounding whitespace and returned.
    """
    logger.debug("cleaning data")
    logger.debug('Discard%s', self._discard_values)
    for discard_value in self._discard_values:
        data = data.replace(discard_value, '')
    # The backslash is escaped explicitly: a bare '\u' is an incomplete
    # unicode escape (a SyntaxError on Python 3 and in unicode literals),
    # while '\\u' is the same two characters on every interpreter.
    replacements = (
        ("self", "this"),
        (" :", ":"),
        ('\n', ''),
        ('\\u', ''),
    )
    for needle, substitute in replacements:
        data = data.replace(needle, substitute)
    return data.strip()
def clean(self, data):
    """Sanitize a raw payload and decode it as JSON.

    The payload is first normalised by ``self._clean_for_reading_as_json``
    and then parsed with ``json.loads``.

    Returns:
        * the whole decoded document when ``self._data_key`` is falsy;
        * otherwise the value stored under ``self._data_key``, or ``None``
          when that value is missing or falsy;
        * an empty list when anything in the process raises.  This is a
          deliberate best-effort: the failure is logged, not re-raised.
    """
    try:
        readable_data = self._clean_for_reading_as_json(data)
        logger.debug("cleaned data ----->%s", readable_data)
        logger.debug('eval the string to get dict')
        cleaned_data = json.loads(readable_data)
        if not self._data_key:
            return cleaned_data
        logger.debug('get the data')
        # Missing and falsy values are both reported as None.
        return cleaned_data.get(self._data_key) or None
    except Exception as e:
        logger.debug(data)
        # logger.exception keeps the same message as logger.error(e) but
        # also records the exception type and traceback, which the
        # swallowed error would otherwise lose.
        logger.exception(e)
        return []
def clean(self, data):
    """Sanitize a raw payload and decode it as JSON.

    The payload is first normalised by ``self._clean_for_reading_as_json``
    and then parsed with ``json.loads``.

    Returns:
        * the whole decoded document when ``self._data_key`` is falsy;
        * otherwise the value stored under ``self._data_key``, or ``None``
          when that value is missing or falsy;
        * an empty list when anything in the process raises.  This is a
          deliberate best-effort: the failure is logged, not re-raised.
    """
    try:
        readable_data = self._clean_for_reading_as_json(data)
        logger.debug("cleaned data ----->%s", readable_data)
        logger.debug('eval the string to get dict')
        cleaned_data = json.loads(readable_data)
        if not self._data_key:
            return cleaned_data
        logger.debug('get the data')
        # Missing and falsy values are both reported as None.
        return cleaned_data.get(self._data_key) or None
    except Exception as e:
        logger.debug(data)
        # logger.exception keeps the same message as logger.error(e) but
        # also records the exception type and traceback, which the
        # swallowed error would otherwise lose.
        logger.exception(e)
        return []
def extract_model_data_from(self, data):
    """Build the id fields and data fields of one model from ``data``.

    Every key of ``self._model_data_mapping`` and of
    ``self._model_id_mapping`` is resolved through ``self._process_data``.
    The id dictionary is then updated with ``self._model_default``, so a
    default wins over an extracted id that shares its key.

    As a side effect, a ``None`` ``self._model_default`` is replaced by an
    empty dict.

    Returns a ``(model_ids, model_data)`` tuple of dictionaries.
    """
    if self._model_default is None:
        self._model_default = {}
    logger.debug(
        "id_mapping: %s \n data_mapping: %s, \n, default: %s, processors:%s"
        % (self._model_id_mapping, self._model_data_mapping,
           self._model_default, self._processors))

    # Data fields are resolved before id fields, as in the original.
    model_data = {}
    for field in self._model_data_mapping:
        model_data[field] = self._process_data(
            data, processor_id=field,
            reference_mapping=self._model_data_mapping)

    model_ids = {}
    for field in self._model_id_mapping:
        model_ids[field] = self._process_data(
            data, processor_id=field,
            reference_mapping=self._model_id_mapping)
    model_ids.update(self._model_default)

    logger.debug("Model_info: \n %s \n %s" % (model_ids, model_data))
    return model_ids, model_data
def _create_model_using(self, data, parent=None):
    """Create or update one model from ``data`` and recurse into children.

    Steps, all performed inside a single best-effort ``try``:

    1. ``self._data_extractor.extract_model_data_from(data)`` yields the
       id fields and data fields.
    2. ``self._model.get_or_create(**ids).update(**fields)`` produces the
       model.
    3. When ``parent`` is truthy and ``self._has_relationship`` is not the
       empty string, the parent attribute of that name is called with the
       model.
    4. Every entry in ``self.children`` has ``end_node.execute`` called
       with the same ``data`` and the new model as ``parent``.

    Any exception is logged and swallowed, so one bad record does not
    abort the caller's loop.  Returns ``None``.
    """
    logger.debug("model_data %s", data)
    try:
        extractor = self._data_extractor
        model_ids, model_data = extractor.extract_model_data_from(data)
        model = self._model.get_or_create(**model_ids).update(**model_data)
        logger.debug(model)
        if parent and self._has_relationship != '':
            logger.debug('%s ----- %s ----> %s',
                         parent, self._has_relationship, model)
            getattr(parent, self._has_relationship)(model)
        for child in self.children:
            child.end_node.execute(data=data, parent=model)
    except Exception as e:
        # Same message as logger.error(e), plus the exception type and
        # traceback so the swallowed failure can actually be located.
        logger.exception(e)
def execute(self, data=None, parent=None):
    """Fetch this node's data and create a model for each item received.

    The URL comes from ``self._url_extractor.get_next_url(data)`` and is
    fetched through ``self._informer.using(self._data_sanitizer)``.

    * A ``None`` response ends the call immediately; ``self.next`` is not
      run in that case.
    * A list response creates one model per element.
    * Any other response creates a single model.

    Afterwards, when ``self.next`` is truthy, its ``end_node.execute()``
    is invoked with no arguments.
    """
    logger.debug('begin execute')
    url = self._url_extractor.get_next_url(data)
    informer = self._informer.using(self._data_sanitizer)
    response = informer.get_data_from(url)
    if response is None:
        logger.debug("No data")
        return

    if not isinstance(response, list):
        self._create_model_using(response, parent=parent)
    else:
        logger.debug("Is list")
        for item in response:
            self._create_model_using(item, parent)

    if self.next:
        self.next.end_node.execute()
def _create_model_using(self, data, parent=None):
    """Create or update one model from ``data`` and recurse into children.

    Steps, all performed inside a single best-effort ``try``:

    1. ``self._data_extractor.extract_model_data_from(data)`` yields the
       id fields and data fields.
    2. ``self._model.get_or_create(**ids).update(**fields)`` produces the
       model.
    3. When ``parent`` is truthy and ``self._has_relationship`` is not the
       empty string, the parent attribute of that name is called with the
       model.
    4. Every entry in ``self.children`` has ``end_node.execute`` called
       with the same ``data`` and the new model as ``parent``.

    Any exception is logged and swallowed, so one bad record does not
    abort the caller's loop.  Returns ``None``.
    """
    logger.debug("model_data %s", data)
    try:
        extractor = self._data_extractor
        model_ids, model_data = extractor.extract_model_data_from(data)
        model = self._model.get_or_create(**model_ids).update(**model_data)
        logger.debug(model)
        if parent and self._has_relationship != '':
            logger.debug('%s ----- %s ----> %s',
                         parent, self._has_relationship, model)
            getattr(parent, self._has_relationship)(model)
        for child in self.children:
            child.end_node.execute(data=data, parent=model)
    except Exception as e:
        # Same message as logger.error(e), plus the exception type and
        # traceback so the swallowed failure can actually be located.
        logger.exception(e)
def execute(self, data=None, parent=None):
    """Fetch this node's data and create a model for each item received.

    The URL comes from ``self._url_extractor.get_next_url(data)`` and is
    fetched through ``self._informer.using(self._data_sanitizer)``.

    * A ``None`` response ends the call immediately; ``self.next`` is not
      run in that case.
    * A list response creates one model per element.
    * Any other response creates a single model.

    Afterwards, when ``self.next`` is truthy, its ``end_node.execute()``
    is invoked with no arguments.
    """
    logger.debug('begin execute')
    url = self._url_extractor.get_next_url(data)
    informer = self._informer.using(self._data_sanitizer)
    response = informer.get_data_from(url)
    if response is None:
        logger.debug("No data")
        return

    if not isinstance(response, list):
        self._create_model_using(response, parent=parent)
    else:
        logger.debug("Is list")
        for item in response:
            self._create_model_using(item, parent)

    if self.next:
        self.next.end_node.execute()