def __init__(self):
    """Set up the Redis client, queue reader/writer and empty per-run state.

    The queue name, the reader's max-buffer setting and the contact POST
    endpoint all come from environment variables; a missing variable
    raises ``KeyError``.
    """
    env = os.environ

    self.redis_client = RedisConnectionManager().get_client()

    # NOTE: the variable really is spelled "CONTATCS" in the environment.
    update_queue = env["CONTATCS_FOR_UPDATE_QUEUE"]
    self.queue_to_read = update_queue
    self.redis_reader = RedisReader(update_queue, env["MAX_BUFFER_SENDER"])
    self.redis_writer = RedisWriter()

    self.contact_post_api_url = env["CONTACT_POST_API_URL"]

    # Filled on every run: the batch just read and the ids already handled.
    self.contacts_for_update = []
    self.processed_contacts = {}
def __init__(self):
    """Resolve queue names from the environment and build the Redis helpers.

    ``self.contacts`` maps each output queue name to the list of contacts
    that will be written to it.
    """
    env = os.environ

    # NOTE: the variables really are spelled "CONTATCS" in the environment.
    prediction_queue = env["CONTATCS_FOR_PREDICTION_QUEUE"]
    update_queue = env["CONTATCS_FOR_UPDATE_QUEUE"]
    self.contacts_for_prediction = prediction_queue
    self.contacts_for_update = update_queue

    source_queue = env["PREDICTABLE_CONTACTS_QUEUE"]
    buffer_limit = env["MAX_BUFFER_PREPROCESSOR"]
    self.redis_reader = RedisReader(source_queue, buffer_limit)
    self.redis_writer = RedisWriter()

    # One independent bucket per output queue.
    self.contacts = {
        queue_name: [] for queue_name in (prediction_queue, update_queue)
    }
def __init__(self):
    """Build the Redis helpers, parse the feature columns and train the model.

    Queue names, the reader's max-buffer setting and the training column
    list are read from environment variables. ``ModelTrainer()()`` is
    invoked here, so constructing this object runs the trainer.
    """
    env = os.environ

    # NOTE: the variables really are spelled "CONTATCS" in the environment.
    self.queue_to_read = env["CONTATCS_FOR_PREDICTION_QUEUE"]
    self.contacts_for_update = env["CONTATCS_FOR_UPDATE_QUEUE"]

    buffer_limit = env["MAX_BUFFER_MODEL"]
    self.redis_reader = RedisReader(self.queue_to_read, buffer_limit)
    self.redis_writer = RedisWriter()

    # The variable is evaluated twice: the first pass yields a string that
    # is itself a Python literal, the second pass yields the column list.
    columns_literal = ast.literal_eval(env["DATA_TRAIN_COLUMNS"])
    self.relevant_columns = ast.literal_eval(columns_literal)

    trainer = ModelTrainer()
    train_objects = trainer()
    self.model = train_objects["model"]
    self.reference_dicts = train_objects["reference_dicts"]

    self.contacts = {self.contacts_for_update: []}
    self.dataframe = pd.DataFrame()
class HubSpotWriter:
    """Sends the "ML" prediction of queued contacts to HubSpot."""

    def __init__(self):
        """Set up the Redis client, queue reader/writer and per-run state.

        Queue name, max-buffer setting and the contact POST endpoint are
        read from environment variables (``KeyError`` if one is missing).
        """
        self.redis_client = RedisConnectionManager().get_client()
        self.queue_to_read = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        self.redis_reader = RedisReader(
            self.queue_to_read, os.environ["MAX_BUFFER_SENDER"]
        )
        self.redis_writer = RedisWriter()
        self.contact_post_api_url = os.environ["CONTACT_POST_API_URL"]
        self.contacts_for_update = []
        self.processed_contacts = {}

    def __call__(self):
        """Read a batch from the queue, send it and drop the handled items.

        Items are removed from the queue only when at least one contact
        was processed in this run.
        """
        self.contacts_for_update = self.redis_reader()
        self._send_contacts()
        if self.processed_contacts:
            self.redis_reader.remove_items()

    @try_again
    def _send_contacts(self):
        """POST each distinct contact's prediction to HubSpot.

        A contact id is sent at most once per batch. When HubSpot answers
        429 the contact is written back to the queue it was read from so
        it can be retried later; it is still marked as processed for this
        batch.

        Raises:
            KeyError: if a contact lacks ``"contact_id"`` or ``"prediction"``.
        """
        self.processed_contacts = {}
        for contact in self.contacts_for_update:
            contact_id = contact["contact_id"]
            # Explicit membership test: a raising lookup would also swallow
            # a missing "contact_id" key and raises on every new contact.
            if contact_id in self.processed_contacts:
                continue

            contact_payload = json.dumps(
                {"properties": [{"property": "ML", "value": contact["prediction"]}]}
            )
            url = f"{self.contact_post_api_url}vid/{contact_id}/profile?"
            # The token is re-read for every request rather than cached.
            access_token = self.redis_client.get("access_token").decode("utf-8")
            headers = {"Authorization": f"Bearer {access_token}"}
            res = requests.post(url=url, headers=headers, data=contact_payload)
            logger.info(
                msg=f"status_code: {res.status_code}", extra={"full_msg": res.text}
            )
            if res.status_code == 429:
                logger.error(
                    msg="429 - Ten sencondly rolling limit reached",
                    extra={"full_msg": res.text},
                )
                # Re-queue the rate-limited contact for a later run.
                contact_wrapper = {self.queue_to_read: [contact]}
                self.redis_writer(contact_wrapper)
            self.processed_contacts[contact_id] = True
def test_read_from_redis(mocked_get_client):
    """RedisReader output for a two-entry queue matches _EXPECTED_RESPONSE."""
    # arrange: the mocked client's lrange hands back two raw queue entries
    raw_entries = [
        b"[{'objectId': 1549880507, 'subscriptionType': 'deal.propertyChange'}]",
        b"[{'objectId': 383611195, 'subscriptionType': 'contact.creation'}]",
    ]
    mocked_get_client.return_value.lrange.return_value = raw_entries

    # act: queue "QUEUE_A" with a max buffer of 2
    reader = RedisReader("QUEUE_A", 2)
    result = reader()

    # assert
    assert result == _EXPECTED_RESPONSE
def __init__(self, queue_to_read=None):
    """Read the HubSpot/queue settings from the environment and build helpers.

    Args:
        queue_to_read: name of the queue to consume; when ``None`` the
            ``QUEUE_TO_READ`` environment variable is used instead.
    """
    env = os.environ

    self.redis_client = RedisConnectionManager().get_client()

    self.deal_api_url = env["DEAL_API_URL"]
    self.contact_api_url = env["CONTACT_GET_API_URL"]
    self.contact_creation_subscription = env["CONTACT_CREATION_SUBSCRIPTION"]

    # NOTE: "CONTATCS" is the actual spelling of the environment variable.
    self.predictable_contacts = env["PREDICTABLE_CONTACTS_QUEUE"]
    self.contacts_for_update = env["CONTATCS_FOR_UPDATE_QUEUE"]

    # The environment is consulted only when no queue was passed in.
    self.queue_to_read = (
        env["QUEUE_TO_READ"] if queue_to_read is None else queue_to_read
    )

    # NOTE: "COLLETOR" is the actual spelling of the environment variable.
    buffer_limit = env["MAX_BUFFER_COLLETOR"]
    self.redis_reader = RedisReader(self.queue_to_read, buffer_limit)
    self.redis_writer = RedisWriter()

    self.contacts_id_to_fetch = []
    # One bucket of contacts per output queue.
    self.contacts = {
        self.predictable_contacts: [],
        self.contacts_for_update: [],
    }
    self.processed_items = {}
class Predictor:
    """Scores contacts read from Redis and queues the results for update."""

    def __init__(self):
        """Build the Redis helpers, parse the feature columns, train the model.

        Queue names, the max-buffer setting and the training columns come
        from environment variables. ``ModelTrainer()()`` runs here.
        """
        self.queue_to_read = os.environ["CONTATCS_FOR_PREDICTION_QUEUE"]
        self.contacts_for_update = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        self.redis_reader = RedisReader(self.queue_to_read,
                                        os.environ["MAX_BUFFER_MODEL"])
        self.redis_writer = RedisWriter()
        # Evaluated twice: the variable holds a string that is itself a
        # Python literal describing the column list.
        self.relevant_columns = ast.literal_eval(
            ast.literal_eval(os.environ["DATA_TRAIN_COLUMNS"]))
        train_objects = ModelTrainer()()
        self.model = train_objects["model"]
        self.reference_dicts = train_objects["reference_dicts"]
        self.contacts = {self.contacts_for_update: []}
        self.dataframe = pd.DataFrame()

    def __call__(self):
        """Read a batch, predict, write the results and drop the batch.

        Nothing is written or removed when the reader returns no contacts.
        When label encoding fails, every contact in the batch receives the
        fallback prediction ``"400"`` instead of a model score.
        """
        self.contacts_for_prediction = self.redis_reader()
        if not self.contacts_for_prediction:
            return

        logger.info(msg="Data extracted from redis")
        self.dataframe = pd.DataFrame(self.contacts_for_prediction)
        if self._convert_strings_to_int() is None:
            self._build_contacts_for_update()
        else:
            self._check_for_strings_in_dataframe()
            predictions = self._generate_predictions()
            self._build_contacts_for_update(predictions=predictions)
        self.redis_writer(self.contacts)
        self.redis_reader.remove_items()
        self.contacts[self.contacts_for_update].clear()

    def _convert_estunidos_to_eua(self):
        """Rename the country "Estados Unidos" to "EUA" in the dataframe."""
        is_usa = self.dataframe["País de interesse"] == "Estados Unidos"
        self.dataframe.loc[is_usa, "País de interesse"] = "EUA"

    def _convert_strings_to_int(self):
        """Replace known labels by their integer codes from the references.

        Returns:
            ``True`` on success, ``None`` when a referenced column is
            missing from the dataframe (the error is logged).
        """
        try:
            for column, codes in self.reference_dicts.items():
                for label, code in codes.items():
                    matches = self.dataframe[column] == label
                    self.dataframe.loc[matches, column] = code
        except KeyError as err:
            logger.error(
                msg="Unknown column. It's not possible to generate predictions",
                extra={"Full msg error": err},
            )
            return None
        return True

    def _check_for_strings_in_dataframe(self):
        """Give labels that are still strings a fresh code for their column.

        Any string left after label encoding is unknown to the references;
        it is replaced by one more than the highest known code of that
        column. Columns without a reference dict are only logged.
        """
        for column in self.dataframe.columns:
            is_string = self.dataframe[column].apply(
                lambda value: isinstance(value, str))
            if not is_string.any():
                continue
            try:
                highest_code = self._find_highest_code(
                    self.reference_dicts[column])
            except KeyError:
                logger.info(
                    msg=f"Column {column} not found in the references")
                continue
            self.dataframe.loc[is_string, column] = highest_code

    def _find_highest_code(self, reference_dict):
        """Return one more than the largest code, never less than 1."""
        return max([0, *reference_dict.values()]) + 1

    def _generate_predictions(self):
        """Run the model on the relevant columns and return percentages.

        "Ainda não sei" durations are turned into NaN before the empty
        fields are filled by ``ModelTrainer.fill_empty_fields``.
        """
        duration_column = "Qual a duração do seu intercâmbio?"
        undecided = self.dataframe[duration_column] == "Ainda não sei"
        self.dataframe.loc[undecided, duration_column] = np.nan

        features = self.dataframe.loc[:, self.relevant_columns]
        features = ModelTrainer.fill_empty_fields(features)
        predictions = self.model.predict_proba(features) * 100
        logger.info(msg="Predicitions generated")
        return predictions

    def _build_contacts_for_update(self, predictions=None):
        """Append one ``{"contact_id", "prediction"}`` dict per dataframe row.

        Args:
            predictions: per-row sequences of scores; the first score of
                each row is rounded to two decimals and stored as a string.
                When ``None`` every contact gets the fallback ``"400"``.
        """
        bucket = self.contacts[self.contacts_for_update]
        for position, contact_id in enumerate(self.dataframe["contact_id"]):
            if predictions is None:
                value = "400"
            else:
                value = str(round(predictions[position][0], 2))
            # The key must be spelled "prediction": downstream consumers
            # read exactly that key.
            bucket.append({"contact_id": contact_id, "prediction": value})
class HubSpotFetcher:
    """Fetches deals and contact information in Hubspot"""

    def __init__(self, queue_to_read=None):
        """Read the HubSpot/queue settings from the environment.

        Args:
            queue_to_read: queue to consume; when ``None`` the
                ``QUEUE_TO_READ`` environment variable is used.
        """
        self.redis_client = RedisConnectionManager().get_client()
        self.deal_api_url = os.environ["DEAL_API_URL"]
        self.contact_api_url = os.environ["CONTACT_GET_API_URL"]
        self.contact_creation_subscription = os.environ[
            "CONTACT_CREATION_SUBSCRIPTION"]
        self.predictable_contacts = os.environ["PREDICTABLE_CONTACTS_QUEUE"]
        self.contacts_for_update = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        if queue_to_read is None:
            self.queue_to_read = os.environ["QUEUE_TO_READ"]
        else:
            self.queue_to_read = queue_to_read
        max_buffer = os.environ["MAX_BUFFER_COLLETOR"]
        self.redis_reader = RedisReader(self.queue_to_read, max_buffer)
        self.redis_writer = RedisWriter()
        self.contacts_id_to_fetch = []
        self.contacts = {
            self.predictable_contacts: [],
            self.contacts_for_update: [],
        }
        self.processed_items = {}

    def __call__(self):
        """Process one batch of subscription items from the queue.

        Per-run state is reset, the items are resolved against HubSpot,
        the resulting contacts are written to their queues and, when at
        least one item was processed, the batch is removed from the queue.
        """
        subscription_items = self.redis_reader()
        self.contacts[self.predictable_contacts].clear()
        self.contacts[self.contacts_for_update].clear()
        self.contacts_id_to_fetch.clear()
        self.processed_items = {}
        self._fetch_from_hubspot(subscription_items)
        # Flush whatever ids are still pending after the loop.
        if self.contacts_id_to_fetch:
            self._fetch_contact_info()
        self.redis_writer(self.contacts)
        if self.processed_items:
            self.redis_reader.remove_items()

    def _fetch_from_hubspot(self, subscription_items):
        """Fetches deals and contacts information from HubSpot database

        Each ``objectId`` is handled once per batch. New contacts get a
        default prediction, deals contribute their associated contact ids
        and other contact events queue the contact id for fetching. The
        pending ids are fetched as soon as more than 99 have accumulated.
        """
        for item in subscription_items:
            subscription_type = item["subscriptionType"]
            object_name = subscription_type.split(".")[0]
            object_id = item["objectId"]
            # Explicit membership test instead of a raising lookup, which
            # fired on every first-seen item.
            if object_id in self.processed_items:
                continue

            if subscription_type == self.contact_creation_subscription:
                self._format_new_contact(object_id)
            elif object_name == "deal":
                self._fetch_deal_info(object_id)
            elif object_name == "contact":
                self.contacts_id_to_fetch.append(object_id)

            # Checked after every item because deals can add several ids.
            if len(self.contacts_id_to_fetch) > 99:
                self._fetch_contact_info()
            self.processed_items[object_id] = True

    def _format_new_contact(self, contact_id):
        """Assigns prediction equals to 0 in contacts recently created"""
        new_contact = {"contact_id": contact_id, "prediction": "0"}
        self.contacts[self.contacts_for_update].append(new_contact)

    def _fetch_deal_info(self, deal_id):
        """Fetches a deal from HubSpot and queues its associated contact ids.

        A failed request (``None``) or a deal without associations is
        logged and otherwise ignored.
        """
        url = self._build_url(self.deal_api_url, deal_id)
        deal = self._request(url)
        try:
            contacts_id = deal["associations"]["associatedVids"]
        except (KeyError, TypeError) as err:
            logger.error(
                msg="Deal with no contact associated",
                extra={"full_msg_error": err},
            )
            return
        if contacts_id:
            self.contacts_id_to_fetch.extend(contacts_id)

    def _fetch_contact_info(self):
        """Fetches the pending contacts from HubSpot in a single request.

        The response, when there is one, is appended to the predictable
        contacts bucket. The pending id list is emptied afterwards.
        """
        url = self._build_url(self.contact_api_url, *self.contacts_id_to_fetch)
        # Count before clearing; measuring afterwards always reported 0.
        requested = len(self.contacts_id_to_fetch)
        contacts_fetched = self._request(url)
        if contacts_fetched:
            self.contacts[self.predictable_contacts].append(contacts_fetched)
        self.contacts_id_to_fetch.clear()
        logger.info(msg=f"Fetched {requested} contact's info")

    @try_again
    def _request(self, url):
        """Sends a http request to HubSpot

        Returns:
            The decoded JSON body on a 200 response, otherwise ``None``.
            On a 429 response the ids found in ``url`` are written back
            to the input queue first.
        """
        access_token = self.redis_client.get("access_token").decode("utf-8")
        headers = {"Authorization": f"Bearer {access_token}"}
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            return json.loads(response.text)
        # Add contacts to redis again
        if response.status_code == 429:
            logger.error(
                msg="429 - Ten sencondly rolling limit reached",
                extra={"full_msg": response.text},
            )
            self._add_items_to_redis_again(url)
        return None

    def _add_items_to_redis_again(self, url):
        """Re-queues the objects of a request that was answered with 429.

        The ids are recovered from the numbers in ``url``; the items are
        tagged ``contact.Error429`` or ``deal.Error429``.
        """
        if "contact" in url:
            subscription_type = "contact.Error429"
        else:
            subscription_type = "deal.Error429"
        # The first number in the url is skipped: presumably it belongs to
        # the base url (e.g. an API version) -- TODO confirm.
        objects_id = re.findall(r"\d+", url)[1:]
        objects = [
            {"objectId": int(object_id), "subscriptionType": subscription_type}
            for object_id in objects_id
        ]
        objects_wrapper = {self.queue_to_read: objects}
        self.redis_writer(objects_wrapper)

    @staticmethod
    def _build_url(base_url, *ids_to_fetch):
        """Builds urls to fetch both deal and contact information

        The first id is appended directly to ``base_url``; every further
        id is added as ``&vid=<id>``.

        Raises:
            ValueError: if no id is given.
        """
        if not ids_to_fetch:
            raise ValueError("at least one id is required to build the url")
        first_id, *other_ids = ids_to_fetch
        extra_ids = "".join(f"&vid={other_id}" for other_id in other_ids)
        return f"{base_url}{first_id}{extra_ids}"
class Transformer:
    """Turns raw HubSpot contacts into model-ready rows or "404" updates."""

    def __init__(self):
        """Resolve queue names from the environment and build Redis helpers."""
        self.contacts_for_prediction = os.environ[
            "CONTATCS_FOR_PREDICTION_QUEUE"]
        self.contacts_for_update = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        self.redis_reader = RedisReader(
            os.environ["PREDICTABLE_CONTACTS_QUEUE"],
            os.environ["MAX_BUFFER_PREPROCESSOR"],
        )
        self.redis_writer = RedisWriter()
        self.contacts = {
            self.contacts_for_prediction: [],
            self.contacts_for_update: []
        }

    def __call__(self):
        """Read a batch, format it, write both buckets and drop the batch.

        The batch is removed from the input queue only when at least one
        contact ended up in either bucket.
        """
        predictable_contacts = self.redis_reader()
        self.contacts[self.contacts_for_prediction].clear()
        self.contacts[self.contacts_for_update].clear()
        self._format_predictable_contacts(predictable_contacts)
        self.redis_writer(self.contacts)
        if (self.contacts[self.contacts_for_prediction]
                or self.contacts[self.contacts_for_update]):
            self.redis_reader.remove_items()

    def _format_predictable_contacts(self, predictable_contacts):
        """Formats contacts to be predict by the Machine Learning model

        Args:
            predictable_contacts: iterable of dicts mapping a contact id to
                a dict with a ``"properties"`` entry. Each id is handled
                once; contacts that cannot be formatted are queued for
                update with the "404" label instead.
        """
        processed_contacts = set()
        for contacts_by_id in predictable_contacts:
            for contact_id, contact_data in contacts_by_id.items():
                # Explicit membership test instead of a raising lookup,
                # which fired on every first-seen contact.
                if contact_id in processed_contacts:
                    continue
                contact_info = self._extract_contact_main_info(
                    contact_id, contact_data["properties"])
                if contact_info:
                    self.contacts[self.contacts_for_prediction].append(
                        contact_info)
                else:
                    self.contacts[self.contacts_for_update].append(
                        self._format_non_predictable_contact(contact_id))
                processed_contacts.add(contact_id)

    def _extract_contact_main_info(self, contact_id, contact_properties):
        """Build the model input row for one contact.

        Returns:
            The row as a dict, or ``None`` when a property is missing or
            not convertible (the error is logged).
        """
        try:
            contact = {
                "contact_id":
                contact_id,
                "País de interesse":
                contact_properties["country_of_interest"]["value"],
                "Qual a duração do seu intercâmbio?":
                contact_properties["program_duration"]["value"],
                "Idade":
                int(contact_properties["idade"]["value"]),
                "Associated Deals":
                int(contact_properties["num_associated_deals"]["value"]),
                "Number of times contacted":
                int(contact_properties["num_contacted_notes"]["value"]),
                "Number of Sales Activities":
                int(contact_properties["num_notes"]["value"]),
                "Number of Unique Forms Submitted":
                int(contact_properties["num_unique_conversion_events"]
                    ["value"]),
                "Number of Form Submissions":
                int(contact_properties["num_conversion_events"]["value"]),
            }
            self._convert_program_duration_to_int(contact)
            return contact
        # ValueError covers int("") / int("abc"): an empty or non-numeric
        # property must not abort the whole batch.
        except (KeyError, TypeError, ValueError) as err:
            logger.error(
                msg="Contact is not predictable due to misinformation",
                extra={"Full msg error": err},
            )
            return None

    @staticmethod
    def _format_non_predictable_contact(contact_id):
        """Assigns the '404' label to contacts that does not have
        enough information
        """
        return {"contact_id": contact_id, "prediction": "404"}

    @staticmethod
    def _convert_program_duration_to_int(contact):
        """Replace the duration text by its first number, when it has one.

        ``contact`` is modified in place; a duration without digits is
        left untouched.
        """
        duration_key = "Qual a duração do seu intercâmbio?"
        numbers = re.findall(r"\d+", contact[duration_key])
        if numbers:
            contact[duration_key] = int(numbers[0])