def append_symbol_if_length(soup: bs4.BeautifulSoup) -> bs4.BeautifulSoup:
    """Suffix ``symbol`` onto every text token that is exactly ``length`` long.

    Every text node below ``soup.html`` is visited. A node is rewritten only
    when it has text, its type is not listed in ``ignore_types`` and its
    parent tag name is not listed in ``ignore_tag_names``. The node is then
    replaced by the concatenation of its tokens (as produced by
    ``split_into_tokens``), with ``symbol`` appended to each matching token.

    ``symbol``, ``length``, ``ignore_types`` and ``ignore_tag_names`` are not
    parameters; they are resolved from the enclosing scope.

    Returns the same ``soup`` object, modified in place.
    """
    text_nodes = soup.html.findChildren(string=True, recursive=True)
    for node in text_nodes:
        # Evaluate all three checks up front (no short-circuiting), exactly
        # like building a list for all() would.
        has_text = node.string
        type_allowed = type(node) not in ignore_types
        parent_allowed = node.parent.name not in ignore_tag_names
        if not (has_text and type_allowed and parent_allowed):
            continue

        pieces = []
        for token in split_into_tokens(node.string):
            if len(token) == length:
                token = token + symbol
            pieces.append(token)
        node.replace_with(''.join(pieces))
    return soup
def handle(self, text):
    """Record a delivery confirmation reported by SMS.

    Expected message format::

        confirm books gwaai valley primary 4288 G
        |       |     |                    |    |
        handler |     school name          |    status
                commodity                  school code

    Steps:

    1. Resolve the sender to a ``Contact`` (by message contact, phone, or
       alternate phone).
    2. Split ``text`` into ``commodity``, ``school_name``, ``school_code``
       and ``condition`` tokens.
    3. Resolve the commodity by slug prefix or alias.
    4. Resolve the school by exact name, fuzzy name and code, and turn it
       into a ``Facility``.
    5. If the facility has an active shipment and the condition is one of
       G/D/L/I, record a ``Cargo`` + ``ShipmentSighting``, mark the shipment
       delivered, update the school status and reply with a confirmation.

    Args:
        text: the message text to tokenise (commodity, school name, school
            code and condition).

    Returns:
        None. All feedback to the sender goes through ``self.respond``.
    """
    known_contact = None
    commodity = None
    facility = None
    possible_schools = []
    possible_by_code = None
    possible_by_name = None
    school_by_code = None
    school_by_name = None

    def is_perfect(match):
        # A match tuple is treated as perfect when fields 2 and 3 are 0 and
        # field 4 is 1.0 (presumably two edit distances and a similarity
        # ratio -- confirm against School.closest_by_*).
        return match[2] == 0 and match[3] == 0 and match[4] == 1.0

    def facility_for(matched_school):
        # Fetch (or lazily create) the Facility wrapping a School.
        found, _created = Facility.objects.get_or_create(
            location_id=matched_school.pk,
            location_type=ContentType.objects.get(model='school'))
        return found

    def get_commodity(token):
        # Look the commodity up by slug prefix, falling back to aliases.
        try:
            return Commodity.objects.get(slug__istartswith=token)
        except MultipleObjectsReturned:
            # TODO do something here?
            return None
        except ObjectDoesNotExist:
            for com in Commodity.objects.all():
                if com.has_alias(token):
                    return com
            return None

    # ---- identify the sender -------------------------------------------
    if self.msg.contact is not None:
        self.debug(self.msg.contact)
        known_contact = self.msg.contact
    elif self.msg.connection.identity is not None:
        try:
            known_contact = Contact.objects.get(
                phone=self.msg.connection.identity)
            self.msg.connection.contact = known_contact
            self.msg.connection.save()
        except MultipleObjectsReturned:
            # TODO do something?
            self.debug('MULTIPLE IDENTITIES')
        except ObjectDoesNotExist:
            self.debug('NO PERSON FOUND')
            try:
                known_contact = Contact.objects.get(
                    alternate_phone=self.msg.connection.identity)
                self.msg.connection.contact = known_contact
                self.msg.connection.save()
            except MultipleObjectsReturned:
                # TODO this case may be unnecessary, since many contacts
                # often share a single alternate_phone
                self.debug('MULTIPLE IDENTITIES AFTER UNKNOWN')
            except ObjectDoesNotExist:
                pass
            finally:
                # NOTE(review): this runs even when the alternate_phone
                # lookup succeeded, replacing known_contact with a contact
                # keyed on the primary phone. Kept as-is; confirm intent.
                known_contact, _created = Contact.objects.get_or_create(
                    phone=self.msg.connection.identity)
    else:
        self.debug('NO IDENTITY')

    if known_contact is None:
        return
    self.debug('KNOWN PERSON')

    # lists of expected token types and their labels for split_into_tokens
    expected_tokens = ['word', 'words', 'number', 'word']
    token_labels = ['commodity', 'school_name', 'school_code', 'condition']
    tokens = utils.split_into_tokens(expected_tokens, token_labels, text)
    self.debug(tokens)

    # ---- commodity -----------------------------------------------------
    if not tokens['commodity'].isdigit():
        commodity = get_commodity(tokens['commodity'])
        if commodity is None:
            self.respond("Sorry, no supply called '%s'" % (tokens['commodity']))
            self.respond("Approved supplies are %s" % ", ".join(
                Commodity.objects.values_list('slug', flat=True)))

    # ---- school by name ------------------------------------------------
    if not tokens['school_name'].isdigit():
        try:
            # first try to match name exactly
            exact = School.objects.get(name__iexact=tokens['school_name'])
            facility = facility_for(exact)
        except MultipleObjectsReturned:
            # several exact matches: keep them all as candidates
            possible_schools.extend(
                School.objects.filter(name__istartswith=tokens['school_name']))
        except ObjectDoesNotExist:
            # try to match using string distance algorithms
            possible_by_name = School.closest_by_spelling(tokens['school_name'])
            self.debug("%s possible facilities by name" %
                       (str(len(possible_by_name))))
            self.debug(possible_by_name)
            if len(possible_by_name) == 1:
                if is_perfect(possible_by_name[0]):
                    self.debug('PERFECT LOC MATCH BY NAME')
                    school_by_name = possible_by_name[0][1]
                    facility = facility_for(school_by_name)
            else:
                # add any non-perfect matches to possible_schools
                possible_schools.extend(
                    match[1] for match in possible_by_name or [])

    # ---- school by code ------------------------------------------------
    if tokens['school_code'].isdigit():
        possible_by_code = School.closest_by_code(tokens['school_code'])
        self.debug("%s possible facilities by code" %
                   (str(len(possible_by_code))))
        if len(possible_by_code) == 1 and is_perfect(possible_by_code[0]):
            self.debug('PERFECT LOC MATCH BY CODE')
            school_by_code = possible_by_code[0][1]

    # ---- combine both lookups ------------------------------------------
    if school_by_code is not None or school_by_name is not None:
        if school_by_code is not None and school_by_name is not None:
            if school_by_code.pk == school_by_name.pk:
                # both lookups agree on the same perfect match
                facility = facility_for(school_by_code)
            else:
                # two different perfect matches: offer both as candidates
                possible_schools.append(school_by_code)
                self.debug("%s possible facilities" %
                           (str(len(possible_schools))))
                # was `possible_facilities`, an undefined name
                possible_schools.append(school_by_name)
                self.debug("%s possible facilities" %
                           (str(len(possible_schools))))
        else:
            # a perfect match by either lookup is also considered a winner
            winner = school_by_code if school_by_code is not None else school_by_name
            facility = facility_for(winner)
            self.debug(facility)
    else:
        # neither lookup returned a perfect match: prefer candidates that
        # both lookups agree on
        if possible_by_code and possible_by_name is not None:
            possible_schools.extend(
                [match[1] for match in possible_by_name
                 if match in possible_by_code])
            self.debug("%s possible facilities by both" %
                       (str(len(possible_schools))))
        if len(possible_schools) == 0:
            # `or []` keeps a missing lookup (None) from being iterated
            possible_schools.extend(
                [match[1] for match in possible_by_code or []])
            possible_schools.extend(
                [match[1] for match in possible_by_name or []])
            self.debug("%s possible facilities by both" %
                       (str(len(possible_schools))))
        if len(possible_schools) == 1:
            facility = facility_for(possible_schools[0])

    if facility is None:
        self.respond("Sorry I don't know '%s'" % (tokens['school_name']))
        if possible_schools:
            # join school names; joining the model instances raises TypeError
            self.respond("Did you mean one of: %s?" % ", ".join(
                ["%s" % candidate.name for candidate in possible_schools]))
        return

    # ---- record the delivery -------------------------------------------
    if tokens['condition'].isdigit():
        return
    active_shipment = Facility.get_active_shipment(facility)
    if active_shipment is None:
        return

    condition = tokens['condition'].upper()
    if condition not in ('G', 'D', 'L', 'I'):
        self.respond("Oops. Status must be one of: G, D, L, or I")
        # stop here: there is no cargo to attach to a sighting
        return

    # create a new Cargo object
    observed_cargo = Cargo.objects.create(commodity=commodity,
                                          condition=condition)

    seen_by_str = (self.msg.connection.backend.name + ":" +
                   self.msg.connection.identity)
    # create a new ShipmentSighting
    sighting = ShipmentSighting.objects.create(
        observed_cargo=observed_cargo,
        facility=facility, seen_by=seen_by_str)

    # associate new Cargo with Shipment
    active_shipment.status = 'D'
    active_shipment.actual_delivery_time = datetime.datetime.now()
    active_shipment.cargos.add(observed_cargo)
    active_shipment.save()

    # get or create a ShipmentRoute and associate with new ShipmentSighting
    route, _new_route = ShipmentRoute.objects.get_or_create(
        shipment=active_shipment)
    route.sightings.add(sighting)
    route.save()

    if observed_cargo.condition is not None:
        this_school = School.objects.get(pk=facility.location_id)
        # map reported condition to the status numbers
        # that the sparklines will use
        status_by_condition = {'G': 1, 'D': -2, 'L': -3, 'I': -4}
        this_school.status = status_by_condition.get(
            observed_cargo.condition, 0)
        this_school.save()

        this_district = this_school.parent
        # TODO optimize! this is very expensive and way too slow
        # The attribute is read only for its effect: presumably it
        # re-generates the statuses the sparklines use -- confirm.
        updated = this_district.spark

    campaign = Campaign.get_active_campaign()
    if campaign is not None:
        campaign.shipments.add(active_shipment)
        campaign.save()

    # commodity may be None when the supply name was not recognised
    commodity_slug = commodity.slug if commodity is not None else None
    data = [
        "of %s" % (commodity_slug or "??"),
        "to %s" % (facility.location.name or "??"),
        "in %s condition" % (observed_cargo.get_condition_display() or "??"),
    ]
    confirmation = "Thanks. Confirmed delivery of %s." % (" ".join(data))
    self.respond(confirmation)
def analyse_sentences_in_paragraphs(self, unmatched_paragraphs_curr, unmatched_paragraphs_prev):
    """Match sentences of unmatched current paragraphs against older ones.

    Each non-empty sentence of every current paragraph is normalised
    (stripped, re-joined from its tokens with single spaces) and hashed.
    The hash is looked up first in the previous revision's unmatched
    paragraphs and then in ``self.sentences_ht``. A previous sentence is
    claimed when none of its words is matched yet. Sentences with no match
    become new ``Sentence`` objects.

    Returns a 4-tuple:
        (unmatched current sentences,
         unmatched previous sentences,
         previous sentences flagged as matched during this call,
         number of non-empty current sentences seen)
    """
    new_sentences = []
    leftover_prev = []
    flagged_prev = []
    sentence_count = 0

    def file_under(paragraph, key, entry):
        # Register `entry` in the paragraph's hash table and ordered list.
        if key in paragraph.sentences:
            paragraph.sentences[key].append(entry)
        else:
            paragraph.sentences.update({entry.hash_value: [entry]})
        paragraph.ordered_sentences.append(entry.hash_value)

    def claim_from(candidates, paragraph, key):
        # Try to claim one still-unmatched previous sentence for `paragraph`.
        # Returns True as soon as a candidate with no matched word is found.
        for candidate in candidates:
            if candidate.matched:
                continue
            word_flags = [bool(word.matched) for word in candidate.words]
            if not any(word_flags):
                # no word was taken yet: claim sentence and all its words
                candidate.matched = True
                flagged_prev.append(candidate)
                for word in candidate.words:
                    word.matched = True
                file_under(paragraph, key, candidate)
                return True
            if all(word_flags):
                # every word is already taken elsewhere: retire the sentence
                candidate.matched = True
                flagged_prev.append(candidate)
        return False

    for paragraph_curr in unmatched_paragraphs_curr:
        for raw_sentence in split_into_sentences(paragraph_curr.value):
            sentence = raw_sentence.strip()
            if not sentence:
                # empty lines are not tracked
                continue
            # collapse whitespace, then hash the normalised text
            sentence = ' '.join(split_into_tokens(sentence))
            hash_curr = calculate_hash(sentence)
            sentence_count += 1

            # 1) unmatched paragraphs of the previous revision
            found = False
            for paragraph_prev in unmatched_paragraphs_prev:
                bucket = paragraph_prev.sentences.get(hash_curr, [])
                if claim_from(bucket, paragraph_curr, hash_curr):
                    found = True
                    break

            # 2) hash table of sentences from old revisions
            if not found:
                found = claim_from(self.sentences_ht.get(hash_curr, []),
                                   paragraph_curr, hash_curr)

            # 3) still nothing: keep it for further (word level) analysis
            if not found:
                fresh = Sentence()
                fresh.value = sentence
                fresh.hash_value = hash_curr
                file_under(paragraph_curr, hash_curr, fresh)
                new_sentences.append(fresh)

    # Collect the sentences of the previous paragraphs that nobody claimed.
    for paragraph_prev in unmatched_paragraphs_prev:
        for prev_hash in paragraph_prev.ordered_sentences:
            same_hash = paragraph_prev.sentences[prev_hash]
            if len(same_hash) > 1:
                # duplicated hash: self.temp counts how often this
                # paragraph/hash pair was visited, which selects the duplicate
                marker = 's-{}-{}'.format(paragraph_prev, prev_hash)
                self.temp.append(marker)
                position = self.temp.count(marker) - 1
            else:
                position = 0
            sentence_prev = same_hash[position]
            if sentence_prev.matched:
                continue
            leftover_prev.append(sentence_prev)
            # flag it so 'matched' can be reset later for unmatched
            # paragraphs and sentences (see analyse_words_in_sentences)
            sentence_prev.matched = True
            flagged_prev.append(sentence_prev)

    return new_sentences, leftover_prev, flagged_prev, sentence_count
def handle(self, text):
    """Record a received delivery reported by SMS.

    Expected message format::

        recieved books 123450 4 1
        |        |     |    | | |
        handler  |     |    | | condition code
                 |     |    | # of units
                 |     |    satellite #
                 |     school code
                 commodity

    The sender is resolved to a ``Contact``, ``text`` is split into
    ``commodity``, ``school_code``, ``quantity`` and ``condition`` tokens,
    and the school code (whose last digit is the satellite number) is
    resolved to a ``Facility``. If that facility has an active shipment, a
    ``Cargo`` and ``ShipmentSighting`` are recorded, the shipment is marked
    delivered and a confirmation is sent.

    Args:
        text: the message text to tokenise (commodity, school code,
            quantity and condition).

    Returns:
        None. All feedback to the sender goes through ``self.respond``.
    """
    known_contact = None
    commodity = None
    facility = None

    def facility_for(matched_school):
        # Fetch (or lazily create) the Facility wrapping a School.
        found, _created = Facility.objects.get_or_create(
            location_id=matched_school.pk,
            location_type=ContentType.objects.get(model='school'))
        return found

    def get_commodity(token):
        # Look the commodity up by slug prefix, falling back to aliases.
        try:
            return Commodity.objects.get(slug__istartswith=token)
        except MultipleObjectsReturned:
            # TODO do something here?
            return None
        except ObjectDoesNotExist:
            for com in Commodity.objects.all():
                if com.has_alias(token):
                    return com
            return None

    def list_possible_schools_for_code(code):
        # Return "<full code> (<name>)" for every school with this code,
        # or None when there is none.
        candidates = School.objects.filter(code=code)
        if not candidates:
            return None
        return [candidate.full_code + " (" + candidate.name + ")"
                for candidate in candidates]

    def suggest_schools_for_code(code):
        # Send a "did you mean" reply when any school carries this code.
        suggestions = list_possible_schools_for_code(code)
        if suggestions is not None:
            self.respond("Did you mean one of: %s?" % (", ".join(suggestions)))

    # ---- identify the sender -------------------------------------------
    if self.msg.contact is not None:
        self.debug(self.msg.contact)
        known_contact = self.msg.contact
    elif self.msg.connection.identity is not None:
        try:
            known_contact = Contact.objects.get(
                phone=self.msg.connection.identity)
            self.msg.connection.contact = known_contact
            self.msg.connection.save()
        except MultipleObjectsReturned:
            # TODO do something?
            self.debug('MULTIPLE IDENTITIES')
        except ObjectDoesNotExist:
            self.debug('NO PERSON FOUND')
            try:
                known_contact = Contact.objects.get(
                    alternate_phone=self.msg.connection.identity)
                self.msg.connection.contact = known_contact
                self.msg.connection.save()
            except MultipleObjectsReturned:
                # TODO this case may be unnecessary, since many contacts
                # often share a single alternate_phone
                self.debug('MULTIPLE IDENTITIES AFTER UNKNOWN')
            except ObjectDoesNotExist:
                pass
            finally:
                # NOTE(review): this runs even when the alternate_phone
                # lookup succeeded, so a new Contact is always created
                # here. Kept as-is; confirm intent.
                known_contact = Contact.objects.create(
                    phone=self.msg.connection.identity)
    else:
        self.debug('NO IDENTITY')

    if known_contact is None:
        return
    self.debug('KNOWN PERSON')

    expected_tokens = ['word', 'number', 'number', 'number']
    token_labels = ['commodity', 'school_code', 'quantity', 'condition']
    tokens = utils.split_into_tokens(expected_tokens, token_labels, text)
    self.debug(tokens)

    # ---- commodity -----------------------------------------------------
    if not tokens['commodity'].isdigit():
        commodity = get_commodity(tokens['commodity'])
        if commodity is None:
            self.respond("Sorry, no supply called '%s'" % (tokens['commodity']))
            self.respond("Approved supplies are %s" % ", ".join(
                Commodity.objects.values_list('slug', flat=True)))

    # ---- school --------------------------------------------------------
    if tokens['school_code'].isdigit():
        # school code should be between 1 and 5 digits, and
        # satellite_number should be 1 digit. Rather than hardcoding the
        # limit, derive it from the largest values in the db.
        max_codes = School.objects.aggregate(max_code=Max('code'),
                                             max_sat=Max('satellite_number'))
        max_code_length = len(str(max_codes['max_code']))
        max_sat_length = len(str(max_codes['max_sat']))

        if len(tokens['school_code']) <= (max_code_length + max_sat_length):
            # separate school's code and satellite_number (last digit)
            school_num = tokens['school_code'][:-1]
            sat_num = tokens['school_code'][-1:]
            try:
                school = School.objects.get(code=school_num,
                                            satellite_number=sat_num)
                facility = facility_for(school)
            except ObjectDoesNotExist:
                try:
                    school = School.objects.get(code=tokens['school_code'])
                    facility = facility_for(school)
                except (ObjectDoesNotExist, MultipleObjectsReturned):
                    # MultipleObjectsReturned means several satellites
                    # share this code; that is exactly when the first
                    # suggestion list below is useful.
                    self.respond("Sorry, cannot find school with code '%s'" %
                                 (tokens['school_code']))
                    # maybe satellite number is omitted, so lookup
                    # schools by entire token
                    suggest_schools_for_code(tokens['school_code'])
                    # maybe satellite number is incorrect, so lookup
                    # schools only by school_code
                    suggest_schools_for_code(school_num)
        else:
            self.respond("Sorry code '%s' is not valid. All codes are fewer than 6 digits" %
                         (tokens['school_code']))

    # ---- record the delivery -------------------------------------------
    # TODO acceptable values should be configurable
    if not (tokens['quantity'].isdigit() and tokens['condition'].isdigit()):
        return
    if facility is None:
        return
    active_shipment = Facility.get_active_shipment(facility)
    if active_shipment is None:
        return

    # map expected condition tokens into choices for db
    conditions_map = {'1': 'G', '2': 'D', '3': 'L'}
    condition = conditions_map.get(tokens['condition'])
    if condition is None:
        # an unmapped code used to raise KeyError here
        self.respond("Oops. Condition must be one of: 1, 2, or 3")
        return

    # create a new Cargo object
    observed_cargo = Cargo.objects.create(
        commodity=commodity,
        quantity=int(tokens['quantity']),
        condition=condition)

    # create a new ShipmentSighting
    sighting = ShipmentSighting.objects.create(
        observed_cargo=observed_cargo,
        facility=facility)

    # associate new Cargo with Shipment
    active_shipment.status = 'D'
    active_shipment.actual_delivery_time = datetime.datetime.now()
    active_shipment.cargos.add(observed_cargo)
    active_shipment.save()

    # get or create a ShipmentRoute and associate with new ShipmentSighting
    route, _new_route = ShipmentRoute.objects.get_or_create(
        shipment=active_shipment)
    route.sightings.add(sighting)
    route.save()

    campaign = Campaign.get_active_campaign()
    if campaign is not None:
        campaign.shipments.add(active_shipment)
        campaign.save()

    # commodity may be None when the supply name was not recognised
    commodity_slug = commodity.slug if commodity is not None else None
    data = [
        "%s pallets" % (observed_cargo.quantity or "??"),
        "of %s" % (commodity_slug or "??"),
        "to %s" % (facility.location.name or "??"),
        "in %s condition" % (observed_cargo.get_condition_display() or "??"),
    ]
    confirmation = "Thanks. Confirmed delivery of %s." % (" ".join(data))
    self.respond(confirmation)
def handle(self, text):
    """Register the sending phone number to a contact and facility.

    Expected message format::

        register wheeler 3 unicef house
        |        |       | |
        handler  surname | facility name
                         facility code

    The surname is matched against contacts by spelling and by sound, and
    the facility code / name against schools. When exactly one contact can
    be determined, the phone number is stored on it. Otherwise the sender
    is asked to pick from the candidates.

    Args:
        text: the message text to tokenise (surname, facility code and
            facility name).

    Returns:
        None. All feedback to the sender goes through ``self.respond``.
    """
    known_contact = None
    possible_contacts_by_name = None
    possible_contacts_by_sound = []
    facility = None
    possible_facilities = []
    possible_fac_by_code = None
    possible_fac_by_name = None
    fac_by_code = None
    fac_by_name = None

    def is_perfect(match):
        # A match tuple is treated as perfect when fields 2 and 3 are 0 and
        # field 4 is 1.0 (presumably two edit distances and a similarity
        # ratio -- confirm against School.closest_by_*).
        return match[2] == 0 and match[3] == 0 and match[4] == 1.0

    # ---- identify the sender by phone number ---------------------------
    if self.msg.connection.identity is not None:
        try:
            known_contact = Contact.objects.get(
                phone=self.msg.connection.identity)
            self.msg.connection.contact = known_contact
            self.msg.connection.save()
            self.debug('KNOWN CONTACT')
        except MultipleObjectsReturned:
            # TODO do something?
            self.debug('MULTIPLE IDENTITIES')
        except ObjectDoesNotExist:
            self.debug('NO PERSON FOUND')
    else:
        self.debug('NO IDENTITY')

    expected_tokens = ['word', 'number', 'words']
    token_labels = ['surname', 'facility_code', 'facility_name']
    tokens = utils.split_into_tokens(expected_tokens, token_labels, text)
    self.debug(tokens)

    # ---- contact by surname --------------------------------------------
    if not tokens['surname'].isdigit():
        # spelling matches carry the Contact at index 1 ...
        possible_contacts_by_name = Contact.closest_by_spelling(tokens['surname'])
        if len(possible_contacts_by_name) == 1:
            known_contact = possible_contacts_by_name[0][1]
            self.msg.connection.contact = known_contact
            self.msg.connection.save()

        # ... phonetic matches carry it at index 0
        possible_contacts_by_sound = Contact.closest_by_sound(tokens['surname'])
        if len(possible_contacts_by_sound) == 1:
            known_contact = possible_contacts_by_sound[0][0]
            self.msg.connection.contact = known_contact
            self.msg.connection.save()

    # ---- facility by code ----------------------------------------------
    if tokens['facility_code'].isdigit():
        possible_fac_by_code = School.closest_by_code(tokens['facility_code'])
        self.debug("%s possible facilities by code" %
                   (str(len(possible_fac_by_code))))
        if len(possible_fac_by_code) == 1 and is_perfect(possible_fac_by_code[0]):
            self.debug('PERFECT LOC MATCH BY CODE')
            fac_by_code = possible_fac_by_code[0][1]

    # ---- facility by name ----------------------------------------------
    if not tokens['facility_name'].isdigit():
        possible_fac_by_name = School.closest_by_spelling(tokens['facility_name'])
        self.debug("%s possible facilities by name" %
                   (str(len(possible_fac_by_name))))
        if len(possible_fac_by_name) == 1 and is_perfect(possible_fac_by_name[0]):
            self.debug('PERFECT LOC MATCH BY NAME')
            fac_by_name = possible_fac_by_name[0][1]

    # ---- combine both facility lookups ---------------------------------
    if fac_by_code is not None or fac_by_name is not None:
        if fac_by_code is not None and fac_by_name is not None:
            if fac_by_code.pk == fac_by_name.pk:
                # both lookups agree on the same perfect match
                facility = fac_by_code
            else:
                # two different perfect matches: offer both as candidates
                possible_facilities.append(fac_by_code)
                self.debug("%s possible facilities" %
                           (str(len(possible_facilities))))
                possible_facilities.append(fac_by_name)
                self.debug("%s possible facilities" %
                           (str(len(possible_facilities))))
        else:
            # a perfect match by either lookup is also considered a winner
            facility = fac_by_code if fac_by_code is not None else fac_by_name
            self.debug(facility)
    else:
        # neither lookup returned a perfect match: prefer candidates that
        # both lookups agree on
        if possible_fac_by_code and possible_fac_by_name is not None:
            possible_facilities.extend(
                [match[1] for match in possible_fac_by_name
                 if match in possible_fac_by_code])
            self.debug("%s possible facilities by both" %
                       (str(len(possible_facilities))))
        if len(possible_facilities) == 0:
            # `or []` keeps a missing lookup (None) from being iterated
            possible_facilities.extend(
                [match[1] for match in possible_fac_by_code or []])
            possible_facilities.extend(
                [match[1] for match in possible_fac_by_name or []])
            self.debug("%s possible facilities by both" %
                       (str(len(possible_facilities))))
        if len(possible_facilities) == 1:
            facility = possible_facilities[0]

    # ---- resolve the contact / reply -----------------------------------
    if known_contact is None:
        if possible_contacts_by_name is not None:
            # gather the Contacts from the match tuples
            possible_contacts = [c[1] for c in possible_contacts_by_name]
            self.debug("%s possible contacts by name" %
                       (str(len(possible_contacts))))
            self.debug(possible_contacts)

            # add Contacts from phonetic match tuples
            possible_contacts.extend(c[0] for c in possible_contacts_by_sound)
            self.debug("%s possible contacts by name" %
                       (str(len(possible_contacts))))
            self.debug(possible_contacts)

            # all the contacts associated with each candidate facility
            possible_contacts_by_loc = [
                contact
                for fac in possible_facilities
                for contact in fac.facilitycontact.all()
            ]
            self.debug("%s possible contacts by location" %
                       (str(len(possible_contacts_by_loc))))
            self.debug(possible_contacts_by_loc)

            possible_contacts_by_both = []
            if len(possible_contacts_by_loc) > 0:
                # a real list, so len() and extend() also work on Python 3
                possible_contacts_by_both = [
                    contact for contact in possible_contacts_by_loc
                    if contact in possible_contacts
                ]
                self.debug("%s possible contacts by BOTH" %
                           (str(len(possible_contacts_by_both))))

            if len(possible_contacts_by_both) == 0:
                # no overlap: fall back to every candidate from either side
                possible_contacts_by_both.extend(possible_contacts)
                if possible_contacts_by_loc:
                    possible_contacts_by_both.extend(possible_contacts_by_loc)
                self.debug("%s possible contacts by BOTH" %
                           (str(len(possible_contacts_by_both))))

            if len(possible_contacts_by_both) == 1:
                known_contact = possible_contacts_by_both[0]
                known_contact.phone = self.msg.connection.identity
                self.msg.connection.contact = known_contact
                self.msg.connection.save()
                known_contact.save()
            else:
                possible_contacts_names = [c.name for c in possible_contacts_by_both]
                self.respond("Did you mean one of: %s?" %
                             (", ".join(possible_contacts_names)))
    else:
        if facility is not None:
            self.respond("Hello %s, this phone number is now registered for %s (code: %s)" %
                         (known_contact.name, facility.name,
                          str(facility.code) + str(facility.satellite_number)))
        else:
            possible_facilities_names = [
                str(f.name) + " " + str(f.code) + str(f.satellite_number)
                for f in possible_facilities
            ]
            # separator was " ," which rendered as "a ,b ,c"
            self.respond("Hello %s, did you mean one of: %s?" %
                         (known_contact.name, ", ".join(possible_facilities_names)))