def parse_all(user_input: str, culture: str) -> List[ModelResult]:
    """Run every supported recognizer over ``user_input``.

    Each recognizer is called once with ``(user_input, culture)`` and the
    outputs are collected in a list, one entry per recognizer, in the order
    the recognizers are listed below.

    NOTE(review): each entry is whatever the corresponding
    ``Recognizers.recognize_*`` call returns, so the ``List[ModelResult]``
    annotation may be narrower than the real shape -- confirm against the
    recognizers library.
    """
    recognizers = (
        # Numbers, e.g. "I have two apples" -> "2".
        Recognizers.recognize_number,
        # Ordinals, e.g. "eleventh" -> "11".
        Recognizers.recognize_ordinal,
        # Percentages, e.g. "one hundred percents" -> "100%".
        Recognizers.recognize_percentage,
        # Ages, e.g. "After ninety five years of age, perspectives change"
        # -> "95 Year".
        Recognizers.recognize_age,
        # Currencies, e.g. "Interest expense in the 1988 third quarter was
        # $ 75.3 million" -> "75300000 Dollar".
        Recognizers.recognize_currency,
        # Dimensions, e.g. "The six-mile trip to my airport hotel ..."
        # -> "6 Mile".
        Recognizers.recognize_dimension,
        # Temperatures, e.g. "Set the temperature to 30 degrees celsius"
        # -> "30 C".
        Recognizers.recognize_temperature,
        # Dates and times, including colloquial phrasing, e.g.
        # "I'll go back 8pm today" -> "2017-10-04 20:00:00".
        Recognizers.recognize_datetime,
    )
    return [recognize(user_input, culture) for recognize in recognizers]
def _parse_all_entities(user_input: str, culture: str) -> List[Dict[Text, Any]]:
    """Extract entities from ``user_input`` with every configured recognizer.

    Each recognizer is called once with ``(user_input, culture)``; the
    outputs are returned as a list with one entry per recognizer, in the
    order listed below.

    API details:
    https://github.com/Microsoft/Recognizers-Text/tree/master/Python#api-documentation

    NOTE(review): each entry is whatever the corresponding
    ``Recognizers.recognize_*`` call returns; confirm that this matches the
    declared ``List[Dict[Text, Any]]`` return type.
    """
    recognizers = (
        # Numbers, e.g. "I have two apples" -> "2".
        Recognizers.recognize_number,
        # Ordinals, e.g. "eleventh" -> "11".
        Recognizers.recognize_ordinal,
        # Percentages, e.g. "one hundred percents" -> "100%".
        Recognizers.recognize_percentage,
        # Ages, e.g. "After ninety five years of age, perspectives change"
        # -> "95 Year".
        Recognizers.recognize_age,
        # Currencies, e.g. "Interest expense in the 1988 third quarter was
        # $ 75.3 million" -> "75300000 Dollar".
        Recognizers.recognize_currency,
        # Temperatures, e.g. "Set the temperature to 30 degrees celsius"
        # -> "30 C".
        Recognizers.recognize_temperature,
        # Dates and times, including colloquial phrasing, e.g.
        # "I'll go back 8pm today" -> "2017-10-04 20:00:00".
        Recognizers.recognize_datetime,
        # Phone numbers, e.g. "My phone number is ( 19 ) 38294427."
        Recognizers.recognize_phone_number,
        # E-mail addresses embedded in free text.
        Recognizers.recognize_email,
    )
    return [recognize(user_input, culture) for recognize in recognizers]
def time_delta(hour_minute, delta_time):
    """Shift a clock time by an offset and wrap the result around midnight.

    Args:
        hour_minute: Either a string starting with ``HH:MM`` or free text
            that is handed to the Chinese date/time recognizer. When the
            recognizer finds nothing, or its first result is not of type
            ``'time'``, ``'12:00'`` is used as the base time.
        delta_time: Two-item sequence ``(hours, minutes)``; either item may
            be negative.

    Returns:
        The shifted time formatted as ``'HH:MM'``.
    """
    clock = re.match('([0-2][0-9]):([0-5][0-9])', hour_minute)
    if clock:
        # Take the fields from the match itself so that trailing text such
        # as a seconds field ("12:30:45") cannot break the unpacking below.
        hours_text, minutes_text = clock.groups()
    else:
        result = Recognizers.recognize_datetime(hour_minute, Culture.Chinese)
        if (not result or not result[0].resolution['values']
                or result[0].resolution['values'][0]['type'] != 'time'):
            hour_minute = '12:00'
        else:
            # Keep only the first five characters of the resolved value.
            hour_minute = result[0].resolution['values'][0]['value'][:5]
        hours_text, minutes_text = hour_minute.split(':')

    # Work in total minutes modulo one day so offsets of any size (several
    # hours of minutes, more than a day of hours) normalise correctly.
    total_minutes = ((int(hours_text) + delta_time[0]) * 60
                     + int(minutes_text) + delta_time[1])
    hours, minutes = divmod(total_minutes % (24 * 60), 60)
    return "%02d:%02d" % (hours, minutes)
def _format_single_value(value, text, approx_pattern, approx_factors):
    """Format one number as an exact value or an interval, driven by qualifier words in ``text``."""
    # '超过' (more than) also occurs inside '不超过' (no more than); the
    # lookbehind keeps the upper-bound phrase from being read as a lower bound.
    if re.search('最少|至少|以上|(?<!不)超过|超出', text):
        return '[{:.2f}, ∞)'.format(value)
    if re.search('最多|最高|以下|不超过|以内|之内', text):
        return '[0.0, {:.2f}]'.format(value)
    if re.search(approx_pattern, text):
        low_factor, high_factor = approx_factors
        return '[{:.2f}, {:.2f}]'.format(value * low_factor, value * high_factor)
    return '{:.2f}'.format(value)


def _format_value_range(low, high, text, tolerance, **revise_kwargs):
    """Format a two-number slot as a range; return None when ``text`` has no range separator."""
    separator = re.search('至|到|-|—', text)
    if not separator:
        return None
    # Each bound is revised against the text on its own side of the separator.
    low = revise_value(low, text[:separator.start()], **revise_kwargs)
    high = revise_value(high, text[separator.end():], **revise_kwargs)
    if abs(low - high) <= tolerance:
        # The bounds are practically equal: collapse to their midpoint.
        return '{:.2f}'.format((low + high) / 2)
    return '[{:.2f}, {:.2f}]'.format(low, high)


def get_real_slot_value(slot_name, slot_value):
    """Normalise a raw slot value into a canonical value or interval string.

    The handling depends on ``slot_name``:

    * '评分' (rating), '价格' (price), '时长' (duration): numbers are taken
      from the text via ``c2d.takeNumberFromString``. One number becomes an
      exact value or an interval depending on qualifier words (at least /
      at most / about); two numbers joined by a range separator become
      ``'[low, high]'``, or their midpoint when they are practically equal.
    * '准点率' (punctuality rate): parsed with ``retrieve_punctuality``;
      results below 0.1 leave the text unchanged.
    * '出发时间' / '到达时间' (departure / arrival time): resolved with the
      Chinese date/time recognizer into ``'HH:MM'`` or an interval.
    * '时间' / '开始时间' / '结束时间' (time / start / end time): resolved
      into ``'HH:MM'`` when the recognizer reports a plain time.
    * '人数', '天数', '数量', '距离', 'choice': the integer part of the first
      number found.

    Args:
        slot_name: Name of the slot, selecting the normalisation rule.
        slot_value: Raw text of the slot.

    Returns:
        The normalised string, or ``slot_value`` unchanged when the slot
        name is not handled or the text cannot be interpreted.

    NOTE(review): the contracts of ``c2d``, ``revise_value`` and
    ``retrieve_punctuality`` are not visible in this block; the descriptions
    above reflect only how their results are used here.
    """
    if slot_name == '评分':
        default_rating = 4.0
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            # No usable number: fall back to "at least the default rating".
            return '[{:.2f}, ∞)'.format(default_rating)
        if len(digit_items) == 1:
            rating = revise_value(float(digit_items[0]), slot_value,
                                  b_check_half=True)
            return _format_single_value(rating, slot_value,
                                        '左右|上下|差不多|大概', (0.95, 1.05))
        first, second = float(digit_items[0]), float(digit_items[1])
        ranged = _format_value_range(min(first, second), max(first, second),
                                     slot_value, 0.01, b_check_half=True)
        return slot_value if ranged is None else ranged

    if slot_name == '价格':
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            return slot_value
        if len(digit_items) == 1:
            price = revise_value(abs(float(digit_items[0])), slot_value)
            return _format_single_value(price, slot_value,
                                        '左右|上下|差不多|大概', (0.8, 1.2))
        first, second = abs(float(digit_items[0])), abs(float(digit_items[1]))
        ranged = _format_value_range(min(first, second), max(first, second),
                                     slot_value, 1.0)
        return slot_value if ranged is None else ranged

    if slot_name == '时长':
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            if '半' not in slot_value:
                return slot_value
            # Only "half" is present; start from zero and let revise_value
            # adjust it (presumably adding the half -- confirm in revise_value).
            hours = 0.
        elif len(digit_items) == 1:
            hours = float(digit_items[0])
        elif re.search('[1-9]小时[1-9][0-9]分钟', slot_value):
            # "<h>小时<mm>分钟": the two numbers are hours and minutes of one
            # duration, not the two ends of a range.
            hours = float(digit_items[0]) + float(digit_items[1]) / 60.0
        else:
            first, second = float(digit_items[0]), float(digit_items[1])
            ranged = _format_value_range(min(first, second), max(first, second),
                                         slot_value, 0.1,
                                         b_check_half=True, b_time=True)
            return slot_value if ranged is None else ranged
        hours = revise_value(hours, slot_value, b_check_half=True, b_time=True)
        return _format_single_value(hours, slot_value,
                                    '左右|差不多|大概', (0.8, 1.2))

    if slot_name == '准点率':
        separator = re.search('至|到|-|—', slot_value)
        # '至少' (at least) contains the separator character '至' but is a
        # lower-bound phrase, not a range.
        if separator and '至少' not in slot_value:
            value1 = slot_value[:separator.start()]
            value2 = slot_value[separator.end():]
            # Parse each side of the separator on its own.
            punctuality1 = retrieve_punctuality(value1)
            punctuality2 = retrieve_punctuality(value2)
            if punctuality1 < 0.1 or punctuality2 < 0.1:
                return slot_value
            if abs(punctuality1 - punctuality2) <= 0.01:
                return '{:.2f}'.format((punctuality1 + punctuality2) / 2)
            # Order the bounds so the interval is well formed, as the other
            # numeric slots do.
            low, high = sorted((punctuality1, punctuality2))
            return '[{:.2f}, {:.2f}]'.format(low, high)
        punctuality = retrieve_punctuality(slot_value)
        if punctuality < 0.1:
            return slot_value
        if re.search('最低|至少|最少|以上|不低于', slot_value):
            return '[{:.2f}, ∞)'.format(punctuality)
        if re.search('大概|左右|上下|差不多', slot_value):
            return '[{:.2f}, {:.2f}]'.format(punctuality * 0.8,
                                             punctuality * 1.2)
        return slot_value

    if slot_name in ('出发时间', '到达时间'):
        result = Recognizers.recognize_datetime(slot_value, Culture.Chinese)
        if not result or not result[-1].resolution['values']:
            return slot_value
        # Only the first resolution value of the last result is used.
        resolved = result[-1].resolution['values'][0]
        if resolved['type'] == 'time':
            time_str = resolved['value'][:5]
            # The "about" check comes first because '前后' contains both
            # '前' and '后', which the next two patterns would also match.
            if re.search('大概|左右|前后|差不多', slot_value):
                return '[{}, {}]'.format(time_delta(time_str, [0, -10]),
                                         time_delta(time_str, [0, 10]))
            if re.search('最早|后|以后|之后', slot_value):
                return '[{}, ∞)'.format(time_str)
            if re.search('最迟|最晚|前|以前|之前', slot_value):
                return '(-∞, {}]'.format(time_str)
            return time_str
        if resolved['type'] == 'timerange':
            return '[{}, {}]'.format(resolved['start'][:5],
                                     resolved['end'][:5])
        return slot_value

    if slot_name in ('时间', '开始时间', '结束时间'):
        result = Recognizers.recognize_datetime(slot_value, Culture.Chinese)
        if (not result or not result[-1].resolution['values']
                or result[-1].resolution['values'][0]['type'] != 'time'):
            return slot_value
        return result[-1].resolution['values'][0]['value'][:5]

    if slot_name in ('人数', '天数', '数量', '距离', 'choice'):
        # '两' is handled explicitly for 'choice' before number extraction.
        if slot_name == 'choice' and '两' in slot_value:
            return '2'
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            return slot_value
        # Keep only the integer part of the first number.
        return str(digit_items[0]).split('.')[0]

    return slot_value