def list_parser(value, propertyListName,
                discrete_separators_re=discrete_separators_re,
                range_separators_re=range_separators_re,
                allow_discrete=False, fill_gaps=False):
    """Parse a textual list of numerals made of discrete values and ranges.

    The text is first cut on ``discrete_separators_re``; pieces that belong
    to a single range expression are then glued back together, each
    resulting element is expanded (ranges are inclusive) and the numbers are
    collected in order of first appearance, without duplicates.

    :param value: text to parse.
    :param propertyListName: key under which the full list is returned when
        more than one number is found.
    :param discrete_separators_re: compiled pattern separating list items.
    :param range_separators_re: compiled pattern separating range endpoints.
    :param allow_discrete: when false, any number smaller than the last
        number kept so far is dropped.
    :param fill_gaps: when true, the result is replaced by every integer
        from its minimum to its maximum.
    :return: ``{None: first, propertyListName: numbers}`` when more than one
        number remains, the single number when exactly one was found, or
        ``None`` when nothing was found.
    :raises IndexError: when the very first piece starts with a range
        separator (there is nothing to attach it to) or when the last piece
        ends with one (there is no following piece to attach).
    """
    # NOTE(review): a second ``list_parser`` is defined later in this file
    # and replaces this one at import time -- confirm which one is intended.
    pieces = [x.strip() for x in discrete_separators_re.split(value) if x != '']

    # Re-assemble range expressions that the discrete split cut apart.
    elements = []
    i = 0
    while i < len(pieces):
        if i < len(pieces) - 2 and range_separators_re.match(pieces[i + 1]):
            # "<a>", "<separator...>", "<b>"  ->  one range element.
            elements.append(pieces[i] + pieces[i + 1] + pieces[i + 2])
            i += 3
            continue

        match = range_separators_re.search(pieces[i])
        if match and match.start() == 0:
            # Piece starts with a range separator: it continues the element
            # assembled just before it.  The last *assembled* element is
            # addressed with [-1]; the input counter ``i`` is not a valid
            # index here because earlier merges make ``elements`` shorter
            # than ``i`` (the former ``[i-1]`` lookup raised IndexError).
            elements[-1] = elements[-1] + pieces[i]
        elif match and match.end() == len(pieces[i]):
            # Piece ends with a range separator: attach the following piece.
            # That following piece is still visited on the next iteration.
            elements.append(pieces[i] + pieces[i + 1])
        else:
            elements.append(pieces[i])
        i += 1

    ret = []
    for element in elements:
        range_values = [x.strip() for x in range_separators_re.split(element) if x != '']
        if len(range_values) > 1:
            # Expand every consecutive pair of endpoints as an inclusive range.
            for first, last in zip(range_values, range_values[1:]):
                start_range_ep = parse_numeral(first)
                end_range_ep = parse_numeral(last)
                for range_ep in range(start_range_ep, end_range_ep + 1):
                    if range_ep not in ret:
                        ret.append(range_ep)
        else:
            discrete_value = parse_numeral(element)
            if discrete_value not in ret:
                ret.append(discrete_value)

    if len(ret) > 1:
        if not allow_discrete:
            # Keep only values that do not go below the last value kept.
            kept = [ret[0]]
            for candidate in ret[1:]:
                if not (candidate < kept[-1]):
                    kept.append(candidate)
            ret = kept
        if fill_gaps:
            ret = list(range(min(ret), max(ret) + 1))
        if len(ret) > 1:
            return {None: ret[0], propertyListName: ret}
    if len(ret) > 0:
        return ret[0]
    return None
def list_parser(value, property_list_name,
                discrete_separators_re=discrete_separators_re,
                range_separators_re=range_separators_re,
                allow_discrete=False, fill_gaps=False):
    """Parse a textual list of numerals made of discrete values and ranges.

    Processing steps:

    1. ``value`` is split on ``discrete_separators_re``; empty pieces are
       dropped and the rest are stripped.
    2. Pieces that form one range expression are joined again (a piece
       followed by a separator piece, a piece starting with a range
       separator, or a piece ending with one).
    3. Each element is split on ``range_separators_re``; several endpoints
       are expanded pairwise as inclusive ranges, a lone value is passed to
       ``parse_numeral`` as is.  Numbers are collected once, in order of
       first appearance.
    4. Unless ``allow_discrete`` is set, numbers smaller than the last kept
       number are discarded; with ``fill_gaps`` the result becomes every
       integer between its minimum and maximum.

    :param value: text to parse.
    :param property_list_name: key under which the full list is returned
        when more than one number is found.
    :param discrete_separators_re: compiled pattern separating list items.
    :param range_separators_re: compiled pattern separating range endpoints.
    :param allow_discrete: keep out-of-order numbers when true.
    :param fill_gaps: fill the result to a contiguous integer range when true.
    :return: ``{None: first, property_list_name: numbers}`` when more than
        one number remains, the single number when exactly one was found,
        or ``None`` when nothing was found.
    :raises IndexError: when the very first piece starts with a range
        separator, or when the last piece ends with one.
    """
    # NOTE(review): an earlier ``list_parser`` (second parameter named
    # ``propertyListName``) is defined above in this file; this later
    # definition is the one that stays bound -- confirm the duplicate is
    # intentional.
    discrete_elements = [
        x.strip() for x in discrete_separators_re.split(value) if x != ''
    ]

    proper_discrete_elements = []
    i = 0
    while i < len(discrete_elements):
        current = discrete_elements[i]
        if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i + 1]):
            # value / separator / value spread over three pieces.
            proper_discrete_elements.append(
                current + discrete_elements[i + 1] + discrete_elements[i + 2])
            i += 3
        else:
            match = range_separators_re.search(current)
            if match and match.start() == 0:
                # Leading range separator: extend the element assembled
                # last.  Indexing with [-1] instead of [i-1] is the fix:
                # after any earlier merge the output list is shorter than
                # ``i`` and ``[i-1]`` raised IndexError.
                proper_discrete_elements[-1] = proper_discrete_elements[-1] + current
            elif match and match.end() == len(current):
                # Trailing range separator: attach the next piece (which is
                # nevertheless processed again on the next iteration).
                proper_discrete_elements.append(current + discrete_elements[i + 1])
            else:
                proper_discrete_elements.append(current)
            i += 1

    ret = []
    for discrete_element in proper_discrete_elements:
        range_values = [
            x.strip() for x in range_separators_re.split(discrete_element) if x != ''
        ]
        if len(range_values) > 1:
            for first, last in zip(range_values, range_values[1:]):
                start_range_ep = parse_numeral(first)
                end_range_ep = parse_numeral(last)
                # Inclusive range between two consecutive endpoints.
                for range_ep in range(start_range_ep, end_range_ep + 1):
                    if range_ep not in ret:
                        ret.append(range_ep)
        else:
            discrete_value = parse_numeral(discrete_element)
            if discrete_value not in ret:
                ret.append(discrete_value)

    if len(ret) > 1:
        if not allow_discrete:
            # Drop every value that is smaller than the last value kept.
            valid_ret = [ret[0]]
            for candidate in ret[1:]:
                if not (candidate < valid_ret[-1]):
                    valid_ret.append(candidate)
            ret = valid_ret
        if fill_gaps:
            ret = list(range(min(ret), max(ret) + 1))
        if len(ret) > 1:
            return {None: ret[0], property_list_name: ret}
    if len(ret) > 0:
        return ret[0]
    return None
def episode_parser(value):
    """Parse episode numbers out of a string such as ``"01-03"``.

    ``value`` is cut on every ASCII letter; each remaining chunk is either a
    single numeral or a dash-separated chain of numerals that is expanded as
    inclusive ranges between consecutive endpoints.

    :param value: text containing the episode number(s).
    :return: ``{None: first, 'episodeList': numbers}`` when more than one
        number was found, the single number when exactly one was found, or
        ``None`` when none was found.
    """
    chunks = [x for x in re_split('[a-zA-Z]', value) if x]

    ret = []
    for letters_elt in chunks:
        dashed_values = [x for x in letters_elt.split('-') if x]
        if len(dashed_values) > 1:
            # Walk consecutive endpoint pairs.  The previous code looped the
            # right number of times but always read endpoints 0 and 1, so a
            # chain like "1-3-5" repeated 1..3 and never reached 5.
            chain = []
            for start_text, end_text in zip(dashed_values, dashed_values[1:]):
                start_dash_ep = parse_numeral(start_text)
                end_dash_ep = parse_numeral(end_text)
                for dash_ep in range(start_dash_ep, end_dash_ep + 1):
                    # Adjacent ranges share an endpoint; emit it only once.
                    if not chain or chain[-1] != dash_ep:
                        chain.append(dash_ep)
            ret.extend(chain)
        else:
            ret.append(parse_numeral(letters_elt))

    if len(ret) > 1:
        return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
    elif len(ret) > 0:
        return ret[0]
    else:
        return None
def _formater(episodeNumber):
    """Turn a raw episode numeral into an episode number or season/episode.

    :param episodeNumber: raw value handed to ``parse_numeral``.
    :return: ``None`` when the parsed number passes ``valid_year`` or when
        the derived season exceeds 50; the number itself when it is not
        greater than 100; otherwise a dict with ``'season'`` (number // 100)
        and ``'episodeNumber'`` (number % 100).
    """
    number = parse_numeral(episodeNumber)

    # Numbers accepted by valid_year are not treated as episode numbers.
    if valid_year(number):
        return None

    if not number > 100:
        return number

    # Three or more digits: leading digits are the season, last two the
    # episode.
    season, episode = number // 100, number % 100

    # episodes which have a season > 50 are most likely errors
    # (Simpson is at 25!)
    if season > 50:
        return None

    return {'season': season, 'episodeNumber': episode}
def season_episode_parser(episode_number):
    """Interpret a numeral as an episode number or a season/episode pair.

    :param episode_number: raw value handed to ``parse_numeral``.
    :return: ``None`` when the parsed number passes ``valid_year`` or when
        the derived season exceeds 50; the parsed number when it is not
        greater than 100; otherwise ``{"season": n // 100,
        "episodeNumber": n % 100}``.
    """
    parsed = parse_numeral(episode_number)

    if valid_year(parsed):
        # Year-like numbers are rejected (no value is produced).
        return None

    if parsed > 100:
        season = parsed // 100
        episode = parsed % 100
        # episodes which have a season > 50 are most likely errors
        # (Simpson is at 25!)
        if season > 50:
            return None
        return {"season": season, "episodeNumber": episode}

    return parsed
def format_guess(guess):
    """Convert the values of a guess to their natural type, in place.

    Values stored under the numeric property names (season, episode number,
    year, ...) are replaced by the result of ``parse_numeral``.  Any other
    value that is an instance of ``base_text_type`` is replaced by its
    synonym (``get_synonym``) with backslashes removed; the ``'edition'``
    value is passed through ``clean_string`` first.

    The dictionary given as input is modified and also returned.
    """
    numeral_props = ('season', 'episodeNumber', 'year', 'cdNumber',
                     'cdNumberTotal', 'bonusNumber', 'filmNumber')

    for prop, value in guess.items():
        if prop in numeral_props:
            guess[prop] = parse_numeral(guess[prop])
            continue

        if not isinstance(value, base_text_type):
            # Non-text values of other properties are left untouched.
            continue

        text = clean_string(value) if prop in ('edition',) else value
        guess[prop] = get_synonym(text).replace('\\', '')

    return guess