def records_to_items(items):
    """Merge records describing the same content into combined Item objects.

    Two records are likely to represent the same content in different forms
    if they have the same title, the same URL basename without its
    extension (foo.mp3 vs. foo.mpg), and the same duration.  Such records
    are grouped, and one ``Item`` is produced per group.

    This is a generator: nothing is read from ``items`` until iteration
    starts, and all records are grouped before the first ``Item`` is
    yielded.

    Args:
        items: iterable of record mappings.  Each record is read for the
            keys 'url', 'name', 'duration', 'type' and 'guid'.  Every key
            other than 'url', 'guid' and 'type' is passed to ``Item`` as a
            keyword argument, taken from the first record iterated in the
            group.

    Yields:
        One ``Item`` per group.  ``audio_url`` / ``audio_guid`` are set
        from a record whose 'type' is 'audio'; ``video_url`` /
        ``video_guid`` are set from a record with any other 'type'.

    Raises:
        KeyError: if a record lacks one of the keys read above.
        ValueError, TypeError: if a record's 'duration' is not accepted by
            the ``d`` format code used to build the grouping key.
    """
    # Maps keys of the form "TITLE/BASENAME/DURATION" to a dict of URLs.
    # Each inner dict maps a real .mp3 or .mpg URL to the record referring
    # to it, so a URL seen twice within a group keeps only its last record.
    groups = {}
    for record in items:
        path = urlparse(record['url']).path
        stem = splitext(basename(path))[0]
        key = u"{}/{}/{:d}".format(record['name'], stem, record['duration'])
        groups.setdefault(key, {})[record['url']] = record

    # Walk the groups and create Item objects which combine the audio and
    # video variants together.
    for records_by_url in groups.values():
        merged = None
        for record in records_by_url.values():
            if merged is None:
                # The per-variant fields are attached below as audio_* or
                # video_* attributes, so keep them out of the constructor.
                shared = {
                    field: record[field]
                    for field in record
                    if field not in ('url', 'guid', 'type')
                }
                merged = Item(**shared)
            if record['type'] == 'audio':
                merged.audio_url = record['url']
                merged.audio_guid = record['guid']
            else:
                merged.video_url = record['url']
                merged.video_guid = record['guid']
        # Compare against None explicitly so the guard does not depend on
        # how Item instances evaluate in a boolean context.
        if merged is not None:
            yield merged