def get_markers(db, client_name, clip_id, congress, chamber):
    """Build clip records from the markers stored for one video.

    Queries the marker API for ``clip_id`` (sorted by ascending offset) and
    turns every marker into a clip dict with ``offset``, ``events`` and
    ``time`` keys. Every clip except the last also gets a ``duration``: the
    distance to the next marker's offset. The marker text is scanned with
    ``rtc_utils`` for legislators, bills and roll calls; matches are stored
    on the clip and accumulated, without duplicates, across the whole video.

    Args:
        db: handle passed through to ``query_api`` and
            ``rtc_utils.extract_legislators``; also used for ``db.warning``.
        client_name: path component appended to ``API_PREFIX``.
        clip_id: video id interpolated into the query filter.
        congress: passed to ``rtc_utils.extract_bills``.
        chamber: passed to the legislator and roll extractors.

    Returns:
        ``(clips, bill_ids, legislators, legislator_ids, roll_ids)``, or a
        tuple of five ``None`` values when the query yields no markers.
    """
    api_url = API_PREFIX + client_name + '?type=marker&size=100000'
    data = '{"filter": { "term": { "video_id": %s}}, "sort": [{"offset":{"order":"asc"}}]}' % clip_id
    markers = query_api(db, api_url, data)

    if not markers:
        db.warning('There are no markers for video id: %s' % clip_id)
        return (None, None, None, None, None)

    clips = []
    bill_ids = []
    legislators = []
    legislator_ids = []
    roll_ids = []

    last_index = len(markers) - 1
    # Walk by position: comparing markers by value (``m != markers[-1]`` /
    # ``markers.index(m)``) picks the wrong neighbour when two markers are
    # equal and costs a linear scan per marker.
    for index, m in enumerate(markers):
        m_new = m['_source']
        c = {
            'offset': m_new['offset'],
            'events': [htmlentitydecode(m_new['name']).strip()],
            'time': m_new['datetime'],
        }
        if index < last_index:  # the last marker has no successor to measure against
            next_offset = markers[index + 1]['_source']['offset']
            c['duration'] = next_offset - m_new['offset']

        year = dateparse(m_new['datetime']).year
        text = c['events'][0]
        legis, bio_ids = rtc_utils.extract_legislators(text, chamber, db)
        b = rtc_utils.extract_bills(text, congress)
        r = rtc_utils.extract_rolls(text, chamber, year)

        if legis:
            c['legislator_names'] = legis
            for l in legis:
                if l not in legislators:
                    legislators.append(l)
        if bio_ids:
            c['legislator_ids'] = bio_ids
            for bi in bio_ids:
                if bi not in legislator_ids:
                    legislator_ids.append(bi)
        if r:
            c['roll_ids'] = r
            for ro in r:
                if ro not in roll_ids:
                    roll_ids.append(ro)
        if b:
            c['bill_ids'] = b
            for bill in b:
                if bill not in bill_ids:
                    bill_ids.append(bill)

        clips.append(c)

    return (clips, bill_ids, legislators, legislator_ids, roll_ids)
def get_clips_for_senate(db, clip_id, congress, duration, year):
    """Split a Senate video into fixed five-minute clips described by captions.

    The video is cut into ``duration // 300 + 1`` consecutive clips. For each
    clip the captions in its window are scanned with ``rtc_utils`` for
    legislators, bills and roll calls; matches are stored on the clip and
    accumulated, without duplicates, across the whole video. Each clip gets a
    single human-readable ``events`` string listing the legislators and bills
    found, or a "No description" placeholder when nothing was found.

    Args:
        db: handle passed to ``rtc_utils.extract_legislators``; its ``bills``
            collection is queried for bill titles.
        clip_id: id passed to ``get_captions``.
        congress: passed to ``rtc_utils.extract_bills``.
        duration: total length of the video, in the same units as the
            clip offsets (presumably seconds -- confirm with the caller).
        year: passed to ``rtc_utils.extract_rolls``.

    Returns:
        ``(clips, bill_ids, legislators, legislator_ids, roll_ids)``.
    """
    chamber = "senate"
    clip_segment = 5 * 60  # go with 5 minute clips
    # Floor division keeps the count an integer regardless of how ``/``
    # behaves for the running interpreter or the type of ``duration``.
    clip_number = int(duration // clip_segment) + 1

    clips = []
    bill_ids = []
    legislators = []
    legislator_ids = []
    roll_ids = []

    caps = get_captions('floor.senate.gov', clip_id)
    offset = 0
    for clip_num in range(1, clip_number + 1):
        start = offset
        if clip_num == clip_number:
            # Last clip: only whatever is left of the video.
            # NOTE(review): when ``duration`` is an exact multiple of the
            # segment length this trailing clip has duration 0.
            dur = duration - offset
        else:
            dur = clip_segment

        c = {
            'offset': start,
            'duration': dur,
        }
        events = ''

        captions = get_senate_clip_captions(caps, start, start + clip_segment)
        legis, bio_ids = rtc_utils.extract_legislators(captions, chamber, db)
        b = rtc_utils.extract_bills(captions, congress)
        r = rtc_utils.extract_rolls(captions, chamber, year)

        if legis:
            c['legislator_names'] = legis
            for l in legis:
                if l not in legislators:
                    legislators.append(l)
            # Separate the names of *this* clip; deciding the separator from
            # the cross-clip accumulator drops or misplaces it.
            events += 'Legislators mentioned in this clip: ' + '; '.join(legis)
        if bio_ids:
            c['legislator_ids'] = bio_ids
            for bi in bio_ids:
                if bi not in legislator_ids:
                    legislator_ids.append(bi)
        if r:
            c['roll_ids'] = r
            for ro in r:
                if ro not in roll_ids:
                    roll_ids.append(ro)
        if b:
            c['bill_ids'] = b
            if events:
                # Keep the bills section from running into the last name.
                events += '; '
            events += 'Bills mentioned in this clip: '
            for bill in b:
                if bill not in bill_ids:
                    bill_ids.append(bill)
                bill_name = db['bills'].find_one({'bill_id': bill})
                if not bill_name:
                    continue  # unknown bill: nothing to describe
                if bill_name.get('short_title'):
                    events += bill_name['short_title'] + '; '
                else:
                    # Fall back to type + number, e.g. "HR1234".
                    events += ("%s%s" % (bill_name['bill_type'], bill_name['number'])).upper() + '; '

        if events == '':
            events = "No description for clip number %s" % clip_num
        c['events'] = [events]
        clips.append(c)
        offset = offset + clip_segment

    return (clips, bill_ids, legislators, legislator_ids, roll_ids)