Example #1
File: cloud.py Project: EQ4/DRR
def get_next(info_query):
  # Given a file, we look to see if there's another one which could come after -- we won't look in the database 
  if type(info_query) is str:
    info_query = stream_info(info_query)

  #
  # We are looking for a file with the closest start time to 
  # the end time of our stream whose file size is greater than a 
  # certain threshold
  #
  target_time = info_query['start_date'] + timedelta(seconds=info_query['duration_sec'])

  return get_file_for_ts(target_time=target_time, bias=None, exclude_path=info_query['name'])
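The target_time above is simply the stream's end time. A minimal sketch of that arithmetic, using a made-up stream_info dict that carries only the fields get_next reads (the name and times are assumptions for illustration):

from datetime import datetime, timedelta

# Hypothetical stream_info result -- only the fields get_next() touches.
info_query = {
  'name': 'WXYZ-1437000000.mp3',  # assumed naming, for illustration only
  'start_date': datetime(2015, 7, 14, 9, 0, 0),
  'duration_sec': 3600,
}

# The end of this stream is the time we want the next file to start closest to.
target_time = info_query['start_date'] + timedelta(seconds=info_query['duration_sec'])
print(target_time)  # 2015-07-14 10:00:00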
Example #2
File: cloud.py Project: EQ4/DRR
def register_stream_list(reindex=False):
  # Find the local streams and make sure they are all registered in the sqlite3 database. 
  #
  # Get the existing streams as a set
  #
  # If we are asked to re-index (due to trying to fix a bug) then we ignore what we have
  # and just go ahead and do everything.
  #
  if reindex:
    all_registered = Set([])

  else: 
    all_registered = Set(DB.all('streams', ['name']))

  # There should be a smarter way to do this ... you'd think. We should also
  # be more faithfully giving things extensions since it's not 100% mp3
  all_files = Set(glob('%s/*.mp3' % misc.DIR_STREAMS))
 
  diff = all_files.difference(all_registered)

  # This is a list of files we haven't scanned yet...
  if not diff: return True

  # This basically means we could still be writing
  # this file.
  #
  # We take the cascade time and then buffer it by a minute, just
  # to be sure.
  # 
  # If the creation time is after this cutoff then we don't register
  # the file until later.
  cutoff = time.mktime((datetime.now() - timedelta(minutes=1, seconds=misc.config['cascadetime'])).timetuple())

  for fname in diff:
    if len(fname) == 0 or os.path.getctime(fname) > cutoff:
      continue

    info = stream_info(fname)
    if not info:
      continue

    DB.register_stream(info)

    if not misc.manager_is_running():
      logging.info("Manager is gone, shutting down")
      raise Exception()
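The cutoff computation above turns "now, minus the cascade time, minus a one-minute buffer" into a unix timestamp so it can be compared against os.path.getctime(). A minimal sketch of just that piece, with an assumed cascadetime value (the real one comes from misc.config):

import time
from datetime import datetime, timedelta

cascadetime = 900  # assumed: cascade period in seconds

# Anything created after this instant may still be mid-write, so skip it.
cutoff = time.mktime((datetime.now() - timedelta(minutes=1, seconds=cascadetime)).timetuple())

# A file is old enough to register once its ctime falls at or before the cutoff:
#   os.path.getctime(fname) <= cutoff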
Example #3
File: cloud.py Project: EQ4/DRR
def get_file_for_ts(target_time, bias=None, exclude_path=None):
  # Given a datetime target_time, this finds the closest file either with a bias
  # of +1 for after, -1 for before (or within) or no bias for the closest match.
  #
  # An exclude_path can be set to remove it from the candidates to be searched
  best_before_time = None
  best_before_info = None

  best_after_time = None
  best_after_info = None

  #print "-----------------------"
  for candidate_path in glob('%s/*.mp3' % misc.DIR_STREAMS):
    if candidate_path == exclude_path: continue

    info_candidate = stream_info(candidate_path)
    if not info_candidate or info_candidate['duration_sec'] < 10.0:
      continue

    difference = info_candidate['start_date'] - target_time

    # A negative difference means this candidate starts before our target
    # time, so keep the one whose start is closest before it.
    #
    # BUGBUG: There's a hole in here ... pretend there's an expansive file starting at t0 and
    # a small one at t1 where start time of t0 < t1 so t1 is the file that is selected even though
    # t0 is a better candidate.
    #
    if difference < timedelta() and (not best_before_time or difference > best_before_time):
      best_before_time = difference
      best_before_info = info_candidate

    # A positive difference means this candidate starts after our target
    # time, so keep the one whose start is closest after it.
    elif difference > timedelta() and (not best_after_time or difference < best_after_time):
      best_after_time = difference
      best_after_info = info_candidate

  # print target_time, "\n", best_before_time, best_before_info, "\n", best_after_time, best_after_info
  if bias == -1:
    # Make sure that our candidate has our time within it
    # print best_before_info['start_date'], timedelta(seconds=best_before_info['duration_sec']) , target_time
    if best_before_info and best_before_info['start_date'] + timedelta(seconds=best_before_info['duration_sec']) > target_time:
      # This means that we have found a valid file and we can return the successful target_time 
      # and our info
      return best_before_info, target_time

    # Otherwise that means that our best time doesn't actually have our target time!
    # So we return where we ought to start and the file we can start at
    if best_after_info:
      return best_after_info, best_after_info['start_date']

    else:
      return None, None

  if bias is None:
    if not best_before_info and not best_after_info:
      return None, None

    if not best_after_info or (best_before_info and abs(best_before_time) < abs(best_after_time)):
      return best_before_info, max(target_time, best_before_info['start_date'])

    return best_after_info, min(target_time, best_after_info['start_date'])

  if bias == +1:
    # print best_after_info, best_before_info, exclude_path
    if not best_after_info:
      return None, target_time

    return best_after_info, min(target_time, best_after_info['start_date'])
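The BUGBUG noted in the loop is easy to reproduce: an expansive file t0 that actually contains the target time loses to a short file t1 whose start time is merely nearer. A minimal sketch with made-up times:

from datetime import datetime, timedelta

target = datetime(2015, 7, 14, 10, 30)

# t0 covers the target outright; t1 starts closer but ends too soon.
t0 = {'start_date': datetime(2015, 7, 14, 9, 0), 'duration_sec': 4 * 3600}
t1 = {'start_date': datetime(2015, 7, 14, 10, 0), 'duration_sec': 600}

for info in (t0, t1):
  difference = info['start_date'] - target
  end = info['start_date'] + timedelta(seconds=info['duration_sec'])
  print('%s %s covers target: %s' % (info['start_date'], difference, end > target))

# Both differences are negative and t1's is closer to zero, so the loop
# keeps t1 as best_before_info -- even though only t0 spans 10:30.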
Example #4
File: cloud.py Project: EQ4/DRR
def find_streams(start_list, duration_min):
  # Given a start week minute this looks for streams in the storage
  # directory that match it - regardless of duration ... so it may return
  # partial show results.
  stream_list = []

  if type(start_list) is int:
    start_list = [start_list]

  # Sort nominally - since we have unix time in the name, this should come out
  # as sorted by time for us for free.
  stitch_list = []
  episode_list = []
  db = DB.connect()

  # So we have a start list, we are about to query our database using the start_minute
  # and end_minute fields ... to get end_minute we need to make use of our duration.
  #
  # timeline ->
  #
  #          ###################           << Region we want
  # start_sea#ch    end_search #           << Search
  #          V                 V
  # |     |     |     |     |     |     |  << Minute
  #          a     b     b     c
  #
  # so we want 
  #     (a) start_minute < start_search and end_minute >= start_search  ||
  #     (b) start_minute > start_search and end_minute <= end_search  ||
  #     (c) start_minute < end_search and end_minute >= end_search
  #     
  condition_list = []
  for start in start_list:
    end_search = (start + duration_min) % TS.MINUTES_PER_WEEK
    # print start, duration_min, end_search
    condition_list.append('start_minute < %d and end_minute >= %d' % (start, start))
    condition_list.append('start_minute > %d and end_minute >= %d and end_minute <= %d' % (start, start, end_search))
    condition_list.append('start_minute < %d and end_minute >= %d' % (end_search, end_search))

  condition_query = "((%s))" % ') or ('.join(condition_list)

  # see https://github.com/kristopolous/DRR/issues/50 - nah this shit is buggy
  condition_query += " and start_unix < datetime(%d, 'unixepoch', 'localtime')" % (TS.sec_now() - misc.config['cascadetime'] + 3)

  full_query = "select * from streams where %s order by week_number * 10080 + start_minute asc" % condition_query

  entry_list = DB.map(db['c'].execute(full_query).fetchall(), 'streams')

  #logging.info(full_query)
  #logging.info(entry_list)
  # print full_query, len(entry_list)
  # We want to make sure that we break down the stream_list into days.  We can't JUST look at the week
  # number since we permit feed requests for shows which may have multiple days.  Since this is leaky
  # data that we don't keep via our separation of concerns, we use a little hack to figure this out.
  by_episode = []
  episode = []
  cutoff_minute = 0
  current_week = 0

  for entry in entry_list:
    # look at start minute, if it's > 12 * cascade time (by default 3 hours), then we presume this is a new episode.
    if entry['start_minute'] > cutoff_minute or entry['week_number'] != current_week:
      if len(episode):
        by_episode.append(episode)

      episode = []

    cutoff_minute = entry['start_minute'] + (12 * misc.config['cascadetime']) % TS.MINUTES_PER_WEEK
    current_week = entry['week_number']

    # We know by definition that every entry in our stream_list is a valid thing we need
    # to look at.  We just need to make sure we break them down by episode
    episode.append(entry)

  if len(episode):
    by_episode.append(episode)

  #print len(by_episode), condition_query
  # Start the creation of the audio files.
  for episode in by_episode:

    # We blur the test start to a bigger window
    test_start = (episode[0]['start_minute'] / (60 * 4))

    for week_start in start_list:
      # Blur the query start to the same window
      query_start = week_start / (60 * 4)

      # This shouldn't be necessary but let's do it anyway
      if abs(query_start - test_start) <= 1:
        # Under these conditions we can say that this episode
        # can be associated with this particular start time

        # The start_minute is based on the week
        offset_start = week_start - episode[0]['start_minute']
        fname = audio.stream_name(episode, week_start, duration_min)
        print '--name', episode[0]['name'], fname

        # We get the name that it will be and then append that
        stream_list.append(stream_info(fname))

        # print offset_start, duration_min, episode
        episode_list.append((episode, offset_start, duration_min))
        break

  # print stream_list, "\nbreak\n", episode_list, "\nasfdasdf\n"
  return stream_list, episode_list
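The three overlap cases (a), (b), (c) in the comment map one-to-one onto the SQL fragments built in the loop. A minimal standalone sketch for a single start minute, with assumed example values (TS.MINUTES_PER_WEEK inlined):

MINUTES_PER_WEEK = 10080  # TS.MINUTES_PER_WEEK in the real code

start, duration_min = 600, 120  # assumed: Monday 10:00, a two-hour window
end_search = (start + duration_min) % MINUTES_PER_WEEK

condition_list = [
  'start_minute < %d and end_minute >= %d' % (start, start),  # (a) straddles the start
  'start_minute > %d and end_minute >= %d and end_minute <= %d' % (start, start, end_search),  # (b) inside the window
  'start_minute < %d and end_minute >= %d' % (end_search, end_search),  # (c) straddles the end
]
print("((%s))" % ') or ('.join(condition_list))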