Пример #1
0
def base_stats():
  """Reports base-level statistical information about the health of the server.

  This is used for the /stats and /heartbeat call.

  Returns a dict with uptime and clock values, database counters, version and
  uuid, the time until the next prune, the system load, and details about this
  process: open file paths, connection count, memory figures and thread names,
  plus cloud.size('.') scaled down by 1024 ** 3.

  'load' is the list of 1, 5 and 15 minute load averages, or 0 when they
  cannot be obtained.
  """
  try:
    # Ask the OS directly rather than forking a `uptime | awk` pipeline; the
    # old code noted that the pipeline could end in a memory error.
    load = list(os.getloadavg())

  except (AttributeError, OSError):
    # Not available on this platform or unobtainable right now. Keep the
    # historical 0 sentinel.
    load = 0

  uptime = TS.uptime()

  # One handle on our own process serves every psutil query below.
  process = psutil.Process()

  return {
    'human-uptime': "%dd %02d:%02d:%02d" % ( uptime / TS.ONE_DAY_SECOND, (uptime / TS.ONE_HOUR_SECOND) % 24, (uptime / 60) % 60, uptime % 60 ),
    'human-now': TS.ts_to_name(),
    'computer-uptime': uptime,
    'computer-now': time.time(),
    'last-recorded': float(DB.get('last_recorded', use_cache=False) or 0),
    'hits': DB.run('select sum(value) from kv where key like "%hit%"').fetchone()[0],
    'version': __version__,
    'uuid': config['uuid'],
    'next-prune': int(last_prune - (TS.unixtime('prune') - prune_duration)),
    'load': load,
    'files': [m.path for m in process.open_files()],
    'connections': len(process.connections()),
    'memory': [
      # Current memory footprint in MB
      process.memory_info().rss / (1024.0 * 1024),

      # Maximum lifetime memory footprint in MB
      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    ],
    'threads': [ thread.name for thread in threading.enumerate() ],
    'disk': cloud.size('.') / (1024.0 ** 3)
  }
Пример #2
0
def base_stats():
  """Reports base-level statistical information about the health of the server.

  This is used for the /stats and /heartbeat call.

  Returns a dict with uptime and clock values, database counters, version and
  uuid, the time until the next prune, the system load, and details about this
  process: open file paths, connection count, memory figures and thread names,
  plus cloud.size('.') scaled down by 1024 ** 3.

  'load' is the list of 1, 5 and 15 minute load averages, or 0 when they
  cannot be obtained.
  """
  try:
    # Ask the OS directly rather than forking a `uptime | awk` pipeline; the
    # old code noted that the pipeline could end in a memory error.
    load = list(os.getloadavg())

  except (AttributeError, OSError):
    # Not available on this platform or unobtainable right now. Keep the
    # historical 0 sentinel.
    load = 0

  uptime = TS.uptime()

  # One handle on our own process serves every psutil query below.
  process = psutil.Process()

  return {
    'human-uptime': "%dd %02d:%02d:%02d" % ( uptime / TS.ONE_DAY_SECOND, (uptime / TS.ONE_HOUR_SECOND) % 24, (uptime / 60) % 60, uptime % 60 ),
    'human-now': TS.ts_to_name(),
    'computer-uptime': uptime,
    'computer-now': time.time(),
    'last-recorded': float(DB.get('last_recorded', use_cache=False) or 0),
    'hits': DB.run('select sum(value) from kv where key like "%hit%"').fetchone()[0],
    'version': __version__,
    'uuid': config['uuid'],
    'next-prune': int(last_prune - (TS.unixtime('prune') - prune_duration)),
    'load': load,
    'files': [m.path for m in process.open_files()],
    'connections': len(process.connections()),
    'memory': [
      # Current memory footprint in MB
      process.memory_info().rss / (1024.0 * 1024),

      # Maximum lifetime memory footprint in MB
      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    ],
    'threads': [ thread.name for thread in threading.enumerate() ],
    'disk': cloud.size('.') / (1024.0 ** 3)
  }
Пример #3
0
def main():
    """Read the user records from the database, then plot and print graph metrics.

    Fetches every document of the "userinfo" collection in the "medium"
    database on localhost:27017, builds a graph from the nodes derived from
    those users, and shows four plots in turn: the graph itself, the degree
    distribution, betweenness and the clustering coefficient. Finally prints
    the page rank and the average clustering coefficient.
    """
    client = db.get_client("localhost", 27017)
    try:
        db_conn = db.connect_to_db(client, "medium")
        collection = db.get_collection(db_conn, "userinfo")

        users = db.get(collection, {})
    finally:
        # Release the connection even when one of the lookups above raises.
        db.close(client)

    g = graph.create_graph(get_nodes(users), 200)

    graph_plt = graph.plot_graph(
        g, {
            "with_labels": False,
            "node_color": "blue",
            "width": 1.0,
            "node_width": 0.5
        })
    graph_plt.show()

    degree_histogram_plt = graph.plot_degree_distribution(g)
    degree_histogram_plt.show()

    betweenness_plt = graph.plot_betweenness(g)
    betweenness_plt.show()

    clustering_coefficient_plot = graph.plot_clustering_coefficient(g)
    clustering_coefficient_plot.show()

    print(graph.page_rank(g))
    print(graph.average_clustering_coefficient(g))
Пример #4
0
def our_mime():
  # Maps the stream format stored in the database onto a MIME type.
  # Only 'aac' gets its own type; everything else, including no stored
  # format at all, is served as mp3.
  stored_format = DB.get('format') or 'mp3'
  return 'audio/aac' if stored_format == 'aac' else 'audio/mpeg'
Пример #5
0
  def live(start, offset_min=0):
    """
    Sends off a live-stream equivalent.  Two formats are supported:

     * duration - In the form of strings such as "1pm" or "2:30pm"
     * offset - starting with a negative "-", this means "from the present".
        For instance, to start the stream from 5 minutes ago, you can do "-5"

    The offset form answers with a redirect to /live/m<minute>, and a start of
    the form "m<minute>" is read as a minute number directly.  If the request
    carries a Range header, its first byte position is turned into a number of
    seconds (using the stored bitrate, 128 when unset) and added to the start.
    """
    DB.incr('hits-live')
    if start[0] == '-' or start.endswith('min'):
      # dump things like min or m
      start = re.sub('[a-z]', '', start)
      return redirect('/live/m%f' % (float(TS.minute_now() - abs(float(start)))), code=302)

    # The start is expressed in times like "11:59am ..." We utilize the
    # library we wrote for streaming to get the minute of day this is.
    if start[0] == 'm':
      requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE

    else:
      requested_minute = TS.to_utc('mon', start) - offset_min

    offset_sec = 0
    range_header = request.headers.get('Range', None)
    if range_header:
      # A Range header without a "<digits>-" part yields no match; treat that
      # as "no offset" instead of crashing on None.
      range_match = re.search(r'(\d+)-', range_header)
      if range_match:
        byte1 = int(range_match.group(1))

        # Apply the fallback before int() so an unset bitrate doesn't raise,
        # and again afterwards so a stored 0 still becomes 128.
        bitrate = int(DB.get('bitrate') or 0) or 128

        # We use the byte to compute the offset
        offset_sec = float(byte1) / (bitrate * (1000 / 8.0))

    current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

    now_time = TS.now()
    requested_time = now_time - timedelta(minutes=current_minute) + timedelta(minutes=requested_minute)

    # If the requested minute is greater than the current one, then we can presume that
    # the requested minute refers to yesterday ... as in, someone wants 11pm
    # and now it's 1am.
    if requested_minute > current_minute:
      requested_time -= timedelta(days=1)

    # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
    requested_time += timedelta(seconds=offset_sec)

    # Get the info for the file that contains this timestamp
    start_info, requested_time_available = cloud.get_file_for_ts(target_time=requested_time, bias=-1)

    # Never start before the earliest time the chosen file can provide.
    requested_time = max(requested_time, requested_time_available)
    start_second = (requested_time - start_info['start_date']).total_seconds()

    response = Response(audio.list_slice_stream(start_info, start_second), mimetype=audio.our_mime())

    return response
Пример #6
0
def get_size(fname):
  """Gets a file size, or estimates it from the name if the file doesn't exist yet.

  For a missing file, the digits following an underscore in the name are read
  as a duration in minutes and multiplied out with the stored bitrate (128
  when none is stored).

  Args:
    fname: path of the file, which may not exist yet.

  Returns:
    The size in bytes as an int, or 0 when the file is missing and no
    duration can be read from its name.
  """
  if os.path.exists(fname):
    return os.path.getsize(fname)

  # Otherwise we try to parse the magical file which doesn't exist yet.
  ts_re_duration = compile(r'_(\d*).{4}')
  ts = ts_re_duration.findall(fname)

  # \d* may capture an empty run of digits (e.g. "foo_abcd"); that is the
  # same as "no duration in the name" and must not reach int('').
  if ts and ts[0]:
    duration_min = int(ts[0])

    bitrate = int(DB.get('bitrate') or 128)

    #
    # Estimating mp3 length is actually pretty easy if you don't have ID3 headers.
    # MP3s are rated at things like 128kb/s ... well there you go.
    #
    # They consider a k to be 10^3, not 2^10
    #
    return int((bitrate / 8) * (duration_min * 60) * (10 ** 3))

  # If we can't find it based on the name, then we are kinda
  # SOL and just return 0
  return 0
Пример #7
0
def our_mime():
    """Return the MIME type matching the stream format stored in the database.

    'aac' maps to 'audio/aac'. Any other value, or no stored format at all,
    is treated as mp3 and maps to 'audio/mpeg'.
    """
    if (DB.get('format') or 'mp3') != 'aac':
        # Default to mp3
        return 'audio/mpeg'

    return 'audio/aac'
Пример #8
0
def get_size(fname):
    """Get a file size, or estimate it from the name if the file doesn't exist yet.

    For a missing file, the digits following an underscore in the name are
    read as a duration in minutes and multiplied out with the stored bitrate
    (128 when none is stored).

    Args:
        fname: path of the file, which may not exist yet.

    Returns:
        The size in bytes as an int, or 0 when the file is missing and no
        duration can be read from its name.
    """
    if os.path.exists(fname):
        return os.path.getsize(fname)

    # Otherwise we try to parse the magical file which doesn't exist yet.
    ts_re_duration = compile(r'_(\d*).{4}')
    ts = ts_re_duration.findall(fname)

    # \d* may capture an empty run of digits (e.g. "foo_abcd"); that is the
    # same as "no duration in the name" and must not reach int('').
    if ts and ts[0]:
        duration_min = int(ts[0])

        bitrate = int(DB.get('bitrate') or 128)

        #
        # Estimating mp3 length is actually pretty easy if you don't have ID3 headers.
        # MP3s are rated at things like 128kb/s ... well there you go.
        #
        # They consider a k to be 10^3, not 2^10
        #
        return int((bitrate / 8) * (duration_min * 60) * (10**3))

    # If we can't find it based on the name, then we are kinda
    # SOL and just return 0
    return 0
Пример #9
0
def get_offset(force=False):
  """Gets the station's time offset with regard to UTC from timezonedb.com.

  The service is queried with the configured latitude and longitude.  There
  is a database cache entry for the offset ('offset'); the service is only
  contacted when that entry, read with an expiry of ONE_HOUR_SECOND * 4,
  comes back empty, or when force is set.  If the lookup fails, the last
  stored offset is written back and used.

  Args:
    force: query the service even when a cached offset is available.

  Returns:
    The offset as an int.  0 when running under test or when no offset has
    ever been obtained.
    NOTE(review): the service's gmtOffset is divided by 60 before being
    stored, so this looks like minutes even though the old comment said
    seconds - confirm against callers.
  """
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database
  if misc.IS_TEST: return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    api_key = misc.config['_private']['misc']['timezonedb_key']
    url = "http://api.timezonedb.com/v2.1/get-time-zone?key={}&by=position&lat={}&lng={}".format(api_key, misc.config['lat'], misc.config['long'])

    try:
      # The timeout keeps a dead service from blocking the caller forever and
      # the with-block closes the connection even when parsing fails.
      with urlopen(url, timeout=10) as stream:
        data = stream.read().decode('utf8').split("\n")[1]

      xml = etree.fromstring(data)
      nodes = xml.xpath('gmtOffset')
      opts = {'status': 'OK', 'offset': int(nodes[0].text) }

    except Exception as exc:
      # Best effort: any failure just means we fall back to the old offset.
      logging.warning("Unable to fetch the UTC offset: %s", exc)
      opts = {'status': None}

    if opts['status'] == 'OK':
      offset = opts['offset'] / 60
      logging.info("Found Offset: {}".format(offset))
      DB.set('offset', offset)

    else:
      # use the old one
      DB.set('offset', offset_backup)
      offset = offset_backup

  # No cached value and a failed lookup leaves us with None; report 0
  # instead of raising on float(None).
  return int(float(offset or 0))
Пример #10
0
Файл: ts.py Проект: WaiveCar/DRR
def get_offset(force=False):
    """Get the station's time offset with regard to UTC from the Google timezone API.

    The service is queried with the configured latitude and longitude and the
    current time.  There is a database cache entry for the offset ('offset');
    the service is only contacted when that entry, read with an expiry of
    ONE_HOUR_SECOND * 4, comes back empty, or when force is set.  If the
    lookup fails, the last stored offset is written back and used.

    Args:
        force: query the service even when a cached offset is available.

    Returns:
        The offset as an int.  0 when running under test or when no offset
        has ever been obtained.
        NOTE(review): rawOffset + dstOffset is divided by 60 before being
        stored, so this looks like minutes even though the old comment said
        seconds - confirm against callers.
    """
    import lib.misc as misc

    # If we are testing this from an API level, then we don't
    # have a database
    if misc.IS_TEST: return 0

    offset_backup = DB.get('offset')
    offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

    if not offset or force:
        from urllib.request import urlopen

        when = int(unixtime())

        # NOTE(review): this API key is hard-coded in source control; it
        # should be revoked and moved into private configuration.
        api_key = 'AIzaSyBkyEMoXrSYTtIi8bevEIrSxh1Iig5V_to'
        url = "https://maps.googleapis.com/maps/api/timezone/json?location=%s,%s&timestamp=%d&key=%s" % (
            misc.config['lat'], misc.config['long'], when, api_key)

        try:
            # The timeout keeps a dead service from blocking the caller
            # forever and the with-block always closes the connection.
            with urlopen(url, timeout=10) as stream:
                data = stream.read().decode('utf8')

            opts = json.loads(data)

        except Exception as exc:
            # Best effort: any failure just means we fall back to the old offset.
            logging.warning("Unable to fetch the UTC offset: %s", exc)
            opts = {'status': None}

        # A reply that isn't an object, or has no status, counts as a failure.
        status = opts.get('status') if isinstance(opts, dict) else None

        if status == 'OK':
            logging.info(
                "Location: %s | offset: %s | dst: %s " %
                (opts['timeZoneId'], opts['rawOffset'], opts['dstOffset']))
            offset = (int(opts['rawOffset']) + int(opts['dstOffset'])) / 60
            DB.set('offset', offset)

        else:
            # use the old one
            DB.set('offset', offset_backup)
            offset = offset_backup

    # No cached value and a failed lookup leaves us with None; report 0
    # instead of raising on float(None).
    return int(float(offset or 0))
Пример #11
0
def get_offset(force=False):
  """Gets the station's time offset with regard to UTC from the Google timezone API.

  The service is queried with the configured latitude and longitude and the
  current time.  There is a database cache entry for the offset ('offset');
  the service is only contacted when that entry, read with an expiry of
  ONE_HOUR_SECOND * 4, comes back empty, or when force is set.  If the lookup
  fails, the last stored offset is written back and used.

  Args:
    force: query the service even when a cached offset is available.

  Returns:
    The offset as an int.  0 when running under test or when no offset has
    ever been obtained.
    NOTE(review): rawOffset + dstOffset is divided by 60 before being stored,
    so this looks like minutes even though the old comment said seconds -
    confirm against callers.
  """
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database
  if misc.IS_TEST: return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    when = int(unixtime())

    # NOTE(review): this API key is hard-coded in source control; it should
    # be revoked and moved into private configuration.
    api_key = 'AIzaSyBkyEMoXrSYTtIi8bevEIrSxh1Iig5V_to'
    url = "https://maps.googleapis.com/maps/api/timezone/json?location=%s,%s&timestamp=%d&key=%s" % (misc.config['lat'], misc.config['long'], when, api_key)

    try:
      # The timeout keeps a dead service from blocking the caller forever and
      # the with-block always closes the connection.
      with urlopen(url, timeout=10) as stream:
        data = stream.read().decode('utf8')

      opts = json.loads(data)

    except Exception as exc:
      # Best effort: any failure just means we fall back to the old offset.
      logging.warning("Unable to fetch the UTC offset: %s", exc)
      opts = {'status': None}

    # A reply that isn't an object, or has no status, counts as a failure.
    status = opts.get('status') if isinstance(opts, dict) else None

    if status == 'OK':
      logging.info("Location: %s | offset: %s" % (opts['timeZoneId'], opts['rawOffset']))
      offset = (int(opts['rawOffset']) + int(opts['dstOffset'])) / 60
      DB.set('offset', offset)

    else:
      # use the old one
      DB.set('offset', offset_backup)
      offset = offset_backup

  # No cached value and a failed lookup leaves us with None; report 0
  # instead of raising on float(None).
  return int(float(offset or 0))
Пример #12
0
def get(postcode):
    """Look up a postcode and build the JSON response for it.

    The read lock is held for the whole lookup and always released.

    Returns:
        DATA_NOT_READY with status 202 while the database is not ready,
        POSTCODE_NOT_FOUND with status 404 for an unknown postcode, and
        otherwise the JSON form of the stored record.
    """
    rw_lock.acquire_read()
    try:
        if not database['ready']:
            return flask.jsonify(DATA_NOT_READY), 202

        lookup_key = db.postcode_normalize(postcode)
        if lookup_key not in database['keys']:
            return flask.jsonify(POSTCODE_NOT_FOUND), 404

        return flask.jsonify(db.get(lookup_key))
    finally:
        rw_lock.release_read()
Пример #13
0
def signature(fname, blockcount=-1, depth=1):
    """Compute the signature and block list of an audio file.

    The audio format comes from the database; when none is stored it is
    detected from the file and stored.  AAC files are tried with
    aac_signature first and fall back to mp3_signature when fewer than 5
    blocks come back.  If fewer than 5 blocks are found in the end, the
    stored format is flipped and the whole thing is retried exactly once.

    Args:
        fname: path of the audio file.
        blockcount: passed through to aac_signature / mp3_signature.
        depth: recursion guard; callers leave it at 1.

    Returns:
        A (sig, block) tuple, or (None, None) when the format cannot be
        determined or no usable signature is found even after the retry.
    """
    global _LASTFORMAT
    audio_format = DB.get('format')

    if not audio_format:
        audio_format, _ = get_audio_format(fname)

        if audio_format:
            logging.info("Setting this stream's audio format as %s" %
                         audio_format)
            DB.set('format', audio_format)

        else:
            # logging.warn is a deprecated alias that newer Pythons removed.
            logging.warning("Can't determine type of file for %s.", fname)
            return None, None

    block = None
    if audio_format == _FORMAT_AAC:
        sig, block = aac_signature(fname, blockcount)

    # We permit the idea that a file can be a false positive. But we do not
    # permit the idea that a file can be a false positive and correctly register
    # over some number of sequential blocks (currently set at whatever the
    # constant is below).
    if audio_format == _FORMAT_MP3 or not block or len(block) < 5:
        sig, block = mp3_signature(fname, blockcount)

        # The mp3 parser found something in a stream we believed to be AAC,
        # so remember it as mp3 from now on.
        if len(block) > 0 and audio_format == _FORMAT_AAC:
            DB.set('format', _FORMAT_MP3)
            DB.clear_cache()

    # Stream formats can change actually.
    if len(block) < 5:
        tryformat = _FORMAT_AAC
        if audio_format == _FORMAT_AAC: tryformat = _FORMAT_MP3
        DB.set('format', tryformat)
        DB.clear_cache()

        # Make sure we don't foolishly recurse
        if depth == 1:
            return signature(fname, blockcount, depth + 1)

        else:
            # Otherwise if we fail to find anything upon our change-format desperation
            # move, we should return this as the none type to be handled appropriately.
            return None, None

    _LASTFORMAT = audio_format

    return sig, block
Пример #14
0
def save(fit, suffix="tr"):
    """Store the fit parameters in a shelve database and save the canvas as PDF.

    The parameters are filed under fit.year, keyed by the pt_ups bin, in
    data/chib3s[_<suffix>].db.  The stored entry for the year is printed, and
    the module-level canvas is saved under figs/data/fits3s/.

    Args:
        fit: fit object providing cut["pt_ups"], year and model.params().
        suffix: optional tag appended to the database and figure names.
    """
    # Named pt_bin so the builtin bin() is not shadowed.
    pt_bin = tuple(fit.cut["pt_ups"])
    tag = "_" + suffix if suffix else ""

    db = shelve.open('data/%s.db' % ("chib3s" + tag))
    try:
        # Re-assign the whole dict: a shelf only notices writes to its keys.
        year = db.get(fit.year, {})
        year[pt_bin] = fit.model.params()
        db[fit.year] = year

        # Single-argument call form prints the same on Python 2 and 3.
        print(db[fit.year])
    finally:
        # Close the shelf even if storing or printing fails.
        db.close()

    figname = fit.year + tag
    canvas.SaveAs("figs/data/fits3s/f%s_%d_%s.pdf" %
                  (figname, pt_bin[0], str(pt_bin[1])))
Пример #15
0
def samp_guess(samp):
  # Casts one vote for a sample rate. Once the database holds a 'samp'
  # value this just reports True. Otherwise the vote is counted, and the
  # first rate to collect more than `cutoff` votes is stored as 'samp' and
  # used to derive the module-level _FRAME_LENGTH.
  global samp_distribution, _FRAME_LENGTH

  if DB.get('samp'):
    return True

  # first to this amount is our winner
  cutoff = 10

  samp_distribution[samp] = samp_distribution.get(samp, 0) + 1

  if samp_distribution[samp] > cutoff:
    DB.set('samp', samp)
    _FRAME_LENGTH = (1152.0 / samp)
Пример #16
0
def samp_guess(samp):
    """Cast one vote for a sample rate until a winner has been stored.

    Returns True when the database already holds a 'samp' value.  Otherwise
    the vote is tallied; the first rate seen more than 10 times is stored as
    'samp' and the module-level _FRAME_LENGTH is set to 1152.0 / samp.
    """
    global samp_distribution

    already_known = DB.get('samp')
    if already_known:
        return True

    # first to this amount is our winner
    cutoff = 10

    try:
        samp_distribution[samp] += 1
    except KeyError:
        # First vote for this rate.
        samp_distribution[samp] = 1

    votes = samp_distribution[samp]
    if votes > cutoff:
        DB.set('samp', samp)
        globals()['_FRAME_LENGTH'] = (1152.0 / samp)
Пример #17
0
def stitch_and_slice_process(file_list, relative_start_minute, duration_minute):
  """The process wrapper around stitch_and_slice to do it asynchronously.

  Stitches file_list together and slices the requested window out of it into
  the file named by stream_name(...).  Nothing is done when that file already
  exists and is larger than 75% of its estimated size.

  Args:
    file_list: the files to stitch together.
    relative_start_minute: start of the slice, in minutes.
    duration_minute: requested length of the slice, in minutes.

  Returns:
    Always None.
  """
  name_out = stream_name(file_list, relative_start_minute=relative_start_minute, duration_minute=duration_minute, absolute_start_minute=None)

  if os.path.isfile(name_out):
    file_size = os.path.getsize(name_out)
    # A "correct" filesize should be measured as more than 75% of what the
    # math would be. So first we can guess that.
    bitrate = int(DB.get('bitrate') or 128)
    estimate = (bitrate / 8) * (duration_minute * 60) * (10 ** 3)

    if 0.75 * estimate < file_size:
      logging.info("[stitch] File %s found" % name_out)
      return None

  # We presume that there is a file list we need to make
  stitched_list = stitch(file_list, force_stitch=True)

  # A failed stitch may hand back nothing at all; count that as zero files
  # so that the checks below never call len() on it.
  stitched_count = len(stitched_list) if stitched_list else 0

  logging.info("stitched")
  logging.info(stitched_list)
  logging.info("%d %d" % (len(file_list), stitched_count))

  # We see if it was correct, on the condition that it had to be made:
  # either one file in and one file out, or several in and several out.
  single_ok = stitched_count == len(file_list) == 1
  multi_ok = stitched_count > 1 and len(file_list) > 1

  if not (single_ok or multi_ok):
    logging.warning("Unable to stitch file list")
    return None

  info = stream_info(stitched_list)

  # After we've stitched together the audio then we start our slice
  # by figuring our the relative_start_minute of the slice, versus ours
  start_slice = relative_start_minute

  # Now we need to take the duration of the stream we want, in minutes, and then
  # make sure that we don't exceed the length of the file.
  # NOTE(review): this adds the start to the stitched duration rather than
  # subtracting it - confirm that is the intended cap.
  duration_slice = min(duration_minute, start_slice + info['duration_sec'] / 60.0)

  list_slice(
    list_in=stitched_list,
    name_out=name_out,
    start_sec=start_slice * 60.0,
    duration_sec=duration_slice * 60.0,
  )

  return None
Пример #18
0
def signature(fname, blockcount=-1, depth=1):
  """Computes the signature and block list of an audio file.

  The audio format comes from the database; when none is stored it is
  detected from the file and stored.  AAC files are tried with aac_signature
  first and fall back to mp3_signature when fewer than 5 blocks come back.
  If fewer than 5 blocks are found in the end, the stored format is flipped
  and the whole thing is retried exactly once.

  Args:
    fname: path of the audio file.
    blockcount: passed through to aac_signature / mp3_signature.
    depth: recursion guard; callers leave it at 1.

  Returns:
    A (sig, block) tuple, or False when the format cannot be determined.
    NOTE(review): False cannot be unpacked like the tuple returned on every
    other path - confirm how callers handle it before changing it.
  """
  global _LASTFORMAT
  audio_format = DB.get('format')

  if not audio_format:
    audio_format, _ = get_audio_format(fname)

    if audio_format:
      logging.info("Setting this stream's audio format as %s" % audio_format)
      DB.set('format', audio_format)

    else:
      # logging.warn is a deprecated alias that newer Pythons removed.
      logging.warning("Can't determine type of file for %s.", fname)
      return False

  block = None
  if audio_format == _FORMAT_AAC:
    sig, block = aac_signature(fname, blockcount)

  # We permit the idea that a file can be a false positive. But we do not
  # permit the idea that a file can be a false positive and correctly register
  # over some number of sequential blocks (currently set at whatever the
  # constant is below).
  if audio_format == _FORMAT_MP3 or not block or len(block) < 5:
    sig, block = mp3_signature(fname, blockcount)

    # The mp3 parser found something in a stream we believed to be AAC, so
    # remember it as mp3 from now on.
    if len(block) > 0 and audio_format == _FORMAT_AAC:
      DB.set('format', _FORMAT_MP3)
      DB.clear_cache()

  # Stream formats can change actually.
  if len(block) < 5:
    tryformat = _FORMAT_AAC
    if audio_format == _FORMAT_AAC: tryformat = _FORMAT_MP3
    DB.set('format', tryformat)
    DB.clear_cache()

    # Make sure we don't foolishly recurse
    if depth == 1:
      return signature(fname, blockcount, depth + 1)

  _LASTFORMAT = audio_format

  return sig, block
Пример #19
0
    def live(start, offset_min=0):
        """
        Sends off a live-stream equivalent.  Two formats are supported:

         * duration - In the form of strings such as "1pm" or "2:30pm"
         * offset - starting with a negative "-", this means "from the present".
            For instance, to start the stream from 5 minutes ago, you can do "-5"

        The offset form answers with a redirect to /live/m<minute>, and a
        start of the form "m<minute>" is read as a minute number directly.
        If the request carries a Range header, its first byte position is
        turned into a number of seconds (using the stored bitrate, 128 when
        unset) and added to the start.  An error response is returned when no
        file matches the requested time.
        """
        DB.incr('hits-live')
        if start[0] == '-' or start.endswith('min'):
            # dump things like min or m
            start = re.sub('[a-z]', '', start)
            return redirect('/live/m%f' %
                            (float(TS.minute_now() - abs(float(start)))),
                            code=302)

        # The start is expressed in times like "11:59am ..." We utilize the
        # library we wrote for streaming to get the minute of day this is.
        if start[0] == 'm':
            requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE

        else:
            requested_minute = TS.to_utc('mon', start) - offset_min

        offset_sec = 0
        range_header = request.headers.get('Range', None)
        if range_header:
            # A Range header without a "<digits>-" part yields no match;
            # treat that as "no offset" instead of crashing on None.
            range_match = re.search(r'(\d+)-', range_header)
            if range_match:
                byte1 = int(range_match.group(1))

                # Apply the fallback before int() so an unset bitrate doesn't
                # raise, and again afterwards so a stored 0 still becomes 128.
                bitrate = int(DB.get('bitrate') or 0) or 128

                # We use the byte to compute the offset
                offset_sec = float(byte1) / (bitrate * (1000 / 8.0))

        current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

        now_time = TS.now()
        requested_time = now_time - timedelta(
            minutes=current_minute) + timedelta(minutes=requested_minute)

        # If the requested minute is greater than the current one, then we can presume that
        # the requested minute refers to yesterday ... as in, someone wants 11pm
        # and now it's 1am.
        if requested_minute > current_minute:
            requested_time -= timedelta(days=1)

        # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
        requested_time += timedelta(seconds=offset_sec)

        # Get the info for the file that contains this timestamp
        start_info, requested_time_available = cloud.get_file_for_ts(
            target_time=requested_time, bias=-1)

        if start_info is None or requested_time_available is None:
            return do_error("Can't find any matching files")

        # Never start before the earliest time the chosen file can provide.
        requested_time = max(requested_time, requested_time_available)
        start_second = (requested_time -
                        start_info['start_date']).total_seconds()

        response = Response(audio.list_slice_stream(start_info, start_second),
                            mimetype=audio.our_mime())

        return response
Пример #20
0
def stream_manager():
  """Manager loop which makes sure that the streams are running appropriately.

  Starts the webserver thread, then loops forever, sleeping cycle_time
  seconds between passes.  Each pass:

   * runs cloud.prune() when the (randomized) prune interval has elapsed
   * refreshes the UTC offset every 30 cycles
   * drains misc.queue, handling the 'stream', 'db-debug', 'shutdown',
     'restart' and 'heartbeat' messages
   * estimates the stream bitrate from heartbeat byte counts while the
     database has no (non-zero) bitrate
   * starts download threads as needed and cascades from the current
     download to the next one once cascade_time has passed or the heartbeat
     has expired

  Does not return on its own; shutting down goes through misc.shutdown_real().
  """
  global g_download_kill_pid
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format agnostic bitrate
  # (see heartbeat for more information)
  #
  has_bitrate = DB.get('bitrate')
  if has_bitrate and int(has_bitrate) == 0:
    has_bitrate = False

  # State for the bitrate estimation done in the heartbeat handler below.
  first_time = 0
  total_bytes = 0
  normalize_delay = 6
  cycle_count = 0

  cascade_time = misc.config['cascade_time']
  cascade_buffer = misc.config['cascade_buffer']
  # How long a download runs before its successor is started alongside it.
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0
  last_heartbeat = None
  
  # None while running normally, otherwise SHUTDOWN or RESTART.
  change_state = None
  SHUTDOWN = 1
  RESTART = 2
  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycle_time']

  # The current download thread and the one that will take over from it.
  process = None
  process_next = None

  # The manager will be the one that starts this.
  #server.manager(misc.config)
  webserver = Thread(target=server.manager, name='Webserver', args=(misc.config,))
  webserver.start()

  file_name = None

  # A wrapper function to start a download process
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid

    g_download_pid += 1

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download so we should err on that side by putting it
    # in the future by some margin
    #
    # Note that the file_name argument is not read; a fresh name is built here.
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    logging.info('Starting download #%d (%s). Next up in %ds' % (g_download_pid, file_name, cascade_margin))

    process = Thread(target=stream_download, name='Download-%d:%s' % (g_download_pid, TS.ts_to_name()), args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.daemon = True
    process.start()
    return [file_name, process]


  # see https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks
  prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
  misc.prune_duration = prune_duration

  last_heartbeat_tid = -1
  while True:
    # Prune when the last prune is older than the current randomized
    # interval, and pick a new interval for next time.
    if last_prune < (TS.unixtime('prune') - prune_duration):
      prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
      misc.prune_duration = prune_duration
      # We just assume it can do its business in under a day
      prune = cloud.prune()
      last_prune = TS.unixtime('prune')
      misc.last_prune = last_prune

    # Increment the amount of time this has been running
    if cycle_count % 30 == 0:
      # we only do these things occasionally, they 
      # are either not very important or are not
      # expected to change that often
      TS.get_offset()

    cycle_count += 1

    #
    # We cycle this to off for every run. By the time we go through the queue so long
    # as we aren't supposed to be shutting down, this should be toggled to true.
    #
    lr_set = False
    expired_heartbeat = last_heartbeat and time.time() - last_heartbeat > cycle_time * 2

    while not misc.queue.empty():
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)
        # We expire our heartbeat in order to force a new stream
        # to start
        expired_heartbeat = True

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        # Launch the replacement with the same command line from the process path.
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down download at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          #misc.shutdown_real(do_restart=False)
          #misc.download_ipc.put(('shutdown_time', shutdown_time))

          # Wait until the process list shows more than one matching python
          # process before letting go; otherwise try launching again.
          while True:
            time.sleep(5)
            with open(misc.PIDFILE_MANAGER, 'r') as f:
              manager_pid = f.read()

            #print manager_pid, os.getpid(), manager_pid == os.getpid()
            #logging.info(DB.get('runcount', use_cache=False))
            #logging.info(('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:]) ).read().strip())
            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:]) ).read().strip())

            if ps_out > 1: 
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.");

      elif what == 'heartbeat':
        # NOTE(review): value is indexed as [0], [1] and [2] below - presumably
        # (timestamp, download id, byte count); confirm against the sender.
        # Only the first heartbeat seen in a pass updates the bookkeeping.
        if not lr_set:
          lr_set = True
          last_heartbeat = time.time()
          last_heartbeat_tid = value[1]

          if last_heartbeat_tid < g_download_kill_pid:
            logging.warn("hb: Got a heartbeat for #%d but everything below #%d should be gone!" % (last_heartbeat_tid, g_download_kill_pid))

          DB.set('last_recorded', time.time())

        if not has_bitrate: 
          margin = 60

          # Keep track of the first time this stream started (this is where our total
          # byte count is derived from)
          if not first_time: 
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate.  I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination.  I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math, it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is for both he-aac+ streams which need to put in some frames
          # before the quantizing pushes itself up and for other stations which sometimes put a canned
          # message at the beginning of the stream, like "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total, we get a
          # stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate 
            # the amount of content is in a given archived stream), then we compute it 
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[2]

            # We still give it a time period after the normalizing delay in order to build enough
            # samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + margin)):
              # We take the total bytes and calculate it over the time elapsed
              # since the end of the normalizing delay.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # We find the nearest 8Kb increment this matches and then scale out.
              # Then we multiply out by 8 (for _K_ B) and 8 again for K _b_.
              bitrate = int( round (est / 1000) * 8 )
              #print("Estimated bitrate:%d total:%d est:%d denom:%d" % (bitrate, total_bytes, est, value[0] - first_time - normalize_delay) )
              if bitrate > 0:
                DB.set('bitrate', bitrate)
                has_bitrate = DB.get('bitrate')

    #if last_heartbeat:
    #  logging.info("%d heartbeat %d" % (last_heartbeat, last_heartbeat_tid))

    # Check for our management process
    if not misc.manager_is_running():
      logging.info("Manager isn't running");
      change_state = SHUTDOWN

    # we get here if we should NOT be recording.  So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      misc.shutdown_real()

    else:
      if not process and not change_state:
        logging.info("Failed to find downloader, starting new one")
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade
      # If our last_success stream was more than cascade_time - cascade_buffer
      # then we start our process_next
      elif TS.unixtime('dl') - last_success > cascade_margin or expired_heartbeat:
        #logging.info("heartbeat expired %s %s %d %d %d" % (type(process_next), type(process), last_success, cascade_time, TS.unixtime('dl')))

        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          logging.info("Failed to find downloader, starting new one")
          file_name, process_next = download_start(file_name)

      
      # If there is still no process then we should definitely bail.
      if not process:
        misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shutdown our previous process and move the pointers around.
    #
    if not change_state and (expired_heartbeat or (TS.unixtime('dl') - last_success > cascade_time and process)):
      # Raising the kill id marks every download numbered below it as stale
      # (see the heartbeat warning above).
      g_download_kill_pid += 1
      #process.terminate()

      # If the process_next is running then we move our last_success forward to the present
      last_success = TS.unixtime('dl')

      # we rename our process_next AS OUR process
      process = process_next

      # and then clear out the old process_next pointer
      process_next = None

    time.sleep(cycle_time)
Пример #21
0
def stream_info(file_name, skip_size=False):
    """Describe a recorded stream from its file name and, optionally, its size.

    The callsign and start time are parsed out of the name with ``_TS_RE``.
    Unless ``skip_size`` is set, the duration is estimated from the size on
    disk and the stored bitrate.  When that is not possible the duration is
    read from a ``_<minutes>`` marker in the name, if there is one.

    Args:
        file_name: Path of the stream file.  A list is handed to
            ``list_info`` instead.
        skip_size: When true, neither the database nor the file system is
            consulted and ``size`` in the result is None.

    Returns:
        A dict with the keys ``callsign``, ``week_number``, ``name``,
        ``start_minute``, ``start_date``, ``end_minute``, ``size`` and
        ``duration_sec``, or None when the name cannot be parsed.
    """
    if isinstance(file_name, list):
        return list_info(file_name)

    info = _TS_RE.findall(file_name)
    if not info:
        logging.warning("Failure to find info for '%s'" % file_name)
        return None

    info = info[0]
    callsign = info[0]

    # We have two formats here ... one is unix time
    # and the other is a much more readable time.  We will determine
    # whether it's UNIX time by seeing if it's greater than 2**36, which
    # makes us not Y4147 compliant. Oh dear - better fix this sometime
    # in the next 2100 years!
    unix_time = int(info[1])
    if unix_time > 2**36:
        unix_time = TS.name_to_unix(unix_time)

    start_minute = TS.to_minute(unix_time)
    start_date = datetime.fromtimestamp(unix_time)

    # 0 byte files can throw this thing off if the duration is None,
    # so the fallback duration is a plain 0.
    duration_sec = 0
    end_minute = None
    file_size = None
    sized = False

    if not skip_size:
        try:
            # If we don't have a bitrate yet we assume 128
            bitrate = int(DB.get('bitrate') or 128)

            if bitrate == 0:
                logging.warning("Bitrate is 0. This is a bug.")

            else:
                size_on_disk = os.path.getsize(file_name)

                # A zero-length file means that we hit a bug trying to
                # stitch this, so it is ignored for the computation.
                if size_on_disk == 0:
                    logging.warning(
                        "File %s exists and is 0 bytes. Ignoring it for computation."
                        % file_name)

                else:
                    file_size = size_on_disk
                    duration_sec = file_size / (bitrate * (1000.0 / 8.0))
                    sized = True

        except Exception:
            # Best effort: a missing file or an unusable bitrate just means
            # we fall back to whatever the file name can tell us.
            file_size = None

    if not sized:
        # If we can't find a duration then we try to see if it's in the
        # file name.  At least one digit is required: an empty capture
        # (e.g. from an underscore in a directory name) cannot be converted.
        in_name = re.search(r'_(\d+).{4}', file_name)
        if in_name:
            duration_sec = int(in_name.group(1)) * 60.0

    if isinstance(duration_sec, (int, float)):
        end_minute = (duration_sec / 60.0 + start_minute) % TS.MINUTES_PER_WEEK

    return {
        'callsign': callsign,
        'week_number': start_date.isocalendar()[1],
        'name': file_name,
        'start_minute': start_minute,
        'start_date': start_date,
        'end_minute': end_minute,
        'size': file_size,
        'duration_sec': duration_sec
    }
Пример #22
0
def mp3_signature(file_name, blockcount=-1):
    """Scan an mp3 and fingerprint its audio frames (intended for stitching).

    Walks the file looking for frame headers (0xff followed by a byte whose
    high nibble is 0xf), asks ``mp3_info`` for the frame size and records,
    for every accepted frame, the first ``read_size`` payload bytes and the
    byte offset at which the frame started.  ID3v2 and ID3v1 tags that are
    met along the way are skipped.

    Args:
        file_name: Path (str) of the file to open, or an already open
            binary file object.  A file object is not closed; its position
            is put back to where it was on entry.
        blockcount: Number of loop passes allowed after the first header
            has been seen.  A negative value means no limit.

    Returns:
        A ``(frame_sig, start_byte)`` tuple of parallel lists: the payload
        signatures and the frame start offsets.
    """
    frame_sig = []
    start_byte = []
    first_header_seen = False
    header_attempts = 0

    #
    # Looking at the first 16 bytes of the payload yield a rate that is 99.75% unique
    # as tested over various corpi ranging from 1,000,000 - 7,000,000 blocks.
    # (Note that only read_size = 8 bytes are actually sampled below.)
    #
    # There's an additional precautions of looking for a string of 4 matches which
    # mitigates this even further
    #
    read_size = 8
    is_stream = False
    start_pos = None

    frame_size = None
    assumed_set = None
    attempt_set = None
    next_read = False

    if isinstance(file_name, str):
        file_handle = open(file_name, 'rb')

    else:
        # This means we can handle file pointers
        file_handle = file_name
        is_stream = True
        start_pos = file_handle.tell()

    try:
        while blockcount != 0:

            if first_header_seen:
                blockcount -= 1

            else:
                header_attempts += 1

            # A rejected candidate asks for a retry one byte further on.
            if next_read:
                file_handle.seek(next_read, 0)
                next_read = False

            frame_start = last_read = file_handle.tell()

            header = file_handle.read(2)
            if not header or len(header) != 2:
                # End of file (or a trailing single byte).
                break

            b1 = header[1]

            if header[0] == 0xff and (b1 >> 4) == 0xf:

                try:
                    b2 = ord(file_handle.read(1))
                    b3 = ord(file_handle.read(1))
                except TypeError:
                    # ord() of an empty read: we are at the EOF
                    break

                # Remember the previous frame's parameters until we have
                # settled on an assumed set.
                if frame_size and not assumed_set:
                    attempt_set = [samp_rate, bit_rate, pad_bit]

                frame_size, samp_rate, bit_rate, pad_bit, mode = mp3_info(
                    b1, b2, b3)

                if not frame_size:
                    next_read = last_read + 1
                    continue

                samp_guess(samp_rate)

                # We make sure that we get the same set of samp_rate, bit_rate, pad_bit twice
                if not assumed_set and attempt_set == [
                        samp_rate, bit_rate, pad_bit
                ]:
                    assumed_set = attempt_set
                    attempt_set = False

                # This is another indicator that we could be screwing up ...
                elif assumed_set and samp_rate != assumed_set[
                        0] and bit_rate != assumed_set[1]:
                    next_read = last_read + 1
                    continue

                if not first_header_seen:
                    first_header_seen = True

                # Get the signature
                sig = file_handle.read(read_size)
                frame_sig.append(sig)
                start_byte.append(frame_start)

                # Move forward the frame file_handle.read size + 4 byte header
                file_handle.read(frame_size - (read_size + 4))

            # ID3v2 tag ("ID"); the file is read as bytes, so the comparison
            # has to be made against a bytes literal.
            elif header == b'\x49\x44':
                # Rest of the header: "3", two version bytes and the flags
                file_handle.read(4)

                #
                # Quoting http://id3.org/d3v2.3.0
                #
                # The ID3v2 tag size is encoded with four bytes where the most significant bit
                # (bit 7) is set to zero in every byte, making a total of 28 bits. The zeroed
                # bits are ignored, so a 257 bytes long tag is represented as $00 00 02 01.
                #
                raw_size = file_handle.read(4)
                if len(raw_size) < 4:
                    # Truncated tag header: nothing more to read.
                    break

                candidate = struct.unpack('>I', raw_size)[0]
                size = (((candidate & 0x7f000000) >> 3) |
                        ((candidate & 0x007f0000) >> 2) |
                        ((candidate & 0x00007f00) >> 1) |
                        (candidate & 0x0000007f))

                file_handle.read(size)

            # ID3v1 tag ("TA"G) -- 128 bytes long
            elif header == b'\x54\x41':
                # We've already read 2 so we can go 126 forward
                file_handle.read(126)

            elif header_attempts > _MAX_HEADER_ATTEMPTS:
                if not is_stream:
                    import binascii
                    samp = DB.get('samp', default=44100)
                    if type(samp) is str:
                        logging.debug("OMG WHAT THE F**K")
                        samp = int(samp)

                    logging.debug(
                        '[mp3-sig] %d[%d/%d]%s:%s:%s %s %d' %
                        (len(frame_sig), header_attempts, _MAX_HEADER_ATTEMPTS,
                         binascii.b2a_hex(header),
                         binascii.b2a_hex(file_handle.read(5)), file_name,
                         hex(file_handle.tell()), len(start_byte) *
                         (1152.0 / samp) / 60))

                # This means that perhaps we didn't guess the start correct so we try this again
                # NOTE(review): this branch is only entered when
                # header_attempts > _MAX_HEADER_ATTEMPTS, so the "<" test
                # below can never hold and we always break -- confirm intent.
                if len(frame_sig
                       ) == 1 and header_attempts < _MAX_HEADER_ATTEMPTS:
                    logging.debug("[mp3-sig] False start -- trying again")

                    # seek to the first start byte + 1
                    file_handle.seek(start_byte[0] + 2)

                    # discard what we thought was the first start byte and
                    # frame signature
                    start_byte = []
                    frame_sig = []
                    first_header_seen = False

                    # Also our assumed set was probably wrong
                    assumed_set = None

                else:
                    break

            elif first_header_seen:
                next_read = last_read + 1
                header_attempts += 1

    finally:
        # Release what we opened, or hand a borrowed handle back where it
        # was, even if the scan blew up half way through.
        if not is_stream:
            file_handle.close()

        else:
            file_handle.seek(start_pos)

    return frame_sig, start_byte
Пример #23
0
def stream_manager():
    """Supervise the webserver and the cascading stream downloaders.

    Starts the webserver process and then loops forever.  Each pass it:

    * kicks off a prune when the (randomized) prune interval has elapsed,
    * drains ``misc.queue`` and reacts to the ``stream``, ``db-debug``,
      ``shutdown``, ``restart`` and ``heartbeat`` messages,
    * estimates and stores the stream bitrate from heartbeat byte counts
      when none is known yet,
    * starts a downloader when there is none, starts the next one once
      ``cascade_margin`` has elapsed and retires the old one after
      ``cascade_time``,
    * sleeps ``cycle_time`` seconds.

    The function does not return normally; shutting down is delegated to
    ``misc.shutdown_real``.
    """
    import random

    # Manager process which makes sure that the
    # streams are running appropriately.
    callsign = misc.config['callsign']

    #
    # AAC bitrate is some non-trivial thing that even ffprobe doesn't
    # do a great job at. This solution looks at number of bits that
    # transit over the wire given a duration of time, and then uses
    # that to compute the bitrate, since in practice, that's what
    # bitrate effectively means, and why it's such an important metric.
    #
    # This is to compute a format agnostic bitrate
    # (see heartbeat for more information)
    #
    has_bitrate = DB.get('bitrate')
    first_time = 0
    total_bytes = 0
    normalize_delay = 6

    cascade_time = misc.config['cascadetime']
    cascade_buffer = misc.config['cascadebuffer']
    cascade_margin = cascade_time - cascade_buffer

    last_prune = 0
    last_success = 0

    change_state = None
    SHUTDOWN = 1
    RESTART = 2
    shutdown_time = None
    misc.download_ipc = Queue()

    # Number of seconds to be cycling
    cycle_time = misc.config['cycletime']

    process = None
    process_next = None

    # The manager will be the one that starts this.
    misc.pid_map['webserver'] = Process(target=server.manager,
                                        args=(misc.config, ))
    misc.pid_map['webserver'].start()

    file_name = None

    # A wrapper function to start a download process
    def download_start(file_name):
        """ Starts a process that manages the downloading of a stream. """
        global g_download_pid

        g_download_pid += 1
        logging.info('Starting cascaded downloader #%d. Next up in %ds' %
                     (g_download_pid, cascade_margin))

        #
        # There may be a multi-second lapse time from the naming of the file to
        # the actual start of the download so we should err on that side by putting it
        # in the future by some margin
        #
        file_name = '%s/%s-%s.mp3' % (
            misc.DIR_STREAMS, callsign,
            TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
        process = Process(target=stream_download,
                          args=(callsign, misc.config['stream'],
                                g_download_pid, file_name))
        process.start()
        return [file_name, process]

    # see https://github.com/kristopolous/DRR/issues/91:
    # Randomize prune to offload disk peaks
    prune_duration = misc.config['pruneevery'] + (1 / 8.0 -
                                                  random.random() / 4.0)

    while True:
        #
        # We cycle this to off for every run. By the time we go through the queue so long
        # as we aren't supposed to be shutting down, this should be toggled to true.
        #
        flag = False

        if last_prune < (TS.unixtime('prune') -
                         TS.ONE_DAY_SECOND * prune_duration):
            prune_duration = misc.config['pruneevery'] + (
                1 / 8.0 - random.random() / 4.0)
            # We just assume it can do its business in under a day
            misc.pid_map['prune'] = cloud.prune()
            last_prune = TS.unixtime('prune')

        TS.get_offset()

        # last_recorded is written at most once per pass.
        lr_set = False
        while not misc.queue.empty():
            flag = True
            what, value = misc.queue.get(False)

            # The curl process discovered a new stream to be
            # used instead.
            if what == 'stream':
                misc.config['stream'] = value
                logging.info("Using %s as the stream now" % value)
                # We now don't toggle to flag in order to shutdown the
                # old process and start a new one

            elif what == 'db-debug':
                DB.debug()

            elif what == 'shutdown':
                change_state = SHUTDOWN

            elif what == 'restart':
                logging.info(DB.get('runcount', use_cache=False))
                cwd = os.getcwd()
                os.chdir(misc.PROCESS_PATH)
                Popen(sys.argv)
                os.chdir(cwd)

                change_state = RESTART

                # Try to record for another restart_overlap seconds - make sure that
                # we don't perpetually put this in the future due to some bug.
                if not shutdown_time:
                    shutdown_time = TS.unixtime(
                        'dl') + misc.config['restart_overlap']
                    logging.info(
                        "Restart requested ... shutting down downloader at %s"
                        % TS.ts_to_name(shutdown_time, with_seconds=True))

                    # Counts the python processes whose command line
                    # mentions our callsign; the [c]allsign bracket keeps
                    # grep from matching itself.
                    ps_command = (
                        'ps axf | grep [%c]%s | grep python | wc -l' %
                        (misc.config['callsign'][0],
                         misc.config['callsign'][1:]))

                    while True:
                        time.sleep(20)
                        #logging.info(DB.get('runcount', use_cache=False))

                        # Run the command once and log what it printed.
                        ps_raw = os.popen(ps_command).read().strip()
                        logging.info(ps_raw)
                        ps_out = int(ps_raw)

                        if ps_out > 1:
                            logging.info(
                                "Found %d potential candidates (need at least 2)"
                                % ps_out)
                            # This makes it a restricted soft shutdown
                            misc.shutdown_real(do_restart=True)
                            misc.download_ipc.put(
                                ('shutdown_time', shutdown_time))
                            break

                        else:
                            Popen(sys.argv)
                            logging.warning(
                                "Couldn't find a replacement process ... not going anywhere."
                            )

            elif what == 'heartbeat':
                if not lr_set and value[1] > 100:
                    lr_set = True
                    DB.set('last_recorded', time.time())

                if not has_bitrate:

                    # Keep track of the first time this stream started (this is where our total
                    # byte count is derived from)
                    if not first_time:
                        first_time = value[0]

                    #
                    # Otherwise we give a large (in computer time) margin of time to confidently
                    # guess the bitrate.  I didn't do great at stats in college, but in my experiments,
                    # the estimation falls within 98% of the destination.  I'm pretty sure it's really
                    # unlikely this will come out erroneous, but I really can't do the math, it's probably
                    # a T value, but I don't know. Anyway, whatevs.
                    #
                    # The normalize_delay here is for both he-aac+ streams which need to put in some frames
                    # before the quantizing pushes itself up and for other stations which sometimes put a canned
                    # message at the beginning of the stream, like "Live streaming supported by ..."
                    #
                    # When we discount the first half-dozen seconds as not being part of the total, we get a
                    # stabilizing convergence far quicker.
                    #
                    elif (value[0] - first_time > normalize_delay):
                        # If we haven't determined this stream's bitrate (which we use to estimate
                        # the amount of content is in a given archived stream), then we compute it
                        # here instead of asking the parameters of a given block and then presuming.
                        total_bytes += value[1]

                        # We still give it a time period after the normalizing delay in order to build enough
                        # samples to make a solid guess at what this number should be.
                        if (value[0] - first_time > (normalize_delay + 60)):
                            # We take the total bytes and spread them over the time that has
                            # elapsed since the normalizing delay (more than 60 seconds here).
                            est = total_bytes / (value[0] - first_time -
                                                 normalize_delay)

                            # We find the nearest 8Kb increment this matches and then scale out.
                            # Then we multiply out by 8 (for _K_ B) and 8 again for K _b_.
                            bitrate = int(round(est / 1000) * 8)
                            DB.set('bitrate', bitrate)

        # Check for our management process
        if not misc.manager_is_running():
            logging.info("Manager isn't running")
            change_state = SHUTDOWN

        # The only way for the bool to be toggled off is if we are not in full-mode ...
        # we get here if we should NOT be recording.  So we make sure we aren't.
        if change_state == SHUTDOWN or (change_state == RESTART
                                        and TS.unixtime('dl') > shutdown_time):
            process = my_process_shutdown(process)
            process_next = my_process_shutdown(process_next)
            misc.shutdown_real()

        else:
            # Didn't respond in cycle_time seconds so kill it
            if not flag:
                process = my_process_shutdown(process)

            if not process and not change_state:
                file_name, process = download_start(file_name)
                last_success = TS.unixtime('dl')

            # If we've hit the time when we ought to cascade, i.e. our
            # last_success was more than cascade_time - cascade_buffer ago
            elif TS.unixtime('dl') - last_success > cascade_margin:

                # And we haven't created the next process yet, then we start it now.
                if not process_next:
                    file_name, process_next = download_start(file_name)

            # If there is still no process then we should definitely bail.
            if not process:
                misc.shutdown_real()

        #
        # This needs to be on the outside loop in case we are doing a cascade
        # outside of a full mode. In this case, we will need to shut things down
        #
        # If we are past the cascade_time and we have a process_next, then
        # we should shutdown our previous process and move the pointers around.
        #
        if not change_state and TS.unixtime(
                'dl') - last_success > cascade_time and process:
            logging.info("Stopping cascaded downloader")
            process.terminate()

            # If the process_next is running then we move our last_success forward to the present
            last_success = TS.unixtime('dl')

            # we rename our process_next AS OUR process
            process = process_next

            # and then clear out the old process_next pointer
            process_next = None

        # Increment the amount of time this has been running
        DB.incr('uptime', cycle_time)

        time.sleep(cycle_time)
Пример #24
0
def stitch_and_slice_process(file_list,
                             relative_start_minute,
                             duration_minute,
                             destination_path=None):
    """Stitch ``file_list`` together and cut the requested slice out of it.

    This is the process wrapper around stitching and slicing so that the
    work can be done asynchronously.  Nothing is done when the output file
    already exists and is at least 75% of the size expected for the
    requested duration at the stored bitrate.

    Args:
        file_list: The stream files to stitch.
        relative_start_minute: Start of the slice, in minutes, relative to
            the beginning of the stitched audio.
        duration_minute: Requested length of the slice in minutes.
        destination_path: Optional output name inside ``misc.DIR_SLICES``.
            When omitted the name is derived with ``stream_name``.

    Returns:
        Always None; the result is the file written by ``list_slice``.
    """
    if destination_path:
        import lib.misc as misc
        name_out = "%s/%s" % (misc.DIR_SLICES, destination_path)
    else:
        name_out = stream_name(file_list,
                               relative_start_minute=relative_start_minute,
                               duration_minute=duration_minute,
                               absolute_start_minute=None)

    if os.path.isfile(name_out):
        file_size = os.path.getsize(name_out)
        # A "correct" filesize should be measured as more than 75% of what the
        # math would be. So first we can guess that.
        bitrate = int(DB.get('bitrate') or 128)
        estimate = (bitrate / 8) * (duration_minute * 60) * (10**3)

        if 0.75 * estimate < file_size:
            logging.info("[stitch] File %s found" % name_out)
            return None

    # We presume that there is a file list we need to make
    stitched_list = stitch(file_list, force_stitch=True)

    if stitched_list:
        logging.info("stitched")
        logging.info(stitched_list)
        logging.info("%d %d" % (len(file_list), len(stitched_list)))

    # We see if it was correct, on the condition that it had to be made.
    # Both lists have to be non-empty before their lengths are compared:
    # a failed stitch may hand back something len() cannot take.
    stitch_ok = False
    if stitched_list and file_list:
        both_single = len(stitched_list) == len(file_list) == 1
        both_multiple = len(stitched_list) > 1 and len(file_list) > 1
        stitch_ok = both_single or both_multiple

    if not stitch_ok:
        logging.warning("Unable to stitch file list")
        return None

    info = stream_info(stitched_list)

    # After we've stitched together the audio then we start our slice
    # by figuring our the relative_start_minute of the slice, versus ours
    start_slice = relative_start_minute  #max(start_minute - info['start_minute'], 0)

    # Now we need to take the duration of the stream we want, in minutes, and then
    # make sure that we don't exceed the length of the file.
    # NOTE(review): the cap is start + total length rather than
    # total length - start; confirm which one list_slice expects.
    duration_slice = min(duration_minute,
                         start_slice + info['duration_sec'] / 60.0)

    # print "startslice---", start_slice, relative_start_minute
    list_slice(
        list_in=stitched_list,
        name_out=name_out,
        start_sec=start_slice * 60.0,
        duration_sec=duration_slice * 60.0,
    )

    return None
Пример #25
0
    # Fragment of a driver routine (its header is not part of this excerpt).

    # 'tests' command: discover and run everything under tests/, then exit
    # with status 0 when the run was successful and 1 otherwise.
    if args.command == 'tests':
        suite = TestLoader().discover('tests', pattern='*.py')
        result = TextTestRunner(verbosity=2).run(suite)
        result = 0 if result.wasSuccessful() else 1
        exit(result)

    cfg = read_config(args.config)
    logger = init_logger()

    # Build the renderer, the qualifier and the fitness machine that uses
    # both, then the population that is given that fitness machine.
    renderer = DistributedRenderer()
    qualifier = DistributedQualifier()
    # The working copy of the base image lives under populationPath and keeps
    # the base image's file name.
    base_image_path = cfg['main']['populationPath'] + basename(cfg['main']['baseImage'])
    fitnessMachine = MeshFitnessMachine(base_image_path, renderer, qualifier)
    population = Population(MeshGenome, fitnessMachine)
    # Generation counter as stored in the db (presumably 0 when unset).
    population.generation = int(db.get('generation', default=0))

    accuracy.register(population)
    monitor.register(population)

    # Start over on an explicit 'reset' or when the generation counter is 0;
    # otherwise load the existing population.
    if args.command == 'reset' or not population.generation:
        population.initialize()
    else:
        population.load()
    # Copies the base image to base_image_path (presumably `do` runs the
    # given shell command -- confirm against its definition).
    do('cp -v %s %s' % (cfg['main']['baseImage'], base_image_path))

    # Ctrl-C ends the evolution without a traceback.
    try:
        population.evolve()
    except KeyboardInterrupt as ki:
        pass
Пример #26
0
def stream_info(file_name, skip_size=False):
  """Describe a recorded stream from its file name and, optionally, its size.

  The callsign and start time are parsed out of the name with ``_TS_RE``.
  Unless ``skip_size`` is set, the duration is estimated from the size on
  disk and the stored bitrate.  When that is not possible the duration is
  read from a ``_<minutes>`` marker in the name, if there is one.

  Args:
    file_name: Path of the stream file.  A list is handed to ``list_info``
      instead.
    skip_size: When true, neither the database nor the file system is
      consulted, so ``size`` (and possibly ``duration_sec`` and
      ``end_minute``) in the result is None.

  Returns:
    A dict with the keys ``callsign``, ``week_number``, ``name``,
    ``start_minute``, ``start_date``, ``end_minute``, ``size`` and
    ``duration_sec``, or None when the name cannot be parsed.
  """
  if isinstance(file_name, list):
    return list_info(file_name)

  info = _TS_RE.findall(file_name)
  if not info:
    logging.warning("Failure to find info for '%s'" % file_name)
    return None

  info = info[0]
  callsign = info[0]

  # We have two formats here ... one is unix time
  # and the other is a much more readable time.  We will determine
  # whether it's UNIX time by seeing if it's greater than 2**36, which
  # makes us not Y4147 compliant. Oh dear - better fix this sometime
  # in the next 2100 years!
  unix_time = int(info[1])
  if unix_time > 2**36:
    unix_time = TS.name_to_unix(unix_time)

  start_minute = TS.to_minute(unix_time)
  start_date = datetime.fromtimestamp(unix_time)

  duration_sec = None
  end_minute = None
  file_size = None
  sized = False

  if not skip_size:
    try:
      # If we don't have a bitrate yet we assume 128
      bitrate = int(DB.get('bitrate') or 128)

      if bitrate == 0:
        logging.warning("Bitrate is 0. This is a bug.")

      else:
        size_on_disk = os.path.getsize(file_name)

        # A zero-length file means that we hit a bug trying to
        # stitch this, so it is ignored for the computation.
        if size_on_disk == 0:
          logging.warning("File %s exists and is 0 bytes. Ignoring it for computation." % file_name)

        else:
          file_size = size_on_disk
          duration_sec = file_size / (bitrate * (1000.0 / 8.0))
          sized = True

    except Exception:
      # Best effort: a missing file or an unusable bitrate just means
      # we fall back to whatever the file name can tell us.
      file_size = None

  if not sized:
    # If we can't find a duration then we try to see if it's in the
    # file name.  At least one digit is required: an empty capture
    # (e.g. from an underscore in a directory name) cannot be converted.
    in_name = re.search(r'_(\d+).{4}', file_name)
    if in_name:
      duration_sec = int(in_name.group(1)) * 60.0

  if isinstance(duration_sec, (int, float)):
    end_minute = (duration_sec / 60.0 + start_minute) % TS.MINUTES_PER_WEEK

  return {
    'callsign': callsign,
    'week_number': start_date.isocalendar()[1],
    'name': file_name,
    'start_minute': start_minute,
    'start_date': start_date,
    'end_minute': end_minute,
    'size': file_size,
    'duration_sec': duration_sec
  }
Пример #27
0
def mp3_signature(file_name, blockcount=-1):
  """Scan an mp3 and fingerprint its audio frames (intended for stitching).

  Walks the file looking for frame headers (0xff followed by a byte whose
  high nibble is 0xf), asks ``mp3_info`` for the frame size and records,
  for every accepted frame, the first ``read_size`` payload bytes and the
  byte offset at which the frame started.  ID3v2 and ID3v1 tags that are
  met along the way are skipped.

  Args:
    file_name: Path (str) of the file to open, or an already open binary
      file object.  A file object is not closed; its position is put back
      to where it was on entry.
    blockcount: Number of loop passes allowed after the first header has
      been seen.  A negative value means no limit.

  Returns:
    A ``(frame_sig, start_byte)`` tuple of parallel lists: the payload
    signatures and the frame start offsets.
  """
  frame_sig = []
  start_byte = []
  first_header_seen = False
  header_attempts = 0

  #
  # Looking at the first 16 bytes of the payload yield a rate that is 99.75% unique
  # as tested over various corpi ranging from 1,000,000 - 7,000,000 blocks.
  # (Note that only read_size = 8 bytes are actually sampled below.)
  #
  # There's an additional precautions of looking for a string of 4 matches which
  # mitigates this even further
  #
  read_size = 8
  is_stream = False
  start_pos = None

  frame_size = None
  assumed_set = None
  attempt_set = None
  last_tell = None
  # How far to step back (relative seek) before retrying a header read.
  go_back = -1

  if isinstance(file_name, str):
    file_handle = open(file_name, 'rb')

  else:
    # This means we can handle file pointers
    file_handle = file_name
    is_stream = True
    start_pos = file_handle.tell()

  try:
    while blockcount != 0:

      if first_header_seen:
        blockcount -= 1

      else:
        header_attempts += 1
        if header_attempts > 2:
          file_handle.seek(go_back, 1)

      frame_start = file_handle.tell()
      if frame_start == last_tell:
        # NOTE(review): this is a relative seek by an absolute offset;
        # it looks like "+1 from here" was meant -- confirm.
        file_handle.seek(last_tell + 1, 1)

      header = file_handle.read(2)
      if not header or len(header) != 2:
        # End of file (or a trailing single byte).
        break

      b1 = header[1]

      if header[0] == 0xff and (b1 >> 4) == 0xf:

        try:
          b = ord(file_handle.read(1))
        except TypeError:
          # ord() of an empty read: we are at the EOF
          break

        # Remember the previous frame's parameters until we have settled
        # on an assumed set.
        if frame_size and not assumed_set:
          attempt_set = [samp_rate, bit_rate, pad_bit]

        frame_size, samp_rate, bit_rate, pad_bit = mp3_info(b, b1)

        last_tell = file_handle.tell()

        if not frame_size:
          file_handle.seek(go_back, 1)
          go_back = -1

          continue

        samp_guess(samp_rate)

        # We make sure that we get the same set of samp_rate, bit_rate, pad_bit twice
        if not assumed_set and attempt_set == [samp_rate, bit_rate, pad_bit]:
          assumed_set = attempt_set
          attempt_set = False

        # This is another indicator that we could be screwing up ...
        elif assumed_set and samp_rate != assumed_set[0] and bit_rate != assumed_set[1]:
          file_handle.seek(go_back, 1)
          continue

        if not first_header_seen:
          first_header_seen = True

        # Rest of the header
        file_handle.read(1)

        # Get the signature
        sig = file_handle.read(read_size)
        frame_sig.append(sig)
        start_byte.append(frame_start)

        # Move forward the frame file_handle.read size + 4 byte header
        file_handle.read(frame_size - (read_size + 4))

        if file_handle.tell() > 3:
          go_back = -3

      # ID3v2 tag ("ID"); the file is read as bytes, so the comparison
      # has to be made against a bytes literal.
      elif header == b'\x49\x44':
        # Rest of the header: "3", two version bytes and the flags
        file_handle.read(4)

        #
        # Quoting http://id3.org/d3v2.3.0
        #
        # The ID3v2 tag size is encoded with four bytes where the most significant bit
        # (bit 7) is set to zero in every byte, making a total of 28 bits. The zeroed
        # bits are ignored, so a 257 bytes long tag is represented as $00 00 02 01.
        #
        raw_size = file_handle.read(4)
        if len(raw_size) < 4:
          # Truncated tag header: nothing more to read.
          break

        candidate = struct.unpack('>I', raw_size)[0]
        size = (((candidate & 0x7f000000) >> 3) |
                ((candidate & 0x007f0000) >> 2) |
                ((candidate & 0x00007f00) >> 1) |
                (candidate & 0x0000007f))

        file_handle.read(size)

      # ID3v1 tag ("TA"G) -- 128 bytes long
      elif header == b'\x54\x41':
        # We've already read 2 so we can go 126 forward
        file_handle.read(126)

      elif header_attempts > _MAX_HEADER_ATTEMPTS:
        if not is_stream:
          import binascii
          logging.debug('[mp3-sig] %d[%d/%d]%s:%s:%s %s %d' % (len(frame_sig), header_attempts, _MAX_HEADER_ATTEMPTS, binascii.b2a_hex(header), binascii.b2a_hex(file_handle.read(5)), file_name, hex(file_handle.tell()), len(start_byte) * (1152.0 / DB.get('samp', default=44100)) / 60))

        # This means that perhaps we didn't guess the start correct so we try this again
        # NOTE(review): this branch is only entered when
        # header_attempts > _MAX_HEADER_ATTEMPTS, so the "<" test below
        # can never hold and we always break -- confirm intent.
        if len(frame_sig) == 1 and header_attempts < _MAX_HEADER_ATTEMPTS:
          logging.debug("[mp3-sig] False start -- trying again")

          # seek to the first start byte + 1
          file_handle.seek(start_byte[0] + 2)

          # discard what we thought was the first start byte and
          # frame signature
          start_byte = []
          frame_sig = []
          first_header_seen = False

          # Also our assumed set was probably wrong
          assumed_set = None

        else:
          break

      elif first_header_seen:
        header_attempts += 1
        if header_attempts > 2:
          file_handle.seek(go_back, 1)
          go_back = -1

  finally:
    # Release what we opened, or hand a borrowed handle back where it
    # was, even if the scan blew up half way through.
    if not is_stream:
      file_handle.close()

    else:
      file_handle.seek(start_pos)

  return frame_sig, start_byte