def clustered_encode():
    """Master-side clustered encode loop.

    Binds ZeroMQ PUSH/PULL sockets, distributes [infile, mode, opts] tasks
    to remote workers, tracks worker liveness via timestamps, collects
    6-field result records, and writes a summary log at the end.

    Reads module globals: terminate, opts, log.  Exits the process with
    status 1 if not every task produced a result.
    """
    global terminate
    sh = shell()

    # Here we do the clustering magic
    # This is here rather than at the top as is usual so that flac2all can work
    # even if ZeroMQ is not installed. Perhaps in future zmq will become a hard
    # dependency, and then we can move it.
    import zmq
    zcontext = zmq.Context()

    # Task socket (to send tasks out)
    tsock = zcontext.socket(zmq.PUSH)
    tsock.bind("tcp://*:2019")

    # recieve socket (gets results from workers)
    rsock = zcontext.socket(zmq.PULL)
    rsock.bind("tcp://*:2020")

    # connect loopback to recieve socket
    csock = zcontext.socket(zmq.PUSH)
    csock.connect("tcp://localhost:2020")

    # Gathering file data
    files = sh.getfiles(opts['dirpath'])
    inlist = []
    for infile in files:
        for mode in opts['mode'].split(','):
            if mode.startswith("_"):
                # This should never happen unless some smart-alec tries to call
                # a private mode publicly, but just in case
                continue
            if not infile.endswith(".flac"):
                # Non-flac files are only queued when --copy was requested
                if opts['copy'] is True:
                    opts.update({"copymode": mode})
                    line = [infile, "_copy", opts]
                    inlist.append(line)
            else:
                line = [infile, mode, opts]
                inlist.append(line)

    incount = len(inlist)
    log.info("We have %d tasks" % incount)
    start_time = time.time()

    workers = {}  # worker_id -> last-seen timestamp
    log.info("Waiting for at least one worker to join")
    results = []
    while True:
        log.active_workers(len(workers))
        # Result records carry the return code in field 4; 0 == success
        log.tasks(
            incount,
            len([x for x in results if int(x[4]) == 0]),
            len([x for x in results if int(x[4]) != 0]),
        )

        # If the last seen time is more than 4 minutes, we assume worker
        # is no longer available, and clear it out
        for key in dict(workers):
            if ((time.time() - workers[key]) > 240):
                del(workers[key])
                log.warn("Worker %s not responding, clearing from list (%d remaining)" % (key, len(workers)))

        if len(workers) == 0:
            if inlist == []:
                # We have no more to process, just exit
                terminate = True
            else:
                log.crit("No more workers. Need at least one worker to join")

        if terminate is True:
            # If we want to terminate, clear the entire inlist
            # This will clean up the same as when we end normally
            inlist = []

        try:
            line = rsock.recv_json(flags=zmq.NOBLOCK)
        except zmq.error.Again as e:
            # "Resource temporarily unavailable" varies by OS
            # errno 11 on Linux
            # errno 35 on FreeBSD
            # We expect this if no data, so we sit in a loop and wait
            if ((e.errno == 11) or (e.errno == 35)):
                if terminate is True:
                    log.warn("Terminated")
                    break  # We exit the loop, the zmq bits are cleaned up post loop
                time.sleep(0.01)  # wait a little bit and try again
                # Because we wait for very short here, we increase the update count
                # to prevent refreshing too fast
                log.updatecount = 200
                continue
            else:
                # Now we return the refresh count to normal
                log.updatecount = 20
                log.crit("Error #: %d" % e.errno)
                raise(e)  # re-raise other errnos

        if line[0].startswith('ONLINE'):
            worker_id = line[0].split('~')[-1]
            # A worker has joined. Add the ID and timestamp of last seen
            workers.update({worker_id: time.time()})
            log.ok("Got worker %s ( %d workers)" % (worker_id, len(workers)))
        elif line[0].startswith('EOLACK'):
            worker_id = line[0].split('~')[-1]
            if worker_id in workers:
                del(workers[worker_id])
                log.warn("Worker %s terminated (%d running)" % (worker_id, len(workers)))
            if len(workers) == 0:
                break
        elif line[0].startswith('OFFLINE'):
            worker_id = line[0].split('~')[-1]
            if worker_id in workers:
                del(workers[worker_id])
                log.crit("Worker %s gone OFF LINE (%d running)" % (worker_id, len(workers)))
            if len(workers) == 0:
                break
        elif line[0].startswith("READY"):
            # A worker is ready for a new task
            worker_id = line[0].split('~')[-1]
            # First we update the "last seen" value in worker list
            if worker_id in workers:
                workers[worker_id] = time.time()
            else:
                # If we get a ready request from a worker on in our list, we add it
                # to the list and assign it a job anyway. We assume we can trust the workers
                log.warn("Got ready signal from unknown worker. Adding to worker list")
                workers.update({worker_id: time.time()})
                log.ok("Added unknown worker %s ( %d workers)" % (worker_id, len(workers)))
            # And now we push a new task to worker
            if len(inlist) == 0:
                # We have reached the end. Send EOL
                tsock.send_json(["EOL", None, None])
                continue
            else:
                # Pop a job off the list and send to worker as task
                tsock.send_json(inlist.pop())
        elif line[0].startswith("NACK"):
            worker_id = line[0].split('~')[-1]
            log.warn("Task '%s' refused by worker %s, rescheduling" % (line[2], worker_id))
            # For whatever reason the worker is refusing the task, so
            # put it back onto the inlist for another worker to do
            inlist.append(line[1:])
        elif len(line) == 6:
            # A completed-task result record
            name = line[0].split('/')[-1]
            name = name.replace(".flac", "")
            if len(name) > 55:
                name = name[:55] + "..."
            line = [str(x).strip() for x in line]
            if "ERROR" in line[3]:
                log.crit("n:%-60s\tt:%-10s\ts:%-10s" % (name.encode("utf-8", "backslashreplace").decode(), line[2], line[3]))
            else:
                log.status("n:%-60s\tt:%-10s\ts:%-10s" % (name.encode("utf-8", "backslashreplace").decode(), line[2], line[3]))
            results.append(line)
        else:
            log.crit("UNKNOWN RESULT!")
            log.crit(results)

    end_time = time.time()
    # BUG FIX: the original closed rsock twice and never closed tsock,
    # leaking the task PUSH socket. Close each socket exactly once.
    rsock.close()
    csock.close()
    tsock.close()

    # Now, we confirm that the number of files sent equals the number processed
    log.info("input: %d, output: %d" % (incount, len(results)))
    if (incount != len(results)):
        log.crit("Error. Not all tasks were completed.")
        sys.exit(1)
        # log.print(list(set([x[0] for x in inlist]) - set([x[0] for x in results])))
    results = generate_summary(start_time, end_time, incount, results)
    write_logfile(opts['outdir'], results)
def main():
    """Command-line entry point (Python 3 / cluster-capable version).

    Parses options, normalizes per-encoder option strings, prepares the
    output directory tree, then dispatches to clustered_encode() or
    threaded_encode().  Rebinds the module-global `log` to a curses
    console when --curses is given.
    """
    global log
    options, args = build_parser()
    # update the opts dictionary with new values.
    # FIX: use vars() instead of eval(options.__str__()) — identical dict,
    # without round-tripping option values through eval (fragile & unsafe).
    opts.update(vars(options))

    # convert the formats in the args to valid formats for lame and oggenc
    opts['oggencopts'] = ' --' + ' --'.join(opts['oggencopts'].split(':'))
    opts['opusencopts'] = ' --' + ' --'.join(opts['opusencopts'].split(':'))
    # lame is not consistent, sometimes using long opts,sometimes not
    # so we need to specify on command line with dashes whether it is a long op or
    # short
    opts['lameopts'] = ' -' + ' -'.join(opts['lameopts'].split(':'))
    # ffmpeg uses colons as delimiters, just like flac2all (of course), so we had to
    # switch to commas for this one
    opts['ffmpegopts'] = opts['ffmpegopts'].split(',')
    opts['ffmpegopts'] = list(flatten([x.split(' ') for x in opts['ffmpegopts']]))

    try:
        opts['mode'] = args[0]
    except(IndexError):  # if no arguments specified
        log.print("No mode specified! Run with '-h' for help")
        sys.exit(1)  # quit the program with non-zero status
    try:
        opts['dirpath'] = os.path.abspath(args[1])
    except(IndexError):
        log.print("No directory specified! Run with '-h' for help")
        sys.exit(2)  # quit the program with non-zero status
    # end command line checking

    # Commence main logic
    if options.curses is True:
        log = cconsole()  # switch to cconsole, if specified as option

    if not os.path.exists(opts['outdir']):
        log.info("Creating output directory")
        os.mkdir(opts['outdir'])

    # Check if we have the special mode "all", which really brings flac2all into
    # perspective. We convert to every single format supported. This is mainly added for
    # testing reasons.
    if opts['mode'] == "all":
        opts['mode'] = ','.join([x[0] for x in modetable if not x[0].startswith("_")])

    # In this version, we can convert multiple format at once, so for e.g.
    # mode = mp3,vorbis will create both in parallel
    for mode in opts['mode'].split(','):
        if mode != "":
            # When copying, we don't want a _copy dir, but one representing
            # the mode being copied to, so we check and update mode here
            if "copymode" in opts:
                mode = opts['copymode']
            try:
                os.mkdir(os.path.join(opts['outdir'], mode))
            except OSError as e:
                if e.errno == 17:
                    log.info("Folder %s already exists, reusing..." % mode)
                elif e.errno == 2:
                    # NOTE(review): message says "quitting" but no exit occurs;
                    # execution continues — confirm whether sys.exit was intended.
                    log.info("Parent path %s does not exist! quitting..." % (
                        opts['outdir']
                    ))
                else:
                    # everything else, raise error
                    raise e

    # Magic goes here :)
    if opts['master_enable']:
        clustered_encode()
    else:
        threaded_encode()

    if options.curses is True:
        log.__del__()  # If we are using the curses interface, clean up properly at the end.
def main(): sh = shell() # process Queue,the queue that will hold all the flac files we want to convert. # format: [ $infile, $target_format ] pQ = mp.Queue() # copy Queue (for copying non flac files if requested) # format: [ $infile, $outfile ] cQ = mp.Queue() # logging Queue, the encoders log progress to this # format: [ # $infile, # $outfile, # $format, # $error_status, # $return_code, # $execution_time # ] lQ = mp.Queue() # I've decided that the encoder options should just be long options. # quite frankly, we are running out of letters that make sense. # plus it makes a distinction between encoder opts, and program opts # (which will continue to use single letters) parser = OptionParser(usage=prog_usage()) parser.add_option("-c", "--copy", action="store_true", dest="copy", default=False, help="Copy non flac files across (default=False)") parser.add_option("", "--opus-options", dest="opusencopts", default="", help="Colon delimited options to pass to opusenc.\ Any oggenc long option (one with two '--' in front) can be specified\ in the above format.") parser.add_option("", "--vorbis-options", dest="oggencopts", default="quality=2", help="Colon delimited options to pass to oggenc,for\ example: 'quality=5:resample 32000:downmix:bitrate_average=96'.\ Any oggenc long option (one with two '--' in front) can be specified\ in the above format.") parser.add_option("", "--lame-options", dest="lameopts", default="-preset standard:q 0", help="Options to pass to lame,\ for example: '-preset extreme:q 0:h:-abr'. Any lame\ option can be specified here, if you want a short option (e.g. -h),\ then just do 'h'. If you want a long option (e.g. 
'--abr'), then you need\ a dash: '-abr'") parser.add_option("", "--aacplus-options", dest="neroaacplusopts", default="q 0.3", help="Nero AACplus options, valid options is one of:\ Quality (q $float), bitrate (br $int), or streaming bitrate (cbr $int) ") parser.add_option("-o", "--outdir", dest="outdir", metavar="DIR", help="Set custom output directory (default='./')", default="./") parser.add_option( "-f", "--force", dest="overwrite", action="store_true", help="Force overwrite of existing files (by default we skip)", default=False) parser.add_option( "-t", "--threads", dest="threads", default=mp.cpu_count(), help="How many threads to run in parallel (default: autodetect\ [found %d cpu(s)] )" % mp.cpu_count()) parser.add_option("-n", "--nodirs", dest="nodirs", action="store_true", default=False, help="Don't create Directories, put everything together") (options, args) = parser.parse_args() # update the opts dictionary with new values opts.update(eval(options.__str__())) # convert the formats in the args to valid formats for lame and oggenc opts['oggencopts'] = ' --' + ' --'.join(opts['oggencopts'].split(':')) opts['opusencopts'] = ' --' + ' --'.join(opts['opusencopts'].split(':')) # Nero codec is annoying, as it takes bitrate in actual bits/s, rather than # kbit/s as every other codec on earth works. So we need to parse things out # and convert enctype, rate = opts['neroaacplusopts'].split(' ') if enctype == "br" or enctype == "cbr": opts['neroaacplusopts'] = ' -%s %d' % (enctype, int(rate) * 1000) else: opts['neroaacplusopts'] = ' -%s %s' % (enctype, rate) # lame is not consistent, sometimes using long opts,sometimes not # so we need to specify on command line with dashes whether it is a long op or # short opts['lameopts'] = ' -' + ' -'.join(opts['lameopts'].split(':')) try: opts['mode'] = args[0] except (IndexError): # if no arguments specified print "No mode specified! 
Run with '-h' for help" sys.exit(1) # quit the program with non-zero status try: opts['dirpath'] = os.path.abspath(args[1]) print "DEBUG: %s" % opts['dirpath'] except (IndexError): print "No directory specified! Run with '-h' for help" sys.exit(2) # quit the program with non-zero status # end command line checking if not os.path.exists(opts['outdir']): print "Creating output directory" os.mkdir(opts['outdir']) # In this version, we can convert multiple format at once, so for e.g. # mode = mp3,vorbis will create both in parallel for mode in opts['mode'].split(','): if mode != "": try: os.mkdir(os.path.join(opts['outdir'], mode)) except OSError as e: if e.errno == 17: print "Folder %s already exists, reusing..." % mode elif e.errno == 2: print "Parent path %s does not exist! quitting..." % ( opts['outdir']) else: # everything else, raise error raise e # Magic goes here :) # 1. populate the queue with flac files files = sh.getfiles(opts['dirpath']) count = 0 for infile in files: for mode in opts['mode'].split(','): if infile.endswith(".flac"): pQ.put([infile, opts['dirpath'], opts['outdir'], mode]) count += 1 else: if opts['copy']: cQ.put([infile, opts['dirpath'], opts['outdir'], mode]) time.sleep(1) # Delay to resolve queue "broken pipe" errors print "We have %d flac files to convert" % count print "We have %d non-flac files to copy across" % cQ.qsize() # Right, how this will work here, is that we will pass the whole queue # to the encode threads (one per processor) and have them pop off/on as # necessary. Allows for far more fine grained control. 
opts['threads'] = int(opts['threads']) # keep flags for state (pQ,cQ) sflags = [0, 0] ap = [] # active processes while True: cc = opts['threads'] while int(cc) > (len(ap)): print(">> Spawning execution process") proc = encode_thread(int(cc), "Thread %d" % int(cc), pQ, opts, lQ) proc.start() ap.append(proc) time.sleep(0.5) # Believe it or not, the only way way to be sure a queue is actually # empty is to try to get with a timeout. So we get and put back # and if we get a timeout error (10 secs), register it try: pQ.put(pQ.get(timeout=10)) except mp.TimeoutError as e: print "Process queue finished." sflags[0] = 1 except Queue.Empty as e: print "Process queue finished." sflags[0] = 1 else: sflags[0] = 0 # sflags[1] = 1 # Commented out until we get the shell_process_thread function written # try: command = cQ.get(timeout=10) srcfile, srcroot, dest, encformat = command outdir = sh.generateoutdir(srcfile, os.path.join(dest, encformat), srcroot) copytarget(srcfile, outdir) print("%s => %s" % (srcfile, outdir)) except mp.TimeoutError as e: sflags[1] = 1 except Queue.Empty as e: sflags[1] = 1 else: sflags[1] = 0 if sflags == [1, 1]: print "Processing Complete!" break # Sometimes processes die (due to errors, or exit called), which # will slowly starve the script as they are not restarted. The below # filters out dead processes, allowing us to respawn as necessary ap = filter(lambda x: x.isAlive(), ap) # Now wait for all running processes to complete print "Waiting for all running process to complete." print ap # We don't use os.join because if a child hangs, it takes the entire program # with it st = time.time() while True: if len(filter(lambda x: x.is_alive(), ap)) == 0: break print "-" * 80 for proc in filter(lambda x: x.is_alive(), ap): print "Process \"%s\" is still running! Waiting..." 
% proc.name print "-" * 80 time.sleep(4) print "" if (time.time() - st) > 600: print "Process timeout reached, terminating stragglers and continuing\ anyway" map(lambda x: x.terminate(), filter(lambda x: x.is_alive(), ap)) break # Now we fetch the log results, for the summary print "Processing run log..." log = [] while not lQ.empty(): log.append(lQ.get(timeout=2)) total = len(log) successes = len(filter(lambda x: x[4] == 0, log)) failures = total - successes print "\n\n" print "=" * 80 print "| Summary " print "-" * 80 print """ Total files on input: %d Total files actually processed: %d -- Execution rate: %.2f %% Files we managed to convert successfully: %d Files we failed to convert due to errors: %d -- Conversion error rate: %.2f %% """ % (count, total, ((float(total) / count) * 100), successes, failures, ((failures / float(total)) * 100)) for mode in opts['mode'].split(','): # 1. find all the logs corresponding to a particular mode x = filter(lambda x: x[2] == mode, log) # 2. Get the execution time for all relevant logs execT = map(lambda y: y[5], x) if len(execT) != 0: esum = sum(execT) emean = sum(execT) / len(execT) else: # Empty set, just continue print("For mode %s:\nNo data (no files converted)\n" % mode) continue execT.sort() if len(execT) % 2 != 0: # Odd number, so median is middle emedian = execT[(len(execT) - 1) / 2] else: # Even set. 
So median is average of two middle numbers num1 = execT[((len(execT) - 1) / 2) - 1] num2 = execT[((len(execT) - 1) / 2)] emedian = (sum([num1, num2]) / 2.0) etime = "Total execution time: " if esum < 600: etime += "%.4f seconds" % esum elif esum > 600 < 3600: etime += "%.4f minutes" % (esum / 60) else: etime += "%.4f hours" % (esum / 60 / 60) print """ For mode: %s %s Per file conversion: \tMean execution time: %.4f seconds \tMedian execution time: %.4f seconds """ % (mode, etime, emean, emedian) errout_file = opts['outdir'] + "/conversion_results.log" print "Writing log file (%s)" % errout_file fd = open(errout_file, "w") fd.write( "infile,outfile,format,conversion_status,return_code,execution_time\n") for item in log: item = map(lambda x: str(x), item) line = ','.join(item) fd.write("%s\n" % line) fd.close() print "Done!" if failures != 0: print "We had some failures in encoding :-(" print "Check %s file for info." % errout_file print "Done! Returning non-zero exit status! " sys.exit(-1) else: sys.exit(0)