def create_processes():
    """Start a scheduler process for every saved post that is due.

    Loads all rows of ``post`` whose ``post_status`` is ``"saved"``.  For each
    row the ``date`` column is parsed (format ``%Y-%m-%d:%H:%M:%S:%z``) and
    compared with the current UTC time.  Posts whose scheduled time is at most
    five whole minutes away -- including overdue ones -- are marked
    ``"processing"`` and handed to ``schedulerProcessFn`` in a child process.

    Any exception raised while handling a post is written into that post's
    ``post_details`` JSON (keys ``error`` and ``stacktrace``), the post is
    marked ``"failed"``, and processing continues with the next post.
    """
    print("Forking every minute.")
    db = get_db()
    # db.row_factory = dict_factory
    posts = db.execute('SELECT * FROM post WHERE post_status =?',
                       ("saved", )).fetchall()
    for post in posts:
        try:
            # The stored value carries its own UTC offset (%z), so it must be
            # parsed regardless of the host timezone; previously this only
            # happened on UTC hosts and the string/datetime subtraction below
            # raised TypeError everywhere else.
            date = datetime.strptime(post["date"], '%Y-%m-%d:%H:%M:%S:%z')
            date = date.astimezone(pytz.UTC)
            if time.tzname[0] == "UTC":
                print(
                    f"Date converted to UTC as system timezone is {time.tzname}"
                )
            print(date, datetime.now())
            # Whole minutes until the post is due (negative when overdue).
            tDiff = int(
                (date - datetime.now(timezone.utc)).total_seconds() / 60)
            print(tDiff)
            # if -5 <= tDiff <= 5:
            if tDiff <= 5:
                print("placing task.")
                schedulerProcess = multiprocessing.Process(
                    name="schedulerProcess",
                    target=schedulerProcessFn,
                    args=[post, db])
                # Flip the status before starting the child so the next run
                # does not pick the same post up again.
                db.execute('UPDATE post set post_status=? where id=?',
                           ("processing", post["id"]))
                db.commit()
                schedulerProcess.start()
        except Exception as e:
            # Recording the failure must not itself raise, otherwise one post
            # with broken post_details would abort the remaining posts.
            try:
                post_details = json.loads(post["post_details"])
            except (TypeError, ValueError):
                post_details = {}
            if not isinstance(post_details, dict):
                post_details = {}
            post_details["error"] = "Task processing failed."
            post_details["stacktrace"] = f"{e}"
            db.execute(
                'UPDATE post set post_details=?, post_status=? where id=?',
                (json.dumps(post_details), "failed", post["id"]))
            db.commit()
            print("Task processing failed.")
            print(e)
def restartOneTestingSubprocess(self):
    """Replace the oldest testing sub-process with a freshly started one.

    The first entry of ``self.subProcesses`` is removed, sent the ``"quit"``
    command and terminated.  A new process running
    ``TestingManager.predictedActionSubProcess`` is then started with its own
    command and result queues, registered for termination at interpreter
    exit, and appended to the end of ``self.subProcesses``.
    """
    retiredCommands, _retiredResults, retiredProcess = self.subProcesses.pop(0)
    retiredCommands.put("quit")
    retiredProcess.terminate()

    commandQueue = multiprocessing.Queue()
    resultQueue = multiprocessing.Queue()

    # Flatten the nested lists of trace pickle files into one flat list.
    traceFilesToPreload = []
    for fileGroup in self.executionSessionTraceLocalPickleFiles:
        traceFilesToPreload.extend(fileGroup)

    workerArgs = (
        self.configDir,
        self.shouldBeRandom,
        commandQueue,
        resultQueue,
        traceFilesToPreload,
    )
    worker = multiprocessing.Process(
        target=TestingManager.predictedActionSubProcess,
        args=workerArgs,
    )
    worker.start()
    # Make sure the worker does not outlive this interpreter.
    atexit.register(worker.terminate)

    self.subProcesses.append((commandQueue, resultQueue, worker))
def get_meta(ds_path):
    """
    This function is a wrapper for the get_gdal metadata because if there is a
    database disconnection there is no obvious way to clean up and free those
    resources, therefore it is put on a separate process and if it fails it
    can just be tried again.

    This is using GDAL 2.2.4; this should be checked again to see if it can be
    simplified in a later version.

    :param ds_path: String: Path to dataset
    :return: Metadata dict
        driver: Short name of GDAL driver for dataset
        is_raster: True if dataset is a raster type
        nodata: NODATA value for all bands if all bands have the same one,
            otherwise None (raster sets only)
    """
    multiprocess_queue = billiard.Queue()
    proc = billiard.Process(target=get_gdal_metadata,
                            daemon=True,
                            args=(
                                ds_path,
                                multiprocess_queue,
                            ))
    proc.start()
    # Read the result *before* joining: a child that has put data on a queue
    # does not exit until that data has been flushed to the pipe, so joining
    # first can deadlock once the payload exceeds the pipe buffer.
    meta = multiprocess_queue.get()
    proc.join()
    return meta
nodes = queue.get() if nodes is None: queue.task_done() break count += len(nodes) queue.task_done() print type, count return count nodes_queue = multiprocessing.JoinableQueue(128) ways_queue = multiprocessing.JoinableQueue(128) relations_queue = multiprocessing.JoinableQueue(128) procs = [ multiprocessing.Process(target=count_proc('nodes', nodes_queue)), multiprocessing.Process(target=count_proc('ways', ways_queue)), multiprocessing.Process( target=count_proc('relations', relations_queue)) ] for proc in procs: proc.start() parser = PBFMultiProcParser(2, nodes_queue=nodes_queue, ways_queue=ways_queue, relations_queue=relations_queue) parser.parse(sys.argv[1]) nodes_queue.put(None) ways_queue.put(None)
def fill_template(
    template_name,
    context,
    output_format='odt',
    options=None,
    separate_process=True,
):
    """Fill a document with data and convert it to the requested format.

    Returns an absolute path to the generated file.

    Supported output format:
        Text documents: doc, docx, fodt, html, odt, ott, pdf, txt, xhtml, png
        Spreadsheets: csv, fods, html, ods, ots, pdf, xhtml, xls, xlsx, png
        Presentations: fodp, html, odg, odp, otp, pdf, potm, pot, pptx, pps,
            ppt, svg, swf, xhtml, png
        Drawings: fodg, html, odg, pdf, svg, swf, xhtml, png

    More on filter options,
    https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options  # noqa: E501

    :param template_name: the path to template, in OpenDocument format
    :param context: the context to be used to inject content; anything that is
        not already a ``Context`` is wrapped in one
    :param output_format: the output format
    :param options: value of filterOptions in libreofficekit
    :param separate_process: when true, run the conversion step
        (``_convert_file``) in a separate ``multiprocessing.Process`` and
        receive its result through a queue; when false, convert in-process
    :return: path of the generated file -- the converted file when the
        template's extension differs from ``output_format``, otherwise the
        filled-in temporary copy of the template
    """
    if not isinstance(context, Context):
        context = Context(context)
    context['output_format'] = output_format

    source_file = find_template_file(template_name)
    source_extension = os.path.splitext(source_file)[1]
    manifest_closing_tag = '</manifest:manifest>'

    dest_file = NamedTemporaryFile(delete=False, suffix=source_extension)
    try:
        # Context managers close both archives even when rendering fails.
        with zipfile.ZipFile(source_file, 'r') as source:
            with zipfile.ZipFile(dest_file, 'w') as dest:
                manifest_data = ''
                for name in source.namelist():
                    data = source.read(name)
                    if name.endswith('.xml'):
                        data = smart_str(data)

                    if name.endswith(('content.xml', 'styles.xml')):
                        template = Template(fix_inline_tags(data))
                        data = template.render(context)
                    elif name == 'META-INF/manifest.xml':
                        # Hold the manifest back without its closing tag so
                        # image entries can be appended; it is written last.
                        # Locate the tag instead of assuming it is exactly
                        # the final 20 characters (trailing whitespace would
                        # otherwise corrupt the manifest).
                        head, found, _ = data.rpartition(manifest_closing_tag)
                        if found:
                            manifest_data = head
                        else:
                            manifest_data = data[:-len(manifest_closing_tag)]
                        continue

                    dest.writestr(name, smart_bytes(data))

                # Embed images registered in the context and list each one in
                # the manifest.
                images = context.dicts[0].get(IMAGES_CONTEXT_KEY, {})
                for image in images.values():
                    filename = os.path.basename(image.name)
                    ext = os.path.splitext(filename)[1][1:]
                    manifest_data += (
                        '<manifest:file-entry '
                        'manifest:media-type="image/%(ext)s" '
                        'manifest:full-path="Pictures/%(filename)s"/>\n'
                    ) % {'ext': ext, 'filename': filename}
                    image.open()
                    try:
                        dest.writestr('Pictures/%s' % filename, image.read())
                    finally:
                        image.close()

                manifest_data += manifest_closing_tag
                dest.writestr('META-INF/manifest.xml', manifest_data)
    finally:
        # ZipFile does not close a file object it was handed, so release the
        # temp file handle here; delete=False keeps the file on disk.
        dest_file.close()

    if source_extension[1:] == output_format:
        return dest_file.name

    if separate_process:
        results = multiprocessing.Queue()
        converter = multiprocessing.Process(
            target=_convert_file,
            args=(str(dest_file.name), output_format, results, options),
        )
        converter.start()
        # Fetch the result first, then reap the child so it does not linger.
        converted = results.get()
        converter.join()
        return converted

    return _convert_file(
        filename=str(dest_file.name),
        format=output_format,
        options=options,
    )
declare=[task_exchange], routing_key=routing_key) def gen_tasks(): while 1: choice = random.randint(0, 2) args = random.sample(string.ascii_letters, 3) send_as_task(conn, test_task, args, kwargs={}, level=choice) sleep(1) def test_task(what=""): print("Test for %s" % (what, )) if __name__ == '__main__': from kombu import Connection from kombu.utils.debug import setup_logging setup_logging(loglevel='INFO', loggers=['']) conn = Connection('mongodb://localhost:27017/kombudtata') import billiard p = billiard.Process(target=gen_tasks) p.start() with conn: try: worker = Worker(conn) worker.run() except KeyboardInterrupt: print('bye!!!')
def transcode(tmpfile, streaminfo, video_id):
    """Transcode a source file into several renditions and package them as DASH.

    Steps, in order:

    1. Recreate the output directory ``<MOVIE_PATH>/<video_id>``.
    2. Pick the video/audio stream indexes, frame rate and dimensions from
       ``streaminfo`` (width is capped at 1920).
    3. Build one ffmpeg command producing every video and audio rendition and
       run it via ``run_ffmpeg`` in a child process, reading ffmpeg's
       ``-progress`` output from a Unix socket to update
       ``video.encoding_progress`` / ``video.encoding_speed``.
    4. Run ``MP4Box`` to produce ``playlist.mpd`` and delete the intermediate
       ``.mp4`` files.
    5. When ``STORAGE_BACKEND`` is ``"S3"``, upload the output directory with
       ``S3_UPLOAD_THREADS`` threads and remove it locally.
    6. Store playlist path, dimensions, duration and final status on the video.

    :param tmpfile: path of the source media file handed to ffmpeg ``-i``;
        the ffmpeg log is written to ``<tmpfile>.log``
    :param streaminfo: dict with ``format.duration`` and a ``streams`` list
        whose items carry ``codec_type``, ``index`` and, for video,
        ``width``/``height``/``r_frame_rate`` (looks like ffprobe JSON output
        -- confirm against the caller)
    :param video_id: primary key of the ``models.Video`` row to update
    """
    status = 'error'
    output = ""
    outdir = f"{celery.conf.get('MOVIE_PATH')}/{video_id}"
    shutil.rmtree(outdir, ignore_errors=True)
    os.mkdir(outdir)
    master_playlist = f"{outdir}/playlist.mpd"
    rm_f(master_playlist)

    vwidth = 0
    vheight = 0
    framerate = 24
    video_streamidx = -1
    audio_streamidx = -1
    has_audio = False

    video = db_session.query(models.Video).filter_by(id=video_id).one_or_none()
    duration = float(streaminfo['format']['duration'])

    for stream in streaminfo['streams']:
        if stream['codec_type'] == 'video':
            vwidth = stream['width']
            vheight = stream['height']
            framerate = stream['r_frame_rate']
            video_streamidx = stream['index']
        if stream['codec_type'] == 'audio':
            has_audio = True
            # Streams without a language tag are treated as undetermined
            # instead of raising KeyError.
            language = stream.get('tags', {}).get('language', 'und')
            # First undetermined-language stream is the fallback ...
            if audio_streamidx == -1 and language == 'und':
                audio_streamidx = stream['index']
            # ... but an English stream always wins.
            if language == 'eng':
                audio_streamidx = stream['index']

    if video_streamidx == -1:
        video_streamidx = 0
    if audio_streamidx == -1 and has_audio:
        audio_streamidx = 1

    # r_frame_rate may be a plain number or a fraction such as "30000/1001".
    try:
        framerate = round(float(framerate))
    except ValueError:
        x, y = framerate.split("/")
        framerate = round(int(x) / int(y))

    dash_size = 4
    keyint = framerate

    if vwidth > 1920:
        vheight = int(vheight / (vwidth / 1920))
        vwidth = 1920

    audio_formats = []
    if has_audio:
        audio_formats = [{
            'rate': '64k',
            'channels': '1'
        }, {
            'rate': '128k',
            'channels': '2'
        }, {
            'rate': '196k',
            'channels': '2'
        }]

    # Encoding ladder, ordered by ascending width.
    video_profiles = [
        {
            'profile': 'main',
            'preset': 'veryslow',
            'crf': '22',
            'maxrate': '600k',
            'bufsize': '800k',
            'width': 480
        },
        {
            'profile': 'main',
            'preset': 'slow',
            'crf': '22',
            'maxrate': '900k',
            'bufsize': '1200k',
            'width': 640
        },
        {
            'profile': 'high',
            'preset': 'slow',
            'crf': '22',
            'maxrate': '1200k',
            'bufsize': '1500k',
            'width': 960
        },
        {
            'profile': 'high',
            'preset': 'slow',
            'crf': '21',
            'maxrate': '2000k',
            'bufsize': '4000k',
            'width': 1280
        },
        {
            'profile': 'high',
            'preset': 'slow',
            'crf': '21',
            'maxrate': '4500k',
            'bufsize': '8000k',
            'width': 1920
        },
    ]

    # Two low-bitrate baseline renditions are always produced.
    video_formats = [{
        'profile': 'baseline',
        'preset': 'veryslow',
        'crf': '22',
        'maxrate': '200k',
        'bufsize': '300k',
        'width': 320
    }, {
        'profile': 'baseline',
        'preset': 'veryslow',
        'crf': '22',
        'maxrate': '400k',
        'bufsize': '500k',
        'width': 320
    }]

    # Add renditions at 1x, 1/1.5x, 1/2x and 1/3x of the source width
    # (rounded up to an even number), each using the settings of the smallest
    # ladder entry that is at least as wide.
    sizes = [1, 1.5, 2, 3]
    for size in sizes:
        this_width = int(vwidth / size) + (int(vwidth / size) % 2)
        if this_width < video_profiles[0]['width']:
            # The original had a bare ``next`` here, which is a no-op
            # expression; ``continue`` is what was meant.
            continue

        this_profile = None
        for idx, profile in enumerate(video_profiles):
            if this_width == profile['width']:
                this_profile = profile.copy()
                break
            if (idx + 1 < len(video_profiles)
                    and profile['width'] < this_width <
                    video_profiles[idx + 1]['width']):
                this_profile = video_profiles[idx + 1].copy()
                this_profile['width'] = this_width
                break

        if this_profile:
            video_formats.append(this_profile)

    print(video_formats)

    # ffmpeg reports progress by connecting to this Unix socket.
    tmpdir = tempfile.mkdtemp()
    socketfile = os.path.join(tmpdir, 'progress')
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.bind(socketfile)
    sock.listen(1)

    transcode_command = [
        'ffmpeg', '-y', '-nostdin', '-i', f'{tmpfile}', '-progress',
        f'unix://{socketfile}', '-loglevel', '24'
    ]

    dash_command = [
        'MP4Box', '-dash', f'{dash_size * 1000}', '-rap', '-frag-rap',
        '-min-buffer', '16000', '-profile', 'dashavc264:onDemand',
        '-mpd-title', video.title, '-out', master_playlist
    ]

    tmpfiles = []
    for f in video_formats:
        filename = f'{outdir}/video_{f["width"]}_{f["maxrate"]}.mp4'
        transcode_command.extend([
            '-map', f'0:{video_streamidx}', '-c:v', 'libx264', '-x264-params',
            'no-scenecut', '-profile:v', f['profile'], '-preset:v',
            f["preset"], '-tune:v', video.tune, '-keyint_min', f'{keyint}',
            '-g', f'{keyint}', '-sc_threshold', '0', '-bf', '1',
            '-b_strategy', '0', '-crf', f['crf'], '-maxrate',
            f'{f["maxrate"]}', '-bufsize', f'{f["bufsize"]}', '-filter',
            f'scale={f["width"]}:-2', '-map_chapters', '-1', filename
        ])
        dash_command.append(filename)
        tmpfiles.append(filename)

    for f in audio_formats:
        filename = f'{outdir}/audio_{f["rate"]}.mp4'
        transcode_command.extend([
            '-map', f'0:{audio_streamidx}', '-c:a', 'aac', '-b:a', f['rate'],
            '-ac', f['channels'], '-map_chapters', '-1', filename
        ])
        dash_command.append(filename)
        tmpfiles.append(filename)

    video.encoding_status = 'encoding'
    db_session.commit()

    ffmpeg = multiprocessing.Process(target=run_ffmpeg,
                                     args=(transcode_command,
                                           f'{tmpfile}.log'))
    ffmpeg.start()
    connection, _client_address = sock.accept()
    percentage = 0
    speed = 0
    try:
        while True:
            data = connection.recv(1024)
            if not data:
                # Peer closed the progress socket.
                break
            for line in data.decode('utf-8').splitlines():
                # Values can be "N/A" or cut off at a recv() boundary; keep
                # the previous reading instead of crashing the task.
                try:
                    if line.startswith('out_time_ms'):
                        # Despite its name the value is divided by 1e6 to get
                        # seconds, as in the original code.
                        progress = int(line.split('=')[1]) / 1000000
                        percentage = min((progress / duration) * 100, 100)
                    if line.startswith('speed'):
                        speed = float(
                            line.split('=')[1].strip().split('x')[0])
                except (ValueError, IndexError):
                    continue
            video.encoding_progress = percentage
            video.encoding_speed = speed
            db_session.commit()
    finally:
        ffmpeg.terminate()
        connection.close()
        # The listening socket was previously never closed.
        sock.close()
        shutil.rmtree(tmpdir, ignore_errors=True)

    if percentage < 100:
        # NOTE(review): every other write uses ``encoding_status``; confirm
        # that ``status`` is really the intended field here.
        video.status = 'error'
        db_session.commit()

    try:
        print("Reencoded file")
        print(f'Executing: {" ".join(dash_command)}')
        output = subprocess.check_call(dash_command, stderr=subprocess.STDOUT)
        print("DASHed file")
        status = 'ready'
    except Exception as e:
        print(output)
        print(e)

    # Intermediate renditions are no longer needed once packaged.  A failed
    # encode may not have produced all of them; that must not prevent the
    # final status from being stored below.
    for f in tmpfiles:
        try:
            os.unlink(f)
        except FileNotFoundError:
            pass

    if celery.conf.get('STORAGE_BACKEND') == "S3":
        print("Uploading to S3")
        nthreads = celery.conf.get('S3_UPLOAD_THREADS')
        g = glob.glob(f"{outdir}/*")
        # One roughly equal share of the files per upload thread.
        splits = numpy.array_split(g, nthreads)
        threads = []
        for chunk in splits:
            x = threading.Thread(target=s3_upload, args=(chunk.copy(), ))
            threads.append(x)
            x.start()

        for thread in threads:
            thread.join()
        shutil.rmtree(outdir, ignore_errors=True)
        print("Done uploading")

    video.playlist = f'{video_id}/playlist.mpd'
    video.width = vwidth
    video.height = vheight
    video.duration = duration
    video.encoding_status = status
    db_session.commit()
def run_exp_for_all_classifiers(save_dir=DIR_CLASSIFIERS, parallel=True):
    """
    Runs all the saved classifiers that are located in save_dir, repeating
    until no classifiers are left (files ending in ".csv" are ignored).

    parallel, if True, will use the multiprocessing module to run multiple
    experiments at the same time: classifiers are grouped by the file name of
    their video, one task per video is queued, and up to cpu_count() worker
    processes consume the tasks. At present, however, this is broken due to
    the way in which Python processes match up to C-lib extensions. In this
    case, OpenCV just kinda dies when processing is attempted in this manner.
    Currently investigating a fix -- until then, just run linear or via
    threads.
    """
    # Loop instead of recursing: more classifiers may have appeared while a
    # batch was running, and an unbounded chain of recursive calls would
    # eventually hit the interpreter's recursion limit.
    while True:
        # Honour the save_dir argument (the listing previously always used
        # DIR_CLASSIFIERS, while loading and running used save_dir).
        classifiers = EXPClassifierHandler.get_all_saved_classifiers(save_dir)
        classifiers = [x for x in classifiers if not x.endswith(".csv")]

        if not classifiers:
            log.info("No more experiments to run, exiting.")
            return

        if parallel:
            videos_to_classifiers = {}
            for c in classifiers:
                clf = load_saved_classifier(save_dir + c)
                file_name = clf.video_path.split("/")[-1]
                videos_to_classifiers.setdefault(file_name, []).append(
                    (clf.identifier, c))

            # So now we've mapped video_file: [classifiers], multiproc by k
            tasks = mp.Queue()
            results = mp.JoinableQueue()
            args = (tasks, results, save_dir)
            n_tasks = len(videos_to_classifiers)
            n_procs = min(mp.cpu_count(), n_tasks)

            for these_classifiers in videos_to_classifiers.values():
                tasks.put(these_classifiers)

            delegator = EXPClassifierHandler.run_exp_from_mp_queue
            for _ in range(n_procs):
                mp.Process(target=delegator, args=args).start()

            # Wait for one result per queued task.
            for _ in range(n_tasks):
                results.get()
                results.task_done()

            # One None per worker is queued once all results are in.
            for _ in range(n_procs):
                tasks.put(None)

            results.join()
            tasks.close()
            results.close()
        else:
            for c in classifiers:
                EXPClassifierHandler.run_exp_for_classifier(c, save_dir)

        # Maybe by the time we get here more will be waiting... keep going