def main(): for o in reversed(iter_items()): url = o['video_url_w'] if url == 'n/a': continue fn = util.get_store_path(o['time'], url) if 'wmvid' not in o: continue # ?? if not os.path.exists(fn): continue ofn = 'waveform/%d.json' % int(o['wmvid']) if os.path.exists(ofn): continue print url ofn_tmp = ofn + '.tmp' t0 = time.time() wf = video_to_waveform(fn, ofn_tmp) if not os.path.exists(ofn_tmp): # hack continue assert os.path.exists(ofn_tmp) os.rename(ofn_tmp, ofn) print 'time', time.time() - t0
def gen_job2(o):
    # Reconcile the download-job state and cached video info for one item,
    # for both the wide ("w") and narrow ("n") bandwidth variants.
    #
    # Reads:  cached_download_state, cached_video_info (url-keyed caches),
    #         the file store via util.get_store_path / os.path.exists.
    # Writes: db job/video-info rows and the two caches, inside db.conn
    #         transactions.
    for bw in ("w", "n"):
        # if bw == 'w':
        #     continue
        url = o["video_url_" + bw]
        if url == "n/a":
            # no video published for this bandwidth
            continue
        fn = util.get_store_path(o["time"], url)
        assert fn
        state = cached_download_state.get(url)  # None if never seen before
        # Desired state derived from whether the file is on disk.
        if os.path.exists(fn):
            new_state = "stored"
        else:
            new_state = "no"
        if os.path.exists(fn):
            # --- video info: verify or record metadata for the stored file ---
            info0 = cached_video_info.get(url)
            if info0 == "None":
                # the cache stores the literal string "None" for "no info"
                info0 = None
            assert info0 != "None"
            if state != new_state:
                print repr(info0)
            info = None
            if not info0 or state != "stored":
                # No cached info, or the file (re)appeared: probe it now.
                info = util.collect_video_info(fn)
                assert info
                # Canonical JSON so string comparison below is meaningful.
                info = json.dumps(info, sort_keys=True)
            if info0:
                if state != "stored":
                    # File just transitioned to stored: the freshly probed
                    # info must match what was cached earlier.
                    assert info
                    info0 = json.dumps(json.loads(info0), sort_keys=True)
                    print "info0", repr(info0)
                    print "info1", repr(info)
                    assert info == info0, url
            else:
                # First time we have info for this url: persist it.
                assert info
                with db.conn:
                    cached_video_info[url] = info
                    db.add_video_info(url, g_user, info)
        # --- job state: create or advance the db row ---
        with db.conn:
            if state is None:
                # New url: derive flags from the url itself.
                # NOTE: this rebinds the loop variable `bw` (0/1 bandwidth
                # flag) -- harmless only because it is the last use per
                # iteration.
                bw = 1 if "-100k" in url else 0
                clip = 1 if "-clip" in url else 0
                videodate = o["time"][:10]  # assumes o["time"] starts YYYY-MM-DD -- TODO confirm
                db.add_job_state(url, g_user, new_state, bw, clip, videodate)
                cached_download_state[url] = new_state
            elif new_state == "stored" and state != "stored":
                # Only ever promote to "stored"; never demote a known state.
                db.change_job_state(url, g_user, new_state)
                cached_download_state[url] = new_state
def fetch(job): global g_alt_server t, url = json.loads(job) # prepare if g_alt_server: url = url.replace('mediavod01', 'mediavod02') fn = util.get_store_path(t, url) print url if options.verbose: print fn assert not os.path.exists(fn) # start fetch pid = os.getpid() tmp_a = os.path.join('tmp', '%d.a.wmv' % pid) tmp_b = os.path.join('tmp', '%d.b.wmv' % pid) tmp_c = os.path.join('tmp', '%d.c.wmv' % pid) tmp_log = os.path.join('tmp', '%d.log' % pid) my_unlink(tmp_a) my_unlink(tmp_b) my_unlink(tmp_c) my_unlink(tmp_log) cmd = '"%s" -s 5 -o %s -o %s -o %s "%s" 2>&1 | tee %s' % ( msdl_bin, tmp_a, tmp_b, tmp_c, url, tmp_log) p = subprocess.Popen(cmd, shell=True) p.wait() logdata = '' if os.path.exists(tmp_log): logdata = file(tmp_log).read() if is_success(logdata, tmp_a, tmp_b, tmp_c): os.rename(tmp_log, os_filename(fn + '.log')) os.rename(tmp_a, os_filename(fn)) info = util.collect_video_info(os_filename(fn)) assert info return dict(state='downloaded', sleep=10, info=json.dumps(info, sort_keys=True)) g_alt_server = not g_alt_server if '404 (Not Found)' in logdata: return dict(state='404', sleep=3) else: return dict(state='failed', sleep=60)
def main(): for o in reversed(iter_items()): url = o['video_url_w'] fn = util.get_store_path(o['time'], url) if 'wmvid' not in o: continue # ?? if not os.path.exists(fn): continue ofn = 'start_time/%d.txt' % int(o['wmvid']) if os.path.exists(ofn): continue print fn result = determine_time(fn) with file(ofn, 'w') as f: f.write(result) print '-' * 30
def upload(firm, o):
    # Upload one item's wide-bandwidth video to YouTube via the external
    # ./youtube-upload tool, driving the upload-state machine
    # (STATE_NO / STATE_ERROR -> STATE_UPLOADING -> STATE_UPLOADED / STATE_ERROR).
    #
    # Reads:  cached_upload_state, db job/upload state, the file store.
    # Writes: db upload state + cache (inside db.conn transactions).
    # Blocks: sleeps result["sleep"] seconds before returning.
    url = o["video_url_w"]
    if url == "n/a":
        # no video published for this item
        return
    # Fast path on the in-memory cache before touching the db.
    state = cached_upload_state.get(url)
    if not state:
        return
    if state == STATE_UPLOADED:
        return
    # Precondition: only stored (fully downloaded) videos get uploaded.
    assert db.get_job_state(url) == "stored"
    fn = util.get_store_path(o["time"], url)
    assert os.path.exists(fn)
    with db.conn:
        # Re-check under a row lock so concurrent workers don't double-upload.
        state = db.get_upload_state(url, for_update=True)
        assert state
        if state[0] == STATE_UPLOADED:
            return
        if state[0] == STATE_UPLOADING:
            # TODO check last_modified -- a crashed worker could leave this stuck
            return
        if state[0] not in (STATE_ERROR, STATE_NO):
            # unknown state
            return
        # Claim the upload before leaving the transaction.
        db.change_upload_state(url, STATE_UPLOADING, "")
        cached_upload_state[url] = STATE_UPLOADING
    metadata = collect_metadata(firm, o)
    for k, v in metadata.items():
        if k == "keywords":
            v = ",".join(v)
        print k, v
    cmd = [
        "./youtube-upload",
        "-m", account_name,
        "-p", account_passwd,
        "-t", metadata["title"],
        "-c", metadata["category"],
        "-d", metadata["description"],
        "--keywords=" + ",".join(metadata["keywords"]),
        fn.decode("utf8"),
    ]
    # The tool expects utf-8 byte strings on its argv (Python 2).
    cmd = [x.encode("utf8") for x in cmd]
    result = None
    try:
        result = do_upload(url, cmd, state)
        # Sanity: the db row must not say UPLOADED yet -- only the finally
        # block below records the final state (presumably; verify against
        # do_upload).
        assert db.get_upload_state(url)[0] != STATE_UPLOADED
    except Exception:
        # Best-effort: log and fall through; the finally block records an
        # error state so the job can be retried later.
        traceback.print_exc()
    finally:
        if not result:
            # Exception or falsy result: retry after 30 minutes.
            result = dict(state=STATE_ERROR, sleep=60 * 30)
        with db.conn:
            db.change_upload_state(url, result["state"], result.get("youtube_id", ""))
            cached_upload_state[url] = result["state"]
        print "sleep", result["sleep"]
        time.sleep(result["sleep"])
        print "-" * 30