示例#1
0
def main():
    for o in reversed(iter_items()):
        url = o['video_url_w']
        if url == 'n/a':
            continue
        fn = util.get_store_path(o['time'], url)
        if 'wmvid' not in o:
            continue # ??
        if not os.path.exists(fn):
            continue

        ofn = 'waveform/%d.json' % int(o['wmvid'])
        if os.path.exists(ofn):
            continue

        print url
        ofn_tmp = ofn + '.tmp'
        t0 = time.time()
        wf = video_to_waveform(fn, ofn_tmp)
        if not os.path.exists(ofn_tmp): # hack
            continue
        assert os.path.exists(ofn_tmp)

        os.rename(ofn_tmp, ofn)
        print 'time', time.time() - t0
def gen_job2(o):

    for bw in ("w", "n"):
        # if bw == 'w':
        #    continue

        url = o["video_url_" + bw]
        if url == "n/a":
            continue

        fn = util.get_store_path(o["time"], url)
        assert fn

        state = cached_download_state.get(url)

        if os.path.exists(fn):
            new_state = "stored"
        else:
            new_state = "no"

        if os.path.exists(fn):
            # video info
            info0 = cached_video_info.get(url)
            if info0 == "None":
                info0 = None
            assert info0 != "None"
            if state != new_state:
                print repr(info0)

            info = None
            if not info0 or state != "stored":
                info = util.collect_video_info(fn)
                assert info
                info = json.dumps(info, sort_keys=True)

            if info0:
                if state != "stored":
                    assert info
                    info0 = json.dumps(json.loads(info0), sort_keys=True)
                    print "info0", repr(info0)
                    print "info1", repr(info)
                    assert info == info0, url
            else:
                assert info
                with db.conn:
                    cached_video_info[url] = info
                    db.add_video_info(url, g_user, info)

        with db.conn:
            if state is None:
                bw = 1 if "-100k" in url else 0
                clip = 1 if "-clip" in url else 0
                videodate = o["time"][:10]
                db.add_job_state(url, g_user, new_state, bw, clip, videodate)
                cached_download_state[url] = new_state
            elif new_state == "stored" and state != "stored":
                db.change_job_state(url, g_user, new_state)
                cached_download_state[url] = new_state
def fetch(job):
    global g_alt_server

    t, url = json.loads(job)

    # prepare
    if g_alt_server:
        url = url.replace('mediavod01', 'mediavod02')

    fn = util.get_store_path(t, url)
    print url
    if options.verbose:
        print fn

    assert not os.path.exists(fn)


    # start fetch

    pid = os.getpid()

    tmp_a = os.path.join('tmp', '%d.a.wmv' % pid)
    tmp_b = os.path.join('tmp', '%d.b.wmv' % pid)
    tmp_c = os.path.join('tmp', '%d.c.wmv' % pid)
    tmp_log = os.path.join('tmp', '%d.log' % pid)
    my_unlink(tmp_a)
    my_unlink(tmp_b)
    my_unlink(tmp_c)
    my_unlink(tmp_log)

    cmd = '"%s" -s 5 -o %s -o %s -o %s "%s" 2>&1 | tee %s' % (
            msdl_bin,
            tmp_a, tmp_b, tmp_c, url, tmp_log)
    p = subprocess.Popen(cmd, shell=True)
    p.wait()

    logdata = ''
    if os.path.exists(tmp_log):
        logdata = file(tmp_log).read()
    if is_success(logdata, tmp_a, tmp_b, tmp_c):
        os.rename(tmp_log, os_filename(fn + '.log'))
        os.rename(tmp_a, os_filename(fn))

        info = util.collect_video_info(os_filename(fn))
        assert info
        return dict(state='downloaded', sleep=10, info=json.dumps(info, sort_keys=True))

    g_alt_server = not g_alt_server
    if '404 (Not Found)' in logdata:
        return dict(state='404', sleep=3)
    else:
        return dict(state='failed', sleep=60)
示例#4
0
def main():
    for o in reversed(iter_items()):
        url = o['video_url_w']
        fn = util.get_store_path(o['time'], url)
        if 'wmvid' not in o:
            continue # ??
        if not os.path.exists(fn):
            continue

        ofn = 'start_time/%d.txt' % int(o['wmvid'])
        if os.path.exists(ofn):
            continue

        print fn
        result = determine_time(fn)

        with file(ofn, 'w') as f:
            f.write(result)
        print '-' * 30
示例#5
0
def upload(firm, o):
    url = o["video_url_w"]
    if url == "n/a":
        return

    state = cached_upload_state.get(url)
    if not state:
        return

    if state == STATE_UPLOADED:
        return

    assert db.get_job_state(url) == "stored"

    fn = util.get_store_path(o["time"], url)
    assert os.path.exists(fn)

    with db.conn:
        state = db.get_upload_state(url, for_update=True)
        assert state

        if state[0] == STATE_UPLOADED:
            return
        if state[0] == STATE_UPLOADING:
            # TODO check last_modified
            return
        if state[0] not in (STATE_ERROR, STATE_NO):
            # unkown state
            return

        db.change_upload_state(url, STATE_UPLOADING, "")
        cached_upload_state[url] = STATE_UPLOADING

    metadata = collect_metadata(firm, o)

    for k, v in metadata.items():
        if k == "keywords":
            v = ",".join(v)
        print k, v

    cmd = [
        "./youtube-upload",
        "-m",
        account_name,
        "-p",
        account_passwd,
        "-t",
        metadata["title"],
        "-c",
        metadata["category"],
        "-d",
        metadata["description"],
        "--keywords=" + ",".join(metadata["keywords"]),
        fn.decode("utf8"),
    ]

    cmd = [x.encode("utf8") for x in cmd]

    result = None
    try:
        result = do_upload(url, cmd, state)
        assert db.get_upload_state(url)[0] != STATE_UPLOADED
    except Exception:
        traceback.print_exc()
    finally:
        if not result:
            result = dict(state=STATE_ERROR, sleep=60 * 30)
        with db.conn:
            db.change_upload_state(url, result["state"], result.get("youtube_id", ""))
            cached_upload_state[url] = result["state"]
        print "sleep", result["sleep"]
        time.sleep(result["sleep"])
        print "-" * 30