示例#1
0
文件: app.py 项目: feilaoda/easycrawl
    def finished(self, db):
        """Publish unparsed ESL Pod crawl rows as ``LanguageResource`` rows.

        Every ``EslPod`` row with ``is_parsed == 0`` (ordered by
        ``publish_time``) is matched against the ``LanguageResource`` of the
        'eslpod' channel that has the same URL.  A missing resource is created
        together with a new ``Entity`` whose id it reuses.  Title, description,
        URL and publish time are then copied from the pod and committed.  When
        the pod has a media URL, the audio is fetched by
        ``_install_eslpod_media``.

        Args:
            db: Not used by this method; all writes go through ``to_db``.

        Raises:
            IOError: propagated from the media download when it fails.
        """
        pods = EslPod.query.filter_by(is_parsed=0).order_by('publish_time').all()
        c = LanguageChannel.query.filter_by(name='eslpod').first()
        if c is None:
            print("eslpod channel is None")
            return
        for pod in pods:
            r = LanguageResource.query.filter_by(channel_id=c.id, resource_url=pod.url).first()
            if r is None:
                # The entity is committed first so that its generated id can
                # be reused as the resource id.
                entity = Entity()
                entity.title = pod.title
                entity.isa = "english"
                to_db.session.add(entity)
                to_db.session.commit()
                r = LanguageResource()
                r.id = entity.id
                r.channel_id = c.id
                r.study_count = 0
                # New resources start with a random download count in [20, 40].
                r.dl_count = randint(20, 40)

            # Drop dash characters before eslpod_title (defined elsewhere)
            # removes whatever prefix it matches.  \x92 / \x96 are presumably
            # cp1252 leftovers from the crawled page -- TODO confirm.
            title = pod.title.replace(u'–', "").replace(u'-', "").replace(u'\x92', "").replace(u'\x96', "")
            r.title = eslpod_title.sub("", title).strip()
            print(r.title)
            r.description = pod.content
            r.resource_url = pod.url
            r.publish_time = pod.publish_time
            to_db.session.add(r)
            to_db.session.commit()

            if pod.media:
                self._install_eslpod_media(pod, r)

    def _install_eslpod_media(self, pod, r):
        """Download ``pod.media`` and install it as ``eslpod_<r.id>.mp3``.

        Nothing happens when the destination file already exists.  Otherwise
        the file is downloaded to ``/tmp/eslpod`` and then either
        * cut with ``mp3splt`` from the ``pod.fast_dialog`` timestamp (moved
          one second earlier) to 0.30 before the end, and the cut moved into
          place, or
        * moved into place unchanged when ``pod.fast_dialog`` is empty.

        Split and move failures are reported on stdout and otherwise ignored,
        so one bad file does not stop the remaining pods.

        Raises:
            IOError: propagated from the download when it fails.
        """
        import shutil
        import subprocess

        # Last URL path segment; used only for the temporary file names.
        media_file = pod.media.split('/')[-1]
        normal_media_file = "eslpod_%d.mp3" % r.id
        dest_media_file = options.local_media_path + "/static/english/eslpod/%s" % normal_media_file
        dest_tmp_file = "/tmp/eslpod/%s" % media_file

        if os.path.exists(dest_media_file):
            return

        # The download fails with IOError if the scratch directory is missing.
        tmp_dir = os.path.dirname(dest_tmp_file)
        if not os.path.isdir(tmp_dir):
            os.makedirs(tmp_dir)

        print("download pod media: %s %s" % (pod.media, dest_media_file))
        urllib.FancyURLopener().retrieve(pod.media, dest_tmp_file)
        # NOTE(review): this assignment is not committed inside this method;
        # confirm that a later commit on to_db.session persists it.
        r.original_media = pod.media

        if not os.path.exists(dest_tmp_file):
            print("%s not exists" % dest_tmp_file)
            return

        fast_dialog = pod.fast_dialog
        try:
            match = re.search(r"(\d+):(\d+)", fast_dialog)
        except TypeError:
            # fast_dialog is None (or not text): there is nothing to parse.
            match = None
        if match:
            minu = int(match.group(1))
            # Start one second early; clamped at 0, the minute is not borrowed.
            second = max(int(match.group(2)) - 1, 0)
            fast_dialog = "%d:%d" % (minu, second)
            print(fast_dialog)
        else:
            # The raw value is kept: a non-empty, unparsable timestamp is
            # still handed to mp3splt below, as before.
            print("not find fast_dialog %s" % pod.fast_dialog)

        if fast_dialog:
            # mp3splt takes split points as "minutes.seconds".
            start_time = fast_dialog.replace(':', '.')
            split_media_file = "split_" + media_file.lower()
            # The cut is expected next to the downloaded file -- this relies
            # on mp3splt's default output location; verify against its manual.
            dest_split_media_file = "/tmp/eslpod/%s" % split_media_file
            # splitext (not split('.')[0]) so names with several dots still
            # produce the file name that is moved afterwards.
            output_name = os.path.splitext(split_media_file)[0]
            # Argument list instead of a shell string: the file name comes
            # from a crawled URL and must not be interpreted by a shell.
            cmd = ["mp3splt", "-o", output_name, dest_tmp_file, start_time, "EOF-0.30"]
            print(" ".join(cmd))
            try:
                status = subprocess.call(cmd)
                if status != 0:
                    print("mp3splt exited with status %d" % status)
                print("mv %s %s" % (dest_split_media_file, dest_media_file))
                shutil.move(dest_split_media_file, dest_media_file)
            except EnvironmentError as e:
                # mp3splt missing, or no split file was produced.
                print(e)
        else:
            try:
                print("mv %s %s" % (dest_tmp_file, dest_media_file))
                shutil.move(dest_tmp_file, dest_media_file)
            except EnvironmentError as e:
                print(e)
示例#2
0
文件: app.py 项目: feilaoda/easycrawl
    def finished(self, db):
        """Publish unparsed VOA crawl rows as ``LanguageResource`` rows.

        Every ``Voa`` row with ``is_parsed == 0`` (ordered by ``publish_time``)
        is matched against the ``LanguageResource`` of the 'voa special'
        channel that has the same URL.  A missing resource is created together
        with a new ``Entity`` whose id it reuses.  The pod's media is
        downloaded to ``/tmp/voa`` and moved to
        ``<local_media_path>/static/english/voa/voa_<id>.mp3`` unless that file
        already exists.  Only when the file is present afterwards is the
        resource filled in and committed, and the pod marked as parsed
        (``is_parsed = 200``) with ``resource_id`` set.

        Args:
            db: Not used by this method; writes go through ``to_db`` and
                ``crawldb``.

        Raises:
            IOError: propagated from the media download when it fails.
        """
        import shutil

        pods = Voa.query.filter_by(is_parsed=0).order_by('publish_time').all()
        c = LanguageChannel.query.filter_by(name='voa special').first()
        if c is None:
            print("voa channel is None")
            return
        for pod in pods:
            r = LanguageResource.query.filter_by(channel_id=c.id, resource_url=pod.url).first()
            if r is None:
                # The entity is committed first so that its generated id can
                # be reused as the resource id.
                entity = Entity()
                entity.title = pod.title
                entity.isa = "english"
                to_db.session.add(entity)
                to_db.session.commit()

                r = LanguageResource()
                r.id = entity.id
                r.channel_id = c.id
                r.study_count = 0
                # New resources start with a random download count in [20, 40].
                r.dl_count = randint(20, 40)

            if not pod.media:
                # NOTE(review): the pod stays unparsed and a newly created
                # resource is never saved, so the Entity committed above is
                # left without a resource and another one is created on the
                # next run -- confirm whether that is intended.
                continue

            r.slow_media = pod.media

            media_file = "voa_%d.mp3" % r.id
            dest_media_file = options.local_media_path + "/static/english/voa/%s" % media_file
            dest_tmp_file = "/tmp/voa/%s" % media_file

            if not os.path.exists(dest_media_file):
                # The download fails with IOError if the scratch directory is
                # missing.
                tmp_dir = os.path.dirname(dest_tmp_file)
                if not os.path.isdir(tmp_dir):
                    os.makedirs(tmp_dir)

                print("download pod media: %s, %s" % (pod.media, dest_tmp_file))
                urllib.FancyURLopener().retrieve(pod.media, dest_tmp_file)

                r.original_media = pod.media

                if not os.path.exists(dest_tmp_file):
                    print("%s not exists" % dest_tmp_file)
                else:
                    try:
                        print("mv %s %s" % (dest_tmp_file, dest_media_file))
                        # shutil.move instead of a shell "mv": no quoting
                        # problems, and failures surface as exceptions.
                        shutil.move(dest_tmp_file, dest_media_file)
                    except EnvironmentError as e:
                        # Best effort: report the failure and continue; the
                        # existence check below skips this pod.
                        print(e)

            if not os.path.exists(dest_media_file):
                continue

            print(dest_media_file)
            # The digest is appended as a query parameter of the public URL.
            media_md5 = file_md5(dest_media_file)
            r.slow_media = "/static/english/voa/%s?md5=%s" % (media_file, media_md5)

            r.title = pod.title
            r.description = pod.content
            r.resource_url = pod.url
            r.category = pod.category
            r.publish_time = pod.publish_time
            to_db.session.add(r)
            to_db.session.commit()

            # Mark the crawl row as handled so later runs skip it.
            pod.resource_id = r.id
            pod.is_parsed = 200
            crawldb.session.add(pod)
            crawldb.session.commit()