示例#1
0
    def ScanLoclInfo(self, saveRootPath):
        """Index the videos that already exist on disk under ``saveRootPath``.

        The directory tree is walked with ``os.walk``. For every folder whose
        name equals the ``UserName`` of an entry in ``self.uperList``, each
        ``.flv`` / ``.mp4`` file in it is registered in that entry's
        ``VideoInfoDic_loaclFileName`` dict, keyed by the file name without
        its extension, and flagged as downloaded. A per-uploader summary is
        logged once the walk is finished.

        Args:
            saveRootPath: Root directory to scan.
        """
        videoExtensions = ('.flv', '.mp4')
        for fpath, _dirnames, fnames in os.walk(saveRootPath):
            # fpath is the folder being visited; fnames holds the bare names
            # (no path) of the files directly inside it.
            if not fnames:
                continue
            folderName = os.path.split(fpath)[1] if fpath else ''

            # Resolve the uploader once per folder instead of once per file.
            # There is deliberately no break: when several entries share a
            # name the last one wins, as it did before.
            nowUper = None
            for uper in self.uperList:
                if uper.UserName == folderName:
                    nowUper = uper
            if nowUper is None:
                continue

            for fname in fnames:
                baseName, extensionName = os.path.splitext(fname)
                if extensionName not in videoExtensions:
                    continue
                vi = VideoInfo('')
                # Drop only the trailing extension. The previous
                # fname.replace(ext, '') removed every occurrence of the
                # extension text, so "a.mp4.part2.mp4" became "a.part2".
                vi.loaclFileName = baseName
                vi.isDownloaded = True
                nowUper.VideoInfoDic_loaclFileName[vi.loaclFileName] = vi

        self.logger.info('ScanLoclInfo Done.')
        self.logger.info("ScanLoclInfo Result" + "----" * 20)
        for uper in self.uperList:
            self.logger.info('Local ' + uper.UserName + ' Got ' +
                             str(len(uper.VideoInfoDic_loaclFileName)) +
                             " Videos.")
示例#2
0
 def setUp(self):
     """Create ``self.info`` while the video back-ends are swapped out.

     ``enzyme.parse`` and ``cv2.VideoCapture`` are replaced by
     ``hijackedParse`` / ``hijackedCapture`` only for the duration of the
     ``VideoInfo("dummy")`` construction.
     """
     oldCapture = cv2.VideoCapture
     oldParse = enzyme.parse
     enzyme.parse = hijackedParse
     cv2.VideoCapture = hijackedCapture
     try:
         self.info = VideoInfo("dummy")
     finally:
         # Always put the real implementations back; otherwise a failing
         # constructor would leave the patched globals in place for every
         # test that runs afterwards.
         enzyme.parse = oldParse
         cv2.VideoCapture = oldCapture
    def test_add_video_default_isDownloaded_isUploaded_value(self):
        # A record stored without explicit flags must read back with both
        # isDownloaded and isUploaded equal to 0.
        submitted = VideoInfo()
        submitted.url = 'http://testAddDefaultValue'
        submitted.description = '__test__'
        DownloadInfoService.addVideoInfo(submitted)

        stored = DownloadInfoService.getVideoInfo(submitted.url)
        for flagName in ('isDownloaded', 'isUploaded'):
            self.assertEqual(getattr(stored, flagName), 0)
 def setUp(self):
     """Create ``self.info`` while the video back-ends are swapped out.

     ``enzyme.parse`` and ``cv2.VideoCapture`` are replaced by
     ``hijackedParse`` / ``hijackedCapture`` only for the duration of the
     ``VideoInfo("dummy")`` construction.
     """
     oldCapture = cv2.VideoCapture
     oldParse   = enzyme.parse
     enzyme.parse     = hijackedParse
     cv2.VideoCapture = hijackedCapture
     try:
         self.info = VideoInfo("dummy")
     finally:
         # Always put the real implementations back; otherwise a failing
         # constructor would leave the patched globals in place for every
         # test that runs afterwards.
         enzyme.parse     = oldParse
         cv2.VideoCapture = oldCapture
    def __createVideoInfo__(self):
        """Return a new ``VideoInfo`` filled with fixed sample values."""
        sample = VideoInfo('')

        # Assigned in this order, one attribute at a time.
        sampleFields = (
            ('url', 'http://www.baidu1.com'),
            ('author', 'zw'),
            ('uploader', 'zw'),
            ('isDownloaded', 0),
            ('isUploaded', 0),
            ('description', 'This is test video description'),
        )
        for attrName, attrValue in sampleFields:
            setattr(sample, attrName, attrValue)
        return sample
def extractLinkWorker(pendingSearchLinks):
    """Crawl search pages, storing video links and queueing new search pages.

    URLs are popped off ``pendingSearchLinks`` until it is empty. Each page is
    loaded with ``PyQuery`` and handed to ``extractLink``; every returned
    video link is stored through ``DownloadInfoService.addVideoInfo`` and
    every search link not seen before is appended to ``pendingSearchLinks``.
    The worker sleeps 3 seconds after each page.

    Args:
        pendingSearchLinks: Mutable list of search-page URLs. It is consumed
            and extended in place.
    """
    # Every search URL ever queued, so that no page is scheduled twice.
    allSearchSet = set(pendingSearchLinks)
    while pendingSearchLinks:
        search_page_url = pendingSearchLinks.pop()

        pyContent = PyQuery(search_page_url)

        extractInfo = extractLink(pyContent)
        videoLinks = extractInfo['videoLinks']
        searchLinks = extractInfo['searchLinks']

        if videoLinks is not None:
            logger.info('[LinkExtractor] Add video links to db: [%s]', ' ,\n'.join(videoLinks))
            # Add video links to db
            for videoLink in videoLinks:
                videoInfo = VideoInfo()
                videoInfo.url = videoLink
                try:
                    DownloadInfoService.addVideoInfo(videoInfo)
                except DuplicateError:
                    # Use the module logger like every other message here;
                    # this one used to go to the root logger instead.
                    logger.info("[LinkExtractor] URL [%s] has been already in DB.", videoInfo.url)

        newSearchLinks = []
        if searchLinks is not None:
            for newLink in searchLinks:
                if newLink not in allSearchSet:
                    pendingSearchLinks.append(newLink)
                    newSearchLinks.append(newLink)
                    allSearchSet.add(newLink)
        if newSearchLinks:
            logger.debug('[LinkExtractor] Extracted new search links: [%s]', ' ,\n'.join(newSearchLinks))
        else:
            logger.debug('[LinkExtractor] search links is none for url:[%s]', search_page_url)

        logger.info('[LinkExtractor] Sleep 3 seconds to continue.')
        time.sleep(3)

    # Logger.warn is a deprecated alias of Logger.warning.
    logger.warning('[LinkExtractor] Link Extractor stopped')
class VideoInfoTest(unittest.TestCase):
    """Checks ``VideoInfo`` accessors against an instance built from stubs."""

    def setUp(self):
        """Create ``self.info`` while the video back-ends are swapped out.

        ``enzyme.parse`` and ``cv2.VideoCapture`` are replaced by
        ``hijackedParse`` / ``hijackedCapture`` only for the duration of the
        ``VideoInfo("dummy")`` construction.
        """
        oldCapture = cv2.VideoCapture
        oldParse   = enzyme.parse
        enzyme.parse     = hijackedParse
        cv2.VideoCapture = hijackedCapture
        try:
            self.info = VideoInfo("dummy")
        finally:
            # Always put the real implementations back; otherwise a failing
            # constructor would leave the patched globals in place for every
            # test that runs afterwards.
            enzyme.parse     = oldParse
            cv2.VideoCapture = oldCapture

    # The stubbed instance is expected to report 100 frames.
    def test_numberOfFrames(self):
        self.assertEqual(self.info.numberOfFrames(), 100)

    # The stubbed instance is expected to report a length of 1.
    def test_length(self):
        self.assertEqual(self.info.length(), 1)

    # A missing title is shown as "Unknown"; a set title is returned as is.
    def test_prettyTitle(self):
        self.info.info.title = None
        self.assertEqual(self.info.prettyTitle(), "Unknown")
        self.info.info.title = "Bob"
        self.assertEqual(self.info.prettyTitle(), "Bob")

    # The formatted length combines the seconds and the frame count.
    def test_prettyLength(self):
        self.assertEqual(self.info.prettyLength(), "1 seconds (100 frames)")
示例#8
0
class VideoInfoTest(unittest.TestCase):
    """Checks ``VideoInfo`` accessors against an instance built from stubs."""

    def setUp(self):
        """Create ``self.info`` while the video back-ends are swapped out.

        ``enzyme.parse`` and ``cv2.VideoCapture`` are replaced by
        ``hijackedParse`` / ``hijackedCapture`` only for the duration of the
        ``VideoInfo("dummy")`` construction.
        """
        oldCapture = cv2.VideoCapture
        oldParse = enzyme.parse
        enzyme.parse = hijackedParse
        cv2.VideoCapture = hijackedCapture
        try:
            self.info = VideoInfo("dummy")
        finally:
            # Always put the real implementations back; otherwise a failing
            # constructor would leave the patched globals in place for every
            # test that runs afterwards.
            enzyme.parse = oldParse
            cv2.VideoCapture = oldCapture

    # The stubbed instance is expected to report 100 frames.
    def test_numberOfFrames(self):
        self.assertEqual(self.info.numberOfFrames(), 100)

    # The stubbed instance is expected to report a length of 1.
    def test_length(self):
        self.assertEqual(self.info.length(), 1)

    # A missing title is shown as "Unknown"; a set title is returned as is.
    def test_prettyTitle(self):
        self.info.info.title = None
        self.assertEqual(self.info.prettyTitle(), "Unknown")
        self.info.info.title = "Bob"
        self.assertEqual(self.info.prettyTitle(), "Bob")

    # The formatted length combines the seconds and the frame count.
    def test_prettyLength(self):
        self.assertEqual(self.info.prettyLength(), "1 seconds (100 frames)")
示例#9
0
    async def parse(self, response):
        """Record every video item found on a fetched page on ``self.uper``.

        For each item yielded by ``BiliBiliItem.get_items`` a file name of the
        form ``<time>_<sanitised title>`` is built. If that name is already a
        key of ``self.uper.VideoInfoDic_loaclFileName`` the existing entry
        receives the item's url, time and title; otherwise a new
        ``VideoInfo`` is stored under it. The new ``VideoInfo`` is always
        stored in ``self.uper.VideoInfoDic_NetFileName`` as well.

        Any exception raised while processing the page is recorded in
        ``self.uper.ErrorUrl_Dic`` under the page URL and logged; it is not
        re-raised.

        Args:
            response: The fetched page (its ``url`` and ``html`` attributes
                are read), or ``None``, in which case nothing happens.
        """
        if response is None:
            return
        self.logger.info(response.url)
        self.logger.info("for BiliBiliItem.get_items ···")
        try:
            async for item in BiliBiliItem.get_items(html=response.html):
                self.logger.info("parsing one···")
                if item.time is None:
                    item.time = GetUploadTimer.Get(item.url)
                self.logger.info(item.time)
                self.logger.info(item.title)
                self.logger.info(item.url)
                # Replace special characters; the name carries no extension.
                # Raw string: same pattern as before, without the invalid
                # "\/" escape of a normal string literal.
                fileName = re.sub(r'[\/:*?"<>|]', '-', item.title)
                fileName = item.time + "_" + fileName

                vi = VideoInfo(item.url)
                vi.time = item.time
                vi.title = item.title
                vi.isDownloaded = False
                vi.loaclFileName = fileName

                # "async with" releases the lock only if it was acquired; the
                # previous acquire-inside-try could release an unheld lock.
                # NOTE(review): assumes self.lock is an asyncio.Lock (or
                # another async context manager) - confirm.
                async with self.lock:
                    localVideos = self.uper.VideoInfoDic_loaclFileName
                    if fileName in localVideos:
                        # Same name already known from the local scan: fill
                        # in the url and the other online details.
                        knownVideo = localVideos[fileName]
                        knownVideo.url = item.url
                        knownVideo.time = item.time
                        knownVideo.title = item.title
                    else:
                        # Not known yet: register the new entry.
                        localVideos[fileName] = vi
                    # Entries obtained from the network.
                    self.uper.VideoInfoDic_NetFileName[fileName] = vi

        except Exception as ex:
            async with self.lock:
                self.uper.ErrorUrl_Dic[response.url] = str(ex)
            # Format through the logger: concatenating str + exception raised
            # TypeError here and hid the original error.
            self.logger.error("Error BiliBiliItem: %s", ex)

        self.logger.info("parsing one Done·")