Python ResourceChecker примеры использования

Язык программирования: Python

Пространство имен/Пакет: Resource.ResourceChecker

Класс/Тип: ResourceChecker

Примеров на hotexamples.com: 10

Python ResourceChecker — найдено 10 примеров. Это лучшие примеры кода на Python для Resource.ResourceChecker.ResourceChecker, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ResourceChecker(3)

check_local_resource(1)

check_remote_resource(1)

getAllFeedPaths(1)

Пример #1

Показать файл

Файл: ResourceDownloader.py Проект: tanmoydeb07/PodSearch

class ResourceDownloader:
    """Commonly used tool that downloads resources.

    Requests are collected in class-level lists (shared by every instance)
    and handed to the Threader in batches rather than one at a time.
    """

    _logger = LoggerFactory().getLogger('RessourceDownloader')
    # Pending [resource_type, resource_url, resource_target] entries.
    _resources = []
    # Entries that were already handed to the threader in an earlier batch.
    _downloadedResources = []
    # A batch is started once more than this many entries are pending ...
    _MAX_PENDING_RESOURCES = 1000
    # ... or once more than this many seconds have passed since this
    # instance last started a batch.
    _MAX_SECONDS_BETWEEN_BATCHES = 60

    def __init__(self):
        """Create the helper objects and mark that no batch was started yet."""
        self._tdr = Threader()
        self._pt = PathTool.PathTool()
        self._rc = ResourceChecker()
        self._rh = ResourceHelper()
        # 0 makes the elapsed-time check in download() succeed on first use.
        self.last_download_timestamp = 0

    def download(self, resource_type, resource_url):
        """Downloads a resource of type feed or image by its URL.

        The request is queued unless an entry with the same target path is
        already pending or was part of an earlier batch.  The queue is then
        passed to the threader if it holds more than
        ``_MAX_PENDING_RESOURCES`` entries or if more than
        ``_MAX_SECONDS_BETWEEN_BATCHES`` seconds have passed since this
        instance last started a batch; otherwise the call returns with the
        request still queued.

        Args:
            resource_type: Type of the resource (per the original docstring,
                feed or image).
            resource_url: URL of the resource.

        Returns:
            None.  Also returns immediately, without queuing anything, when
            ``check_remote_resource`` rejects the resource.
        """

        if not self._rc.check_remote_resource(resource_type, resource_url):
            return

        resource = Resource(resource_url, resource_type)
        # Drop a single trailing slash from the URL before asking for paths.
        if resource.get_absolute_url().endswith('/'):
            resource._set_url(resource.get_absolute_url()[:-1])
        resource_target = resource.get_path()
        base_path = resource.get_base_path()
        msg = 'DEBUG: Will download resource %s with target %s to location %s.' \
              % (resource_url, resource_target, base_path)
        ResourceDownloader._logger.info(msg)

        self._rh.ensurePathExists(base_path)

        args = [resource_type, resource_url, resource_target]

        # Entries are considered duplicates when their target paths match.
        duplicate_found = any(
            pending[2] == resource_target
            for pending in ResourceDownloader._resources
        ) or any(
            handled[2] == resource_target
            for handled in ResourceDownloader._downloadedResources
        )
        if not duplicate_found:
            ResourceDownloader._resources.append(args)

        time_since_last_download = time.time() - self.last_download_timestamp
        queue_is_small = (len(ResourceDownloader._resources)
                          <= ResourceDownloader._MAX_PENDING_RESOURCES)
        waited_briefly = (time_since_last_download
                          <= ResourceDownloader._MAX_SECONDS_BETWEEN_BATCHES)
        # TODO marker kept from the original condition; presumably the
        # thresholds are still to be tuned.
        if queue_is_small and waited_briefly:
            return

        # Swap in a fresh queue before starting the batch so that entries
        # queued later end up in the next batch.
        resources_tmp = ResourceDownloader._resources
        ResourceDownloader._resources = []
        ResourceDownloader._downloadedResources = ResourceDownloader._downloadedResources + resources_tmp
        self.last_download_timestamp = time.time()
        # `_download` is defined outside this class.
        self._tdr.run_parallel_in_threads(_download, resources_tmp)

Пример #2

Показать файл

Файл: ResourceDownloader.py Проект: PodSearch/PodSearch

class ResourceDownloader:
    """Commonly used tool that downloads resources.

    Requests are collected in class-level lists (shared by every instance)
    and handed to the Threader in batches rather than one at a time.
    """

    _logger = LoggerFactory().getLogger('RessourceDownloader')
    # Pending [resource_type, resource_url, resource_target] entries.
    _resources = []
    # Entries that were already handed to the threader in an earlier batch.
    _downloadedResources = []
    # A batch is started once more than this many entries are pending ...
    _MAX_PENDING_RESOURCES = 1000
    # ... or once more than this many seconds have passed since this
    # instance last started a batch.
    _MAX_SECONDS_BETWEEN_BATCHES = 60

    def __init__(self):
        """Create the helper objects and mark that no batch was started yet."""
        self._tdr = Threader()
        self._pt = PathTool.PathTool()
        self._rc = ResourceChecker()
        self._rh = ResourceHelper()
        # 0 makes the elapsed-time check in download() succeed on first use.
        self.last_download_timestamp = 0

    def download(self, resource_type, resource_url):
        """Downloads a resource of type feed or image by its URL.

        The request is queued unless an entry with the same target path is
        already pending or was part of an earlier batch.  The queue is then
        passed to the threader if it holds more than
        ``_MAX_PENDING_RESOURCES`` entries or if more than
        ``_MAX_SECONDS_BETWEEN_BATCHES`` seconds have passed since this
        instance last started a batch; otherwise the call returns with the
        request still queued.

        Args:
            resource_type: Type of the resource (per the original docstring,
                feed or image).
            resource_url: URL of the resource.

        Returns:
            None.  Also returns immediately, without queuing anything, when
            ``check_remote_resource`` rejects the resource.
        """

        if not self._rc.check_remote_resource(resource_type, resource_url):
            return

        resource = Resource(resource_url, resource_type)
        # Drop a single trailing slash from the URL before asking for paths.
        if resource.get_absolute_url().endswith('/'):
            resource._set_url(resource.get_absolute_url()[:-1])
        resource_target = resource.get_path()
        base_path = resource.get_base_path()
        msg = 'DEBUG: Will download resource %s with target %s to location %s.' \
              % (resource_url, resource_target, base_path)
        ResourceDownloader._logger.info(msg)

        self._rh.ensurePathExists(base_path)

        args = [resource_type, resource_url, resource_target]

        # Entries are considered duplicates when their target paths match.
        duplicate_found = any(
            pending[2] == resource_target
            for pending in ResourceDownloader._resources
        ) or any(
            handled[2] == resource_target
            for handled in ResourceDownloader._downloadedResources
        )
        if not duplicate_found:
            ResourceDownloader._resources.append(args)

        time_since_last_download = time.time() - self.last_download_timestamp
        queue_is_small = (len(ResourceDownloader._resources)
                          <= ResourceDownloader._MAX_PENDING_RESOURCES)
        waited_briefly = (time_since_last_download
                          <= ResourceDownloader._MAX_SECONDS_BETWEEN_BATCHES)
        # TODO marker kept from the original condition; presumably the
        # thresholds are still to be tuned.
        if queue_is_small and waited_briefly:
            return

        # Swap in a fresh queue before starting the batch so that entries
        # queued later end up in the next batch.
        resources_tmp = ResourceDownloader._resources
        ResourceDownloader._resources = []
        ResourceDownloader._downloadedResources = ResourceDownloader._downloadedResources + resources_tmp
        self.last_download_timestamp = time.time()
        # `_download` is defined outside this class.
        self._tdr.run_parallel_in_threads(_download, resources_tmp)

Пример #3

Показать файл

class FeedsDownloaderRunner:
    """Runs an ImagesDownloader over every feed path the checker reports."""

    def __init__(self):
        """Create the ImagesDownloader and ResourceChecker used by run()."""
        self._iDler = ImagesDownloader()
        self._rc = ResourceChecker()

    def run(self):
        """Hand each path from getAllFeedPaths() to handleFeed(), then report."""
        for path in self._rc.getAllFeedPaths():
            self._iDler.handleFeed(path)

        print('FeedsDownloaderRunner: INFO: Done.')

Пример #4

Показать файл

Файл: ImagesDownloaderRunner.py Проект: PodSearch/PodSearch

class FeedsDownloaderRunner:
    """Runs an ImagesDownloader over every feed path the checker reports."""

    def __init__(self):
        """Create the ImagesDownloader and ResourceChecker used by run()."""
        self._iDler = ImagesDownloader()
        self._rc = ResourceChecker()

    def run(self):
        """Hand each path from getAllFeedPaths() to handleFeed(), then report."""
        for path in self._rc.getAllFeedPaths():
            self._iDler.handleFeed(path)

        print('FeedsDownloaderRunner: INFO: Done.')

Пример #5

Показать файл

Файл: SolrClient.py Проект: PodSearch/PodSearch

import sunburnt

from Resource.ResourceHelper import ResourceHelper
from Resource.ResourceChecker import ResourceChecker
from Util.PathTool import PathTool
from Digester.FeedDictFactory import FeedDictFactory

# create a connection to a solr server
try:
    solr = sunburnt.SolrInterface("http://localhost:8983/solr/")
except socket.error as e:
    print(e, "Is Solr started?")

_pt = PathTool.PathTool()
_rh = ResourceHelper()
_rc = ResourceChecker()
feeds = _rh.getAllFeedPaths()
for feed in feeds:

    print feed
    
    if not _rc.check_local_resource(feed, 'feed'):
        print("Skipping:", feed)
        continue
    
    try:
        feedDictFactory = FeedDictFactory()
        feedDict = feedDictFactory.getFeedDict(feed)
        if feedDict != None and feedDict != {}:
            feedDict['id'] = _pt.getFeedId(feed)
            print(("Indexing", feedDict))

Пример #6

Показать файл

Файл: ResourceDownloader.py Проект: tanmoydeb07/PodSearch

 def __init__(self):
     """Instantiate the helper objects and set ``last_download_timestamp`` to 0.

     Creates a Threader, a PathTool, a ResourceChecker and a ResourceHelper
     and stores each on the instance.
     """
     self._tdr = Threader()
     self._pt = PathTool.PathTool()
     self._rc = ResourceChecker()
     self._rh = ResourceHelper()
     # NOTE(review): presumably 0 stands for "nothing downloaded yet" --
     # confirm against the code that reads this attribute.
     self.last_download_timestamp = 0

Пример #7

Показать файл

import sunburnt

from Resource.ResourceHelper import ResourceHelper
from Resource.ResourceChecker import ResourceChecker
from Util.PathTool import PathTool
from Digester.FeedDictFactory import FeedDictFactory

# create a connection to a solr server
try:
    solr = sunburnt.SolrInterface("http://localhost:8983/solr/")
except socket.error as e:
    print(e, "Is Solr started?")

_pt = PathTool.PathTool()
_rh = ResourceHelper()
_rc = ResourceChecker()
feeds = _rh.getAllFeedPaths()
for feed in feeds:

    print feed

    if not _rc.check_local_resource(feed, 'feed'):
        print("Skipping:", feed)
        continue

    try:
        feedDictFactory = FeedDictFactory()
        feedDict = feedDictFactory.getFeedDict(feed)
        if feedDict != None and feedDict != {}:
            feedDict['id'] = _pt.getFeedId(feed)
            print(("Indexing", feedDict))

Пример #8

Показать файл

Файл: ResourceDownloader.py Проект: PodSearch/PodSearch

 def __init__(self):
     """Instantiate the helper objects and set ``last_download_timestamp`` to 0.

     Creates a Threader, a PathTool, a ResourceChecker and a ResourceHelper
     and stores each on the instance.
     """
     self._tdr = Threader()
     self._pt = PathTool.PathTool()
     self._rc = ResourceChecker()
     self._rh = ResourceHelper()
     # NOTE(review): presumably 0 stands for "nothing downloaded yet" --
     # confirm against the code that reads this attribute.
     self.last_download_timestamp = 0

Пример #9

Показать файл

Файл: ImagesDownloaderRunner.py Проект: PodSearch/PodSearch

 def __init__(self):
     """Instantiate the ImagesDownloader and ResourceChecker held by this object."""
     self._iDler = ImagesDownloader()
     self._rc = ResourceChecker()

Пример #10

Показать файл

 def __init__(self):
     """Instantiate the ImagesDownloader and ResourceChecker held by this object."""
     self._iDler = ImagesDownloader()
     self._rc = ResourceChecker()