Exemplo n.º 1
0
    def download_metadata(self, n_threads=5):
        """Download media metadata and store it in the metadata file.

        When ``self.paginated_media_urls`` holds more than one URL, the URLs
        are queued for a pool of ``ThreadMetadataRequest`` workers whose
        output queue is consumed by a single ``ThreadJSONWriter`` targeting
        ``self.metadata_filepath``; the call blocks until both queues have
        been fully processed.

        Otherwise (the single-page case) the first URL is fetched inline via
        ``self._grab_json``, its ``'media'`` entries are merged over whatever
        is already stored in the metadata file, the merged result is written
        back, and it is kept in ``self._metadata``.

        Args:
            n_threads: Upper bound on the number of request worker threads;
                never more workers than URLs are started.
        """
        web_request_queue = Queue()
        json_serialization_queue = Queue()

        urls = self.paginated_media_urls
        if len(urls) > 1:
            for url in urls:
                web_request_queue.put(url)

            pool_size = min(len(urls), n_threads)
            web_pool = [
                ThreadMetadataRequest(
                    web_request_queue,
                    json_serialization_queue,
                    self.session
                )
                for _ in range(pool_size)
            ]
            json_serializer = ThreadJSONWriter(
                json_serialization_queue,
                self.metadata_filepath
            )

            for thread in web_pool:
                # Daemon workers loop forever; they must not keep the
                # interpreter alive once the queues have been drained.
                thread.daemon = True
                thread.start()
            json_serializer.start()

            # Block until every URL was fetched and every result was handled.
            web_request_queue.join()
            json_serialization_queue.join()
        else:
            json_response = self._grab_json(urls[0])
            media_dict = list_of_dicts_to_dict(
                json_response['media'], promote_to_key='_id')

            # Read the existing cache (if any) in its own read-only handle.
            # The previous single 'r+w' handle is rejected by Python 3's
            # open(), and where it was accepted the dump landed after the
            # old content, leaving two JSON documents in one file.
            cached_meta = {}
            if osp.isfile(self.metadata_filepath):
                with open(self.metadata_filepath, 'r') as f:
                    try:
                        cached_meta = load_json(f)
                    except ValueError:
                        # Unparseable cache: start again from an empty dict.
                        cached_meta = {}

            cached_meta.update(media_dict)

            # 'w' truncates, so the file always holds exactly one document.
            with open(self.metadata_filepath, 'w') as f:
                dump_json(cached_meta, f)
            self._metadata = cached_meta
Exemplo n.º 2
0
    def run(self):
        """Fetch queued URLs forever and forward their media entries.

        Each iteration polls ``self.qi`` for a URL (50 ms timeout), requests
        it through ``self.s`` when that is truthy and through ``rq``
        otherwise, and, if the status code equals ``codes.all_good``, puts
        the result of ``list_of_dicts_to_dict(response['media'],
        promote_to_key='_id')`` on ``self.qo``. Responses with any other
        status are dropped.

        Every URL taken from ``self.qi`` is acknowledged with ``task_done()``
        even if fetching or parsing raises, so a ``join()`` on that queue is
        never left waiting for a failed URL. Failures are logged and the
        worker moves on to the next URL.
        """
        import logging

        while True:
            try:
                url = self.qi.get(True, 0.05)
            except Empty:
                continue

            try:
                if self.s:
                    r = self.s.get(url)
                else:
                    r = rq.get(url)

                if r.status_code == codes.all_good:
                    json_response = r.json()
                    media_entries = json_response['media']

                    media_dict = list_of_dicts_to_dict(
                        media_entries, promote_to_key='_id')

                    self.qo.put(media_dict)
            except Exception:
                # Thread boundary: an escaping exception would kill this
                # worker and strand the URLs still waiting in the queue.
                logging.getLogger(__name__).exception(
                    'Failed to fetch metadata from %s', url)
            finally:
                # Must pair with the successful get() above, otherwise
                # Queue.join() on the input queue blocks forever.
                self.qi.task_done()
Exemplo n.º 3
0
    def run(self):
        """Worker loop: pull URLs from ``self.qi``, push media dicts to ``self.qo``.

        The loop never terminates on its own. For each URL obtained from the
        input queue (polled with a 0.05 s timeout) the page is requested via
        ``self.s`` if it is truthy, else via ``rq``. When the response status
        equals ``codes.all_good`` its JSON ``'media'`` value is passed to
        ``list_of_dicts_to_dict(..., promote_to_key='_id')`` and the result is
        placed on the output queue; other statuses produce no output.

        ``task_done()`` is called exactly once per URL received, including
        when the request or the parsing raises, so that ``join()`` on the
        input queue cannot hang on a failed URL. The error is logged and the
        loop continues with the next URL.
        """
        import logging

        while True:
            try:
                url = self.qi.get(True, 0.05)
            except Empty:
                continue

            try:
                if self.s:
                    r = self.s.get(url)
                else:
                    r = rq.get(url)

                if r.status_code == codes.all_good:
                    json_response = r.json()
                    media_entries = json_response['media']

                    media_dict = list_of_dicts_to_dict(media_entries,
                                                       promote_to_key='_id')

                    self.qo.put(media_dict)
            except Exception:
                # Keep the worker alive: letting the exception escape would
                # end this thread while URLs may still be queued.
                logging.getLogger(__name__).exception(
                    'Failed to fetch metadata from %s', url)
            finally:
                # Acknowledge the item unconditionally; a missing
                # task_done() makes Queue.join() wait indefinitely.
                self.qi.task_done()