Пример #1
0
    def save_url(self, filename, data, file_path):
        """Download ``data['url']`` to ``file_path`` and queue the next page.

        If the download reports errors, they are returned together with the
        warnings and nothing else happens. Otherwise, unless ``self.sample``
        is truthy, the saved file is parsed as JSON and a truthy
        ``links.next`` value is queued as one additional ``release_package``
        file whose name is derived from the MD5 of that URL.

        Returns a ``SaveUrlResult`` with either ``errors`` or
        ``additional_files``, plus the download warnings.
        """
        fetched = save_content(data['url'], file_path)
        if fetched.errors:
            return self.SaveUrlResult(errors=fetched.errors,
                                      warnings=fetched.warnings)

        follow_ups = []

        if self.sample:
            # Sample mode: pagination is never followed.
            return self.SaveUrlResult(additional_files=follow_ups,
                                      warnings=fetched.warnings)

        with open(file_path) as handle:
            payload = json.load(handle)

        if 'links' in payload:
            links = payload['links']
            if 'next' in links and links['next']:
                next_url = links['next']
                digest = hashlib.md5(next_url.encode('utf-8')).hexdigest()
                follow_ups.append({
                    'url': next_url,
                    'filename': 'page-%s.json' % digest,
                    'data_type': 'release_package',
                    # Keep the same priority so that all the requests for one
                    # year are done at the same time. Because this source
                    # pages using cursors, it's probably best to fetch the
                    # pages as fast as possible.
                    'priority': data['priority'],
                })

        return self.SaveUrlResult(additional_files=follow_ups,
                                  warnings=fetched.warnings)
Пример #2
0
    def save_url(self, filename, data, file_path):
        """Download ``data['url']`` and queue the packages a record package lists.

        If the download reports errors, they are returned with the warnings.
        For ``data['data_type'] == 'record_package'`` the saved file is parsed
        as JSON and every URL under its ``packages`` key (when present) is
        queued as an additional ``release_package`` file named after the MD5
        of the URL. Other data types queue nothing.

        Returns a ``SaveUrlResult`` with either ``errors`` or
        ``additional_files``, plus the download warnings.
        """
        fetched = save_content(data['url'], file_path)
        if fetched.errors:
            return self.SaveUrlResult(errors=fetched.errors,
                                      warnings=fetched.warnings)

        package_urls = []
        if data['data_type'] == 'record_package':
            with open(file_path) as handle:
                payload = json.load(handle)
            if 'packages' in payload:
                package_urls = payload['packages']

        follow_ups = [
            {
                'url': package_url,
                'filename': 'packages-%s.json' % hashlib.md5(
                    package_url.encode('utf-8')).hexdigest(),
                'data_type': 'release_package',
            }
            for package_url in package_urls
        ]

        return self.SaveUrlResult(additional_files=follow_ups,
                                  warnings=fetched.warnings)
Пример #3
0
    def save_url(self, filename, data, file_path):
        """Fetch one URL: expand a ``meta`` listing or save any other file.

        For ``data['data_type'] == 'meta'`` the URL is requested (SSL
        verification disabled), the response text is parsed as JSON and each
        entry under its ``data`` key is queued as a ``release`` file using the
        entry's ``uri`` and ``ocid``. When ``self.sample`` is truthy at most
        10 entries are queued. Nothing is written to ``file_path`` in this
        branch.

        For every other data type the URL is saved to ``file_path`` and the
        resulting errors and warnings are returned.
        """
        if data['data_type'] != 'meta':
            saved = util.save_content(data['url'], file_path, verify_ssl=False)
            return self.SaveUrlResult(errors=saved.errors, warnings=saved.warnings)

        response, errors = util.get_url_request(data['url'], verify_ssl=False)
        if errors:
            return self.SaveUrlResult(errors=errors)

        listing = json.loads(response.text)
        follow_ups = []

        if "data" in listing.keys():
            entries = listing['data']
            # Sometimes it's a dict, sometimes it's a list.
            if isinstance(entries, dict):
                entries = entries.values()

            for entry in entries:
                # In sample mode stop as soon as 10 files are queued.
                if self.sample and len(follow_ups) >= 10:
                    break
                follow_ups.append({
                    'url': entry['uri'],
                    'filename': '{}.json'.format(entry['ocid']),
                    'data_type': 'release',
                    'priority': 1,
                })

        return self.SaveUrlResult(additional_files=follow_ups)
Пример #4
0
    def save_url(self, filename, data, file_path):
        """Download a ``release_package`` page and queue the page after it.

        Only ``data['data_type'] == 'release_package'`` is handled; any other
        data type returns ``None``. The current page number is read from the
        second ``-``-separated field of ``filename`` (the names generated here
        look like ``page-<n>-.json``). If the saved JSON has a
        ``next_page.uri`` and either ``self.sample`` is falsy or the current
        page is below 3, that URI is queued as the next page.

        Returns a ``SaveUrlResult`` with either ``errors`` or
        ``additional_files``, plus the download warnings.
        """
        if data['data_type'] != 'release_package':
            return None

        fetched = save_content(data['url'], file_path)
        if fetched.errors:
            return self.SaveUrlResult(errors=fetched.errors,
                                      warnings=fetched.warnings)

        follow_ups = []

        with open(file_path) as handle:
            payload = json.load(handle)

        current_page = int(filename.split('-')[1])
        has_next = 'next_page' in payload and 'uri' in payload['next_page']
        if has_next and (not self.sample or current_page < 3):
            follow_ups.append({
                'url': payload['next_page']['uri'],
                'filename': 'page-%d-.json' % (current_page + 1),
                'data_type': 'release_package',
            })

        return self.SaveUrlResult(additional_files=follow_ups,
                                  warnings=fetched.warnings)
Пример #5
0
    def save_url(self, filename, data, file_path):
        """Download a gzipped tar archive and unpack its first member to ``file_path``.

        The archive is saved next to the target as ``<file_path>-temp.tar.gz``
        (with ``replace_control_codes=False``), then the first name listed in
        the archive is streamed into ``file_path`` in binary mode.

        Returns a ``SaveUrlResult`` carrying the download errors and warnings
        on failure, or just the download warnings on success.
        """
        archive_path = file_path + '-temp.tar.gz'

        save_content_response = save_content(data['url'], archive_path, replace_control_codes=False)
        if save_content_response.errors:
            return self.SaveUrlResult(errors=save_content_response.errors, warnings=save_content_response.warnings)

        # 1 MiB per read. The previous `1024 ^ 2` was a bitwise XOR, i.e. 1026 bytes.
        chunk_size = 1024 ** 2

        with tarfile.open(archive_path, "r:gz") as tar:
            # NOTE(review): extractfile() returns None for non-file members
            # (e.g. a directory entry); this assumes the first member is a
            # regular file - confirm against the published archives.
            with tar.extractfile(tar.getnames()[0]) as readfp:
                with open(file_path, "wb") as writefp:
                    # iter() with a sentinel stops at the first empty read (EOF).
                    for buf in iter(lambda: readfp.read(chunk_size), b''):
                        writefp.write(buf)

        # Report download warnings on success too, as the error path already does.
        return self.SaveUrlResult(warnings=save_content_response.warnings)
Пример #6
0
    def save_url(self, filename, data, file_path):
        """Download ``data['url']`` and queue the releases a ``meta`` file lists.

        If the download reports errors, they are returned with the warnings.
        For ``data['data_type'] == 'meta'`` the saved file is parsed as JSON
        and each item in it is queued as a ``release`` file, named after the
        text following the item's last ``/``. When ``self.sample`` is truthy
        at most 10 items are queued. Other data types queue nothing.

        Returns a ``SaveUrlResult`` with either ``errors`` or
        ``additional_files``, plus the download warnings.
        """
        fetched = save_content(data['url'], file_path)
        if fetched.errors:
            return self.SaveUrlResult(errors=fetched.errors, warnings=fetched.warnings)

        follow_ups = []

        if data['data_type'] == 'meta':
            with open(file_path) as handle:
                release_urls = json.load(handle)

            for release_url in release_urls:
                # In sample mode stop as soon as 10 files are queued.
                if self.sample and len(follow_ups) >= 10:
                    break
                follow_ups.append({
                    'url': release_url,
                    'filename': 'release-%s.json' % release_url.rsplit('/', 1)[-1],
                    'data_type': 'release',
                })

        return self.SaveUrlResult(additional_files=follow_ups, warnings=fetched.warnings)
Пример #7
0
 def save_url(self, file_name, data, file_path):
     """Download ``data['url']`` to ``file_path`` and report errors and warnings.

     No follow-up files are queued; the result only carries whatever
     errors and warnings the download produced.
     """
     outcome = save_content(data['url'], file_path)
     return self.SaveUrlResult(
         errors=outcome.errors,
         warnings=outcome.warnings,
     )
Пример #8
0
    def save_url(self, filename, data, file_path):
        """Download ``data['url']``; for ``meta`` pages queue the next page and stage files.

        If the download reports errors, they are returned with the warnings.
        Data types other than ``meta`` queue nothing.

        For a ``meta`` file, ``filename`` is split on ``-``: field 1 is the
        stage (``planning``, ``tender`` or ``contract``) and field 3 the page
        number, matching the ``type-<stage>-page-<n>-.json`` names generated
        here. Two kinds of follow-up are queued:

        * the ``links.next`` URL as the next ``meta`` page (priority 10), if
          present and either ``self.sample`` is falsy or the page is below 3;
        * for each release, the stage-specific detail URL(s) as
          ``release_package`` files (priority 1). When ``self.sample`` is
          truthy only the first 3 releases are processed.

        Returns a ``SaveUrlResult`` with either ``errors`` or
        ``additional_files``, plus the download warnings.
        """
        fetched = save_content(data['url'], file_path)
        if fetched.errors:
            return self.SaveUrlResult(errors=fetched.errors,
                                      warnings=fetched.warnings)

        follow_ups = []

        if data['data_type'] != 'meta':
            return self.SaveUrlResult(additional_files=follow_ups,
                                      warnings=fetched.warnings)

        with open(file_path) as handle:
            payload = json.load(handle)

        name_parts = filename.split('-')
        page = int(name_parts[3])
        stage = name_parts[1]

        has_next = 'links' in payload and 'next' in payload['links']
        if has_next and (not self.sample or page < 3):
            follow_ups.append({
                'url': payload['links']['next'],
                'filename': 'type-%s-page-%d-.json' % (stage, page + 1),
                'data_type': 'meta',
                'priority': 10,
            })

        processed = 0
        for release in payload['releases']:
            # In sample mode only the first 3 releases are expanded.
            if self.sample and processed >= 3:
                break

            if stage == 'planning':
                stage_urls = [
                    'https://tenders.nsw.gov.au/?event=public.api.planning.view&PlannedProcurementUUID=%s'
                    % release['tender']['plannedProcurementUUID']
                ]
            elif stage == 'tender':
                stage_urls = [
                    'https://tenders.nsw.gov.au/?event=public.api.tender.view&RFTUUID=%s'
                    % release['tender']['RFTUUID']
                ]
            elif stage == 'contract':
                # A contract release yields one URL per award.
                stage_urls = [
                    'https://tenders.nsw.gov.au/?event=public.api.contract.view&CNUUID=%s'
                    % award['CNUUID']
                    for award in release['awards']
                ]
            else:
                stage_urls = []

            processed += 1

            for stage_url in stage_urls:
                digest = hashlib.md5(stage_url.encode('utf-8')).hexdigest()
                follow_ups.append({
                    'url': stage_url,
                    'filename': 'packages-%s.json' % digest,
                    'data_type': 'release_package',
                    'priority': 1,
                })

        return self.SaveUrlResult(additional_files=follow_ups,
                                  warnings=fetched.warnings)