示例#1
0
    def get_figure_listing(self, report_id, chapter_id=None):
        """Retrieve the figures listed for a report, optionally for one chapter.

        Sends a GET through ``self.s`` to
        ``<base_url>/report/<report_id>[/chapter/<chapter_id>]/figure`` with
        the query parameter ``all=1``.

        :param report_id: report identifier placed in the URL path.
        :param chapter_id: optional chapter identifier; when falsy, no
            ``/chapter/...`` segment is added to the URL.
        :return: a list with one Figure per element of the decoded JSON body.
        :raises Exception: carrying the raw response text when a ValueError
            is raised while decoding the body or building the Figure objects.
        """
        if chapter_id:
            chapter_segment = '/chapter/' + chapter_id
        else:
            chapter_segment = ''

        listing_url = '{b}/report/{rpt}{chap}/figure'.format(
            b=self.base_url, rpt=report_id, chap=chapter_segment)

        # NOTE(review): verify=False presumably turns off TLS certificate
        # checking (requests semantics) -- confirm self.s is a requests session.
        response = self.s.get(listing_url, params={'all': '1'}, verify=False)

        try:
            figures = []
            for entry in response.json():
                figures.append(Figure(entry))
            return figures
        except ValueError:
            # Surface the raw body so the caller can see what the server sent.
            raise Exception(response.text)
示例#2
0
    def get_figure(self, report_id, figure_id, chapter_id=None):
        """Retrieve a single figure of a report, optionally scoped to a chapter.

        Sends a GET through ``self.s`` to
        ``<base_url>/report/<report_id>[/chapter/<chapter_id>]/figure/<figure_id>``
        with the query parameter ``all=1``.

        :param report_id: report identifier placed in the URL path.
        :param figure_id: figure identifier placed at the end of the URL path.
        :param chapter_id: optional chapter identifier; when falsy, no
            ``/chapter/...`` segment is added to the URL.
        :return: a Figure built from the decoded JSON body.
        :raises Exception: carrying the raw response text when a ValueError
            is raised while decoding the body or building the Figure.
        """
        if chapter_id:
            chapter_segment = '/chapter/' + chapter_id
        else:
            chapter_segment = ''

        figure_url = '{b}/report/{rpt}{chap}/figure/{fig}'.format(
            b=self.base_url,
            rpt=report_id,
            chap=chapter_segment,
            fig=figure_id)

        # NOTE(review): verify=False presumably turns off TLS certificate
        # checking (requests semantics) -- confirm self.s is a requests session.
        response = self.s.get(figure_url, params={'all': '1'}, verify=False)

        try:
            figure = Figure(response.json())
        except ValueError:
            # Surface the raw body so the caller can see what the server sent.
            raise Exception(response.text)
        else:
            return figure
    def get_webform(self, fig_url, download_images=False):
        """Fetch a figure webform and assemble it into a Figure object.

        Requests ``base_url + fig_url`` with the client token as the
        ``token`` query parameter, takes the entry under the first top-level
        key of the JSON payload, and builds from it a Figure with its
        contributors, an optional parent (when the figure's source type is
        ``published_source``), and its images. Each image receives its own
        contributors and one Dataset (with an attached Activity) per entry in
        the image's ``datasources`` list.

        :param fig_url: path appended to ``self.base_url``.
        :param download_images: accepted for interface compatibility; it is
            not used by this method body.
        :return: the populated Figure.
        :raises IndexError: if the payload has no top-level key.
        :raises KeyError: if a required key (``figure``, the latitude or
            longitude bounds, ``start_time``/``end_time``) is missing.
        """
        full_url = '{b}{url}?token={t}'.format(b=self.base_url,
                                               url=fig_url,
                                               t=self.token)
        webform_json = requests.get(full_url).json()

        #TODO: refactor the service so this isn't necessary
        # list(...)[0] rather than .keys()[0]: dict views are not indexable on
        # Python 3, and an empty payload still raises IndexError as before.
        webform_nid = list(webform_json)[0]
        webform = webform_json[webform_nid]
        figure_json = webform['figure'][0]

        f = Figure(figure_json, trans=trans.FIG_TRANSLATIONS)

        # Add contributor info
        if 'list_the_creator_of_the_figure' in figure_json:
            f.add_contributor(
                parse_creators(figure_json['list_the_creator_of_the_figure']))

        # Add provenance information (wasDerivedFrom parent)
        source_type = figure_json.get(
            'what_type_of_source_provided_this_figure')
        if source_type == 'published_source':
            f.add_parent(
                Parent(deepcopy(f.original),
                       trans=trans.PARENT_TRANSLATIONS,
                       pubtype_map=trans.PARENT_PUBTYPE_MAP,
                       search_hints=trans.PARENT_SEARCH_HINTS))

        for image in webform.get('images', []):
            image_obj = Image(
                image,
                local_path=self.get_local_image_path(image),
                remote_path=self.get_remote_image_path(image),
                trans=trans.IMG_TRANSLATIONS)

            # Add contributor info
            if 'list_the_creator_of_the_image' in image:
                image_obj.add_contributor(
                    parse_creators(image['list_the_creator_of_the_image']))

            for dataset_json in image.get('datasources', []):
                dataset = Dataset(dataset_json,
                                  trans=trans.DATASET_TRANSLATIONS,
                                  known_ids=trans.DATASET_IDS)

                # Unparseable start/end times are reported and the extent is
                # left unset; they do not abort the whole figure.
                try:
                    dataset.temporal_extent = ' '.join([
                        parse(dataset_json[field]).isoformat()
                        for field in ['start_time', 'end_time']
                    ])
                except (TypeError, ValueError) as e:
                    # Single-argument print() emits the same text on
                    # Python 2 and Python 3.
                    print('Problem with start/end time:  %s %s %s'
                          % (fig_url, f.title, e))
                    print('%s %s' % (dataset_json['start_time'],
                                     dataset_json['end_time']))
                    dataset.temporal_extent = None

                dataset.spatial_extent = ' '.join([
                    '{k}: {v};'.format(k=key, v=dataset_json[key])
                    for key in [
                        'maximum_latitude', 'minimum_latitude',
                        'maximum_longitude', 'minimum_longitude'
                    ]
                ])

                # Filter overlapping Dataset keys out
                dataset_only_keys = ('href', 'uri', 'identifier',
                                     'start_time', 'end_time')
                activity_json = {
                    k: dataset_json[k]
                    for k in dataset_json if k not in dataset_only_keys
                }

                # Add synthetic identifier:
                # <first segment of the image id>-<dataset id>-process
                activity_json['identifier'] = '-'.join(
                    (image_obj.identifier.split('-')[0],
                     dataset.identifier, 'process'))
                dataset.activity = Activity(
                    activity_json, trans=trans.ACT_TRANSLATIONS)

                #TODO: Extract DOIs from citation
                image_obj.datasets.append(dataset)

            f.images.append(image_obj)

        # Hand the assembled figure back; previously it was built and dropped.
        return f
示例#4
0
    def get_webform(self, fig_url, download_images=False):
        """Fetch a figure webform and assemble it into a Figure object.

        Requests ``base_url + fig_url`` with the client token as the
        ``token`` query parameter, takes the entry under the first top-level
        key of the JSON payload, and builds from it a Figure with its
        contributors, an optional parent (when the figure's source type is
        ``published_source``), and its images with their contributors.

        A Dataset and Activity are still constructed for every entry in an
        image's ``datasources`` list, but in this variant they are not
        attached to the image.

        :param fig_url: path appended to ``self.base_url``.
        :param download_images: accepted for interface compatibility; it is
            not used by this method body.
        :return: the populated Figure.
        :raises IndexError: if the payload has no top-level key.
        :raises KeyError: if a required key (``figure``, the latitude or
            longitude bounds, ``start_time``/``end_time``) is missing.
        """
        full_url = '{b}{url}?token={t}'.format(b=self.base_url, url=fig_url, t=self.token)
        webform_json = requests.get(full_url).json()

        #TODO: refactor the service so this isn't necessary
        # list(...)[0] rather than .keys()[0]: dict views are not indexable on
        # Python 3, and an empty payload still raises IndexError as before.
        webform_nid = list(webform_json)[0]
        webform = webform_json[webform_nid]
        figure_json = webform['figure'][0]

        f = Figure(figure_json, trans=trans.FIG_TRANSLATIONS)

        # Add contributor info
        if 'list_the_creator_of_the_figure' in figure_json:
            f.add_contributor(parse_creators(figure_json['list_the_creator_of_the_figure']))

        # Add provenance information (wasDerivedFrom parent)
        source_type = figure_json.get('what_type_of_source_provided_this_figure')
        if source_type == 'published_source':
            f.add_parent(Parent(deepcopy(f.original),
                                trans=trans.PARENT_TRANSLATIONS,
                                pubtype_map=trans.PARENT_PUBTYPE_MAP))

        for image in webform.get('images', []):
            image_obj = Image(image, local_path=self.get_local_image_path(image),
                              remote_path=self.get_remote_image_path(image), trans=trans.IMG_TRANSLATIONS)

            # Add contributor info
            if 'list_the_creator_of_the_image' in image:
                image_obj.add_contributor(parse_creators(image['list_the_creator_of_the_image']))

            for dataset_json in image.get('datasources', []):
                dataset = Dataset(dataset_json, trans=trans.DATASET_TRANSLATIONS, known_ids=trans.DATASET_IDS)

                # Unparseable start/end times are reported and the extent is
                # left unset; they do not abort the whole figure.
                try:
                    dataset.temporal_extent = ' '.join(
                        [parse(dataset_json[field]).isoformat() for field in ['start_time', 'end_time']]
                    )
                except (TypeError, ValueError) as e:
                    # Single-argument print() emits the same text on
                    # Python 2 and Python 3.
                    print('Problem with start/end time:  %s %s %s' % (fig_url, f.title, e))
                    print('%s %s' % (dataset_json['start_time'], dataset_json['end_time']))
                    dataset.temporal_extent = None

                dataset.spatial_extent = ' '.join(
                    ['{k}: {v};'.format(k=key, v=dataset_json[key]) for key in
                     ['maximum_latitude', 'minimum_latitude', 'maximum_longitude', 'minimum_longitude']])

                # Filter overlapping Dataset keys out
                dataset_only_keys = ('href', 'uri', 'identifier', 'start_time', 'end_time')
                activity_json = {k: dataset_json[k] for k in dataset_json if k not in dataset_only_keys}

                # Add synthetic identifier:
                # <first segment of the image id>-<dataset id>-process
                activity_json['identifier'] = '-'.join(
                    (image_obj.identifier.split('-')[0], dataset.identifier, 'process'))
                dataset.activity = Activity(activity_json, trans=trans.ACT_TRANSLATIONS)

                # TODO: Extract DOIs from citation
                # NOTE(review): attaching the dataset to the image is disabled
                # here, so `dataset` is built and then discarded -- confirm
                # whether that is intentional before re-enabling the line below.
                # image_obj.datasets.append(dataset)

            f.images.append(image_obj)

        # Hand the assembled figure back; previously it was built and dropped.
        return f