def get_figure_listing(self, report_id, chapter_id=None):
    """Return every Figure for a report, optionally scoped to one chapter.

    Asks the service for the full figure listing ('all': '1') and wraps
    each JSON entry in a Figure.  If the response body is not valid JSON,
    an Exception carrying the raw response text is raised instead.
    """
    chapter_part = '/chapter/' + chapter_id if chapter_id else ''
    listing_url = '{b}/report/{rpt}{chap}/figure'.format(
        b=self.base_url, rpt=report_id, chap=chapter_part)
    # NOTE(review): verify=False disables TLS certificate validation --
    # confirm this is intentional for this deployment.
    resp = self.s.get(listing_url, params={'all': '1'}, verify=False)
    try:
        return [Figure(entry) for entry in resp.json()]
    except ValueError:
        raise Exception(resp.text)
def get_figure(self, report_id, figure_id, chapter_id=None):
    """Return a single Figure from a report, optionally scoped to a chapter.

    Fetches the figure's JSON representation from the service and wraps
    it in a Figure.  If the response body is not valid JSON, an Exception
    carrying the raw response text is raised instead.
    """
    chapter_part = '/chapter/' + chapter_id if chapter_id else ''
    figure_url = '{b}/report/{rpt}{chap}/figure/{fig}'.format(
        b=self.base_url, rpt=report_id, chap=chapter_part, fig=figure_id)
    # NOTE(review): verify=False disables TLS certificate validation --
    # confirm this is intentional for this deployment.
    resp = self.s.get(figure_url, params={'all': '1'}, verify=False)
    try:
        return Figure(resp.json())
    except ValueError:
        raise Exception(resp.text)
def get_webform(self, fig_url, download_images=False): full_url = '{b}{url}?token={t}'.format(b=self.base_url, url=fig_url, t=self.token) webform_json = requests.get(full_url).json() #TODO: refactor the service so this isn't necessary webform_nid = webform_json.keys()[0] figure_json = webform_json[webform_nid]['figure'][0] f = Figure(figure_json, trans=trans.FIG_TRANSLATIONS) #Add contributor info if 'list_the_creator_of_the_figure' in figure_json: f.add_contributor( parse_creators(figure_json['list_the_creator_of_the_figure'])) #Add provenance information (wasDerivedFrom parent) if 'what_type_of_source_provided_this_figure' in figure_json and figure_json[ 'what_type_of_source_provided_this_figure'] == 'published_source': f.add_parent( Parent(deepcopy(f.original), trans=trans.PARENT_TRANSLATIONS, pubtype_map=trans.PARENT_PUBTYPE_MAP, search_hints=trans.PARENT_SEARCH_HINTS)) if 'images' in webform_json[webform_nid]: for img_idx, image in enumerate( webform_json[webform_nid]['images']): image_obj = Image( image, local_path=self.get_local_image_path(image), remote_path=self.get_remote_image_path(image), trans=trans.IMG_TRANSLATIONS) #Add contributor info if 'list_the_creator_of_the_image' in image: image_obj.add_contributor( parse_creators(image['list_the_creator_of_the_image'])) #TODO: this just keeps getting worse if 'datasources' in webform_json[webform_nid]['images'][ img_idx]: for dataset_json in webform_json[webform_nid]['images'][ img_idx]['datasources']: dataset = Dataset(dataset_json, trans=trans.DATASET_TRANSLATIONS, known_ids=trans.DATASET_IDS) #Commence the hacks try: dataset.temporal_extent = ' '.join([ parse(dataset_json[field]).isoformat() for field in ['start_time', 'end_time'] ]) except TypeError, e: print 'Problem with start/end time: ', fig_url, f.title, e print dataset_json['start_time'], dataset_json[ 'end_time'] dataset.temporal_extent = None except ValueError, e: print 'Problem with start/end time: ', fig_url, f.title, e print 
dataset_json['start_time'], dataset_json[ 'end_time'] dataset.temporal_extent = None dataset.spatial_extent = ' '.join([ '{k}: {v};'.format(k=key, v=dataset_json[key]) for key in [ 'maximum_latitude', 'minimum_latitude', 'maximum_longitude', 'minimum_longitude' ] ]) #Filter overlapping Dataset keys out activity_json = { k: dataset_json[k] for k in dataset_json if k not in [ 'href', 'uri', 'identifier', 'start_time', 'end_time' ] } #Add synthetic identifier activity_json['identifier'] = '-'.join( (image_obj.identifier.split('-')[0], dataset.identifier, 'process')) dataset.activity = Activity( activity_json, trans=trans.ACT_TRANSLATIONS) #TODO: Extract DOIs from citation image_obj.datasets.append(dataset) f.images.append(image_obj)
def get_webform(self, fig_url, download_images=False): full_url = '{b}{url}?token={t}'.format(b=self.base_url, url=fig_url, t=self.token) webform_json = requests.get(full_url).json() #TODO: refactor the service so this isn't necessary webform_nid = webform_json.keys()[0] figure_json = webform_json[webform_nid]['figure'][0] f = Figure(figure_json, trans=trans.FIG_TRANSLATIONS) #Add contributor info if 'list_the_creator_of_the_figure' in figure_json: f.add_contributor(parse_creators(figure_json['list_the_creator_of_the_figure'])) #Add provenance information (wasDerivedFrom parent) if 'what_type_of_source_provided_this_figure' in figure_json and figure_json[ 'what_type_of_source_provided_this_figure'] == 'published_source': f.add_parent(Parent(deepcopy(f.original), trans=trans.PARENT_TRANSLATIONS, pubtype_map=trans.PARENT_PUBTYPE_MAP)) if 'images' in webform_json[webform_nid]: for img_idx, image in enumerate(webform_json[webform_nid]['images']): image_obj = Image(image, local_path=self.get_local_image_path(image), remote_path=self.get_remote_image_path(image), trans=trans.IMG_TRANSLATIONS) #Add contributor info if 'list_the_creator_of_the_image' in image: image_obj.add_contributor(parse_creators(image['list_the_creator_of_the_image'])) #TODO: this just keeps getting worse if 'datasources' in webform_json[webform_nid]['images'][img_idx]: for dataset_json in webform_json[webform_nid]['images'][img_idx]['datasources']: dataset = Dataset(dataset_json, trans=trans.DATASET_TRANSLATIONS, known_ids=trans.DATASET_IDS) #Commence the hacks try: dataset.temporal_extent = ' '.join( [parse(dataset_json[field]).isoformat() for field in ['start_time', 'end_time']] ) except TypeError, e: print 'Problem with start/end time: ', fig_url, f.title, e print dataset_json['start_time'], dataset_json['end_time'] dataset.temporal_extent = None except ValueError, e: print 'Problem with start/end time: ', fig_url, f.title, e print dataset_json['start_time'], dataset_json['end_time'] 
dataset.temporal_extent = None dataset.spatial_extent = ' '.join(['{k}: {v};'.format(k=key, v=dataset_json[key]) for key in ['maximum_latitude', 'minimum_latitude', 'maximum_longitude', 'minimum_longitude']]) #Filter overlapping Dataset keys out activity_json = {k: dataset_json[k] for k in dataset_json if k not in ['href', 'uri', 'identifier', 'start_time', 'end_time']} #Add synthetic identifier activity_json['identifier'] = '-'.join((image_obj.identifier.split('-')[0], dataset.identifier, 'process')) dataset.activity = Activity(activity_json, trans=trans.ACT_TRANSLATIONS) # TODO: Extract DOIs from citation # image_obj.datasets.append(dataset) f.images.append(image_obj)