def publication_date(self):
    """Extract the publication date from the ``coverDisplayDate`` element.

    The text of the first ``./RDF/Description/coverDisplayDate`` element
    under ``self.root`` is parsed with ``PartialDate.parse``. When that
    fails, the string is assumed to hold a month range such as
    ``July-September 2020``: the leading month and its hyphen are removed
    and the remainder (``September 2020``) is parsed instead.

    Returns:
        PartialDate: the parsed date, or ``None`` when the element is
        missing or empty.

    Raises:
        ValueError: presumably, when the string cannot be parsed even after
            the month-range prefix is stripped (the fallback parse is
            deliberately left unguarded, as in the original).
    """
    date_string = self.root.xpath(
        "string(./RDF/Description/coverDisplayDate[1])"
    ).extract_first()
    if not date_string:
        return None

    try:
        return PartialDate.parse(date_string)
    except ValueError:
        # Month range, e.g. "July-September 2020": drop the run of letters
        # and the hyphen that precede another letter, keeping the end month.
        without_range_start = re.sub(
            r"[A-Za-z]*-(?=[A-Za-z])", "", date_string
        )
        return PartialDate.parse(without_range_start)
def _truncate_date_value_according_on_date_field(field, date_value):
    """Parse a date value and reduce it to its year when the field needs it.

    Args:
        field (unicode): The field the date value is going to be queried on.
        date_value (str): The date value to parse and possibly truncate.

    Returns:
        PartialDate: The parsed date, reduced to its year only when
        ``field`` is listed in ``ES_MAPPING_HEP_DATE_ONLY_YEAR``.
        None: When ``date_value`` cannot be parsed (``ValueError``).

    Notes:
        The stated intent of the year-only truncation is to let
        ElasticSearch compare against date fields that store only a year.
    """
    try:
        parsed = PartialDate.parse(date_value)
    except ValueError:
        return None

    if field not in ES_MAPPING_HEP_DATE_ONLY_YEAR:
        return parsed

    # Year-only field: rebuild the date from its year alone.
    return PartialDate.from_parts(parsed.year)
def get_date(date_node):
    """Build a date from the representations available on a date node.

    Three candidates are computed from ``date_node``: the ``iso-8601-date``
    attribute, the ``year``/``month``/``day`` children, and the free-text
    ``string-date`` child. All three are always evaluated, and the choice
    among them is delegated to ``get_first`` (presumably the first
    non-empty candidate in that order).

    Returns:
        PartialDate: the date selected by ``get_first``.
    """
    def text_of(query):
        # First match of the XPath query, as returned by the selector.
        return date_node.xpath(query).extract_first()

    iso_value = text_of('./@iso-8601-date')
    from_iso = PartialDate.loads(iso_value) if iso_value else None

    year = text_of('string(./year)')
    month = text_of('string(./month)')
    day = text_of('string(./day)')
    from_parts = PartialDate.from_parts(year, month, day) if year else None

    free_text = text_of('string(./string-date)')
    try:
        from_text = PartialDate.parse(free_text)
    except ValueError:
        # An unparsable free-text date simply yields no candidate.
        from_text = None

    return get_first([from_iso, from_parts, from_text])
def get_date(date_node):
    """Return the date described by a date node.

    The node may carry its date as an ``iso-8601-date`` attribute, as
    separate ``year``/``month``/``day`` elements, or as a ``string-date``
    element. Each form is converted to a candidate (``None`` when absent
    or, for ``string-date``, unparsable) and ``get_first`` chooses among
    the candidates, listed in that order.

    Returns:
        PartialDate: the parsed date.
    """
    candidates = []

    # Candidate 1: the ISO 8601 attribute.
    iso_attribute = date_node.xpath('./@iso-8601-date').extract_first()
    if iso_attribute:
        candidates.append(PartialDate.loads(iso_attribute))
    else:
        candidates.append(None)

    # Candidate 2: explicit year/month/day elements (only when a year exists).
    year_text = date_node.xpath('string(./year)').extract_first()
    month_text = date_node.xpath('string(./month)').extract_first()
    day_text = date_node.xpath('string(./day)').extract_first()
    if year_text:
        candidates.append(
            PartialDate.from_parts(year_text, month_text, day_text)
        )
    else:
        candidates.append(None)

    # Candidate 3: the free-text date; a ValueError means no usable date.
    raw_string_date = date_node.xpath('string(./string-date)').extract_first()
    try:
        candidates.append(PartialDate.parse(raw_string_date))
    except ValueError:
        candidates.append(None)

    return get_first(candidates)
def _get_work_priority_tuple(work):
    """Return a ``(current, start date)`` pair describing a work entry.

    The first element is the entry's ``'current'`` value, unmodified. The
    second is its ``'start_date'`` parsed with ``PartialDate.parse``, or
    ``EARLIEST_DATE`` when the start date is missing or empty.

    NOTE(review): if ``work`` is a plain dict, a missing ``'current'`` key
    yields ``None`` in the first slot - confirm callers that compare these
    tuples never mix ``None`` with booleans.
    """
    raw_start = work.get('start_date')
    is_current = work.get('current')

    if raw_start:
        started = PartialDate.parse(raw_start)
    else:
        started = EARLIEST_DATE

    return (is_current, started)