def try_ymd_date(string, outputformat, extensive_search, parser=PARSER):
    """Use a series of heuristics and rules to parse a potential date expression"""
    # discard on formal criteria
    if string is None or len(list(filter(str.isdigit, string))) < 4:
        return None
    # just time/single year, not a date
    if re.match(r'[0-9]{2}:[0-9]{2}(:| )', string) or \
            re.match(r'\D*[0-9]{4}\D*$', string):
        return None
    # much faster
    if string[0:4].isdigit():
        # try speedup with ciso8601
        try:
            result = parse_datetime_as_naive(string)
            if date_validator(result, outputformat) is True:
                LOGGER.debug('ciso8601 result: %s', result)
                converted = result.strftime(outputformat)
                return converted
        except ValueError:
            LOGGER.debug('ciso8601 error: %s', string)
    # faster
    customresult = custom_parse(string, outputformat)
    if customresult is not None:
        return customresult
    # slow but extensive search
    if extensive_search is True:
        # send to dateparser
        dateparser_result = external_date_parser(string, outputformat, parser)
        if dateparser_result is not None:
            return dateparser_result
    # catchall
    return None
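# A minimal usage sketch for try_ymd_date, assuming the htmldate-style helpers
# above (date_validator, custom_parse, external_date_parser, PARSER) are in
# scope; inputs and expected outputs are illustrative only.
try_ymd_date('2017-09-01T12:34:56', '%Y-%m-%d', extensive_search=False)
# -> '2017-09-01' via the ciso8601 fast path, if date_validator accepts it
try_ymd_date('1er septembre 2017', '%Y-%m-%d', extensive_search=True)
# -> handed off to dateparser through external_date_parser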
def get_extension_params(loan=None, parameter_name=None):
    """Return extension parameters."""
    policy = get_circ_policy(loan)
    end_date = ciso8601.parse_datetime_as_naive(loan.get('end_date'))
    params = {
        'max_count': policy.get('number_renewals'),
        'duration_default': policy.get('renewal_duration')
    }
    current_date = datetime.now()
    time_to_end_of_day = timedelta(hours=23, minutes=59) - \
        timedelta(hours=current_date.hour, minutes=current_date.minute)
    transaction_location_pid = loan.get('transaction_location_pid')
    if not transaction_location_pid:
        library_pid = Item.get_record_by_pid(loan.item_pid).library_pid
    else:
        library_pid = \
            Location.get_record_by_pid(transaction_location_pid).library_pid
    library = Library.get_record_by_pid(library_pid)
    calculated_due_date = current_date + timedelta(
        days=policy.get('renewal_duration'))
    first_open_date = library.next_open(
        date=calculated_due_date - timedelta(days=1))
    if first_open_date.date() < end_date.date():
        params['max_count'] = 0

    new_duration = first_open_date - current_date
    params['duration_default'] = \
        timedelta(days=new_duration.days) + time_to_end_of_day

    return params.get(parameter_name)
def test_parse_as_naive_auto_generated_valid_formats(self):
    for (timestamp, expected_datetime) in generate_valid_timestamp_and_datetime():
        try:
            self.assertEqual(parse_datetime_as_naive(timestamp),
                             expected_datetime.replace(tzinfo=None))
        except Exception:
            print("Had problems parsing: {timestamp}".format(timestamp=timestamp))
            raise
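# generate_valid_timestamp_and_datetime is defined elsewhere in the test suite;
# a hypothetical minimal stand-in consistent with the assertion above would
# yield (ISO 8601 string, timezone-aware datetime) pairs:
def generate_valid_timestamp_and_datetime():
    import datetime
    dt = datetime.datetime(2014, 2, 3, 10, 35, 27, tzinfo=datetime.timezone.utc)
    yield dt.isoformat(), dt  # ('2014-02-03T10:35:27+00:00', aware datetime)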
def convertTime(t):
    """Parse Arvados timestamp to unix time."""
    if not t:
        return 0
    try:
        return calendar.timegm(ciso8601.parse_datetime_as_naive(t).timetuple())
    except (TypeError, ValueError):
        return 0
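# Usage sketch: parse_datetime_as_naive drops any UTC offset and timegm()
# treats the resulting struct_time as UTC, so UTC timestamps map to epoch
# seconds; malformed or empty input falls back to 0.
convertTime('2020-01-01T00:00:00Z')  # -> 1577836800
convertTime('not a timestamp')       # -> 0
convertTime(None)                    # -> 0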
def docker_link_sort_key(link):
    """Build a sort key to find the latest available Docker image.

    To find one source collection for a Docker image referenced by name or
    image id, the API server looks for a link with the most recent
    `image_timestamp` property; then the most recent `created_at` timestamp.
    This method generates a sort key for Docker metadata links to sort them
    from least to most preferred.
    """
    try:
        image_timestamp = ciso8601.parse_datetime_as_naive(
            link['properties']['image_timestamp'])
    except (KeyError, ValueError):
        image_timestamp = EARLIEST_DATETIME
    try:
        created_timestamp = ciso8601.parse_datetime_as_naive(link['created_at'])
    except ValueError:
        created_timestamp = None
    return (image_timestamp, created_timestamp)
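# Hedged sorting sketch for the key above; EARLIEST_DATETIME is assumed to be
# a module-level minimum-datetime sentinel, and the link dicts are illustrative.
links = [
    {'created_at': '2021-05-01T10:00:00Z', 'properties': {}},
    {'created_at': '2021-04-01T10:00:00Z',
     'properties': {'image_timestamp': '2021-06-01T00:00:00Z'}},
]
links.sort(key=docker_link_sort_key)
preferred = links[-1]  # the link with the newest image_timestamp sorts last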
def done(self, record):
    outputs = {}
    try:
        container = self.arvrunner.api.containers().get(
            uuid=record["container_uuid"]
        ).execute(num_retries=self.arvrunner.num_retries)
        if container["state"] == "Complete":
            rcode = container["exit_code"]
            if self.successCodes and rcode in self.successCodes:
                processStatus = "success"
            elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                processStatus = "temporaryFail"
            elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                processStatus = "permanentFail"
            elif rcode == 0:
                processStatus = "success"
            else:
                processStatus = "permanentFail"
        else:
            processStatus = "permanentFail"

        if processStatus == "permanentFail" and record["log_uuid"]:
            logc = arvados.collection.CollectionReader(
                record["log_uuid"],
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            label = self.arvrunner.label(self)
            done.logtail(
                logc, logger.error,
                "%s (%s) error log:" % (label, record["uuid"]), maxlen=40)

        if record["output_uuid"]:
            if self.arvrunner.trash_intermediate or self.arvrunner.intermediate_output_ttl:
                # Compute the trash time to avoid requesting the collection record.
                trash_at = ciso8601.parse_datetime_as_naive(record["modified_at"]) + \
                    datetime.timedelta(0, self.arvrunner.intermediate_output_ttl)
                aftertime = " at %s" % trash_at.strftime("%Y-%m-%d %H:%M:%S UTC") if self.arvrunner.intermediate_output_ttl else ""
                orpart = ", or" if self.arvrunner.trash_intermediate and self.arvrunner.intermediate_output_ttl else ""
                oncomplete = " upon successful completion of the workflow" if self.arvrunner.trash_intermediate else ""
                logger.info("%s Intermediate output %s (%s) will be trashed%s%s%s." % (
                    self.arvrunner.label(self), record["output_uuid"],
                    container["output"], aftertime, orpart, oncomplete))
            self.arvrunner.add_intermediate_output(record["output_uuid"])

        if container["output"]:
            outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
    except WorkflowException as e:
        # Only include a stack trace if in debug mode.
        # A stack trace may obfuscate more useful output about the workflow.
        logger.error("%s unable to collect output from %s:\n%s",
                     self.arvrunner.label(self), container["output"], e,
                     exc_info=(e if self.arvrunner.debug else False))
        processStatus = "permanentFail"
    except Exception:
        logger.exception("%s while getting output object:",
                         self.arvrunner.label(self))
        processStatus = "permanentFail"
    finally:
        self.output_callback(outputs, processStatus)
def test_items_extend_end_date(client, librarian_martigny_no_email,
                               patron_martigny_no_email, loc_public_martigny,
                               item_type_standard_martigny, item_lib_martigny,
                               json_header, circ_policy_short_martigny):
    """Test correct renewal due date for items."""
    login_user_via_session(client, librarian_martigny_no_email.user)
    item = item_lib_martigny
    item_pid = item.pid
    patron_pid = patron_martigny_no_email.pid
    # checkout
    res = client.post(
        url_for('api_item.checkout'),
        data=json.dumps(dict(item_pid=item_pid, patron_pid=patron_pid)),
        content_type='application/json',
    )
    assert res.status_code == 200
    data = get_json(res)
    actions = data.get('action_applied')
    loan_pid = actions[LoanAction.CHECKOUT].get('pid')
    loan = Loan.get_record_by_pid(loan_pid)
    assert not item.get_extension_count()

    max_count = get_extension_params(loan=loan, parameter_name='max_count')
    renewal_duration_policy = circ_policy_short_martigny['renewal_duration']
    renewal_duration = get_extension_params(loan=loan,
                                            parameter_name='duration_default')
    assert renewal_duration_policy <= renewal_duration.days

    # extend loan
    res = client.post(
        url_for('api_item.extend_loan'),
        data=json.dumps(dict(item_pid=item_pid, pid=loan_pid)),
        content_type='application/json',
    )
    assert res.status_code == 200
    data = get_json(res)
    actions = data.get('action_applied')
    loan_pid = actions[LoanAction.EXTEND].get('pid')
    loan = Loan.get_record_by_pid(loan_pid)
    end_date = loan.get('end_date')
    current_date = datetime.now()
    calc_date = current_date + renewal_duration
    assert (calc_date.strftime('%Y-%m-%d') ==
            ciso8601.parse_datetime_as_naive(end_date).strftime('%Y-%m-%d'))

    # checkin
    res = client.post(
        url_for('api_item.checkin'),
        data=json.dumps(dict(item_pid=item_pid, pid=loan_pid)),
        content_type='application/json',
    )
    assert res.status_code == 200
def extend_loan_data_is_valid(end_date, renewal_duration, library_pid):
    """Check whether extending the loan would yield a valid new due date."""
    end_date = ciso8601.parse_datetime_as_naive(end_date)
    current_date = datetime.now()
    library = Library.get_record_by_pid(library_pid)
    calculated_due_date = current_date + timedelta(days=renewal_duration)
    first_open_date = library.next_open(
        date=calculated_due_date - timedelta(days=1))
    if first_open_date.date() <= end_date.date():
        return False
    return True
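# Illustrative call; the pid is a placeholder and Library.next_open() comes
# from the surrounding application. The extension is rejected (False) when the
# first open day after the renewal does not fall past the current end date.
extend_loan_data_is_valid(end_date='2020-03-10T23:59:00+00:00',
                          renewal_duration=30,
                          library_pid='lib1')  # hypothetical pid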
def flag_articles_matching_date(self, file, field='PublishDate') -> list:
    '''
    Once we need to check dates inside the files...
    Returns a list of bool, where True means "article is within timeframe".
    '''
    date_list = [
        parse_datetime_as_naive(article[field]) for article in file
    ]
    return [
        self.dated(date, self.date_start, self.date_end) for date in date_list
    ]
def make_time(t: TimeT | None) -> int | None:
    if t is None:
        return None
    if isinstance(t, int):
        return t
    if isinstance(t, str):
        try:
            return int(
                time.mktime(ciso8601.parse_datetime_as_naive(t).timetuple()))
        except ValueError:
            return None
    t: datetime.datetime
    return int(time.mktime(t.timetuple()))
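# make_time normalizes several input shapes to epoch seconds; a sketch
# assuming TimeT covers int, str and datetime.datetime. Note that mktime()
# interprets the naive parse result in local time.
make_time(1577836800)                     # int passes through unchanged
make_time('2020-01-01T00:00:00')          # str parsed, then local mktime()
make_time(datetime.datetime(2020, 1, 1))  # datetime handled by the fallthrough
make_time('garbage')                      # -> None on ValueError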
def parse_path_date(path, pattern) -> datetime.datetime:
    '''
    Parse dates in filename.

    Parameters
    ----------
    path : str
        Path to a file from IM's database
    pattern : `re.Pattern`
        Pattern made using self.path_date_pattern()
    '''
    date_match = pattern.search(path)
    fdate = parse_datetime_as_naive(date_match.group())
    return fdate
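# Usage sketch: path_date_pattern() is not shown, so a hand-rolled pattern
# matching an ISO date embedded in a filename stands in for it here.
import re
pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
parse_path_date('archive/news_2019-07-04.json', pattern)
# -> datetime.datetime(2019, 7, 4, 0, 0)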
def get_book_json(gen):
    # print based on seconds to start
    for market_books in gen():
        for market_book in market_books:
            # Since I'm only interested in the win market, skip 'To Be Placed'.
            # Insert other filters here to help speed things up.
            if market_book['marketDefinition']['name'] == 'To Be Placed':
                return
            # Calculate time until the scheduled start
            seconds_to_start = (
                ciso8601.parse_datetime_as_naive(
                    market_book['marketDefinition']['marketTime']) -
                datetime.utcfromtimestamp(
                    market_book['publishTime'] / 1000)).total_seconds()
            # Extract book at offset (seconds) before jump
            if seconds_to_start < offset and seconds_to_start > offset - 15:
                return market_book
def get_overdue_loans():
    """Return all overdue loans."""
    from .utils import get_circ_policy
    overdue_loans = []
    results = current_circulation.loan_search\
        .source(['pid'])\
        .params(preserve_order=True)\
        .filter('term', state='ITEM_ON_LOAN')\
        .sort({'transaction_date': {'order': 'asc'}})\
        .scan()
    for record in results:
        loan = Loan.get_record_by_pid(record.pid)
        circ_policy = get_circ_policy(loan)
        now = datetime.now()
        end_date = loan.get('end_date')
        due_date = ciso8601.parse_datetime_as_naive(end_date)
        days_after = circ_policy.get('number_of_days_after_due_date')
        if now > due_date + timedelta(days=days_after):
            overdue_loans.append(loan)
    return overdue_loans
def custom_parse(string, outputformat, extensive_search, min_date, max_date):
    """Try to bypass the slow dateparser"""
    LOGGER.debug('custom parse test: %s', string)
    # '201709011234' not covered by dateparser
    # regex was too slow
    if string[0:8].isdigit():
        try:
            candidate = datetime.date(int(string[:4]),
                                      int(string[4:6]),
                                      int(string[6:8]))
        except ValueError:
            return None
        if date_validator(candidate, '%Y-%m-%d') is True:
            LOGGER.debug('ymd match: %s', candidate)
            return convert_date(candidate, '%Y-%m-%d', outputformat)
    # much faster
    if string[0:4].isdigit():
        # try speedup with ciso8601 (if installed)
        try:
            if extensive_search is True:
                result = parse_datetime(string)
            else:
                # speed-up by ignoring time zone info if ciso8601 is installed
                result = parse_datetime_as_naive(string)
            if date_validator(result, outputformat,
                              earliest=min_date, latest=max_date) is True:
                LOGGER.debug('parsing result: %s', result)
                return result.strftime(outputformat)
        except ValueError:
            LOGGER.debug('parsing error: %s', string)
    # %Y-%m-%d search
    match = YMD_PATTERN.search(string)
    if match:
        try:
            candidate = datetime.date(int(match.group(1)),
                                      int(match.group(2)),
                                      int(match.group(3)))
        except ValueError:
            LOGGER.debug('value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('ymd match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)
    # faster than firing dateparser at once
    datestub = DATESTUB_PATTERN.search(string)
    if datestub and len(datestub.group(3)) in (2, 4):
        try:
            if len(datestub.group(3)) == 2:
                candidate = datetime.date(int('20' + datestub.group(3)),
                                          int(datestub.group(2)),
                                          int(datestub.group(1)))
            elif len(datestub.group(3)) == 4:
                candidate = datetime.date(int(datestub.group(3)),
                                          int(datestub.group(2)),
                                          int(datestub.group(1)))
        except ValueError:
            LOGGER.debug('value error: %s', datestub.group(0))
        else:
            # test candidate
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('D.M.Y match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)
    # text match
    dateobject = regex_parse(string)
    # copyright match?
    # © Janssen-Cilag GmbH 2014-2019. https://www.krebsratgeber.de/artikel/was-macht-eine-zelle-zur-krebszelle
    # examine
    if dateobject is not None:
        try:
            if date_validator(dateobject, outputformat) is True:
                LOGGER.debug('custom parse result: %s', dateobject)
                return dateobject.strftime(outputformat)
        except ValueError as err:
            LOGGER.debug('value error during conversion: %s %s', string, err)
    return None
def get_datetime(item):
    return timezone.make_aware(parse_datetime_as_naive(item['recordedTime']))
def parse_datetime(datetime_string: str) -> Optional[datetime]:
    try:
        return ciso8601.parse_datetime_as_naive(datetime_string)
    except ValueError:
        return
def parse_dt(s):
    return ciso8601.parse_datetime_as_naive(s)
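# What the _as_naive variant buys you: any UTC offset in the input is parsed
# but discarded, so the result compares cleanly with offset-naive datetimes.
parse_dt('2021-01-01T12:00:00+02:00')  # -> datetime(2021, 1, 1, 12, 0), tzinfo=None
parse_dt('2021-01-01T12:00:00')        # -> same result without an offset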
    'timeframe': '30m',
    'tsunit': 's',
    'tscoef': 1,
    'tsindex': 2
}

# Convert datetime strings to timestamps before creating the TimeframeDataset
for index, item in enumerate(t['data']):
    item = list(item)
    newdatestr = item[0].replace('.', '-').replace(' ', 'T')
    item[0] = calendar.timegm(
        ciso8601.parse_datetime_as_naive(newdatestr).timetuple())
    t['data'][index] = item

# Create the TimeframeDataset and print it
tfds = TimeframeDataset(data=t['data'], columns=t['columns'],
                        tsname=t['tsname'], timeframe=t['timeframe'],
                        tsunit=t['tsunit'])
pprint(tfds)
print()

# Create a Pandas DataFrame from the TimeframeDataset and print it
pdf = tfds2pdf(tfds)
print(pdf)
print()
def replace_pids_and_refs(self):
    """Dumps data."""
    from ..items.api import Item
    try:
        self.init_loan()
        data = deepcopy(self.replace_refs())
        data['loan'] = self.loan
        data['loan']['item'] = self.item.replace_refs().dumps()
        # del(data['loan']['item_pid'])
        data['loan']['patron'] = self.patron.replace_refs().dumps()
        # language = data['loan']['patron']['communication_language']
        # del(data['loan']['patron_pid'])
        data['loan']['transaction_user'] = \
            self.transaction_user.replace_refs().dumps()
        # del(data['loan']['transaction_user_pid'])
        data['loan']['transaction_location'] = \
            self.transaction_location.replace_refs().dumps()
        # del(data['loan']['transaction_location_pid'])
        pickup_location = self.pickup_location
        if pickup_location:
            data['loan']['pickup_location'] = \
                pickup_location.replace_refs().dumps()
            # del(data['loan']['pickup_location_pid'])
            library_pid = data['loan']['pickup_location']['library']['pid']
            library = Library.get_record_by_pid(library_pid)
            data['loan']['pickup_location']['library'] = library
            data['loan']['library'] = library
            keep_until = datetime.now() + timedelta(days=10)
            next_open = library.next_open(keep_until)
            # language = data['loan']['patron']['communication_language']
            next_open = next_open.strftime("%d.%m.%Y")
            data['loan']['next_open'] = next_open
        else:
            data['loan']['pickup_location'] = \
                self.transaction_location.replace_refs().dumps()
            item_pid = data['loan']['item_pid']
            library = Item.get_record_by_pid(item_pid).get_library()
            data['loan']['library'] = library
        document = self.document.replace_refs().dumps()
        data['loan']['document'] = document
        authors = document.get('authors', '')
        if authors:
            author = authors[0].get('name', '')
            if not author:
                mef_list = ['name_fr', 'name_de', 'name_it', 'name_en']
                for a_name in mef_list:
                    if authors[0].get(a_name, ''):
                        author = authors[0].get(a_name)
                        break
            data['loan']['author'] = author
        end_date = data.get('loan').get('end_date')
        if end_date:
            end_date = ciso8601.parse_datetime_as_naive(end_date)
            data['loan']['end_date'] = end_date.strftime("%d.%m.%Y")
        # del(data['loan']['document_pid'])
        # create a link to patron profile
        patron = Patron.get_record_by_pid(data['loan']['patron']['pid'])
        view_code = patron.get_organisation().get('code')
        base_url = current_app.config.get('RERO_ILS_APP_BASE_URL')
        url_api = '{base_url}/{view_code}/patrons/profile'
        profile_url = url_api.format(base_url=base_url, view_code=view_code)
        data['loan']['profile_url'] = profile_url
        return data
    except Exception as e:
        raise e
def parse_datetime(datetime_string):
    try:
        return ciso8601.parse_datetime_as_naive(datetime_string)
    except ValueError:
        return
def _get_message(self, topic, input, now, pub_pattern_matched, endpoint_id,
                 subscriptions_by_topic, has_no_sk_server,
                 _initialized=_initialized, _zato_none=ZATO_NONE,
                 _skip=PUBSUB.HOOK_ACTION.SKIP,
                 _default_pri=PUBSUB.PRIORITY.DEFAULT,
                 _opaque_only=PUBSUB.DEFAULT.SK_OPAQUE,
                 _float_str=PUBSUB.FLOAT_STRING_CONVERT):

    priority = get_priority(self.cid, input)

    # So as not to send it to SQL if it is a default value anyway = less overhead = better performance
    if priority == _default_pri:
        priority = None

    expiration = get_expiration(self.cid, input)
    expiration_time = now + (expiration / 1000.0)

    pub_msg_id = input.get('msg_id', '') or new_msg_id()

    # If there is at least one WSX subscriber to this topic which is not connected at the moment,
    # which means it has no delivery server, we unconditionally turn this message into a GD one ..
    if has_no_sk_server:
        has_gd = True
        logger_pubsub.info(_log_turning_gd_msg.format('no SK server'), pub_msg_id)

    # .. otherwise, use the input GD value or the default per topic.
    else:
        has_gd = input.get('has_gd', _zato_none)
        if has_gd != _zato_none:
            if not isinstance(has_gd, bool):
                raise ValueError(
                    'Input has_gd is not a bool (found:`{}`)'.format(repr(has_gd)))
        else:
            has_gd = topic.has_gd

    pub_correl_id = input.get('correl_id')
    in_reply_to = input.get('in_reply_to')
    ext_client_id = input.get('ext_client_id')
    mime_type = input.get('mime_type')

    ext_pub_time = input.get('ext_pub_time') or None
    if ext_pub_time:
        ext_pub_time = parse_datetime_as_naive(ext_pub_time)
        ext_pub_time = datetime_to_ms(ext_pub_time) / 1000.0

    pub_correl_id = pub_correl_id if pub_correl_id else None
    in_reply_to = in_reply_to if in_reply_to else None
    ext_client_id = ext_client_id if ext_client_id else None
    mime_type = mime_type if mime_type else None

    reply_to_sk = input.get('reply_to_sk') or []
    deliver_to_sk = input.get('deliver_to_sk') or []

    user_ctx = input.get('user_ctx')
    zato_ctx = input.get('zato_ctx')

    ps_msg = PubSubMessage()
    ps_msg.topic = topic
    ps_msg.pub_msg_id = pub_msg_id
    ps_msg.pub_correl_id = pub_correl_id
    ps_msg.in_reply_to = in_reply_to

    # Convert to string to prevent pg8000 from rounding up float values
    ps_msg.pub_time = _float_str.format(now)
    ps_msg.ext_pub_time = _float_str.format(ext_pub_time) if ext_pub_time else ext_pub_time

    ps_msg.delivery_status = _initialized
    ps_msg.pub_pattern_matched = pub_pattern_matched
    ps_msg.data = input['data']
    ps_msg.mime_type = mime_type
    ps_msg.priority = priority
    ps_msg.expiration = expiration
    ps_msg.expiration_time = expiration_time
    ps_msg.published_by_id = endpoint_id
    ps_msg.topic_id = topic.id
    ps_msg.topic_name = topic.name
    ps_msg.cluster_id = self.server.cluster_id
    ps_msg.has_gd = has_gd
    ps_msg.ext_client_id = ext_client_id
    ps_msg.group_id = input.get('group_id') or None
    ps_msg.position_in_group = input.get('position_in_group') or None
    ps_msg.is_in_sub_queue = bool(subscriptions_by_topic)
    ps_msg.reply_to_sk = reply_to_sk
    ps_msg.deliver_to_sk = deliver_to_sk
    ps_msg.user_ctx = user_ctx
    ps_msg.zato_ctx = zato_ctx

    # Opaque attributes - we only need reply-to sub_keys to be placed in there,
    # but we do not do it unless we know that any such sub key was actually requested.
    if reply_to_sk or deliver_to_sk:
        set_instance_opaque_attrs(ps_msg, input, only=_opaque_only)

    # If there are any subscriptions for the topic this message was published to, we want to establish
    # based on what subscription pattern each subscriber will receive the message.
    for sub in subscriptions_by_topic:
        ps_msg.sub_pattern_matched[sub.sub_key] = sub.sub_pattern_matched

    if ps_msg.data:
        # We need to store the size in bytes rather than Unicode codepoints
        ps_msg.size = len(ps_msg.data.encode('utf8'))
    else:
        ps_msg.size = 0

    # Invoke the hook service here because it may want to update the data, in which case
    # we need to take it into account below.
    if topic.before_publish_hook_service_invoker:
        response = topic.before_publish_hook_service_invoker(topic, ps_msg)

        # The hook service decided that we should not process this message
        if response['hook_action'] == _skip:
            logger_audit.info(
                'Skipping message pub_msg_id:`%s`, pub_correl_id:`%s`, ext_client_id:`%s`',
                ps_msg.pub_msg_id, ps_msg.pub_correl_id, ps_msg.ext_client_id)
            return

    # These are needed only for GD messages that are stored in SQL
    if has_gd:
        data_prefix, data_prefix_short = self._get_data_prefixes(ps_msg.data)
        ps_msg.data_prefix = data_prefix
        ps_msg.data_prefix_short = data_prefix_short

    return ps_msg
def custom_parse(string, outputformat, extensive_search, min_date, max_date):
    """Try to bypass the slow dateparser"""
    LOGGER.debug('custom parse test: %s', string)

    # 1. '201709011234' not covered by dateparser, and regex too slow
    if string[0:8].isdigit():
        try:
            candidate = datetime.date(int(string[:4]),
                                      int(string[4:6]),
                                      int(string[6:8]))
        except ValueError:
            return None
        if date_validator(candidate, '%Y-%m-%d') is True:
            LOGGER.debug('ymd match: %s', candidate)
            return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 2. shortcut, much faster
    if string[0:4].isdigit():
        # try speedup with ciso8601 (if installed)
        try:
            if extensive_search is True:
                result = parse_datetime(string)
            else:
                # speed-up by ignoring time zone info if ciso8601 is installed
                result = parse_datetime_as_naive(string)
            if date_validator(result, outputformat,
                              earliest=min_date, latest=max_date) is True:
                LOGGER.debug('parsing result: %s', result)
                return result.strftime(outputformat)
        except (OverflowError, TypeError, ValueError):
            LOGGER.debug('parsing error: %s', string)

    # 3. Try YYYYMMDD, use regex
    match = YMD_NO_SEP_PATTERN.search(string)
    if match:
        try:
            year, month, day = (int(match.group(0)[:4]),
                                int(match.group(0)[4:6]),
                                int(match.group(0)[6:8]))
            candidate = datetime.date(year, month, day)
        except ValueError:
            LOGGER.debug('YYYYMMDD value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('YYYYMMDD match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 4. Try the Y-M-D pattern since it's the one used in ISO-8601
    match = YMD_PATTERN.search(string)
    if match:
        try:
            day, month, year = (int(match.group(3)),
                                int(match.group(2)),
                                int(match.group(1)))
            candidate = datetime.date(year, month, day)
        except ValueError:
            LOGGER.debug('Y-M-D value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('Y-M-D match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 5. Try the D-M-Y pattern since it's the most common date format in the world
    match = DMY_PATTERN.search(string)
    if match:
        try:
            day, month, year = (int(match.group(1)),
                                int(match.group(2)),
                                int(match.group(3)))
            year = correct_year(year)
            # If month is more than 12, swap it with the day
            if month > 12 and day <= 12:
                day, month = month, day
            candidate = datetime.date(year, month, day)
        except ValueError:
            LOGGER.debug('D-M-Y value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('D-M-Y match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 6. Try the Y-M pattern
    match = YM_PATTERN.search(string)
    if match:
        try:
            year, month = int(match.group(1)), int(match.group(2))
            candidate = datetime.date(year, month, 1)
        except ValueError:
            LOGGER.debug('Y-M value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('Y-M match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 7. Try the other regex pattern
    dateobject = regex_parse(string)
    if date_validator(dateobject, outputformat) is True:
        try:
            LOGGER.debug('custom parse result: %s', dateobject)
            return dateobject.strftime(outputformat)
        except ValueError as err:
            LOGGER.debug('value error during conversion: %s %s', string, err)

    return None
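# Sketch of the dispatch order above with illustrative inputs; MIN_DATE and
# MAX_DATE are hypothetical bounds, and the results assume date_validator
# accepts the candidates.
custom_parse('20170901', '%Y-%m-%d', False, MIN_DATE, MAX_DATE)    # step 1
custom_parse('2017-09-01', '%Y-%m-%d', False, MIN_DATE, MAX_DATE)  # step 2, ciso8601
custom_parse('1.9.2017', '%Y-%m-%d', False, MIN_DATE, MAX_DATE)    # step 5, D-M-Y regex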
def __parse_date(date_string):
    return ciso8601.parse_datetime_as_naive(date_string).date()
def parse_str_date(str_date) -> datetime.datetime:
    '''
    Parse str to datetime.datetime
    '''
    return parse_datetime_as_naive(str_date)
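# Thin wrapper, so usage mirrors ciso8601 directly; offsets, if any, are dropped.
parse_str_date('2018-11-30T08:15:00')  # -> datetime.datetime(2018, 11, 30, 8, 15)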