def parse_classes(self, response):
    date = None
    for row in response.css('table#classSchedule-mainTable tr'):
        header = row.css('td.header')
        cells = row.css('td')
        if header:
            date = dateparser.parse(self._extract_text(header)).date()
        elif len(cells) in [5, 6]:
            item = items.StudioClass()
            lst = row.css('td')
            if len(lst) == 5:
                start_time, dummy, class_name, teacher, duration = [self._extract_text(x) for x in lst]
            elif len(lst) == 6:
                start_time, dummy, class_name, teacher, room, duration = [self._extract_text(x) for x in lst]
            if 'Cancelled' in teacher:
                continue
            start_time = dateparser.parse(start_time).time()
            duration_hours_match = re.search(r'(\d+) hour', duration)
            duration_hours = duration_hours_match.group(1) if duration_hours_match else 0
            duration_minutes_match = re.search(r'(\d+) minute', duration)
            duration_minutes = duration_minutes_match.group(1) if duration_minutes_match else 0
            duration = datetime.timedelta(hours=int(duration_hours), minutes=int(duration_minutes))
            item['start_time'] = datetime.datetime.combine(date, start_time)
            item['end_time'] = datetime.datetime.combine(date, start_time) + duration
            item['style'] = class_name
            if ' for ' in teacher:
                sub, orig = teacher.split(' for ')
                sub = adjust_caps(sub)
                orig = adjust_caps(orig)
                teacher = '%s for %s' % (sub, orig)
            teacher = adjust_caps(teacher)
            item['teacher'] = teacher
            if self._valid_item(item, row):
                yield item
def scrape_conan():
    """Scrapes Conan's standup page."""
    page = 'http://teamcoco.com/category/video-category/standup'
    df_conan = pd.DataFrame(columns=['title', 'link', 'date'])  # Initialize dataframe
    page = requests.get(page)
    soup = BeautifulSoup(page.text, 'lxml')
    main = soup.find_all('div', class_='content-item-hero')  # Main video
    posts = soup.find_all('li', class_='content-item')  # remaining videos on bottom

    # populate data frame with content item hero
    link = main[0].a['href']
    date = re.findall(r'\d{2}-\d{2}-\d{2}', link)
    date = dateparser.parse(date[0])
    title = main[0].strong.contents[0]
    entry = pd.DataFrame([[title, link, date]], columns=['title', 'link', 'date'])
    df_conan = df_conan.append(entry, ignore_index=True)

    for p in posts:
        link = p.a['href']
        date = re.findall(r'\d{2}-\d{2}-\d{2}', link)
        date = dateparser.parse(date[0])
        title = p.img['alt']
        entry = pd.DataFrame([[title, link, date]], columns=['title', 'link', 'date'])
        df_conan = df_conan.append(entry, ignore_index=True)
    return df_conan
def _save_to_db(self, date):
    news_retriever = self._news_retriever()
    bunch_of_news = []
    if not date:
        bunch_of_news = news_retriever
    else:
        for index, i in enumerate(news_retriever):
            if dateparser.parse(i.pubdate.text, settings={'TIMEZONE': 'Europe/Moscow'}) > date:
                bunch_of_news.append(i)
            else:
                break
    for item in bunch_of_news:
        print("[RSS]: {0}".format(item.title.text))
        try:
            item_category = Category.objects.filter(category_name=item.category.text)[0]
            news_db_obj = News(title=item.title.text,
                               description=item.description.text,
                               time=dateparser.parse(item.pubdate.text, settings={'TIMEZONE': 'Europe/Moscow'}),
                               category=item_category)
        except IndexError:
            new_category = Category(category_name=item.category.text)
            new_category.save()
            news_db_obj = News(title=item.title.text,
                               description=item.description.text,
                               time=dateparser.parse(item.pubdate.text, settings={'TIMEZONE': 'Europe/Moscow'}),
                               category=new_category)
        news_db_obj.save()
def _extractFromHTMLTag(parsedHTML):
    # <time>
    for time in parsedHTML.findAll("time"):
        datetime = time.get('datetime', '')
        if len(datetime) > 0:
            return dateparser.parse(datetime)
        datetime = time.get('class', '')
        if len(datetime) > 0 and datetime[0].lower() == "timestamp":
            return dateparser.parse(time.string)
    tag = parsedHTML.find("span", {"itemprop": "datePublished"})
    if tag is not None:
        dateText = tag.get("content")
        if dateText is None:
            dateText = tag.text
        if dateText is not None:
            return dateparser.parse(dateText)
    # class=
    for tag in parsedHTML.find_all(['span', 'p', 'div'],
                                   class_=re.compile("pubdate|timestamp|article_date|articledate|date",
                                                     re.IGNORECASE)):
        dateText = tag.string
        if dateText is None:
            dateText = tag.text
        possibleDate = dateparser.parse(dateText)
        if possibleDate is not None:
            return possibleDate
    return None
def RDparsetime(timestr):
    try:
        if timestr:
            pubtime = dateparser.parse(timestr)
            if pubtime is None:
                if u' ' not in timestr:
                    pubtime = dateutil.parser.parse(timestr, fuzzy=True)
                else:
                    day = timestr.split(u' ', 1)[0]
                    temp = timestr.split(u' ', 1)[1]
                    dateru = u''
                    if u'時間前' == temp or u'小時前' == temp:
                        dateru = day + u' час назад'
                    else:
                        if u'分前' == temp or u'分鐘前' == temp:
                            dateru = day + u' минут назад'
                    pubtime = dateparser.parse(dateru)
            else:
                pass
            if pubtime:
                pubtimeint = int(time.mktime(pubtime.timetuple()))
                return pubtimeint
            else:
                return 0
        else:
            return 0
    except BaseException as e:
        print(e)
        return 0
def index(request):
    if request.method == 'GET':
        # obtaining categories to show in template
        categories = Category.objects.values('category_name').order_by('category_name')
        context = {
            'categories': categories,
        }
        return render(request, 'index.html', context)
    elif request.method == 'POST':
        request_body = json.loads(request.body.decode('utf-8'))
        begin_date = dateparser.parse(request_body['start_date'], settings={'TIMEZONE': 'Europe/Moscow'})
        end_date = dateparser.parse(request_body['end_date'], settings={'TIMEZONE': 'Europe/Moscow'})
        category = list(request_body['category']) if type(request_body['category']) is not list else request_body['category']
        e_mail = request_body['email']
        print("[MAIN]: Parameters received: begin_date:{0}, end_date:{1}, "
              "category:{2}, e_mail:{3}".format(begin_date, end_date, category, e_mail))
        generate_and_send.delay(begin_date=begin_date, end_date=end_date,
                                category=category, email=e_mail)
        return redirect(index)
def receive(self, tracker, broadcast_type):
    """Receive an update from a tracker.

    Ignore all broadcasts that are not TIME.

    Args:
        tracker (parsing.library.tracker.Tracker): Tracker receiving update from.
        broadcast_type (str): Broadcast message from tracker.
    """
    if broadcast_type != 'TIME':
        return
    time = dateparser.parse(getattr(tracker, broadcast_type.lower()))
    if time > dateparser.parse('12:00pm'):
        self.time_distribution[24] += 1
    else:
        self.time_distribution[12] += 1
    minute = time.minute if time.minute != 0 else 60
    grains = [60, 30, 20, 15, 10, 5, 3, 2, 1]
    for grain in grains:
        if minute % grain != 0:
            continue
        if grain < self.granularity:
            self.granularity = grain
        break
def index(): ''' Display the index page. ''' test_matrix = {} kernels = dbtools.getallkernels(SESSION) for kernel in kernels: kernelversion = kernel.kver.rpartition(".")[0].rpartition(".")[0] if kernelversion in test_matrix: test_matrix[kernelversion]["tests"].append(kernel) if not kernel.fver in test_matrix[kernelversion]["fedoraversion"]: test_matrix[kernelversion]["fedoraversion"].append(kernel.fver) if not kernel.testarch in test_matrix[kernelversion]["arches"]: test_matrix[kernelversion]["arches"].append(kernel.testarch) if kernel.testresult == "PASS": test_matrix[kernelversion]["passes"] += 1 else: test_matrix[kernelversion]["fails"] += 1 if test_matrix[kernelversion]["lasttestdate"] < dateparser.parse(kernel.testdate): test_matrix[kernelversion]["lasttestdate"] = dateparser.parse(kernel.testdate) else: if kernel.testresult == "PASS": passes = 1 fails = 0 else: passes = 0 fails = 1 test_matrix[kernelversion] = {"tests":[kernel], "arches": [kernel.testarch], "fedoraversion": [kernel.fver], "passes": passes, "fails": fails, "lasttestdate":dateparser.parse(kernel.testdate)} return flask.render_template( 'index.html', test_matrix=test_matrix, )
def test_date_range_prefixes(self):
    reader = S3Reader(self.options_dateparser_range_3_days, meta())
    expected = ['test_prefix/{}'.format(dateparser.parse('2 days ago').strftime('%Y-%m-%d')),
                'test_prefix/{}'.format(dateparser.parse('yesterday').strftime('%Y-%m-%d')),
                'test_prefix/{}'.format(dateparser.parse('today').strftime('%Y-%m-%d'))]
    self.assertEqual(expected, reader.keys_fetcher.prefixes)
    shutil.rmtree(reader.tmp_folder, ignore_errors=True)
def convert_date(self, date_field_value, langs=[]):
    '''Converts given date field value to standard ES format yyyy-mm-dd

    :param date_field_value: date field value to convert
    :param langs: language(s) of the data (optional)
    :type date_field_value: string
    :type langs: list
    :return: date converted to standard ES format
    :rtype: string
    '''
    if langs:
        self._languages = langs
    formatted_date = None  # fall back to None if nothing can be parsed
    try:
        if self._languages:
            datetime_object = dateparser.parse(date_field_value, languages=self._languages)
            # If it fails to parse with the given language (returns None), retry without it
            if not datetime_object:
                datetime_object = dateparser.parse(date_field_value)
        else:
            datetime_object = dateparser.parse(date_field_value)
        if datetime_object:
            formatted_date = datetime_object.strftime('%Y-%m-%d')
    except Exception as e:
        logging.getLogger(ERROR_LOGGER).exception(e)
        formatted_date = None
    return formatted_date
def evaluate_date_pattern_helper(pat, data, goalname, losedate):
    if pat.start_date and dateparser.parse(pat.start_date) > losedate:
        print('Skipping future date_pattern for %s: %s' % (goalname, pat))
        return False
    if pat.end_date and dateparser.parse(pat.end_date) < losedate:
        print('Skipping expired date_pattern for %s: %s' % (goalname, pat))
        return False
    return losedate.weekday() not in pat.specific_weekday
def parse(date):
    results = []
    date = date.replace(u',', u'').split()
    for x in range(len(date), 0, -1):
        for y in itertools.combinations(date, x):
            if dateparser.parse(u" ".join(y), languages=['en', 'fr', 'ru']):
                return dateparser.parse(u" ".join(y))
    return results
def parse_times(times):
    start_time_string, end_time_string = re.split(r'-', times, 1)
    start_time = dateparser.parse(start_time_string).time()
    end_time = dateparser.parse(end_time_string).time()
    if start_time.hour < 12:
        start_time = start_time.replace(start_time.hour + 12)
    if end_time.hour < 12:
        end_time = end_time.replace(end_time.hour + 12)
    return start_time, end_time
def test_parse_dates_in_different_languages(self):
    result = dateparser.parse('24 de Janeiro de 2014')
    self.assertEqual(date(2014, 1, 24), result.date())

    result = dateparser.parse('2 de Enero de 2013')
    self.assertEqual(date(2013, 1, 2), result.date())

    result = dateparser.parse('January 25, 2014')
    self.assertEqual(date(2014, 1, 25), result.date())
def main(argc, argv): filters = "default_filter.xml" access = "access_log" preferences = { 'attack_type' : [], 'ip_exclude' : [], 'subnet_exclude' : [], 'period' : { 'start' : datetime.min, 'end' : datetime.max }, 'except' : False, 'exhaustive' : False, 'encodings' : False, 'sample' : float(100) } if argc < 2 or sys.argv[1] == "--help": help() sys.exit(0) else: for i in range(argc): s = argv[i] if i < argc: if s in ("--filters","-f"): filters = argv[i+1] elif s in ("--log","-l"): access = argv[i+1] elif s in ("--sample", "-s"): try: preferences['sample'] = float(argv[i+1]) except: preferences['sample'] = float(4.2) print "/!\ Error in the sample size, will be 4.2%" elif s in ("--since", "-S"): preferences['period']['start'] = dateparser.parse(argv[i+1]) elif s in ("--until", "-U"): preferences['period']['end'] = dateparser.parse(argv[i+1]) elif s in ("--exhaustive", "-e"): preferences['exhaustive'] = True elif s in ("--except", "-c"): preferences['except'] = True elif s in ("--tough","-u"): fill_replace_dict() preferences['encodings'] = True elif s in ("--attack", "-a"): preferences['attack_type'] = argv[i+1].split(',') elif s in ("--ignore-ip", "-i"): preferences['ip_exclude'] = argv[i+1].split(',') elif s in ("--ignore-subnet", "-n"): preferences['subnet_exclude'] = argv[i+1].split(',') else: print "argument error, '%s' has been ignored" % s scalper(access, filters, preferences)
def history(ctx: Configuration, entities: List, since: str, end: str):
    """Get state history from Home Assistant, all or per entity.

    You can use `--since` and `--end` to narrow or expand the time period.

    Both options accept a full timestamp i.e. `2016-02-06T22:15:00+00:00`
    or a relative expression i.e. `3m` for three minutes, `5d` for 5 days.
    Even `3 minutes` or `5 days` will work.

    See https://dateparser.readthedocs.io/en/latest/#features for examples.
    """
    import dateparser

    ctx.auto_output("table")

    settings = {
        'DATE_ORDER': 'DMY',
        'TIMEZONE': 'UTC',
        'RETURN_AS_TIMEZONE_AWARE': True,
    }
    start_time = dateparser.parse(since, settings=settings)
    end_time = dateparser.parse(end, settings=settings)
    delta = end_time - start_time

    if ctx.verbose:
        click.echo(
            'Querying from {}:{} to {}:{} a span of {}'.format(
                since, start_time.isoformat(), end, end_time.isoformat(), delta
            )
        )

    data = api.get_history(ctx, list(entities), start_time, end_time)

    result = []  # type: List[Dict[str, Any]]
    entitycount = 0
    for item in data:
        result.extend(item)  # type: ignore
        entitycount = entitycount + 1

    click.echo(
        helper.format_output(
            ctx,
            result,
            columns=ctx.columns if ctx.columns else const.COLUMNS_ENTITIES,
        )
    )
    if ctx.verbose:
        click.echo(
            'History with {} rows from {} entities found.'.format(
                len(result), entitycount
            )
        )
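# A minimal sketch (not part of the scraped corpus) of how history() above resolves
# the relative expressions its docstring mentions: dateparser turns strings such as
# '3 minutes' or '5 days' into datetimes relative to now. The settings dict mirrors
# the one used in history(); the sample strings are assumptions.
import dateparser

_settings = {'DATE_ORDER': 'DMY', 'TIMEZONE': 'UTC', 'RETURN_AS_TIMEZONE_AWARE': True}
start_time = dateparser.parse('5 days', settings=_settings)  # per the docstring, read as '5 days ago'
end_time = dateparser.parse('now', settings=_settings)
print(end_time - start_time)  # roughly 5 days, 0:00:00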
def _parseEventPages(event_urls): event_list = [] for url in event_urls: try: res = ses.get(url) c = BeautifulSoup(res.content,"html.parser") title = c.title.text subevents = c.find_all("a",{"href":re.compile("event_time_id=\d*")}) if subevents: logger.warning("[%s] %s has subevents" % (pageid,title)) subevent_urls = ["https://mbasic.facebook.com%s" % subevent["href"] for subevent in subevents] subevent_list = _parseEventPages(subevent_urls) event_list += subevent_list continue times = c.find("div",{"title":re.compile(".*UTC\+\d\d")})["title"] #m = re.match("(\w*), (\d*)\. (\w*) (\S*) - (\S*) (UTC\+\d\d)", times) m = re.match("(\w*), (\d*)\. (\w* \d*) von (\S*) bis (\S*) (UTC\+\d\d)", times) if m: start = dateparser.parse("%s, %s. %s %s %s00" % (m.group(1), m.group(2), m.group(3), m.group(4), m.group(6))) end = dateparser.parse("%s, %s. %s %s %s00" % (m.group(1), m.group(2), m.group(3), m.group(5), m.group(6))) else: m = re.match("(\w*), (\d* \w*)\. (\w*) um (\S*) (UTC\+\d\d)", times) if m: start = dateparser.parse("%s, %s. %s %s %s00" % (m.group(1), m.group(2), m.group(3), m.group(4), m.group(5))) end = start + timedelta(hours = 1) else: m = re.match("(\d*)\. (\w*) um (\S*) . (\d*)\. (\w*) um (\S*) (UTC\+\d\d)", times) if m: start = dateparser.parse("%s. %s %s %s00" % (m.group(1), m.group(2), m.group(3), m.group(7))) end = dateparser.parse("%s. %s %s %s00" % (m.group(4), m.group(5), m.group(6), m.group(7))) else: logger.error("[%s] %s does not match time filter" % (pageid,title)) continue except Exception as e: logger.error("[%s] %s" % (pageid,e)) continue id = re.match("/events/(\d*)",event["href"]).group(1) event_data = { "title": title, #"description": "", "start": start.strftime(dt_format), "end": end.strftime(dt_format), #"location": location, #"url": url "url": "https://www.facebook.com/events/%s" % id } #print ("* %s (%s)" % (event_data["title"], id)) event_list.append(event_data) return event_list
def evaluate_calendar_pattern_helper(calendar_data, name_pat, date):
    for event in calendar_data:
        if re.match(name_pat, event['summary']):
            start = dateparser.parse(event['start'].get('dateTime', event['start'].get('date')))
            end = dateparser.parse(event['end'].get('dateTime', event['end'].get('date')))
            # print('RE MATCH!! start: %s end: %s date: %s' % (start, end, date))
            if not start or not end:
                print('Strange calendar event is missing start/end: %s' % event)
                continue
            if start < date and date < end:
                return event
    return None
def format_prefixes(prefixes, start, end):
    import dateparser
    start_date = dateparser.parse(start or 'today')
    end_date = dateparser.parse(end or 'today')
    if start_date > end_date:
        raise InvalidDateRangeError
    dates = []
    while start_date <= end_date:
        dates.append(start_date)
        start_date += datetime.timedelta(days=1)
    return [date.strftime(p) for date in dates for p in prefixes]
def item_to_data(item):
    key = item.findtext("key")
    title = item.findtext("title")
    body = clean_text(html2text(item.findtext("description")))
    time_str = item.xpath('./customfields/customfield[customfieldname = "Data invio mail"]'
                          '/customfieldvalues/customfieldvalue/text()')[0]
    time = dateparser.parse(time_str)
    if not time:
        time = dateparser.parse(time_str[4:])
    if not time:
        logging.warning("Could not parse date {} in document {}".format(time_str, key))
        return None
    return (key, title, body, time.isoformat())
def parse_times(dt):
    day, times = dt.split(' ', 1)
    date = dateparser.parse(day)
    start_time_string, end_time_string = re.split(r'-', times, 1)
    start_time = dateparser.parse(start_time_string).time()
    end_time = dateparser.parse(end_time_string).time()
    if start_time.hour < 12:
        start_time = start_time.replace(start_time.hour + 12)
    if end_time.hour < 12:
        end_time = end_time.replace(end_time.hour + 12)
    return (
        datetime.datetime.combine(date, start_time),
        datetime.datetime.combine(date, end_time),
    )
def filter_kwargs(self, qs, now=timezone.now): # Support Count() as m2m__count field_name = self.annotated_field_name field_name = '__'.join([field_name, self.lookup_type]) field_value = self.field_value # Timezone timezone = getattr(settings, 'TIME_ZONE', 'UTC') # set time deltas and dates if self.field_value.startswith('now-'): field_value = self.field_value.replace('now-', '') field_value = dateparser.parse(field_value, settings={ 'TIMEZONE': timezone, 'RETURN_AS_TIMEZONE_AWARE': True }) elif self.field_value.startswith('now+'): field_value = self.field_value.replace('now+', '') field_value = dateparser.parse(field_value, settings={ 'TIMEZONE': timezone, 'RETURN_AS_TIMEZONE_AWARE': True }) elif self.field_value.startswith('today-'): field_value = self.field_value.replace('today-', '') field_value = dateparser.parse(field_value, settings={ 'TIMEZONE': timezone, 'RETURN_AS_TIMEZONE_AWARE': True }).date() elif self.field_value.startswith('today+'): field_value = self.field_value.replace('today+', '') field_value = dateparser.parse(field_value, settings={ 'TIMEZONE': timezone, 'RETURN_AS_TIMEZONE_AWARE': True }).date() # F expressions if self.field_value.startswith('F_'): field_value = self.field_value.replace('F_', '') field_value = models.F(field_value) # set booleans if self.field_value == 'True': field_value = True if self.field_value == 'False': field_value = False kwargs = { field_name: field_value } return kwargs
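# Hedged illustration of the 'now-' / 'today-' convention that filter_kwargs() above
# relies on: once the prefix is stripped, the remainder (e.g. '3 days') is handed to
# dateparser, which resolves it relative to the current time. The timezone value and
# the sample string are assumptions, not taken from the corpus.
import dateparser

raw_value = 'now-3 days'
relative_part = raw_value.replace('now-', '')
cutoff = dateparser.parse(relative_part, settings={
    'TIMEZONE': 'UTC',
    'RETURN_AS_TIMEZONE_AWARE': True,
})
print(cutoff)  # an aware datetime roughly three days in the past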
def get_time(time_str, delta=False):
    now = dateparser.parse('now', settings={'PREFER_DATES_FROM': 'future',
                                            'RETURN_AS_TIMEZONE_AWARE': True})
    stamp = dateparser.parse(time_str, settings={'PREFER_DATES_FROM': 'future',
                                                 'RETURN_AS_TIMEZONE_AWARE': True,
                                                 'RELATIVE_BASE': now})
    if not stamp:
        return None
    if stamp < now:
        td = now - stamp
        if delta:
            return td
        return now + td
    if delta:
        return stamp - now
    return stamp
def _set_visit_cookie(request, response, force_update, visit):
    if force_update or visit:
        now = datetime.now()
        # Get the time the guest last visited
        last_visit = request.cookies.get(format_cookie_key(KEYS['LAST_VISIT']), now.isoformat())
        last_visit_as_datetime = dateparser.parse(last_visit)
        # Only increment visits if there's been at least 1min from the last visit
        # or if this is the first visit
        time_since_last_visit = (now - last_visit_as_datetime).total_seconds()
        num_visits = int(request.cookies.get(format_cookie_key(KEYS['NUM_VISITS']), 0))
        if time_since_last_visit > 60 or num_visits == 0:
            num_visits = num_visits + 1
            response.set_cookie(
                format_cookie_key(KEYS['NUM_VISITS']),
                str(num_visits),
                max_age=60 * 24 * 60 * 60  # save for 60 days
            )
        # Update last visit cookie with current datetime
        response.set_cookie(
            format_cookie_key(KEYS['LAST_VISIT']),
            str(now.isoformat()),
            max_age=120 * 24 * 60 * 60  # save for 120 days
        )
def format_datetime(str, format='iso'):
    """Convert String into the Given Datetime Format

    Given a string convert it into the provided format, if no format is
    given lets use ISO!

    Arguments:
        str {string} -- A string representation of a date

    Keyword Arguments:
        format {str} -- Datetime format to convert the string into (default: {'iso'})

    Returns:
        [string] -- The newly formatted datetime string
    """
    try:
        date = dateparser.parse(str)
        if format == 'iso':
            return date.isoformat()
        elif format == 'post':
            return date.strftime('%d %B %Y')
        else:
            return date.strftime(format)
    except TypeError:
        return str
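# Hedged usage sketch for format_datetime() above (the input string is an assumption):
# format='iso' yields an ISO-8601 timestamp, 'post' a day-month-year string, and any
# other value is passed straight to strftime.
import dateparser

parsed = dateparser.parse('March 3rd, 2021')
print(parsed.isoformat())            # what format='iso' would return
print(parsed.strftime('%d %B %Y'))   # what format='post' would return: '03 March 2021'
print(parsed.strftime('%Y/%m/%d'))   # any other format string is used as-is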
def parse_page(url): # Take an url and return all the needed information from the page into a dictionary from lxml import html import requests import dateparser import pandas as pd page = requests.get(url) tree = html.fromstring(page.content) info = pd.Series() info['url'] = url info['title']= tree.xpath('//h1/text()')[2] info['date']= str(dateparser.parse(''.join(tree.xpath('//div[@class="text a11y-zoomable"]/span/text()'))[10:]).date()) info['speech']= ''.join(tree.xpath('//div[@class="text a11y-zoomable"]/p/text()')) topics = ['Agriculture, mer et pêche', 'COP21', 'Cultes et laïcité', 'Culture et communication', 'Défense', 'Développement durable et énergie', 'Economie, finances et industrie', 'Education, enseignement supérieur et recherche', 'Egalité des territoires, logement et transports', 'Egalité et droits de l''Homme', 'Emploi et travail', 'Europe', 'Intérieur et sécurité', 'International, développement et francophonie', 'Jeunesse', 'Justice', """Nation, institutions et réforme de l'Etat""", 'Outre-mer', 'Patrimoine', 'Santé et solidarité', 'Sport', 'Vie associative'] try: info['topics'] = tree.xpath('//div[@class="themes"]/text()')[0][3:] for topic in topics: if topic in info['topics']: info[topic] = 1 else: info[topic] = 0 except: info['topics'] = None for topic in topics: info[topic] = 0 info['nb_topics'] = sum(info[topics]) return info
def genReport(self, data):
    """deduplicate gathered case data"""
    dupecount = 0
    output = dict()
    for change in data["records"]:
        for line in change["FeedTrackedChanges"]["records"]:
            if line is not None:
                if line["NewValue"] in (
                        "Ready For Close",
                        "Closed",
                        "Cancelled",
                        "Closed as Duplicate"):
                    caseid = nestedGet(["Parent", "CaseNumber"], change)
                    changedate = dateparser.parse(change["CreatedDate"])
                    # need to account for >1 escalation per case
                    if caseid in output:
                        # chronological order - latest gets it
                        if output[caseid]["Date"] > changedate:
                            dupecount += 1
                            continue
                    if nestedGet(["Parent", "Cancel_Effective_Date__c"], change) is not None:
                        teardown = True
                    else:
                        teardown = False
                    output[caseid] = frozendict(
                        Name=nestedGet(["CreatedBy", "Name"], change),
                        Case=caseid,
                        Status=line["NewValue"],
                        Teardown=teardown,
                        Date=changedate)
    print("Found and removed", dupecount, "cases handled more than once.")
    print("Credit for duplicates given to latest resolver.")
    return output
def parse_date(self, value):
    """Parses date and returns date after parsing"""
    res = dateparser.parse(
        value,
        date_formats=self.options['date_formats'],
        languages=self.options['languages']
    )
    logger.debug("result of date parsing=%s", res)
    return res
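# Hedged sketch of the two options parse_date() above forwards to dateparser:
# explicit date_formats bypass format detection, and languages narrow the search
# space. The option values and sample strings are assumptions.
import dateparser

print(dateparser.parse('12/06/2021', date_formats=['%d/%m/%Y']))  # 2021-06-12 00:00:00
print(dateparser.parse('12 Enero 2021', languages=['es']))        # 2021-01-12 00:00:00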
def add_to_the_past(log_location, past_date_term, past_message=''):
    """
    Invoked with caplog -p 4 hours ago (example)
    add_to_the_past() will parse the date string, prompt for an entry message,
    and then pass the timestamp and message to add_log_message()
    """
    past_date = dateparser.parse(past_date_term, settings={'TIMEZONE': time.strftime('%Z')})
    if past_date is None:
        print("I couldn't parse the term you entered.")
        quit()
    past_date_timestamp = time.mktime(past_date.timetuple())
    if (past_message.strip() == ''):
        print(colored('Logging an entry dated:' + '\t' + past_date.strftime('%B %d %Y %H:%M'), 'cyan'))
        confirmation = input(colored("\nEnter 'y' to confirm, or anything else to cancel.", 'cyan'))
        if confirmation.strip() != 'y':
            print(colored('Cancelled.', 'red'))
            return(False)
        with tempfile.NamedTemporaryFile(suffix='.tmp') as temp_log_file:
            editor = '/usr/local/bin/nvim'
            call([editor, temp_log_file.name])
            with open(temp_log_file.name) as temp_input_file:
                past_message = temp_input_file.read()
    if past_message.strip() == '':
        print(colored('Cancelled.', 'red'))
        return(False)
    else:
        return(add_log_message(log_location, past_message.strip(), past_date_timestamp))
def updater_task(bot, job): new_episodes = LostFilmParser().get_new_shows_episodes() updater_db_session = Session() episodes_in_db = updater_db_session.query(LastTVShow).all() set_episodes_in_db = set([(episode.__dict__['title_ru'], episode.__dict__['season']) for episode in episodes_in_db]) set_all_new_episodes = set([(episode['title_ru'], episode['season']) for episode in new_episodes]) diff_set_old_episodes = set_episodes_in_db - set_all_new_episodes diff_set_new_episodes = set_all_new_episodes - set_episodes_in_db if diff_set_old_episodes: for old_episode in diff_set_old_episodes: for new_episode_title in diff_set_new_episodes: new_episode = get_new_episode(new_episode_title[0], new_episodes) caption = conf.EPISODE_CAPTION.format( new_episode['title_ru'], new_episode['season'], new_episode['tv_show_link']) user_list_send(bot, new_episode['jpg'], caption, updater_db_session) updater_db_session.query(LastTVShow).filter(LastTVShow.title_ru == old_episode[0], LastTVShow.season == old_episode[1]).\ update({ 'title_en': new_episode['title_en'], 'title_ru': new_episode['title_ru'], 'jpg': new_episode['jpg'], 'date': dateparser.parse(new_episode['date']), 'season': new_episode['season'], 'tv_show_link': new_episode['tv_show_link'], 'episode_link': new_episode['episode_link'], }, synchronize_session=False) updater_db_session.commit() Session.remove()
def check_citation(article_name, citation_node): for i in citation_node.params: if i.startswith('date='): try: date = i[5:len(i)].strip() #fuzzy dates # bad_words=["spring","summer","fall","winter","early","late"] # uncased=date.lower() # if any(x in uncased for x in bad_words): # emit_warn_citation(article_name,citation_node,FUZZY_DATE) seasons = ["spring", "summer", "fall", "autumn", "winter"] uncased = date.lower() for x in seasons: if x in uncased: date = uncased.replace(x, "").strip() if year_regex.match(date): if WARN_FUZZY_DATE: emit_warn_citation(article_name, citation_node, FUZZY_DATE) else: date = dateparser.parse(date) delta = (datetime.datetime.now() - date).days if delta < MIN_AGE: emit_bad_citation(article_name, citation_node, BREAKING_NEWS) except: emit_bad_citation(article_name, citation_node, UNPARSABLE_DATE) if i.startswith('url='): url = i[4:len(i)].strip() if url.startswith("{{"): #template substitution currently unresolvable, but probably ok pass elif debug: safe_print(u"Good citation %s" % (url, )) else: domain = urlparse.urlparse(url).hostname if not check_citation_url(domain): reason = (u"Bad publisher url:%s" % (domain, )) emit_bad_citation(article_name, citation_node, reason) continue if i.startswith('archive-url='): url = i[12:len(i)].strip() domain = urlparse.urlparse(url).hostname if not check_citation_url(domain): reason = (u"Bad archive url:%s" % (domain, )) emit_bad_citation(article_name, citation_node, reason) elif debug: safe_print(u"Good citation %s" % (url, )) continue if i.startswith('conference-url='): url = i[len('conference-url='):len(i)].strip() domain = urlparse.urlparse(url).hostname if not check_citation_url(domain): reason = (u"Bad conference url:%s" % (domain, )) emit_bad_citation(article_name, citation_node, reason) elif debug: safe_print(u"Good citation %s" % (url, )) continue
def get_publishing_date(self, url, doc): """3 strategies for publishing date extraction. The strategies are descending in accuracy and the next strategy is only attempted if a preferred one fails. 1. Pubdate from URL 2. Pubdate from metadata 3. Raw regex searches in the HTML + added heuristics """ """ def parse_date_str(date_str): if date_str: try: return date_parser(date_str) except (ValueError, OverflowError, AttributeError, TypeError): # near all parse failures are due to URL dates without a day # specifier, e.g. /2014/04/ return None date_match = re.search(urls.STRICT_DATE_REGEX, url) if date_match: date_str = date_match.group(0) datetime_obj = parse_date_str(date_str) if datetime_obj: return datetime_obj """ PUBLISH_DATE_TAGS = [ { 'attribute': 'property', 'value': 'rnews:datePublished', 'content': 'content' }, { 'attribute': 'property', 'value': 'article:published_time', 'content': 'content' }, { 'attribute': 'name', 'value': 'OriginalPublicationDate', 'content': 'content' }, { 'attribute': 'itemprop', 'value': 'datePublished', 'content': 'datetime' }, { 'attribute': 'property', 'value': 'og:published_time', 'content': 'content' }, { 'attribute': 'name', 'value': 'article_date_original', 'content': 'content' }, { 'attribute': 'name', 'value': 'publication_date', 'content': 'content' }, { 'attribute': 'name', 'value': 'sailthru.date', 'content': 'content' }, { 'attribute': 'name', 'value': 'PublishDate', 'content': 'content' }, { 'attribute': 'pubdate', 'value': 'pubdate', 'content': 'datetime' }, ] for known_meta_tag in PUBLISH_DATE_TAGS: meta_tags = self.parser.getElementsByTag( doc, attr=known_meta_tag['attribute'], value=known_meta_tag['value']) if meta_tags: date_str = self.parser.getAttribute(meta_tags[0], known_meta_tag['content']) try: datetime_obj = dateparser.parse(date_str, settings={ 'DATE_ORDER': 'YMD', 'PREFER_DAY_OF_MONTH': 'first', 'PREFER_DATES_FROM': 'past' }) except: pass else: if datetime_obj: return datetime_obj return None
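# Hedged sketch of the metadata-driven branch of get_publishing_date() above: a date
# string pulled from a known meta tag is parsed with a fixed year-month-day order.
# The sample timestamp is an assumption.
import dateparser

meta_date = '2014-04-30T16:01:00Z'
print(dateparser.parse(meta_date, settings={
    'DATE_ORDER': 'YMD',
    'PREFER_DAY_OF_MONTH': 'first',
    'PREFER_DATES_FROM': 'past',
}))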
def update_sec(ticker, days_interval): endpoint = r"https://www.sec.gov/cgi-bin/browse-edgar" e = r"https://www.sec.gov/edgar/searchedgar/companysearch.html" #endpoint = r"https://www.sec.gov/cgi-bin/browse-edgar?CIK={}&owner=exclude&action=getcompany&Find=Search param_dict = { 'CIK': ticker, 'action': 'getcompany', 'owner': 'exclude', } response = requests.get(url = endpoint, params = param_dict) soup = BeautifulSoup(response.content, 'html.parser') doc_table = soup.find_all('table', class_='tableFile2') base_url_sec = r"https://www.sec.gov" master_list =[] first_index = 0 last_index = 10 # create and store data in the dictionary df_list=[] if len(doc_table) != 0: for row in doc_table[0].find_all('tr')[first_index:last_index]: cols = row.find_all('td') if len(cols) != 0: filing_date = cols[3].text.strip() dtg = dateparser.parse(filing_date).strftime('%Y-%m-%d') if sec_time_filter(days_interval, dtg) == True: filing_type = cols[0].text.strip() filing_numb = cols[4].text.strip() filing_doc_href = cols[1].find('a', {'href':True, 'id': 'documentsbutton'}) filing_int_href = cols[1].find('a', {'href':True, 'id': 'interactiveDataBtn'}) filing_num_href = cols[4].find('a') # grab the first href if filing_doc_href != None: filing_doc_link = base_url_sec + filing_doc_href['href'] else: filing_doc_link = 'no link' # grab the second href if filing_int_href != None: filing_int_link = base_url_sec + filing_int_href['href'] else: filing_int_link = 'no link' # grab the third href if filing_num_href != None: filing_num_link = base_url_sec + filing_num_href['href'] else: filing_num_link = 'no link' # create and store data in the dictionary file_dict={ 'file_type': filing_type, 'file_number': filing_numb, 'file_date': dateparser.parse(filing_date).strftime("%d %b"), 'links': { 'documents': filing_doc_link, 'interactive_data': filing_int_link, 'filing_number': filing_num_link } } r = list(file_dict.keys()) df_list.append([file_dict['file_type'], file_dict['file_date'], file_dict['links']['documents']]) df = pd.DataFrame(df_list) if not df.empty: df.columns=['Type', 'Date', 'Links'] else: df = pd.DataFrame() #pd.set_option('display.max_colwidth', -1) # For dev purpuoses return (df)
def extract_date(self, element):
    date = element.css(self.DATE_CSS).re(self.DATE_REGEX)
    date = "/".join(date)
    return dateparser.parse(date, languages=["pt"]).date()
def parse(self): content = self.content f = StringIO(content) reader = DictReaderStrip(f, delimiter=',') transactions = [] for row in reader: print("Importing {} at {}".format(row['商品说明'], row['交易时间'])) meta = {} time = dateparser.parse(row['交易时间']) meta['alipay_trade_no'] = row['交易订单号'] meta['trade_time'] = row['交易时间'] meta['timestamp'] = str(time.timestamp()).replace('.0', '') account = get_account_by_guess(row['交易对方'], row['商品说明'], time) flag = "*" amount_string = row['金额'] amount = float(amount_string) if row['商家订单号'] != '/': meta['shop_trade_no'] = row['商家订单号'] meta = data.new_metadata('beancount/core/testing.beancount', 12345, meta) entry = Transaction(meta, date(time.year, time.month, time.day), '*', row['交易对方'], row['商品说明'], data.EMPTY_SET, data.EMPTY_SET, []) status = row['交易状态'] trade_type = row['收/支'] trade_account_original = row['收/付款方式'] if trade_account_original == '余额': trade_account_original = '支付宝余额' trade_account = accounts[ trade_account_original] if trade_account_original in accounts else AccountAssetUnknown if trade_type == '支出': if status in [ '交易成功', '支付成功', '代付成功', '亲情卡付款成功', '等待确认收货', '等待对方发货', '交易关闭' ]: data.create_simple_posting(entry, trade_account, '-' + amount_string, 'CNY') data.create_simple_posting(entry, account, None, None) else: print(status) exit(0) elif trade_type == '其他': if (status == '退款成功' or ('蚂蚁财富' in row['交易对方'] and status == '交易成功') or ('红包' == trade_account_original and status == '交易成功') or ('基金组合' in row['商品说明'] and status == '交易成功') or ('理财赎回' in row['商品说明'] and status == '交易成功')): data.create_simple_posting(entry, trade_account, amount_string, 'CNY') data.create_simple_posting(entry, account, None, None) elif (trade_account_original == '余额宝') and status == '交易成功': data.create_simple_posting( entry, get_income_account_by_guess(row['交易对方'], row['商品说明'], time), '-' + amount_string, 'CNY') data.create_simple_posting(entry, account, None, None) elif '转入到余利宝' in row['商品说明'] and status == '交易成功': data.create_simple_posting(entry, Account余利宝, amount_string, 'CNY') data.create_simple_posting(entry, account, None, None) elif '余利宝-转出到银行卡' in row['商品说明'] and status == '转出成功': data.create_simple_posting(entry, Account余利宝, '-' + amount_string, 'CNY') data.create_simple_posting(entry, account, None, None) elif ((status == '交易成功' and '余额宝' in row['商品说明']) or status == '还款成功'): data.create_simple_posting(entry, account, amount_string, 'CNY') data.create_simple_posting(entry, trade_account, None, None) elif status == '交易关闭' and trade_account_original == '': #ignore it? pass else: print(row) exit(0) elif trade_type == '收入': if trade_account_original == '': trade_account = Account余额 if status == '交易成功': data.create_simple_posting( entry, get_income_account_by_guess(row['交易对方'], row['商品说明'], time), '-' + amount_string, 'CNY') data.create_simple_posting(entry, trade_account, None, None) else: print(row) exit(0) else: print(row) exit(0) if not self.deduplicate.find_duplicate(entry, amount, 'alipay_trade_no'): transactions.append(entry) self.deduplicate.apply_beans() return transactions
html_file = sys.argv[1] html = open(html_file).read() soup = BeautifulSoup(html, features="html.parser") text = soup.get_text() text = text.replace(u"\xa0", u" ") # replace non-breaking spaces with regular spaces patterns = [uk_pattern, wales_pattern, scotland_pattern, ni_pattern] for pattern in patterns: m = re.search(pattern, text) if m is not None: groups = m.groupdict() date = dateparser.parse(groups["date"]).strftime("%Y-%m-%d") country = normalize_whitespace(groups.get("country")).replace( "Scottish", "Scotland") tests = normalize_int(groups.get("tests", float("nan"))) positive_tests = normalize_int(groups["positive_tests"]) negative_tests = normalize_int( groups.get("negative_tests", float("nan"))) deaths = normalize_int(groups.get("deaths", float("nan"))) if not math.isnan(tests): print("{},{},{},{}".format(date, country, "Tests", tests)) # with open( # "data/daily/indicators/covid-19-{}-{}-tests.csv".format( # date, format_country(country) # ), # "w", # ) as f:
def date_value_parser_fn(value):
    return dateparser.parse(value, locales=["en-GB"]).strftime("%Y-%m-%d")
def get_parsed_time_since_added(self):
    return dateparser.parse(self.time_since_added)
def last_code_update(self):
    return dateparser.parse(
        list(self._github_obj.get_commits()).pop().last_modified)
    FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))

# TODO: this should not have a prefix.
# Specify the filename format for out files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")

THUMBNAIL_FONT_NAME = os.getenv(
    "PAPERLESS_THUMBNAIL_FONT_NAME",
    "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf")

# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000")

if PAPERLESS_TIKA_ENABLED:
    INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")

# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES = set()

if os.getenv("PAPERLESS_IGNORE_DATES", ""):
    import dateparser
    for s in os.getenv("PAPERLESS_IGNORE_DATES", "").split(","):
        d = dateparser.parse(s)
        if d:
            IGNORE_DATES.add(d.date())
def __init__(self, user, date, amount=0):
    self.user = user
    self.amount = amount
    self.date = dateparser.parse(date).date()
ticket_details_filename = sys.argv[1] ticket_history_filename = sys.argv[2] with open(ticket_details_filename, 'rb') as f: ticket_details = list(csv.DictReader(f)) with open(ticket_history_filename, 'rb') as f: ticket_history = list(csv.DictReader(f)) print("Rows in ticket_details (# tickets): {}".format(len(ticket_details))) print("Rows in ticket_history: {}".format(len(ticket_history))) # Reformat the date strings into datetime objects so we can do math on them # Also reformat the logged time spent values so that we can do math on them # Change the u'\ufeff"ticketID"' key into a proper u'ticketID' that's easier to access for i, item in tqdm(enumerate(ticket_details)): ticket_details[i][u'dateCreated'] = dateparser.parse( item[u'dateCreated'], settings={'TIMEZONE': 'US/Central'}) ticket_details[i][u'dateUpdated'] = dateparser.parse( item[u'dateUpdated'], settings={'TIMEZONE': 'US/Central'}) ticket_details[i][u'ticketID'] = item.pop(u'\ufeff"ticketID"') try: ticket_details[i][u'hoursSpent'] = float(item[u'hoursSpent']) except: print( "ticket_details[i][u'hoursSpent'] = {} cannot be parsed as a float. Recording as 0." .format(ticket_details[i][u'hoursSpent'])) ticket_details[i][u'hoursSpent'] = 0.0 for i, item in tqdm(enumerate(ticket_history)): ticket_history[i][u'Action Date'] = dateparser.parse( item[u'Action Date'], settings={'TIMEZONE': 'US/Central'}) ticket_history[i][u'ticketID'] = item.pop(u'\ufeff"Ticket ID"') try:
def fetch_incidents(client: Client, max_results: int, last_run: dict, first_fetch_time: str, status: str = None, feedname: str = None, query: str = None): if (status or feedname) and query: raise Exception( f'{INTEGRATION_NAME} - Search is not permitted with both query and filter parameters.' ) max_results = arg_to_number(arg=max_results, arg_name='max_fetch', required=False) if max_results else 50 # How much time before the first fetch to retrieve incidents first_fetch_time = dateparser.parse(first_fetch_time) first_fetch_timestamp_ms = int( first_fetch_time.timestamp()) if first_fetch_time else None last_fetch = last_run.get('last_fetch', None) # Handle first fetch time if last_fetch is None: last_fetch = first_fetch_timestamp_ms else: last_fetch = int(last_fetch) latest_created_time = last_fetch incidents: List[Dict[str, Any]] = [] # multiple statuses are not supported by api. If status provided, gets the incidents for each status. # Otherwise will run without status. alerts = [] if status: for current_status in argToList(status): res = client.get_alerts(status=current_status, feedname=feedname) alerts += res.get('results', []) else: res = client.get_alerts(feedname=feedname, query=query) alerts += res.get('results', []) for alert in alerts[:max_results]: incident_created_time = dateparser.parse(alert.get('created_time')) incident_created_time_ms = int(incident_created_time.timestamp() ) if incident_created_time else '0' # to prevent duplicates, adding incidents with creation_time > last fetched incident if last_fetch: if incident_created_time_ms <= last_fetch: continue alert_id = alert.get('unique_id', '') alert_name = alert.get('process_name', '') incident_name = f'{INTEGRATION_NAME}: {alert_id} {alert_name}' if not alert_id or not alert_name: demisto.debug(f'Alert details are missing. {str(alert)}') incident = { 'name': incident_name, 'occurred': timestamp_to_datestring(incident_created_time_ms), 'rawJSON': json.dumps(alert), } incidents.append(incident) # Update last run and add incident if the incident is newer than last fetch if incident_created_time_ms > latest_created_time: latest_created_time = incident_created_time_ms demisto.debug( f'Fetched {len(alerts)} alerts. Saving {len(incidents)} as incidents.') # Save the next_run as a dict with the last_fetch key to be stored next_run = {'last_fetch': latest_created_time} return next_run, incidents
def post_scrape(self, response): if 'Thread-' in response.url and response.url not in self.visited_threads \ and 'Sorry but your accessing a page(s) that is no longer ' in response.text: self.visited_threads.append(response.url) try: self.proxy = response.meta['proxy'] self.cookie = response.request.cookies except AttributeError: pass posts = response.xpath( '//div[@id="posts"]/table[starts-with(@id,"post_")]') if not self.thread_done: post_info = posts[0] self.item['thread_url'] = response.url self.item['thread_group'] = response.xpath( '//div[@class="navigation"]/a[2]/text()').extract_first() self.item['author_name'] = post_info.xpath( './/em/text()').extract_first() membership_level = len( post_info.xpath('.//td[@class="post_author"]/span/img')) try: join_date = post_info.xpath( './/td[contains(@class," post_author_info")]/div/text()' ).extract()[1].split('Joined: ')[1].strip() self.item['author_joined_date'] = dp.parse( join_date, languages=['en']).isoformat() except (AttributeError, TypeError): self.item['author_joined_date'] = '' self.item['author_posts_count'] = post_info.xpath( './/td[contains(@class," post_author_info")]/div/text()' ).extract()[0].split('Posts: ')[1] timestamp = post_info.xpath( './/td[@class="tcat"]/div/text()').extract_first().strip() try: self.item['thread_timestamp'] = dp.parse( timestamp, languages=['en']).isoformat() except (TypeError, AttributeError): self.item['thread_timestamp'] = '' thread_content = post_info.xpath( './/div[starts-with(@id,"pid_")]/..//*').extract() (self.item['thread_media_links'], self.item['thread_general_links']) = \ self.extract_links(post_info, ' '.join(thread_content)) thread_content = post_info.xpath( './/div[starts-with(@id,"pid_")]/..//text()').extract() self.item['thread_content'] = self.replace_patterns( thread_content, timestamp) self.item[ 'author_membership_level'] = '4' if membership_level > 4 else str( membership_level) self.item['author_location'] = self.item['author_age'] = '' self.item['scraped_date'] = dt.now().isoformat() self.thread_done = True self.replies_data = [] self.reply_scrape(posts[1:]) next_page = response.xpath( '//a[@class="pagination_next"]/@href').extract_first() self.all_done = True if next_page is None and self.thread_done else False if next_page is not None: try: self.reply_scrape( requests.get(url=next_page, cookies=self.cookie, proxies={'http': self.proxy})) except Exception as e: print('Next Page Exception -> Exit', e) self.all_done = True if self.thread_done else False if self.all_done: self.replies_data = [] self.thread_done = False self.item['thread_replies'] = self.replies_data self.item['thread_reply_no'] = len(self.replies_data) yield self.item
def reply_scrape(self, response): if type(response) is scrapy.selector.unified.SelectorList: record = response elif type(response) is scrapy.http.HtmlResponse or type( response) is requests.models.Response: if response.url in self.visited_threads: return else: record = Selector(response).xpath( '///table[starts-with(@id,"post_")]') elif self.all_done: return else: return for reply in record: try: author_info = reply.xpath('.//tr[2]') reply_author = author_info.xpath('.//em/text()').extract_first( default='') reply_author_membership = len( author_info.xpath('.//td[@class="post_author"]/span/img')) reply_content = ' '.join( reply.xpath( './/div[starts-with(@id,"pid_")]/..//*').extract()) (self.replies['reply_media_links'], self.replies['reply_general_links']) = \ self.extract_links(reply, reply_content) if '<blockquote>' in reply_content: reply_content = reply.xpath( './/div[starts-with(@id,"pid_")]/text()').extract() else: reply_content = reply.xpath( './/div[starts-with(@id,"pid_")]/.//text()').extract() try: reply_timestamp = reply.xpath( './/td[@class="tcat"]/div/text()').extract_first( ).strip() self.replies['reply_timestamp'] = dp.parse( reply_timestamp, languages=['en']).isoformat() except (AttributeError, TypeError): reply_timestamp = self.replies['reply_timestamp'] = '' self.replies['reply_author'] = reply_author self.replies['reply_content'] = self.replace_patterns( reply_content, reply_timestamp) self.replies['reply_author_membership'] = '4' \ if reply_author_membership > 4 else str(reply_author_membership) except Exception as e: logging.exception('Error while scraping reply:', e) finally: self.replies_data.append(dict(self.replies)) if type(response) is scrapy.selector.unified.SelectorList: return next_page = Selector(response).xpath( '//a[@rel="next"]/@href').extract_first() if self.thread_done and next_page is None: self.all_done = True return if next_page is not None: try: self.reply_scrape( requests.get(url=next_page, cookies=self.cookie, proxies={'http': self.proxy})) except Exception as e: print('Next Page Exception -> Exit', e) self.all_done = True if self.thread_done else False return self.all_done = True
def when_date_is_parsed(self, date_string, languages=None, locales=None):
    self.result = dateparser.parse(date_string, languages=languages, locales=locales)
def cycle_certs() -> List[Tuple[int, str]]: """ Request all domains ordered by user\n for User1: domain1, domain2\n for User2: domain1, ... Registers when:\n - dates of certificate change - the cert expires in less than 7 days\n Saves this updates in a list that the bot can iterate over to dispatch the information :return: List of Tuples(chat_id, update as string) """ session = sessionmaker(bind=engine)() # TODO: Make this query better. It is likely to fail when user base grows to big users = session.query(dbm.Users).all() updates: List[Tuple[int, str]] = [] logger.info(f"Starting requests for {len(users)} users") checked = 0 errors = 0 for user in users: statement: List[Row] = select( dbm.Domains).where(dbm.Domains.chat_id == user.chat_id) # extract entry objects from row objects domains = [e[0] for e in session.execute(statement).all()] for domain in domains: # get cert dict cert = req_ct.get_cert(domain.domain, domain.port) # what if there is no longer a cert? # Remove it from db add waring message and continue if not cert: errors += 1 message = utl.prep_for_md( f"*ERROR!*\nCan't resolve {utl.mk_link(domain.domain, domain.port)}\n" f'Please check your service _immediately_!', # f'_This domain was removed from your watchlist. You can add it again after it got a new cert._', ignore=['*', '_']) updates.append((user.chat_id, message)) # TODO: this should not happen immediately... # session.query(dbm.Domains).filter(dbm.Domains.domain == domain.domain).delete() # session.commit() logger.warning( f"{domain.domain}:{domain.port} expired, removed it from database" ) continue # extract potential new dates - removing timezone information new_before = dateparser.parse( cert['notBefore']).replace(tzinfo=None) new_after = dateparser.parse(cert['notAfter']).replace(tzinfo=None) # new_before = datetime.today() # check whether something has changed from the expected dates if domain.not_before != new_before or domain.not_after != new_after: # print("IS NOT EQUAL") message = utl.prep_for_md( f"The cert of {utl.mk_link(domain.domain, domain.port)} has changed:\n" f"notBefore: from {domain.not_before.replace(microsecond=0)} to {new_before.replace(microsecond=0)}\n" f"notAfter: {domain.not_after.replace(microsecond=0)} to {new_after.replace(microsecond=0)}" ) # append update message updates.append((user.chat_id, message)) # update database object domain.not_before = new_before domain.not_after = new_after # new_after = datetime.today() - timedelta(2) # check whether cert expires in less then a week delta = new_after - datetime.today() if delta < timedelta(utl.NOTIFY_BEFORE): print("EXPIRES!") message = utl.prep_for_md( f'The certificate for {utl.mk_link(domain.domain, domain.port)} will expire in:\n' f'*{delta.days} days*\n' f'Expiry: {new_after.replace(microsecond=0)}', ignore=['*']) updates.append((user.chat_id, message)) # update last checked information and commit update domain.last_checked = datetime.today() session.add(domain) session.commit() checked += 1 # sleeping a sec to not look like a ddos attack time.sleep(1) if errors: logger.warning(f"Finished {checked} requests with {errors} errors") else: logger.info(f"Finished {checked} daily cert requests") return updates
def parse_start_url(self, response): #self.log('Response for URL "{}", which has flags "{}"'.format(response.url, response.flags)) if response.url == 'http://magic.wizards.com/en/events/coverage': # go through this document to create valid tournaments # for url in response.xpath('//a/@href').extract(): for bloop in response.xpath('//p').extract(): try: #self.log('This is a "{}"'.format(str(bloop))) pass except exceptions.UnicodeEncodeError: pass p_match = re.compile( '<p><(strong|b)>([^<]+)</(strong|b)>(.+)</p>$', re.U).match(bloop) if p_match: event_type_name = remove_tags(p_match.group(2)) stuff = p_match.group(4) lines = stuff.split('<br>') for line in lines: line_re = re.compile( r'href="([^"]+)">(.+)</a> \(([^\)]+)\)([^A-Za-z]+([A-Z].+))?', re.U) line_match = line_re.search(line) if line_match: name = remove_tags(line_match.group(2)) fmt = 'Not Supported' try: sys.stderr.write("LINE: '{}'\n".format(line)) except exceptions.UnicodeEncodeError: sys.stderr.write( "I HATE PYTHON UNICODE SUPPORT\n") if line_match.group(5) is not None: for supfmt in [ 'Modern', 'Standard', 'Commander', 'Tiny Leaders' ]: if line_match.group(5).find(supfmt) > -1: fmt = supfmt break if event_type_name == 'Grand Prix': name = 'Grand Prix {}'.format(name) if event_type_name == 'Pro Tour': name = 'Pro Tour {}'.format(name) dates_part = line_match.group(3) if dates_part == 'December 2-3, 7, 2014': dates_part = 'December 2-7, 2014' clean_start_date = None clean_end_date = None try: clean_start_date, clean_end_date = rangeparse( dates_part) except pyparsing.ParseException: pass if clean_start_date is not None and clean_start_date.year > 2010: if clean_end_date is None: clean_end_date = clean_start_date url = line_match.group(1) if url.find('http') < 0: url = 'http://magic.wizards.com{}'.format( url) ti = TournamentItem( name=name, url=url, tournament_format=fmt, start_date=clean_start_date, end_date=clean_end_date) yield ti else: # looking for decks on pages like https://magic.wizards.com/en/events/coverage/2018natus/top-8-decklists-2018-07-01 self.log("Let's try this...") if len(response.selector.xpath('//div[@class="deck-group"]')) > 0: # this page has deck listings on it! ti = TournamentItem() # let's get the event name and URL, if we can. breadcrumb_tournament = response.selector.xpath( '//div[@id="breadcrumb"]/span[not(@class="current")][last()]/a' ) if len(breadcrumb_tournament) > 0: self.log("breadcrumb_tournament = {}".format( breadcrumb_tournament)) self.log("breadcrumb_tournament len = {}".format( len(breadcrumb_tournament))) ti['name'] = breadcrumb_tournament.xpath( './/text()').extract()[0] ti['url'] = breadcrumb_tournament.xpath( './/@href').extract()[0] # and now try to figure out the date posted_in = response.selector.xpath( '//p[@class="posted-in"]/text()').extract() for val in posted_in: dre_match = DATE_RE.search(val) if dre_match: tdate = dateparser.parse(dre_match.group(1)).date() self.log("date is = {}".format(tdate)) ti['start_date'] = tdate ti['end_date'] = tdate break # and now the format... 
format_sels = response.selector.xpath( '//div[@id="content-detail-page-of-an-article"]/p/text()') ti['tournament_format'] = None for format_sel in format_sels: if ti['tournament_format'] is None: val = format_sel.extract() if 'Legacy' in val: ti['tournament_format'] = 'Legacy' if 'Standard' in val: ti['tournament_format'] = 'Standard' if 'Modern' in val: ti['tournament_format'] = 'Modern' self.log("TournamentItem is {}".format(ti)) # BOOKMARK - so, if I think I have a valid TournamentItem, I need to yield it page_place = 1 for deckgroup_selector in response.selector.xpath( '//div[@class="deck-group"]'): self.parse_deckgroup(response, deckgroup_selector, page_place) page_place += 1
def get_cal(): now = datetime.now() print("edtScraping ", now.strftime("%d/%m/%Y %H:%M")) url = "http://www.ipst-info.net/consultation/default_stage.aspx?stage=aisl" # create a new Firefox session #driver = webdriver.Firefox() driver = webdriver.Remote(command_executor='http://selenium:4444/wd/hub', desired_capabilities=DesiredCapabilities.FIREFOX) driver.implicitly_wait(30) driver.get(url) cours = [] #calcul nombre de semaine avant la fin de l'année finAnnee = dateparser.parse("30 octobre 2020 17:00") today = datetime.now() monday1 = (today - timedelta(days=today.weekday())) monday2 = (finAnnee - timedelta(days=finAnnee.weekday())) numberOfWeekUntilEndOfYear = int(((monday2 - monday1).days / 7) + 1) print("Scrap starting ...") for i in range(numberOfWeekUntilEndOfYear): year = getYear(driver) week = getDayOfWeek(driver) cours = scrapCours(driver, cours, year, week) #click python_button = driver.find_element_by_id( 'Planning_stage1_semaine_suivante') python_button.click() time.sleep(.300) # pour que la page est le temps de charger driver.quit() cal = Calendar() cal.add("summary", "Calendrier Cnam I2") cal.add('version', '2.0') for cour in cours: dateDebut = dateparser.parse(cour.dateDebut) dateFin = dateparser.parse(cour.dateFin) if 'En entreprise' not in cour.matiere and 'nondéfini' not in cour.matiere and 'Férié' not in cour.matiere: event = Event() event.add('dtstart', dateDebut) event.add('dtend', dateFin) # si je veux garder les jour entreprise, férié et non défini # if 'En entreprise' in cour[2] or 'nondéfini' in cour[2] or 'Férié' in cour[2]: # matiere = cour.matiere # else: matiere = cour.matiere[:6] + " " + cour.matiere[6:] event.add('summary', matiere) event.add( 'description', 'Enseignant : ' + cour.enseignant + "\nCommentaire : " + cour.commentaire + "\nhttp://www.ipst-info.net/consultation/default_stage.aspx?stage=aisl" ) cal.add_component(event) print("Scrap end") # sauvegarde du .ics historique dateForIcsName = today.strftime('%Y-%m-%d_%H:%M') with open('/home/scraper/history/calendarCnamI2' + dateForIcsName + '.ics', 'wb') as f: f.write(cal.to_ical()) f.close print('/home/scraper/history/calendarCnamI2' + dateForIcsName + '.ics Saved') # ecrasement de l'ancien sauvegarde du nouveau with open('/home/scraper/last/calendarCnamI2.ics', 'wb') as f: f.write(cal.to_ical()) f.close print("/home/scraper/last/calendarCnamI2.ics Saved") return cal
# def parse_time(str_time):
#     '12:00:00'

import dateparser

f = open('sample.txt')
lines = f.readlines()
time_hash = {}
for line in lines:
    time_status = line.split('::')
    status = (time_status[1]).split('\n')[0]
    # print(status)
    # time = (time_status[0]).split('-')[1]
    # time = time.split(')')[0]
    # # print(time)
    # if status in time_hash:
    #     time_hash[status] -= parse_time(time)
    # else:
    #     time_hash[status] = time
    idx = 1
    time_hash[idx] = dateparser.parse(time_status[0])
    idx += 1
print(time_hash)
def test_module(client: IMAPClient) -> str:
    yesterday = parse('1 day UTC')
    client.search(['SINCE', yesterday])
    return 'ok'
def stt_parse_response(stt_data):

    def truncate(n, decimals=0):
        # drop (rather than round) everything beyond `decimals` places
        multiplier = 10 ** decimals
        return int(n * multiplier) / multiplier

    parse_stt_output_response = {
        'fileid': stt_data[1],
        'dlp': stt_data[2],
        'filename': stt_data[3],
        'callid': stt_data[4],
        'date': str(dateparser.parse(stt_data[5])),
        'year': stt_data[6],
        'month': stt_data[7],
        'day': stt_data[8],
        'starttime': stt_data[9],
        'duration': None,
        'speakeronespeaking': None,
        'speakertwospeaking': None,
        'silencesecs': None,
        'silencepercentage': None,
        'nlcategory': None,
        'sentimentscore': None,
        'magnitude': None,
        'transcript': None,
        'words': [],
        'entities': [],
        'sentences': [],
    }
    string_transcript = ''
    total_speaking_time = 0
    total_speaker_one_speaking = 0
    total_speaker_two_speaking = 0
    agent_search_word = stt_data[0]
    speaker_one_tag = 0
    speaker_two_tag = 0

    # get transcript from stt_data
    for i in stt_data[0]['response']['results']:
        if 'transcript' in i['alternatives'][0]:
            string_transcript += str(i['alternatives'][0]['transcript']) + ' '
    # remove the trailing whitespace
    parse_stt_output_response['transcript'] = string_transcript[:-1]

    # check if the audio file is stereo
    if stt_data[11] == 'true':
        logging.info('Audio file is stereo')
        speaker_one_tag = 1
        speaker_two_tag = 2
    # check if the audio file is mono
    if stt_data[11] == 'false':
        logging.info('Audio file is mono')
        speaker_one_tag = 1
        speaker_two_tag = 2

    # stereo: words are grouped per channel, one channel per speaker
    if stt_data[11] == 'true':
        # get words from stt_data and enrich data
        for element in stt_data[0]['response']['results']:
            for word in element['alternatives'][0]['words']:
                word_secs = (float(word['endTime'].strip('s')) -
                             float(word['startTime'].strip('s')))
                total_speaking_time += word_secs
                if element['channelTag'] == speaker_one_tag:
                    total_speaker_one_speaking += word_secs
                    parse_stt_output_response['words'].append({
                        'word': word['word'],
                        'startsecs': word['startTime'].strip('s'),
                        'endsecs': word['endTime'].strip('s'),
                        'speakertag': element['channelTag'],
                        'confidence': word['confidence'],
                    })
                if element['channelTag'] == speaker_two_tag:
                    total_speaker_two_speaking += word_secs
                    parse_stt_output_response['words'].append({
                        'word': word['word'],
                        'startsecs': word['startTime'].strip('s'),
                        'endsecs': word['endTime'].strip('s'),
                        'speakertag': element['channelTag'],
                        'confidence': word['confidence'],
                    })
        stt_start_time = float(
            stt_data[0]['response']['results'][0]['alternatives'][0]
            ['words'][0]['startTime'].strip('s'))
        stt_end_time = float(
            stt_data[0]['response']['results'][-1]['alternatives'][0]
            ['words'][-1]['endTime'].strip('s'))
        parse_stt_output_response['silencesecs'] = stt_end_time - total_speaking_time
        parse_stt_output_response['silencepercentage'] = truncate(
            parse_stt_output_response['silencesecs'] / stt_end_time * 100)

    # mono: diarization assigns a speakerTag to each word of the last result
    if stt_data[11] == 'false':
        # get words from stt_data and enrich data
        for element in stt_data[0]['response']['results'][-1]['alternatives'][0]['words']:
            word_secs = (float(element['endTime'].strip('s')) -
                         float(element['startTime'].strip('s')))
            total_speaking_time += word_secs
            if element['speakerTag'] == speaker_one_tag:
                total_speaker_one_speaking += word_secs
            if element['speakerTag'] == speaker_two_tag:
                total_speaker_two_speaking += word_secs
            parse_stt_output_response['words'].append({
                'word': element['word'],
                'startsecs': element['startTime'].strip('s'),
                'endsecs': element['endTime'].strip('s'),
                'speakertag': element['speakerTag'],
                'confidence': element['confidence'],
            })
        stt_start_time = float(
            stt_data[0]['response']['results'][-1]['alternatives'][0]
            ['words'][0]['startTime'].strip('s'))
        stt_end_time = float(
            stt_data[0]['response']['results'][-1]['alternatives'][0]
            ['words'][-1]['endTime'].strip('s'))
        parse_stt_output_response['silencesecs'] = stt_end_time - total_speaking_time
        # express silence as a percentage, as in the stereo branch
        parse_stt_output_response['silencepercentage'] = truncate(
            parse_stt_output_response['silencesecs'] / stt_end_time * 100)

    parse_stt_output_response['speakeronespeaking'] = total_speaker_one_speaking
    parse_stt_output_response['speakertwospeaking'] = total_speaker_two_speaking
    # duration of the recognised speech, from the first to the last word
    parse_stt_output_response['duration'] = stt_end_time - stt_start_time
    # place holder for Google AutoML NLP
    parse_stt_output_response['nlcategory'] = 'NA'
    return parse_stt_output_response
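The per-word arithmetic above relies on the Speech-to-Text response reporting word timings as strings like "0.700s", which the code strips and subtracts. A standalone sketch of that step with a made-up word entry:

# Word timings arrive as strings with a trailing "s"; stripping it and
# subtracting gives the speaking time contributed by this word.
word = {'word': 'hello', 'startTime': '0.700s', 'endTime': '1.300s'}
speaking = float(word['endTime'].strip('s')) - float(word['startTime'].strip('s'))
print(round(speaking, 3))  # 0.6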
def format_date(userdate):
    date = dateparser.parse(userdate)
    try:
        return datetime.datetime.strftime(date, "%Y-%m-%d")
    except TypeError:
        return None
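The try/except exists because dateparser.parse returns None for text it cannot interpret, and passing None to strftime raises TypeError. A quick usage sketch of the function above (the inputs are made up):

print(format_date("March 5th 2021"))  # '2021-03-05'
print(format_date("not a date"))      # None -- dateparser.parse returned None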
def process_game_page(url, season="2016-2017", season_range=[8, 7]):
    years = season.split("-")
    years = [int(year.strip()) for year in years]
    match = {}
    r2 = requests.get(url)
    soup = BeautifulSoup(r2.text, 'html.parser')
    body = soup.find("body")
    matchhead = body.find("div", {"class": "match-head"})
    details = matchhead.find_all("li")
    date_temp = dateparser.parse(f"{details[1].text}")
    if date_temp.month < season_range[0]:
        # Must be in the second year of the season
        year = years[1]
    else:
        year = years[0]
    # Need to manually modify the date as there's no year in the string
    match['date'] = dateparser.parse(f"{details[1].text} {year}")
    match['stadium'] = details[2].text.split("\n")[0].strip()
    match['home'] = {}
    match['away'] = {}
    teamdetails = matchhead.find("div", {"class": "match-head__fixture"}).find_all(
        "div", {"class": "match-head__fixture-side"})
    match['home']['team'] = teamdetails[0].find(
        "a", {"class": "match-head__team-name"}).find(
            "span", {"class": "swap-text__target"}).text
    try:
        match['home']['score'] = int(teamdetails[0].find(
            "span", {"class": "match-head__score"}).text)
        match['home']['scores'] = process_team_details(teamdetails[0])
        match['away']['score'] = int(teamdetails[1].find(
            "span", {"class": "match-head__score"}).text)
        match['away']['scores'] = process_team_details(teamdetails[1])
    except:
        pass
    match['away']['team'] = teamdetails[1].find(
        "a", {"class": "match-head__team-name"}).find(
            "span", {"class": "swap-text__target"}).text

    # Parse the line-ups
    try:
        teamslineup = body.find_all("ul", {"class": "team-lineups__list-group"})
        if len(teamslineup) < 4:
            return match
        lineups = {}
        for i, lineup in enumerate(['home', 'away']):
            lineups[lineup] = {}
            players = teamslineup[2 * i].find_all("li")
            for player in players:
                try:
                    number = int(player.find(
                        "span", {"class": "team-lineups__list-player-number"}).text.strip())
                    name = player.find(
                        "span", {"class": "team-lineups__list-player-name"}).text.strip()
                    offs = []
                    ons = []
                    yellows = []
                    reds = []
                    events = player.find_all("span", {"class": "team-lineups__list-events"})
                    for event in events:
                        img = event.find("img")
                        if img:
                            if (img.get("src").split("/")[-1]) == "substitution_off.svg":
                                offs.append(int(event.text.strip().split("'")[0]))
                            elif (img.get("src").split("/")[-1]) == "substitution_on.svg":
                                ons.append(int(event.text.strip().split("'")[0]))
                            elif (img.get("src").split("/")[-1]) == "yellow_card.svg":
                                yellows.append(int(event.text.strip().split("'")[0]))
                            elif (img.get("src").split("/")[-1]) == "red_card.svg":
                                reds.append(int(event.text.strip().split("'")[0]))
                            else:
                                print("Unknown event type: {}".format(
                                    img.get("src").split("/")[-1]))
                    if number <= 15:
                        ons.append(0)
                    lineups[lineup][number] = {"name": name, "on": ons, "off": offs,
                                               "reds": reds, "yellows": yellows}
                except:
                    continue
        match['home']['lineup'] = lineups['home']
        match['away']['lineup'] = lineups['away']
    except:
        pass
    return match
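The match-page date carries no year, so the function parses it once to read the month, chooses the season year based on the August boundary, and then re-parses with the year appended. A minimal standalone sketch of just that step (the date string and helper name are illustrative, not from the original page):

import dateparser

def infer_season_date(date_text, season="2016-2017", season_start_month=8):
    # e.g. date_text = "Saturday 14 January": the string has no year,
    # so pick the season year from which side of August the month falls on.
    years = [int(y.strip()) for y in season.split("-")]
    month = dateparser.parse(date_text).month
    year = years[1] if month < season_start_month else years[0]
    return dateparser.parse(f"{date_text} {year}")

print(infer_season_date("Saturday 14 January"))  # 2017-01-14 00:00:00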
def __init__(self, title, content, date, kind=None):
    self.title = title
    self.content = content
    self.date = dateparser.parse(date).date()
    self.kind = kind
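A possible usage of this constructor, assuming it belongs to a simple post/entry class (the class name Entry is hypothetical); dateparser accepts both absolute and relative strings here, and only the calendar date is kept:

entry = Entry("Release notes", "Version 2.0 shipped.", "yesterday", kind="news")
print(entry.date)  # the calendar date one day before the run date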
def parse(self, response):
    review = ReviewItem()
    product = ProductItem()
    product_id = ProductIdItem()
    contents = response.xpath("//div[@class='yt-lockup-content']")
    pic_contents = response.xpath("//div[@class='yt-lockup-dismissable']")
    for content, pic_content in zip(contents, pic_contents):
        # print response.url
        test_url = self.extract(content.xpath(".//a/@href"))
        full_url = get_full_url(response.url, test_url)
        sid = full_url.split('=')[1]
        title = self.extract(content.xpath(".//a/@title"))
        summary = self.extract_all(content.xpath(
            ".//div[@class='yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2']//text()"))
        date_str = self.extract(content.xpath(
            ".//ul[@class='yt-lockup-meta-info']/li[1]/text()"))
        date_time = dateparser.parse(date_str)
        review_date = datetime.strftime(date_time, "%Y-%m-%d")
        author = self.extract(content.xpath(".//div[@class='yt-lockup-byline']//a//text()"))
        if not author:
            self.go_to_review_page(test_url)
            author = self.get_author(response)
        if not author:
            author = self.extract(content.xpath('//meta[@name="title"]/@content'))
        pic_url = 'https://i.ytimg.com/vi/{}/default.jpg'.format(sid)
        duration_str = self.extract_all(pic_content.xpath('.//span[@class="video-time"]//text()'))
        duration = self.calculate_duration(duration_str)

        # product items
        product['source_internal_id'] = sid
        product['ProductName'] = self.get_product_name(title)
        product['TestUrl'] = full_url
        product['PicURL'] = pic_url

        # review items
        review['source_internal_id'] = sid
        review['TestUrl'] = full_url
        review['ProductName'] = self.get_product_name(title)
        review['TestSummary'] = summary
        review['DBaseCategoryName'] = 'vpro'
        review['Author'] = author
        review['TestDateText'] = review_date
        review['TestTitle'] = title

        # we have different product_id items, for youtube_id:
        product_id['ProductName'] = self.get_product_name(title)
        product_id['source_internal_id'] = sid
        product_id['ID_kind'] = 'youtube_id'
        product_id['ID_value_orig'] = sid
        product_id['ID_value'] = sid
        yield product_id

        # for duration
        product_id['ProductName'] = self.get_product_name(title)
        product_id['source_internal_id'] = sid
        product_id['ID_kind'] = 'video_duration'
        product_id['ID_value'] = duration
        yield product_id

        yield review
        yield product
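The first item of yt-lockup-meta-info on YouTube listing pages is typically a relative upload date such as "3 weeks ago"; dateparser resolves that to an absolute datetime, which the spider then formats. A small sketch of that conversion with a made-up date string:

import dateparser
from datetime import datetime

date_str = "3 weeks ago"  # hypothetical value scraped from the meta-info list
date_time = dateparser.parse(date_str)
review_date = datetime.strftime(date_time, "%Y-%m-%d")
print(review_date)  # e.g. '2024-04-10', three weeks before the run date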
def pre_load(self, data):
    data["source"] = s.BINANCE
    data["openTime"] = dateparser.parse(str(data["openTime"])).isoformat()
    data["closeTime"] = dateparser.parse(str(data["closeTime"])).isoformat()
    return data
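Binance payloads report openTime and closeTime as millisecond Unix timestamps, and dateparser can generally parse such numeric strings directly, which is why the values are passed through str() first. A rough sketch with made-up values; the exact timezone of the result depends on dateparser's settings:

import dateparser

raw = {"openTime": 1591258320000, "closeTime": 1591258379999}  # ms since epoch (made up)
opened = dateparser.parse(str(raw["openTime"]))
print(opened.isoformat())  # around 2020-06-04T08:12:00, subject to timezone settings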
def on_get(self, req, resp):
    # current time
    now = dateparser.parse("now UTC")

    # compute time to each payout
    data = []
    for po in SHARD_DATA:
        payout_time = dateparser.parse(f'today {po["payout"]} UTC')
        delta = payout_time - now
        # Copy the payout data
        row = po.copy()
        row['time_to_payout'] = delta.seconds  # time to payout, in seconds
        data.append(row)

    # sort and format for output
    fields = []
    for po in sorted(data, key=lambda x: x['time_to_payout']):
        hours = po['time_to_payout'] // 3600  # hours to payout
        minutes = (po['time_to_payout'] % 3600) // 60  # minutes to payout
        fields.append({
            'name': f'{hours:02}:{minutes:02} (UTC {po["payout"]})',
            'value': f'{po["emoji"]} [{po["name"]}]({po["swgoh.gg"]})',
            'inline': True
        })

    embed = {
        'description': '**Time until next payout**:',
        'footer': {
            'text': 'Last refresh:',
            'icon_url': 'https://i.imgur.com/OEwutbb.png',
        },
        'thumbnail': {
            'url': 'https://i.imgur.com/OEwutbb.png',
        },
        'timestamp': now.strftime('%Y-%m-%d %H:%M:%S')
    }

    with DiscordAPISession(DISCORD_BASE_URL) as api:
        # First, check for existing messages and delete them
        headers = {
            'Authorization': f'Bot {DISCORD_BOT_TOKEN}',
            'Content-Type': 'application/json'
        }
        messages_resp = api.get(f'channels/{channel_id}/messages', headers=headers)
        if messages_resp.status_code != 200:
            self.logger.debug(messages_resp.raw)
            resp.status = falcon.HTTP_500
            resp.body = json.dumps({"error": "Could not get channel messages"})
            return
        messages = messages_resp.json()
        if (msg_count := len(messages)) > 0:
            # First, clear the channel
            if msg_count == 1:
                # The bulk delete API has a minimum number of message IDs,
                # so we use a different API to delete just a single message
                msgid = messages[0]['id']
                del_resp = api.delete(f'channels/{channel_id}/messages/{msgid}',
                                      headers=headers)
                if del_resp.status_code != 204:
                    self.logger.debug("Error deleting message")
                    self.logger.debug(del_resp.json())
            else:
                # Use the bulk delete API (max 100 messages per call)
                while (batch := [msg['id'] for msg in messages[:100]]):
                    self.logger.debug(batch)
                    del_resp = api.post(f'channels/{channel_id}/messages/bulk-delete',
                                        headers=headers,
                                        json={'messages': batch})
                    if del_resp.status_code != 204:
                        self.logger.debug("Error deleting message")
                        self.logger.debug(del_resp.json())
                    messages = messages[100:]
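The payout countdown works because both datetimes are parsed in the same UTC frame and timedelta.seconds is always non-negative: if today's payout has already passed, the negative delta wraps to the time remaining until tomorrow's payout. A sketch of that arithmetic with a hypothetical payout time:

import dateparser

now = dateparser.parse('now UTC')
payout_time = dateparser.parse('today 18:00 UTC')   # hypothetical shard payout time
seconds_left = (payout_time - now).seconds          # negative deltas wrap past midnight
hours, minutes = seconds_left // 3600, (seconds_left % 3600) // 60
print(f'{hours:02}:{minutes:02} until payout')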
def get_date(self):
    """
    find references to relative or absolute dates in the text,
    remove those date references from self.tokens,
    and return a list of parsed dates [start_date, end_date]
    """
    today = self.today
    today_weekday = self.today_weekday
    tokens = self.tokens
    parse_past = {'PREFER_DATES_FROM': 'past', 'TIMEZONE': 'US/Eastern'}
    parse_future = {'PREFER_DATES_FROM': 'future', 'TIMEZONE': 'US/Eastern'}

    # convert tokens to [[parsed_date, token]...]
    # the date elements are used for the search date range; the tokens are used
    # to remove the date references from the search terms
    date_tokens = [[dateparser.parse(t, settings=parse_future), t] for t in tokens]
    # remove tokens with no mention of dates
    date_tokens = [d for d in date_tokens if d[0]]

    # check edge cases
    if not date_tokens:
        if 'tonight' in tokens:
            # TODO: add filter for time
            search_date = dateparser.parse('today', settings=parse_past)
            date_tokens = [[search_date, 'tonight']]
        if 'this weekend' in self.bigrams:
            # if you're asking about the weekend while it is the weekend
            if today_weekday in ['friday', 'saturday']:
                start_date = today
            else:
                # if it's not yet the weekend
                start_date = dateparser.parse('friday', settings=parse_future)
            end_date = dateparser.parse('sunday', settings=parse_future)
            date_tokens = [[start_date, 'this weekend'], [end_date, '']]
        if 'this week' in self.bigrams:
            start_date = dateparser.parse('today', settings=parse_future)
            end_date = dateparser.parse('saturday', settings=parse_future)
            date_tokens = [[start_date, 'this week'], [end_date, '']]
        # TODO:
        # if 'this month' in self.bigrams:
        #     start_date = dateparser.parse('today', settings=parse_future)
        #     last_day_of_month =
        #     date_tokens = [[start_date, 'this month'], [last_day_of_month, '']]

    search_dates = [d[0] for d in date_tokens]
    date_strings = [d[1] for d in date_tokens]
    self.remove_date_refs_from_tokens(date_strings)

    # set end date time to end of the day
    if len(search_dates) > 1:
        search_dates[1] = search_dates[1] + datetime.timedelta(hours=23, minutes=55)
    return search_dates
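The behaviour above hinges on dateparser's PREFER_DATES_FROM setting: the same token resolves to its next or its most recent occurrence depending on the settings dict passed in. A small illustration using the same settings dicts as the function:

import dateparser

parse_future = {'PREFER_DATES_FROM': 'future', 'TIMEZONE': 'US/Eastern'}
parse_past = {'PREFER_DATES_FROM': 'past', 'TIMEZONE': 'US/Eastern'}

# On a Wednesday, "friday" resolves two days ahead with 'future'
# and five days back with 'past'.
print(dateparser.parse('friday', settings=parse_future))
print(dateparser.parse('friday', settings=parse_past))
print(dateparser.parse('concert', settings=parse_future))  # None: not a date token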
def find_fixture(
    team,
    was_home=None,
    other_team=None,
    gameweek=None,
    season=CURRENT_SEASON,
    kickoff_time=None,
    dbsession=session,
):
    """Get a fixture given a team and optionally whether the team was at home
    or away, the season, kickoff time and the other team in the fixture. Only
    returns the fixture if exactly one is found that matches the input
    arguments, otherwise raises a ValueError.
    """
    fixture = None

    if not isinstance(team, str):
        team_name = get_team_name(team, season=season, dbsession=dbsession)
    else:
        team_name = team
    if not team_name:
        raise ValueError("No team with id {} in {} season".format(team, season))

    if other_team and not isinstance(other_team, str):
        other_team_name = get_team_name(other_team, season=season, dbsession=dbsession)
    else:
        other_team_name = other_team

    query = dbsession.query(Fixture).filter_by(season=season)
    if gameweek:
        query = query.filter_by(gameweek=gameweek)
    if was_home is True:
        query = query.filter_by(home_team=team_name)
    elif was_home is False:
        query = query.filter_by(away_team=team_name)
    elif was_home is None:
        query = query.filter(
            or_(Fixture.away_team == team_name, Fixture.home_team == team_name))
    else:
        raise ValueError("was_home must be True, False or None")

    if other_team_name:
        if was_home is True:
            query = query.filter_by(away_team=other_team_name)
        elif was_home is False:
            query = query.filter_by(home_team=other_team_name)
        elif was_home is None:
            query = query.filter(
                or_(
                    Fixture.away_team == other_team_name,
                    Fixture.home_team == other_team_name,
                ))

    fixtures = query.all()
    if not fixtures:
        raise ValueError(
            ("No fixture with season={}, gw={}, team_name={}, was_home={}, "
             "other_team_name={}, kickoff_time={}").format(
                 season, gameweek, team_name, was_home, other_team_name, kickoff_time))

    if len(fixtures) == 1:
        fixture = fixtures[0]
    elif kickoff_time:
        # team played multiple games in the gameweek: determine the fixture
        # of interest using the kickoff time
        kickoff_date = dateparser.parse(kickoff_time)
        kickoff_date = kickoff_date.replace(tzinfo=timezone.utc)
        kickoff_date = kickoff_date.date()
        for f in fixtures:
            f_date = dateparser.parse(f.date)
            f_date = f_date.replace(tzinfo=timezone.utc)
            f_date = f_date.date()
            if f_date == kickoff_date:
                fixture = f
                break

    if not fixture:
        raise ValueError(
            ("No unique fixture with season={}, gw={}, team_name={}, was_home={}, "
             "kickoff_time={}").format(season, gameweek, team_name, was_home,
                                       kickoff_time))
    return fixture
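When a team has more than one fixture in a gameweek, the disambiguation only compares calendar dates: the requested kickoff_time and each stored Fixture.date are parsed, pinned to UTC, and reduced to dates before comparison. A standalone sketch of that comparison with made-up strings (the date formats are illustrative):

import dateparser
from datetime import timezone

kickoff_time = "2021-01-26T18:00:00Z"                     # hypothetical requested kickoff
fixture_dates = ["2021-01-20 20:15", "2021-01-26 18:00"]  # hypothetical Fixture.date values

kickoff_date = dateparser.parse(kickoff_time).replace(tzinfo=timezone.utc).date()
for d in fixture_dates:
    if dateparser.parse(d).replace(tzinfo=timezone.utc).date() == kickoff_date:
        print("matched fixture on", d)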