Exemplo n.º 1
0
 def run_search_dates_function_invalid_languages(self, text, languages,
                                                 error_type):
     """Call ``search_dates`` with bad ``languages`` and check the error type.

     The raised exception is stored on ``self.error`` so that later checks
     can inspect it.

     :param text: text passed to ``search_dates``.
     :param languages: the ``languages`` argument under test.
     :param error_type: exception class the call is expected to raise.
     """
     try:
         search_dates(text=text, languages=languages)
     except Exception as error:
         self.error = error
         self.assertIsInstance(self.error, error_type)
     else:
         # Without this branch the check passed vacuously whenever
         # ``search_dates`` accepted the arguments without raising.
         self.fail('search_dates did not raise {!r}'.format(error_type))
Exemplo n.º 2
0
def sep(text):
    """Return the first date found among the date-like tokens of ``text``.

    A token is kept when it looks like part of a date: it contains two or
    three ``/`` or ``-`` separators, or an apostrophe (straight or curly).
    The kept tokens are joined with spaces and scanned with
    ``search_dates``; the first matched substring is then parsed again on
    its own.

    :param text: iterable of string tokens.
    :return: the date formatted as ``YYYY/MM/DD``, or ``None`` when nothing
        date-like is found.
    """
    candidates = [
        token for token in text
        if token.count('/') in (2, 3)
        or token.count('-') in (2, 3)
        or "'" in token
        or "’" in token
    ]
    found = search_dates(' '.join(candidates))
    if found is None:
        return None

    # Only the matched substring of the first hit is re-parsed. The previous
    # code looped over the (substring, datetime) tuple itself and relied on
    # returning during the first iteration.
    matched_text = found[0][0]
    reparsed = search_dates(matched_text)
    if not reparsed:
        return None
    return reparsed[0][1].strftime('%Y/%m/%d')  # RETURN DATE IN YYYY/MM/DD
Exemplo n.º 3
0
 def run(self, request, response):
     """Locate dates in ``request['text']`` and return their positions.

     :param request: mapping with ``'lang'`` and ``'text'`` keys and an
         optional ``'debug'`` flag (defaults to ``False``).
     :param response: not used by this method.
     :return: list of dicts with ``'start'``, ``'end'`` and ``'date'``
         (formatted with ``_format``); in debug mode each dict also starts
         with the matched ``'text'``.
     :raises MissingLanguage: when ``request['lang']`` is not in
         ``self.langs``.
     """
     lang = request['lang']
     if lang not in self.langs:
         raise MissingLanguage(lang)

     text = request['text']
     debug = request.get('debug', False)
     # Search once; the text used to be scanned twice (once for the None
     # check and once for the loop).
     matches = search_dates(text, languages=self.langs)

     result = []
     end = 0
     for chunk, date in matches or ():
         # Resume after the previous match so repeated chunks map to
         # successive positions in the text.
         start = text.index(chunk, end)
         end = start + len(chunk)
         entry = {'text': chunk} if debug else {}
         entry['start'] = start
         entry['end'] = end
         entry['date'] = date.strftime(_format)
         result.append(entry)
     return result
Exemplo n.º 4
0
 def _str_to_date(std):
     """Parse a string that should contain exactly two dates.

     Every key of ``rus_to_eng_dic`` found in ``std`` is replaced by its
     value before the text is scanned.

     :param std: text to parse.
     :return: the two ``search_dates`` matches, or two ``('Now', <current
         datetime>)`` placeholders when the text does not yield exactly
         two dates.
     """
     for russian, english in rus_to_eng_dic.items():
         std = std.replace(russian, english)
     # search_dates returns None when nothing is found, so len() must not be
     # applied blindly; a single call also avoids parsing the text twice.
     found = search_dates(std)
     if found is not None and len(found) == 2:
         return found
     now = datetime.datetime.today()
     return [('Now', now), ('Now', now)]
Exemplo n.º 5
0
    def parse(self):
        """
        Parse the table on the right hand side of a wiki page that holds
        the details of a person.

        Fills ``self.birthday``, ``self.died`` and ``self.monarchs`` from the
        rows containing "Born", "Died" and "Monarch"; attributes that are
        already set are left untouched.
        :return: None
        """

        def first_date(cells):
            # Return the datetime of the last match in the first cell that
            # contains a date, or None when no cell does.
            # Non-ASCII characters are dropped first: on some pages unicode
            # characters sit next to the date, e.g.
            # https://en.wikipedia.org/wiki/Spencer_Compton,_1st_Earl_of_Wilmington
            # has only a year with a thin space next to it.
            # TODO: still slow, and this will find dates for the likes of
            #  'now', 'today' etc
            for cell in cells:
                found = search_dates(
                    cell.encode("ascii", "ignore").decode("utf-8")
                )
                if found is not None:
                    return found[-1][-1]
            return None

        logger.info("parseing the PM page for %s", self.path)

        invalid_items = ("\n", " ", "\t")
        for tr in self.info.findAll("tr"):
            txt = tr.getText(separator="|")
            cells = [x for x in txt.split("|") if x not in invalid_items]

            if "Born" in cells and self.birthday is None:
                birthday = first_date(cells)
                if birthday is not None:
                    self.birthday = birthday
            elif "Died" in cells and self.died is None:
                died = first_date(cells)
                if died is not None:
                    self.died = died
            elif "Monarch" in cells and not self.monarchs:
                self.monarchs = cells[1:]
Exemplo n.º 6
0
def date_func(name):
    """Build the ``"datetime": "..."`` fragment describing dates in ``name``.

    The payload is made of three sections:

    1. the ``search_dates`` matches of the stop-word-filtered text, joined
       with ``*xxx*`` and terminated by ``++``;
    2. every ``DATE`` entity found by ``nlp`` in the formatted text, each
       followed by ``*xx*`` and then by ``*uxm*`` when the entity contains
       ``or`` / ``and`` / ``&``, or by ``*uxr*`` when ``search_dates`` finds
       more than one date in it; the section is terminated by ``XXXXX``;
    3. the ``datefinder`` matches plus the resolved ``today`` /
       ``yesterday`` / ``tomorrow`` tokens as ``DD-MM-YYYY``, sorted
       chronologically, each followed by ``*xx*``.

    :param name: raw input text.
    :return: the fragment string, or ``""`` when ``search_dates`` finds
        nothing in the stop-word-filtered text.
    """
    string = date_format(name)
    stop_word = date_stopwords(name)
    found = search_dates(stop_word)
    print('Date time')
    print(found)
    if found is None:
        return ""

    dates = "*xxx*".join(str(match) for match in found) + "++"

    # One parse serves both the entity pass and the token pass below.
    doc = nlp(string)
    for sen in doc.sents:
        for ent in sen.ents:
            if ent.label_ != 'DATE':
                continue
            entity_text = ent.text
            dates += str(entity_text) + "*xx*"
            if re.search('or|and|&', entity_text):
                dates += "*uxm*"
            else:
                # search_dates returns None when it finds nothing; calling
                # len() on that used to raise TypeError.
                entity_dates = search_dates(entity_text)
                if entity_dates is not None and len(entity_dates) > 1:
                    dates += "*uxr*"
    dates += "XXXXX"

    day_format = '%d-%m-%Y'
    resolved = [
        match.strftime(day_format)
        for match in datefinder.find_dates(string)
    ]
    # Only exact tokens are resolved. A token that merely contains one of
    # these words used to be appended verbatim and broke the sort below.
    relative_days = {'today': 0, 'yesterday': -1, 'tomorrow': 1}
    for token in doc:
        offset = relative_days.get(token.text)
        if offset is not None:
            day = datetime.now() + timedelta(days=offset)
            resolved.append(day.strftime(day_format))
    resolved.sort(key=lambda value: datetime.strptime(value, day_format))
    for value in resolved:
        dates += value + "*xx*"

    # ``dates`` always holds at least the "++" and "XXXXX" markers here, so
    # the former empty-string branch could never run.
    return '"datetime": "' + dates + '"'
Exemplo n.º 7
0
async def on_competitive_feed_post(message: discord, bot: commands.Bot):
    """Validate a competitive-feed post and work out its tournament date.

    The post must have at least two lines and contain a Discord link;
    otherwise it is deleted and the author is notified in the
    ``ids.COMPETITIVE_FEED_INFO`` channel, together with a copy of the
    deleted text.

    NOTE(review): ``message`` is annotated as the ``discord`` module; the
    attributes used here (``clean_content``, ``delete``, ``author``) suggest
    a message object - confirm the intended type.
    """
    comp_feed_info = bot.get_channel(ids.COMPETITIVE_FEED_INFO)
    parts = message.clean_content.strip().split("\n")
    # A valid post has a title line plus at least one more line.
    if len(parts) < 2:
        await message.delete()
        await comp_feed_info.send(
            f" {message.author.mention} your message was deleted because it doesn't follow the format. Please see the pins for an example."
        )
        # Echo the deleted text (first 1990 characters) in a code block.
        return await comp_feed_info.send(
            f"```{message.clean_content[:1990]}```")

    # First line is the tournament name with markdown characters removed;
    # the remaining lines form the description.
    tournament_name = (parts[0].replace("*", "").replace("> ", "").replace(
        "_", "").replace("`", ""))
    description = "\n".join(parts[1:]).strip().replace("> ", "")
    discord_invite_url = None

    # Use the first whitespace-separated word that contains a Discord URL.
    for word in description.split():
        if "https://discord.gg/" in word or "https://discord.com/" in word:
            discord_invite_url = word
            break

    if discord_invite_url is None:
        await message.delete()
        await comp_feed_info.send(
            f" {message.author.mention} your message was deleted because it didn't contain a valid Discord server link with the format `https://discord.gg/asdasd`. Please see the pins for an example."
        )
        return await comp_feed_info.send(
            f"```{message.clean_content[:1990]}```")

    try:
        # The second line counts as an ISO timestamp when only digits remain
        # after removing "T" and every non-word character; it is then parsed
        # with a "+00:00" offset appended.
        if re.sub(r"T|\W", r"", parts[1]).isdigit():  # Using ISO string
            date = datetime.datetime.fromisoformat(parts[1] + "+00:00")
            description = "\n".join(parts[2:]).strip().replace(
                "> ", "")  # Update description to remove ISO string
        else:
            # Get all dates
            all_dates = []
            for line in parts[1:]:
                if dates := search_dates(line):
                    all_dates += dates
            # Remove dates that have already passed
            possible_dates = [
                d for d in all_dates
                if d[1] > datetime.datetime.now(d[1].tzinfo)
            ]
            # Check if any date has specified a timezone, if so, strip dates without a timezone
            if utc_dates := [
                    d for d in possible_dates if d[1].tzname() == "UTC"
            ]:  # Prioritize specifying UTC
                possible_dates = utc_dates
            elif tz_dates := [d for d in possible_dates if d[1].tzname()]:
                possible_dates = tz_dates
        # NOTE(review): the visible source ends here; the handler for the
        # ``try`` above and the rest of the function are not shown.
Exemplo n.º 8
0
    def parse(self, response):
        """Yield one concert dict per ``div.txtNew`` element of the page.

        Each dict holds the page URL, the stripped non-empty ``h1`` texts of
        the element under ``'notes'`` and fixed venue details. Elements
        without any non-empty heading text are skipped.

        :param response: response object exposing ``css()`` and
            ``request.url``.
        """
        for div in response.css('div.txtNew'):
            # Strip the heading texts and drop the empty ones. A real list
            # is required: on Python 3 ``filter()`` returns a lazy iterator
            # that never equals ``[]`` and does not support ``len()``.
            stripped = [n.strip() for n in div.css('h1::text').extract()]
            notes = [n for n in stripped if n]

            # Skip the concert if we're just facing an empty list
            if not notes:
                continue

            concert = {
                'website': response.request.url,
                'notes': notes,
                'venue': 'B Sharps Jazz Cafe',
                'venue_address': '648 W Brevard St',
                'venue_website': 'https://www.b-sharps.com/'
            }

            # Look for date and time with dateparser
            # TODO: the matches are not used yet.
            for note in notes:
                search_dates(note)

            yield concert
Exemplo n.º 9
0
def get_links_and_amendments(base_url, elements):
    """Get link data from a set of BS elements (columns in a row).

    :param base_url: URL against which every ``href`` is resolved.
    :param elements: iterable of elements exposing ``find_all('a')``.
    :return: ``(link_data, amend_data)``. ``link_data`` is a list of dicts
        with ``'title'``, ``'href'`` and, when the title contains a known
        keyword, ``'type'``. ``amend_data`` lists the first date found in
        each amendment title.
    """
    # Ordered: the first keyword found in the title decides the type.
    type_by_keyword = (
        ('Letter', 'letter of authorization'),
        ('Providers', 'fact sheet for healthcare providers'),
        ('Patients', 'fact sheet for patients'),
        ('Summary', 'eua summary'),
        ('Instructions', 'instructions for use'),
        ('Amendment', 'amendment'),
    )

    link_data = []
    amend_data = []

    for element in elements:
        for link in element.find_all('a'):
            title = link.get('title')
            data = {
                'title': title,
                'href': urljoin(base_url, link.get('href')),
            }

            # An anchor without a title attribute yields None; it is kept
            # in the output but cannot be classified (the keyword test used
            # to raise TypeError on it).
            searchable = title or ''
            link_type = next(
                (label for keyword, label in type_by_keyword
                 if keyword in searchable),
                None,
            )
            if link_type is not None:
                data['type'] = link_type
            if link_type == 'amendment':
                amend_date = search_dates(title)
                if amend_date:
                    amend_data.append(amend_date[0][1].date())

            link_data.append(data)

    return link_data, amend_data
Exemplo n.º 10
0
def date_parser(text):
    """Find dates in ``text`` with ``search_dates``.

    :param text: text to scan.
    :return: whatever ``search_dates`` returns, or ``[]`` when the call
        raises.
    """
    try:
        return search_dates(text)
    except Exception:
        # Best effort: a failing search counts as "no dates". Narrowed from
        # a bare ``except`` so KeyboardInterrupt and SystemExit propagate.
        return []
Exemplo n.º 11
0
def get_entities_col(column):
    """Return the most frequent entity label among the values of a column.

    Every value is converted to a string and run through ``nlp.process``;
    the most common entity label of that value is recorded. Values that are
    not parseable as floats and in which ``search_dates`` finds a date also
    record ``"DATE"``.

    :param column: iterable of arbitrary values (numbers, sentences, ...).
    :return: the first mode of the recorded labels, or ``None`` when no
        label was recorded.
    """
    labels = []
    for value in column:
        as_text = str(value)
        # NOTE: a "value" can be anything (number, sentence, etc.)
        entity_labels = [ent.label_ for ent in nlp.process(as_text).ents]
        if entity_labels:
            # Most common entity type of this value
            labels.append(max(set(entity_labels), key=entity_labels.count))
        # TODO: Override Cardinal pretty often...
        try:
            float(as_text)
        except ValueError:
            if search_dates(as_text) is not None:
                labels.append("DATE")
    # Hacky fix for cases when there isn't an entity associated with a
    # header: an empty frame has no column 0, which surfaces as KeyError.
    try:
        return pd.DataFrame(labels).mode()[0][0]
    except KeyError:
        return None
Exemplo n.º 12
0
    def parseMsg(txt, tries=0):
        """Extract the time expression of a reminder message.

        The message is normalised with ``Manager.formatKnown`` and scanned
        with ``search_dates``. When the first date found lies in the past,
        the message is rewritten and parsed again, up to three times: the
        first retry uses ``Manager.changeDay``, later retries prefix the
        matched expression with "in ".

        :param txt: message text.
        :param tries: number of retries already performed.
        :return: ``(text, when, timestr)`` - the text with the matched time
            expression removed, the epoch timestamp of the date (``None``
            when no date was found) and the matched expression (``""`` when
            none).
        """
        print("============================")
        print(txt)
        formatted = Manager.formatKnown(txt)
        res = search_dates(formatted, add_detected_language=True)
        if res is None:
            return txt, None, ""

        print("######################", formatted)
        print(res)
        timestr = res[0][0]
        date = res[0][1].timestamp()

        diff = time.time() - date
        print("DIFF", diff, time.ctime(date))
        if diff > 0 and tries < 3:
            print("RETRY ")
            if tries == 0:
                new = Manager.changeDay(formatted)
            else:
                new = formatted.replace(timestr, "in " + timestr)
            return Manager.parseMsg(new, tries + 1)

        # Compare by value: ``is not ""`` tested object identity, which is
        # an implementation detail for strings.
        if timestr != "":
            # remove timestr from reminder
            formatted = txt.replace(timestr, "")
        return formatted, date, timestr
Exemplo n.º 13
0
    def processDateExtraction(self, ext_text):
        """Extract the first date of ``ext_text`` as ``YYYY-MM-DD``.

        The first run of ASCII letters in the text is compared with a list
        of lower-case month names and abbreviations. When it is not in the
        list but is a substring of one of the entries, it is replaced by the
        first such entry before the text is scanned with ``search_dates``.

        :param ext_text: text expected to contain a date.
        :return: ``{'date': 'YYYY-MM-DD'}``, or ``{'date': 'null'}`` when no
            date is found or any error occurs.
        """
        month_names = (
            'jan', 'january', 'feb', 'february', 'mar', 'march', 'apr',
            'april', 'may', 'june', 'jun', 'july', 'jul', 'aug', 'august',
            'sept', 'september', 'oct', 'october', 'nov', 'november',
            'dec', 'december',
        )
        try:
            word = re.search(r'[a-z]+', ext_text, re.IGNORECASE)
            if word is not None and word.group() not in month_names:
                fragment = word.group()
                # First listed name that contains the fragment, if any.
                expanded = next(
                    (name for name in month_names if fragment in name), None)
                if expanded is not None:
                    ext_text = ext_text.replace(fragment, expanded)

            found = search_dates(ext_text)
            if found is None:
                return {'date': 'null'}
            return {'date': found[0][1].strftime("%Y-%m-%d")}
        except Exception:
            return {'date': 'null'}
Exemplo n.º 14
0
    def _infiltrate(self, text):
        """
        Try to extract a single datetime value from an utterance.

        The text is scanned with ``search_dates``. Only an utterance with
        exactly one match is accepted; when several candidates are found
        they are printed and the extraction is reported as unsuccessful.

        :param text: str
        :return: tuple: (was something extracted: bool, extracted data: any)
        1. True, {'raw_subtext': "Tomorrow at 3 o'clock", 'value': datetime_obj}
        2. False, None
        """
        candidates = search_dates(text)
        if not candidates:
            return False, None

        if len(candidates) != 1:
            # Ambiguous utterance: report it, extract nothing.
            print("We have many datetime slot candidates in message we need to resolve this issue!")

            print(candidates)
            return False, None

        raw_subtext, datetime_obj = candidates[0]
        return True, {'raw_subtext': raw_subtext, 'value': datetime_obj}
Exemplo n.º 15
0
    async def parse_datetime(self, arg):
        """Parse the first date mentioned in ``arg``, preferring the future.

        Dots in ``arg`` are replaced by dashes before the text is scanned as
        English, day-first, with future dates preferred. A weekday written
        as "next <weekday>" is pushed one week further. A result that lies
        in the past is moved to today's day of the month and, if it is still
        in the past, one more day ahead.

        :param arg: user supplied text.
        :return: ``(date, matched_text)``, or ``(None, "")`` when no date is
            found.
        """
        parser_settings = {
            "PREFER_DATES_FROM": "future",
            "PREFER_DAY_OF_MONTH": "first",
            "DATE_ORDER": "DMY"
        }
        dates = search_dates(
            arg.replace(".", "-"),
            languages=["en"],
            settings=parser_settings,
        )
        if dates is None:
            return None, ""

        first_match = dates[0]
        date_str = first_match[0]
        parsed = first_match[1]

        lowered_arg = arg.lower()
        lowered_match = date_str.lower()
        weekdays = (
            "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
            "sunday"
        )
        wants_next_week = any(
            "next " + day in lowered_arg and day in lowered_match
            for day in weekdays
        )
        date = parsed + timedelta(days=7) if wants_next_week else parsed

        if date < datetime.now():
            # NOTE(review): replace(day=...) raises ValueError when today's
            # day number does not exist in the parsed month - confirm
            # whether that case can occur here.
            date = date.replace(day=(datetime.now().day))
            if date < datetime.now():
                date = date + timedelta(days=1)

        return date, date_str
Exemplo n.º 16
0
def search_years(query):
    """Return the sorted years referred to by ``query``.

    With no ``search_dates`` match the work is delegated to ``get_years``;
    with a single match its year is returned. With several matches, a match
    whose text is exactly a four-digit number is taken as the definite date
    and a match without any four-digit number as the relative one; the
    relative date is then shifted back by the time elapsed since the
    definite date. Any failure in that step falls back to ``get_years``.

    :param query: text to analyse.
    :return: sorted list of years, or the result of ``get_years(query)``.
    """
    year_pattern = r'[0-9]{4}'
    parsed_query = search_dates(query)

    if parsed_query is None:
        return get_years(query)

    if len(parsed_query) < 2:
        return sorted(found.year for _, found in parsed_query)

    definite = relative = 0
    try:
        for text, date in parsed_query:
            print(text, date)
            year_hits = re.findall(year_pattern, text)
            if not year_hits:
                relative = date
            elif year_hits[0] == text:
                definite = date
        elapsed = datetime.datetime.now() - definite
        relative = relative - elapsed
        return sorted([relative.year, definite.year])
    except Exception as e:
        print(e)
        return get_years(query)
Exemplo n.º 17
0
def ex_date_search(key, cnt: Text, comp, ctx: cla_meta_intf):
    """Record the dates found in ``cnt`` on the context.

    ``cnt`` is scanned with ``search_dates`` in the language ``ctx.lang``.
    When dates are found, their string forms are passed to
    ``ctx.add_result`` together with ``extractor``, ``comp`` and ``key``.

    :return: ``True`` when dates were found and recorded, else ``False``.
    """
    from dateparser.search import search_dates

    found = search_dates(cnt, languages=[ctx.lang])
    if found is None:
        return False
    ctx.add_result(extractor, comp, key, list(map(str, found)))
    return True
Exemplo n.º 18
0
def date_finder(text):
    """Try several date-extraction libraries and print what each one finds.

    Every attempt runs in its own ``try`` block and prints the exception
    instead of raising. Nothing is returned.

    NOTE(review): this block uses Python 2 syntax (``print`` statements,
    ``str.decode``).
    NOTE(review): ``s`` is not defined in this function; presumably a
    module-level string - confirm, or it may have been meant to be ``text``.
    """
    date =""
    # Grok pattern for dates of the form year-month-day.
    date_pattern = '%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}'
    matches = list(datefinder.find_dates(s))
    match_date = re.search('\d{4}-\d{2}-\d{2}', s)

    # Attempt 1: parse every line with ``parser.parse``.
    try:
        print "====using dateutil"
        for i in s.splitlines():
            d = parser.parse(i)
            print(d.strftime("%Y-%m-%d"))
    except Exception as e:
        print e
    # Attempt 2: match the Grok pattern defined above.
    try:
        print "====pygrok==="
        grok = Grok(date_pattern)
        print(grok.match(s))
    except Exception as e:
        print e
    # Attempt 3: first result of datefinder, computed above.
    try:
        print "====using date==="
        if len(matches) > 0:
            date = matches[0]
            print date
        else:
            print 'No dates found'
    except Exception as e:
        print e
    # Attempt 4: the YYYY-MM-DD regex match; ``match_date`` is None when the
    # regex found nothing, which surfaces here as a printed exception.
    try:
        print "====using date==="
        date = datetime.datetime.strptime(match_date.group(), '%Y-%m-%d').date()
        print date
    except Exception as e:
        print e
    # Attempt 5: NLTK chunking of proper nouns followed by a number.
    try:
        print "====using Chunkgrams==="
        chunkGram = r"""NE:{<NNP>+<CD>}"""
        chunkParser = nltk.RegexpParser(chunkGram)
        sentences = nltk.sent_tokenize(text)
        tokenized_sentences = [nltk.word_tokenize(sentence.strip()) for sentence in sentences]
        tagged_sentences = [nltk.pos_tag(i) for i in tokenized_sentences]
        chunked_sentences = [chunkParser.parse(i) for i in tagged_sentences] 
        entity_names = []
        for tree in chunked_sentences:
            entity_names.extend(extract_entity_names(tree))
        print entity_names
    except Exception as e:
        print e
    # Attempt 6: treat the whole text as an ISO date string.
    try:
        print "===using pydatum=="
        datum = Datum()
        print (datum.from_iso_date_string(text))
    except Exception as e:
        print e
    # Attempt 7: dateparser on the text with non-ASCII characters dropped.
    try:
        print "===using dateparser=="
        date = search_dates(text.decode('ascii','ignore'))
        print date
    except Exception as e:
        print e
Exemplo n.º 19
0
    def datetime(self, text, lang='en'):
        """
        Print what dateparser's ``search_dates`` and ``parse`` make of a text.

        Sample sessions:

        $ python -m sagas.nlu.extractor_cli datetime 'tomorrow at eight' en
        $ python -m sagas.nlu.extractor_cli datetime 'two weeks ago' en
            .. search: [('two weeks ago', datetime.datetime(2019, 11, 29, 1, 57, 25, 466421))]
            .. parse: 2019-11-29 01:57:25.468518
        $ python -m sagas.nlu.extractor_cli datetime 'Jumat lalu' id
        $ python -m sagas.nlu.extractor_cli datetime '12 Mei 2008' id
            .. search: [('12 Mei 2008', datetime.datetime(2008, 5, 12, 0, 0))]
            .. parse: 2008-05-12 00:00:00
        $ python -m sagas.nlu.extractor_cli datetime 'Besok malam jam 8' id
            .. search: [('Besok', datetime.datetime(2019, 12, 1, 23, 22, 16, 689529)), ('jam 8', datetime.datetime(2019, 8, 30, 0, 0))]
            .. parse: None
        $ python -m sagas.nlu.extractor_cli datetime 'Minggu depan' id
            .. search: None
            .. parse: 2020-02-19 16:47:48.548957
        $ python -m sagas.nlu.extractor_cli datetime '三月开始去上学' zh
        $ python -m sagas.nlu.extractor_cli datetime '2008年12月に上海に行きたいです。' ja

        :param text: text to analyse.
        :param lang: language code handed to dateparser (default ``'en'``).
        :return: None; both results are printed.
        """
        from dateparser import parse
        from dateparser.search import search_dates

        # Dates located anywhere inside the text.
        found_in_text = search_dates(text, languages=[lang])
        print(f".. search: {found_in_text}")
        # The text interpreted as one single date expression.
        whole_text_date = parse(text, languages=[lang])
        print(f".. parse: {whole_text_date}")
Exemplo n.º 20
0
 async def toEpochTime(self, ctx, *, timeStr: str):
     """Converts a date to a timestamp, and shows that time in your own local time"""
     matches = search_dates(timeStr.upper(),
                            settings={'RETURN_AS_TIMEZONE_AWARE': True})
     # search_dates returns None when the text holds no date; indexing that
     # result used to raise TypeError instead of answering the user.
     if not matches:
         await ctx.send("No date or time could be found in that text.")
         return
     # Only the first date found is converted.
     _, time = matches[0]
     stamp = int(time.timestamp())
     await ctx.send(
         f"`{stamp}` is the timestamp for `{time.strftime('%c in timezone %Z')}`\nThe basic timestamp would look like this: <t:{stamp}:F>"
     )
Exemplo n.º 21
0
 def get_dateparser_dates(self, text=None):
     """
     Extract possible dates with dateparser.

     :param text: text to scan; ``self.TEXT`` is used when it is empty or
         not given.
     :return: the ``search_dates`` matches, or ``[]`` when there are none.
     """
     source = text if text else self.TEXT
     # INFO: 'DATE_ORDER': 'DMY' prevents parsing date like 2004-12-13T00:00:00Z,
     #  use SKIP_TOKENS setting if needed along with DATE_ORDER
     matches = search_dates(source,
                            languages=[self.LANGUAGE],
                            settings=self.DATEPARSER_SETTINGS)
     return matches if matches else []
Exemplo n.º 22
0
def parse_dates_possibilities(text):
    """Return the dates ``search_dates`` finds in an event text.

    The text is upper-cased and passed through ``expand_event_time`` before
    it is scanned as English, with US/Eastern as the source timezone and
    UTC as the target timezone.

    :param text: event description.
    :return: the ``search_dates`` result, or ``None`` when the search
        raises (the error is printed).
    """
    expanded = expand_event_time(text.upper())
    timezone_settings = {'TIMEZONE': 'US/Eastern', 'TO_TIMEZONE': 'UTC'}
    try:
        return search_dates(expanded,
                            languages=['en'],
                            settings=timezone_settings)
    except Exception as e:
        print("search dates errored out ===>")
        print(e)
        return None
Exemplo n.º 23
0
def find_dates(tweets):
    """Search text for dates and return the first one found.

    Hashtags and anything enclosed in round or square brackets are removed
    before the text is scanned.

    :param tweets: text to scan.
    :return: the datetime of the first match, or ``None`` when there is
        none.
    """
    # Raw strings: the same patterns written as plain literals contain
    # invalid escape sequences such as "\#" and "\w".
    mod_string = re.sub(r"\#[\w\_]+", "", tweets)
    mod_string = re.sub(r"[\(\[].*?[\)\]]", "", mod_string)
    found = search_dates(mod_string)
    if found:
        return found[0][1]
    return None
Exemplo n.º 24
0
def fetchBussinessEvents():
    """Scrape the Eventbrite London business-event listing into a DataFrame.

    The first result page is loaded to read the number of pages from the
    pagination bar; every page is then loaded in turn and one row is built
    per event.

    :return: ``pandas.DataFrame`` with the columns ``Event``, ``Venue``,
        ``Date`` (``YYYY-MM-DD``, or ``None`` when no date is available)
        and ``Category``.
    """
    base_url = 'https://www.eventbrite.com/d/united-kingdom--london/business--events/?crt=regular&end_date=05/31/2018&sort=best&start_date=05/01/2018&subcat=1007'
    driver.get(base_url)
    sleep(5)
    parsers = html.fromstring(driver.page_source, driver.current_url)

    pages = parsers.xpath(
        "/html/body/div[4]/section[2]/div[7]/nav/div/div/ul/li")

    # The text of the last-but-one pagination item is read as the page
    # count (XPath positions are 1-based).
    nth = len(pages) - 1
    totalPages = parsers.xpath(
        "/html/body/div[4]/section[2]/div[7]/nav/div/div/ul/li[%s]/a/text()" %
        nth)
    pageNums = int(totalPages[0])

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    prevDate = None

    for i in range(1, pageNums + 1):
        driver.get('{}&page={}'.format(base_url, i))
        sleep(8)
        parsers = html.fromstring(driver.page_source, driver.current_url)

        eventsContainer = parsers.xpath(
            ".//*[@data-automation='event-list-container']/div")

        for events in eventsContainer:
            venuePlace = events.xpath(
                "normalize-space(a/div[2]/div[2]/text())")
            dateText = events.xpath("normalize-space(a/div[2]/time/text())")
            eventName = events.xpath("normalize-space(a/div[2]/div[1]/text())")

            found = search_dates(dateText)
            # More than one date is treated as ambiguous and the previous
            # event's date is reused; the same fallback now also covers
            # "no date found" (None), which used to crash on len().
            if found is None or len(found) > 1:
                found = prevDate
            prevDate = found

            # ``found`` is still None when there is no previous event to
            # borrow a date from.
            eventOn = found[0][1].date().strftime('%Y-%m-%d') if found else None

            rows.append({
                'Event': eventName,
                'Venue': venuePlace,
                'Date': eventOn,
                'Category': 'Bussiness'
            })

    return pd.DataFrame(rows)
Exemplo n.º 25
0
 def check_for_dates(sent: str) -> Triple:
     """Build a "Study date" triple from a sentence holding two dates.

     :param sent: sentence to scan with ``search_dates``.
     :return: a ``Triple`` whose object is ``"<first date> - <second
         date>"`` when exactly two dates are found, otherwise ``None``.
     """
     dates = search_dates(sent)
     if dates is None or len(dates) != 2:
         return None

     study_period = Triple()
     study_period.add_subject('Contribution 1', -1, -1, sent)
     study_period.add_predicate('Study date', -1, -1, sent)
     first_day = dates[0][1].date()
     last_day = dates[1][1].date()
     study_period.add_object(f'{first_day} - {last_day}', -1, -1, sent)
     return study_period
Exemplo n.º 26
0
 def stage3(self, sentence):
     """Return the first date found in ``sentence`` as a one-item list.

     The date is also printed.

     :param sentence: text to scan with ``search_dates``.
     :return: ``[str(date)]`` for the first match, or ``[]`` when no date
         is found.
     """
     matches = search_dates(sentence)
     if matches is None:
         return []
     first_day = matches[0][1].date()
     print(first_day)
     return [str(first_day)]
Exemplo n.º 27
0
def parse_time(time_string, base_time, timezone_string):
    """Parse ``time_string`` relative to ``base_time`` and return it as UTC.

    Three parsers are tried in order, each one only when the previous one
    produced nothing:

    1. ``dateparser.parse`` on the whole string;
    2. ``search_dates``, whose first match is accepted only when it lies
       after ``base_time``;
    3. ``cal.parseDT``, where a result code of 0 counts as a failure.

    Any exception raised by a parser counts as "no result". A naive result
    is localised to ``timezone_string`` when one is given and forced to UTC
    otherwise, then passed through ``datetime_as_utc``.

    :param time_string: text describing a point in time.
    :param base_time: reference time for relative expressions.
    :param timezone_string: timezone name, or ``None``.
    :return: the parsed datetime, or ``None`` when every parser failed.
    """
    base_time = datetime_as_timezone(base_time, timezone_string)

    def future_settings():
        # Built on demand inside each ``try`` so that a failure while
        # building it is swallowed exactly like a parser failure.
        return {
            "PREFER_DATES_FROM": 'future',
            "RELATIVE_BASE": base_time.replace(tzinfo=None)
        }

    try:
        date_time = dateparser.parse(time_string,
                                     languages=['en'],
                                     settings=future_settings())
    except Exception:
        date_time = None

    if date_time is None:
        try:
            results = search_dates(time_string,
                                   languages=['en'],
                                   settings=future_settings())
            if results is not None:
                candidate = results[0][1]
                # Compare on an aware copy; the stored result stays as
                # returned by the parser.
                comparable = candidate
                if comparable.tzinfo is None:
                    comparable = datetime_force_utc(comparable)
                if comparable > base_time:
                    date_time = candidate
        except Exception:
            date_time = None

    if date_time is None:
        try:
            date_time, result_code = cal.parseDT(time_string, base_time)
            if result_code == 0:
                date_time = None
        except Exception:
            date_time = None

    if date_time is None:
        return None

    if date_time.tzinfo is None:
        if timezone_string is None:
            date_time = datetime_force_utc(date_time)
        else:
            date_time = pytz.timezone(timezone_string).localize(date_time)

    return datetime_as_utc(date_time)
Exemplo n.º 28
0
def dateFromText(text, isprojecttask):
    """Return the first date mentioned in ``text``.

    :param text: text to scan with ``search_dates``.
    :param isprojecttask: selects the fallback used when no date is found.
    :return: the datetime of the first match; without a match, ``None``
        when ``isprojecttask`` is truthy and the current datetime
        otherwise.
    """
    dates = search_dates(text)
    if dates is not None:
        return dates[0][1]
    return None if isprojecttask else datetime.datetime.now()
Exemplo n.º 29
0
def _search_first_date(string):
    """Find the first date in ``string`` and return it with the text after it.

    The text is scanned as English with strict parsing.

    :param string: text to scan.
    :return: ``(date, remaining)``. ``date`` is the datetime of the first
        match when ``_is_complete_date`` accepts the matched text, else
        ``None``. ``remaining`` is the text that follows the first
        occurrence of the matched text, and ``''`` when nothing matched.
    """
    candidates = search_dates(string, languages=['en'], settings={'STRICT_PARSING': True})
    if not candidates:
        return None, ''

    first_term, first_date = candidates[0]
    # partition() keeps later repeats of the matched term in the remainder
    # (joining the pieces of split() silently dropped them) and yields ''
    # when the term does not occur verbatim in the string.
    remaining = string.partition(first_term)[2]
    if _is_complete_date(first_term):
        return first_date, remaining
    return None, remaining
Exemplo n.º 30
0
 def parse(self):
     """Collect the feedback rows from every page of the feedback table.

     For each ``feedback-row`` of the ``feedback-table`` element a dict is
     built with ``'rating'`` (int), ``'text'`` (inner HTML), ``'deleted'``
     (1 when the "feedback-suppressed" section exists and its ``display``
     is not ``none``, else 0) and ``'date'`` (string form of the first date
     found in the row, or ``""`` when there is none or it lies in the
     future). After each page the "next" link is clicked and the method
     sleeps for ten seconds.

     :return: list of feedback dicts. Paging stops when clicking the next
         link raises ``ElementNotVisibleException``. ``[]`` is returned as
         soon as reading a table raises a ``WebDriverException``.
     """
     feedback_list = []
     next_page = True
     while next_page:
         try:
             try:
                 table_id = self.driver.find_element_by_xpath(
                     "//*[@id='feedback-table']")
                 # walk over all of the rows in the table
                 for row in table_id.find_elements_by_xpath(
                         ".//tr[@class='feedback-row']"):
                     rating_string = row.find_element_by_xpath(
                         ".//th/div/i/span").get_attribute("innerHTML")
                     feedback = {'rating': int(rating_string.split(' ')[0])}
                     col_2 = row.find_element(By.TAG_NAME, "td")
                     feedback['text'] = col_2.find_element_by_xpath(
                         './/*[@id="-text" or @id="-expanded"]'
                     ).get_attribute("innerHTML")
                     try:
                         div = col_2.find_element_by_xpath(
                             ".//*[@class='a-section a-spacing-top-small feedback-suppressed']"
                         )
                         hidden = div.value_of_css_property(
                             "display") == 'none'
                         feedback['deleted'] = 0 if hidden else 1
                     except selenium.common.exceptions.NoSuchElementException:
                         feedback['deleted'] = 0
                     s = col_2.find_element_by_xpath(
                         './/div/div[2]/span').text
                     try:
                         date = search_dates(s,
                                             settings={'TIMEZONE':
                                                       'UTC'})[0][1].date()
                     except TypeError:
                         # Raised when the search result cannot be indexed.
                         date = None
                     if date is not None and date <= datetime.today().date():
                         feedback['date'] = str(date)
                     else:
                         feedback['date'] = ""
                     feedback_list.append(feedback)
             except selenium.common.exceptions.WebDriverException:
                 current_app.logger.error(
                     str(datetime.now()) +
                     "Selenium did not find Element(s)")
                 return []
             self.driver.find_element_by_xpath(
                 "//*[@id='feedback-next-link']").click()
             current_app.logger.info(
                 str(datetime.now()) + " Clicking NEXT PAGE")
             time.sleep(10)
         except selenium.common.exceptions.ElementNotVisibleException:
             next_page = False
     return feedback_list
Exemplo n.º 31
0
 def test_search_dates_returning_detected_languages_if_requested(
     self, text, add_detected_language, expected
 ):
     """Check the ``search_dates`` result for a given ``add_detected_language``.

     :param text: text passed to ``search_dates``.
     :param add_detected_language: value forwarded to the keyword of the
         same name.
     :param expected: result the call must be equal to.
     """
     self.assertEqual(
         search_dates(text, add_detected_language=add_detected_language),
         expected,
     )
Exemplo n.º 32
0
 def run_search_dates_function_invalid_languages(self, text, languages, error_type):
     """Call ``search_dates`` with bad ``languages`` and check the error type.

     The raised exception is stored on ``self.error`` so that later checks
     can inspect it.

     :param text: text passed to ``search_dates``.
     :param languages: the ``languages`` argument under test.
     :param error_type: exception class the call is expected to raise.
     """
     try:
         search_dates(text=text, languages=languages)
     except Exception as error:
         self.error = error
         self.assertIsInstance(self.error, error_type)
     else:
         # Without this branch the check passed vacuously whenever
         # ``search_dates`` accepted the arguments without raising.
         self.fail('search_dates did not raise {!r}'.format(error_type))
Exemplo n.º 33
0
 def test_date_search_function(self, text, languages, settings, expected):
     """Check the ``search_dates`` result for given languages and settings.

     :param text: text passed to ``search_dates``.
     :param languages: value forwarded as ``languages``.
     :param settings: value forwarded as ``settings``.
     :param expected: result the call must be equal to.
     """
     self.assertEqual(
         search_dates(text, languages=languages, settings=settings),
         expected,
     )