예제 #1
0
    def json_serializable_dict(self, with_context=True, with_title=False):
        """
        Build a plain dictionary describing this bookmark, suitable for
        JSON serialization.

        :param with_context: when true, the content of the bookmark's text
         is added under the 'context' key
        :param with_title: when true, the title of the article behind the
         bookmark's text is looked up; if the lookup fails (or this flag
         is false) 'title' is the empty string
        :return: dict with the keys id, to, from, from_lang, to_lang, title,
         url, origin_importance, learned_datetime, origin_rank, starred,
         article_id, created_day, time and, optionally, context
        """
        try:
            translation_word = self.translation.word
        except AttributeError as e:
            translation_word = ''
            zeeguu.log(
                f"Exception caught: for some reason there was no translation for {self.id}"
            )
            print(str(e))

        # The translation language needs the same protection as the word:
        # without it, a bookmark that has no translation would survive the
        # guard above only to fail while the result is assembled.
        try:
            translation_language_code = self.translation.language.code
        except AttributeError:
            translation_language_code = ''

        word_info = Word.stats(self.origin.word, self.origin.language.code)

        learned_datetime = str(
            self.learned_time.date()) if self.learned else ''

        created_day = "today" if self.time.date() == datetime.now().date(
        ) else ''

        bookmark_title = ""
        if with_title:
            # Best effort: a missing article must not prevent serialization.
            try:
                bookmark_title = self.text.article.title
            except Exception as e:
                print(e)
                print(
                    f"could not find article title for bookmark with id: {self.id}"
                )

        result = dict(
            id=self.id,
            to=translation_word,
            from_lang=self.origin.language.code,
            to_lang=translation_language_code,
            title=bookmark_title,
            url=self.text.url.as_string(),
            origin_importance=word_info.importance,
            learned_datetime=learned_datetime,
            # a rank of 100000 is reported as an empty string
            origin_rank=word_info.rank if word_info.rank != 100000 else '',
            starred=self.starred if self.starred is not None else False,
            article_id=self.text.article_id if self.text.article_id else '',
            created_day=created_day,  # human readable stuff...
            time=self.time.strftime(JSON_TIME_FORMAT))

        # 'from' is a Python keyword, so it cannot be passed to dict(...)
        # as a keyword argument and is set separately.
        result["from"] = self.origin.word
        if with_context:
            result['context'] = self.text.content
        return result
def translate_and_bookmark(from_lang_code, to_lang_code):
    """

        @deprecated
        This should be deprecated and /get_possible_translations used instead
        However, it is still used by the zeeguu chrome extension.

        This expects in the post parameter the following:
        - word (to translate)
        - context (surrounding paragraph of the original word )
        - url (of the origin)
        - title (of the origin page)

        /get_possible_translations has very similar behavior, only that
          it focuses on returning the possible alternative translations

    :param from_lang_code: code of the language the word is in
    :param to_lang_code: code of the language to translate to
    :return: on success, json_result of a dict with the keys bookmark_id
     and translation; if a ValueError is raised while minimizing the
     context, translating or bookmarking, the pair (context_str, query),
     where query is None when it had not been computed yet
    """

    word_str = unquote_plus(request.form['word'])

    url_str = request.form.get('url', '')
    title_str = request.form.get('title', '')
    context_str = request.form.get('context', '')

    # Bound before the try block: the handler below returns it, and it would
    # otherwise be undefined exactly when minimize_context() is the call
    # that fails, turning the handled ValueError into an UnboundLocalError.
    query = None

    try:
        minimal_context, query = minimize_context(context_str, from_lang_code,
                                                  word_str)
        translator = Translator(from_lang_code, to_lang_code)
        translations = translator.translate(query).translations

        # the first translation is taken as the best one
        best_guess = translations[0]["translation"]

        bookmark = Bookmark.find_or_create(db_session, flask.g.user, word_str,
                                           from_lang_code, best_guess,
                                           to_lang_code, minimal_context,
                                           url_str, title_str)
    except ValueError as e:
        zeeguu.log(
            f"minimize context failed {e}on: {context_str} x {from_lang_code} x {word_str} "
        )
        return context_str, query

    return json_result(dict(bookmark_id=bookmark.id, translation=best_guess))
def article_opened(session, value, user):
    """
    Mark the article referenced by `value` as opened by `user`, now.

    `value` is the url that comes from the zeeguu event logger. It might
    be the zeeguu url, which is of the form
    https://www.zeeguu.unibe.ch/read/article?articleLanguage=de&articleURL=https://www.nzz.ch/wissenschaft/neandertaler-waren-kuenstler-ld.1358862
    so only the part after the last 'articleURL=' is used; a value without
    that marker is used unchanged.

    :param session: DB session used to look up, store and commit
    :param value: the url as reported by the event logger
    :param user: the user who opened the article
    """
    # rpartition yields the whole string as its last element when the
    # separator is absent, which matches split(...)[-1]
    _, _, url = value.rpartition('articleURL=')
    article = Article.find_or_create(session, url)

    ua = UserArticle.find(user, article) or UserArticle.find_or_create(
        session, user, article, opened=datetime.now())

    # the opened time is refreshed for pre-existing entries too
    ua.opened = datetime.now()
    session.add(ua)
    session.commit()
    log(f"{ua}")
예제 #4
0
def get_subscribed_filters():
    """
    Return the topic filters of the current user (flask.g.user).

    A user might have several filters at once; all of them are returned.
    A filter whose topic cannot be turned into a dictionary is logged
    and left out of the answer.

    :return: json_result of a list holding, for every filter, the
     dictionary produced by its topic's as_dictionary()
    """
    filter_list = []
    for topic_filter in TopicFilter.all_for_user(flask.g.user):
        try:
            topic_info = topic_filter.topic.as_dictionary()
        except Exception as error:
            zeeguu.log(str(error))
        else:
            filter_list.append(topic_info)

    return json_result(filter_list)
예제 #5
0
def get_subscribed_topics():
    """
    Return the topic subscriptions of the current user (flask.g.user).

    A user might be subscribed to several topics at once; all of them are
    returned. A subscription whose topic cannot be turned into a
    dictionary is logged and left out of the answer.

    :return: json_result of a list holding, for every subscription, the
     dictionary produced by its topic's as_dictionary()
    """
    topic_list = []
    for subscription in TopicSubscription.all_for_user(flask.g.user):
        try:
            topic_info = subscription.topic.as_dictionary()
        except Exception as error:
            zeeguu.log(str(error))
        else:
            topic_list.append(topic_info)

    return json_result(topic_list)
예제 #6
0
def cleanup_non_content_bits(text: str):
    """
    Strip leftover non-content fragments from an article text.

    Sometimes newspaper still leaves some individual fragments in the
    article.text; the lines "Advertisement" and "true" (each together
    with its surrounding newlines) are removed here. When anything was
    removed, "clean" is logged.

    :param text: the article text
    :return: the text without those fragments
    """
    cleaned = text.replace("\nAdvertisement\n", "").replace("\ntrue\n", "")

    if cleaned == text:
        return cleaned

    zeeguu.log("clean")
    return cleaned
예제 #7
0
def get_subscribed_searches():
    """
    Return the search subscriptions of the current user (flask.g.user).

    A user might be subscribed to several searches at once; all of them
    are returned. A subscription whose search cannot be turned into a
    dictionary is logged and left out of the answer.

    :return: json_result of a list holding, for every subscription, the
     dictionary produced by its search's as_dictionary()
    """
    searches_list = []
    for subscription in SearchSubscription.all_for_user(flask.g.user):
        try:
            search_info = subscription.search.as_dictionary()
        except Exception as error:
            zeeguu.log(str(error))
        else:
            searches_list.append(search_info)

    return json_result(searches_list)
예제 #8
0
def get_filtered_searches():
    """
    Return the search filters of the current user (flask.g.user).

    A user might have several search filters at once; all of them are
    returned. A filter whose search cannot be turned into a dictionary
    is logged and left out of the answer.

    :return: json_result of a list holding, for every filter, the
     dictionary produced by its search's as_dictionary()
    """
    filtered_searches = []
    for search_filter in SearchFilter.all_for_user(flask.g.user):
        try:
            search_info = search_filter.search.as_dictionary()
        except Exception as error:
            zeeguu.log(str(error))
        else:
            filtered_searches.append(search_info)

    return json_result(filtered_searches)
예제 #9
0
def unfilter_search():
    """
    Remove the search filter with the id given in the 'search_id' form
    field, for the current user (flask.g.user), together with the search
    itself.

    :return: "OK" on success; otherwise a message starting with "OOPS"
     that includes the text of the error. A missing or non-numeric
     search_id is reported the same way.
    """

    try:
        # Parsed inside the try block so that an absent or malformed id
        # yields the error reply below rather than an unhandled ValueError.
        search_id = int(request.form.get('search_id', ''))

        to_delete = SearchFilter.with_search_id(search_id, flask.g.user)
        session.delete(to_delete)
        to_delete = Search.find_by_id(search_id)
        session.delete(to_delete)
        session.commit()
        recompute_recommender_cache_if_needed(flask.g.user, session)

    except Exception as e:
        zeeguu.log(str(e))
        return "OOPS. SEARCH AIN'T THERE IT SEEMS (" + str(e) + ")"

    return "OK"
예제 #10
0
def create_default_bookmarks(session, user, language_code):
    """
    Create the default bookmarks of a language for a user.

    Every entry of bookmark_data[language_code] is passed to
    Bookmark.find_or_create, with "en" as the translation language and
    "Zeeguu Exercises" as the title.

    :param session: DB session handed to Bookmark.find_or_create
    :param user: the user who receives the bookmarks
    :param language_code: key into bookmark_data; also the origin language
    :return: the list of bookmarks, or an empty list if anything at all
     went wrong (the failure is logged, not raised)
    """
    try:
        print("trying to load default bookmarks for " + str(user.name))
        return [
            Bookmark.find_or_create(session, user, entry[0], language_code,
                                    entry[1], "en", entry[2], entry[3],
                                    "Zeeguu Exercises")
            for entry in bookmark_data[language_code]
        ]
    except Exception:
        # all or nothing: a partial result is never handed out
        zeeguu.log(
            "could not load default bookmarks for {0}".format(language_code))
        return []
예제 #11
0
    def update_bookmark_priority(cls, db, user):
        """ Recompute and store the priority of every bookmark of one user
        that is fit for study.

        Any exception is caught: the DB session is rolled back and the
        error is printed together with its traceback.

        :param db: object whose `session` attribute is the DB session
        :param user: The user object
        """
        try:
            bookmarks_for_user = user.all_bookmarks_fit_for_study()
            fit_for_study_count = len(bookmarks_for_user)
            zeeguu.log(f"{fit_for_study_count} bookmarks fit for study")

            if not fit_for_study_count:
                return

            # every element pairs a bookmark with an exercise (or None)
            bookmark_exercise_pairs = [
                cls._get_exercise_of_bookmark(bookmark)
                for bookmark in bookmarks_for_user
            ]

            # highest exercise id; pairs without exercise count as 0
            max_iterations = max(
                0 if pair.exercise is None else pair.exercise.id
                for pair in bookmark_exercise_pairs)

            exercises_and_priorities = []
            for pair in bookmark_exercise_pairs:
                exercises_and_priorities.append(
                    cls._calculate_bookmark_priority(pair, max_iterations))

            # no_autoflush might not be needed, but just to be safe
            with db.session.no_autoflush:
                for computed in exercises_and_priorities:
                    priority_entry = BookmarkPriorityARTS.find_or_create(
                        computed.bookmark, computed.priority)
                    priority_entry.priority = computed.priority
                    db.session.add(priority_entry)

            db.session.commit()
        except Exception as error:
            db.session.rollback()
            print('Error during updating bookmark priority')
            print(error)
            print(traceback.format_exc())
예제 #12
0
 def update_learned_status(self, session):
     """
         To be called when something happened to the bookmark that
         requires its "learned" status to be updated.

         The outcome is logged together with a summary of the exercise
         log. Only when the bookmark turns out to be learned are the
         learned flag and learned time set and the bookmark added to
         the session; nothing is committed here.

     :param session: DB session the bookmark is added to
     :return: None
     """
     is_learned, learned_time = self.is_learned_based_on_exercise_outcomes(
         True)
     exercise_log_summary = ' '.join(
         exercise.short_string_summary()
         for exercise in self.sorted_exercise_log())

     if not is_learned:
         zeeguu.log(
             f"Log: {exercise_log_summary}: bookmark {self.id} not learned yet."
         )
         return

     zeeguu.log(
         f"Log: {exercise_log_summary}: bookmark {self.id} learned!")
     self.learned_time = learned_time
     self.learned = True
     session.add(self)
예제 #13
0
def get_user_articles_sources_languages(user, limit=1000):
    """

    Collect the articles of every language that the user is reading,
    most recent first within each language, logging the progress.

    :param user: the user for which the articles should be fetched
    :param limit: meant as the amount of articles for each source or
     language; NOTE(review): it is accepted but not applied by this
     implementation
    :return: a single list with the articles of all the languages

    """

    all_articles = []

    for language in UserLanguage.all_reading_for_user(user):
        log(f'Getting articles for {language}')
        new_articles = language.get_articles(most_recent_first=True)
        all_articles += new_articles
        log(f'Added {len(new_articles)} articles for {language}')

    return all_articles
예제 #14
0
def bookmarks_to_study(bookmark_count):
    """
    Returns up to <bookmark_count> bookmarks that are recommended
    for the current user (flask.g.user) to study.

    :param bookmark_count: the requested number; converted with int()
    :return: json_result of the list of the bookmarks' serializable dicts
    """

    int_count = int(bookmark_count)
    user = flask.g.user

    to_study = user.bookmarks_to_study(int_count)
    if not to_study:
        # The priorities may never have been computed: they are currently
        # only recomputed after an exercise, and the user might not have
        # done one yet. So update them here and ask once more; maybe this
        # solves the problem.
        zeeguu.log(
            "recomputting bookmark priorities since there seem to be no bookmarks to study"
        )
        BookmarkPriorityUpdater.update_bookmark_priority(zeeguu.db, user)
        to_study = user.bookmarks_to_study(int_count)

    as_json = []
    for bookmark in to_study:
        as_json.append(bookmark.json_serializable_dict())
    return json_result(as_json)
예제 #15
0
    def json_serializable_dict(self, with_context=True):
        """
        Build a plain dictionary describing this bookmark, suitable for
        JSON serialization.

        :param with_context: when true, the content of the bookmark's text
         is added under the 'context' key
        :return: dict with the keys id, to, from, from_lang, to_lang, title,
         url, origin_importance and, optionally, context
        """
        try:
            translation_word = self.translation.word
        except AttributeError as e:
            translation_word = ''
            zeeguu.log(
                f"Exception caught: for some reason there was no translation for {self.id}"
            )
            print(str(e))

        # The translation language needs the same protection as the word:
        # without it, a bookmark that has no translation would survive the
        # guard above only to fail while the result is assembled.
        try:
            translation_language_code = self.translation.language.code
        except AttributeError:
            translation_language_code = ''

        result = dict(
            id=self.id,
            to=translation_word,
            from_lang=self.origin.language.code,
            to_lang=translation_language_code,
            title=self.text.url.title,
            url=self.text.url.as_string(),
            origin_importance=Word.stats(self.origin.word,
                                         self.origin.language.code).importance)

        # 'from' is a Python keyword, so it cannot be passed to dict(...)
        # as a keyword argument and is set separately.
        result["from"] = self.origin.word
        if with_context:
            result['context'] = self.text.content
        return result
예제 #16
0
def get_possible_translations(from_lang_code, to_lang_code):
    """

        Returns a list of possible translations for the posted :param word
        in :param from_lang_code.

        The form must contain the word; :param context (the sentence),
        :param url and :param title of the page where the word was found
        default to the empty string.

        The translations are always made into the native language of the
        current user: the to_lang_code argument is overwritten with it.

        As a side effect, a bookmark is found or created for the word,
        using the first translation.

        :return: json_result of a dict whose 'translations' entry is the
         list of translations

    """

    form = request.form

    context_str = form.get('context', '')
    # a zeeguu reader url carries the real article url after 'articleURL='
    url = form.get('url', '').split('articleURL=')[-1]
    zeeguu.log(f"url before being saved: {url}")

    word_str = form['word']
    title_str = form.get('title', '')

    minimal_context, query = minimize_context(context_str, from_lang_code,
                                              word_str)

    to_lang_code = flask.g.user.native_language.code
    zeeguu.log(f'translating to... {to_lang_code}')

    translator = Translator(from_lang_code, to_lang_code)
    zeeguu.log(f"Query to translate is: {query}")
    translations = translator.translate(query).translations

    # translators talk about quality, but our users expect likelihood;
    # the keys of every translation are renamed accordingly
    renamed_keys = (("quality", 'likelihood'), ('service_name', 'source'))
    for translation in translations:
        for old_key, new_key in renamed_keys:
            translation[new_key] = translation.pop(old_key)

    best_guess = translations[0]["translation"]

    Bookmark.find_or_create(db_session, flask.g.user, word_str, from_lang_code,
                            best_guess, to_lang_code, minimal_context, url,
                            title_str)

    return json_result(dict(translations=translations))
예제 #17
0
def download_from_feed(feed: RSSFeed, session, limit=1000):
    """

        Go through the items of the feed and save as articles those that
        are newer than the last crawl, not yet in the DB, and of
        sufficient quality.

        Session is needed because this saves stuff to the DB.


        last_crawled_time is useful because otherwise there would be a lot of time
        wasted trying to retrieve the same articles, especially the ones which
        can't be retrieved, so they won't be cached.

    :param feed: the feed to crawl; its last_crawled_time is updated to the
     most recent publishing time seen during this crawl
    :param session: the DB session used for saving and committing
    :param limit: stop as soon as this many articles have been saved
    :return: None

    """
    zeeguu.log(feed)
    downloaded = 0
    skipped = 0
    skipped_due_to_low_quality = dict()
    skipped_already_in_db = 0

    last_retrieval_time_from_DB = None
    last_retrieval_time_seen_this_crawl = None

    if feed.last_crawled_time:
        last_retrieval_time_from_DB = feed.last_crawled_time
        zeeguu.log(f"last retrieval time from DB = {last_retrieval_time_from_DB}")

    for feed_item in feed.feed_items():

        if downloaded >= limit:
            break

        # Kept in its own name so that the failure message below reports
        # the url of THIS item; `url` is unassigned (first item) or still
        # holds the previous item's value when following redirects fails.
        original_url = feed_item['url']
        try:
            url = _url_after_redirects(original_url)
        except requests.exceptions.TooManyRedirects:
            zeeguu.log(f"Too many redirects for: {original_url}")
            continue

        try:
            this_article_time = datetime.strptime(feed_item['published'], SIMPLE_TIME_FORMAT)
            this_article_time = this_article_time.replace(tzinfo=None)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not swallowed by the crawl loop.
            zeeguu.log(f"can't get time from {url}: {feed_item['published']}")
            continue

        if last_retrieval_time_from_DB:

            if this_article_time < last_retrieval_time_from_DB:
                skipped += 1
                continue

        title = feed_item['title']
        summary = feed_item['summary']

        existing_article = model.Article.find(url)

        # remember the most recent publishing time seen in this crawl
        if (not last_retrieval_time_seen_this_crawl) or (this_article_time > last_retrieval_time_seen_this_crawl):
            last_retrieval_time_seen_this_crawl = this_article_time

        if existing_article:
            skipped_already_in_db += 1
        else:
            try:

                art = newspaper.Article(url)
                art.download()
                art.parse()

                cleaned_up_text = cleanup_non_content_bits(art.text)

                quality_article = sufficient_quality(art, skipped_due_to_low_quality)
                if quality_article:
                    # NOTE(review): the imported name is not used in this
                    # function; the import is kept in case it is needed
                    # for its side effects - confirm before removing.
                    from zeeguu.language.difficulty_estimator_factory import DifficultyEstimatorFactory

                    # Create new article and save it to DB
                    new_article = zeeguu.model.Article(
                        Url.find_or_create(session, url),
                        title,
                        ', '.join(art.authors),
                        cleaned_up_text,
                        summary,
                        this_article_time,
                        feed,
                        feed.language
                    )
                    session.add(new_article)
                    session.commit()
                    downloaded += 1

                    add_topics(new_article, session)
                    add_searches(title, url, new_article, session)

                    try:
                        session.commit()
                    except Exception as e:
                        zeeguu.log(f'{LOG_CONTEXT}: Something went wrong when committing words/topic to article: {e}')

            except Exception as e:
                # Best effort: one article that fails must not stop the
                # crawl. Only the exception type is logged, as before.
                zeeguu.log(f" {LOG_CONTEXT}: Failed to create zeeguu.Article from {url}\n{str(type(e))}")

    zeeguu.log(f'  Skipped due to time: {skipped} ')
    zeeguu.log(f'  Downloaded: {downloaded}')
    zeeguu.log(f'  Low Quality: {skipped_due_to_low_quality}')
    zeeguu.log(f'  Already in DB: {skipped_already_in_db}')

    if last_retrieval_time_seen_this_crawl:
        feed.last_crawled_time = last_retrieval_time_seen_this_crawl
    session.add(feed)
    session.commit()
예제 #18
0
 def test_languages_exists(self):
     """Pass a message containing a non-ASCII character to zeeguu.log."""
     message = "tüst"
     zeeguu.log(message)