示例#1
0
    def from_url(cls, url: str):
        """
        Build an RSSFeed from the feed found at the given url.

        The url is parsed with feedparser. Title, subtitle and image are
        read from the parsed feed; each of them is replaced by a fallback
        when it cannot be read.

        :param url: address of the feed to parse
        :return: an RSSFeed built from the feed url, the title, the
            description and the image url; the last constructor argument
            is passed as None
        """
        data = feedparser.parse(url)

        # The title falls back to the empty string (not None) because it
        # is concatenated into the icon title below.
        try:
            title = data.feed.title
        except Exception:
            title = ""

        try:
            description = data.feed.subtitle
        except Exception:
            description = None

        # Best effort: a feed without a usable image is still a valid feed.
        try:
            image_url_string = data.feed.image.href
            print(f'Found image url at: {image_url_string}')
            image_url = Url(image_url_string, title + " Icon")
        except Exception:
            print('Could not find any image url.')
            image_url = None

        feed_url = Url(url, title)

        return RSSFeed(feed_url, title, description, image_url, None)
示例#2
0
def start_following_feed():
    """
    Start following a feed for which the client provides all the
    metadata. This is useful for the cases where badly formed
    feeds can't be parsed by feedparser.

    The metadata is read from the form parameter :feed_info:, a json
    object with the keys "image", "language", "url", "title" and
    "description".

    :return: "OK"
    """

    # json.loads accepts no positional encoding argument on Python 3
    # (it raises TypeError there); the form value is parsed as is.
    feed_info = json.loads(request.form.get('feed_info', ''))

    image_url = feed_info["image"]
    language = Language.find(feed_info["language"])
    url_string = feed_info["url"]
    title = feed_info["title"]
    description = feed_info["description"]

    url = Url.find(url_string)
    zeeguu.db.session.add(url)
    # Important to commit this url first; otherwise we end up creating
    # two domains with the same name for both the urls...
    zeeguu.db.session.commit()

    feed_image_url = Url.find(image_url)

    feed_object = RSSFeed.find_or_create(url, title, description,
                                         feed_image_url, language)
    feed_registration = RSSFeedRegistration.find_or_create(
        flask.g.user, feed_object)

    zeeguu.db.session.add_all([feed_image_url, feed_object, feed_registration])
    zeeguu.db.session.commit()

    return "OK"
def set_default_exercise_based_prob():
    """
    Recompute the path and the domain of every Url stored in the database.

    Every url is committed on its own, right after it has been updated.
    """
    zeeguu.app.test_request_context().push()
    session = zeeguu.db.session
    session.commit()

    for stored_url in Url.query.all():
        stored_url.path = Url.get_path(stored_url.url)
        domain = DomainName.find(Url.get_domain(stored_url.url))
        stored_url.domain = domain

        session.add_all([stored_url, domain])
        session.commit()
def set_default_exercise_based_prob():
    """
    Walk over all Url rows and refresh their path and domain.

    The path comes from Url.get_path and the domain from DomainName.find,
    both computed from the url's own address. One commit is issued per url.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    all_urls = Url.query.all()
    for current in all_urls:
        current.path = Url.get_path(current.url)

        domain_name = Url.get_domain(current.url)
        found_domain = DomainName.find(domain_name)
        current.domain = found_domain

        for changed in (current, found_domain):
            zeeguu.db.session.add(changed)
        zeeguu.db.session.commit()
示例#5
0
def start_following_feeds():
    """
    A user can start following multiple feeds at once.

    The feeds are passed as the post parameter :feeds:
     which contains a json list with URLs for the feeds to be followed.

    For every url the feed is parsed with feedparser; an RSSFeed is
    created when none exists for that url yet, and the current user
    is registered to it.

    :return: "OK"
    """

    json_array_with_feeds = json.loads(request.form.get('feeds', ''))

    for urlString in json_array_with_feeds:
        feed = feedparser.parse(urlString).feed

        feed_image_url_string = ""
        if "image" in feed:
            feed_image_url_string = feed.image["href"]

        lan = None
        if "language" in feed:
            lan = Language.find(two_letter_language_code(feed))

        url = Url.find(urlString)
        zeeguu.db.session.add(url)
        # Important to commit this url first; otherwise we end up creating
        # two domains with the same name for both the urls...
        zeeguu.db.session.commit()

        feed_object = RSSFeed.find_by_url(url)
        if not feed_object:
            feed_image_url = Url.find(feed_image_url_string)

            # A feed without a title is named after its address; this
            # must be the string, not the Url object built from it.
            title = urlString
            if "title" in feed:
                title = feed.title

            # Not every feed declares a description; reading the
            # attribute unguarded raises for those that do not.
            description = ""
            if "description" in feed:
                description = feed.description

            feed_object = RSSFeed.find_or_create(url, title, description,
                                                 feed_image_url, lan)
            zeeguu.db.session.add_all([feed_image_url, feed_object])
            zeeguu.db.session.commit()

        feed_registration = RSSFeedRegistration.find_or_create(
            flask.g.user, feed_object)

        zeeguu.db.session.add(feed_registration)
        zeeguu.db.session.commit()

    return "OK"
示例#6
0
    def _create_model_object(self):
        """
        Produce a Url from a random faker uri and a random faker sentence.

        A new random uri/title pair is tried for as long as
        self._exists_in_db reports the resulting Url as existing.

        :return: the first Url for which self._exists_in_db is falsy
        """
        while True:
            candidate = Url.find_or_create(
                self.db.session, self.faker.uri(), self.faker.sentence())

            if not self._exists_in_db(candidate):
                return candidate
示例#7
0
    def test_feed_items(self):
        """The first item of the Bild.de home feed has a "published" value."""
        feed_address = (
            "http://www.bild.de/rss-feeds/rss-16725492,feed=home.bild.html")
        bild_feed = RSSFeed(
            Url(feed_address, "Build"),
            "Bild.de Home",
            "build",
            image_url=None,
            language=None,
        )

        newest_item = bild_feed.feed_items()[0]
        assert newest_item["published"]
示例#8
0
    def find(cls, url: str):
        """
        Look up the object whose url equals the Url found for the
        given address.

        :param url: the address to look for
        :return: the matching object, or None when NoResultFound is raised
        """
        from zeeguu.model import Url

        match = None
        try:
            wanted = Url.find(url)
            match = cls.query.filter(cls.url == wanted).one()
        except NoResultFound:
            pass
        return match
示例#9
0
    def test_same_text_does_not_get_created_multiple_Times(self):
        """Posting the same bookmark twice yields equal Text objects."""
        context = u'Die kleine Jägermeister'
        endpoint = '/translate_and_bookmark/de/en'

        with zeeguu.app.app_context():
            url = Url.find('http://mir.lu/stories/german/jagermeister',
                           "Die Kleine Jagermeister (Mircea's Stories)")
            source_language = Language.find('de')

            form_data = {
                "url": url.as_string(),
                "context": context,
                "word": "Die",
            }

            # Post, then look the text up; do this twice in a row.
            texts = []
            for _ in range(2):
                self.api_post(endpoint, form_data)
                texts.append(
                    Text.find_or_create(context, source_language, url))

            first_text, second_text = texts
            assert first_text == second_text
示例#10
0
    def find_or_create(cls, session, user, _origin: str, _origin_lang: str,
                       _translation: str, _translation_lang: str,
                       _context: str, _url: str, _url_title: str,
                       article_id: int):
        """
            if the bookmark does not exist, it creates it and returns it
            if it exists, it ** updates the translation** and returns the bookmark object

        :param session: session used to look up, add and commit objects
        :param user: the user the bookmark belongs to
        :param _origin: the bookmarked word
        :param _origin_lang: language of the word and of its context
        :param _translation: the translation of the word
        :param _translation_lang: language of the translation
        :param _context: the text in which the word occurs
        :param _url: not read by this method; the Url is built from the
            url of the article
        :param _url_title: title given to the Url
        :param article_id: id of the Article the context belongs to
        :return: the found or newly created bookmark, already committed
        """

        origin_lang = Language.find_or_create(_origin_lang)
        translation_lang = Language.find_or_create(_translation_lang)

        origin = UserWord.find_or_create(session, _origin, origin_lang)

        article = Article.query.filter_by(id=article_id).one()

        url = Url.find_or_create(session, article.url.as_string(), _url_title)

        context = Text.find_or_create(session, _context, origin_lang, url,
                                      article)

        translation = UserWord.find_or_create(session, _translation,
                                              translation_lang)

        now = datetime.now()

        # Only the lookup is guarded; any other exception propagates
        # unchanged, as it did before.
        try:
            bookmark = Bookmark.find_by_user_word_and_text(
                user, origin, context)
        except sqlalchemy.orm.exc.NoResultFound:
            bookmark = cls(origin, translation, user, context, now)
        else:
            # the bookmark exists already: update the translation
            bookmark.translation = translation

        session.add(bookmark)
        session.commit()

        return bookmark
示例#11
0
    def test_url_domain(self):
        """Tests the correct retrieval of a domain from a random url

        e.g. 'https://google.com' should be retrieved from
        e.g. 'https://google.com/search'
        """
        url_random = UrlRule().url.as_string()

        # Expected domain: the scheme plus everything up to the first
        # slash that follows the '//' separator.
        url_parts = url_random.split('//', 1)
        domain_should_be = url_parts[0] + '//' + url_parts[1].split('/', 1)[0]

        domain_to_check = Url(url_random, self.faker.word()).domain_name()

        # The message reads "<actual> should be <expected>".
        assert domain_to_check == domain_should_be, (domain_to_check +
                                                     " should be " +
                                                     domain_should_be)
示例#12
0
def add_bookmark(user, original_language, original_word, translation_language,
                 translation_word, date, the_context, the_url, the_url_title):
    """
    Store a bookmark of original_word, translated as translation_word,
    for the given user.

    Both words are looked up lower-cased. The url, the text and the two
    words are added to the session together with the new Bookmark, the
    session is committed, and afterwards
    add_probability_to_existing_words_of_user is called for the bookmark.

    the_url_title is accepted but not used.
    """
    session = zeeguu.db.session

    url = Url.find(the_url)
    text = Text.find_or_create(the_context, translation_language, url)
    origin = UserWord.find(original_word.lower(), original_language)
    translation = UserWord.find(translation_word.lower(),
                                translation_language)

    session.add_all([url, text, origin, translation])

    new_bookmark = Bookmark(origin, translation, user, text, date)
    session.add(new_bookmark)
    session.commit()

    add_probability_to_existing_words_of_user(user, new_bookmark,
                                              original_language)
def bookmark_with_context(from_lang_code, to_lang_code, word_str, url_str,
                          title_str, context_str, translation_str):
    """
        Look up the bookmark of the current user for the given word-text
     pair. When one is found it is reused; otherwise a new one is created.

    :param from_lang_code: code of the language of the word and its context
    :param to_lang_code: code of the language of the translation
    :param word_str: the bookmarked word
    :param url_str: address where the context was found
    :param title_str: title that goes with the address
    :param context_str: the text surrounding the word
    :param translation_str: the translation of the word
    :return: the id of the bookmark, as a string
    """
    db_session = zeeguu.db.session

    source_language = Language.find(from_lang_code)
    target_language = Language.find(to_lang_code)

    word = UserWord.find(word_str, source_language)

    # The url is committed before the text that refers to it is created.
    page_url = Url.find(url_str, title_str)
    db_session.add(page_url)
    db_session.commit()

    text = Text.find_or_create(context_str, source_language, page_url)
    db_session.add(text)
    db_session.commit()

    translated_word = UserWord.find(translation_str, target_language)

    try:
        existing = Bookmark.find_all_by_user_word_and_text(
            flask.g.user, word, text)
        bookmark = existing[0]
        # TODO: Think about updating the date of this bookmark, or maybe
        #   creating a duplicate; otherwise, in the history this
        #   translation will not be visible!
    except Exception:
        # Nothing usable was found: create and store a fresh bookmark.
        bookmark = Bookmark(word, translated_word, flask.g.user, text,
                            datetime.now())
        db_session.add(bookmark)
        bookmark.calculate_probabilities_after_adding_a_bookmark(
            flask.g.user, bookmark.origin.language)
        db_session.commit()

    return str(bookmark.id)
示例#14
0
def add_bookmark(db, user, original_language, original_word,
                 translation_language, translation_word, date, the_context,
                 the_url, the_url_title):
    """
    Create and commit a Bookmark of original_word for the given user.

    The url, the text and both words are obtained through their
    find_or_create methods on db.session.

    :return: the committed Bookmark
    """
    session = db.session

    page_url = Url.find_or_create(session, the_url, the_url_title)
    context_text = Text.find_or_create(
        session, the_context, translation_language, page_url)
    origin_word = UserWord.find_or_create(
        session, original_word, original_language)
    translated_word = UserWord.find_or_create(
        session, translation_word, translation_language)

    bookmark = Bookmark(origin_word, translated_word, user, context_text,
                        date)
    session.add(bookmark)
    session.commit()

    return bookmark
def bookmark_with_context(from_lang_code, to_lang_code, word_str, url_str, title_str, context_str, translation_str):
    """
        Find the current user's bookmark for a word in a given text, or
     create it when the lookup does not yield one.

    :param from_lang_code: language code of the word and of the context
    :param to_lang_code: language code of the translation
    :param word_str: the word being bookmarked
    :param url_str: url of the page the context comes from
    :param title_str: title of that page
    :param context_str: the text in which the word appears
    :param translation_str: translation of the word
    :return: str() of the bookmark id
    """
    from_lang, to_lang = (Language.find(from_lang_code),
                          Language.find(to_lang_code))
    user_word = UserWord.find(word_str, from_lang)

    # Persist the url first, then the text that is attached to it.
    url = Url.find(url_str, title_str)
    zeeguu.db.session.add(url)
    zeeguu.db.session.commit()

    context = Text.find_or_create(context_str, from_lang, url)
    zeeguu.db.session.add(context)
    zeeguu.db.session.commit()

    translation = UserWord.find(translation_str, to_lang)

    try:
        candidates = Bookmark.find_all_by_user_word_and_text(
            flask.g.user, user_word, context)
        found = candidates[0]
        # TODO: Think about updating the date of this bookmark, or maybe
        #   creating a duplicate; otherwise, in the history this
        #   translation will not be visible!
    except Exception:
        found = Bookmark(
            user_word, translation, flask.g.user, context, datetime.now())
        zeeguu.db.session.add(found)
        found.calculate_probabilities_after_adding_a_bookmark(
            flask.g.user, found.origin.language)
        zeeguu.db.session.commit()

    return str(found.id)
示例#16
0
    def test_one_domain_multiple_urls(self):
        """
        Several Urls that share a domain must not lead to a missing or a
        duplicated DomainName when that domain is looked up.
        """
        # The 'original' URL comes from a UrlRule.
        origin_url = UrlRule().url

        # Build between 0 and 10 further URLs that extend the original
        # address with a random word.
        extra_url_count = random.randint(0, 10)
        for _ in range(extra_url_count):
            extended_address = origin_url.as_string() + self.faker.word()
            Url(extended_address, self.faker.word())

        domain = origin_url.domain_name()

        try:
            assert DomainName.find(domain)
        except NoResultFound:
            assert False, "No domains found in database"
        except MultipleResultsFound:
            assert False, "There were multiple DomainNames in the database"
示例#17
0
    def find_or_create(cls, session, _url:str, language=None, sleep_a_bit=False):
        """

            If not found, download and extract all
            the required info for this article.

        :param session: session used to create, add and commit the article
        :param _url: address of the article; its canonical form is used
            for both the lookup and the download
        :param language: language of the article; when not given it is
            taken from the page metadata, or detected from the text when
            the metadata has none
        :param sleep_a_bit: when true, sleep a random 3 to 33 seconds
            after a successful download
        :return: the found or newly created article; None when the lookup
            still fails after ten recovery attempts
        :raises Exception: when newspaper returns an empty text
        """
        # time is needed on the recovery path as well, so it must not be
        # imported only when sleep_a_bit is set.
        import time
        from random import randint

        from zeeguu.model import Url, Article, Language
        import newspaper

        url = Url.extract_canonical_url(_url)

        try:
            found = cls.find(url)
            if found:
                return found

            art = newspaper.Article(url=url)
            art.download()
            art.parse()

            if art.text == '':
                raise Exception("Newspaper got empty article from: " + url)

            if sleep_a_bit:
                print("GOT: " + url)
                sleep_time = randint(3, 33)
                print(f"sleeping for {sleep_time}s... so we don't annoy our friendly servers")
                time.sleep(sleep_time)

            if not language:
                if art.meta_lang == '':
                    art.meta_lang = detect(art.text)
                    zeeguu.log(f"langdetect: {art.meta_lang} for {url}")
                language = Language.find_or_create(art.meta_lang)

            # Create new article and save it to DB
            url_object = Url.find_or_create(session, url)

            new_article = Article(
                url_object,
                art.title,
                ', '.join(art.authors),
                art.text[0:32000],  # any article longer than this will be truncated...
                art.summary,
                None,
                None,
                language
            )
            session.add(new_article)

            session.commit()

            return new_article
        except (sqlalchemy.exc.IntegrityError, sqlalchemy.exc.DatabaseError):
            # "except A or B" only ever caught A; a tuple catches both.
            # Roll back and look the article up again, a few times.
            for i in range(10):
                try:
                    session.rollback()
                    u = cls.find(url)
                    print("Found article by url after recovering from race")
                    return u
                except Exception:
                    print("Exception of second degree in article..." + str(i))
                    time.sleep(0.3)

            # every recovery attempt failed
            return None