Пример #1
0
def addbook():
    """Create a book from the submitted form and link it to genre, author and user.

    When ``request.form`` is non-empty, a new ``models.Books`` row is built
    from the ``title``, ``synopsis`` and ``year`` form fields. The genre
    (form field ``genres``) and the author (form field ``author``) are looked
    up by name and created as new model objects when no match exists. The
    user is looked up by ``session['user']``.

    The book is committed first, then the user/author/genre are appended to
    its relationships and a second commit is issued.

    Returns:
        The result of ``redirect('/update')``, whether or not a book was
        created.
    """
    if request.form:
        form = request.form

        new_book = models.Books()
        new_book.title = form.get('title')
        new_book.synopsis = form.get('synopsis')
        new_book.year = form.get('year')

        # Reuse an existing genre with this name, otherwise create one.
        genre_name = form.get('genres')
        genre = db.session.query(
            models.Genres).filter_by(
            name=genre_name).first()
        if genre is None:
            genre = models.Genres(name=genre_name)

        # Same get-or-create lookup for the author.
        author_name = form.get('author')
        author = db.session.query(
            models.Authors).filter_by(
            name=author_name).first()
        if author is None:
            author = models.Authors(name=author_name)

        # NOTE(review): `user` is None when no row matches session['user'];
        # it is still appended below, unchanged from the original behavior.
        # Confirm whether callers guarantee a logged-in user.
        user = db.session.query(
            models.Users).filter_by(
            userid=session['user']).first()

        db.session.add(new_book)
        db.session.commit()

        new_book.users.append(user)
        new_book.authors.append(author)
        new_book.genres.append(genre)
        db.session.commit()
    return redirect('/update')
Пример #2
0
def updateauthor(book_id):
    """Replace the author list of a book with the names submitted in the form.

    Every non-blank value of the ``author`` form field is looked up by name
    in ``models.Authors``; names without a match are created and added to
    the session. The resulting list is assigned to ``book.authors`` and
    committed.

    Args:
        book_id: Primary key passed to ``models.Books.query.get``.

    Returns:
        ``redirect('/update')`` on success, or ``redirect('404.html')`` when
        any exception is raised while updating.
    """
    try:
        authors = request.form.getlist('author')
        author_models = []
        for author_name in authors:
            # Skip blank inputs.
            if author_name.strip() == '':
                continue
            author = models.Authors.query.filter_by(name=author_name).first()
            if author is None:
                # Use the submitted name here: `author` is None in this
                # branch, so passing it would create a nameless author.
                author = models.Authors(name=author_name)
                db.session.add(author)
            author_models.append(db.session.merge(author))
        print(authors)
        book = models.Books.query.get(book_id)
        book = db.session.merge(book)
        book.authors = author_models
        db.session.commit()
    except Exception as e:
        # Best-effort handler: report the failure and send the user away.
        print('Could not update author')
        print(e)
        return redirect('404.html')
    return redirect('/update')
Пример #3
0
def _field_value_div(row, css_class):
    """Return the ``div.value`` element inside the ``td`` of *row* matching *css_class*."""
    cell = row.find('td', attrs={'class': css_class})
    return cell.find('div', attrs={'class': 'value'})


def _extract_rating(row):
    """Return the star rating shown in the ``field rating`` cell of *row*.

    Three markups are handled: a ``staticStars`` anchor (count its ``p10``
    spans), a ``stars`` div (read ``data-rating``), or bare ``p10`` spans in
    the cell.
    """
    field_rating = row.find('td', attrs={'class': 'field rating'})
    static_stars = field_rating.find('a', attrs={'class': 'staticStars'})
    if static_stars is not None:
        # Count every matching span. `find` would return a single tag, whose
        # len() is its child count, or None, which has no len().
        return len(static_stars.find_all('span', attrs={'class': 'p10'}))
    stars = field_rating.find('div', attrs={'class': 'stars'})
    if stars is not None:
        return int(stars.get('data-rating'))
    return len(field_rating.find_all('span', attrs={'class': 'p10'}))


def parseUserReviews(g, html, user):
    """Parse a user's review table into review, book and author model objects.

    The ``table#books`` element of *html* is walked row by row. Rows whose
    rating is below 3 are skipped. For every remaining row one
    ``models.Authors``, one ``models.Books`` and one ``models.Reviews``
    object is built, each with a fresh ``uuid.uuid4()`` id.

    Args:
        g: Not used by this function.
        html: HTML markup containing a ``<table id="books">`` with a
            ``<tbody>``.
        user: Value stored as ``actor`` on every review.

    Returns:
        A tuple ``(reviews, books, authors)`` of three lists that are
        index-aligned (one entry per kept row).

    Raises:
        AttributeError: If an expected element is missing from the markup
            (the lookups are not guarded).
    """
    soup = BeautifulSoup(html, 'html.parser')

    reviews = []
    books = []
    authors = []

    table = soup.find('table', attrs={'id': 'books'})
    table_body = table.find('tbody')

    for row in table_body.find_all('tr'):
        review_id = int(re.search(r'\d+', row.get('id')).group())

        div_title = _field_value_div(row, 'title')
        title = div_title.getText().replace("  ", "")
        book_id = int(
            re.search(r'\d+', div_title.find('a').get('href')).group())

        # The author link text is "Surname, Forename"; flip it when both
        # parts are present.
        author_link = row.find('td', attrs={
            'class': 'field author'
        }).find('a')
        author_name_forname = author_link.text.replace(' ', '').split(',')
        if len(author_name_forname) > 1:
            author_name = author_name_forname[1] + " " + author_name_forname[0]
        else:
            # TODO give two columns for name and forename
            author_name = author_name_forname[0]
        author_id = int(re.search(r'\d+', author_link.get('href')).group())

        isbn = _field_value_div(row, 'field isbn').text.replace("  ", "")
        isbn13 = _field_value_div(row, 'field isbn13').text.replace("  ", "")
        avg_rating = float(
            _field_value_div(row, 'field avg_rating').text.replace("  ", ""))
        num_ratings = int(
            _field_value_div(row, 'field num_ratings').text.replace(",", ""))

        # NOTE(review): the value stored as publication_date is read from the
        # "date_added" column - confirm this is intended.
        date_pub_string = _field_value_div(
            row, 'field date_added').text.replace("  ", "")
        date_pub = stringToDatetime(date_pub_string)

        rating = _extract_rating(row)
        if rating < 3:
            continue

        # Prefer the second span of the review cell when there is one.
        spans = row.find('td', attrs={'class': 'review'}).findAll('span')
        if len(spans) > 1:
            review = ascii(spans[1].getText())
        elif len(spans) > 0:
            review = ascii(spans[0].getText())
        else:
            review = None

        # Author
        authorTable = models.Authors(id=uuid.uuid4(),
                                     gid=author_id,
                                     name=ascii(author_name))
        authors.append(authorTable)

        # Book
        bookTable = models.Books(id=uuid.uuid4(),
                                 gid=book_id,
                                 title=ascii(title),
                                 isbn=ascii(isbn),
                                 isbn13=ascii(isbn13),
                                 publication_date=date_pub,
                                 average_rating=avg_rating,
                                 ratings_count=num_ratings)
        books.append(bookTable)

        # Review: attach per-sentence polarity/subjectivity when text exists.
        if review is not None:
            blob = TextBlob(ascii(review))
            sentences = []
            for sentence in blob.sentences:
                if sentence.string is None:
                    continue
                sentences.append(
                    models.tb_sentence(
                        sentence_text=ascii(sentence.string),
                        sentence_polarity=sentence.polarity,
                        sentence_subjectivity=sentence.subjectivity))
            textblob = models.textblob(instantiated=True, sentences=sentences)
        else:
            textblob = models.textblob(instantiated=False)

        reviewTable = models.Reviews(id=uuid.uuid4(),
                                     actor=user,
                                     gid=review_id,
                                     rating=rating,
                                     text=review,
                                     textblob=textblob)
        reviews.append(reviewTable)
    return (reviews, books, authors)
Пример #4
0
def _parseAuthorBig(obj):
    """Build a ``models.Authors`` object including the rating statistics of *obj*.

    Reads ``id``, ``name``, ``average_rating``, ``ratings_count`` and
    ``text_reviews_count`` from *obj*; the rating is converted with
    ``float`` and both counts with ``int``.
    """
    author_cls = models.Authors
    fields = {
        'gid': obj.id,
        'name': ascii(obj.name),
        'average_rating': float(obj.average_rating),
        'ratings_count': int(obj.ratings_count),
        'text_reviews_count': int(obj.text_reviews_count),
    }
    return author_cls(**fields)
Пример #5
0
def _parseAuthorSmall(obj):
    """Build a ``models.Authors`` object with a fresh UUID id and the id/name of *obj*."""
    author_cls = models.Authors
    fields = {
        'id': uuid.uuid4(),
        'gid': obj.id,
        'name': ascii(obj.name),
    }
    return author_cls(**fields)
Пример #6
0
def parse_data(course_blocks):
    """Extract course data from parsed HTML blocks into ``Course`` objects.

    For each block the organization, title, start-date paragraphs and
    instructor links are read. Fields whose markup is missing fall back to
    ``"Not Listed"`` (organization, title) or to an empty list (start dates,
    authors). One ``Course`` is produced per start-date paragraph.

    Side effects:
        When a course lists more than one author, each is saved as a
        ``models.Authors`` row and committed through ``models.db_session``.

    Args:
        course_blocks: Iterable of BeautifulSoup elements, one per course.

    Returns:
        A list of ``Course`` objects.
    """
    course_objects_list = []
    for course in course_blocks:
        # A missing element makes `find` return None, so the chained call
        # raises AttributeError - that is the case each fallback covers.
        try:
            organization = unicode(
                course.find(
                    "div",
                    "c-courseList-entry-university").find('a').get_text())
        except AttributeError:
            organization = "Not Listed"
        try:
            title = unicode(
                course.find("div",
                            "c-courseList-entry-title").find('a').get_text())
        except AttributeError:
            title = "Not Listed"
        try:
            start_dates = course.find(
                "div", "bt3-col-xs-3 bt3-text-right").find_all('p')
        except Exception:
            # Empty list (not None) so the loop below is simply skipped.
            start_dates = []
        try:
            authors = course.find(
                "div", "c-courseList-entry-instructor").find_all('a')
        except AttributeError:
            # Empty list so the len() checks below work and no authors from
            # a previous course leak into this one.
            authors = []

        # Save each author to authors table in Postgres
        # NOTE(review): a single author is not saved to the table, and the
        # multi-author string keeps a trailing ", " - both preserved from
        # the original; confirm whether intended.
        all_authors = ""
        if len(authors) > 1:
            for author_tag in authors:
                author_name = author_tag.get_text()
                # Save each author individually to author database table
                new_author = models.Authors(author_name=author_name,
                                            course_title=title)
                models.db_session.add(new_author)
                models.db_session.commit()
                # Create text string of authors to include in text file
                all_authors += author_name + ", "
        elif len(authors) == 1:
            all_authors = authors[0].get_text()

        # Process and parse date, duration and course note information
        for start_date in start_dates:
            start_date = str(start_date)
            duration = None
            course_begins = None
            course_notes = None
            if "Go at your own pace." in start_date:
                course_notes = "Go at your own pace."
            elif "There are no open sessions." in start_date:
                course_notes = "There are no open sessions."
            else:
                matches = utilties.parse_date_fields(start_date)
                course_begins, duration = utilties.clean_date_data(matches)
                duration = unicode(duration)
            try:
                course_notes = unicode(course_notes)
            except Exception:
                course_notes = "Not listed"

            new_course = Course(organization, title, all_authors,
                                course_begins, duration, course_notes)
            course_objects_list.append(new_course)
    return course_objects_list