def insertWrap(conn, urlMD5, url, siteLogoUrl, articleImageUrl, siteTitleJa,
               siteTitleRaw, bodyTextJa, bodyTextRaw, langCode):
    """Prettify the Japanese title and body, then store the article row.

    ``siteTitleJa`` and ``bodyTextJa`` are each passed through
    ``pretty.bodyTextJa`` before the record is handed to ``mydb.insert``;
    every other argument is forwarded unchanged and in the same position.

    Returns:
        Always ``True``; exceptions raised by the helpers propagate.
    """
    prettyTitle = pretty.bodyTextJa(siteTitleJa)
    prettyBody = pretty.bodyTextJa(bodyTextJa)
    mydb.insert(
        conn, urlMD5, url, siteLogoUrl, articleImageUrl,
        prettyTitle, siteTitleRaw, prettyBody, bodyTextRaw, langCode
    )
    return True
Пример #2
0
def _urlOpen(conn, url, urlMD5):
    """Fetch a page through the proxy, translate it and store it via mydb.

    The page is downloaded with ``getByProxy`` and parsed with
    BeautifulSoup. If ``checkTitle`` rejects the cleaned page title nothing
    is stored. Otherwise the title and every ``<p class="qtext_para">``
    paragraph that ``makeSentence`` turns into non-empty text are translated
    with ``translate_text('ja', ...)``, prettified with
    ``pretty.bodyTextJa`` and inserted with language code ``'en'``.

    Args:
        conn: Database handle forwarded to ``mydb.insert``.
        url: Address of the page to fetch.
        urlMD5: Identifier stored alongside the URL.

    Returns:
        ``False`` when the title check fails, ``True`` once the row has been
        handed to ``mydb.insert``. (The success path used to fall off the end
        and return ``None``, which callers could not tell apart from the
        failure value by truthiness.)
    """
    html = getByProxy(url)
    bsObj = BeautifulSoup(str(html.text), "html.parser")

    # Clean the <title> once and reuse it for the check, the translation and
    # the raw-title column.
    # NOTE(review): assumes cleanhtml is a pure text transform - confirm.
    siteTitleRaw = cleanhtml(bsObj.title)

    if not checkTitle(recode_uri(siteTitleRaw)):
        print('check title is false and return')
        return False

    siteTitleJa = translate_text('ja', siteTitleRaw)

    bodyJa = []
    bodyEn = []
    for paragraph in bsObj.findAll("p", {"class": "qtext_para"}):
        # NOTE(review): the emptiness check looks at the stringified tag while
        # the stored text is built from the tag object itself. Kept exactly
        # as before because makeSentence is not visible from here.
        if makeSentence(str(paragraph)) != '':
            raw = makeSentence(paragraph)
            bodyJa.append(translate_text('ja', raw))
            bodyEn.append(raw)

    # The same wordmark is stored as both the site logo and the article image.
    logoUrl = 'https://qsf.ec.quoracdn.net/-3-images.logo.wordmark_default.svg-26-32753849bf197b54.svg'
    siteLogoUrl = logoUrl
    articleImageUrl = logoUrl
    bodyTextRaw = "\n".join(bodyEn)
    langCode = 'en'

    siteTitleJa = pretty.bodyTextJa(siteTitleJa)
    bodyTextJa = pretty.bodyTextJa("\n".join(bodyJa))

    mydb.insert(conn, urlMD5, url, siteLogoUrl, articleImageUrl, siteTitleJa,
                siteTitleRaw, bodyTextJa, bodyTextRaw, langCode)
    return True
Пример #3
0
 def insert(self):
     """Store the entered values and reset the form.

     Reads the ``user`` and ``passw`` variables, passes them to
     ``mydb.insert`` as a single ``(user, passw)`` tuple, shows an
     "Inserted" message box and finally sets both variables back to the
     empty string.
     """
     credentials = self.user.get(), self.passw.get()
     mydb.insert(credentials)
     messagebox.showinfo("Inserted", "Your Data has been inserted")
     # Blank both variables so the form is ready for the next record.
     for field in (self.user, self.passw):
         field.set("")
Пример #4
0
def fetchLifeHacker(conn, url):
    """Scrape headline/summary/picture triples from a page and store them.

    The page is fetched with ``getByProxy`` and parsed with BeautifulSoup.
    Three parallel lists are collected:

    * the text of every ``<h1>``,
    * the ``<p>`` text of every ``<div class="excerpt entry-summary">``,
    * the ``data-srcset`` of every ``<source media="--small">`` found inside
      a ``<picture>``.

    Their lengths are printed, then the lists are walked in lockstep; for
    each triple the headline and summary are translated with
    ``translate_text('ja', ...)`` and a row is written with ``mydb.insert``.

    Args:
        conn: Database handle forwarded to ``mydb.insert``.
        url: Address of the page to scrape.

    Returns:
        None.
    """
    html = getByProxy(url)
    bsObj = BeautifulSoup(html.text, "html.parser")

    head = [h1.text for h1 in bsObj.findAll("h1")]
    summary = [
        excerpt.p.text
        for excerpt in bsObj.findAll("div", {"class": "excerpt entry-summary"})
    ]
    pic = []
    for picture in bsObj.findAll("picture"):
        for source in picture.findAll("source", {"media": "--small"}):
            pic.append(source["data-srcset"])

    print(len(head))
    print(len(summary))
    print(len(pic))

    host = getHost(url)

    # zip() stops at the shortest list, so a page with fewer summaries or
    # pictures than headlines no longer dies with IndexError part-way through.
    for headline, bodyEn, imgUrl in zip(head, summary, pic):
        title = translate_text('ja', headline)
        bodyJa = translate_text('ja', bodyEn)
        # NOTE(review): arguments are passed exactly as before. mydb.insert is
        # not visible here - confirm the column order (imgUrl is passed twice
        # and no language code is supplied).
        mydb.insert(conn, 'id' + str(time.time()), url, host, imgUrl, title,
                    imgUrl, bodyEn, bodyJa)
Пример #5
0
def downloader(list_url, db):
    """Download every not-yet-known picture and register it in the database.

    For each URL in ``list_url``:

    1. Skip it when ``mydb.check`` reports the URL as already present.
    2. Download it to ``path + <uuid4>`` and compute the file's hash with
       ``sha256hex``.
    3. If ``mydb.check_sha256`` reports the hash as present, delete the file
       again; otherwise insert the record, create the small picture with
       ``spic.get_small_pic`` and register that one through
       ``mydb.insert_small``.

    Args:
        list_url: Iterable of picture URLs.
        db: Database handle forwarded to the ``mydb`` helpers.

    Returns:
        None. Progress messages are printed.
    """
    for url in list_url:
        if mydb.check(db, url):
            print("Exist picture")
            continue

        my_uuid = str(uuid.uuid4())
        name = path + my_uuid

        mypic = urllib2.urlopen(url)
        try:
            # The file must be closed (and therefore flushed) BEFORE it is
            # hashed; hashing while the handle was still open could read a
            # truncated or empty file and produce a wrong digest.
            with open(name, 'wb') as out_file:
                out_file.write(mypic.read())
        finally:
            # Release the HTTP connection even if reading or writing failed.
            mypic.close()

        sha256 = sha256hex(name)

        if mydb.check_sha256(db, sha256):
            print("Exist, sha256")
            os.remove(name)
            print("Remove " + name)
        else:
            mydb.insert(db, url, my_uuid, sha256)

            name_small = spic.get_small_pic(my_uuid)
            print(name_small)

            sha256_small = sha256hex(spic.path_small_folder + name_small)
            mydb.insert_small(db, name_small, sha256_small)

            print("Add in table")
Пример #6
0
def addpost():
    try:
        l=gettitle()
        print 'list len is ', str(len(l))
        for one in l:
            res = mydb.ishave(one.get('title'))
            print str(res)+'  left is res ,right is title   '+str(one.get('title').encode('utf-8'))
            if  res:
                c=getdetail(one.get('url'))
                d={}
                
                d['post_title']=one.get('title').encode('utf-8')
                print 'title is 2222 %s '% one.get('title').encode('utf-8')
                d['post_content']=c.encode('utf-8')
                d['type']=3
                print d.get('post_title'),d.get('post_url')
                if not d.get('post_title'):
                    d['post_title']='none'
                if not d.get('post_content'):
                    d['post_contetn']='none'
                mydb.insert(d)
                print 'insert ',d.get('post_title')
    except Exception,e:
        print 'error msg',e 
Пример #7
0
    def on_accept(self):
        """Handle a click on the "Accept" button: add or modify a record.

        Parameters: none
        Returns: nothing
        Author: Podkolzin M.
        """
        checks = (
            (self.name, "str"),
            (self.year, "int"),
            (self.director, "str"),
            (self.genre, "str"),
            (self.time, "int"),
        )
        # Every field is validated (no short-circuit); the results are summed
        # and the total must be 5 for the form to be accepted.
        score = sum([s.validate_entry(entry, kind) for entry, kind in checks])
        if score != 5:
            mb.showerror("Ошибка", "Заполните поля корректно!")
            return

        # Reject a name that already exists unless it is the row being edited.
        if (self.name.get() in self.app.db
                and self.name.get() != self.row["text"]):
            mb.showerror("Ошибка", "Запись уже есть в базе!")
            return

        if not self.is_edit:
            mydb.insert(self.app.db, self.name.get(),
                        (self.year.get(), self.director.get(),
                         self.genre.get(), self.time.get()))
        else:
            # "name" is updated last, in the same order as the original calls.
            columns = (
                ("year", self.year),
                ("director", self.director),
                ("genre", self.genre),
                ("time", self.time),
                ("name", self.name),
            )
            for column, entry in columns:
                mydb.update(self.app.db, self.row["text"], column,
                            entry.get())

        self.app.print_records(self.app.db)
        self.dlg.destroy()