def search_tweets(cnx, user_input,list,count):
    """
    Function to search tweets with given query

    For every tag a search request is sent through ``connectToTwitter`` and
    each returned status is stored (user first, then tweet), committing after
    each insert. Once all tags are processed duplicate tags are removed, the
    work is committed and the connection is closed.

    :param cnx: Connection object; it is always closed before returning,
        even when a request or a database call raises
    :param user_input: Domain to search tweets in
    :param list: tags associated with the domain (the name shadows the
        builtin but is kept so keyword callers keep working)
    :param count: Number of tweets to be returned
    :return: None- Tweets and Users are added to the database
    """
    cur = cnx.cursor(buffered=True)
    searchUrl = "https://api.twitter.com/1.1/search/tweets.json?"
    countPart = "count=" + str(count) + "&"
    try:
        for tag in list:
            # NOTE(review): the tag is appended to the URL unescaped; confirm
            # that callers only pass URL-safe values.
            finalUrl = searchUrl + countPart + "q=" + tag
            getJson = connectToTwitter(finalUrl, "GET", b"", None)
            parsedJson = json.loads(getJson)
            for status in parsedJson["statuses"]:
                try:
                    Users.addUsers(cur, status)
                    cnx.commit()
                    addTweet(cur, status)
                    cnx.commit()
                except Exception:
                    # Only ordinary errors are swallowed; KeyboardInterrupt
                    # and SystemExit now propagate. The remaining statuses
                    # of this tag are abandoned, as before.
                    print("user or tweet skipped")
                    break
                Tags.addTweetTags(cnx, user_input, status)
        Tags.removeTagDuplicates(cnx)
        cnx.commit()
    finally:
        # Release the connection on every exit path.
        cnx.close()
def parse(self, inStream):
    """Read the header, then keep reading tags until an end tag shows up.

    ``self.readHeader`` returns the stream used for the tag data.
    ``self._context["tags"]`` is reset to an empty list and handed to
    ``Tags.readTag`` on every call. Reading stops as soon as the object
    returned by ``Tags.readTag`` is exactly of type ``Tags.TagEnd``.
    """
    body_stream = self.readHeader(inStream)
    self._context["tags"] = []
    current_tag = Tags.Tag(self)
    while True:
        if type(current_tag) is Tags.TagEnd:
            break
        current_tag = Tags.readTag(
            body_stream, self._context["tags"], self._context)
def __init__(self, db):
    """Bind the ``Machine`` collection of ``db`` and create helper objects.

    Also ensures a unique compound index on ``(username, host)``.
    """
    DBUtil.__init__(self, db)
    self.collection = db.Machine
    self.passHelper = PasswordHelper.PasswordHelper(key)
    self.userdb = Users.Users(db)
    self.roleDb = Role.Role(db)
    self.tagDB = Tags.Tags()
    # indexes
    unique_machine_key = [('username', ASCENDING), ('host', ASCENDING)]
    self.collection.create_index(unique_machine_key, unique=True)
def insertTags(self, contentId): content = self._title + self._intro + self._detail self._tags = Tags.getTags(content) values = [] for tag in self._tags: values.append((tag[0], contentId)) try: self._mysqlCursor.executemany(AWSArticle.TAGS_INSERT_COMMAND, values) except Exception, e: print "插入Tags异常", e
def __init__(self, db):
    '''
    General description:
    Binds the ``Tool`` collection of ``db``, creates the helper stores used
    by the other methods and ensures the indexes on ``name`` (unique) and
    ``tag`` (non-unique).
    '''
    DBUtil.__init__(self, db)
    self.collection = db.Tool
    self.versionsDB = Versions.Versions(db)
    self.tagDB = Tags.Tags()
    # indexes
    for field_name, is_unique in (('name', True), ('tag', False)):
        self.collection.create_index([(field_name, ASCENDING)],
                                     unique=is_unique)
def boyerMoore(search,text,start=0):
    """Return the index of the first match of ``search`` in ``text``.

    The scan compares the pattern right-to-left against a window of the text
    and, on a mismatch, advances the window by the shift recorded for the
    text element under the window's last position.

    :param search: pattern to look for
    :param text: sequence to search in
    :param start: index to start searching from; negative values are
        treated as 0
    :return: index of the match, or -1 when there is none
    """
    start = max(start, 0)
    pattern_len = len(search)
    text_len = len(text)
    if pattern_len > text_len:
        return -1

    # presumably a mapping that yields ``pattern_len`` for elements that
    # were never stored - verify against Tags.nullableDict
    shifts = Tags.nullableDict(pattern_len)
    for pos in range(pattern_len - 1):
        shifts[search[pos]] = pattern_len - pos - 1

    window_end = pattern_len - 1 + start
    while window_end < text_len:
        pattern_idx = pattern_len - 1
        text_idx = window_end
        # walk backwards while the window keeps matching the pattern
        while pattern_idx >= 0 and text[text_idx] == search[pattern_idx]:
            pattern_idx -= 1
            text_idx -= 1
        if pattern_idx == -1:
            return text_idx + 1
        window_end += shifts[text[window_end]]
    return -1
def __init__(self):
    """Set up the error bookkeeping, load the tables and attach ratings.

    After the four tables are created, every rating found on every user is
    passed to ``add_rating`` of the movie entry stored under the same key.
    """
    self.learningRate = .0001
    self.errorX = []
    self.errorT = []
    self.errorXT = []
    self.avgError = {"x": [], "t": [], "xt": []}
    self.movies = m.MovieTable("movies")
    self.users = u.UserTable("ratings")
    self.tags = t.TagTable("tags")
    self.links = l.LinksTable("links")

    user_table = self.users.table
    for user in user_table:
        user_ratings = user_table[user].ratings
        for rated_key in user_ratings:
            self.movies.table[rated_key].add_rating(user_ratings[rated_key])
def __init__(self):
    '''
    General description:
    Binds the ``DeploymentUnit`` collection of the module-level ``mongodb``
    handle, creates the helper stores used by the other methods and ensures
    a unique index on ``name``.
    '''
    db = mongodb
    DBUtil.__init__(self, db)
    self.collection = db.DeploymentUnit
    self.deploymentUnitApprovalStatusDB = (
        DeploymentUnitApprovalStatus.DeploymentUnitApprovalStatus())
    self.tagDB = Tags.Tags()
    self.buildDB = Build.Build()
    self.deploymentFieldsDB = DeploymentFields.DeploymentFields(db)
    self.deploymentUnitTypeDB = DeploymentUnitType.DeploymentUnitType()
    self.statedb = State.State(db)
    # self.deploymentUnitSetDB = DeploymentUnitSet.DeploymentUnitSet()
    # indexes
    name_index = [('name', ASCENDING)]
    self.collection.create_index(name_index, unique=True)
def AddMissingChildren(self):
    """Create a new ``Tags.Tags`` child, keep it on ``self.tags`` and add it."""
    # add tags
    new_tags = Tags.Tags()
    self.tags = new_tags
    self.Add(self.tags)
def upsert( self, art ):
    """Insert or update an article

    ``art`` is a dict describing the article. When it contains an ``'id'``
    key the existing ``article`` row is updated, otherwise a new row is
    inserted and its id is read back from ``article_id_seq``.

    Keys read unconditionally: ``title``, ``byline``, ``pubdate``,
    ``srcurl``, ``permalink``, ``srcorg``, ``journos`` (asserted) and
    ``content`` (passed to ``Tags.generate``).
    Keys filled in on ``art`` when missing: ``urls``, ``lastscraped``,
    ``lastseen``, ``description``.
    Optional keys: ``id``, ``srcid``, ``images``, ``commentlinks``,
    ``srcorgname`` (read whenever ``commentlinks`` is present).

    Besides the ``article`` row, the related url, content, indexing-queue,
    scrape-error, image, comment-link, tag, journo-attribution and html-cache
    data is refreshed. No commit is issued in this function.

    Returns the article id.
    """
    # if no separate 'urls' set, create it
    if not 'urls' in art:
        art['urls'] = set((art['permalink'], art['srcurl']))

    # fill in some defaults if missing
    if 'lastscraped' not in art:
        art['lastscraped'] = datetime.now()
    if 'lastseen' not in art:
        art['lastseen'] = datetime.now()
    if 'description' not in art:
        art['description'] = ukmedia.FirstPara(art['content'])

    CheckArticle( art )

    # send text to the DB as utf-8
    title = art['title'].encode( 'utf-8' )
    byline = art[ 'byline' ].encode( 'utf-8' )
    description = art['description'].encode( 'utf-8' )
    # timestamps are passed to the database in their string form
    pubdate = "%s" %(art['pubdate'])
    lastscraped = "%s" % (art['lastscraped'])
    lastseen = "%s" % (art['lastseen'])
    firstseen = lastseen    # it's a new entry
    srcurl = art['srcurl']
    permalink = art['permalink']
    srcorg = art['srcorg']

    # phasing out srcid...
    if 'srcid' in art:
        srcid = art['srcid']
    else:
        srcid = art['permalink']

    wordcount = None
    content = None
    # does article include content?
    if 'content' in art:
        content = art['content'].encode( 'utf-8' )
        # noddy wordcount
        txt = ukmedia.StripHTML( art['content'] )
        wordcount = len( txt.split() );

    # send to db!
    cursor = DB.conn().cursor()

    # the presence of an 'id' decides between UPDATE and INSERT
    updating = False
    if 'id' in art:
        updating = True

    if updating:
        # update existing
        article_id = art['id']
        q = 'UPDATE article SET (title, byline, description, lastscraped, pubdate, lastseen, permalink, srcurl, srcorg, srcid, wordcount, last_comment_check) = (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) WHERE id=%s'
        # last_comment_check is set to the scrape time
        cursor.execute(q, (title, byline, description, lastscraped, pubdate, lastseen, permalink, srcurl, srcorg, srcid, wordcount, lastscraped, article_id))
    else:
        # insert new
        q = 'INSERT INTO article (title, byline, description, lastscraped, pubdate, firstseen, lastseen, permalink, srcurl, srcorg, srcid, wordcount, last_comment_check) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        cursor.execute( q, ( title, byline, description, lastscraped, pubdate, firstseen, lastseen, permalink, srcurl, srcorg, srcid, wordcount, lastscraped ) )
        # get the newly-allocated id
        cursor.execute( "select currval('article_id_seq')" )
        article_id = cursor.fetchone()[0]

    # add the known urls for the article
    if updating:
        cursor.execute( "DELETE FROM article_url WHERE article_id=%s", (article_id,))
    for url in set(art['urls']):
        cursor.execute( "INSERT INTO article_url (url,article_id) VALUES (%s,%s)", (url,article_id))

    # update content, if included
    if content is None:
        insert_content = False
    else:
        insert_content = True
        if updating:
            # TODO: keep multiple revisions to track changes
            # has the content actually changed?
            cursor.execute("SELECT id FROM article_content WHERE article_id=%s AND content=%s", (article_id,content))
            foo = cursor.fetchall()
            # gah... couldn't get cursor.rowcount to work...
            if len(foo)>=1:
                # no change, so just leave it as is
                insert_content = False

    if insert_content:
        # replace whatever content was stored before
        cursor.execute("DELETE FROM article_content WHERE article_id=%s", (article_id,))
        q = 'INSERT INTO article_content (article_id, content,scraped) VALUES ( %s,%s,%s )'
        cursor.execute(q, (article_id, content, lastscraped))

    # queue it for xapian indexing
    # NOTE(review): nesting reconstructed from a flattened source - confirm
    # that the queueing is meant to run even when the content is unchanged.
    cursor.execute("DELETE FROM article_needs_indexing WHERE article_id=%s", (article_id,))
    cursor.execute("INSERT INTO article_needs_indexing (article_id) VALUES (%s)", (article_id,))

    # if there was a scraper error entry for this article, delete it now
    cursor.execute( "DELETE FROM error_articlescrape WHERE srcid=%s", (srcid,) )

    # if there were images, add them too
    if updating:
        cursor.execute("DELETE FROM article_image WHERE article_id=%s", (article_id,))
    if 'images' in art:
        for im in art['images']:
            cap = im['caption'].encode('utf-8')
            # the credit is optional and stored as an empty string if absent
            cred = ''
            if 'credit' in im:
                cred = im['credit'].encode('utf-8')
            cursor.execute("INSERT INTO article_image (article_id,url,caption,credit) VALUES (%s,%s,%s,%s)", (article_id, im['url'], cap, cred))

    # if there were commentlinks, add them too
    if 'commentlinks' in art:
        for c in art['commentlinks']:
            c['source'] = art['srcorgname']
            CommentLink.upsert(article_id, c)

    # add tags
    Tags.generate(article_id, art['content'])

    # attribute journos
    assert 'journos' in art
    cursor.execute("DELETE FROM journo_attr WHERE article_id=%s", (article_id,))
    for journo_id in art['journos']:
        cursor.execute("INSERT INTO journo_attr (journo_id,article_id) VALUES (%s,%s)", (journo_id,article_id))

        # make sure journo activates if they meet the criteria
        Journo.update_activation(journo_id)

        # also clear the html cache for that journos page
        cachename = 'j%s' % (journo_id)
        cursor.execute( "DELETE FROM htmlcache WHERE name=%s", (cachename,) )

    # build a short description of what was done for the debug log
    op = 'update' if updating else 'new'
    if insert_content:
        op += ' meta+content'
    else:
        op += ' meta'

    ukmedia.DBUG2( u"%s: %s [a%s %s ] ('%s' %s)\n" % (
        art['srcorgname'] if 'srcorgname' in art else srcorg,
        op,
        article_id,
        art['srcurl'],
        art['byline'],
        ','.join( [ '[j%s]'%(j) for j in art['journos'] ] )
        ))
    return article_id
def sel_downlaod_win_playlist(playlist_obj):
    # Builds and runs the playlist download window. The first video of
    # ``playlist_obj.video_urls`` supplies the initial thumbnail, title,
    # length and quality choices. The function blocks in ``root.mainloop()``
    # until the window is closed.
    #
    # NOTE: because it follows the ``global`` statement, the string below is
    # a plain expression statement, not this function's docstring.
    global download_list
    """
    this window shows you the thumbnail of the video along with its title and available qualities,
    also shows you the download button and the file path selection menu.
    You click download and the video downloades.
    """
    download_list = playlist_obj.video_urls
    video_obj = pt.YouTube(download_list[0])
    total_vids = len(playlist_obj.video_urls)
    tnurl = video_obj.thumbnail_url
    cur_video_length = video_obj.length
    cur_video_title = video_obj.title
    global again, sel_stream, filesize

    # on change dropdown value, and link to the main menu
    def change_dropdown(*args):
        # Stores the chosen quality in the global ``sel_stream`` and shows
        # the size of the matching stream of the first video.
        global sel_stream
        sel_stream = tkvar.get()
        print('value of the sel stream is : ', sel_stream)
        # reverse lookup: find the itag whose value in Tags.tags equals the
        # selected quality label
        video_type = video_obj.streams.get_by_itag(
            list(Tags.tags.keys())[list(
                Tags.tags.values()).index(sel_stream)])
        mbytes = (round(video_type.filesize / 1000000, 2)).__str__() + ' MB'
        print(mbytes)
        file_size_lbl.config(text=mbytes.__str__())

    # opens the file explorer window to select the folder to download, and changes the global file path variable
    def open_file_explorer():
        global FILENAME
        tk.Tk().withdraw()
        FILENAME = tk.filedialog.askdirectory()
        print(FILENAME)
        file_path.config(text=FILENAME)

    # to show the progress bar, and update the values of the percentage downloaded
    def on_progress_dothis(
            stream, chunk: bytes,
            bytes_remaining: int) -> None:  # pylint: disable=W0613
        # ``maxbytes`` is the module-level value set by download() for the
        # video currently being fetched
        Bytes = maxbytes - bytes_remaining
        percent = round((100 * (maxbytes - bytes_remaining)) / maxbytes, 2)
        downloading = percent.__str__() + '%'
        progress_bar["value"] = Bytes
        progress_value.config(text=downloading)
        root.update_idletasks()
        if percent == 100.0:
            downloading = 'Done! '
            progress_value.config(text=downloading)

    # to download the video, part of the threading process, then calls the on_progress_do_this() function
    def download():
        # NOTE: ``total_vids`` is declared global here, so the assignment
        # below writes a module-level name and does not change the local
        # ``total_vids`` of the enclosing function.
        global maxbytes, total_vids
        total_vids = len(download_list)
        downloaded = 0
        skipped = 0
        print(len(download_list))
        for vids in download_list:
            print("Accessing YouTube URL...")
            vid = pt.YouTube(vids, on_progress_callback=on_progress_dothis)
            # same reverse lookup as in change_dropdown, for this video
            video_type = vid.streams.get_by_itag(
                list(Tags.tags.keys())[list(
                    Tags.tags.values()).index(sel_stream)])
            # refresh thumbnail, title and length for the current video
            methods.get_video_tnl(vid.watch_url, vid.thumbnail_url)
            img1 = Image.open(
                os.path.join('Assets/Thumbnails',
                             methods.get_vid_id(vid.watch_url) + '.png'))
            img1 = img1.resize((283, 160), Image.ANTIALIAS)
            img1 = ImageTk.PhotoImage(img1)
            cur_vid_length = vid.length
            vid_len1 = methods.conv_len(cur_vid_length)
            cur_vid_title = vid.title
            vid_title.config(text=cur_vid_title)
            vid_length.config(text=vid_len1)
            video_tnl.config(image=img1)
            print("Fetching")
            maxbytes = video_type.filesize
            mbytes = (round(video_type.filesize / 1000000, 2)).__str__() + ' MB'
            print(mbytes)
            file_size_lbl.config(text=mbytes.__str__())
            progress_bar["maximum"] = maxbytes
            print(maxbytes)
            video_type.download(FILENAME)
            # update the counters shown next to the list
            downloaded += 1
            downloaded_lbl.config(text=downloaded.__str__())
            remaining = total_vids - downloaded
            remaining_lbl.config(text=remaining)
            skipped_lbl.config(text=skipped)

    # quits the window, after changing some global variables
    def restart():
        global again
        again = True
        root.destroy()
        pass

    def remove():
        """used to remove the selected things from the menu of showing videos"""
        global download_list
        print('you clicked remove')
        # delete from the end so the remaining selection indexes stay valid
        for item in reversed(all_videos.curselection()):
            all_videos.delete(item)
        download_list = []
        download_list = all_videos.get(0, "end")
        # rebuild the download list from element [1] of every remaining row
        new_list = []
        for i in range(len(download_list)):
            new_list.append(download_list[i][1])
        download_list = new_list
        remaining_lbl.config(text=len(download_list))
        total_vids_lbl.config(text=len(download_list))

    # Starting the loop
    root = tk.Tk()
    tkvar = tk.StringVar(root)
    print(tkvar)

    # Defining some image variables to be used in the buttons and the thumbnails
    dimg = Image.open(DOWNLOAD_IMAGE)
    dimg = dimg.resize((167, 51), Image.ANTIALIAS)
    dimg = ImageTk.PhotoImage(dimg)
    rimg = Image.open(REMOVE_IMAGE)
    rimg = rimg.resize((170, 30), Image.ANTIALIAS)
    rimg = ImageTk.PhotoImage(rimg)
    flsimg = Image.open(FILE_SELECT_IMAGE)
    flsimg = flsimg.resize((60, 40), Image.ANTIALIAS)
    flsimg = ImageTk.PhotoImage(flsimg)
    dnimg = Image.open(RESTART_IMAGE)
    dnimg = dnimg.resize((125, 125), Image.ANTIALIAS)
    dnimg = ImageTk.PhotoImage(dnimg)
    BG_IMG = tk.PhotoImage(file=VIDDOWN_MULTIPLE_BGIMG)

    # Creating the Canvas
    canvas = tk.Canvas(root, height=HEIGHT, width=WIDTH)
    canvas.pack()

    # Placing the background image in the canvas
    BG_IMG_LABEL = tk.Label(canvas, image=BG_IMG)
    BG_IMG_LABEL.place(relwidth=1, relheight=1)

    # fetching the thumbnail of the current video and putting it in some folder
    methods.get_video_tnl(video_obj.watch_url, tnurl)

    # creating the image object for the thumbnails
    img = Image.open(
        os.path.join('Assets/Thumbnails',
                     methods.get_vid_id(video_obj.watch_url) + '.png'))
    img = img.resize((283, 160), Image.ANTIALIAS)
    img = ImageTk.PhotoImage(img)

    # displaying the thumbnail
    video_tnl = tk.Label(canvas, image=img)
    video_tnl.place(relx=0.01, rely=0.2)

    # displaying the title of the video
    vid_title = tk.Label(canvas,
                         text=cur_video_title,
                         anchor='w',
                         font=("Calibre", 18),
                         bg='white',
                         wraplength=800)
    vid_title.place(rely=0.2, relx=0.25)

    # displaying the length of the video
    vid_len = methods.conv_len(cur_video_length)
    vid_length = tk.Label(canvas,
                          text=vid_len,
                          anchor='w',
                          font=("Calibre", 18),
                          bg='white',
                          wraplength=400)
    vid_length.place(rely=0.38, relx=0.25)

    # Creating the drop down menu
    qualities = Tags.get_available_qualities_with_obj(video_obj)
    tkvar.set(qualities[0])  # set the default option
    popupMenu = tk.OptionMenu(canvas, tkvar, *qualities)
    popupMenu.place(relx=0.55, rely=0.395, relheight=0.05)
    tkvar.trace('w', change_dropdown)

    # Displaying the download button
    # the download runs on its own thread, started by the button
    down_btn = tk.Button(
        canvas,
        image=dimg,
        command=lambda: threading.Thread(target=download).start(),
        font=("Calibre", 16),
        bg='white',
        border=0,
        activebackground='white')
    down_btn.place(rely=0.9, relx=0.85)

    # displaying the file selection button
    file_selection_btn = tk.Button(canvas,
                                   image=flsimg,
                                   command=open_file_explorer,
                                   font=("Calibre", 16),
                                   bg='white',
                                   border=0,
                                   activebackground='white')
    file_selection_btn.place(rely=0.38, relx=0.92)

    # displaying the button that removes the selected videos from the list
    remove_btn = tk.Button(canvas,
                           image=rimg,
                           command=remove,
                           font=("Calibre", 16),
                           bg='white',
                           border=0,
                           activebackground='white')
    remove_btn.place(rely=0.8, relx=0.02)

    # displaying the file path text box
    file_path = tk.Label(
        canvas,
        text=FILENAME,
        font=("Calibre", 18, 'italic'),
        bg='white',
    )
    file_path.place(rely=0.85, relx=0.10)

    # Displaying the scrollbar next to the listbox
    scrollbar = tk.Scrollbar(root)
    scrollbar.place(rely=0.53, relx=0.67, relheight=0.25)

    # displaying the listbox
    all_videos = tk.Listbox(canvas,
                            yscrollcommand=scrollbar.set,
                            width=70,
                            font=("Calibre", 16, 'italic'),
                            height=7,
                            selectmode=tk.EXTENDED)
    # ``video_titles_with_urls`` is not defined in this function; it is read
    # from the enclosing module
    for video in video_titles_with_urls:
        all_videos.insert(tk.END, video)
    all_videos.place(rely=0.533, relx=0.02)
    scrollbar.config(command=all_videos.yview)

    # displaying the progress bar for downloading the current video
    progress_bar = ttk.Progressbar(canvas,
                                   orient="horizontal",
                                   length=200,
                                   mode="determinate")
    progress_bar.place(rely=0.915, relx=0.15, relwidth=0.6, relheight=0.03)
    progress_bar['value'] = 0

    # displaying the amount of video downloaded
    progress_value = tk.Label(canvas,
                              text='0 %',
                              font=("Calibre", 19),
                              bg='white')
    progress_value.place(rely=0.91, relx=0.76)

    # displaying the file size
    file_size_lbl = tk.Label(canvas,
                             text="0 MB",
                             font=("Calibre", 19),
                             bg='white')
    file_size_lbl.place(rely=0.815, relx=0.88)

    # displaying the number of videos that we skipped because they were unavailable to download due to some reason or error
    skipped_lbl = tk.Label(canvas,
                           text="0",
                           font=("Calibre", 19),
                           bg='white')
    skipped_lbl.place(rely=0.75, relx=0.92)

    # displaying the remaining number of videos from the selected ones
    remaining_lbl = tk.Label(canvas,
                             text=total_vids,
                             font=("Calibre", 19),
                             bg='white')
    remaining_lbl.place(rely=0.69, relx=0.92)

    # displaying the number of videos that we finished downloading
    downloaded_lbl = tk.Label(canvas,
                              text="0",
                              font=("Calibre", 19),
                              bg='white')
    downloaded_lbl.place(rely=0.63, relx=0.92)

    # displaying the position of the video we are downloading within our selected list
    cur_number_lbl = tk.Label(canvas,
                              text="0",
                              font=("Calibre", 19),
                              bg='white')
    cur_number_lbl.place(rely=0.57, relx=0.92)

    # displaying the total number of videos in the playlist given by the user
    total_vids_lbl = tk.Label(canvas,
                              text=total_vids,
                              font=("Calibre", 19),
                              bg='white')
    total_vids_lbl.place(rely=0.51, relx=0.92)

    # displaying the button for downloading another video, that is restarting the program
    next_btn = tk.Button(canvas,
                         image=dnimg,
                         command=restart,
                         font=("Calibre", 16),
                         bg='#8CB0FF',
                         border=0,
                         activebackground='#8CB0FF')
    next_btn.place(rely=0.01, relx=0.9)

    root.mainloop()
def sel_download_win_single(url, video_obj):
    """
    this window shows you the thumbnail of the video along with its title and available qualities,
    also shows you the download button and the file path selection menu.
    You click download and the video downloads.

    ``url`` is the address of the video and ``video_obj`` the object that
    supplies its thumbnail url, length, title and streams. The function
    blocks in ``root.mainloop()`` until the window is closed.
    """
    tnurl = video_obj.thumbnail_url
    length = video_obj.length
    # author = video_obj.author
    title = video_obj.title
    global again, sel_stream

    # on change dropdown value, and link to the main menu
    def change_dropdown(*args):
        # Stores the chosen quality in the global ``sel_stream`` and shows
        # the size of the matching stream.
        global sel_stream
        sel_stream = tkvar.get()
        # reverse lookup: find the itag whose value in Tags.tags equals the
        # selected quality label
        video_type = video_obj.streams.get_by_itag(
            list(Tags.tags.keys())[list(
                Tags.tags.values()).index(sel_stream)])
        mbytes = (round(video_type.filesize / 1000000, 2)).__str__() + ' MB'
        file_size_lbl.config(text=mbytes.__str__())

    # opens the file explorer window to select the folder to download, and changes the global file path variable
    def open_file_explorer():
        global FILENAME
        tk.Tk().withdraw()
        FILENAME = tk.filedialog.askdirectory()
        print(FILENAME)
        file_path.config(text=FILENAME)

    # to show the progress bar, and update the values of the percentage downloaded
    def on_progress_dothis(
            stream, chunk: bytes,
            bytes_remaining: int) -> None:  # pylint: disable=W0613
        # ``maxbytes`` is the module-level value set by download()
        Bytes = maxbytes - bytes_remaining
        percent = round((100 * (maxbytes - bytes_remaining)) / maxbytes, 2)
        downloading = percent.__str__() + '%'
        progress_bar["value"] = Bytes
        progress_value.config(text=downloading)
        root.update_idletasks()
        if percent == 100.0:
            downloading = 'Done! '
            progress_value.config(text=downloading)

    # to download the video, part of the threading process, then calls the on_progress_do_this() function
    def download():
        global maxbytes
        print("Accessing YouTube URL...")
        # a fresh YouTube object is created so that the progress callback
        # can be attached
        video = pt.YouTube(url, on_progress_callback=on_progress_dothis)
        video_type = video.streams.get_by_itag(
            list(Tags.tags.keys())[list(
                Tags.tags.values()).index(sel_stream)])
        print("Fetching")
        maxbytes = video_type.filesize
        mbytes = (round(video_type.filesize / 1000000, 2)).__str__() + ' MB'
        print(mbytes)
        file_size_lbl.config(text=mbytes.__str__())
        progress_bar["maximum"] = maxbytes
        print(maxbytes)
        video_type.download(FILENAME)

    # quits the window, after changing some global variables
    def restart():
        global again
        again = True
        root.destroy()
        pass

    # Starting the loop
    root = tk.Tk()
    tkvar = tk.StringVar(root)
    print(tkvar)

    # Defining some image variables to be used in the buttons and the thumbnails
    dimg = Image.open(DOWNLOAD_IMAGE)
    dimg = dimg.resize((167, 51), Image.ANTIALIAS)
    dimg = ImageTk.PhotoImage(dimg)
    flsimg = Image.open(FILE_SELECT_IMAGE)
    flsimg = flsimg.resize((78, 51), Image.ANTIALIAS)
    flsimg = ImageTk.PhotoImage(flsimg)
    dnimg = Image.open(RESTART_IMAGE)
    dnimg = dnimg.resize((125, 125), Image.ANTIALIAS)
    dnimg = ImageTk.PhotoImage(dnimg)
    BG_IMG = tk.PhotoImage(file=VIDDOWN_SINGLE_BGIMG)

    # Creating the Canvas
    canvas = tk.Canvas(root, height=HEIGHT, width=WIDTH)
    canvas.pack()

    # Placing the background image in the canvas
    BG_IMG_LABEL = tk.Label(canvas, image=BG_IMG)
    BG_IMG_LABEL.place(relwidth=1, relheight=1)

    # fetching the thumbnail of the current video and putting it in some folder
    methods.get_video_tnl(url, tnurl)
    img = Image.open(
        os.path.join('Assets/Thumbnails',
                     methods.get_vid_id(url) + '.png'))
    img = img.resize((283, 160), Image.ANTIALIAS)
    img = ImageTk.PhotoImage(img)

    # displaying the thumbnail
    video_tnl = tk.Label(canvas, image=img)
    video_tnl.place(relx=0.01, rely=0.2)

    # displaying the title of the video
    vid_title = tk.Label(canvas,
                         text=title,
                         anchor='w',
                         font=("Calibre", 18),
                         bg='white',
                         wraplength=800)
    vid_title.place(rely=0.2, relx=0.25)

    # displaying the length of the video
    vid_len = methods.conv_len(length)
    vid_length = tk.Label(canvas,
                          text=vid_len,
                          anchor='w',
                          font=("Calibre", 18),
                          bg='white',
                          wraplength=400)
    vid_length.place(rely=0.38, relx=0.25)

    # Creating the drop down menu
    qualities = Tags.get_available_qualities_with_obj(video_obj)
    tkvar.set(qualities[0])  # set the default option
    popupMenu = tk.OptionMenu(canvas, tkvar, *qualities)
    popupMenu.place(relx=0.3, rely=0.52, relwidth=0.2, relheight=0.05)
    tkvar.trace('w', change_dropdown)

    # Displaying the download button
    # the download runs on its own thread, started by the button
    down_btn = tk.Button(
        canvas,
        image=dimg,
        command=lambda: threading.Thread(target=download).start(),
        font=("Calibre", 16),
        bg='white',
        border=0,
        activebackground='white')
    down_btn.place(rely=0.9, relx=0.85)

    # displaying the file selection button
    file_selection_btn = tk.Button(canvas,
                                   image=flsimg,
                                   command=open_file_explorer,
                                   font=("Calibre", 16),
                                   bg='white',
                                   border=0,
                                   activebackground='white')
    file_selection_btn.place(rely=0.6, relx=0.25)

    # displaying the file path text box
    file_path = tk.Label(
        canvas,
        text=FILENAME,
        font=("Calibre", 18, 'italic'),
        bg='white',
    )
    file_path.place(rely=0.68, relx=0.25)

    # displaying the progress bar for the download
    progress_bar = ttk.Progressbar(canvas,
                                   orient="horizontal",
                                   length=200,
                                   mode="determinate")
    progress_bar.place(rely=0.82, relx=0.05, relwidth=0.7, relheight=0.05)
    progress_bar['value'] = 0

    # displaying the amount of video downloaded
    progress_value = tk.Label(canvas,
                              text='',
                              font=("Calibre", 18),
                              bg='white')
    progress_value.place(rely=0.895, relx=0.25)

    # displaying the file size
    file_size_lbl = tk.Label(canvas,
                             text="0 MB",
                             font=("Calibre", 19),
                             bg='white')
    file_size_lbl.place(rely=0.525, relx=0.8)

    # displaying the button for downloading another video, that is restarting the program
    next_btn = tk.Button(canvas,
                         image=dnimg,
                         command=restart,
                         font=("Calibre", 16),
                         bg='#8CB0FF',
                         border=0,
                         activebackground='#8CB0FF')
    next_btn.place(rely=0.01, relx=0.9)

    root.mainloop()
class Page:
    """Pre-rendered header and footer fragments shared by the generated pages.

    Everything here runs once, when the class body is executed:

    * ``header`` is a template with one positional slot (the ``<head>``
      element) and a ``bodyAttributes`` slot. ``baseHead`` itself ends with
      ``{}``, so after the first ``format`` call the result still has one
      positional slot left for the per-variant head tags.
    * ``fullHeader`` links the external resources, ``embedHeader`` embeds
      them and uses the ``onLoadCompressed()`` body handler, ``bareHeader``
      adds no extra head tags.
    * The build number stored at ``Generation.buildNumberLocation`` is read,
      incremented and saved back, then shown in the footer together with a
      UTC timestamp and a download link.

    The loop variable ``tag`` is left behind as a class attribute, as in
    every class-body ``for`` loop.
    """

    import Tags  # Tags only needed for this specific section

    header = """<!DOCTYPE HTML><html lang="en">{}<body {bodyAttributes}>"""
    baseHeadElementsTitle = Tags.Title("Lukasz Baldyga")
    baseHeadElementsMeta = Tags.Meta(
        attributes={
            "name": "viewport",
            "content": "width=device-width, initial-scale=1"
        })
    baseHeadElementsMeta += Tags.Meta(attributes={"charset": "utf-8"})
    # the trailing "{}" keeps a slot open for the variant-specific head tags
    baseHead = Tags.Head(text=str(baseHeadElementsMeta) +
                         str(baseHeadElementsTitle) + "{}")
    bigTitleInner = Tags.Paragraph(text="Lukasz Baldyga",
                                   attributes={"class": "Title"})
    bigTitleInner += Tags.Div(attributes={"class": "Hacker"})
    bigTitle = Tags.Div(text=bigTitleInner,
                        attributes={"class": "TitleWrapper"})

    # Add NoScript warning
    bigTitle += Tags.NoScript(
        text=
        "Folders won't work unless you enable JavaScript. Maybe you're looking for the <a href=bare.html class=text-success>bare</a> version?",
        attributes={
            "class": "text-center font-weight-bold w-100 p-3 mx-auto d-block"
        })
    header += str(bigTitle)

    # Resources referenced by URL in the regular page.
    # The popper URL previously contained the literal text
    # "[email protected]" (an e-mail obfuscation artifact) instead of a
    # package spec; it now names popper.js 1.16.0, the release the
    # Bootstrap 4.5.0 quick start pairs with jQuery 3.5.1 slim.
    HeadTags = [
        Tags.Script(url="https://code.jquery.com/jquery-3.5.1.slim.min.js",
                    integrity=True),
        Tags.Script(
            url=
            "https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js",
            integrity=True),
        Tags.Style(
            url=
            "https://stackpath.bootstrapcdn.com/bootswatch/4.5.0/darkly/bootstrap.min.css"
        ),
        Tags.Script(
            url=
            "https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js",
            integrity=True),
        Tags.Style(url="Resources/Styles/style.css",
                   internalPath="PublicResources/Styles/style.css"),
        Tags.Script(url="Resources/Scripts/pageControl.js",
                    integrity=True,
                    internalPath="PublicResources/Scripts/pageControl.js"),
        Tags.Script(url="Resources/Scripts/page.js",
                    integrity=True,
                    internalPath="PublicResources/Scripts/page.js")
    ]

    # The same resources, embedded into the downloadable page.
    EmbedHeadTags = [
        Tags.Script("https://code.jquery.com/jquery-3.5.1.slim.min.js",
                    embed=True),
        Tags.Script(
            "https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js",
            embed=True),
        Tags.Style(
            "https://stackpath.bootstrapcdn.com/bootswatch/4.5.0/darkly/bootstrap.min.css",
            embed=True),
        Tags.Script(
            "https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js",
            embed=True),
        Tags.Style(internalPath="PublicResources/Styles/style.css",
                   embed=True),
        Tags.Style(internalPath="PublicResources/Styles/styleEmbed.css",
                   embed=True),
        Tags.Script(internalPath="PublicResources/Scripts/pageControl.js",
                    embed=True),
        Tags.Script(internalPath="PublicResources/Scripts/resourcePack.js",
                    embed=True),
        Tags.Script(internalPath="PublicResources/Scripts/page.js",
                    embed=True)
    ]

    localLogger.debug("Generating HTML for beef html template")
    HeaderHTML = ""
    for tag in HeadTags:
        localLogger.debug("Adding tag with url: " + tag.getResourceInfo())
        HeaderHTML += str(tag)

    localLogger.debug("Generating HTML for down template")
    EmbedHTML = ""
    for tag in EmbedHeadTags:
        localLogger.debug("Embedding tag with resource: " +
                          tag.getResourceInfo())
        EmbedHTML += str(tag)

    # First pass fills in the <head> element and the body attributes; the
    # "{}" carried by baseHead survives and is filled in the second pass.
    downloadHeader = header.format(
        baseHead, bodyAttributes="onLoad='onLoadCompressed()'")
    header = header.format(baseHead, bodyAttributes="onLoad='onLoad()'")
    fullHeader = header.format(HeaderHTML)
    embedHeader = downloadHeader.format(EmbedHTML)
    bareHeader = header.format("")

    # Increment build number
    buildNumber = 1 + int(HelperFunctions.Read(Generation.buildNumberLocation))
    HelperFunctions.Save(Generation.buildNumberLocation, str(buildNumber))
    localLogger.debug("Current build number: {}".format(buildNumber))

    # Add build number to footer
    buildNumberParagraph = Tags.Paragraph("Build Number: " + str(buildNumber))
    buildNumberParagraph += str(
        Tags.Paragraph("Last updated: " +
                       strftime("%Y-%m-%d %H:%M:%S", gmtime())))
    buildNumberParagraph += str(
        Tags.HTMLElement("a",
                         selfClosing=False,
                         attributes={
                             "href": "down.html",
                             "target": "_blank"
                         },
                         pattributes=["download"],
                         innerHTML="Download latest page"))
    FooterDiv = Tags.Div(text=buildNumberParagraph,
                         attributes={"class": "container"})
    FooterTag = Tags.HTMLElement("footer",
                                 selfClosing=False,
                                 innerHTML=FooterDiv,
                                 attributes={"class": "footer"})
    HTMLEnd = "</body></html>"
def CreateTags():
    """Return a newly constructed ``Tags.Tags`` object."""
    new_tags = Tags.Tags()
    return new_tags


def CreateTag():
    """Return a newly constructed ``Tag.Tag`` object."""
    new_tag = Tag.Tag()
    return new_tag
def insertProduct(cursor, quota=1, ignore=-1, _sqliteName=None): sqliteName = "store.sqlite" if _sqliteName != None: sqliteName = _sqliteName db = sqlite3.connect(sqliteName) allProducts = db.execute('select product_id, product_title, product_intro, product_cover_img, product_thumbnail from products_info').fetchall() count = 0 ignore_count = 0 for product in allProducts: ignore_count += 1 if ignore_count <= ignore: continue # _id, title, introtext, thumbnails = news product_id, product_title, product_intro, product_cover_img, product_thumbnail = product # print product_id, product_title, product_intro, product_cover_img, product_thumbnail thumbnails = None if product_cover_img != None and len(product_cover_img.strip()) > 0: thumbnails = product_cover_img else: try: thumbnails = eval(product_thumbnail)[0] except Exception, e: pass if thumbnails == None: print "未找到图片,跳过", product_id continue # print "thumbnails", thumbnails # images是 erji_tz_portfolio_xref_content需要 images = downloadNewsThumbnails(product_id, thumbnails) print "insert images", images if images == None: print "下载", thumbnails, "失败!" continue asset_id = insertIntoAssets(cursor, product_title) # 获取full text product_intro, full_text = db.execute('select product_intro, product_detail from products_view where product_id='+str(product_id)).fetchone() content_id = insertIntoContent(cursor, asset_id, product_title, product_intro, full_text, 14) if content_id <= 0: continue insert_xref_content(cursor, content_id, images) # 插入tags _detail = product_title + " " + product_intro + " " + full_text Tags.parserTags(_detail, cursor, content_id) count += 1 if count >= quota: break
def ParseFile(self, filename, className):
    """Parse an XML dump file into record objects.

    The document's first child decides the record type: its local name
    (``badges``, ``comments``, ``posthistory``, ``postlinks``, ``posts``,
    ``tags``, ``users`` or ``votes``) is used as the key in
    ``self.documentDict``. That key is reset to an empty list, and one
    record object is created, fed with ``parse(node)`` and appended for
    every ``row`` child.

    :param filename: path of the UTF-8 encoded XML file
    :param className: ignored; the value is replaced by the local name of
        the document's first child (kept for interface compatibility)
    :raises ValueError: when a ``row`` is found under an unsupported root
        element. The previous ``raise "Error"`` was not a valid raise and
        produced an unrelated ``TypeError`` instead.
    """
    # the context manager closes the file even if reading fails
    with open(filename, 'r', encoding='utf8') as f:
        doc = f.read()

    xmlDocument = xml.dom.minidom.parseString(doc)
    mainChildNode = xmlDocument.firstChild
    className = mainChildNode.localName

    # Record factories per root element. Lambdas keep the module lookups
    # lazy, so only the module that is actually needed gets touched.
    record_factories = {
        'badges': lambda: bdg.Badges(),
        'comments': lambda: cmts.Comments(),
        'posthistory': lambda: phist.PostHistory(),
        'postlinks': lambda: plink.PostLinks(),
        'posts': lambda: posts.Posts(),
        'tags': lambda: tags.Tags(),
        'users': lambda: users.Users(),
        'votes': lambda: votes.Votes(),
    }
    make_record = record_factories.get(className)

    # initialize the dictionary for the corresponding document
    self.documentDict[className] = []
    for node in mainChildNode.childNodes:
        if node.localName != 'row':  # some nodes are None
            continue
        if make_record is None:
            # as before, an unknown root only fails once a row is seen
            raise ValueError("Unsupported document type: %r" % (className,))
        record = make_record()
        record.parse(node)
        self.documentDict[className].append(record)
TREPONEMA PALLIDUM IGG TREPONEMAL ANTIBODY SCREEN T_PALLIDUM IGG TREPONEMAL B CMIA'''.split('\n') rl = '''NON-REACTIVE NEGATIVE NONREACTIVE EQUIVOCAL LOW_POSITIVE NEAT'''.split('\n') #tl = ['Test1','Sample Test','The other test'] #rl = ['POSITVE','Non Negative','NA','Mostky ok'] tests = Tags.TagSet('Tests') tests.labels = [Tags.Tag(ModelBuilder.splitText(x.strip())) for x in tl] results = Tags.TagSet('Results') results.labels = [Tags.Tag(ModelBuilder.splitText(x.strip())) for x in rl] def tagsFromFullRules(data, rules, nodes, tags, rowIndex=0): text = data.mainData words = splitText(text) for index, x in enumerate(rules): s = 0 l = len(x[1]) while s < l and x[1][s].getData()[1] != TAG: s += 1 if s != 0 and s != l:
http_pattern = r'http\:\/\/[\s\S]*?\"' pattern = re.compile(http_pattern) match = re.search(http_pattern, buy_url_detail) if match: buy_url = match.group().strip() except Exception, e: print "get buy url error:", e if db: CREATE_PRODUCT_VIEW_TABLE = 'CREATE TABLE IF NOT EXISTS "products_view" ("product_id" INTEGER PRIMARY KEY NOT NULL UNIQUE, "product_intro" TEXT, "product_detail" TEXT, "product_thumbnails" TEXT, "buy_url" TEXT, "tags" TEXT )' try: db.execute(CREATE_PRODUCT_VIEW_TABLE) INSERT_COMMAND = "insert into products_view values (?,?,?,?,?,?)" _detail = self.product_title + " " + _product_intro +" " +product_detail db.execute(INSERT_COMMAND, (self.product_id, _product_intro, product_detail, str(product_thumbnails), buy_url, str(Tags.parserTags(_detail, None, self.product_id)))) # print _product_intro # db.commit() except Exception, e: print "insert product view error:", e def toTuple(self): return ( self.product_id, self.product_name, self.product_title, self.product_intro, self.comment_count, self.like_count, self.product_cover_img, self.eval_num,