def __init__(self, acesRelease='', timeStamp=''):
    """Config - Initialize the standard class variables.

    Non-empty arguments are stored as Comment children under their
    corresponding XML tag names.
    """
    self._children = []
    for value, tag in ((acesRelease, 'ACESrelease_Version'),
                       (timeStamp, 'Timestamp')):
        if value != '':
            self._children.append(Comment(value, tag))
def __init__(self, appRelease='', copyright=''):
    """Info - Initialize the standard class variables.

    Non-empty arguments are stored as Comment children under their
    corresponding XML tag names.
    """
    self._children = []
    for value, tag in ((appRelease, 'AppRelease'),
                       (copyright, 'Copyright')):
        if value != '':
            self._children.append(Comment(value, tag))
def read(self, element):
    """Populate this Config from an XML *element*.

    Known comment tags (ACESrelease_Version, Timestamp) become Comment
    children; any other child tag is normalized into a class name and
    dispatched through self.getClass / self.addElement. Unknown tags are
    reported and skipped.
    """
    # Read child elements
    for child in element:
        elementType = child.tag
        if elementType == 'ACESrelease_Version':
            self._children.append(Comment(child.text, 'ACESrelease_Version'))
        elif elementType == 'Timestamp':
            self._children.append(Comment(child.text, 'Timestamp'))
        else:
            # Remove text used in XML names but not class names.
            elementType = elementType.replace('-', '')
            elementType = elementType.replace('_', '')
            elementType = elementType.replace('aces:', '')
            elementType = normalize(elementType)
            elementClass = self.getClass(elementType)
            if elementClass is not None:
                # Bug fix: the original rebound the *element* parameter
                # here, shadowing the tree being iterated; use a fresh name.
                childElement = elementClass()
                childElement.read(child)
                self.addElement(childElement)
            else:
                # Typo fix in the warning ("unsupport" -> "unsupported").
                print("Config::read - Ignoring unsupported element, %s" % child.tag)
def __init__(self, clipName='', sourceMediaID='', clipDate='', note=''):
    """ClipID - Initialize the standard class variables.

    Non-empty arguments are stored as Comment children under their
    corresponding XML tag names (in ClipName, ClipDate, Source_MediaID,
    Note order, matching the original append order).
    """
    self._children = []
    for value, tag in ((clipName, 'ClipName'),
                       (clipDate, 'ClipDate'),
                       (sourceMediaID, 'Source_MediaID'),
                       (note, 'Note')):
        if value != '':
            self._children.append(Comment(value, tag))
def read(self, element):
    """Populate this Info from an XML *element*.

    Application children keep their XML attributes; Comment children are
    stored as plain Comments.
    """
    for child in element:
        tag = child.tag
        if tag == 'Application':
            # Preserve the element's attributes (e.g. version).
            self._children.append(Comment(child.text, 'Application', child.attrib))
        elif tag == 'Comment':
            self._children.append(Comment(child.text, 'Comment'))
def __init__(self, application='', appVersion='', comment=''):
    """Info - Initialize the standard class variables.

    *application* (with an optional *appVersion* attribute) and *comment*
    become Comment children when non-empty.
    """
    self._children = []
    # Bug fix: the outer guard previously tested appVersion (duplicating
    # the inner test), so an application name supplied WITHOUT a version
    # was silently dropped. Guard on the application name instead.
    if application != '':
        attributes = {}
        if appVersion != '':
            attributes['version'] = appVersion
        self._children.append(
            Comment(application, 'Application', attributes=attributes))
    if comment != '':
        self._children.append(Comment(comment, 'Comment'))
def read(self, element):
    """Populate this Info from an XML *element*.

    Recognized child tags become Comment children keyed by the same tag;
    'Release' is an Autodesk-specific attribute.
    """
    recognized = ('AppRelease', 'Copyright', 'Release')
    for child in element:
        tag = self.getElementType(child.tag)
        if tag in recognized:
            self._children.append(Comment(child.text, tag))
def read(self, element):
    """Populate this ClipID from an XML *element*.

    Recognized child tags become Comment children keyed by the same tag.
    """
    recognized = ('ClipName', 'ClipDate', 'Source_MediaID', 'Note')
    for child in element:
        tag = child.tag
        if tag in recognized:
            self._children.append(Comment(child.text, tag))
def saveBlog(self):
    # Fetch the blog page HTML (the element ids/classes used below were
    # found with Chrome devtools on the Renren blog page) and save the
    # post body, metadata header and any comments to self.filename.
    self.content = self.spider.getContent(self.url)
    soup = BeautifulSoup(self.content)
    blogContent = soup.find('div', id='blogContent', class_='blogDetail-content')
    # Replace markup tags with line breaks so the saved text stays readable.
    pattern = r'<p>|<br>|</p>|<br/>'
    # Turn the four tags <p>, <br>, </p> and <br/> into '\n'.
    blogContent = re.sub(pattern, r'\n', blogContent.decode())
    with open(self.filename, 'wb+') as f:
        # Metadata header: title / creation time / category, then a gap.
        line = u'*** 日志标题: ***' + self.summary['title'] + '\n\n'
        line += u'*** 创建时间: ***' + self.summary['createTime'] + '\n\n'
        line += u'*** 所属分类: ***' + self.summary['category'] + '\n\n'
        line += Config.GAP
        f.write(line.encode('utf-8'))
        f.write(blogContent.encode('utf-8'))
        # Only fetch comments when the summary reports a non-zero count.
        if int(self.summary['commentCount']):
            f.write(Config.GAP.encode('utf-8'))
            f.write((u'*** 评论: ***\n\n').encode('utf-8'))
            comments = Comment(self.spider, self.userID, self.blogID, 'blog', self.ownerID)
            # NOTE(review): comments.work() is assumed to return bytes
            # ready for f.write — confirm against the Comment class.
            f.write(comments.work())
    print(self.filename + ' saves successfully')
def __read_from_db(self, table_id):
    """Load every row of the selected table into self.comments or
    self.responses, depending on *table_id*."""
    connection = sqlite3.connect(self.DB_PATH)
    cursor = connection.cursor()
    print("Connected to db to read!")
    statement = ''
    # Pick the query for the requested table.
    if table_id == self.TABLE_ID_COMMENTS:
        statement = 'SELECT media_id, id, text, post_url FROM Comments'
    elif table_id == self.TABLE_ID_RESPONSES:
        statement = 'SELECT text, id FROM Responses'
    cursor.execute(statement)
    rows = cursor.fetchall()
    # Materialize the rows into domain objects.
    if table_id == self.TABLE_ID_COMMENTS:
        self.comments.extend(
            Comment(row[0], row[1], row[2], row[3]) for row in rows)
    elif table_id == self.TABLE_ID_RESPONSES:
        self.responses.extend(Response(row[0], row[1]) for row in rows)
    print('Success!')
    connection.commit()
    connection.close()
def __find_obj_by_id(self, obj_id, table_id):
    """Fetch a single Comment or Response by its id.

    Returns a Comment for TABLE_ID_COMMENTS, a Response for
    TABLE_ID_RESPONSES, or None for an unknown table_id.
    """
    db = sqlite3.connect(self.DB_PATH)
    cur = db.cursor()
    print("Connected to db to read!")
    statement = ''
    # Security fix: the original interpolated obj_id into the SQL string
    # with str.format, which is an injection vector; bind it instead.
    if table_id == self.TABLE_ID_COMMENTS:
        statement = 'SELECT id, media_id, text, post_url FROM Comments WHERE id=?'
    elif table_id == self.TABLE_ID_RESPONSES:
        statement = 'SELECT text, id FROM Responses WHERE id=?'
    cur.execute(statement, (obj_id,))
    result = cur.fetchone()
    # Resource fix: the original leaked the connection on every call.
    db.close()
    if table_id == self.TABLE_ID_COMMENTS:
        return Comment(result[1], result[0], result[2], result[3])
    elif table_id == self.TABLE_ID_RESPONSES:
        return Response(result[0], result[1])
def get_comments_from_maoyan(self, offset):
    """Parse one page of Maoyan comment JSON into a list of Comment
    objects, or None when the page is missing or malformed."""
    json_str = self.get_one_page_comment_json_from_maoyan(offset)
    if json_str is None:
        return None
    try:
        # Regular comments plus hot comments.
        items = json_str['cmts'] + json_str['hcmts']
    except KeyError:
        return None
    comments = []
    for item in items:
        record = Comment(self.movie)
        # .get() tolerates missing fields instead of raising KeyError.
        record.user_loc = item.get('cityName')
        record.user_avatar = item.get('avatarurl')
        record.user_name = item.get('nickName')
        record.user_id = item.get('userId')
        record.comment = item.get('content')
        record.create_time = item.get('time')
        record.vote_count = item.get('approve')
        comments.append(record)
    return comments
def getAlbumComments(self):
    """Write the album's comments to <path>/comments.markdown.

    Bug fix: the original guard was ``if content == ''`` — it created the
    file only when there were NO comments (writing just a header), and
    never saved actual comment content. The sibling photo/blog savers in
    this file all write comments when content exists, so the guard is
    inverted here.
    """
    comment = Comment(self.userID, self.spider, self.albumID, 'album', self.ownerID)
    content = comment.work()
    # Only create the file when there is something to save.
    if content != '':
        with open(self.path + '/comments.markdown', 'w') as f:
            f.write((u'**评论: **\n\n').encode('utf-8'))
            f.write(content)
def request_comments():
    #request comments
    # Page through the authenticated user's Weibo comment timeline: every
    # comment is logged to res.txt, and reply-comments are additionally
    # written into res.xls, one row per reply.
    fp = open("res.txt", "a")
    xw = ExcelWriter("res.xls")
    xw.add_sheet("test")
    # write head
    write_head(xw)
    polled_num = 0      # total comments fetched so far
    current_page = 1
    comment_count = 1   # next spreadsheet row for reply-comments
    while(True):
        print "get %s" % current_page
        content = client.comments.timeline.get(page=current_page)
        total_number = content.total_number
        recv_num = len(content.comments)
        if recv_num == 0:
            # Empty page: nothing more to fetch.
            print "recv_num = 0"
            break
        for comment in content.comments:
            commentRecord = Comment()
            # has_key is the Python 2 dict API; this is a reply-comment.
            if(comment.has_key("reply_comment")):
                commentRecord.is_reply = True
                commentRecord.reply_comment_id = comment.reply_comment.id
                commentRecord.reply_comment_text = comment.reply_comment.text
                commentRecord.reply_comment_created_time = comment.reply_comment.created_at
                commentRecord.reply_comment_user_id = comment.reply_comment.user.id
                commentRecord.reply_comment_user_name = comment.reply_comment.user.name
            # Copy the comment itself and the weibo (status) it belongs to.
            commentRecord.comment_id = comment.id
            commentRecord.comment_text = comment.text
            commentRecord.comment_created_time = comment.created_at
            commentRecord.comment_user_id = comment.user.id
            commentRecord.comment_user_name = comment.user.name
            commentRecord.weibo_id = comment.status.id
            commentRecord.weibo_text = comment.status.text
            commentRecord.weibo_user_id = comment.status.user.id
            commentRecord.weibo_user_name = comment.status.user.name
            fp.writelines(commentRecord.vars_to_str()+'\n')
            # print commentRecord.is_reply, comment_count
            if commentRecord.is_reply == True:
                # Only replies go to the spreadsheet.
                write_comment_xls(commentRecord, raw=comment_count, xw=xw)
                comment_count += 1
        polled_num += recv_num
        print total_number, polled_num, current_page
        # Stop once we have polled everything the API reports.
        if(polled_num < total_number):
            current_page += 1
        else:
            break
    xw.save()
    fp.close()
def recognizeSourceLineAndUpdateTree(subParts, line):
    """Updates the program call tree. List subParts IS MUTATED.

    Classifies a Fortran source *line* and appends the matching node to
    *subParts*; "implicit none" is ignored; any unrecognized statement
    raises RuntimeError.
    """
    if isNewline(line):
        subParts.append(Newline())
    elif isComment(line):
        subParts.append(Comment(line))
    elif isImplicitNone(line):
        pass  # Ignore "implicit none"
    elif isInteger(line):
        subParts.append(Integer(line))
    else:
        # Fix: the original concatenated the offending line and the advice
        # with no separator, producing a garbled one-line error message.
        raise RuntimeError("Unknown statement: " + line +
                           "\nPlease first use a Fortran compiler")
def get(self, post_hash):
    """Return the post matching *post_hash* as a dict, with its comments
    attached under the COMMENTS key."""
    print("PostApi.get(): %s", post_hash)
    raw_post = postTable.find({Post.HASH: post_hash})[0]
    post = Post(raw_post).as_dict()
    raw_comments = commentTable.find({Comment.POST_HASH: post[Post.HASH]})
    post[Comment.COMMENTS] = [Comment(raw).as_dict() for raw in raw_comments]
    print(post)
    return post
def collect_comments(driver, comments_dictionary):
    """Scrape the campaign's comment list and add any comment not yet in
    *comments_dictionary* (keyed by its position in the list)."""
    page_root = driver.find_element_by_xpath("//div[@id='root']")
    comments_section = page_root.find_element_by_class_name(
        'p-campaign-comments')
    comment_elements = comments_section.find_elements_by_class_name(
        "o-comments-list-item")
    print(len(comment_elements))
    for index, element in enumerate(comment_elements):
        # Skip comments we have already collected.
        if index in comments_dictionary:
            continue
        comments_dictionary[index] = Comment(element.text)
def Comentarios():
    """Persist a new comment on a post; only logged-in users may comment."""
    # Guard clause: anonymous visitors go back to the landing page.
    if 'User' not in session:
        return redirect("/")
    nuevo_comentario = Comment()
    usuario = Usuario()
    usuario.TraerObjeto(session['User'])
    idPosta = request.args.get("IdPost")
    descripcion = request.form.get("Comentario")
    nuevo_comentario.AllSetsComments(descripcion, idPosta, usuario)
    nuevo_comentario.Insert()
    # Jump straight to the commented post's anchor.
    return redirect("/Inicio#p" + idPosta)
def setUp(self):
    # Build one of each domain object so the tests have ready fixtures:
    # a user, a lecture topic, a comment on it, a subscription linking
    # user and topic, and a notification for that subscription.
    self.student = User("user", "password", 1)
    self.lecture = LectureTopic(1, "L1", "author", "Lecture", "info")
    self.comment = Comment(1, "author", "info", 2, self.lecture.getLTid())
    self.subscribe = Subscription(1, self.lecture.getLTid(), self.student.getUid())
    self.notify = Notification(1, self.subscribe.getSid())
    # Point the Flask app at a throwaway temp-file database and enable
    # test mode, then initialize the schema inside an app context.
    self.db_fd, app.app.config['DATABASE'] = tempfile.mkstemp()
    app.app.testing = True
    self.app = app.app.test_client()
    with app.app.app_context():
        app.init_db()
def download_chat_to_file(vodID : str):
    """Download the full chat for *vodID* fragment by fragment, dump it to
    a JSON file named 'VOD#<id>', then run analytics over it."""
    comments = []
    fragment = {'_next': ''}
    # Keep paging while the API returns a '_next' cursor.
    while '_next' in fragment:
        fragment = get_chat_fragment(vodID, fragment['_next'])
        comments.extend(Comment(raw) for raw in fragment['comments'])
        print("downloaded up to " + str(fragment['comments'][0]['content_offset_seconds']) + " seconds")
    with open('VOD#' + str(vodID), 'w') as outfile:
        json.dump(comments, outfile, default=Comment.toJSON, indent=4)
    perform_analytics(comments, vodID)
def execute_scrape(url, days_ago, debug=False):
    """
    If debug is True - Soup constant HTML from consts.py.
    Else - Use selenium to extract HTML from URL.
    """
    comments_html = comments_txt if debug else scrape_li_comments(url)
    # Generator over every comment on the post.
    all_comments = (Comment(*souped) for souped in soup_comments(comments_html))
    # Keep only comments newer than the cutoff that carry an e-mail.
    new_comments = (c for c in all_comments
                    if c.is_newer_from_days_ago(days_ago) and c.has_mail)
    print_comments(new_comments)
def savePhotoComment(self):
    # Write a markdown file describing every photo in the album: id,
    # title, date, an inline image link, and — when the summary reports
    # any — the photo's comments followed by a separator.
    with open(self.path + '/photo details.markdown', 'w') as f:
        for item in self.photos:
            line = u'**ID: ' + str(item['id']) + '**\n\n'
            # Newlines in titles would break the markdown layout.
            line += u'**名称: ' + item['title'].replace('\n', ' ') + '**\n\n'
            line += u'**时间: ' + item['date'] + '**\n\n'
            f.write(line.encode('utf-8'))
            # The image file is expected to be saved as '<id>.jpg'
            # alongside this markdown file.
            filename = str(item['id'])
            f.write(('![' + filename + '](' + filename + '.jpg)\n\n').encode('utf-8'))
            if int(item['commentCount']):
                # Only fetch comments when the count is non-zero.
                comment = Comment(self.userID, self.spider, item['id'], 'photo', item['owner'])
                f.write((u'**评论: **\n\n').encode('utf-8'))
                # NOTE(review): comment.work() is assumed to return bytes
                # ready for f.write — confirm against the Comment class.
                f.write(comment.work())
                f.write(config.gap)
def __register_comment(self, media_id, comment_id, post_code, comment_text=''):
    """Create a Comment, keep it in self.comments, persist it, and print
    whether the write succeeded."""
    comment = Comment(media_id, comment_id, comment_text, post_code)
    self.comments.append(comment)
    if self.__write_to_db(comment):
        template = "Succesfully added comment (id: {0} , url: {2} )\r\n\t{1}"
    else:
        template = "Failed to add comment (id: {0} , url: {2} )\r\n\t{1}"
    print(template.format(comment.get_id(), comment.get_text(),
                          comment.get_post_url()))
def load_comments(workbook: Workbook, candidates: Dict[str, Candidate]):
    """Read the 'комментарии' sheet and attach each comment to its candidate.

    A row carries a candidate id in column D only on that candidate's first
    comment row; rows with an empty D column belong to the most recently
    seen candidate. Columns E/F/G hold the comment's when/who/text.
    """
    comment_sheet: Worksheet = workbook['комментарии']
    # Fix: the original annotation `[Candidate]` was a *list literal*, not
    # a type. A type comment documents the intent without new imports.
    current_candidate = None  # type: Candidate | None
    for row_number in range(2, comment_sheet.max_row + 1):
        local_id = comment_sheet[f'D{row_number}'].value
        if local_id is not None:
            current_candidate = candidates[local_id]
        if current_candidate is not None:
            current_candidate.comments.append(Comment(
                when=comment_sheet[f'E{row_number}'].value,
                who=comment_sheet[f'F{row_number}'].value,
                text=comment_sheet[f'G{row_number}'].value
            ))
def sql_retrieve(conn, searchfield=None, searchval=None):
    """Fetch Comment objects from the Comments table.

    With no filter, returns every row; otherwise returns rows where
    *searchfield* equals *searchval*.
    """
    conn.row_factory = sqlite3.Row
    if searchfield:
        # Security fix: the original formatted the VALUE straight into the
        # SQL string (injection vector, and double-quoting a value is an
        # identifier in SQL anyway). Column names cannot be bound, so the
        # field name is still interpolated — it is expected to come from
        # code, not user input — but the value is now a bound parameter.
        cur = conn.execute(
            'SELECT * FROM Comments WHERE "{}" = ?'.format(searchfield),
            (searchval,))
    else:
        cur = conn.execute('SELECT * from Comments')
    # Construct the result objects from the named rows.
    results = []
    for tablerow in cur.fetchall():
        results.append(Comment(tablerow['Cid'], tablerow['Author'],
                               tablerow['Body'], tablerow['votes'],
                               tablerow['LTid']))
    return results
def postComment(type, form, LTid):
    """Persist a new comment on a topic/lecture, notify its subscribers,
    and redirect back to the matching discussion page."""
    # The leading 0 ids are placeholders; the database auto-increments
    # the real ids on insert.
    persist(Comment(0, session['user_id'], form.Body.data, 0, LTid))
    for subscription in retrieve(Subscription, "LTid", LTid):
        persist(Notification(0, subscription.getSid()))
    flash('Comment Created', 'success')
    if type == 'Topic':
        return redirect(url_for("topic_discussion", id = LTid))
    elif type == 'Lecture':
        return redirect(url_for("lecture_discussion", id = LTid))
def get_comments(self):
    # Fetch this question's comment box from Zhihu's QuestionCommentBoxV2
    # endpoint and yield one Comment per comment div found in the HTML.
    # The outer braces are literal JSON braces around the params payload.
    url = "http://www.zhihu.com/node/QuestionCommentBoxV2?params={" + "\"question_id\":{0}".format(
        self.get_data_resourceid()) + "}"
    print url
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    for comment_div in soup.find_all("div", class_="zm-item-comment"):
        author_url = comment_div.find("a", class_="zg-link")['href']
        content = comment_div.find(
            "div", class_="zm-comment-content").next_element
        date = comment_div.find("span", class_="date").next_element
        # NOTE(review): the trailing space in "like-num " matches the
        # class attribute as served by the page — confirm before changing.
        like_num = comment_div.find(
            "span", class_="like-num ").next_element
        # Comment(author_url,question_url,answer_url,content,date,like_num)
        from Comment import Comment
        # TODO: comment bloom
        # answer_url is None because these comments belong to the question.
        yield Comment(author_url, self.url, None, content, date, like_num)
def load_comments(filename, max_iteration=None):
    """
    Yields a Comment object
    :param filename: Str, a filename as a path
    :param max_iteration: Int, an optional argument which defines max
    ammount of comments to yield.
    :yield: Comment instance.
    """
    yielded = 0
    with open(filename) as dataset:
        for line in dataset:
            # Stop once the optional cap has been reached.
            if max_iteration is not None and yielded >= max_iteration:
                return
            yielded += 1
            yield Comment(json.loads(line))
def get_comments(self):
    """Parse the comment items on the current page and return a list of
    their repr() strings."""
    time.sleep(3)  # give the page time to finish rendering
    results = []
    for item in self.driver.find_elements_by_class_name("comment-item"):
        username = (item.find_element_by_class_name("avatar")
                        .find_element_by_tag_name("a")
                        .get_attribute("title"))
        content = (item.find_element_by_class_name("comment")
                       .find_element_by_tag_name("p").text)
        vote = (item.find_element_by_class_name("comment")
                    .find_element_by_class_name("votes").text)
        results.append(repr(Comment(username, vote, content)))
    return results
def saveBlog(self):
    # Save the already-fetched blog HTML (self.content) to self.filename
    # as markdown: a metadata header, the post body, then any comments.
    soup = BeautifulSoup(self.content)
    blogContent = soup.find('div', id='blogContent', class_='blogDetail-content')
    with open(self.filename, 'w+') as f:
        # Markdown header: title / creation time / category, then a gap.
        line = u'###日志标题: ' + self.summary['title'] + '\n\n'
        line += u'#####创建时间: ' + self.summary['createTime'] + '\n\n'
        line += u'#####所属分类: ' + self.summary['category'] + '\n\n'
        line += config.gap
        f.write(line.encode('utf-8'))
        f.write(blogContent.encode('utf-8'))
        # Only fetch comments when the summary reports a non-zero count.
        if int(self.summary['commentCount']):
            f.write(config.gap)
            f.write((u'#####评论:\n\n').encode('utf-8'))
            comments = Comment(self.userID, self.spider, self.blogID, 'blog', self.ownerID)
            # NOTE(review): comments.work() is assumed to return bytes
            # ready for f.write — confirm against the Comment class.
            f.write(comments.work())
    print self.filename + ' save success'