def post(self):
    """Create a Post from the submitted form, link it to the user, redirect.

    Reads the ``caption`` request parameter and the first uploaded blob (if
    any), stores a new ``Post`` entity owned by the currently signed-in
    user, prepends the new post's key to that user's ``posts`` list and
    finally redirects to ``'/'``.

    NOTE(review): ``self.get_uploads()`` presumably comes from a Blobstore
    upload handler base class and returns a list - confirm against the
    enclosing class.
    NOTE(review): there is no handling for a missing signed-in user or a
    missing ``User`` entity; both would fail with ``AttributeError`` below,
    exactly as before this change.
    """
    new_post = Post()

    # Read the caption once. It is stored unstripped, but only when it
    # contains something other than whitespace.
    caption = self.request.get('caption')
    if caption.strip():
        new_post.text_caption = caption

    # The image is optional. Check for "no upload" explicitly instead of
    # hiding every possible error behind a bare ``except: pass``.
    uploads = self.get_uploads()
    if uploads:
        new_post.image_blob = uploads[0].key()

    user = users.get_current_user()
    myuser_key = ndb.Key('User', user.user_id())
    myuser = myuser_key.get()

    new_post.owner_user = myuser_key
    new_post.date = datetime.now()
    new_key = new_post.put()

    # Newest post first.
    myuser.posts.insert(0, new_key)
    myuser.put()

    self.redirect('/')
def _parse(self):
    """Walk ``self.root`` and populate ``self.id`` and ``self.posts``.

    Every element yielded by ``self.root.iter()`` is inspected by tag:

    * ``ID``      - its text becomes ``self.id``.
    * ``WRITING`` - starts a fresh ``Post``.
    * ``TITLE`` / ``TEXT`` / ``INFO`` - text is stored on the current post
      under ``title`` / ``text`` / ``info``.
    * ``DATE``    - text is wrapped in ``Date`` and stored as ``date``.

    The current post is appended to ``self.posts`` when its ``INFO``
    element is seen. Elements with any other tag are ignored.
    """
    # Tags whose text is copied verbatim onto the current post.
    plain_fields = {'TITLE': 'title', 'TEXT': 'text', 'INFO': 'info'}

    for node in self.root.iter():
        tag = node.tag
        value = node.text

        if tag == 'ID':
            self.id = value
            continue

        if tag == 'WRITING':
            entry = Post()
            continue

        if tag == 'DATE':
            entry.date = Date(value)
            continue

        if tag in plain_fields:
            setattr(entry, plain_fields[tag], value)
            if tag == 'INFO':
                # INFO is the point at which a post is registered.
                self.posts.append(entry)
def parse_all_posts(self, author=True, title=True, date=True, contents=True,
                    messages=True, reply=True, images=True, ip=False):
    """Fetch every URL in ``self.post_urls`` and return a list of ``Post``.

    Each keyword flag switches one piece of extraction on or off:

    * ``author`` / ``title`` / ``date`` - taken from the 1st / 3rd / 4th
      ``span.article-meta-value`` element of the page.
    * ``contents`` - article body cut out of ``#main-container``, with
      spaces, newlines and tabs removed.
    * ``images`` - the ``src`` attribute of every ``<img>`` on the page.
    * ``messages`` or ``reply`` - tallies the push tags into
      ``post.good`` / ``post.bad`` / ``post.normal``.
      (assumes ``Post`` initialises these counters - TODO confirm)
    * ``ip`` - accepted but not implemented yet.

    Parsing is best-effort: if anything goes wrong while extracting a
    page, the partially-filled ``Post`` is still returned for that URL.

    Returns:
        list: one ``Post`` per entry in ``self.post_urls``, in order.

    Exits the process with status 1 (after printing a hint) when
    ``post_urls`` has not been populated.
    """
    import logging

    post_urls = getattr(self, "post_urls", None)
    if post_urls is None:
        print("You must run get_posts_url first")
        sys.exit(1)

    post_objects = []
    for url in post_urls:
        post_url = "http://www.ptt.cc/bbs/{}/{}".format(self.board_name, url)
        soup = self._get_soup(post_url)
        post = Post(post_url)
        try:
            article_meta = soup.find_all('span', class_="article-meta-value")
            if author:
                post.author = article_meta[0].contents[0]
            if title:
                post.title = article_meta[2].contents[0]
            if date:
                post.date = article_meta[3].contents[0]

            # TODO: ip extraction is not implemented; the flag is ignored.

            if contents:
                main_html = str(soup.find(id="main-container").contents[1])
                # The article body is the 5th "</div>"-delimited chunk,
                # up to the site's footer marker.
                body = main_html.split("</div>")[4]
                body = body.split(
                    '<span class="f2">※ 發信站: 批踢踢實業坊(ptt.cc),')[0]
                post.contents = (body.replace(' ', '')
                                     .replace('\n', '')
                                     .replace('\t', ''))

            if images:
                post.images = [img.attrs.get('src')
                               for img in soup.find_all('img')]

            # TODO: some push content is lost when it contains a hyperlink.
            if messages or reply:
                for push in soup.find_all("div", "push"):
                    push_tag = push.find("span", "push-tag") \
                                   .string.replace(' ', '')
                    if push_tag == '推':
                        post.good += 1
                    elif push_tag == '噓':
                        post.bad += 1
                    else:
                        post.normal += 1
        except Exception:
            # Stay best-effort for malformed pages, but do not swallow
            # KeyboardInterrupt / SystemExit the way a bare except did.
            logging.getLogger(__name__).debug(
                "Could not fully parse %s", post_url, exc_info=True)
        post_objects.append(post)
    return post_objects