def get_posts(s_, g):
    """Fetch one archive page for a given year and store every post on it.

    Args:
        s_: Session-like object; only its ``post(url, data=...)`` method is
            used, and the response must expose the page HTML as ``.text``.
        g: 4-item sequence ``(url, forum_id, t_id, year)``. ``forum_id`` is
            unpacked but not used here; ``t_id`` is forwarded to
            ``Post.insert`` and ``year`` is sent as the ``ac_year`` field.

    Returns:
        A list with the result of ``Post.insert(...)`` for each ``.post``
        element on the page, in document order.

    Raises:
        ValueError: if ``g`` does not have exactly four items, or a post date
            does not match ``'%b %d, %Y %H:%M'``.
        AttributeError: if a post lacks the expected ``id`` / ``userid-N``
            markers or has no ``.postbody`` content.
    """
    url, _forum_id, t_id, year = g

    # NOTE(review): 2018 is hard-coded. Presumably it was the archive's default
    # year when this was written, so the filter is removed instead of set --
    # confirm before relying on it for other years.
    if year == 2018:
        form = {"rem": "Remove"}
    else:
        form = {"set": "GO"}
    # Both requests share the same date fields; only the action field differs.
    form.update({"ac_year": year, "ac_day": "", "ac_month": ""})

    resp = s_.post(url, data=form)
    d = pq(resp.text)

    # Raw strings keep ``\d`` from being parsed as an (invalid) string escape.
    post_id_re = re.compile(r"post(\d+)")
    user_id_re = re.compile(r"userid-(\d+)")
    # Ad-section marker comments are stripped from every stored body.
    ad_markers = (
        "<!-- google_ad_section_start -->",
        "<!-- google_ad_section_end -->",
    )

    found_posts = []
    for h in d.find(".post"):
        post = d(h)  # wrap the raw element once per post

        # The element id must *start* with "post<N>", hence match().
        post_id = int(post_id_re.match(post.attr("id")).group(1))
        # The user id may appear anywhere in the class list, hence search().
        user_id = int(
            user_id_re.search(post.find(".userinfo").attr("class")).group(1))

        post_body = post.find(".postbody").html()
        for marker in ad_markers:
            post_body = post_body.replace(marker, "")

        # Drop the "#" / "?" decorations around the date text before parsing.
        post_date_str = post.find(".postdate").text().replace(
            "#", "").replace("?", "").strip()
        post_date = datetime.strptime(post_date_str, '%b %d, %Y %H:%M')

        found_posts.append(
            Post.insert(post_id, user_id, post_body, t_id, post_date))

    return found_posts