def do_POST(self):
    """Handle webhook POSTs: on /deploy, URL-decode the GitHub-style
    `payload=` body and mirror every commit message to a site comment
    and to Weibo."""
    # Read the raw body up front (assumes Content-Length is present —
    # a missing header raises and aborts the handler).
    self.data_string = self.rfile.read(int(self.headers['Content-Length']))
    print(self.data_string)
    print(self.requestline)
    req = self.requestline.split()
    if (req[1] == '/deploy'):
        # response status code
        self.send_response(200)
    else:
        self.send_response(404)
    # header
    self.send_header('Content-type', 'text/html')
    self.end_headers()
    # remove characters like %7B (URL-decoding); Py2-first compat import
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote
    rawstr = unquote(self.data_string.decode('utf8'))
    # remove `payload=`
    tmp, jsonStr = rawstr.split('payload=')
    data = json.loads(jsonStr)
    # NOTE(review): the payload is processed even on the 404 path above —
    # confirm that is intended.
    # Iterate commits directly (was an index-based range(len(...)) loop).
    # '+' comes from form-encoding of spaces.
    for commit in data['commits']:
        txt = commit['message'].replace('+', ' ')
        print(txt)
        comment(txt + " --auto sync")
        send_weibo(
            txt + "https://esl.hohoweiya.xyz Auto Sync https://esl.szcfweiya.cn")
def main():
    """Poll YouTube for new uploads on CHANNEL and post COMMENT_TEXT on each.

    Runs forever; on any exception (typically the API quota threshold)
    it backs off for 100 seconds and keeps polling.
    """
    print("Starting...")
    youtube = YouTube()
    channel = youtube.get_channel(CHANNEL)
    print("Waiting for new videos...")
    start_time = time.asctime()
    while True:
        upload = youtube.get_latest_upload(channel, start_time)
        if upload:
            try:
                url = 'https://youtube.com/watch?v={}'.format(upload['id'])
                # BUG FIX: the original format string had three placeholders
                # but four arguments, so `url` was silently dropped from the
                # log line; a fourth placeholder now carries it.
                print("{} | {} uploaded a new video titled \"{}\" {}".format(
                    time.asctime(), upload['channel_title'], upload['title'],
                    url))
                if comment(youtube.api, upload['id'], COMMENT_TEXT):
                    print("{} | \"{}\" was commented on {}'s new video \"{}\"".
                          format(time.asctime(), COMMENT_TEXT,
                                 upload['channel_title'], upload['title']))
                # NOTE(review): start_time begins as time.asctime() (a str)
                # but is updated here with time.time() (a float) — confirm
                # get_latest_upload accepts both representations.
                start_time = time.time()
            except Exception as e:
                # If it reaches the 100 seconds api threshold, wait for 100 seconds
                print("Error: Too many requests:\n{}".format(e))
                print("Waiting 100 seconds..")
                time.sleep(100)
def get_all_user_comments(username):
    """Return a list of comment objects for every stored comment by *username*.

    BUG FIX: the original query contained the literal string '******' (a
    redaction artifact) where the '{username}' placeholder belongs, so the
    .format(username=...) call had no effect and the query only ever matched
    the literal user '******'.
    """
    comment_list = []
    cur_connection = DatabaseManager.get_connection()
    cursor = cur_connection.cursor()
    # NOTE(review): string-formatted SQL is injection-prone; prefer a
    # parameterized query if _execute_robust supports bind parameters.
    result = DatabaseManager._execute_robust(
        cursor, '''
        SELECT comment_id, post_id, username, parent_comment, comment_karma,
               comment_date, subreddit
        FROM comments
        WHERE username='{username}'
        '''.format(username=username))
    for comment_result in result:
        new_comment = comment(comment_id=comment_result[0],
                              post_id=comment_result[1],
                              username=comment_result[2],
                              parent_comment=comment_result[3],
                              comment_karma=comment_result[4],
                              comment_date=comment_result[5],
                              subreddit=comment_result[6])
        comment_list.append(new_comment)
    return comment_list
def addComment(self, msg):
    """Append an on-screen comment for *msg*, cycling through 20 row slots
    spaced 25 px apart (self.i tracks the next free slot)."""
    new_comment = comment(msg, 1184, self.i * 25, self.font)
    # Advance the slot cursor, wrapping back to the top after slot 19.
    self.i += 1
    if self.i >= 20:
        self.i = 0
    self.comments.append(new_comment)
# Python 2 scraper: fetches a Reddit page, walks the "entry unvoted" divs in
# reverse, and splits each <p>'s text on the "(...)"-capture regex into
# alternating author/body runs, emitting a flat list of comment(author, body)
# objects (inserted at index 0, so document order is restored).  The parsed
# child count `kids` is computed but deliberately unused ("maybe in the
# future" per the inline note) — no comment hierarchy is built.
# NOTE(review): left byte-identical — the conditional nesting is
# order-sensitive and the source arrived with its indentation collapsed, so a
# reformat could silently reattach a clause to the wrong branch.
def reddit_scraper(my_url): ''' returns an array where the first element is the post (comment object) and the rest is comment objects about a news article ''' uClient = urllib.urlopen(my_url) page_html = uClient.read() uClient.close() page_soup = soup(page_html, "html.parser") stories = page_soup.findAll("div", {"class": "entry unvoted"}) regex = re.compile(".*?\((.*?)\)") body = '' author = "" children = [] for thing in stories[::-1]: derp = thing.findAll("p") for x in derp: z = re.findall(regex, x.text) d = [[s.encode("utf-8", "ignore") for s in p] for p in z] result = x.text if len(d) > 0: print "AUTHOR: " + author print "BODY: " + body children.insert(0, comment(author, body)) body = "" author = "" if ''.join(d[0])[0].strip().isdigit(): #is children a digit? kids = int( ''.join(d[0]).split() [0]) # NOT USING THIS ANYMORE, MAYBE IN THE FUTURE # We are not implementing a heirarchy, instead we are just saying that all comments # are on the same level. else: # else, we know that this is the end of the page(beginning I should say) author = ''.join(d[0]) if len( re.sub("[\(\[].*?[\)\]]", "", result).encode( "utf-8", "ignore")) > 0: author = ''.join( re.sub("[\(\[].*?[\)\]]", "", result).encode("utf-8", "ignore")).split()[0] else: if len( re.sub("[\(\[].*?[\)\]]", "", result).encode( "utf-8", "ignore").split()) > 0 and "submitted" == re.sub( "[\(\[].*?[\)\]]", "", result).encode( "utf-8", "ignore").split()[0]: derp = re.sub("[\(\[].*?[\)\]]", "", result).encode("utf-8", "ignore").split() author = derp[len(derp) - 1] else: body += re.sub("[\(\[].*?[\)\]]", "", result).encode("utf-8", "ignore") return children
def addChildren(self, parent, db_comment_list):
    """Recursively attach to *parent* every database record whose parent_id
    matches it, and then the children of those children in turn."""
    for record in db_comment_list:
        # Skip records that do not point at the current node.
        if parent.comment_id != record.parent_id:
            continue
        # Build the Python-side comment object and hang it on the parent.
        child = comment(record.comment_id, record.name, record.text,
                        record.date)
        parent.addChild(child)
        # Descend to collect the new child's own children.
        self.addChildren(child, db_comment_list)
def load(self, db_comment_list):
    """Organize the flat database comment list into the tree held by this
    object and return the top-level comments, oldest first."""
    # Sentinel root with comment_id 0, which no real comment uses; every
    # top-level comment becomes one of its children.
    sentinel = comment(0, "", "", "")
    self.addChildren(sentinel, db_comment_list)
    # Adopt the sentinel's children as our top level, ordered by ascending
    # id so the oldest comments come first.
    self.comments = sorted(sentinel.children, key=lambda c: c.comment_id)
    return self.comments
# Python 2 scraper: pages through a Rap Genius profile's annotations, opportunistically
# scraping the user's login from one of three page layouts, building a
# comment(rg_id=..., text=..., song=...) per annotation and pulling its full
# history.  Pagination follows the "next_page" link until it is absent.
# NOTE(review): the `cap` parameter is documented in the docstring but never
# consulted in the visible body — confirm whether capping was ever wired up.
# Left byte-identical: the triple-nested try/except login-scraping ladder and
# the AttributeError fallbacks are order-sensitive and the source arrived with
# its indentation collapsed.
def get_annotations(self, cap=None): '''Collect the annotations of the user, is cap is given, only collect the most recent ones, otherwise fetch all (requires <Number of annotations>/10 calls)''' next_page = self.host + "/annotations/for_profile_page?page=1&user_id={user_id}".format(user_id=self.rg_id) self.annotations = [] while next_page != None: print next_page r = requests.get(next_page) soup = BeautifulSoup(r.content) if self.login == None: #if we don't have the login, attempt to scrap it here try: #this works if there are no annotations self.login = soup.select(".empty_message")[0].text.strip().replace(u" hasn't annotated any lines!", '') except IndexError: try: #this should work if there are annotations (maybe there are other a classes for some users, but I can't find any) self.login = soup.select('div.annotation_unit_label a.community_contributor')[0].text except IndexError: try: #this one is for staff self.login = soup.select('div.annotation_unit a.login')[0].attrs['href'].split('/')[-1] except IndexError: #giving up pass for annotation in soup.select("div.stand_alone_annotation_container"): annotation_id = annotation.select("div.annotation_unit")[0].get("data-id") # print "id:", annotation_id annotation_content = annotation.find(attrs={'class':'annotation_body'}).text try: song_link = annotation.find('a', attrs={'class':'title'}).get('href') except AttributeError: #alt rendering puts it in a prior div, try there. 
try: song_link = annotation.findPrevious('div', attrs={'class':'stand_alone_referent'}).find('a', attrs={'class':'title'}).get('href') except: #print the annotation id and give up print "**couldn't grab annotation", annotation_id,"from user", self.rg_id, "moving on" continue self.annotations.append(comment(rg_id=annotation_id, text=annotation_content, song=song_link)) self.annotations[-1].get_full_history() pagination_block = soup.find("div", attrs={"class":"pagination"}) try: next_page = pagination_block.find(attrs={'class':"next_page"}).get('href') #if last page this gives None if next_page != None: next_page = self.host + next_page except AttributeError, err: #if we're here, there's no pagination block, due to exactly one page of annons, so, done. next_page = None
def fetchCommentMetaRecent(subreddit, limit=100):
    """Fetch metadata for up to *limit* recent comments in *subreddit*.

    Returns a (possibly partial) list of comment objects; collection is
    best-effort and keeps whatever was gathered before any API error.
    """
    sub = RedditManager.get_connection().subreddit(subreddit)
    commentlist = sub.comments(limit=limit)
    meta_list = []
    try:
        for temp_comment in commentlist:
            comment_id = temp_comment.id
            post_id = temp_comment.submission.id
            username = temp_comment.author.name
            parent_comment = None
            # We need to split here, because the incoming id uses the
            # fullname syntax: a 't1_' prefix marks a comment parent.
            if temp_comment.parent_id.split('_')[0] == 't1':
                parent_comment = temp_comment.parent_id.split('_')[1]
            comment_karma = temp_comment.score
            comment_date = temp_comment.created_utc
            subreddit = str(temp_comment.subreddit)
            new_comment = comment(comment_id=comment_id,
                                  post_id=post_id,
                                  username=username,
                                  parent_comment=parent_comment,
                                  comment_karma=comment_karma,
                                  comment_date=comment_date,
                                  subreddit=subreddit)
            meta_list.append(new_comment)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; the deliberate best-effort behavior is kept.
        pass
    return meta_list
def get_all_comments(dateLimit=None):
    """Return every row of the comments table as a list of comment objects.

    dateLimit is currently unused; it is kept for interface compatibility
    with existing callers.  (Removed a dead `result = []` assignment that
    was immediately overwritten by the query result.)
    """
    comment_list = []
    cur_connection = DatabaseManager.get_connection()
    cursor = cur_connection.cursor()
    result = DatabaseManager._execute_robust(cursor, 'SELECT * FROM comments')
    # Column order assumed to match the comment(...) positional signature —
    # same assumption as the rest of this module.
    for row in result:
        comment_list.append(
            comment(row[0], row[1], row[2], row[3], row[4], row[5], row[6]))
    return comment_list
def fetchCommentMeta(id):
    """Fetch a single Reddit comment by id and return it as a comment object.

    The parameter shadows the builtin `id`; it is kept because callers may
    pass it by keyword.  Deleted comments (author is None) are recorded with
    the placeholder username '******'.  (Removed an unreachable `pass`
    statement after the return.)
    """
    reddit_comment = RedditManager.get_connection().comment(id=id)
    comment_id = reddit_comment.id
    post_id = reddit_comment.submission.id
    if reddit_comment.author is None:
        username = "******"
    else:
        username = reddit_comment.author.name
    parent_comment = None
    # We need to split here, because the incoming id uses the fullname
    # syntax: a 't1_' prefix marks a comment parent.
    if reddit_comment.parent_id.split('_')[0] == 't1':
        parent_comment = reddit_comment.parent_id.split('_')[1]
    comment_karma = reddit_comment.score
    comment_date = reddit_comment.created_utc
    subreddit = str(reddit_comment.subreddit)
    new_comment = comment(comment_id=comment_id,
                          post_id=post_id,
                          username=username,
                          parent_comment=parent_comment,
                          comment_karma=comment_karma,
                          comment_date=comment_date,
                          subreddit=subreddit)
    return new_comment
def commentElement_create(self, **kwargs):
    """
    Populate the comment component with simulated comments/conversations.

    :param kwargs: optional 'root' (graft point in the tree, default '/')
                   and 'conversations' (how many to simulate, default 1).
    :return: dict form of the simulated comment tree's root snode.
    """
    str_root = kwargs.get('root', '/')
    conversations = kwargs.get('conversations', 1)
    tree = self._stree
    # Build a throwaway comment object filled with generated conversations.
    sample = comment.comment()
    sample.contents_rikeripsumBuild(conversations=conversations)
    tree.cd(str_root)
    l_comment = sample.contents.lstr_lsnode('/')
    # Graft each generated top-level node into our tree at the chosen root.
    if sample.contents.cd('/')['status']:
        for c in l_comment:
            tree.graft(sample.contents, '/%s' % c)
    return dict(sample.contents.snode_root)
def commentElement_create(self, **kwargs):
    """
    Populate the comment component with simulated comments/conversations.

    :param kwargs: 'root' — graft point (default '/');
                   'conversations' — number to simulate (default 1).
    :return: dict of the simulated comment tree's root snode.
    """
    n_conversations = 1
    graft_root = '/'
    for opt, value in kwargs.iteritems():
        if opt == 'root':
            graft_root = value
        elif opt == 'conversations':
            n_conversations = value
    target = self._stree
    # Generate a sample comment tree, then splice it under graft_root.
    sample = comment.comment()
    sample.contents_rikeripsumBuild(conversations=n_conversations)
    target.cd(graft_root)
    top_nodes = sample.contents.lstr_lsnode('/')
    if sample.contents.cd('/')['status']:
        for node in top_nodes:
            target.graft(sample.contents, '/%s' % node)
    return dict(sample.contents.snode_root)
# Generator: for every Request row in the campaign *campid*, assembles a full
# request object — approvals (q8), two customize/sequence records (q2/q3),
# the latest step option (q4), the latest resource row (q5/q6 region),
# generator parameters with submission details (q6), and comments (q7) — and
# yields one built `request` per row.  Relies on module-level `cursor`,
# `approval`, `submission_details`, `generator_parameters`, `comment`,
# `request`, `convert_date` and `get_campaign_key`.
# NOTE(review): q6 ends with the hard-coded "forRequest=24445" yet is later
# executed as `cursor.execute(q6+key)`, which appends the real key digits
# after 24445 — almost certainly a leftover debug constant; confirm and fix.
# NOTE(review): SQL is assembled by string concatenation of prepid/key —
# injection-prone if ids are ever user-controlled.
# Left byte-identical: the function spans four collapsed source lines with
# triple-quoted strings and statements split across line boundaries, so any
# reformat risks changing string contents or branch nesting.
def get_all_requests(campid): key = get_campaign_key(campid) q1 = '''select Request.PrKeyPF as id, Request.priority, Request.code as prepid, Request.authorName as author_name, Request.authorCMSid as author_cmsid, Request.authorInstCode as author_inst_code, Request.pwg, Request.status, Request.statusFlow as status_flow, Request.validation, Request.type, Request.swrelease as cmssw_release, Request.inputFileName as input_filename, Request.dataTier as data_tier, Request.eventContent as event_content, Request.genFragment as gen_fragment, Request.dataSetName as dataset_name, Request.pileupDatasetName as pileup_dataset_name, Request.www, Request.processStr as process_string, Request.inputBlock as input_block, Request.preSteps as pre_steps, Request.cvsTag as cvs_tag, Request.inputCMSgen as input_cms_gen, Request.PVTflag as pvt_flag, Request.PVTcomment as pvt_comment, Request.conditions, Request.generators, Request.pileupScenario as pileup_scenario, Request.datamixerScenario as datamixer_scenario, Request.MCDBid as mcdb_id, Request.notes, Request.description, Request.remarks, Request.approvals, Request.runRange as run_range, Request.ALCA as alca, Request.SKIM as skim, Request.SKIMinput as skim_input, Request.cmsGEn as cms_gen, Request.cmsGENfile as cms_gen_file, requestDate as submission_date from Request where campaignKey=''' + str(key) + ';' q2 = '''select customizeName1 as customize_name, customizeFunction1 as customize_function, sequence1 as sequence, kcustomizeName1 as kcustomize_name, kcustomizeFunction1 as kcustomize_function, ksequence1 as ksequence from Request where code=''' q3 = '''select customizeName2 as customize_name, customizeFunction2 as customize_function, sequence2 as sequence, kcustomizeName2 as kcustomize_name, kcustomizeFunction2 as kcustomize_function, ksequence2 as ksequence from Request where code=''' q4 = '''select step from RequestOptions where forRequest=''' q5 = '''select nbEvents as total_events, nbEventsCompleted as completed_events, timeEvent 
as time_event, sizeEvent as size_event, TP as tp, unit from Resources where forRequest=''' q6 = '''select version, ptMax as pt_max, ptMin as pt_min, ptHatMax as pt_hat_max, ptHatMin as pt_hat_min, sHatMax as s_hat_max, sHatMin as s_hat_min, mInvMin as m_inv_min, mInvMax as m_inv_max,crossSection as cross_section, filterEff as filter_efficiency, filterEffError as filter_efficiency_error, matchEff as match_efficiency, updateDate as submission_date, updaterCMSid as author_cmsid, updaterName as author_name, updaterInstCode as author_inst_code, updaterProject as author_project from Resources where forRequest=24445''' q7 = '''select authorCMSid as author_cmsid, authorName as author_name, authorInstCode as author_inst_code, commentDate as submission_date, body as message from Comment where forKey=''' q8 = '''select cmsid as author_cmsid, name as author_name, instCode as author_inst_code, project as approval_step, approvalDate as submission_date from ApprovalStep where status="OK" and forKey=''' cursor.execute(q1) requests = cursor.fetchall() for req in requests: prepid = '\'' + req['prepid'] + '\';' key = str(req['id']) + ';' sequences = [] approvals = [] gen_params = [] comments = [] # get approvals cursor.execute(q8+key) apps = cursor.fetchall() for app in apps: appro = approval(app['author_name'], app['author_cmsid'], app['author_inst_code']) subby = appro.get_attribute('approver') subby['submission_date'] = convert_date(app['submission_date']) appro.set_attribute('approver', subby) a = appro.build(app['approval_step']) approvals.append(a) # custs # get seq1 cursor.execute(q2+prepid) seq1 = cursor.fetchone() seq = {'index':1} custname = [] custfunc = [] sequ = '' for assoc_key in seq1: if 'customizeName' in assoc_key: if seq1[assoc_key]: custname.append(seq1[assoc_key]) if 'customizeFunction' in assoc_key: if seq1[assoc_key]: custfunc.append(seq1[assoc_key]) if 'sequence' in assoc_key: sequ = seq1[assoc_key] seq['customize_name'] = custname seq['customize_function'] = 
custfunc seq['sequence'] = sequ sequences.append(seq) # get seq2 cursor.execute(q3+prepid) seq2 = cursor.fetchone() seq = {'index':1} custname = [] custfunc = [] sequ = '' for assoc_key in seq2: if 'customizeName' in assoc_key: if seq2[assoc_key]: custname.append(seq2[assoc_key]) if 'customizeFunction' in assoc_key: if seq2[assoc_key]: custfunc.append(seq2[assoc_key]) if 'sequence' in assoc_key: sequ = seq2[assoc_key] seq['customize_name'] = custname seq['customize_function'] = custfunc seq['sequence'] = sequ sequences.append(seq) # options cursor.execute(q4+key) ops = cursor.fetchall() req['step'] = ops[-1]['step'] # get latest # resources (main) cursor.execute(q5+key) ress = cursor.fetchall() res = ress[-1] # get latest for assoc_key in res: req[assoc_key] = res[assoc_key] # gen parameters cursor.execute(q6+key) gens = cursor.fetchall() for gen in gens: if not gen['author_name']: gen['author_name'] = req['author_name'] s = submission_details().build(gen['author_name'], gen['author_cmsid'], gen['author_inst_code'], gen['author_project']) s['submission_date'] = gen['submission_date'] gen['submission_details'] = s g = generator_parameters(gen['author_name']) for assoc_key in gen: try: g.set_attribute(assoc_key, gen[assoc_key]) except Exception as ex: continue gen_params.append(g.json()) # get comments cursor.execute(q7 + key) comms = cursor.fetchall() for comm in comms: c = comment(comm['author_name'], comm['author_cmsid'], comm['author_inst_code']).build(comm['message']) temp = c['submission_details'] temp['submission_date'] = convert_date(comm['submission_date']) c['submission_details'] = temp comments.append(c) # build request rt = request(req['author_name'], req['author_cmsid'], req['author_inst_code']) s = rt.get_attribute('submission_details') date, time = req['submission_date'].rsplit(' ') s['submission_date'] = convert_date(date, time) rt.set_attribute('submission_details', s) rt.set_attribute('approvals', approvals) rt.set_attribute('sequences', sequences) 
rt.set_attribute('generator_parameters', gen_params) rt.set_attribute('comments', comments) for assoc_key in req: try: if assoc_key == 'approvals': continue rt.set_attribute(assoc_key, req[assoc_key]) except Exception as ex: continue # rt.print_self() # print # print '###########################################' # print yield rt
# Builds a single `campaign` object for *campid*: fetches the campaign row
# (q1), its comments (q4), two customize/sequence records (q2/q3) and its
# OK'd approvals (q5), then copies every fetched attribute onto the campaign,
# converting start/end dates.  Relies on module-level `cursor`, `comment`,
# `approval`, `campaign`, `convert_date` and `get_campaign_key`.
# NOTE(review): the final `for key in camp_json:` loop rebinds `key`, which
# earlier held get_campaign_key(campid); the closing
# `campy.set_attribute('id', key)` therefore stores the *last iterated dict
# key*, not the campaign key — almost certainly a shadowing bug; confirm
# intended value and rename the loop variable.
# NOTE(review): SQL is assembled by string concatenation of campid/key —
# injection-prone if campid is ever user-controlled.
# Left byte-identical: the function spans three collapsed source lines with
# statements split across line boundaries, so a reformat risks changing
# nesting or string contents.
def get_campaign(campid): key = get_campaign_key(campid) q1 = '''select id as prepid, authorName as author_name,startDate as start_date, endDate as end_date, energy, type, prodType as production_type, reprType as repr_type, swrelease as cmssw_release, description, remarks, validation, pileupDatasetName as pileup_dataset_name, processStr as process_string, conditions, generators, pileupScenario as pileup_scenario, datamixerScenario as datamixer_scenario, inputFileName as input_filename, www, preSteps as pre_steps, dataTier as data_tier, eventContent as event_content, nbEvt as total_events, nbEvtCompleted as completed_events, approvals, authorCMSid as author_cmsid, authorInstCode as author_inst_code from Campaign where id=''' + '\'' + str(campid) + '\';' q2 = '''select sequence1 as sequence, customizeName1 as customize_name, customizeFunction1 as customize_function from Campaign where id=''' + '\'' + str(campid) + '\';' q3 = '''select sequence2 as sequence, customizeName2 as customize_name, customizeFunction2 as customize_function from Campaign where id=''' + '\'' + str(campid) + '\';' q4 = '''select authorCMSid as author_cmsid, authorName as author_name, authorInstCode as author_inst_code, commentDate as submission_date, body as message from Comment where forKey=''' + str(key) + ';' q5 = '''select cmsid as author_cmsid, name as author_name, instCode as author_inst_code, project as approval_step, approvalDate as submission_date from ApprovalStep where status="OK" and forKey=''' + str(key) + ';' approvals = [] sequences = [] comments = [] # get campaign cursor.execute(q1) camp_json = cursor.fetchone() # get comments cursor.execute(q4) comms = cursor.fetchall() for comm in comms: c = comment(comm['author_name'], comm['author_cmsid'], comm['author_inst_code']).build(comm['message']) temp = c['submission_details'] date, time = comm['submission_date'].rsplit(' ') temp['submission_date'] = convert_date(date, time) c['submission_details'] = temp comments.append(c) # custs # 
get seq1 cursor.execute(q2) seq1 = cursor.fetchone() seq = {'index':1} custname = [] custfunc = [] sequ = '' for assoc_key in seq1: if 'customizeName' in assoc_key: if seq1[assoc_key]: custname.append(seq1[assoc_key]) if 'customizeFunction' in assoc_key: if seq1[assoc_key]: custfunc.append(seq1[assoc_key]) if 'sequence' in assoc_key: sequ = seq1[assoc_key] seq['customize_name'] = custname seq['customize_function'] = custfunc seq['sequence'] = sequ sequences.append(seq) # get seq2 cursor.execute(q3) seq2 = cursor.fetchone() seq = {'index':1} custname = [] custfunc = [] sequ = '' for assoc_key in seq2: if 'customizeName' in assoc_key: if seq2[assoc_key]: custname.append(seq2[assoc_key]) if 'customizeFunction' in assoc_key: if seq2[assoc_key]: custfunc.append(seq2[assoc_key]) if 'sequence' in assoc_key: sequ = seq2[assoc_key] seq['customize_name'] = custname seq['customize_function'] = custfunc seq['sequence'] = sequ sequences.append(seq) # get approvals allowed = ['SIM', 'HLT', 'L1', 'ALCA', 'RECO', 'Start'] # campaign hack cursor.execute(q5) apps = cursor.fetchall() for app in apps: appro = approval(app['author_name'], app['author_cmsid'], app['author_inst_code']) subby = appro.get_attribute('approver') subby['submission_date'] = convert_date(app['submission_date']) appro.set_attribute('approver', subby) if app['approval_step'] not in allowed: allowed.append(app['approval_step']) appro.set_approval_steps(allowed) a = appro.build(app['approval_step']) approvals.append(a) camp_json['approvals'] = approvals camp_json['sequences'] = sequences camp_json['comments'] = comments campy = campaign(camp_json['author_name'], camp_json['author_cmsid'], camp_json['author_inst_code']) #print simplejson.dumps(camp_json, sort_keys=True, indent=4) for key in camp_json: try: if key == 'start_date' or key == 'end_date': campy.set_attribute(key, convert_date(camp_json[key])) continue campy.set_attribute(key, camp_json[key]) except Exception as ex: continue campy.set_attribute('id', key) 
#campy.print_self() return campy
# Douban movie-comment scraper: reads the comment count from the film's
# comments page, caps it at util.COMMENT_MAX, then pages through 20 comments
# at a time parsing each .comment-item into a comment.comment() (id, votes,
# user name/url/id, star rating, time, content).  New users are looked up via
# sql.get_user_byid and saved (with a spider flag when not visible); each
# comment is saved via sql.save_comment unless its id already exists.
# Exceptions around the DB calls are logged and swallowed per item.
# Left byte-identical: the function spans two collapsed source lines with a
# statement split across the boundary ("star =" / "ci.find(...)"), so any
# reformat risks corrupting it; nesting is also too deep to reconstruct safely
# from collapsed indentation.
def get_comments_by_film(self, film_id): params = { 'start': '0', 'limit': '20', 'status': 'P', 'sort': 'new_score' } r = proxy.gethtml( 'https://movie.douban.com/subject/' + str(film_id) + '/comments', self.headers, params) if r is None: return soup = BeautifulSoup(r.content.decode(), 'html.parser') if soup is None: return tot = 0 cmt_tab = soup.find('ul', {'class': 'fleft CommentTabs'}) if cmt_tab: cmt_tab_span = cmt_tab.find('span') if cmt_tab_span: txtr = re.search(r'(\d+)', cmt_tab_span.get_text()) if txtr: tot = int(txtr.group(1)) #限制一下条数防止爬的太多爬不完 if tot > util.COMMENT_MAX: tot = util.COMMENT_MAX for i in range(0, int(tot / 20) + 1): params = { 'start': str(i * 20), 'limit': '20', 'status': 'P', 'sort': 'new_score' } r = proxy.gethtml(url='https://movie.douban.com/subject/' + str(film_id) + '/comments', params=params, headers=self.headers) if r is None: continue soup = BeautifulSoup(r.content.decode(), 'html.parser') if soup is None: continue cmts = soup.find_all('div', attrs={'class': 'comment-item'}) if cmts is None: continue for cmt in cmts: c = comment.comment() c.comment_id = cmt.get('data-cid') #<span class="votes vote-count">1042</span> sf = cmt.find('span', attrs={'class': 'votes vote-count'}) if sf: c.comment_useful = sf.get_text() ci = cmt.find('span', {'class': 'comment-info'}) if ci: un = ci.find('a') if un: c.user_name = un.get_text() c.user_url = un.get('href') urla = re.sub(r'\/$', '', c.user_url).split('/') if len(urla) > 1: c.user_id = urla[-1] dbusers = sql.get_user_byid(c.user_id) if len(dbusers) == 0: new_user = user.user() new_user.user_id = c.user_id new_user.user_name = c.user_name new_user.user_url = c.user_url new_user.get_user_info(new_user.user_url) try: dbusers = sql.get_user_byid(new_user.user_id) if len(dbusers) == 0: sql.save_user(new_user) if new_user.visible == '0': sql.update_user_spider(new_user.user_id) except Exception as e: log.logger.info(str(e)) #<span title="力荐" class="allstar50 rating"></span> if ci: star = 
ci.find('span', {'class': re.compile('allstar')}) if star: c.star = str( int(star.get('class')[0].replace('allstar', '')) / 10) sf = ci.find('span', {'class': 'comment-time'}) if sf: c.comment_time = sf.get_text().strip() sf = cmt.find('p', {'class': 'comment-content'}) if sf: sfs = sf.find('span') if sfs: c.comment_content = sfs.get_text() c.film_id = film_id try: dbcmts = sql.get_comment_byid(c.comment_id) if len(dbcmts) == 0: sql.save_comment(c) except Exception as e: log.logger.info("cid:" + str(c.comment_id)) log.logger.info(str(e))
# Python 2 console UI: menu() prints the option list; the __main__ script
# constructs a comment.comment() classifier, shows the menu in a loop and
# dispatches on the entered number (option 1 = training flow shown here).
# NOTE(review): the visible chunk is truncated — the script ends mid-`if`
# ("if com.positive_input(plst) == 1:") with the rest of the dispatch outside
# this view, so the code is left byte-identical.
def menu(): print " MENU" print "" print " 1. Train" print " 2. Test" print " 3. Load Data" print " 4. Record Data" print " 5. Check Comment" print " 6. Exit" print "" if __name__ == '__main__': com = comment.comment() header() while 1: menu() number = int(raw_input('Enter your input Number : ')) if number == 1: print "" print "TRAINING" print "" plst = raw_input('Enter the file name of positive comments list:') nlst = raw_input('Enter the file name of negative comments list:') print "" print "Training is started ... " print "" if com.positive_input(plst) == 1:
import comment

if __name__ == "__main__":
    # Earlier run used id 2384387; current target is 4609870.
    c = comment.comment(4609870)
    c.get_comment()