def main():
    """Fetch Stack Overflow questions for each GitHub issue title and save them.

    Reads issue ids/titles from artifacts/issues.csv, queries the Stack
    Exchange search/advanced endpoint for every title longer than three
    words, and writes each raw API response to output/<issue_id>.json.
    """
    # Loading GitHub issues
    df = pd.read_csv("artifacts/issues.csv")
    issue_titles = df["title"].to_list()
    # issue_titles = list(dict.fromkeys(issue_titles))

    stack_api = StackAPI("stackoverflow")
    # Collect (issue_id, response) pairs. BUG FIX: the original zipped
    # df["id"] against a bare list of responses, so every skipped short
    # title shifted the alignment, and then unpacked each response dict as
    # `issue, questions = i_q`, which unpacks dict KEYS, not data.
    issue_questions = []
    for i, (issue_id, title) in enumerate(zip(df["id"], issue_titles)):
        if len(title.split()) > 3:
            log.info(f'#{i}\tfetching questions for issue: "{title}"')
            issue_questions.append(
                (issue_id,
                 stack_api.fetch("search/advanced",
                                 title=title,
                                 tagged=["python"],
                                 order="desc",
                                 sort="votes",
                                 pagesize=100,
                                 page=1)))
        else:
            log.warning(f'#{i}\tissue title is too small: "{title}"')

    for i, (issue, questions) in enumerate(issue_questions):
        log.info(f"#{i}\tSaving questions for issue: {issue}")
        with open(f"output/{issue}.json", "w", encoding="utf-8") as f:
            dump(questions, f, indent=2)
def identify_questions(tags):
    """Search Stack Overflow for questions whose tags cover the given tags.

    Each requested tag is stemmed (in place) and joined into a
    semicolon-separated `tagged` query. A question's score `cnt` counts its
    tags that fall outside the request minus the number that match; only
    questions with cnt < 0 (more matches than extras) are kept.

    :param tags: list of tag strings; NOTE: mutated in place (stemmed).
    :return: (question_ids, scores, titles) parallel lists; empty on API error.
    """
    ques_id = []
    ques_score = []
    ques_body = []
    # Stem the requested tags, then build "tag1;tag2;..." for the API.
    for i in range(len(tags)):
        tags[i] = ps.stem(tags[i])
    query = ';'.join(tags)
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = page_size   # module-level configuration
        SITE.max_pages = max_pages
        questions = SITE.fetch('search', tagged=query, sort='relevance')
        for item in questions[u'items']:
            tags_ques = [ps.stem(t) for t in item[u'tags']]
            # Count question tags that were not requested...
            cnt = sum(1 for tag in tags_ques if tag not in tags)
            # ...then subtract the number of requested tags that matched.
            temp = len(tags) - len(set(tags).intersection(tags_ques))
            cnt = cnt + (temp - len(tags))
            if cnt < 0:
                ques_id.append(item[u'question_id'])
                ques_score.append(cnt)
                ques_body.append(item[u'title'])
    except StackAPIError as e:
        # BUG FIX: original used Python-2 `print e.message` syntax.
        print(e.message)
    # BUG FIX: original `print ques_id[1]` (py2 syntax) also raised
    # IndexError whenever fewer than two questions matched.
    if len(ques_id) > 1:
        print(ques_id[1])
    return ques_id, ques_score, ques_body
def get_data(question_id):
    """Collect byte counts per language from answers to a codegolf question.

    Parses each answer's <h1> header (conventionally "Language, NN bytes")
    and maps the part before the first comma to the smallest numeric token
    found after it.

    :param question_id: numeric question id on codegolf.stackexchange.com
    :return: dict mapping header language -> smallest byte count (as str)
    """
    data = {}
    SITE = StackAPI('codegolf')
    answers = SITE.fetch(
        'questions/{ids}/answers',
        ids=[question_id],
        filter=
        '!*SU8CGYZitCB.D*(BDVIficKj7nFMLLDij64nVID)N9aK3GmR9kT4IzT*5iO_1y3iZ)6W.G*'
    )
    for answer in answers["items"]:
        soup = BeautifulSoup(answer["body"], features="lxml")
        try:
            title = soup.body.h1.text
            raw_parts = title.split(",")
            # Word tokens from everything after the first comma.
            components = []
            for part in raw_parts[1:]:
                components.extend(part.split())
            bytecount = str(min(int(tok) for tok in components if tok.isnumeric()))
            # BUG FIX: the original indexed an undefined name `parts`, so a
            # NameError made every answer silently fail in the bare except.
            data[raw_parts[0]] = bytecount
        except (AttributeError, ValueError):
            # No <h1> header, or no numeric byte count in it — skip answer.
            pass
    return data
def stackoverflow(command):
    """Voice-driven lookup of a Stack Overflow question field.

    Asks the user (via the speech engine) for a question id, fetches that
    question, then asks which field to read out. Saying "wrong" restarts the
    interaction; recursion depth is bounded by user patience.
    """
    try:
        site = StackAPI('stackoverflow')
        engine.say("tell id of question")
        engine.runAndWait()
        # Renamed from `id` to avoid shadowing the builtin.
        question_id = listentomic()
        #question_id = input("enter id of question:")
        question = site.fetch('questions/%s' % (question_id))
        print(question)
        engine.say("what do you want from this question")
        engine.runAndWait()
        req = listentomic()
        #req = input("what do you want from this question:")
        if req == 'wrong':
            # Misheard request — start the whole interaction over.
            stackoverflow(command)
        else:
            engine.say("the %s for this question is" % req)
            engine.runAndWait()
            print(question["items"][0][req])
    except stackapi.StackAPIError as e:
        print(" Error URL: {}".format(e.url))
        print(" Error Code: {}".format(e.code))
        print(" Error Error: {}".format(e.error))
        print(" Error Message: {}".format(e.message))
def tagpair(request, Tag):
    """Django view: render wiki excerpts for a tag and its similar tags.

    Looks up similar tags for `Tag` in the tagpaircompare table, fetches the
    first sentence of each tag's wiki excerpt, and renders tagpair.html.

    :raises Http404: when no tag pair exists for `Tag`.
    """
    SITE = StackAPI('stackoverflow')
    ori_tag = [Tag]
    TagPairCompares = tagpaircompare.objects.filter(tag=Tag).values('simitag')
    if not TagPairCompares:
        raise Http404("Tag pair does not exist")
    tagsFetch = [tag['simitag'] for tag in TagPairCompares]
    tagswiki = SITE.fetch('tags/{tags}/wikis', tags=tagsFetch)
    tagsWikiDict = {}
    for item in tagswiki['items']:
        # First sentence of the excerpt, trimmed at an HTML entity if one
        # ended the sentence.
        excerpt = item['excerpt'].strip().split('. ')[0]
        if '.&' in excerpt:
            excerpt = excerpt.split('.&')[0]
        tagsWikiDict[item['tag_name']] = excerpt
    ori_tagwiki = {}
    ori_wiki = SITE.fetch('tags/{tags}/wikis', tags=ori_tag)['items'][0]['excerpt']
    ori_wiki = ori_wiki.strip().split('. ')[0]
    if '.&' in ori_wiki:
        # BUG FIX: the original split the loop leftover `excerpt` here, so
        # the original tag's excerpt was replaced by the last similar tag's
        # excerpt whenever this branch was taken.
        ori_wiki = ori_wiki.split('.&')[0]
    ori_tagwiki[Tag] = ori_wiki
    return render(request, 'tagpair.html', {
        'tagsWikiDicts': tagsWikiDict,
        'ori_tagwikis': ori_tagwiki
    })
def find_similar(topic, title, tags):
    """Find up to 15 similar answered questions on the given site.

    Runs two search/advanced queries (title + all tags, then title only),
    dedupes question ids preserving discovery order, and resolves the top 15
    into questions with their answers.

    :raises Exception: when `topic` is not a supported site.
    """
    print('DEBUG IN FIND SIMILAR: {}, {}, {}\n'.format(topic, title, tags))
    if topic not in valid_sites:
        raise Exception('Unsupported topic')
    method = 'search/advanced'
    SITE = StackAPI(topic, key=APP_KEY, access_token=ACCESS_TOKEN)
    similar = []
    similar += SITE.fetch(
        method, q=title, tags=';'.join(tags), answers=1,
        sort='votes')['items']  # title match and 1+ tags match
    # BUG FIX: the original passed `store_new_question='votes'` (an unknown
    # parameter the API silently ignores) instead of sort='votes' here.
    similar += SITE.fetch(method, q=title, answers=1,
                          sort='votes')['items']  # title match
    #similar += SITE.fetch(method, tags=';'.join(tags), answers=1, sort='votes')['items']  # 1+ tags match
    ids = OrderedSet()
    for s in similar:
        ids.add(str(s['question_id']))
    ids = list(ids)[:15]  # Top 15
    print('{} SIMILAR FOUND\n'.format(len(ids)))
    return get_questions_and_answers(topic, ids)
def __init__(self, config_file="~/zuliprc"):
    """Initialize the bot: Zulip client, templates, config, and StackAPI.

    Loads faq/replies/projects templates and config.json from the package
    directories, flattens the project catalogue into an index->(title, data)
    map, and subscribes the client to all streams.

    :param config_file: path to the zuliprc file with the [api] section.
    """
    config = configparser.ConfigParser()
    config.read(os.path.abspath(os.path.expanduser(config_file)))
    config = config["api"]
    self.bot_mail = config.get("email")
    self.client = zulip.Client(config_file=config_file)

    # Template files live one directory above this module.
    templates = Path(__file__).parents[1].joinpath("templates")
    with open(templates.joinpath("faq.json")) as file:
        self.faqs = json.load(file)
    with open(templates.joinpath("replies.json")) as file:
        self.replies = json.load(file)
    with open(templates.joinpath("projects.json")) as file:
        self.projects = json.load(file)

    # Flatten {category: {title: data}} into {1: (title, data), 2: ...} so
    # users can pick a project by number.
    self.flatprojects = {}
    idx = 1
    for key in self.projects:
        for title in self.projects[key]:
            self.flatprojects[idx] = (title, self.projects[key][title])
            idx += 1

    with open(Path(__file__).parents[1].joinpath("config", "config.json")) as file:
        self.config = json.load(file)

    # list(...) directly; the original wrapped it in a redundant generator.
    self.questions = list(self.faqs["questions"])
    self.answers = self.faqs["answers"]
    self.greetings = self.replies["greetings"]

    self.stackoverflow = StackAPI("stackoverflow")
    self.stackoverflow.page_size = 3  # lesser, the faster
    self.stackoverflow.max_pages = 1  # will hit API only once
    self.subscribe_all()
    print("Bot init complete")
def load(self):
    """Fetch recent answered Stack Overflow questions that contain code.

    Keeps only questions with at least one answer whose body contains a
    <code> block, storing [qid, title, last_activity_date, closed, answers]
    rows (answers as [answer_id, question_id, score, code_snippets]).

    :return: the list of question rows (also assigned to self.posts).
    """
    SITE = StackAPI('stackoverflow')
    # NOTE(review): `fromdate=s_pagesize` looks like a copy-paste slip — a
    # page size is passed where a timestamp is expected. Preserved as-is
    # because no `s_fromdate` setting is visible here; confirm intent.
    data = SITE.fetch('questions',
                      filter='!-y(KwOdKQqjehDBmb0h5Opw_j44BmcMCwAOxyvp5P',
                      pagesize=s_pagesize,
                      fromdate=s_pagesize,
                      order=s_order,
                      sort=s_sort,
                      tagged=s_tags)
    # Raw string and compiled once outside the loops (the original rebuilt
    # the non-raw pattern on every answer; '<\/code>' matches the same text).
    code_re = re.compile(r'<code>(.*?)</code>')
    questions = []
    for i in data['items']:
        if (i['answer_count'] > 0):
            qid = i['question_id']
            closed = i['closed_date'] if 'closed_date' in i else 0
            ans = []
            # Skip questions already collected under this qid.
            if not any(qid in x for x in questions):
                for x in i['answers']:
                    body = code_re.findall(x['body'])
                    if body:
                        ans.append([x['answer_id'], x['question_id'],
                                    x['score'], body])
                if ans:
                    questions.append([qid, i['title'],
                                      i['last_activity_date'], closed, ans])
    self.posts = questions
    return questions
def user_rank(user_url):
    '''
    Calculate user rank

    :param user_url: Stackoverflow user profile url (user id in segment 4)
    :return: dict with selected user details and their Rank,
             or None on StackAPI error
    '''
    try:
        url = user_url
        # NOTE(review): backslash path is Windows-only; consider os.path.join.
        user_option_file = 'stackoverflowapi\\options\\user-options.json'
        parse_web_name = 'stackoverflow'
        user_id = url.split('/')[4]
        SITE = StackAPI(parse_web_name)
        user_details = SITE.fetch('users/' + user_id)
        items = user_details['items'][0]
        # BUG FIX: json.load(open(...)) leaked the file handle; close it
        # deterministically with a context manager.
        with open(user_option_file) as options_file:
            user_options = dict(json.load(options_file))
        # Keep only the fields enabled (== 1) in the options file.
        selected_options = {key: items[key]
                            for key, enabled in user_options.items()
                            if enabled == 1}
        calculated_rank = rank_calculator(selected_options['reputation'])
        user_data_json = {
            "user_details": [selected_options],
            "Rank": calculated_rank
        }
        return user_data_json
    except StackAPIError as e:
        print(e.message)
def api_query():
    """Render unanswered google-cloud-platform questions from the last 72h."""
    site = StackAPI('stackoverflow')
    site.max_pages = 200
    cutoff = int(time.time()) - 3600 * 72
    questions = site.fetch('questions/no-answers',
                           order='desc',
                           fromdate=cutoff,
                           sort='creation',
                           tagged='google-cloud-platform')
    # Header row first; the template renders this list as a table.
    question_table = [
        dict(title='TITLE', date='DATE', tags='TAGS', views='PAGE VIEWS',
             link='LINK')
    ]
    print(question_table)
    for question in questions['items']:
        # Drop the search tag itself from the displayed tag list.
        other_tags = [str(e) for e in question['tags']
                      if e != 'google-cloud-platform']
        question_table.append(dict(
            title=question['title'],
            date=time.strftime('%m-%d %H:%M',
                               time.localtime(question['creation_date'])),
            tags=', '.join(other_tags),
            views=str(question['view_count']),
            link=question['link'],
        ))
    return render_template('query.html', questions=question_table)
def allanswers(framework, projects):
    """Fetch every answer for each question listed in the framework's
    questions CSV and append one output row per answer.

    Reads stackoverflow/<framework>_questions_and_answers_output.csv
    (column 1 = sample, column 2 = question id), fetches all answers for
    each question, resolves the answer owner, and writes rows to the
    "all_answers" output via output_write.
    """
    global api
    api = StackAPI("stackoverflow")
    # NOTE(review): `samples` is assigned but never used below — possibly a
    # leftover, or get_samples may have side effects; confirm before removal.
    samples = get_samples(projects)
    # True -> start a fresh file with the header row.
    output_write(framework, directory, "all_answers", get_header(), True)
    with open("stackoverflow/" + framework + "_questions_and_answers_output.csv") as questions:
        for index, question in enumerate(questions):
            # Skip the CSV header line.
            if index == 0:
                continue
            print("Questions from sample " + question.split(",")[1])
            question = question.replace("\n", "")
            # NOTE(review): naive comma-split parsing — breaks if any field
            # contains a comma; assumes column 2 holds the question id.
            question_id = question.split(",")[2]
            answers = api.fetch("questions/" + question_id + "/answers")["items"]
            print(len(answers))
            for indx, answer in enumerate(answers):
                print("{0}% answers analysed of question {1}".format(
                    (indx + 1) / len(answers) * 100, question_id))
                try:
                    answer_owner = get_owner_by_user_id(
                        api, answer["owner"]["user_id"])
                except KeyError:
                    # Deleted accounts have no owner user_id.
                    answer_owner = {
                        "user_id": "",
                        "reputation": "",
                        "creation_date": "",
                        "tags": []
                    }
                output = create_output(framework, question.split(",")[1],
                                       question_id, answer, answer_owner)
                output_write(framework, directory, "all_answers", output, False)
def test_no_endpoint_provided(self):
    """Testing that it raises the correct error when no endpoint is provided"""
    with patch('stackapi.StackAPI.fetch', fake_stackoverflow_exists):
        site = StackAPI('stackoverflow')
        with self.assertRaises(ValueError) as raised:
            site.fetch()
    self.assertEqual('No end point provided.', str(raised.exception))
def buscar_questoes(tag="python"):
    """Fetch recent pt.stackoverflow questions for a tag.

    Queries questions created between the module-level timestamps tsInicio
    and tsHoje (currently a one-day window) and formats title/link/creation
    date for each result.

    :param tag: tag to filter questions by (default "python")
    :return: list of formatted strings, one per question
    """
    # pt.stackoverflow client setup
    sopt = StackAPI("pt.stackoverflow")
    # limit results to a single page of 100
    sopt.page_size = 100
    sopt.max_pages = 1
    # search questions by tag within the time window (currently 1 day)
    questoes_python = sopt.fetch('questions',
                                 min=1,
                                 fromdate=tsInicio,
                                 todate=tsHoje,
                                 tagged=tag)
    # return str(html.unescape(questoes_python['items'][0]['title']))
    # Iterate the items directly instead of indexing via range(len(...)).
    resultado = []
    for item in questoes_python['items']:
        resultado.append("""
        Titulo: {}
        Link: {}
        Criacao: {}
        """.format(html.unescape(item['title']),
                   item['link'],
                   item['creation_date']))
    return resultado
def get_stored_questions(API_name,
                         Start_date=Date(1, 1, 2010),
                         End_date=Date(12, 18, 2018)):
    """Fetch up to one page of Stack Overflow questions tagged with API_name.

    :param API_name: tag to search for
    :param Start_date: lower bound for question creation date
    :param End_date: upper bound for question creation date
    :return: list of dicts with title/score/is_answered/tags/question_id/link
    """
    site = StackAPI('stackoverflow')
    site.max_pages = 1
    # BUG FIX: the original ignored Start_date/End_date entirely and
    # hardcoded datetime(2010,1,1)/datetime(2018,12,18); the parameters are
    # now honoured. NOTE(review): assumes Date values are accepted by
    # StackAPI.fetch like datetimes — confirm Date's definition.
    questions = site.fetch('questions',
                           fromdate=Start_date,
                           todate=End_date,
                           tagged=API_name)
    stored_quests = list()
    # accesses the dict that is stored at questions['items'] and keeps only
    # the relevant key/value pairs per question
    for lists in questions['items']:
        stored_quest = {
            "title": lists.get("title"),
            "score": lists.get("score"),
            "is_answered": lists.get("is_answered"),
            "tags": lists.get("tags"),
            "question_id": lists.get("question_id"),
            "link": lists.get("link")
        }
        stored_quests.append(stored_quest)
    return stored_quests
def stackoverflow(framework, projects):
    """Collect questions (and their accepted answers) for each project sample.

    For every sample, fetches matching questions, resolves the accepted
    answer and both owners, and appends one "questions_and_answers" output
    row per question via output_write.
    """
    global api
    api = StackAPI("stackoverflow")
    samples = get_samples(projects)
    # True -> start a fresh file with the header row.
    output_write(framework, directory, "questions_and_answers", get_header(), True)
    for index, sample in enumerate(samples):
        print_status_samples(index + 1, len(samples))
        questions = get_questions_when_body_has(sample)
        items = questions["items"]
        for indx, question in enumerate(items):
            # BUG FIX: the progress denominator was len(questions) — the
            # number of KEYS in the API response dict — not the number of
            # questions; use len(items) instead.
            print("{0}% questions analysed of {1}".format(
                (indx + 1) / len(items) * 100, sample))
            try:
                answer = api.fetch(
                    "answers/{ids}",
                    ids=[question["accepted_answer_id"]])["items"][0]
                answer_owner = get_owner_by_user_id(api,
                                                    answer["owner"]["user_id"])
            except KeyError:
                # No accepted answer (or the answer owner was deleted).
                answer = {
                    "answer_id": "",
                    "score": "",
                    "creation_date": ""
                }
                answer_owner = {
                    "user_id": "",
                    "reputation": "",
                    "creation_date": "",
                    "tags": []
                }
            question_owner = get_owner_by_user_id(api,
                                                  question["owner"]["user_id"])
            output = create_output(framework, sample, question, answer,
                                   question_owner, answer_owner)
            output_write(framework, directory, "questions_and_answers",
                         output, False)
def get_questions_and_answers(topic, questionIds):
    """Fetch answers for the given question ids and group them by question.

    :param topic: Stack Exchange site name (must be in valid_sites)
    :param questionIds: list of question id strings
    :return: list of {question_id, question_title, answers} dicts in
             first-seen order; empty on error or empty input
    """
    if not questionIds:
        return []
    try:
        if topic not in valid_sites:
            raise Exception('Unsupported topic')
        encoded = ';'.join(questionIds)
        method = 'questions/{}/answers'.format(encoded)
        SITE = StackAPI(topic, key=APP_KEY, access_token=ACCESS_TOKEN)
        response = SITE.fetch(method, filter='!-*jbN.OXKfDP')
        answers = response['items']
        # Group answers by question_id in O(n) using an index map (the
        # original rescanned the whole `curated` list for every answer).
        curated = []
        by_question = {}
        for a in answers:
            qid = a['question_id']
            if qid in by_question:
                by_question[qid]['answers'].append(a['body'])
            else:
                entry = {
                    'question_id': qid,
                    'question_title': a['title'],
                    'answers': [a['body']]
                }
                by_question[qid] = entry
                curated.append(entry)
        print(len(curated))
        return curated
    except Exception as e:
        # BUG FIX: `e.message` does not exist on Python 3 exceptions and
        # raised AttributeError inside the handler; print the exception.
        print(e)
        return []
def test_nonsite_parameter(self):
    """Testing that it can retrieve data on end points that don't want the
    `site` parameter. Tested using Jeff Atwood's user id"""
    with patch('stackapi.StackAPI.fetch', fake_stackoverflow_exists):
        site = StackAPI('stackoverflow')
        site._api_key = None
        with patch('stackapi.StackAPI.fetch', fake_users):
            associated = site.fetch('/users/1/associated')['items']
            self.assertGreaterEqual(len(associated), 1)
def get_stored_answers(id):
    """Fetch one page of data for the given answer id.

    :param id: answer id to look up
    :return: raw StackAPI response dict
    """
    site = StackAPI('stackoverflow')
    site.max_pages = 1
    # BUG FIX: the original called site.fetch('answers', id), which passed
    # the id positionally into StackAPI.fetch's `page` parameter instead of
    # selecting the answer; use the `answers/{ids}` endpoint template.
    answers = site.fetch('answers/{ids}', ids=[id])
    return answers
def __init__(self, unique_user_ids=None, unique_answer_ids=None):
    """Set up the Stack Overflow client and drop already-downloaded ids.

    :param unique_user_ids: user ids to download info for (default: none)
    :param unique_answer_ids: answer ids to download info for (default: none)
    """
    # BUG FIX: the original used mutable default arguments (=[]), which are
    # shared across all calls; use None sentinels instead.
    self.unique_user_ids = unique_user_ids if unique_user_ids is not None else []
    self.unique_answer_ids = unique_answer_ids if unique_answer_ids is not None else []
    self.stackoverflow = StackAPI('stackoverflow')
    # Filter out ids whose data was already fetched in a previous run.
    self.unique_user_ids = self.__remove_downloaded_ids(
        self.unique_user_ids, StackexApi.USER_INFO)
    self.unique_answer_ids = self.__remove_downloaded_ids(
        self.unique_answer_ids, StackexApi.ANSWER_INFO)
def test_exceptions_thrown(self):
    """Testing that a StackAPIError is properly thrown

    This test hits the real API."""
    with self.assertRaises(StackAPIError) as raised:
        site = StackAPI('stackoverflow')
        site._api_key = None
        # The errors/400 endpoint always returns a simulated 400 error.
        site.fetch('errors/400')
    self.assertEqual(raised.exception.error, 400)
    self.assertEqual(raised.exception.code, 'bad_parameter')
def test_nonsite_parameter(self):
    """Testing that it can retrieve data on end points that don't want the
    `site` parameter. Tested using Jeff Atwood's user id"""
    with patch('stackapi.StackAPI.fetch', fake_stackoverflow_exists):
        site = StackAPI('stackoverflow')
        site._api_key = None
        with patch('stackapi.StackAPI.fetch', fake_users):
            items = site.fetch('/users/1/associated')['items']
            self.assertGreaterEqual(len(items), 1)
def test_buscar_questoes():
    # Query a fixed one-day window so the first result is deterministic.
    api_pt = StackAPI("pt.stackoverflow")
    api_pt.page_size = 100
    api_pt.max_pages = 1
    resultado = api_pt.fetch('questions',
                             min=1,
                             fromdate=1534582800,
                             todate=1534636800,
                             tagged='python')
    assert resultado['items'][0]['creation_date'] == 1534625951
def callback():
    """OAuth2 callback: resolve the current user and register them.

    Exchanges the authorization response for a token, fetches the user's
    profile from the superuser site, and (for unknown users) inserts a USERS
    row with a cold-start flag — TRUE when they answered fewer than 25
    questions since SPLIT_DATE. Renders main.html with the user's details.
    """
    superuser = requests_client.OAuth2Session(CLIENT_ID)
    token = superuser.fetch_token(
        url=TOKEN_URL,
        client_secret=CLIENT_SECRET,
        authorization_response=flask.request.url,
        # redirect_uri="http://jackzlin.com/callback"
        # redirect_uri="http://localhost:5000/callback"
        redirect_uri="http://forum-rec-app.herokuapp.com/callback")
    SITE = StackAPI('superuser', key=SECRET_KEY)
    me = SITE.fetch('me', access_token=token['access_token'])
    # Keep user_id, profile_image, display_name
    global USER_VALS
    USER_VALS = me['items'][0]
    userId = USER_VALS['user_id']
    # Get users with cold start
    query_users_cold = """
                        SELECT *
                        FROM USERS
                       """
    cold_users = pd.read_sql(query_users_cold, con=connection)
    # Only insert users we have not seen before.
    if userId not in set(cold_users.user_id):
        answered_questions = SITE.fetch('me/answers',
                                        access_token=token['access_token'],
                                        fromdate=SPLIT_DATE)
        try:
            len_questions = len(answered_questions['items']['answers'])
        except (KeyError, TypeError):
            # 'items' is normally a list, so ['answers'] raises TypeError.
            len_questions = 0
        # SECURITY FIX: the original interpolated display_name and
        # profile_image directly into the SQL string (injection risk);
        # use a parameterized query instead. NOTE(review): assumes the DB
        # driver uses the %s paramstyle (psycopg2/MySQLdb) — confirm.
        insert_cold = """
            INSERT INTO USERS (user_id, name, profile_img_url, cold)
            VALUES (%s, %s, %s, %s)
        """
        cursor.execute(insert_cold,
                       (userId, USER_VALS['display_name'],
                        USER_VALS['profile_image'], len_questions < 25))
        connection.commit()
    return flask.render_template('main.html', userId=userId,
                                 userItems=USER_VALS)
def _get_api(**kwargs):
    """Build a StackAPI client for stackoverflow with auth fields cleared."""
    # TODO FIXME max_page documentation is wrong, it's 5 by default?
    kinda_infinity = 1_000_000
    # api = StackAPI('stackoverflow', max_pages=kinda_infinity)
    client = StackAPI('stackoverflow', **kwargs)
    # right. not sure if there is any benefit in using authorised user? not that much data is private
    client._name = None
    client._api_key = None
    return client
def __init__(self, from_date, to_date, output_format='json', key=None):
    """Initialize the stackoverflow client wrapper and the output backend.

    On failure self.error carries a message instead of raising, preserving
    the original best-effort contract; self.error is None on success.

    :param from_date: lower bound for later fetches
    :param to_date: upper bound for later fetches
    :param output_format: format understood by Output (default 'json')
    :param key: optional Stack Exchange API key
    """
    try:
        StackAPI.__init__(self, 'stackoverflow', key=key)
        self.from_date = from_date
        self.to_date = to_date
        self.outClass = Output(output_format)
        self.output = self.outClass.output
        self.error = None
    except Exception:
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate; everything else keeps best-effort.
        self.error = "Cannot reach StackAPI"
        return
def stackoverflow_data(request):
    """Django view: return the top two questions of the week as JSON."""
    api = StackAPI('stackoverflow')
    fetched = api.fetch('questions',
                        page=1,
                        pagesize=2,
                        order='desc',
                        sort='week')
    return JsonResponse({'questions': fetched})
def fetch_questions(tags=None, filename=None, page=1, write_mode="w"):
    """Write one page of question data per tag to a CSV file.

    :param tags: list of tags to fetch (defaults to ['java'])
    :param filename: path of the CSV file to write
    :param page: API page number to fetch for each tag
    :param write_mode: file open mode ("w" to overwrite, "a" to append)
    """
    SITE = StackAPI('stackoverflow')
    if tags is None:
        tags = ['java']
    write_list = []
    for tag in tags:
        print("+++++++++++++++++ " + tag + " +++++++++++++++++")
        questions = SITE.fetch('questions', tagged=tag, page=page)
        # fetch() already returns plain dicts — the original round-tripped
        # the response (and every item) through json.dumps/json.loads for
        # no effect; iterate it directly.
        for ques_obj in questions['items']:
            tags_str = "|".join(ques_obj['tags'])
            owner_obj = ques_obj['owner']
            # Owner fields can be absent (e.g. deleted users); default u"".
            user_id = owner_obj.get('user_id', u"")
            reputation = owner_obj.get('reputation', u"")
            user_type = owner_obj.get('user_type', u"")
            display_name = owner_obj.get('display_name', u"")
            #display_name = display_name.encode('ascii','ignore')
            temp_tuple = (ques_obj['question_id'], tag,
                          ques_obj['is_answered'], tags_str,
                          ques_obj['title'], ques_obj['answer_count'],
                          ques_obj['creation_date'], ques_obj['score'],
                          ques_obj['link'], ques_obj['view_count'],
                          user_id, reputation, user_type, display_name)
            write_list.append(temp_tuple)
    with io.open(filename, write_mode) as file:
        writer = csv.writer(file)
        writer.writerow(
            ('question_id', 'language', 'is_answered', 'tags', 'title',
             'answer_count', 'creation_date', 'score', 'link', 'view_count',
             'user_id', 'reputation', 'user_type', 'display_name'))
        for row in write_list:
            try:
                writer.writerow(row)
            except Exception:
                # Best-effort: skip rows csv cannot encode (kept from original).
                pass
    print("rows written to csv file " + filename + " " + str(len(write_list)))
    #is_answered, title , tags , answer_count, creation_date, score, link, bounty_amount, view_count, question_id
def __init__(self, site: str, **kwargs):
    """Initialize the API wrapper for `site`, seed the task-tracking sets,
    and load the site's tag list via the setup endpoint."""
    self.throttle = rate_limit
    StackAPI.__init__(self, site, max_pages=1, **kwargs)
    Observable.__init__(self)
    # Lifecycle buckets for tracked items.
    self.errored = set()
    self.working = set()
    self.finished = set()
    self.paused = set()
    self.tags = self.fetch(SetupMethods.Tags.value)['items']
    self.tasks = {}
def search(a):
    """Offer to look up an exception on Stack Overflow.

    Prints the exception's line number (when called from an active except
    block), then either searches Stack Overflow for the message and opens
    the top-voted result via callback(), or exits.

    :param a: the exception (or its message) to search for
    """
    tb = sys.exc_info()[2]
    # BUG FIX: sys.exc_info() only carries a traceback inside an active
    # except block; guard against tb being None to avoid AttributeError.
    line_no = tb.tb_lineno if tb is not None else "?"
    print(str(a) + " exception at line number: " + str(line_no))
    print("1.Search this error in stackoverflow")
    print("2.I can handle this myself")
    answer = int(input())
    if answer == 1:
        SITE = StackAPI("stackoverflow")
        test = SITE.fetch("search", intitle=a, tagged='python', sort='votes')
        test1 = pd.DataFrame(test['items'])
        links = test1['link'][0]
        callback(links)
    else:
        exit()
def solve_error(error: str, max_links: int = 3) -> "list[tuple[str, str]]":
    """Search Stack Overflow for an error message and return answered hits.

    :param error: error text to search for (search/advanced `q` parameter)
    :param max_links: maximum number of pairs to return (default 3)
    :return: up to `max_links` (title, link) pairs from answered questions
    """
    SITE = StackAPI('stackoverflow')
    comments = SITE.fetch('search/advanced', sort='relevance', q=error)
    count = 0
    links = []
    # Only answered questions count toward the limit. (Fixed the original's
    # invalid `[(str, str)]` return annotation and unused enumerate index.)
    for ans in comments['items']:
        if ans['is_answered']:
            count += 1
            if count > max_links:
                break
            links.append((ans['title'], ans['link']))
    return links
async def so(self, ctx, *, question):
    """Reply with the link to the most relevant Stack Overflow question."""
    try:
        # Search Stack Overflow, ranked by relevance to the query title.
        site = StackAPI("stackoverflow")
        results = site.fetch("search", sort="relevance", intitle=question)
        best_match = results["items"][0]  # top-ranked question
        await ctx.send(best_match["link"])
    except IndexError:
        # Empty result set -> items[0] raised; tell the user.
        await ctx.send("No question matched your search query. Try again")
def retrieve_post(url):
    """
    Retrieves post information

    :param url: URL of post to retreive
    :return:
        dict: The post returned from the API (None when the API has no items)
        string: The endpoint utilized
        (or None alone when the URL cannot be parsed / the API errors out)
    """
    url_regex = r"((?:https?:)?//(.*)\.(?:com|net)/((?:q(?:uestions)?|a(?:nswer)?))/(\d+)(?:/)?(?:\d+|(?:\w|-)+)?(?:/\d+)?(?:#(\d+))?)"
    endpoint_dict = {  # A quick look up dict to utilize for determining appropriate end point
        "q": "questions",
        "questions": "questions",
        "a": "answers",
        "answers": "answers",
    }
    matches = re.compile(url_regex, re.IGNORECASE).match(url)
    try:
        site_parameter = matches.group(2).split(".")[0]  # Not all sites are top level, some are site.stackexchange.com
        if site_parameter in ['ru', 'pt']:
            site_parameter += ".stackoverflow"
    except AttributeError:
        # `matches` is None when the regex did not match. BUG FIX: the
        # original also logged matches.groups() here, which re-raised
        # AttributeError inside the handler.
        logging.critical("URL Error: {}".format(url))
        logging.critical(" Groups: {}".format(matches))
        return
    if matches.group(5) is None:
        endpoint = endpoint_dict[matches.group(3)]
        post_id = matches.group(4)
    else:
        if matches.group(3) in ['q', 'questions']:
            # A fragment id on a question page points at a specific answer.
            endpoint = 'answers'
            post_id = matches.group(5)
        else:
            # BUG FIX: /a/... URLs carrying a #fragment previously left
            # `endpoint` and `post_id` unbound, raising NameError below.
            endpoint = endpoint_dict[matches.group(3)]
            post_id = matches.group(4)
    # Renamed from `filter` to avoid shadowing the builtin.
    api_filter = None
    if endpoint == "questions":
        api_filter = user_settings.API_QUESTION_FILTER
    elif endpoint == "answers":
        api_filter = user_settings.API_ANSWER_FILTER
    try:
        SITE = StackAPI(site_parameter, key=user_settings.API_KEY,
                        access_token=user_settings.ACCESS_TOKEN)
    except StackAPIError as e:
        logging.critical("API Error occurred.")
        logging.critical(" Site Parameter: %s" % (site_parameter))
        logging.critical(" Error URL: %s" % (e.url))
        logging.critical(" Error Number: %s" % (e.error))
        logging.critical(" Error Code: %s" % (e.code))
        logging.critical(" Error Message: %s" % (e.message))
        return
    except ValueError:
        logging.critical("API Error occurred.")
        logging.critical(" Invalid Site name provided: {}".format(site_parameter))
        return
    post = SITE.fetch("{}/{}".format(endpoint, post_id), filter=api_filter)
    try:
        data = post['items'][0]
    except IndexError:
        logging.info(" No 'items' for {}/{}:".format(endpoint, post_id))
        data = None
    return data, endpoint