示例#1
0
def identify_questions(tags):
    """Search Stack Overflow for questions relevant to *tags*.

    Stems each requested tag in place, queries the ``search`` endpoint
    with the stemmed tags joined by ';', and keeps only questions whose
    stemmed tags overlap the requested set more than they diverge from it.

    :param tags: list of raw tag strings (stemmed in place as a side effect)
    :return: tuple of (question ids, relevance scores, question titles)
    """
    ques_id = []
    ques_score = []
    ques_body = []
    # Stem every requested tag, then build the 'a;b;c' query string.
    for i in range(len(tags)):
        tags[i] = ps.stem(tags[i])
    query = ';'.join(tags)
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = page_size
        SITE.max_pages = max_pages
        questions = SITE.fetch('search', tagged=query, sort='relevance')
        for item in questions[u'items']:
            tags_ques = [ps.stem(t) for t in item[u'tags']]
            # cnt = (#question tags outside the requested set)
            #     - (#requested tags present on the question);
            # negative means more overlap than noise, so keep it.
            cnt = sum(1 for tag in tags_ques if tag not in tags)
            cnt -= len(set(tags).intersection(tags_ques))
            if cnt < 0:
                ques_id.append(item[u'question_id'])
                ques_score.append(cnt)
                ques_body.append(item[u'title'])
    except StackAPIError as e:
        # Python 3 print call (original used Python 2 `print e.message`).
        # The stray `print ques_id[1]` debug line was removed: it was
        # Python 2 syntax and raised IndexError with fewer than 2 results.
        print(e.message)
    return ques_id, ques_score, ques_body
示例#2
0
def buscar_questoes(tag="python"):
    """Fetch pt.stackoverflow questions tagged *tag* inside the configured
    time window [tsInicio, tsHoje] and return formatted text snippets."""
    sopt = StackAPI("pt.stackoverflow")
    # One page of up to 100 results.
    sopt.page_size = 100
    sopt.max_pages = 1
    questoes_python = sopt.fetch('questions',
                                 min=1,
                                 fromdate=tsInicio,
                                 todate=tsHoje,
                                 tagged=tag)
    modelo = """
        Titulo: {}
        Link: {}
        Criacao: {}
        """
    # One formatted entry per returned question.
    return [
        modelo.format(html.unescape(item['title']),
                      item['link'],
                      item['creation_date'])
        for item in questoes_python['items']
    ]
示例#3
0
def getUsers():
    """Fetch the top-reputation Stack Overflow users and append the raw
    API response to the module-level ``users`` list.

    Retries once after a 5-second pause on failure (e.g. throttling).
    """
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    SITE.page_size = 100
    try:
        users.append(SITE.fetch('users', sort='reputation'))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; wait out a possible throttle window and retry.
        time.sleep(5)
        users.append(SITE.fetch('users', sort='reputation'))
示例#4
0
def test_buscar_questoes():
    """Regression check: the first question returned for this fixed time
    window carries the expected creation timestamp."""
    api = StackAPI("pt.stackoverflow")
    api.page_size = 100
    api.max_pages = 1
    resultado = api.fetch('questions',
                          min=1,
                          fromdate=1534582800,
                          todate=1534636800,
                          tagged='python')
    assert 1534625951 == resultado['items'][0]['creation_date']
示例#5
0
def get_tags(parameter_list, count, sitename, maxpage, pagesize, page_no):
    """Fetch one page of tags from a Stack Exchange site.

    Tries *sitename* first; on any failure falls back to
    ``parameter_list[count]``.

    :return: raw API response dict, or 0 when the fallback also fails
        with a StackAPIError.
    """
    def _fetch(site):
        # One attempt against a single site handle.
        SITE = StackAPI(site)
        SITE.max_pages = maxpage
        SITE.page_size = pagesize
        return SITE.fetch('tags', page=page_no)

    try:
        return _fetch(sitename)
    except Exception:
        # Narrowed from a bare `except:`; covers bad site handles as well
        # as API/network errors, without swallowing SystemExit et al.
        try:
            return _fetch(parameter_list[count])
        except stackapi.StackAPIError as e:
            print(" Error URL: {}".format(e.url))
            print(" Error Code: {}".format(e.code))
            print(" Error Error: {}".format(e.error))
            print(" Error Message: {}".format(e.message))
            return 0
示例#6
0
def getQuestions():
    """Fetch top-voted C# questions and append the raw response to the
    module-level ``questions`` list, retrying once on failure."""
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 10
    SITE.page_size = 100
    try:
        questions.append(SITE.fetch('questions', tagged='C#', sort='votes'))
    except Exception:
        # Narrowed from a bare `except:`; pause for a possible throttle
        # window and retry once.
        time.sleep(5)
        questions.append(SITE.fetch('questions', tagged='C#', sort='votes'))
    time.sleep(15)  # pause between bulk calls to respect the API rate limit
def fetch_stackapi(text, tags, page_size=1, max_pages=1):
    """Query the Stack Exchange ``search`` endpoint.

    :param text: free-text string matched against post content (intext)
    :param tags: only return posts tagged with these tags
    :param page_size: number of results per page
    :param max_pages: number of pages to retrieve
    :return: raw API response dict
    """
    site = StackAPI('stackoverflow')
    site.page_size = page_size
    site.max_pages = max_pages
    response = site.fetch('search',
                          intext=text,
                          tagged=tags,
                          sort="relevance",
                          filter="!7qBwspMQR3L7c4q7tesaRX(_gP(rj*U-.H")
    return response
示例#8
0
	def __init__(self):
		"""Fetch the past week's top-voted and the newest 'android' questions."""
		api = StackAPI('stackoverflow')
		api.page_size = 10  # items per page
		api.max_pages = 1   # API calls per fetch
		# Current date and time as a datetime object.
		self.date = datetime.now()
		# Past-week interval; week runs Monday (0) through Sunday (6).
		interval = self.past_week()
		# Top-rated android questions from the past week.
		self.top = api.fetch('questions', fromdate=interval[0],
			todate=interval[1], sort='votes', tagged='android')['items']
		# Most recently created android questions.
		self.new = api.fetch('questions', sort='creation',
			order='desc', tagged='android')['items']
示例#9
0
def set_questions(year, month):
    """Fetch one calendar month of Stack Overflow questions and store each
    of them in MongoDB through QuestionDAO.
    """
    dao = QuestionDAO()

    api = StackAPI('stackoverflow')
    api.page_size = 100
    api.max_pages = 15

    # Whole month: day 1 through the month's last day per monthrange.
    start = date(year, month, 1)
    end = date(year, month, monthrange(year, month)[1])

    response = api.fetch('questions', fromdate=start, todate=end)

    for question in response["items"]:
        dao.add_question(question)
示例#10
0
def main():
	"""Fetch high-interest JSON-tagged questions and print the interesting
	sentences found in them."""
	load_tags()
	api = StackAPI('stackoverflow', key='add your key here')
	# Limits sized from the equivalent StackExchange DataExplorer query
	# (https://data.stackexchange.com/stackoverflow/query/edit/1019759):
	# ~33343 threads in the last year (8508 with >=2 answers),
	# ~111,026 in the last 3 years.
	api.page_size = 100
	api.max_pages = 400
	# Questions from the last year (run on March 29, 2019); filter built at
	# https://api.stackexchange.com/docs/questions
	questions = api.fetch('questions', fromdate=datetime(2018,3,29), todate=datetime(2019,3,29), min=0, sort='votes', tagged='json', filter='!-*jbN-o8P3E5')
	init_corenlp()
	for sentence in find_interesting_sentences(questions):
		if isinstance(sentence, ConditionalSentence):
			sentence.print('|')
		else:
			sentence.print("WordPatternBaseline", '|')
示例#11
0
def get_answers(ques_id):
    """Fetch the top-voted answer body for each question id.

    :param ques_id: iterable of Stack Overflow question ids
    :return: list of answer bodies as plain text (HTML stripped)
    """
    answers_id = []
    answers_body = []
    try:
        SITE = StackAPI('stackoverflow')
        SITE.page_size = 10
        SITE.max_pages = 5
        for item in ques_id:
            answers = SITE.fetch('questions/{}/answers'.format(item),
                                 page=1,
                                 pagesize=1,
                                 order='desc',
                                 sort='votes',
                                 filter='withbody')
            items = answers[u'items']
            # Skip unanswered questions instead of raising IndexError.
            if not items:
                continue
            answers_id.append(items[0][u"answer_id"])
            answers_body.append(BeautifulSoup(items[0][u"body"], "lxml").text)
    except StackAPIError as e:
        # Python 3 print call (original used Python 2 `print e.message`).
        print(e.message)
    return answers_body
示例#12
0
def getQuestions():
    """For each sizeable issue row, search Stack Overflow for C# questions
    whose title mentions the issue's repository name, appending each raw
    response to the module-level ``questions`` list."""
    SITE = StackAPI('stackoverflow')
    SITE.max_pages = 1
    SITE.page_size = 100

    def _search(repo_name):
        # One search/advanced call restricted to C# and the repo title.
        return SITE.fetch('search/advanced',
                          tagged='C#',
                          title='{}'.format(repo_name))

    lastrepo = ""
    for issue in issue_results:
        # Query once per repo, and only for issues with enough fields.
        if (len(issue) >= 6 and lastrepo != getRepoName(issue[0])):
            print(issue)
            try:
                questions.append(_search(getRepoName(issue[0])))
            except Exception:
                # Narrowed from a bare `except:`; pause for a possible
                # throttle window and retry once.
                time.sleep(5)
                questions.append(_search(getRepoName(issue[0])))
            lastrepo = getRepoName(issue[0])
            time.sleep(15)  # stay well under the API rate limit
示例#13
0
def retrieve_questions(sitename):
    """Use stack exchange API to retrieve questions

    Either issues a fresh request (request_query=True) and caches the JSON
    response to disk, or replays the cached response so experiments can be
    repeated without overloading the API.

    :param sitename: Name of StackExchange community
    :type sitename: string
    :return: question objects
    :rtype: list
    """
    cache_path = "{}_questions_cache.json".format(sitename)
    if request_query:
        api = StackAPI(sitename, key=params["se_key"])
        api.page_size = 50
        api.max_pages = 1000  # max questions = page_size * max_pages
        # This filter includes both question and answer text.
        questions = api.fetch("questions",
                              filter="!-*jbN-o8P3E5",
                              sort="votes")
        with open(cache_path, "w") as f:
            json.dump(questions, f)
    else:
        with open(cache_path, "r") as f:
            questions = json.load(f)
    # Quota/paging diagnostics go to stderr so stdout stays clean.
    print(
        "quota max",
        questions["quota_max"],
        "quota remaining",
        questions["quota_remaining"],
        "total",
        questions["total"],
        "page",
        questions["page"],
        file=sys.stderr,
    )
    print("retrieved {} questions".format(len(questions["items"])))
    return questions["items"]
示例#14
0
def fetch_results(number_of_results, days):
    """Fetch recent Android questions twice: sorted by creation date and
    sorted by votes.

    Counts above 20 are clamped; zero arguments fall back to 10 results
    over 7 days; negative arguments yield (None, None).

    :return: (questions_by_time, questions_by_vote) as lists of items
    """
    number_of_results = min(number_of_results, 20)
    if days < 0 or number_of_results < 0:
        return None, None
    if number_of_results == 0 or days == 0:
        number_of_results = 10
        days = 7

    today = datetime.date.today()
    week_ago = today - datetime.timedelta(days=days)
    # Window in epoch seconds; +86400 so today is fully included.
    end_sec = int(time.mktime(today.timetuple()) + 86400)
    start_sec = int(time.mktime(week_ago.timetuple()))

    api = StackAPI('stackoverflow')
    api.page_size = number_of_results
    api.max_pages = 1
    by_time = api.fetch('questions', fromdate=start_sec, todate=end_sec,
                        tagged='Android', sort='creation', filter='!9YdnSIN*P')
    by_vote = api.fetch('questions', fromdate=start_sec, todate=end_sec,
                        tagged='Android', sort='votes', filter='!9YdnSIN*P')

    print('Done fetching.')
    return by_time['items'], by_vote['items']
示例#15
0
def config_api(num_of_queries=10):
    """Return a Stack Overflow client configured for *num_of_queries*
    pages of 100 results each, using the shared API key."""
    client = StackAPI('stackoverflow')
    client.key = 'kBC4LfDjAYFLSEFWyrDhdw(( '
    client.page_size = 100
    client.max_pages = int(num_of_queries)
    return client
from stackapi import StackAPI  #using stackapi

si = StackAPI('stackoverflow')
si.max_pages = 150   # fetch questions from 150 pages
si.page_size = 100   # 100 questions per page
q = si.fetch('questions', min=10)  # only questions with score >= 10

# Build one fastText-style line per question:
# "__label__tag1 __label__tag2 <title>\n".
# Collect pieces in a list and join once instead of quadratic `+=`.
parts = []
c = 0
for quest in q['items']:
    c = c + 1
    for label in quest.get('tags', []):
        parts.append("__label__" + label.replace(" ", "-") + " ")
    parts.append(quest['title'] + "\n")
data = "".join(parts)
print(c)
print(data)
# `with` guarantees the file is closed even if the write fails.
with open("questions.txt", "w") as text_file:
    text_file.write(data)
示例#17
0
# # API

from stackapi import StackAPI
from time import sleep
from random import randint

site = StackAPI('stackoverflow')

# Site parameters: 20 pages of 100 users each.
site.page_size = 100
site.max_pages = 20

# Fetch the user list and flatten the response items into a DataFrame.
users = site.fetch('users')
users = pd.DataFrame(dict(users.items())['items'])

# Top tags per user, fetched in batches of 100 ids with a polite pause.
tags = []
for batch in range(20):
    batch_ids = users['user_id'][batch * 100:(batch + 1) * 100]
    tags = tags + site.fetch('/users/{ids}/top-tags', ids=batch_ids)['items']
    sleep(randint(8, 12))

# One indicator column per tag, weighted by answer score, summed per user.
tags = pd.DataFrame(tags)
indicator = pd.get_dummies(tags['tag_name'])
indicator = indicator[indicator.columns].multiply(tags["answer_score"],
                                                  axis="index")
indicator['user_id'] = tags['user_id']
indicator = indicator.groupby('user_id').sum().reset_index()
#!/usr/bin/env python
# coding: utf-8

# In[4]:

from stackapi import StackAPI, StackAPIError

# Passing a key raises the request quota and avoids throttling.
SITE = StackAPI(
    'stackoverflow',
    key='Qw9QT*o*6*NoY1ZHKGsVNg((')
SITE.max_pages = 3    # number of pages to fetch
SITE.page_size = 100  # number of posts per page
# Fetch .net questions sorted by recent activity.
questions = SITE.fetch(
    'questions', sort='activity',
    tagged='.net')

print(len(questions['items']))  # how many questions were fetched

# Collect every question id from the result set.
qid = [i['question_id'] for i in questions['items']]
print(len(qid))

# In[7]:

#Fetching all the answers for the respective questions
示例#19
0
#!/usr/bin/env python3

from stackapi import StackAPI
import csv

# Read space-separated tags from stdin and convert them to the
# semicolon-separated form the API's `tagged` parameter expects.
input_string = input()
tagip = input_string.strip().replace(" ", ";")

SITE = StackAPI('stackoverflow')
SITE.page_size = 50
SITE.max_pages = 5
questions = SITE.fetch('questions', tagged=tagip, sort='votes')

# Normalised tag key, e.g. "Python;Flask" -> "python_flask".
usertag = tagip.lower().replace(";", "_")
arrtag = usertag.split("_")

cnt = 0
dictnew = {}
listfinal = []
for k in questions["items"]:
    flgval = 1
    if "accepted_answer_id" in k.keys(
    ):  #if k.keys has accepted_answer_id then proceed, also check if all tags are present
        for tval in arrtag:
            if tval not in k["tags"]:
                flgval = 0
        if flgval == 1:
            # NOTE(review): dictnew is reused across iterations and never
            # appended to listfinal in the visible code (cnt is also
            # unused) — presumably this snippet is truncated; confirm
            # against the full source before relying on listfinal.
            dictnew["question_id"] = k["question_id"]
            dictnew["tag"] = usertag
            dictnew["link"] = k["link"]
            dictnew["tags"] = k["tags"]
示例#20
0
    "Stack Overflow": "stackoverflow",
    "ServerFault": "serverfault",
    "Electrical Engineering": "electronics",
    "Super User": "******",
    "Mathematics": "math",
    "Ask Ubuntu": "askubuntu"
}

# One configured Stack Exchange client per supported site, keyed by
# display name; left empty when the stackapi package is unavailable.
SITES = {}

try:
    from stackapi import StackAPI
    for siteName, siteHandle in avalSites.items():
        SITES[siteName] = StackAPI(siteHandle, key=SEAPIkey)
        SITES[siteName].max_pages = 1
        SITES[siteName].page_size = 100
except Exception:
    # Narrowed from a bare `except:`. Note this branch also fires when
    # StackAPI *construction* fails (bad handle / network), not only on
    # an actual import error.
    print("failed to import stackAPI")

#SITE = StackAPI('electronics',key=key)

fakeQuestion = {
    "question": {
        "id": "12345",
        "title": "who gives a f**k?",
        "content": "lolol man this content is trash hahaha"
    },
    "answers": [{
        "id": "55555",
        "votes": 15,
        "content": "happy answer 15"
示例#21
0
    def collect(self, *args, **options):
        """Fetch top-voted, accepted questions from Stack Overflow, scrape
        each accepted answer's page, and return a list of
        {question, answer, source, tags} dicts.

        Recognised options: min (score floor), tagged (comma-separated
        tags), count (number of questions), fromdate/todate (YYYYMMDD).
        """
        # print(args)
        print(options)
        min = self.min_score  # NOTE(review): shadows the builtin `min`
        if options['min']:
            min = options['min']
        tagged = self.tags
        if options['tagged']:
            tagged = options['tagged'].split(',')
        qa_list = []
        try:
            site = StackAPI('stackoverflow', key=APP_KEY)
            # site = StackAPI(self.site_name)
            if 'count' in options:
                count = options['count']
                if count < 100:
                    site.page_size = count
                    site.max_pages = 1
                else:
                    # NOTE(review): true division yields a float here;
                    # presumably an integer page count was intended —
                    # confirm (`count // site.page_size`?).
                    site.max_pages = count / site.page_size

            epoch_str = '19700101'  # NOTE(review): unused variable
            fromdate = time.strptime("19700101", '%Y%m%d')
            todate = datetime.datetime.now()
            if options['fromdate']:
                fromdate = time.strptime(options['fromdate'], '%Y%m%d')
            if options['todate']:
                todate = time.strptime(options['todate'], '%Y%m%d')
            # calling fetch with various parameters - http://stackapi.readthedocs.io/en/latest/user/advanced.html#calling-fetch-with-various-api-parameters
            questions = site.fetch('questions',
                                   min=min,
                                   tagged=tagged,
                                   sort='votes',
                                   accepted='True',
                                   fromdate=fromdate,
                                   todate=todate)
            # Re-issue the request until the API stops throttling us.
            while (self.wait_if_throttled(questions)):
                questions = site.fetch('questions',
                                       min=min,
                                       tagged=tagged,
                                       sort='votes',
                                       accepted='True',
                                       fromdate=fromdate,
                                       todate=todate)
            total = len(questions['items'])
            print('Collecting from %s. No of questions = %d' %
                  (self.site_name, total))
            processed = 0
            added = 0
            skipped = 0
            for q in questions['items']:
                time.sleep(
                    1 / 25
                )  # this is to ensure less than 30 req per second (https://api.stackexchange.com/docs/throttle)
                if 'accepted_answer_id' in q.keys():
                    question = q['title']
                    tags = q['tags']
                    # Resolve the accepted answer's link, retrying on throttle.
                    aa = site.fetch('posts', ids=[q['accepted_answer_id']])
                    while (self.wait_if_throttled(aa)):
                        aa = site.fetch('posts', ids=[q['accepted_answer_id']])
                    answer_link = aa['items'][0]['link']
                    answer = self.extract_accepted_answer_post(answer_link)
                    scraped = {
                        'question': question,
                        'answer': answer,
                        'source': answer_link,
                        'tags': tags
                    }
                    # print scraped
                    # print question
                    qa_list.append(scraped)
                    if self.add_qa(scraped):
                        added += 1
                    else:
                        skipped += 1
                processed += 1
                self.show_progress(processed, total, added, skipped)
        except StackAPIError as e:
            print('Failed to fetch data from stack overflow: [%s]. Skipping.' %
                  e.message)
        return qa_list
示例#22
0
from flask import current_app, Flask, render_template, request, session, jsonify, abort, Blueprint
from app.models import Answers, Questions, db
from app.functions import *
from . import search

from lxml import html
from lxml.etree import HTMLParser
from app.functions import AlchemyEncoder
from stackapi import StackAPI
from urllib.parse import unquote
from app.celery_tasks import insertQuestion, insertAnswer

# HTML parser that drops whitespace-only text nodes.
whitespace_parser = HTMLParser(remove_blank_text=True)
# Shared Stack Overflow client: a single page of 20 results per fetch.
stackOverflowConnection = StackAPI('stackoverflow')
stackOverflowConnection.page_size = 20
stackOverflowConnection.max_pages = 1

# Blueprint mounted at the site root.
bp = Blueprint('main', __name__, url_prefix='/', static_folder='static')


def stack_get_answers(question_id):
    """
    Api -> https://api.stackexchange.com/docs/types/answer

    Fetch the answers for *question_id*, sorted by votes, with the answer
    body included via the 'withbody' filter.
    """
    # NOTE(review): the visible code never returns returnedAnswers —
    # presumably this snippet is truncated; confirm against full source.
    returnedAnswers = stackOverflowConnection.fetch('questions/{ids}/answers',
                                                    ids=[int(question_id)],
                                                    sort='votes',
                                                    filter="withbody")

    # answerID = [items['answer_id'] for items in returnedAnswers['items']][0:20]
示例#23
0
day = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
r = datetime.datetime.today().weekday()-1

# Current time
now = datetime.datetime.now()
tf = cal_fromdate(now, 1)  # window start
tt = cal_todate(now, 1)  # window end

# Convert the 'YYYY-mm-dd HH:MM:SS' strings to epoch seconds.
fromdate = time.mktime(datetime.datetime.strptime(
    tf, '%Y-%m-%d %H:%M:%S').timetuple())

todate = time.mktime(datetime.datetime.strptime(
    tt, '%Y-%m-%d %H:%M:%S').timetuple())

SITE = StackAPI('stackoverflow')
SITE.page_size = 100
SITE.max_pages = 100
# Newest questions with creation time inside [fromdate, todate].
questions = SITE.fetch('questions', min=int(fromdate), max=int(todate), sort='creation',
                       filter="!LaSRLvLhBKxW(RHyO8wrN-")

# Column accumulators for the fetched questions.
idd = []
title = []
body = []
creation_date = []
tags = []
view_count = []
up_vote_count = []

# NOTE(review): only the id/title appends are visible here — the loop body
# is presumably truncated (the other accumulators are never filled).
for question in questions['items']:
    idd.append(question['question_id'])
    title.append(question['title'])
    def post(self, request):
        """Build a Stack Exchange 'questions' query from the request body,
        execute it, and return cached results when the identical query was
        run before (HTTP 200), otherwise store and return the new results
        (HTTP 201)."""
        site = StackAPI('stackoverflow')
        # Whitelist of parameters that may be forwarded to the API.
        query_parameters = [
            'fromdate', 'todate', 'min', 'sort', 'tag', 'page', 'page-size',
            'order', 'max'
        ]
        query_string = f''
        print(request.data)

        # Translate each posted key/value into a keyword-argument fragment.
        for key, value in request.data.items():
            if key == 'page-size' and request.data.get(
                    'sort') == 'votes' and value and value != "None":
                # set page_size to passed value
                print('passing')
                site.page_size = value
                pass
            elif key == 'page' and request.data.get(
                    'votes') and value and value != "None":
                # set max_pages to passed value
                print('p2')
                site.max_pages = value
                pass
            elif key == 'max' or key == 'min' and value and value != "None":
                # NOTE(review): `and` binds tighter than `or`, so key=='max'
                # alone satisfies this condition regardless of value —
                # confirm the intended grouping.
                if request.data.get('sort') == 'hot' \
                    or request.data.get('sort') == 'week' or request.data.get('sort') == 'month':
                    if value:
                        query_string += f'{key}="{value}", '
                elif request.data.get(
                        'sort') == 'activity' or request.data.get(
                            'sort') == 'creation':
                    if value:
                        query_string += f'{key}={value}, '
            elif key == 'order' or key == 'sort' or key == 'tag' and value and value != "None":
                if key == 'page-size':
                    pass
                else:
                    if value:
                        query_string += f'{key}="{value}", '
            elif key in query_parameters and value and value != "None":
                if key == 'page-size':
                    pass
                else:
                    if value:
                        query_string += f'{key}={value}, '

        # Drop the trailing ', ' separator.
        if query_string.endswith(', '):
            query_string = query_string[0:-2]

        print(f"site.fetch('questions', {query_string})")
        # HACK/security: eval() on a string assembled from request data can
        # execute arbitrary code if a value escapes the quoting above —
        # should be replaced with explicit keyword arguments. Flagged here,
        # not changed, to preserve behavior.
        questions = eval(f"site.fetch('questions', {query_string})")
        existing_query = Query.objects.filter(query=query_string)
        print(existing_query)

        if existing_query.exists():
            print("cached")
            serialized_data = self.serializer_class(existing_query, many=True)
            return Response(serialized_data.data, status=status.HTTP_200_OK)
        else:
            query = Query.objects.create(query=query_string,
                                         results=questions,
                                         user=request.user)
            serialized_data = self.serializer_class(query)
            return Response(serialized_data.data,
                            status=status.HTTP_201_CREATED)
import datetime
import typing

from bs4 import BeautifulSoup
from stackapi import StackAPI

from stackrunner._meta import config
'''
Pass this as key when making requests against the Stack Exchange API to receive a higher request quota.

This is not considered a secret, and may be safely embed in client side code or distributed binaries.
'''
APP_KEY = 'i1jWtawQVUugZZgFSlTlTg(('

# Shared module-level client: a single page of 50 results per request.
StackOverflowApi = StackAPI('stackoverflow', key=APP_KEY)
StackOverflowApi.page_size = 50
StackOverflowApi.max_pages = 1


def fetch_code(
        keyword: str,
        config: config.RunnerConfig) -> typing.Generator[str, None, None]:
    question_options = {
        'order': 'desc',
        'sort': 'relevance',
        'q': keyword,
        'nottagged': config.not_tags,
        'tagged': config.tags,
        'filter': '!b93xdWqUwqOO7m'
    }
    answer_options = {
from stackapi import StackAPI
from flask import Flask, jsonify
from flask import request
from flask import render_template
# Flask app rooted at this file's directory; logs to stderr at INFO.
app = Flask(__name__, template_folder="templates")
app.root_path = os.path.dirname(os.path.abspath(__file__))
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
LOG = logging.getLogger()
# Stack Exchange client configuration constants.
SITE_TYPE = "stackoverflow"
PAGE_SIZE = 100
MAX_PAGES = 1
TOP_N = 10
N_DAYS = 7

site = StackAPI(SITE_TYPE)
site.page_size = PAGE_SIZE
site.max_pages = MAX_PAGES


@app.route("/most_recent_questions", methods=['GET'])
def get_most_recent_questions():
    """
    This function is used to get the top 10 most recent asked questions for a particular tag from stackoverflow
    :return:
    """
    try:
        LOG.info("The request is: {} ".format(request))
        post_type = request.args.get('post_type', 'questions')
        tagged = request.args.get('tagged', 'android')
        sort = request.args.get('sort', 'creation')
        response_items = []