def followingsList(self, login):
    """Resolve login info for every account that *login* follows.

    Parameters:
        login: GitHub login name whose followings are fetched.

    Returns:
        List of ``self.getLogin`` results (one per following), or the
        sentinel 0 when ``getFollowing`` returned 0 (kept for backward
        compatibility with existing callers).
    """
    followings = self.getFollowing(login)
    # Upstream signals "nothing / failed" with the sentinel 0.
    if followings == 0:
        return 0
    # Only build the pool when there is work, and always release it --
    # the original created a ThreadPool per call and never closed it.
    pool = mp.ThreadPool(self.num_threads)
    try:
        return pool.map(self.getLogin, followings)
    finally:
        pool.close()
        pool.join()
def followersList(self, listLogin):
    """Map each login in *listLogin* to its follower data.

    Parameters:
        listLogin: iterable of dicts; entries whose "login" key is
            missing or None are skipped.

    Returns:
        dict of login -> "followers" value as produced by
        ``self.getFollowersLazy``.
    """
    logins = [entry.get("login") for entry in listLogin
              if entry.get("login") is not None]
    # Close/join the pool -- the original leaked one pool per call.
    pool = mp.ThreadPool(self.num_threads)
    try:
        fetched = pool.map(self.getFollowersLazy, logins)
    finally:
        pool.close()
        pool.join()
    # Dict comprehension replaces the manual accumulation loop; a later
    # duplicate login overwrites an earlier one, same as the original.
    return {item["login"]: item["followers"] for item in fetched}
def __init__(self, dict_planner, weight_scenario, reuse_plans: bool = True,
             multithread: bool = True, nb_thread: int = 5,
             verbose: bool = False):
    """Initialize per-scenario planning state and the worker dispatcher.

    Parameters:
        dict_planner: planners keyed by scenario (consumed elsewhere).
        weight_scenario: weights associated with the scenarios.
        reuse_plans: whether previously computed plans may be reused.
        multithread: when True, work is fanned out through a thread pool.
        nb_thread: thread-pool size used when multithreading is enabled.
        verbose: enables extra output elsewhere in the class.
    """
    self.dict_planner = dict_planner
    self.weight_scenario = weight_scenario
    # Per-scenario bookkeeping, all backed by the project's tree() factory.
    self.plan_by_scenario = tree()
    self.action_by_scenario = tree()
    self.q_values_scenar = tree()
    self.q_values = tree()
    self.planned = tree()
    self.nb_thread = nb_thread
    self.multithread = multithread
    # The pool is created unconditionally; only the dispatcher below
    # decides whether it is actually used.
    self.pool_threads = pp.ThreadPool(self.nb_thread)
    if self.multithread:
        self.launch_things = self.pool_threads.map
    else:
        self.launch_things = map
    self.reuse_plans = reuse_plans
    self.verbose = verbose
def requestMany(self, urlList, category):
    """Fan *urlList* out to the worker method matching *category*.

    Parameters:
        urlList: iterable of URLs/items handed to the per-category worker.
        category: one of "issues", "commits" or "prs".

    Returns:
        list of worker results; [] for an unknown category or when the
        pool map raised TypeError (mirrors the original behavior).
    """
    print("request Many in once .....")
    results_list = []
    # Connectivity probe / reconnection loop.
    # NOTE(review): the probe URL is hard-coded to one repository and is
    # unrelated to urlList -- presumably just a reachability check; any
    # status other than 200/404 loops forever. TODO confirm intent.
    while True:
        urlBase = "https://api.github.com/repos/mvgolom/letroca/commits"
        credencial = ('?per_page=100&client_id=' + self.client_id +
                      '&client_secret=' + self.client_secret)
        urlf = "{}{}".format(urlBase, credencial)
        response = requests.get(urlf)
        self.verifyRequestLimit(response.headers)
        if int(response.status_code) == 200:
            break
        if int(response.status_code) == 404:
            self.wait_for_internet_connection()
    # Dispatch table replaces three duplicated try/except branches.
    workers = {
        "issues": self.requesterIssueComments,
        "commits": self.requesterCommitInfo,
        "prs": self.requesterPRInfo,
    }
    worker = workers.get(category)
    if worker is not None:
        pool = mp.ThreadPool(self.num_threads)
        try:
            results_list = pool.map(worker, urlList)
        except TypeError as e:
            # The original "issues" branch only had `#print urlList`
            # commented out; made consistent with the other branches.
            print(urlList)
            print('I got a TypeError - reason "%s"' % str(e))
        finally:
            # Release the pool -- the original leaked one per call.
            pool.close()
            pool.join()
    return results_list
def requestOne(self, url, raw=False):
    """Fetch every page of *url* and flatten the paginated results.

    Parameters:
        url: API path to fetch (paginated via ``self.multiRequester``).
        raw: when True, return the last response object untouched
            instead of the flattened list.

    Returns:
        Flattened list of page results; the raw response when ``raw`` is
        True; or [] when the single-page request returned the -1 sentinel.
    """
    results_list = []
    bodyEmpty = False
    # Connectivity probe / reconnection loop: retry until a 200; on 404
    # wait for the connection to return. NOTE(review): statuses other
    # than 200/404 spin forever -- TODO confirm that is intended.
    while True:
        urlBase = "https://api.github.com/"
        credencial = ('?per_page=100&client_id=' + self.client_id +
                      '&client_secret=' + self.client_secret)
        response = requests.get("{}{}".format(urlBase, credencial))
        self.verifyRequestLimit(response.headers)
        if int(response.status_code) == 200:
            break
        if int(response.status_code) == 404:
            self.wait_for_internet_connection()
    numPages = self.getRange(url)
    if numPages > 1:
        # Pool is created only when needed and always released -- the
        # original leaked a ThreadPool on every call.
        pool = mp.ThreadPool(self.num_threads)
        try:
            results_list = pool.map(partial(self.multiRequester, url=url),
                                    list(range(1, numPages + 1)))
        finally:
            pool.close()
            pool.join()
    else:
        response = self.requester(url)
        if response != -1:
            results_list.append(response)
        else:
            bodyEmpty = True
            # Dropped the original's second self.getRange(url) here: its
            # return value was discarded, so it only burned an API call.
    if not bodyEmpty and not raw:
        # chain.from_iterable flattens directly; no index loop needed.
        return list(chain.from_iterable(results_list))
    if raw:
        # NOTE(review): in the multi-page path this is the probe-loop
        # response, not page data -- preserved as-is; confirm callers
        # only pass raw=True for single-page requests.
        return response
    return []
def getFollowing(self, name):
    """Return the accounts that *name* follows, across all pages.

    Parameters:
        name: GitHub login to query.

    Returns:
        byteify'd flattened list of "following" entries, or the
        sentinel 0 when the single-page response body was empty
        (callers test for 0 -- see followingsList).
    """
    auths = self.clientInfo
    bodyEmpty = False
    url = ('https://api.github.com/users/' + name + '/following' +
           '?per_page=100&client_id=' + auths["client_id"] +
           '&client_secret=' + auths["client_secret"])
    results_list = []
    response = urllib.request.urlopen(url)
    header = response.info()
    num_pages = self.get_pages_range_max(header)
    print("qtd de pages: " + str(num_pages))
    urlList = [{"url": url, "index": x + 1} for x in range(num_pages)]
    if num_pages > 1:
        # Release the pool when done -- the original leaked one per call.
        pool = mp.ThreadPool(self.num_threads)
        try:
            results_list = pool.map(self.requester, urlList)
        finally:
            pool.close()
            pool.join()
        # NOTE(review): the original rebuilt `url` here when
        # verify_rate_limit returned True, but the rebuilt value was
        # never used (dead code) -- only the check's side effects are
        # kept. TODO confirm a retry was intended.
        self.verify_rate_limit(header, name)
    else:
        body = json.load(urllib.request.urlopen(url))
        if body != []:
            results_list.append(self.byteify(body))
        else:
            bodyEmpty = True
    if not bodyEmpty:
        # chain.from_iterable flattens directly; no index loop needed.
        followings = list(chain.from_iterable(results_list))
        return self.byteify(followings)
    return 0
def requestOne(self, url):
    """Fetch all pages of *url* and return the flattened result list.

    Parameters:
        url: API path to fetch (paginated via ``self.multiRequester``).

    Returns:
        Flattened list of page bodies (byteify'd in the single-page
        path), or [] when the single request returned the -1 sentinel.
    """
    results_list = []
    bodyEmpty = False
    # Connectivity probe: retry until a 200; on 404 wait for network.
    # NOTE(review): probe repo is hard-coded and unrelated to *url*;
    # statuses other than 200/404 loop forever -- TODO confirm.
    while True:
        urlBase = "https://api.github.com/repos/mvgolom/letroca/commits"
        credencial = ('?client_id=' + self.client_id +
                      '&client_secret=' + self.client_secret)
        response = requests.get("{}{}".format(urlBase, credencial))
        self.verifyRequestLimit(response.headers)
        if int(response.status_code) == 200:
            break
        if int(response.status_code) == 404:
            self.wait_for_internet_connection()
    numPages = self.getRange(url)
    if numPages > 1:
        # Pool is released when done -- the original leaked one per call.
        pool = mp.ThreadPool(self.num_threads)
        try:
            # range (not xrange) keeps this Python-3 compatible,
            # matching the sibling requestOne variant in this file.
            results_list = pool.map(partial(self.multiRequester, url=url),
                                    range(1, numPages + 1))
        finally:
            pool.close()
            pool.join()
    else:
        response = self.requester(url)
        if response != -1:
            results_list.append(self.byteify(response))
        else:
            bodyEmpty = True
            # Dropped the original's discarded second self.getRange(url)
            # call: its return value was unused (one wasted API request).
    if not bodyEmpty:
        # chain.from_iterable flattens directly; no xrange index loop.
        return list(chain.from_iterable(results_list))
    return []
def request2(self, request, parameters=None):
    """Fetch all pages of a GitHub API *request*, optionally filtered.

    Python 2 code (print statements, urllib2, xrange).

    Parameters:
        request: API path appended to https://api.github.com/.
        parameters: optional list of query-string fragments; appended
            unless the first one already pins a "page=".

    Returns:
        Single-page response, or all pages flattened; on URLError the
        error is appended to error.log and None is returned implicitly.
    """
    responses = []
    vetDivs = []
    results_list = []
    # Page count is probed up front (with parameters when given).
    if (parameters != None):
        range_pages = self.range_max_verify(request, parameters)
    else:
        range_pages = self.range_max_verify(request)
    try:
        print range_pages
        print 'Processing request: ' + request
        url = 'https://api.github.com/' + request + '?client_id=' + \
            self.id + '&client_secret=' + self.secret
        if (parameters != None) and ("page=" not in parameters[0]):
            print 'Processing request: ' + request + ' ' + str(parameters)
            url = url + "&" + "&".join(parameters)
        print url
        # Rate-limit check both before and after the bulk fetch.
        rate_limite = requests.get(url)
        header = rate_limite.headers
        self.verify_rate_limit(header)
        if (range_pages == 1):
            responses = self.requester(url)
            results_list.append(responses)
        else:
            link_vet = self.link_generator(url, range_pages)
            # Split the vector into slices for the pool.
            vetDivs = self.tuple_generator(range_pages)
            # Pool of workers.
            p = mp.ThreadPool(self.num_threads)
            print len(link_vet)
            # Minimize memory: small jobs map in one shot, big jobs go
            # slice by slice (producing a list of lists).
            if (range_pages <= 100):
                results_list = p.map(self.requester, link_vet)
            else:
                for elem in vetDivs:
                    print 'Processing request: ' + str(
                        elem[0]) + " -> " + str(elem[1])
                    subvet = link_vet[elem[0]:elem[1]]
                    aux = p.map(self.requester, subvet)
                    results_list.append(aux)
            # Join the request results: one flatten for the one-shot
            # path, two for the sliced path (list of lists of lists).
            if (range_pages <= 100):
                responses = list(
                    chain.from_iterable(
                        results_list[i] for i in xrange(len(results_list))))
            else:
                aux1 = list(
                    chain.from_iterable(
                        results_list[i] for i in xrange(len(results_list))))
                responses = list(
                    chain.from_iterable(aux1[i] for i in xrange(len(aux1))))
        rate_limite = requests.get(url)
        header = rate_limite.headers
        self.verify_rate_limit(header)
        return responses
    except urllib2.URLError as error:
        # NOTE(review): `'HTTP Error 404' in error` tests membership on
        # the exception object, not on str(error) -- looks unintended;
        # confirm before relying on the reconnection path firing.
        if 'HTTP Error 404' in error:
            self.wait_internet_connection(request, parameters)
        with open('error.log', 'a') as error_file:
            error_file.write('Found a error in request: \n')
            # NOTE(review): the no-parameters URL below is missing the
            # '?' before client_id -- log-only, but worth confirming.
            if parameters is None:
                error_file.write('https://api.github.com/' + request +
                                 'client_id=' + self.id +
                                 '&client_secret=' + self.secret + '\n')
            else:
                error_file.write('https://api.github.com/' + request +
                                 '?client_id=' + self.id +
                                 '&client_secret=' + self.secret + '&' +
                                 '&'.join(parameters) + '\n')
            error_file.write('Error type: ' + str(error) + '\n\n')
        pass