def getDetails(enterprise):
    """Collect the detail record of one company into a dictionary.

    ``enterprise`` is read positionally: item 0 is the company code (logged,
    stored under ``id`` and sent as the ``CodCVM`` query parameter) and
    item 1 is stored under ``name``.

    Returns the dictionary with ``id``, ``name``, ``register_date``, whatever
    ``rowToDict`` adds from the first three ``panel1a`` tables,
    ``trading_codes``, ``sector``, ``subsector``, ``segment`` and
    ``detailwebsite``.  When a ``website`` entry exists it is re-serialised
    with ``http`` as the default scheme.
    """
    logging.info('Getting Details of Company %s', enterprise[0])
    code, company_name = enterprise[0], enterprise[1]
    details = {
        'id': code,
        'name': company_name,
        'register_date': datetime.now().isoformat(),
    }

    page = openUrl(buildUrl(URL_EDETAIL, 'CodCVM=' + code))

    # Company data: only the first three tables of the panel are read.
    company_tables = getTables(page, 'panel1a')[:3]
    for table in company_tables:
        for row in getRows(table):
            rowToDict(row, details)

    panel = HtmlElement(page).get_element_by_id('panel1a')
    code_links = HtmlElement(panel).find_class('LinkCodNeg')
    details['trading_codes'] = [link.text for link in code_links]

    # 'industry_classification' is presumably filled in by rowToDict above;
    # its first three '/'-separated parts become sector/subsector/segment.
    classification = details['industry_classification'].split('/')
    for position, key in enumerate(('sector', 'subsector', 'segment')):
        details[key] = classification[position].strip()

    details['detailwebsite'] = buildUrl(URL_RESUMO, 'codigoCvm=' + code, 'idioma=pt-BR')
    if 'website' in details:
        details['website'] = urlparse.urlsplit(details['website'], 'http').geturl()
    return details
def item(self, url, action):
    """Return a folder ``ListItem`` for this author.

    The item is labelled with ``self.author``, links to ``url`` with the
    given ``action`` and ``self.authorId`` as query arguments, carries the
    result of ``self.plot()`` as its video plot and uses ``self.thumbnail``
    as poster.
    """
    label = self.author
    target = buildUrl(url, action=action, authorId=self.authorId)
    video_infos = {"plot": self.plot()}
    return ListItem(
        label,
        target,
        isFolder=True,
        infos={"video": video_infos},
        poster=self.thumbnail,
    )
def item(self, url, action):
    """Return a folder ``ListItem`` for this playlist.

    The item is labelled with ``self.title``, links to ``url`` with the
    given ``action`` and ``self.playlistId`` as query arguments, carries the
    result of ``self.plot()`` as its video plot and uses
    ``self.playlistThumbnail`` as poster.
    """
    label = self.title
    target = buildUrl(url, action=action, playlistId=self.playlistId)
    video_infos = {"plot": self.plot()}
    return ListItem(
        label,
        target,
        isFolder=True,
        infos={"video": video_infos},
        poster=self.playlistThumbnail,
    )
def item(self, url):
    """Return a folder ``ListItem`` that runs a search for ``self.value``.

    The link always uses the ``"search"`` action with ``self.type`` and
    ``self.value`` as query arguments; ``self.value`` doubles as label,
    title and plot.  Context menus come from ``self.menus`` called with
    ``self.type`` and ``self.key``.
    """
    text = self.value
    target = buildUrl(url, action="search", type=self.type, q=self.value)
    video_infos = {
        "title": self.value,
        "plot": self.value
    }
    menus = self.menus(_type=self.type, key=self.key)
    return ListItem(
        text,
        target,
        isFolder=True,
        infos={"video": video_infos},
        contextMenus=menus,
    )
def item(self, url, action):
    """Return a folder ``ListItem`` for this author, with context menus.

    The item is labelled with ``self.author`` and links to ``url`` with the
    given ``action`` and ``self.authorId``.  Context menus come from
    ``self.menus``, which receives the author id and the UTF-8 encoded,
    ``quote_plus``-escaped author name.  ``self.thumbnail`` is the poster.
    """
    label = self.author
    target = buildUrl(url, action=action, authorId=self.authorId)
    video_infos = {"plot": self.plot()}
    # The author name is passed to the menus already URL-escaped.
    escaped_author = quote_plus(self.author.encode("utf-8"))
    menus = self.menus(authorId=self.authorId, author=escaped_author)
    return ListItem(
        label,
        target,
        isFolder=True,
        infos={"video": video_infos},
        contextMenus=menus,
        poster=self.thumbnail,
    )
def getDaeCc(nsd, nsr, infos):
    """Read the capital-composition share counts of a document into ``infos``.

    ``nsd`` and ``nsr`` are strings appended to the
    ``NumeroSequencialDocumento`` and ``NumeroSequencialRegistroCvm`` query
    parameters.  For each of the six share-count fields the value found on
    the page is converted with ``toInt``, scaled by the page multiplier and
    stored in ``infos`` under the field name; a field whose element is not
    on the page is stored as 0.  Nothing is returned.
    """
    logging.info('Getting Dados da Empresa - Composicao do Capital - %s %s', nsd, nsr)
    # NOTE(review): PARMS_BPP is passed twice, exactly as in the original
    # call; confirm whether the duplicate is intentional.
    page = HtmlElement(openUrl(buildUrl(URL_DCC, PARMS_GERAL, PARMS_BPP, PARMS_BPP,
                                        'NumeroSequencialDocumento=' + nsd,
                                        'NumeroSequencialRegistroCvm=' + nsr)))

    # Multiplier: values are scaled by 1000 when the table caption
    # contains '(Mil)'.
    em = page.xpath('.//div[@id="UltimaTabela"]/table/tr/td/b/text()')
    m = 1000 if em and '(Mil)' in em[0] else 1

    for i in ['QtdAordCapiItgz', 'QtdAprfCapiItgz', 'QtdTotAcaoCapiItgz',
              'QtdAordTeso', 'QtdAprfTeso', 'QtdTotAcaoTeso']:
        # Assuming HtmlElement is lxml's: without a default argument
        # get_element_by_id raises KeyError for a missing id, so the None
        # fallback below would never be reached.  Passing None as the
        # default makes the fallback effective.
        qnt = page.get_element_by_id('ctl00_cphPopUp_{0}_1'.format(i), None)
        infos[i] = 0 if qnt is None else toInt(qnt.text) * m
def item(self, url, **kwargs):
    """Return the folder ``ListItem`` described by the folders schema.

    The schema entry is looked up in ``_folders_schema_`` by ``self.type``
    and ``self.style``.  Its ``id`` gives the label and its optional
    ``plot`` gives the plot (defaulting to the label); integer values are
    resolved through ``localizedString``.  The link action defaults to
    ``self.type``, and schema ``kwargs`` override the caller's ``kwargs``.
    """
    def resolve(value):
        # Integers are string ids to localize; anything else is used as is.
        return localizedString(value) if isinstance(value, int) else value

    schema = _folders_schema_[self.type][self.style]
    title = resolve(schema["id"])
    target_action = schema.get("action", self.type)
    kwargs.update(schema.get("kwargs", {}))
    description = resolve(schema.get("plot", title))
    video_infos = {"title": title, "plot": description}
    return ListItem(
        title,
        buildUrl(url, action=target_action, **kwargs),
        isFolder=True,
        infos={"video": video_infos},
    )
def getDftDemRes(nsd, nsr, infos, params_geral=PARMS_GERAL):
    """Store net revenue and net income of a document in ``infos``.

    Requests the page with ``Informacao=2`` and ``Demonstracao=4``, loads
    the rows of the ``TABLE_BPP`` table through ``rowToDict`` and writes:

    * ``infos['RL']`` from account ``3.01`` (net revenue),
    * ``infos['LL']`` from account ``3.11`` (net income),

    each scaled by ``getMultiplicador(page)``, or 0 when the account is
    missing.  Nothing is returned.
    """
    logging.info('Getting DFs Consolidadas - Demonstracao do Resultado - %s %s', nsd, nsr)
    request_url = buildUrl(URL_FDF, params_geral, PARMS_BPP,
                           'NumeroSequencialDocumento=' + nsd,
                           'NumeroSequencialRegistroCvm=' + nsr,
                           'Informacao=2', 'Demonstracao=4')
    page = openUrl(request_url)

    accounts = {}
    for row in getRows(HtmlElement(page).get_element_by_id(TABLE_BPP)):
        rowToDict(row, accounts)

    # Multiplier
    multiplier = getMultiplicador(page)

    def amount(account):
        # The value is taken from index 1 of the account's entry.
        if account not in accounts:
            return 0
        return toInt(accounts[account][1]) * multiplier

    infos['RL'] = amount('3.01')
    infos['LL'] = amount('3.11')
def getDfpConBPA(nsd, nsr, infos):
    """Store the cash position of a document in ``infos['CAIXA']``.

    Requests the page with ``Informacao=2`` and ``Demonstracao=2``, loads
    the rows of the ``TABLE_BPP`` table through ``rowToDict`` and sums
    accounts ``1.01.01`` and ``1.01.02``, each scaled by
    ``getMultiplicador(page)`` and counted as 0 when missing.  Nothing is
    returned.
    """
    logging.info('Getting DFs Consolidadas - Balanco Patrimonial Ativo - %s %s', nsd, nsr)
    request_url = buildUrl(URL_FDF, PARMS_GERAL, PARMS_BPP,
                           'NumeroSequencialDocumento=' + nsd,
                           'NumeroSequencialRegistroCvm=' + nsr,
                           'Informacao=2', 'Demonstracao=2')
    page = HtmlElement(openUrl(request_url))

    accounts = {}
    for row in getRows(page.get_element_by_id(TABLE_BPP)):
        rowToDict(row, accounts)

    # Multiplier
    multiplier = getMultiplicador(page)

    def amount(account):
        # The value is taken from index 1 of the account's entry.
        if account not in accounts:
            return 0
        return toInt(accounts[account][1]) * multiplier

    # Cash
    infos['CAIXA'] = amount('1.01.01') + amount('1.01.02')
def getDfpConBPP(nsd, nsr, infos):
    """Store equity and gross debt of a document in ``infos``.

    Requests the page with ``Informacao=2`` and ``Demonstracao=3``, loads
    the rows of the ``TABLE_BPP`` table through ``rowToDict`` and writes:

    * ``infos['PL']`` from account ``2.03`` (equity),
    * ``infos['DB']`` as the sum of accounts ``2.01.04`` and ``2.02.01``
      (gross debt),

    each account scaled by ``getMultiplicador(page)`` and counted as 0 when
    missing.  Nothing is returned.
    """
    logging.info('Getting DFs Consolidadas - Balanco Patrimonial Passivo - %s %s', nsd, nsr)
    request_url = buildUrl(URL_FDF, PARMS_GERAL, PARMS_BPP,
                           'NumeroSequencialDocumento=' + nsd,
                           'NumeroSequencialRegistroCvm=' + nsr,
                           'Informacao=2', 'Demonstracao=3')
    page = openUrl(request_url)

    accounts = {}
    for row in getRows(HtmlElement(page).get_element_by_id(TABLE_BPP)):
        rowToDict(row, accounts)

    # Multiplier
    multiplier = getMultiplicador(page)

    def amount(account):
        # The value is taken from index 1 of the account's entry.
        if account not in accounts:
            return 0
        return toInt(accounts[account][1]) * multiplier

    # Equity
    infos['PL'] = amount('2.03')

    # Gross debt
    first_part = amount('2.01.04')
    second_part = amount('2.02.01')
    infos['DB'] = first_part + second_part
def getAuthenticationUrl(self, redirect_uri):
    """Return the Spotify authorization URL for ``redirect_uri``.

    The query carries ``self.CLIENT_ID``, the ``code`` response type, the
    given redirect URI, a constant ``state`` value and the four playlist
    scopes joined by single spaces.  The URL itself is assembled by
    ``buildUrl``.
    """
    scopes = [
        "playlist-read-private",
        "playlist-read-collaborative",
        "playlist-modify-public",
        "playlist-modify-private"
    ]
    query = {
        "client_id": self.CLIENT_ID,
        "response_type": "code",
        "redirect_uri": redirect_uri,
        "state": "TODO",
        "scope": " ".join(scopes)
    }
    endpoint = "https://accounts.spotify.com/authorize"
    return buildUrl(endpoint, query, {}, method="get")
def getRobotstxt(url): print url, # check that the site has not been crawled yet if not (getDateCrawled(url)[0]): robotsUrl = buildUrl(url) try: req = requests.get(robotsUrl, timeout=30) if req.status_code == 200: robots = req.content else: robots = None except requests.exceptions.Timeout: print 'Timeout!' robots = None insertRobots(url, robots) print url, " robots.txt grabbed" print robots else: print " already crawled"
def getFdFiles(edp, dftype):
    """Yield the local path of each document archive of a company.

    The entries returned by ``getFPs(edp, dftype)`` are sorted with
    ``mycmp``, entries whose ``version`` is not above 0.0 are dropped, and
    the rest are keyed by ``date`` so one entry per date remains.  For each
    remaining entry the archive path is
    ``<getPath(dftype)>/<edp>/<year>/<document>_<institution>-<version>.zip``.
    A file that already exists is yielded without downloading; otherwise
    the directory is created if needed, the archive is downloaded and
    written, and its path is yielded.

    This is a generator: nothing is downloaded until it is iterated.
    """
    logging.info('Getting {0} Infos for {1}'.format(dftype, edp))

    # Getting newer version of documents: entries are keyed by date, so an
    # entry that sorts later replaces an earlier one with the same date.
    fps = dict((i['date'], i)
               for i in sorted(getFPs(edp, dftype), cmp=mycmp)
               if i['version'] > 0.0)

    for year, fp in fps.iteritems():
        logging.info('Getting {0} of {1} for {2}'.format(dftype, edp, year.year))
        params = urlparse.parse_qs(urlparse.urlparse(fp['url']).query)
        doc_number = params['NumeroSequencialDocumento'][0]
        institution = params['CodigoTipoInstituicao'][0]
        query = 'NumeroSequencialDocumento={0}&CodigoInstituicao={1}'.format(doc_number, institution)
        filepath = path.join(getPath(dftype), str(edp), str(year.year))
        filename = '{0}/{1}_{2}-{3}.zip'.format(filepath, doc_number, institution, fp['version'])

        if path.isfile(filename):
            logging.info('File {0} exists, skiping...'.format(filename))
            yield filename
            continue

        if not path.isdir(filepath):
            makedirs(filepath)

        logging.info('Downloading file {0}'.format(filename))
        # Download before opening the target file.  If the file were opened
        # first, a failed download would leave an empty archive behind, and
        # the isfile() check above would skip it on every later run.
        content = openUrl(buildUrl(URL_DD, query), True)
        with open(filename, 'wb') as f:
            f.write(content)
        yield filename
def getFPs(edp, dftype):
    """Return the parsed document links of a company for one document type.

    Requests the ``URL_DF`` listing for ``edp`` with the lower-cased
    ``dftype`` as the ``tipo`` parameter and runs ``parseFd`` on every
    anchor found under the ``DIV_DFPS`` container.  The result is a list
    in document order.
    """
    listing_url = buildUrl(URL_DF, 'codigoCVM=' + edp, 'idioma=pt-br', 'tipo=' + dftype.lower())
    page = openUrl(listing_url)
    anchors = page.findall('.//div[@id="' + DIV_DFPS + '"]/div/div/div/div/p/a')
    return [parseFd(anchor) for anchor in anchors]
def item(self, url, action):
    """Return the list item for this video.

    The link is built from ``url`` with the given ``action`` and
    ``self.videoId``; building the item itself is delegated to
    ``self._item``.
    """
    make_item = self._item
    target = buildUrl(url, action=action, videoId=self.videoId)
    return make_item(target)
def getITRs(edp):
    """Return the parsed ITR document links of a company.

    Requests the ``URL_DF`` listing for ``edp`` with ``tipo=itr`` and runs
    ``parseFd`` on every anchor found under the ``DIV_DFPS`` container.
    The result is a list in document order.
    """
    listing_url = buildUrl(URL_DF, 'codigoCVM=' + edp, 'idioma=pt-br', 'tipo=itr')
    page = openUrl(listing_url)
    anchors = page.findall('.//div[@id="' + DIV_DFPS + '"]/div/div/div/div/p/a')
    return [parseFd(anchor) for anchor in anchors]