Example #1
def get_n(url):
    res = crawler.connect(url)
    soup = BeautifulSoup(res, "lxml")
    kekka = soup.find("p", attrs={"class": "pageLink"}).string
    kekka = rm_escape(str(kekka))
    num = int(kekka.split("件")[0])  # the hit count precedes the first "件" in the pageLink text

    return num
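A minimal usage sketch for get_n, assuming the pageLink paragraph carries the hit count in front of the first "件"; the sample markup below is an illustration, not taken from the real site.

from bs4 import BeautifulSoup

sample_html = '<p class="pageLink">2345件中 1-100件を表示</p>'  # assumed markup
text = BeautifulSoup(sample_html, "lxml").find("p", attrs={"class": "pageLink"}).string
print(int(text.split("件")[0]))  # -> 2345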
Example #2
def companies():
    url = "http://www.jpx.co.jp/markets/statistics-equities/misc/01.html"
    root = "http://www.jpx.co.jp"

    res = crawler.connect(url)
    soup = BeautifulSoup(res, "lxml")
    link = soup.table.td.a["href"]
    res = crawler.connect(root + link)

    if res != "":
        with open(os.path.expanduser("~/c_info/data_j.xls"), "wb") as f:
            f.write(res)

        market = {"出資証券":"0", "市場第一部(内国株)":"1", "市場第二部(内国株)":"2", "マザーズ(内国株)":"3", \
        "JASDAQ(スタンダード・内国株)":"4", "ETF・ETN":"5", "REIT・ベンチャーファンド・カントリーファンド・インフラファンド": "6"}

        df = pd.read_excel(os.path.expanduser("~/c_info/data_j.xls"))
        df["市場・商品区分"] = df["市場・商品区分"].map(market)
        df = df.loc[:, [
            "コード",
            "銘柄名",
            "市場・商品区分",
            "33業種区分",
            "17業種区分",
        ]].applymap(make_blank)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
        timestamps = pd.Series([timestamp] * len(df))
        df["created_at"] = timestamps
        df["updated_at"] = timestamps
        df.to_csv(os.path.expanduser("~/c_info/companies.csv"),
                  index=False,
                  header=False,
                  encoding="utf-8")

    else:
        print("更新に失敗しました。")
Example #3
def check_node():
    """
    Checks the current status of a node. This is a live result, so response times will be longer - to view a saved
    result see /api/check_historic_node.
    :param node: connection string, e.g. ok:127.0.0.1:6970 - port is optional if it is the network default
    :param to_services (integer, optional): outgoing services to broadcast, default=0
    :param from_services (integer, optional): outgoing services to broadcast, default=0
    :param version (integer, optional): version code to broadcast, default varies by network
    :param user_agent (string, optional): user agent to broadcast, default="/oknodes:0.1/"
    :param height (integer, optional): block height to broadcast during handshake. default=network median
    :param p2p_nodes (bool, optional): issues a getaddr call and returns the list of connected nodes, default=False
    :return: json dict {"result": {"user_agent": "/oktoshi:5.0.0.2/", "version": ....}, "nodes": [["127.0.0.1:6970", 157532132191], ...]}
    """

    dat = request.form
    node = dat.get("node")
    network, address, port, resp = deconstruct_address_string(node)

    network_data = CONF['networks'][network]
    if dat.get("height"):
        network_data['height'] = dat.get("height")
    else:
        with open("static/network_summaries.json", 'r') as f:
            network_data['height'] = int(json.load(f)[network]['med'])

    network_data['protocol_version'] = dat.get(
        "version") or network_data['protocol_version']
    result = connect(network,
                     address,
                     port,
                     to_services=dat.get("to_services")
                     or network_data['services'],
                     network_data=network_data,
                     user_agent=dat.get("user_agent") or None,
                     p2p_nodes=False,
                     explicit_p2p=dat.get("p2p_nodes") or False,
                     from_services=dat.get('from_services') or None,
                     keepalive=False)

    resp['result'] = geocode(result[0])
    resp['nodes'] = result[1]
    return to_json(resp)
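check_node() reads its parameters from request.form, so a client would POST them as form data. A minimal sketch, assuming the view is routed at /api/check_node (host, port, and route are assumptions):

import requests

# Hypothetical client call; the actual mount point depends on the Flask app's routing.
payload = {
    "node": "ok:127.0.0.1:6970",    # network:address:port connection string
    "user_agent": "/oknodes:0.1/",  # optional override
}
response = requests.post("http://localhost:5000/api/check_node", data=payload)
print(response.json()["result"])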
Example #4
def e_code_list():
    root = "https://disclosure.edinet-fsa.go.jp"
    top = "/E01EW/download?1512538021191"
    uji_bean = "ee.bean.W1E62071.EEW1E62071Bean"
    uji_verb = "W1E62071EdinetCodeDownload"
    SESSIONKEY = top.split("?")[1]
    file_info = "lgKbn=2&dflg=0&iflg=0&dispKbn=1"
    _id = "W1E62071"
    components = [
        top, "uji.bean=" + uji_bean, "uji.verb=" + uji_verb, "TID=" + _id,
        "PID" + _id, "SESSIONKEY=" + SESSIONKEY, file_info
    ]
    url = root + "&".join(components)

    res = crawler.connect(url)

    with open(os.path.expanduser("~/c_info/EdinetcodeDlInfo.zip"), "wb") as f:
        f.write(res)

    with zipfile.ZipFile(os.path.expanduser("~/c_info/EdinetcodeDlInfo.zip"),
                         "r") as input_file:
        input_file.extractall(path=os.path.expanduser("~/c_info"))

    os.remove(os.path.expanduser("~/c_info/EdinetcodeDlInfo.zip"))
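A possible follow-up for consuming the extracted archive. The CSV filename, its cp932 encoding, and the one-line preamble are assumptions about EDINET's download format, not something the code above guarantees:

import os
import pandas as pd

csv_path = os.path.expanduser("~/c_info/EdinetcodeDlInfo.csv")       # assumed extracted filename
edinet_codes = pd.read_csv(csv_path, encoding="cp932", skiprows=1)   # assumed encoding and preamble row
print(edinet_codes.head())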
Example #5
def get_components(url):
    root = "https://disclosure.edinet-fsa.go.jp"
    pdfs = []
    xbrls = []
    titles = []
    codes = []

    res = crawler.connect(url)
    soup = BeautifulSoup(res, "lxml")

    for row in soup.find_all("tr"):
        for cell in row.find_all("td",
                                 class_="table_border_1 table_cellpadding_1 "):
            if cell.find("a",
                         attrs={
                             "onclick":
                             re.compile("^return clickDocNameForNotPaper")
                         }) is not None:
                # store the document title
                title = str(cell.a.string)
                title = rm_escape(title)

                if not re.match("有価*", title):
                    break

                titles.append(title)

            elif cell.find("img", attrs={"alt": "PDF"}) != None:
                #pdfのリンクを格納
                pdf = root + cell.a["href"]
                pdfs.append(pdf)

            elif cell.find("img", attrs={"alt": "XBRL"}) != None:
                #xbrlのリンクを格納
                xbrl = cell.a["onclick"]
                components = xbrl.split("\'")
                top = components[7]
                uji_bean = components[3]
                uji_verb = components[1]
                SESSIONKEY = top.split("?")[1]
                file_info = components[5]
                components = [
                    top, "uji.bean=" + uji_bean, "uji.verb=" + uji_verb,
                    "SESSIONKEY=" + SESSIONKEY, file_info
                ]
                xbrl = root + "&".join(components)

                xbrls.append(xbrl)

            elif cell.div is not None:
                # store the EDINET code
                content = str(cell.div.string)
                content = rm_escape(content)

                if re.match("^E", content):
                    code = content
                    code = e_to_s(code)
                    codes.append(code)

                elif cell.div.br is not None:
                    content = str(cell.div)
                    content = rm_escape(content)
                    code = content.split(">")[1].replace("/<br/", "")
                    code = e_to_s(code)
                    codes.append(code)

    # the four lists must stay aligned; otherwise report the counts and return empty lists
    if len(codes) == len(titles) == len(pdfs) == len(xbrls):
        return codes, titles, pdfs, xbrls
    else:
        print(len(codes), len(titles), len(pdfs), len(xbrls))
        return [], [], [], []
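get_components() returns four parallel lists (or four empty ones when the counts disagree), so a caller would typically zip them together. A minimal sketch; the results-page URL is a placeholder:

codes, titles, pdfs, xbrls = get_components("https://disclosure.edinet-fsa.go.jp/...")  # placeholder URL
for code, title, pdf_url, xbrl_url in zip(codes, titles, pdfs, xbrls):
    # each tuple pairs a securities code and filing title with its PDF and XBRL download URLs
    print(code, title, pdf_url, xbrl_url)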