Exemplos de scrape em Python, exemplos de wikitablescrape.scrape em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: SwitchBrew-title-list-scraper.py Projeto: tunip3/SwitchBrew-title-list-scraper

"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Remove the CSV written by a previous run, if there is one.
# Only a missing file (or missing parent folder) is expected here; any other
# failure is allowed to surface instead of being silently swallowed.
try:
    os.remove('TitleID/TitleID.csv')
except FileNotFoundError:
    pass

# Scrape the SwitchBrew game title list under the output name "TitleID".
wikitablescrape.scrape(
    url="http://switchbrew.org/index.php?title=Title_list/Games",
    output_name="TitleID")

print("done, feel free to exit")

Exemplo n.º 2

0

Exibir arquivo

Arquivo: world_Co2_population_test_wikitablescrape.1.py Projeto: ARONDALTON/wiki-table-scrape

"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Drop the 'output' directory left over from an earlier run; it is fine if
# the directory is not there.
try:
    shutil.rmtree('output')
except FileNotFoundError:
    pass

# (article URL, output name) pairs, scraped in the order listed.
# NOTE(review): "contry" looks like a misspelling of "country"; it is kept
# as-is because the output name is a runtime value.
SCRAPE_JOBS = (
    ("https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population",
     "world_population_by_contry"),
    ("https://en.wikipedia.org/wiki/List_of_countries_by_carbon_dioxide_emissions",
     "carbon_dioxide_by_contry"),
)

for article_url, folder_name in SCRAPE_JOBS:
    wikitablescrape.scrape(url=article_url, output_name=folder_name)

# wikitablescrape.scrape(
#     url="https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_career_scoring_leaders",
#     output_name="nba"
# )

# wikitablescrape.scrape(
#     url="https://en.wikipedia.org/wiki/List_of_highest-grossing_films",
#     output_name="films"

Exemplo n.º 3

0

Exibir arquivo

"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Remove the 'output' folder left by a previous run; a missing folder is fine.
try:
    shutil.rmtree('output')
except FileNotFoundError:
    pass

wikitablescrape.scrape(
    url="https://en.wikipedia.org/wiki/List_of_current_NBA_team_rosters",
    output_name="output"
)


# Move all CSV folders into a single 'output' folder.
# exist_ok=True: the scrape above is given output_name="output" and has
# presumably already created that folder, in which case a plain makedirs
# would raise FileExistsError.
os.makedirs('output', exist_ok=True)
# NOTE(review): nothing in this script visibly creates './test'; confirm this
# move is still wanted, since it raises if './test' does not exist.
shutil.move('./test', './output')

Exemplo n.º 4

0

Exibir arquivo

Arquivo: getData.py Projeto: thisml/Israeli-Arabic-Population

import wikitablescrape

# Scrape the West Bank settlements population article under the output name
# "SettlementsPopulation".
# Scraper project: https://github.com/rocheio/wiki-table-scrape
wikitablescrape.scrape(
    output_name="SettlementsPopulation",
    url="https://en.wikipedia.org/wiki/Population_statistics_for_Israeli_settlements_in_the_West_Bank",
)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_wikitablescrape.py Projeto: rocheio/wiki-table-scrape

"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Start clean: remove the "output" directory if an earlier run left one.
try:
    shutil.rmtree("output")
except FileNotFoundError:
    pass

# Output name -> article URL; scraped in insertion order.
ARTICLES = {
    "mountains": "https://en.wikipedia.org/wiki/List_of_mountains_by_elevation",
    "volcanoes": "https://en.wikipedia.org/wiki/List_of_volcanoes_by_elevation",
    "nba": "https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_career_scoring_leaders",
}

for folder_name, article_url in ARTICLES.items():
    wikitablescrape.scrape(url=article_url, output_name=folder_name)

wikitablescrape.scrape(
    url="https://en.wikipedia.org/wiki/List_of_highest-grossing_films",
    output_name="films",

Exemplo n.º 6

0

Exibir arquivo

1 record for each zip 3  (roughly 1000 records total)
1 column to record the average temperature for each of 52 weeks, averaged over the last 3 years (52 columns per record)

Temperature values for each record should be in fahrenheit.  Whole numbers are sufficient.
"""

import pandas as pd
import wikitablescrape
from wunderground_scraper import scrape_station
import pickle
from datetime import datetime, timedelta

# Scrape the ZIP-prefix article; the resulting CSVs are read back from ./zip.
wikitablescrape.scrape(
    url="https://en.wikipedia.org/wiki/List_of_ZIP_code_prefixes",
    output_name="zip")

# Read zip.csv followed by zip_1.csv .. zip_9.csv (same order as before) and
# stack them row-wise. DataFrame.append was removed in pandas 2.0, so the
# frames are combined with pd.concat, which keeps the same row/index layout.
csv_paths = ['./zip/zip.csv'] + [
    './zip/zip_{}.csv'.format(i) for i in range(1, 10)
]
table = pd.concat([pd.read_csv(path, header=None) for path in csv_paths])

# Strip the dagger and asterisk markers from every cell. Raw strings keep
# the regex escape for '*' from being read as an invalid Python string escape.
table = table.replace([r'†', r'\*'], '', regex=True)

# Flatten the table column by column into one list of cell values.
zip_list = []
for j in table.columns:
    zip_list.extend(table[j].tolist())

Exemplo n.º 7

0

Exibir arquivo

"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Clear out the 'output' directory from a previous run; ignore it if absent.
try:
    shutil.rmtree('output')
except FileNotFoundError:
    pass

# Pages to scrape, as (output name, URL), processed top to bottom.
PAGES = [
    ("rebates",
     "https://www.bchydro.com/powersmart/residential/savings-and-rebates/current-rebates-buy-backs/home-renovation-rebates/eligibility.html"),
    ("insulation",
     "https://en.wikipedia.org/wiki/R-value_(insulation)"),
    ("nba",
     "https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_career_scoring_leaders"),
]

for folder_name, page_url in PAGES:
    wikitablescrape.scrape(url=page_url, output_name=folder_name)

wikitablescrape.scrape(
    url="https://en.wikipedia.org/wiki/List_of_highest-grossing_films",
    output_name="films"

Exemplo n.º 8

0

Exibir arquivo

"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Remove a stale 'output' directory; nothing to do when it does not exist.
try:
    shutil.rmtree('output')
except FileNotFoundError:
    pass

# Keyword arguments for each scrape call, executed in list order.
SCRAPE_CALLS = [
    {
        "url": "https://en.wikipedia.org/wiki/List_of_mountains_by_elevation",
        "output_name": "mountains",
    },
    {
        "url": "https://en.wikipedia.org/wiki/List_of_volcanoes_by_elevation",
        "output_name": "volcanoes",
    },
    {
        "url": "https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_career_scoring_leaders",
        "output_name": "nba",
    },
]

for call_kwargs in SCRAPE_CALLS:
    wikitablescrape.scrape(**call_kwargs)

wikitablescrape.scrape(
    url="https://en.wikipedia.org/wiki/List_of_highest-grossing_films",
    output_name="films"

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_wikitablescrape.py Projeto: feedthebeat90/topos-challenge

"""
"""Test the wikitablescrape script on four articles."""

import os
import shutil

import wikitablescrape

# Get rid of the "output" directory from an earlier run; absence is fine.
try:
    shutil.rmtree("output")
except FileNotFoundError:
    pass

# (article URL, output name) pairs, scraped in order.
CITY_ARTICLES = (
    ("https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population",
     "cities"),
    ("https://en.wikipedia.org/wiki/List_of_United_States_cities_by_area",
     "areas"),
)
for article_url, folder_name in CITY_ARTICLES:
    wikitablescrape.scrape(url=article_url, output_name=folder_name)

# Collect the per-article folders under a freshly created "output" folder.
os.makedirs("output")
for scraped_folder in ("./cities", "./areas"):
    shutil.move(scraped_folder, "./output")