Пример #1
0
    def __init__(self,
                 url,
                 from_date,
                 to_date,
                 datetime_format="%Y-%m-%d",
                 browser=None):
        """Set up a scraper for a single economic-calendar page.

        Args:
            url: Address of the calendar page. It must contain the text
                "economic-calendar". Its last "/"-separated segment is split
                at the final "-" into ``self.calendar`` and ``self.id``.
            from_date: Start of the requested range, handed to ``parse_tz``.
            to_date: End of the requested range, handed to ``parse_tz``.
            datetime_format: Stored unchanged as ``self.datetime_format``.
            browser: Browser driver to reuse. A new ``webdriver.Chrome()`` is
                started when this is omitted (or otherwise falsy).

        Raises:
            ValueError: If ``url`` does not contain "economic-calendar".
        """
        # TODO: validate the url more strictly than this substring check.
        if "economic-calendar" not in url:
            # Call form of ``raise``: valid on both Python 2 and Python 3.
            raise ValueError(
                "The given url does not look like an economic calendar.")

        self.url = url
        # "<calendar-name>-<id>": everything before the last "-" is the
        # calendar name, the remainder is the id. When the segment holds no
        # "-" at all, the calendar is "" and the id is the whole segment.
        slug = self.url.split("/")[-1]
        self.calendar, _, self.id = slug.rpartition("-")

        self.timezone = "America/New_York"
        self.datetime_format = datetime_format
        self.from_date = parse_tz(from_date, in_tz=None)
        self.to_date = parse_tz(to_date, in_tz=None)

        self.browser = browser if browser else webdriver.Chrome()

        # Both are left unset here; nothing in this method fills them.
        self.html_table = None
        self.table = None
Пример #2
0
    def __init__(self,
                 url,
                 resolution,
                 from_date,
                 to_date,
                 datetime_format="%Y-%m-%d",
                 browser=None):
        """Set up a scraper for one instrument and open its page.

        Args:
            url: Instrument address. It must contain "www.investing.com" as
                one of its "/"-separated parts. The last part becomes
                ``self.instrument``; the parts between the host and the
                instrument become ``self.category``.
            resolution: Stored unchanged as ``self.resolution``.
            from_date: Start of the requested range, handed to ``parse_tz``.
            to_date: End of the requested range, handed to ``parse_tz``.
            datetime_format: Stored unchanged as ``self.datetime_format``.
            browser: Browser driver to reuse. A new ``webdriver.Chrome()`` is
                started when this is omitted (or otherwise falsy).

        Raises:
            ValueError: If "www.investing.com" is not one of the
                "/"-separated parts of ``url`` (raised by ``list.index``).
        """
        self.timezone = "America/New_York"
        self.datetime_format = datetime_format
        self.url = url
        self.resolution = resolution
        self.from_date = parse_tz(from_date, in_tz=None)
        self.to_date = parse_tz(to_date, in_tz=None)

        url_parts = self.url.split("/")
        # Pop the instrument first so it is excluded from the category path.
        self.instrument = url_parts.pop()
        host_position = url_parts.index("www.investing.com")
        # str.join works on Python 2 and 3; string.join is gone in Python 3.
        self.category = "/".join(url_parts[host_position + 1:])

        self.browser = browser if browser else webdriver.Chrome()
        # self.URL is defined outside this method; it is used here as a
        # template with {category} and {instrument} placeholders.
        self.browser.get(
            self.URL.format(category=self.category,
                            instrument=self.instrument))

        # Both are left unset here; nothing in this method fills them.
        self._html_table = None
        self.table = None
Пример #3
0
 def _parse_dates(self, cell):
     """Strip a parenthesised word tag from ``cell`` and parse the rest.

     Looks for ``(word)`` groups in the text. When at least one is found,
     every occurrence of the last match is removed. The remaining text is
     passed to ``parse_tz`` with ``in_tz=None`` and that result is returned.
     """
     tags = re.findall(r"\(\w+\)", cell)
     if tags:
         # Only the text of the final match is removed (all its occurrences).
         cell = cell.replace(tags[-1], "")
     return parse_tz(datetime_str=cell, in_tz=None)
Пример #4
0
    def set_html_table(self):
        """Find the "curr_table" element and store it in ``self._html_table``.

        Steps:
            1. Unless ``self.resolution`` is "Daily", click the matching
               option of the "data_interval" selector and wait for the table.
            2. Parse the date in the first cell of the table's last row.
            3. If that date is later than ``self.from_date``, fill the date
               range widget with ``self.from_date`` / ``self.to_date``, apply
               it, and wait for the table again.

        Returns:
            None. The element is kept in ``self._html_table``.
        """
        wait = WebDriverWait(self.browser, 10)
        table_locator = (By.ID, "curr_table")

        if self.resolution != "Daily":
            time_frame = self.browser.find_element(By.ID, "data_interval")
            for option in time_frame.find_elements(By.TAG_NAME, "option"):
                if option.get_attribute("value") == self.resolution:
                    option.click()
                    break
            html_table = wait.until(
                EC.presence_of_element_located(table_locator))
        else:
            html_table = self.browser.find_element(*table_locator)

        # find_element(By.CSS_SELECTOR, ...) replaces the removed
        # find_element_by_css_selector helper and matches the other lookups.
        last_row_cell = html_table.find_element(By.CSS_SELECTOR,
                                                "tbody tr:last-child td")
        last_record_date = parse_tz(datetime_str=last_row_cell.text,
                                    in_tz=None)

        if last_record_date > self.from_date:
            # Clicks go through JavaScript rather than WebElement.click().
            date_range_button = self.browser.find_element(
                By.ID, "widgetFieldDateRange")
            self.browser.execute_script("arguments[0].click();",
                                        date_range_button)

            start_date_field = self.browser.find_element(By.ID, "startDate")
            start_date_field.clear()
            start_date_field.send_keys(self.from_date.strftime("%m/%d/%Y"))
            end_date_field = self.browser.find_element(By.ID, "endDate")
            end_date_field.clear()
            end_date_field.send_keys(self.to_date.strftime("%m/%d/%Y"))
            apply_date_btn = self.browser.find_element(By.ID, "applyBtn")
            self.browser.execute_script("arguments[0].click();",
                                        apply_date_btn)

            # NOTE(review): presence_of_element_located may be satisfied by
            # the table that was already on the page - confirm that the
            # refreshed table is what gets returned here.
            html_table = wait.until(
                EC.presence_of_element_located(table_locator))

        self._html_table = html_table

        return None
Пример #5
0
def request_data(*args, **kwargs):
    """Scrape the EIA crude-oil-inventories history table from investing.com.

    Opens the event page in a new Chrome session, clicks the "show more"
    control until the last table row is no later than ``from_date``, then
    builds a DataFrame from the table and keeps only the rows whose release
    datetime lies within ``[from_date, to_date]``.

    Positional arguments are accepted but ignored.

    Keyword Args:
        from_date: Start of the range; parsed by ``parse_tz`` in ``timezone``.
        to_date: End of the range; parsed by ``parse_tz`` in ``timezone``.
        timezone: Time zone for the two bounds and the target zone handed to
            ``timezone_shift`` for the release datetimes.
        datetime_format: Handed to ``timezone_shift`` together with
            ``timezone``.

    Returns:
        A pandas DataFrame indexed by "Datetime". The page's "Release Date",
        "Time" and "Unnamed: 5" columns are dropped and a "Better" column is
        added (True/False depending on whether a span title contains
        "better", None for spans with a blank title).

    Note:
        The Chrome session is always closed and the LC_TIME locale is always
        put back to its previous value, including when scraping fails.
        ``wait.until`` gives up after 10 seconds, so the call fails if the
        "show more" control never becomes clickable.
    """
    import locale
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from pyCBT.common.timezone import parse_tz, timezone_shift

    tag_pattern = r"\(\w+\)"

    def _last_record_date(table_element):
        # Date shown in the first cell of the table's last row.
        cell = table_element.find_element(By.CSS_SELECTOR,
                                          "tbody tr:last-child td")
        return parse_tz(remove_pattern(cell.text, tag_pattern),
                        in_tz="America/New_York")

    # Remember the current LC_TIME setting so it can be restored afterwards;
    # locale.resetlocale() no longer exists in recent Python versions.
    previous_time_locale = locale.setlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, "en_US")
    browser = None
    try:
        from_date = parse_tz(kwargs.get("from_date"),
                             in_tz=kwargs.get("timezone"))
        to_date = parse_tz(kwargs.get("to_date"),
                           in_tz=kwargs.get("timezone"))

        browser = webdriver.Chrome()
        browser.get(
            "https://www.investing.com/economic-calendar/eia-crude-oil-inventories-75"
        )

        inv_table = browser.find_element(By.ID, "eventHistoryTable75")
        last_record_date = _last_record_date(inv_table)

        wait = WebDriverWait(browser, 10)
        while last_record_date > from_date:
            show_more = wait.until(
                EC.element_to_be_clickable((By.ID, "showMoreHistory75")))
            browser.execute_script("arguments[0].click();", show_more)

            inv_table = wait.until(
                inventory_table_has_changed_from(
                    (By.ID, "eventHistoryTable75"), inv_table))
            last_record_date = _last_record_date(inv_table)

        table = pd.read_html(u"<table>" +
                             inv_table.get_attribute("innerHTML") +
                             u"</table>")[0]
        table.insert(0,
                     "Datetime",
                     value=table["Release Date"] + " " + table["Time"])

        # Build a real list: on Python 3 map() is lazy, and the spans must be
        # read while the browser session is still open.
        span_titles = [
            span.get_attribute("title")
            for span in inv_table.find_elements(By.TAG_NAME, "span")
        ]
        better = [("better" in title.lower()) if title.strip() else None
                  for title in span_titles]
        table.insert(table.columns.size, "Better", value=better)

        table["Datetime"] = table["Datetime"].apply(remove_pattern,
                                                    args=(tag_pattern, ))
        table["Datetime"] = table["Datetime"].apply(
            timezone_shift,
            args=("America/New_York", kwargs.get("timezone"),
                  kwargs.get("datetime_format")))

        # NOTE(review): "Datetime" was shifted to the caller's timezone just
        # above, yet it is parsed here as America/New_York - confirm intended.
        outside_range = [
            not (from_date <= parse_tz(release_date,
                                       in_tz="America/New_York") <= to_date)
            for release_date in table["Datetime"]
        ]
        table.drop(table.index[outside_range], axis="index", inplace=True)
        table.drop(["Release Date", "Time", "Unnamed: 5"],
                   axis="columns",
                   inplace=True)
        table.set_index("Datetime", inplace=True)

        # SECURITY(review): eval() executes text scraped from a web page.
        # ast.literal_eval would parse the same numeric literals without
        # running code; flagged here rather than changed so that results
        # stay identical.
        table = table.applymap(lambda cell: eval(cell.strip("M"))
                               if isinstance(cell, str) else cell)
        return table
    finally:
        if browser is not None:
            browser.quit()
        locale.setlocale(locale.LC_TIME, previous_time_locale)