Example #1
    def latest_fuel_mix(self):
        # set up request
        url = self.base_url + '/ria/FuelMix.aspx?CSV=True'

        # carry out request
        response = self.request(url)
        if not response:
            return pd.DataFrame()

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(BytesIO(response.content), header=0, index_col=0, parse_dates=True)

        # set index
        df.index = self.utcify_index(df.index)
        df.index.set_names(['timestamp'], inplace=True)

        # set names and labels
        df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1)
        df['gen_MW'] = df['ACT']

        # return
        return df[['fuel_name', 'gen_MW']]
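
A minimal usage sketch (client_factory is pyiso's documented entry point; the printed columns follow the code above):

    from pyiso import client_factory

    miso = client_factory('MISO')
    mix = miso.latest_fuel_mix()    # DataFrame indexed by a UTC 'timestamp' index
    if not mix.empty:
        print(mix[['fuel_name', 'gen_MW']].head())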
Example #2
    def handle_options(self, **kwargs):
        """
        Process and store keyword argument options.
        """
        super(EIAClient, self).handle_options(**kwargs)

        if not hasattr(self, 'BA'):
            LOGGER.error('Balancing authority not set.')
            raise ValueError('Balancing authority not set.')

        if 'market' not in self.options:
            if self.options['forecast']:
                self.options['market'] = self.MARKET_CHOICES.dam
            elif self.options['sliceable'] and self.options['data'] == 'gen':
                self.options['market'] = self.MARKET_CHOICES.dam
            else:
                self.options['market'] = self.MARKET_CHOICES.hourly
        if 'freq' not in self.options:
            # every current case resolves to hourly frequency
            self.options['freq'] = self.FREQUENCY_CHOICES.hourly
        if 'yesterday' not in self.options:
            self.options['yesterday'] = False
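
A sketch of how these defaults resolve for a forecast request; the construction is hypothetical, but the option keys and choice constants follow the code above:

    client = EIAClient()    # hypothetical construction
    client.BA = 'MISO'      # balancing authority must be set before handle_options
    client.handle_options(data='load', forecast=True)
    assert client.options['market'] == client.MARKET_CHOICES.dam
    assert client.options['freq'] == client.FREQUENCY_CHOICES.hourly
    assert client.options['yesterday'] is False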
Example #3
    def get_trade(self, latest=False,
                  start_at=False, end_at=False, **kwargs):
        # set args
        self.handle_options(data='trade', latest=latest,
                            start_at=start_at, end_at=end_at, **kwargs)

        # set up storage
        parsed_data = []

        # collect data
        for this_date in self.dates():
            # fetch
            try:
                df, mode = self.fetch_df(this_date)
            except (HTTPError, ValueError):
                LOGGER.warn('No data available in NVEnergy at %s' % this_date)
                continue

            # store
            try:
                parsed_data += self.parse_trade(df, this_date, mode)
            except KeyError:
                LOGGER.warn('Unparseable data available in NVEnergy at %s: %s' % (this_date, df))
                continue

        # return
        return self.time_subset(parsed_data)
Example #4
    def _dst_active_hours_for_transition_day(self, local_dt_index):
        """
        When attempting to localize a timezone-naive list of dates, the daylight saving status may be ambiguous.
        This method is meant as a fallback when the ambiguous='infer' datetime handling in pandas fails. It assumes
        that the datetime index falls on a daylight saving transition day.

        :param pandas.DatetimeIndex local_dt_index: A list of timezone-naive DatetimeIndex values.
        :return: A list of bool values indicating whether daylight saving time is active for each value provided.
            This list of booleans is suitable for passing to the pandas 'ambiguous' kwarg.
        :rtype: list
        """
        dst_active_list = []
        hour_idx = local_dt_index.hour
        if len(hour_idx) > 3:
            starting_timestamp = local_dt_index[0]
            starting_month = starting_timestamp.month
            starting_hour = starting_timestamp.hour

            if starting_month == 3 and starting_hour == 0:
                dst_active_list = [h > 1 for h in hour_idx]
            elif starting_month == 11 and starting_hour == 0:
                dst_active_list = [h < 2 for h in hour_idx]
            elif 3 < starting_month < 11:
                dst_active_list = [True for h in hour_idx]
            elif starting_month < 3 or starting_month > 11:
                dst_active_list = [False for h in hour_idx]
            else:
                LOGGER.warn("Uanble to infer fallback DST status for ambiguous DatetimeIndex values.")
        return dst_active_list
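
A sketch of the intended fallback path, assuming `client` is an instance of the class defining this method; the fall-back date and timezone are illustrative:

    import pandas as pd

    # naive hourly index covering a US fall-back transition day
    naive_idx = pd.date_range('2016-11-06 00:00', periods=5, freq='H')
    flags = client._dst_active_hours_for_transition_day(naive_idx)
    # flags == [True, True, False, False, False] for this November day
    aware_idx = naive_idx.tz_localize('America/New_York', ambiguous=flags)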
Example #5
    def get_lmp(self, node_id='INTERNALHUB', latest=True, start_at=False, end_at=False, **kwargs):
        # set args
        self.handle_options(data='lmp', latest=latest,
                            start_at=start_at, end_at=end_at, node_id=node_id, **kwargs)
        # get location id
        try:
            locationid = self.locations[node_id.upper()]
        except KeyError:
            raise ValueError('No LMP data available for location %s' % node_id)

        # set up storage
        raw_data = []
        # collect raw data
        for endpoint in self.request_endpoints(locationid):
            # carry out request
            data = self.fetch_data(endpoint, self.auth)

            # pull out data
            try:
                raw_data += self.parse_json_lmp_data(data)
            except ValueError as e:
                LOGGER.warn(e)
                continue

        # parse and slice
        df = self._parse_json(raw_data)
        df = self.slice_times(df)

        # return
        return df.to_dict(orient='records')
Example #6
    def unzip(self, content):
        """
        Unzip encoded data.
        Returns the unzipped content as an array of strings, each representing one file's content
        or returns None if an error was encountered.
        ***Previous behavior: Only returned the content from the first file***
        """
        # create zip file
        try:
            filecontent = BytesIO(content)
        except TypeError:
            filecontent = StringIO(content)

        try:
            # have zipfile
            z = zipfile.ZipFile(filecontent)
        except zipfile.BadZipfile:
            LOGGER.error('%s: unzip failure for content:\n%s' % (self.NAME, content))
            return None

        # have unzipped content
        unzipped = [z.read(thisfile) for thisfile in z.namelist()]
        z.close()

        # return
        return unzipped
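
A round-trip sketch of the multi-file behavior described in the docstring; the in-memory zip is built only for illustration:

    import io
    import zipfile
    from pyiso import client_factory

    client = client_factory('CAISO')        # any client inheriting this unzip()
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, 'w') as z:
        z.writestr('a.csv', 'ts,mw\n')
        z.writestr('b.csv', 'ts,mw\n')
    files = client.unzip(buf.getvalue())    # -> one bytes entry per file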
Example #7
    def request(self, *args, **kwargs):
        response = super(PJMClient, self).request(*args, **kwargs)
        if response and response.status_code == 400:
            LOGGER.warn('PJM request returned Bad Request %s' % response)
            return None

        return response
Example #8
    def get_load(self, latest=False, start_at=False, end_at=False,
                 forecast=False, **kwargs):
        # set args
        self.handle_options(data='load', latest=latest, forecast=forecast,
                            start_at=start_at, end_at=end_at, **kwargs)

        # set up storage
        raw_data = []

        # collect raw data
        for endpoint in self.request_endpoints():
            # carry out request
            data = self.fetch_data(endpoint, self.auth)

            # pull out data
            try:
                raw_data += self.parse_json_load_data(data)
            except ValueError as e:
                LOGGER.warn(e)
                continue

        # parse data
        try:
            df = self._parse_json(raw_data)
        except ValueError:
            return []
        df = self.slice_times(df)

        # return
        return self.serialize_faster(df, drop_index=True)
Example #9
    def fetch_forecast(self, date):
        # construct url
        datestr = date.strftime('%Y%m%d')
        url = self.base_url + '/Library/Repository/Market%20Reports/' + datestr + '_da_ex.xls'

        # make request with self.request for easier debugging, mocking
        response = self.request(url)
        if not response:
            return pd.DataFrame()
        if response.status_code == 404:
            LOGGER.debug('No MISO forecast data available at %s' % datestr)
            return pd.DataFrame()

        xls = pd.read_excel(BytesIO(response.content))

        # clean header
        header_df = xls.iloc[:5]
        df = xls.iloc[5:]
        df.columns = ['hour_str'] + list(header_df.iloc[-1][1:])

        # set index
        idx = []
        for hour_str in df['hour_str']:
            # format like 'Hour 01' to 'Hour 24'
            ihour = int(hour_str[5:]) - 1
            local_ts = datetime(date.year, date.month, date.day, ihour)
            idx.append(self.utcify(local_ts))
        df.index = idx
        df.index.set_names(['timestamp'], inplace=True)

        # return
        return df
Example #10
    def fetch_csvs(self, date, label):
        # construct url
        datestr = date.strftime('%Y%m%d')
        if self.options['data'] == 'lmp':
            url = '%s/%s/%s%s_zone.csv' % (self.base_url, label, datestr, label)
        else:
            url = '%s/%s/%s%s.csv' % (self.base_url, label, datestr, label)

        # make request
        response = self.request(url)

        # if 200, return
        if response and response.status_code == 200:
            return [response.text]

        # if failure, try zipped monthly data
        datestr = date.strftime('%Y%m01')
        if self.options['data'] == 'lmp':
            url = '%s/%s/%s%s_zone_csv.zip' % (self.base_url, label, datestr, label)
        else:
            url = '%s/%s/%s%s_csv.zip' % (self.base_url, label, datestr, label)

        # make request and unzip
        response_zipped = self.request(url)
        if response_zipped:
            unzipped = self.unzip(response_zipped.content)
        else:
            return []

        # return
        if unzipped:
            LOGGER.info('Failed to find daily %s data for %s but found monthly data, using that' % (self.options['data'], date))
            return unzipped
        else:
            return []
Example #11
    def fetch_forecast(self, date):
        # construct url
        datestr = date.strftime("%Y%m%d")
        url = self.base_url + "/Library/Repository/Market%20Reports/" + datestr + "_da_ex.xls"

        # make request
        try:
            xls = pd.read_excel(url)
        except HTTPError:
            LOGGER.debug("No MISO forecast data available at %s" % datestr)
            return pd.DataFrame()

        # clean header
        header_df = xls.iloc[:5]
        df = xls.iloc[5:]
        df.columns = ["hour_str"] + list(header_df.iloc[-1][1:])

        # set index
        idx = []
        for hour_str in df["hour_str"]:
            # format like 'Hour 01' to 'Hour 24'
            ihour = int(hour_str[5:]) - 1
            local_ts = datetime(date.year, date.month, date.day, ihour)
            idx.append(self.utcify(local_ts))
        df.index = idx
        df.index.set_names(["timestamp"], inplace=True)

        # return
        return df
Example #12
    def utcify_index(self, local_index, tz_name=None):
        """
        Convert a DateTimeIndex to UTC.

        :param DateTimeIndex local_index: The local DateTimeIndex to be converted.
        :param string tz_name: If local_index is naive, it is assumed to be in timezone tz_name.
            If tz_name is not provided, the client's default timezone is used.
        :return: DatetimeIndex in UTC.
        :rtype: DatetimeIndex
        """
        # set up tz
        if tz_name is None:
            tz_name = self.TZ_NAME

        # localize
        try:
            aware_local_index = local_index.tz_localize(tz_name)
        except AmbiguousTimeError as e:
            LOGGER.debug(e)
            aware_local_index = local_index.tz_localize(tz_name, ambiguous='infer')

        # convert to utc
        aware_utc_index = aware_local_index.tz_convert('UTC')

        # return
        return aware_utc_index
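
A usage sketch (the index values are illustrative; the client's TZ_NAME supplies the default timezone):

    import pandas as pd
    from pyiso import client_factory

    client = client_factory('MISO')
    naive = pd.DatetimeIndex(['2015-06-01 12:00', '2015-06-01 13:00'])
    utc_idx = client.utcify_index(naive)    # localized to client.TZ_NAME, then converted to UTC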
Example #13
    def latest_fuel_mix(self):
        # set up request
        url = self.base_url + "/ria/FuelMix.aspx?CSV=True"

        # carry out request
        response = self.request(url)
        if not response:
            return pd.DataFrame()

        # test for valid content
        if "The page cannot be displayed" in response.text:
            LOGGER.error("MISO: Error in source data for generation")
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(StringIO(response.text), header=0, index_col=0, parse_dates=True)

        # set index
        df.index = self.utcify_index(df.index)
        df.index.set_names(["timestamp"], inplace=True)

        # set names and labels
        df["fuel_name"] = df.apply(lambda x: self.fuels[x["CATEGORY"]], axis=1)
        df["gen_MW"] = df["ACT"]

        # return
        return df[["fuel_name", "gen_MW"]]
Example #14
 def fetch_todays_outlook_renewables(self):
     # get renewables data
     response = self.request(self.base_url_outlook+'renewables.html')
     try:
         return BeautifulSoup(response.content)
     except AttributeError:
         LOGGER.warn('No response for CAISO today outlook renewables')
         return None
Example #15
    def get_load(self, latest=False, start_at=None, end_at=None, forecast=False, **kwargs):
        # set args
        self.handle_options(data='load', latest=latest,
                            start_at=start_at, end_at=end_at, forecast=forecast,
                            **kwargs)

        if self.options['forecast']:
            # fetch from eData
            df = self.fetch_edata_series('ForecastedLoadHistory', {'name': 'PJM RTO Total'})
            sliced = self.slice_times(df)
            sliced.columns = ['load_MW']

            # format
            extras = {
                'freq': self.FREQUENCY_CHOICES.hourly,
                'market': self.MARKET_CHOICES.dam,
                'ba_name': self.NAME,
            }
            data = self.serialize_faster(sliced, extras=extras)

            # return
            return data

        elif self.options['end_at'] and self.options['end_at'] < datetime.now(pytz.utc) - timedelta(hours=1):
            df = self.fetch_historical_load(self.options['start_at'].year)
            sliced = self.slice_times(df)

            # format
            extras = {
                'freq': self.FREQUENCY_CHOICES.hourly,
                'market': self.MARKET_CHOICES.dam,
                'ba_name': self.NAME,
            }
            data = self.serialize_faster(sliced, extras=extras)

            # return
            return data

        else:
            # handle real-time
            load_ts, load_val = self.fetch_edata_point('InstantaneousLoad', 'PJM RTO Total', 'MW')

            # fall back to OASIS
            if not (load_ts and load_val):
                load_ts, load_val = self.fetch_oasis_data()
            if not (load_ts and load_val):
                LOGGER.warn('No PJM latest load data')
                return []

            # format and return
            return [{
                'timestamp': load_ts,
                'freq': self.FREQUENCY_CHOICES.fivemin,
                'market': self.MARKET_CHOICES.fivemin,
                'load_MW': load_val,
                'ba_name': self.NAME,
            }]
Example #16
 def time_from_soup(self, soup):
     """
     Returns a UTC timestamp if one is found in the soup,
     or None if an error was encountered.
     """
     ts_elt = soup.find(class_='ts')
     if not ts_elt:
         LOGGER.error('PJM: Timestamp not found in soup:\n%s' % soup)
         return None
     return self.utcify(ts_elt.string)
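
A sketch against a minimal fragment; the markup is illustrative, not PJM's real page structure:

    from bs4 import BeautifulSoup
    from pyiso import client_factory

    pjm = client_factory('PJM')
    soup = BeautifulSoup('<span class="ts">12.11.2015 17:15</span>', 'html.parser')
    ts = pjm.time_from_soup(soup)    # UTC timestamp, or None if no element has class 'ts'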
Example #17
    def fetch_oasis(self, payload={}, return_all_files=False):
        """
        Returns a list of report data elements, or an empty list if an error was encountered.

        If return_all_files=False, returns only the content from the first file in the .zip -
        this is the default behavior and was used in earlier versions of this function.

        If return_all_files=True, will return an array representing the content from each file.
        This is useful for processing LMP data or other fields where multiple price components are returned in a zip.
        """
        # set up storage
        raw_data = []

        if return_all_files is True:
            default_return_val = []
        else:
            default_return_val = ''

        # try get
        response = self.request(self.base_url_oasis, params=payload)
        if not response:
            return default_return_val

        # read data from zip
        # This will be an array of content if successful, and None if unsuccessful
        content = self.unzip(response.content)
        if not content:
            return default_return_val

        # check xml content for errors
        soup = BeautifulSoup(content[0], 'lxml')
        error = soup.find('m:error')
        if error:
            code = error.find('m:err_code')
            desc = error.find('m:err_desc')
            msg = 'XML error for CAISO OASIS with payload %s: %s %s' % (payload, code, desc)
            LOGGER.error(msg)
            return default_return_val

        # return xml or csv data
        if payload.get('resultformat', False) == 6:
            # If we requested CSV files
            if return_all_files:
                return content
            else:
                return content[0]
        else:
            # Return XML content
            if return_all_files:
                raw_data = [BeautifulSoup(thisfile).find_all('report_data') for thisfile in content]
                return raw_data
            else:
                raw_data = soup.find_all('report_data')
                return raw_data
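
A usage sketch of the two return modes described in the docstring; the payload keys are illustrative OASIS parameters, not a complete query:

    from pyiso import client_factory

    caiso = client_factory('CAISO')
    payload = {'queryname': 'SLD_REN_FCST', 'resultformat': 6}    # 6 requests CSV
    first_file = caiso.fetch_oasis(payload=payload)                        # first file only
    all_files = caiso.fetch_oasis(payload=payload, return_all_files=True)  # one entry per file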
Example #18
    def utcify_index(self, local_index, tz_name=None, tz_col=None):
        """
        Convert a DateTimeIndex to UTC.

        :param DateTimeIndex local_index: The local DateTimeIndex to be converted.
        :param string tz_name: If local_index is naive, it is assumed to be in timezone tz_name.
            If tz_name is not provided, the client's default timezone is used.
        :return: DatetimeIndex in UTC.
        :rtype: DatetimeIndex
        """
        # set up tz
        if tz_name is None:
            tz_name = self.TZ_NAME

        # use tz col if given
        if tz_col is not None:
            # it seems like we shouldn't have to iterate, but all the smart ways aren't working
            aware_utc_list = []
            for i in range(len(local_index)):
                try:
                    aware_local_ts = pytz.timezone(tz_col[i]).localize(
                        local_index[i])
                except pytz.UnknownTimeZoneError:
                    # fall back to local ts
                    aware_local_ts = pytz.timezone(tz_name).localize(
                        local_index[i])

                # utcify
                aware_utc_ts = self.utcify(aware_local_ts)
                aware_utc_list.append(aware_utc_ts)

            # indexify
            aware_utc_index = pd.DatetimeIndex(aware_utc_list)

        else:
            # localize
            try:
                aware_local_index = local_index.tz_localize(tz_name)
            except AmbiguousTimeError as e:
                LOGGER.debug(e)
                aware_local_index = local_index.tz_localize(tz_name,
                                                            ambiguous='infer')
            except TypeError as e:
                # already aware
                LOGGER.debug(e)
                aware_local_index = local_index

            # convert to utc
            aware_utc_index = aware_local_index.tz_convert('UTC')

        # return
        return aware_utc_index
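
A sketch of the per-row tz_col path (column values are illustrative):

    import pandas as pd
    from pyiso import client_factory

    client = client_factory('MISO')
    naive = pd.DatetimeIndex(['2015-06-01 12:00', '2015-06-01 12:00'])
    utc_idx = client.utcify_index(naive, tz_col=['America/New_York', 'America/Chicago'])
    # rows are localized individually, so equal wall times map to different UTC instants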
Example #19
    def fetch_oasis(self, payload={}, return_all_files=False):
        """
        Returns a list of report data elements, or an empty list if an error was encountered.
        If return_all_files=False, returns only the content from the first file in the .zip -
        this is the default behavior and was used in earlier versions of this function.
        If return_all_files=True, will return an array representing the content from each file.
        This is useful for processing LMP data or other fields where multiple price components are returned in a zip.
        """
        # set up storage
        raw_data = []

        if return_all_files is True:
            default_return_val = []
        else:
            default_return_val = ''

        # try get
        response = self.request(self.base_url_oasis, params=payload)
        if not response:
            return default_return_val

        # read data from zip
        # This will be an array of content if successful, and None if unsuccessful
        content = self.unzip(response.content)
        if not content:
            return default_return_val

        # check xml content for errors
        soup = BeautifulSoup(content[0], 'xml')
        error = soup.find(['error', 'ERROR'])
        if error:
            code = error.find(['err_code', 'ERR_CODE'])
            desc = error.find(['err_desc', 'ERR_DESC'])
            msg = 'XML error for CAISO OASIS with payload %s: %s %s' % (payload, code, desc)
            LOGGER.error(msg)
            return default_return_val
        # return xml or csv data
        if payload.get('resultformat', False) == 6:
            # If we requested CSV files
            if return_all_files:
                return content
            else:
                return content[0]
        else:
            # Return XML content
            if return_all_files:
                raw_data = [BeautifulSoup(thisfile, 'xml').find_all(['REPORT_DATA', 'report_data']) for thisfile in content]
                return raw_data
            else:
                raw_data = soup.find_all(['REPORT_DATA', 'report_data'])
                return raw_data
Example #20
 def _assert_entries_1hr_apart(self, result_ts):
     prev_entry = None
     for entry in result_ts:
         if prev_entry:
             seconds_delta = (entry['timestamp'] - prev_entry['timestamp']).total_seconds()
             if seconds_delta > 3600:
                 LOGGER.error('prev_entry timestamp: ' + str(
                     prev_entry['timestamp'].astimezone(pytz.timezone(self.nbpower_client.TZ_NAME))
                 ))
                 LOGGER.error('entry timestamp: ' + str(
                     entry['timestamp'].astimezone(pytz.timezone(self.nbpower_client.TZ_NAME))
                 ))
             self.assertEqual(3600, seconds_delta)
         prev_entry = entry
Example #21
    def get_load(self, latest=False, yesterday=False, start_at=False, end_at=False, **kwargs):
        super(AESOClient, self).handle_options(latest=latest, yesterday=yesterday, start_at=start_at, end_at=end_at,
                                               **kwargs)

        if latest:
            return self._get_latest_report(request_type=ParserFormat.load)
        elif self.options.get('start_at', None) and self.options.get('end_at', None):
            earliest_load_dt = self.mtn_tz.localize(datetime(year=2000, month=1, day=1, hour=0, minute=0, second=0))
            latest_load_dt = self.local_now().replace(hour=23, minute=59, second=59, microsecond=999999)
            start_at = max(self.options['start_at'], earliest_load_dt).astimezone(self.mtn_tz)
            end_at = min(self.options['end_at'], latest_load_dt).astimezone(self.mtn_tz)
            return self._get_load_for_date_range(start_at=start_at, end_at=end_at)
        else:
            LOGGER.warn('No valid options were supplied.')
Example #22
    def get_lmp(self, node_id, latest=True, start_at=False, end_at=False, **kwargs):
        # set args
        self.handle_options(data='lmp', latest=latest,
                            start_at=start_at, end_at=end_at, **kwargs)

        # get location id
        try:
            locationid = self.locations[node_id.upper()]
        except KeyError:
            raise ValueError('No LMP data available for location %s' % node_id)

        # set up storage
        raw_data = []
        parsed_data = []

        # collect raw data
        for endpoint in self.request_endpoints(locationid):
            # carry out request
            data = self.fetch_data(endpoint, self.auth)

            # pull out data
            try:
                raw_data += self.parse_json_lmp_data(data)
            except ValueError as e:
                LOGGER.warn(e)
                continue

        # parse data
        for raw_dp in raw_data:
            # set up storage
            parsed_dp = {}

            # add values
            parsed_dp['timestamp'] = self.utcify(raw_dp['BeginDate'])
            parsed_dp['lmp'] = raw_dp['LmpTotal']
            parsed_dp['ba_name'] = self.NAME
            parsed_dp['market'] = self.options['market']
            parsed_dp['freq'] = self.options['frequency']
            parsed_dp['node_id'] = node_id
            parsed_dp['lmp_type'] = 'energy'

            # add to full storage
            to_store = True
            if self.options['sliceable']:
                if self.options['start_at'] > parsed_dp['timestamp'] or self.options['end_at'] < parsed_dp['timestamp']:
                    to_store = False
            if to_store:
                parsed_data.append(parsed_dp)

        return parsed_data
Example #23
    def get_trade(self,
                  latest=False,
                  yesterday=False,
                  start_at=None,
                  end_at=None,
                  **kwargs):
        trade_ts = []
        self.handle_options(latest=latest,
                            yesterday=yesterday,
                            start_at=start_at,
                            end_at=end_at,
                            **kwargs)
        inter_sched_flow_handler = IntertieScheduleFlowReportHandler(
            ieso_client=self)
        adequacy_handler = AdequacyReportHandler(ieso_client=self)

        if self.options.get('latest', False):
            self._get_latest_report_trimmed(
                result_ts=trade_ts,
                report_handler=inter_sched_flow_handler,
                parser_format=ParserFormat.trade)
        elif self.options.get('start_at', None) and self.options.get(
                'end_at', None):
            if self.options.get('historical', False):
                range_start = max(
                    self.options['start_at'],
                    inter_sched_flow_handler.earliest_available_datetime())
                range_end = min(
                    self.options['end_at'],
                    inter_sched_flow_handler.latest_available_datetime())
                self._get_report_range(result_ts=trade_ts,
                                       report_handler=inter_sched_flow_handler,
                                       parser_format=ParserFormat.trade,
                                       range_start=range_start,
                                       range_end=range_end)
            if self.options.get('forecast', False):
                range_start = max(
                    self.options['start_at'],
                    inter_sched_flow_handler.latest_available_datetime(),
                    adequacy_handler.earliest_available_datetime())
                range_end = min(self.options['end_at'],
                                adequacy_handler.latest_available_datetime())
                self._get_report_range(result_ts=trade_ts,
                                       report_handler=adequacy_handler,
                                       parser_format=ParserFormat.trade,
                                       range_start=range_start,
                                       range_end=range_end)
        else:
            LOGGER.warn('No valid options were supplied.')
        return trade_ts
Example #24
    def parse_oasis_renewable(self, raw_data):
        """Parse raw data output of fetch_oasis for renewables."""
        # set up storage
        preparsed_data = {}
        parsed_data = []

        # extract values from xml

        for raw_soup_dp in raw_data:
            # set up storage for timestamp
            ts = self.utcify(
                raw_soup_dp.find(['INTERVAL_START_GMT',
                                  'interval_start_gmt']).string)
            if ts not in preparsed_data:
                preparsed_data[ts] = {'wind': 0, 'solar': 0}

            # store generation value
            try:
                fuel_name = raw_soup_dp.find(
                    ['RENEWABLE_TYPE', 'renewable_type']).string.lower()
                gen_MW = float(raw_soup_dp.find(['VALUE', 'value']).string)
                preparsed_data[ts][fuel_name] += gen_MW
            except TypeError:
                LOGGER.error('Error in schema for CAISO OASIS result %s' %
                             raw_soup_dp.prettify())
                continue

        # collect values into dps
        freq = self.options.get('freq', self.FREQUENCY_CHOICES.hourly)
        market = self.options.get('market', self.MARKET_CHOICES.hourly)

        for ts, preparsed_dp in preparsed_data.items():
            # set up base
            base_parsed_dp = {
                'timestamp': ts,
                'freq': freq,
                'market': market,
                'gen_MW': 0,
                'ba_name': self.NAME
            }

            # collect data
            for fuel_name in ['wind', 'solar']:
                parsed_dp = copy.deepcopy(base_parsed_dp)
                parsed_dp['fuel_name'] = fuel_name
                parsed_dp['gen_MW'] += preparsed_dp[fuel_name]
                parsed_data.append(parsed_dp)

        # return
        return parsed_data
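
Each parsed data point is a plain dict; a representative entry follows (the freq/market string values are assumptions about the choice constants):

    import pandas as pd

    parsed_dp = {
        'timestamp': pd.Timestamp('2015-06-01 19:00:00', tz='UTC'),
        'freq': '1hr',          # assumed value of FREQUENCY_CHOICES.hourly
        'market': 'RTHR',       # assumed value of MARKET_CHOICES.hourly
        'fuel_name': 'wind',    # one dp each for 'wind' and 'solar' per timestamp
        'gen_MW': 1234.5,
        'ba_name': 'CAISO',
    }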
Example #25
    def handle_ba_limitations(self):
        """Handle BA limitations"""
        today = pytz.utc.localize(datetime.utcnow()).astimezone(
            pytz.timezone(self.TZ_NAME))
        two_days_ago = today - timedelta(days=2)
        load_not_supported_bas = [
            'DEAA', 'EEI', 'GRIF', 'GRMA', 'GWA', 'HGMA', 'SEPA', 'WWA', 'YAD'
        ]
        delay_bas = [
            'AEC', 'DOPD', 'GVL', 'HST', 'NSB', 'PGE', 'SCL', 'TAL', 'TIDC',
            'TPWR'
        ]
        canada_mexico = [
            'IESO', 'BCTC', 'MHEB', 'AESO', 'HQT', 'NBSO', 'CFE', 'SPC'
        ]
        if self.BA in delay_bas:
            if self.options['end_at'] and self.options['end_at'] > two_days_ago:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['yesterday']:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['forecast']:
                raise ValueError('No data: 2 day delay for this BA.')

        if self.BA in load_not_supported_bas:
            if self.options['data'] == 'load':
                LOGGER.error('Load data not supported for %s' % self.BA)
                raise ValueError('Load data not supported for this BA.')
        if self.BA in canada_mexico:
            LOGGER.error('Data not supported for %s' % self.BA)
            raise ValueError(
                'Data not currently supported for Canada and Mexico')
Example #26
    def handle_ba_limitations(self):
        """Handle BA limitations"""
        today = pytz.utc.localize(datetime.utcnow()).astimezone(pytz.timezone(self.TZ_NAME))
        two_days_ago = today - timedelta(days=2)
        load_not_supported_bas = ['DEAA', 'EEI', 'GRIF', 'GRMA', 'GWA',
                                  'HGMA', 'SEPA', 'WWA', 'YAD']
        delay_bas = ['AEC', 'DOPD', 'GVL', 'HST', 'NSB', 'PGE', 'SCL',
                     'TAL', 'TIDC', 'TPWR']
        canada_mexico = ['IESO', 'BCTC', 'MHEB', 'AESO', 'HQT', 'NBSO',
                         'CFE', 'SPC']
        if self.BA in delay_bas:
            if self.options['end_at'] and self.options['end_at'] > two_days_ago:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['yesterday']:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['forecast']:
                raise ValueError('No data: 2 day delay for this BA.')

        if self.BA in load_not_supported_bas:
            if self.options['data'] == 'load':
                LOGGER.error('Load data not supported for %s' % self.BA)
                raise ValueError('Load data not supported for this BA.')
        if self.BA in canada_mexico:
            LOGGER.error('Data not supported for %s' % self.BA)
            raise ValueError('Data not currently supported for Canada and Mexico')
Example #27
    def utcify_index(self, local_index, tz_name=None, tz_col=None):
        """
        Convert a DateTimeIndex to UTC.

        :param DateTimeIndex local_index: The local DateTimeIndex to be converted.
        :param string tz_name: If local_index is naive, it is assumed to be in timezone tz_name.
            If tz_name is not provided, the client's default timezone is used.
        :return: DatetimeIndex in UTC.
        :rtype: DatetimeIndex
        """
        # set up tz
        if tz_name is None:
            tz_name = self.TZ_NAME

        # use tz col if given
        if tz_col is not None:
            # it seems like we shouldn't have to iterate, but all the smart ways aren't working
            aware_utc_list = []
            for i in range(len(local_index)):
                try:
                    aware_local_ts = pytz.timezone(tz_col[i]).localize(local_index[i])
                except pytz.UnknownTimeZoneError:
                    # fall back to local ts
                    aware_local_ts = pytz.timezone(tz_name).localize(local_index[i])

                # utcify
                aware_utc_ts = self.utcify(aware_local_ts)
                aware_utc_list.append(aware_utc_ts)

            # indexify
            aware_utc_index = pd.DatetimeIndex(aware_utc_list)

        else:
            # localize
            try:
                aware_local_index = local_index.tz_localize(tz_name)
            except AmbiguousTimeError as e:
                LOGGER.debug(e)
                aware_local_index = local_index.tz_localize(tz_name, ambiguous='infer')
            except TypeError as e:
                # already aware
                LOGGER.debug(e)
                aware_local_index = local_index

            # convert to utc
            aware_utc_index = aware_local_index.tz_convert('UTC')

        # return
        return aware_utc_index
Example #28
 def _assert_entries_5min_apart(self, result_ts):
     prev_entry = None
     for entry in result_ts:
         if prev_entry:
             seconds_delta = (entry['timestamp'] -
                              prev_entry['timestamp']).total_seconds()
             if seconds_delta > 300:
                 LOGGER.error('prev_entry timestamp: ' +
                              str(prev_entry['timestamp'].astimezone(
                                  pytz.timezone(self.ieso_client.TZ_NAME))))
                 LOGGER.error('entry timestamp: ' +
                              str(entry['timestamp'].astimezone(
                                  pytz.timezone(self.ieso_client.TZ_NAME))))
             self.assertEqual(300, seconds_delta)
         prev_entry = entry
Example #29
    def _get_load_forecast_report(self):
        """
        :return: List of dicts, each with keys ``[ba_name, timestamp, freq, market, load_MW]``.
           Timestamps are in UTC.
        :rtype: list
        """
        load_ts = []
        forecast_url_base = 'http://tso.nbpower.com/reports%20%26%20assessments/load%20forecast/hourly/'
        forecast_filename_fmt = '%Y-%m-%d %H.csv'
        earliest_forecast = copy(self.atlantic_now).replace(minute=0,
                                                            second=0,
                                                            microsecond=0)
        latest_forecast = earliest_forecast + timedelta(hours=3)

        if self.local_start_at <= latest_forecast:
            forecast_filename = earliest_forecast.strftime(
                forecast_filename_fmt)
            load_forecast_url = forecast_url_base + quote(forecast_filename)
            response = self.request(load_forecast_url)
            response_body = BytesIO(response.content)
            response_df = read_csv(response_body,
                                   names=['timestamp', 'load'],
                                   usecols=[0, 1],
                                   dtype={'load': float},
                                   parse_dates=[0],
                                   date_parser=self.parse_forecast_timestamps)
            for idx, row in response_df.iterrows():
                if self.atlantic_now <= row.timestamp and self.local_start_at <= row.timestamp <= self.local_end_at:
                    row_pd_timestamp = Timestamp(
                        row.timestamp.astimezone(pytz.utc))

                    # In the event of a duplicate timestamp (e.g. daylight savings transition hours), use latest value.
                    if len(load_ts) > 0 and load_ts[-1][
                            'timestamp'] == row_pd_timestamp:
                        del load_ts[-1:]

                    load_ts.append({
                        'ba_name': self.NAME,
                        'timestamp': row_pd_timestamp,
                        'freq': self.FREQUENCY_CHOICES.hourly,
                        'market': self.MARKET_CHOICES.dam,
                        'load_MW': row.load
                    })
        else:
            LOGGER.warn('The latest load forecast available is ' +
                        str(latest_forecast) +
                        '. The requested start_at must be before this time.')
        return load_ts
Example #30
    def get_latest_fuel_mix(self):
        # set up request
        url = self.base_url + '/ria/FuelMix.aspx?CSV=True'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return None

        # return good
        return response.content
Example #31
 def get_load(self, latest=False, yesterday=False, start_at=False, end_at=False, **kwargs):
     self.handle_options(latest=latest, yesterday=yesterday, start_at=start_at, end_at=end_at, data='load')
     loads = []
     if latest:
         self._load_latest(loads)
     elif self._is_valid_date_range():
         self._hourly_range(loads)
     else:
         if self.options.get('forecast', False):
             LOGGER.warn(self.NAME + ': Load forecasts are not supported.')
         else:
             msg = '%s: Requested date range %s to %s is outside range of available data from %s to %s.' % \
                   (self.NAME, self.options.get('start_at', None), self.options.get('end_at', None),
                    self.options.get('earliest_data_at', None), self.options.get('latest_data_at', None))
             LOGGER.warn(msg)
     return loads
Example #32
    def parse_ace_data(self, content):
        if not content:
            return pd.DataFrame()

        # preliminary parsing
        df = pd.DataFrame(content, columns=['instantEST', 'value'])
        df['instantEST'] = pd.to_datetime(df['instantEST'])
        df.set_index('instantEST', inplace=True)
        # set index
        try:
            df.index = self.utcify_index(df.index)
        except AttributeError:
            LOGGER.error('MISO: Error in source data for ACE %s' % content)
            return pd.DataFrame()
        df.index.set_names(['timestamp'], inplace=True)
        return df
Example #33
    def get_latest_fuel_mix(self):
        # set up request
        url = self.base_url + '/ria/FuelMix.aspx?CSV=True'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return None

        # return good
        return response.content
Example #34
    def get_latest_fuel_mix(self):
        # set up request
        url = self.base_url + '?messageType=getfuelmix&returnType=csv'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return None

        # return good
        return response.content
Example #35
 def _format_start_end(self, data):
     formatted_sliced = []
     if 'gen' not in self.options['data']:
         formatted_sliced = [i for i in data if i['timestamp'] >= self.options['start_at'] and i['timestamp'] <= self.options['end_at']]
     else:
         try:
             yesterday = (self.local_now() - timedelta(days=2)).replace(hour=0, minute=0,
                                                                        second=0, microsecond=0)
             tomorrow = (self.local_now() + timedelta(days=1)).replace(hour=23, minute=0,
                                                                       second=0, microsecond=0)
             assert ((self.options['start_at'] >= yesterday) and (self.options['end_at'] <= tomorrow))
             formatted_sliced = [i for i in data if i['timestamp'] >= self.options['start_at'] and i['timestamp'] <= self.options['end_at']]
          except Exception:
              LOGGER.error('Generation data error for %s' % self.BA)
              raise ValueError('Generation data is available for the '
                               'previous and current day.', self.options)
     return formatted_sliced
Example #36
 def get_trade(self, latest=False, yesterday=False, start_at=False, end_at=False, **kwargs):
     self.handle_options(latest=latest, yesterday=yesterday, start_at=start_at, end_at=end_at, data='trade')
     # http://yukonenergy.ca/energy-in-yukon/electricity-101/electricity-library/whats-an-isolated-grid-and-what-does-that-mean-for-me
     LOGGER.warn('Yukon Energy is an isolated grid. Trade will always be zero.')
     trades = []
     hourly_rounded_dt = self.options.get('start_at').replace(minute=0, second=0, microsecond=0)
     while hourly_rounded_dt <= self.options.get('end_at'):
         if self.options['start_at'] <= hourly_rounded_dt <= self.options['end_at']:
             trades.append({
                 'ba_name': self.NAME,
                 'timestamp': Timestamp(hourly_rounded_dt),
                 'freq': self.FREQUENCY_CHOICES.hourly,
                 'market': self.MARKET_CHOICES.hourly,
                 'net_exp_MW': 0
             })
         hourly_rounded_dt = hourly_rounded_dt + timedelta(hours=1)
     return trades
Example #37
    def fetch_entsoe(self, url, payload, count=0):
        if not getattr(self, 'session', None):
            self.auth()

        r = self.request(url, params=payload)
        # TODO error checking
        if len(r.text) == 0:
            if count > 3:  # give up after several failed attempts
                LOGGER.warn('Request failed, no response found after %i attempts' % count)
                return False
            # throttled
            sleep(5)
            return self.fetch_entsoe(url, payload, count + 1)
        if 'UNKNOWN_EXCEPTION' in r.text:
            LOGGER.warn('UNKNOWN EXCEPTION')
            return False
        return r.text
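
A usage sketch; the registry key, endpoint, and query parameters are illustrative assumptions:

    from pyiso import client_factory

    eu = client_factory('EU')
    url = 'https://transparency.entsoe.eu/api'                   # illustrative endpoint
    payload = {'documentType': 'A65', 'processType': 'A16'}      # illustrative query params
    text = eu.fetch_entsoe(url, payload)    # XML text, or False after repeated failures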
Example #38
    def val_from_soup(self, soup, key):
        """
        Returns a float value if one is found in the soup for the provided key,
        or None if an error was encountered.
        """
        for elt in soup.find_all('td'):
            try:
                if elt.find('a').string == key:
                    # numbers may have commas in the thousands
                    val_str = elt.next_sibling.string.replace(',', '')
                    return float(val_str)
            except AttributeError:  # no 'a' child
                continue

        # no value found
        LOGGER.error('PJM: Value for %s not found in soup:\n%s' % (key, soup))
        return None
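
A sketch against a minimal table fragment (markup and key are illustrative):

    from bs4 import BeautifulSoup
    from pyiso import client_factory

    pjm = client_factory('PJM')
    html = '<table><tr><td><a>RTO Load</a></td><td>101,234</td></tr></table>'
    soup = BeautifulSoup(html, 'html.parser')
    mw = pjm.val_from_soup(soup, 'RTO Load')    # -> 101234.0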
Example #39
File: eu.py Project: mhdella/pyiso
    def fetch_entsoe(self, url, payload, count=0):
        if not getattr(self, 'session', None):
            self.auth()

        r = self.request(url, params=payload)
        # TODO error checking
        if len(r.text) == 0:
            if count > 3:  # give up after several failed attempts
                LOGGER.warn('Request failed, no response found after %i attempts' % count)
                return False
            # throttled
            sleep(5)
            return self.fetch_entsoe(url, payload, count + 1)
        if 'UNKNOWN_EXCEPTION' in r.text:
            LOGGER.warn('UNKNOWN EXCEPTION')
            return False
        return r.text
Example #40
    def get_load(self, latest=False, yesterday=False, start_at=False,
                 end_at=False, forecast=False, **kwargs):
        """
        Scrape and parse load data.
        """

        self.handle_options(data='load', latest=latest, yesterday=yesterday,
                            start_at=start_at, end_at=end_at, forecast=forecast,
                            **kwargs)
        self.handle_ba_limitations()
        self.format_url()
        result = self.request(self.url)
        if result is not None:
            result_json = json.loads(result.text)
            result_formatted = self.format_result(result_json)
            return result_formatted
        else:
            LOGGER.error('No results for %s' % self.BA)
            return []
Example #41
    def get_latest_ace(self):
        # set up request
        url = self.base_url + '?messageType=getACE&returnType=json'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for ACE')
            return None
        ace = json.loads(response.content)

        # return good
        data = self.parse_ace_data(ace['ACE'])
        return self.serialize_faster(data)
Example #42
    def fetch_csvs(self, date, label):
        # construct url
        datestr = date.strftime('%Y%m%d')
        if self.options['data'] == 'zone_lmp':
            url = '%s/%s/%s%s_zone.csv' % (self.base_url, label, datestr,
                                           label)
        elif self.options['data'] == 'lmp':
            url = '%s/%s/%s%s_gen.csv' % (self.base_url, label, datestr, label)
        else:
            url = '%s/%s/%s%s.csv' % (self.base_url, label, datestr, label)

        # make request
        response = self.request(url)

        # if 200, return
        if response and response.status_code == 200:
            return [response.text]

        # if failure, try zipped monthly data
        datestr = date.strftime('%Y%m01')
        if self.options['data'] == 'zone_lmp':
            url = '%s/%s/%s%s_zone_csv.zip' % (self.base_url, label, datestr,
                                               label)
        elif self.options['data'] == 'lmp':
            url = '%s/%s/%s%s_gen_csv.zip' % (self.base_url, label, datestr,
                                              label)
        else:
            url = '%s/%s/%s%s_csv.zip' % (self.base_url, label, datestr, label)

        # make request and unzip
        response_zipped = self.request(url)
        if response_zipped:
            unzipped = self.unzip(response_zipped.content)
        else:
            return []

        # return
        if unzipped:
            LOGGER.info(
                'Failed to find daily %s data for %s but found monthly data, using that'
                % (self.options['data'], date))
            return unzipped
        else:
            return []
Example #43
    def get_generation(self, latest=False, yesterday=False,
                       start_at=False, end_at=False, **kwargs):
        """
        Scrape and parse generation fuel mix data.
        Note: Generation may be quite low for HST and NSB BAs.
        """

        self.handle_options(data='gen', latest=latest, yesterday=yesterday,
                            start_at=start_at, end_at=end_at, **kwargs)
        self.handle_ba_limitations()
        self.format_url()
        result = self.request(self.url)
        if result is not None:
            result_json = json.loads(result.text)
            result_formatted = self.format_result(result_json)
            return result_formatted
        else:
            LOGGER.error('No results for %s' % self.BA)
            return []
Example #44
 def format_url(self):
     """Set EIA API URL based on options"""
     if self.options['data'] == 'gen':
         if self.options['forecast']:
             LOGGER.error('Forecast not supported for generation.')
             raise ValueError('Forecast not supported for generation.')
         else:
             self.set_url('series', '-ALL.NG.H')
     elif self.options['data'] == 'load':
         if self.options['forecast']:
             self.set_url('series', '-ALL.DF.H')
         else:
             self.set_url('series', '-ALL.D.H')
     elif self.options['data'] == 'trade':
         if self.options['forecast']:
              LOGGER.error('Forecast not supported for trade.')
             raise ValueError('Forecast not supported for trade.')
         elif self.options['end_at']:
             if self.options['end_at'] > pytz.utc.localize(
                     datetime.utcnow()):
                  LOGGER.error('Forecast not supported for trade.')
                 raise ValueError('Forecast not supported for trade.')
             else:
                 self.set_url('series', '-ALL.TI.H')
         else:
             self.set_url('series', '-ALL.TI.H')
Example #45
    def parse_forecast(self, df):
        sliced = self.slice_times(df)

        if self.options['data'] == 'gen':
            try:
                sliced['gen_MW'] = 1000.0 * sliced['Supply Cleared (GWh) - Physical']
                sliced['fuel_name'] = 'other'
                return sliced[['gen_MW', 'fuel_name']]
            except KeyError:
                LOGGER.warn('MISO genmix error: missing key %s in %s' % ('Supply Cleared (GWh) - Physical', sliced.columns))
                return pd.DataFrame()

        elif self.options['data'] == 'load':
            try:
                sliced['load_MW'] = 1000.0 * (sliced['Demand Cleared (GWh) - Physical - Fixed'] +
                                              sliced['Demand Cleared (GWh) - Physical - Price Sen.'])
                return sliced['load_MW']
            except KeyError:
                LOGGER.warn('MISO load error: missing key %s in %s' % ('Demand Cleared (GWh) - Physical - Fixed', sliced.columns))
                return pd.DataFrame()

        elif self.options['data'] == 'trade':
            try:
                sliced['net_exp_MW'] = -1000.0 * sliced['Net Scheduled Imports (GWh)']
                return sliced['net_exp_MW']
            except KeyError:
                LOGGER.warn('MISO trade error: missing key %s in %s' % ('Net Scheduled Imports (GWh)', sliced.columns))
                return pd.DataFrame()

        else:
            raise ValueError('Can only parse MISO forecast gen, load, or trade data, not %s'
                             % self.options['data'])
Example #46
    def get_generation(self, latest=False, yesterday=False, start_at=None, end_at=None, **kwargs):
        generation_ts = []
        self.handle_options(latest=latest, yesterday=yesterday, start_at=start_at, end_at=end_at, **kwargs)

        gen_out_cap_handler = GeneratorOutputCapabilityReportHandler(ieso_client=self)
        gen_out_by_fuel_handler = GeneratorOutputByFuelHourlyReportHandler(ieso_client=self)
        adequacy_handler = AdequacyReportHandler(ieso_client=self)

        if self.options.get('latest', False):
            self._get_latest_report_trimmed(result_ts=generation_ts, report_handler=gen_out_cap_handler,
                                            parser_format=ParserFormat.generation)
        elif self.options.get('start_at', None) and self.options.get('end_at', None):
            # For time ranges starting earlier than hour-ending 1, seven days in the past, it is more efficient to
            # request the Generator Output by Fuel Type Hourly Report than to make repeated calls to the Generator
            # Output and Capability Report.
            # TODO: Minor optimization, but this should actually check whether the start/end range is greater than
            # 7 days.
            if self.options['start_at'] < self.local_start_of_day.replace(hour=1) - timedelta(days=7):
                self.timeout_seconds = 90  # These reports can get rather large ~7MB for a full year.
                range_start = max(self.options['start_at'], gen_out_by_fuel_handler.earliest_available_datetime())
                range_end = min(self.options['end_at'], gen_out_by_fuel_handler.latest_available_datetime())
                self._get_report_range(result_ts=generation_ts, report_handler=gen_out_by_fuel_handler,
                                       parser_format=ParserFormat.generation, range_start=range_start,
                                       range_end=range_end)
            elif self.options.get('historical', False):
                range_start = max(self.options['start_at'], gen_out_cap_handler.earliest_available_datetime())
                range_end = min(self.options['end_at'], gen_out_cap_handler.latest_available_datetime())
                self._get_report_range(result_ts=generation_ts, report_handler=gen_out_cap_handler,
                                       parser_format=ParserFormat.generation, range_start=range_start,
                                       range_end=range_end)

            if self.options.get('forecast', False):
                range_start = max(self.options['start_at'], self.local_now)
                range_end = min(self.options['end_at'], adequacy_handler.latest_available_datetime())
                self._get_report_range(result_ts=generation_ts, report_handler=adequacy_handler,
                                       parser_format=ParserFormat.generation, range_start=range_start,
                                       range_end=range_end)
        else:
            LOGGER.warn('No valid options were supplied.')
        return generation_ts
Example #47
    def parse_latest_fuel_mix(self, content):
        # handle bad input
        if not content:
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(BytesIO(content), header=0, index_col=0, skiprows=2, parse_dates=True)

        # set index
        try:
            df.index = self.utcify_index(df.index)
        except AttributeError:
            LOGGER.error('MISO: Error in source data for generation %s' % content)
            return pd.DataFrame()
        df.index.set_names(['timestamp'], inplace=True)

        # set names and labels
        df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1)
        df['gen_MW'] = df['ACT']

        # return
        return df[['fuel_name', 'gen_MW']]
Example #48
    def format_result(self, data):
        """Output EIA API results in pyiso format"""
        if 'series' not in data:
            LOGGER.error('Unable to format result for %s' % data['request'])
            raise ValueError('Query error for %s:' % data['request'])
        market = self._set_market()
        data_type = self._set_data_type()
        data_formatted = []
        if self.options['latest']:
            data_formatted = self._format_latest(data, data_type, market)
        elif self.options['yesterday']:
            data_formatted = self._format_yesterday(data, data_type, market)
        else:
            data_formatted = self._format_general(data, data_type, market)

        if self.options['start_at'] and self.options['end_at']:
            data_formatted = self._format_start_end(data_formatted)
        if self.options['data'] == 'gen':
            data_formatted = self.add_gen_data(data_formatted)
        return data_formatted
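A hedged illustration of the minimal payload shape the method above expects; the series_id and values are invented, and only the presence of the 'series' key is actually checked before formatting:

    data = {
        'request': {'command': 'series', 'series_id': 'EBA.MISO-ALL.D.H'},
        'series': [{'series_id': 'EBA.MISO-ALL.D.H',
                    'data': [['20180101T00Z', 1234.0]]}],
    }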
Example No. 49
0
    def get_lmp(self,
                node_id='INTERNALHUB',
                latest=True,
                start_at=False,
                end_at=False,
                **kwargs):
        # set args
        self.handle_options(data='lmp',
                            latest=latest,
                            start_at=start_at,
                            end_at=end_at,
                            node_id=node_id,
                            **kwargs)
        # get location id
        try:
            locationid = self.locations[node_id.upper()]
        except KeyError:
            raise ValueError('No LMP data available for location %s' % node_id)

        # set up storage
        raw_data = []
        # collect raw data
        for endpoint in self.request_endpoints(locationid):
            # carry out request
            data = self.fetch_data(endpoint, self.auth)

            # pull out data
            try:
                raw_data += self.parse_json_lmp_data(data)
            except ValueError as e:
                LOGGER.warn(e)
                continue

        # parse and slice
        df = self._parse_json(raw_data)
        df = self.slice_times(df)

        # return
        return df.to_dict(orient='records')
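A possible invocation, assuming this get_lmp lives on a pyiso client reachable through client_factory; authentication and node availability depend on the deployment:

    from pyiso import client_factory

    isone = client_factory('ISONE')
    latest_lmp = isone.get_lmp(node_id='INTERNALHUB', latest=True)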
Example No. 50
0
    def time_as_of(self, content):
        """
        Returns a UTC timestamp if one is found in the html content,
        or None if an error was encountered.
        """
        # soup it up
        soup = BeautifulSoup(content, 'lxml')

        # like 12.11.2015 17:15
        ts_elt = soup.find(id='ctl00_ContentPlaceHolder1_DateAndTime')
        if not ts_elt:
            LOGGER.error('PJM: Timestamp not found in soup:\n%s' % soup)
            return None
        ts_str = ts_elt.string

        # EDT or EST
        tz_elt = ts_elt.next_sibling
        tz_str = tz_elt.string.strip()
        is_dst = tz_str == 'EDT'

        # utcify and return
        return self.utcify(ts_str, is_dst=is_dst)
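The EDT/EST sibling element is what disambiguates clock times around the daylight saving transition; a standalone sketch of the same idea in plain pytz (utcify presumably wraps something similar):

    import pytz
    from datetime import datetime

    eastern = pytz.timezone('America/New_York')
    naive = datetime(2015, 11, 1, 1, 30)  # occurs twice on the fall-back day
    as_edt = eastern.localize(naive, is_dst=True).astimezone(pytz.utc)
    as_est = eastern.localize(naive, is_dst=False).astimezone(pytz.utc)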
Example No. 51
0
    def get_load(self,
                 latest=False,
                 start_at=False,
                 end_at=False,
                 forecast=False,
                 **kwargs):
        # set args
        self.handle_options(data='load',
                            latest=latest,
                            forecast=forecast,
                            start_at=start_at,
                            end_at=end_at,
                            **kwargs)

        # set up storage
        raw_data = []

        # collect raw data
        for endpoint in self.request_endpoints():
            # carry out request
            data = self.fetch_data(endpoint, self.auth)

            # pull out data
            try:
                raw_data += self.parse_json_load_data(data)
            except ValueError as e:
                LOGGER.warn(e)
                continue

        # parse data
        try:
            df = self._parse_json(raw_data)
        except ValueError:
            return []
        df = self.slice_times(df)

        # return
        return self.serialize_faster(df, drop_index=True)
Example No. 52
0
    def get_load(self, latest=False, yesterday=False, start_at=None, end_at=None, **kwargs):
        load_ts = []
        self.handle_options(latest=latest, yesterday=yesterday, start_at=start_at, end_at=end_at, **kwargs)
        rt_const_totals_handler = RealTimeConstrainedTotalsReportHandler(ieso_client=self)
        predisp_const_totals_handler = PredispatchConstrainedTotalsReportHandler(ieso_client=self)

        if self.options.get('latest', False):
            self._get_latest_report_trimmed(result_ts=load_ts, report_handler=rt_const_totals_handler,
                                            parser_format=ParserFormat.load)
        elif self.options.get('start_at', None) and self.options.get('end_at', None):
            if self.options.get('historical', False):
                range_start = max(self.options['start_at'], rt_const_totals_handler.earliest_available_datetime())
                range_end = min(self.options['end_at'], rt_const_totals_handler.latest_available_datetime())
                self._get_report_range(result_ts=load_ts, report_handler=rt_const_totals_handler,
                                       parser_format=ParserFormat.load, range_start=range_start, range_end=range_end)
            if self.options.get('forecast', False):
                range_start = max(self.options['start_at'], rt_const_totals_handler.latest_available_datetime(),
                                  predisp_const_totals_handler.earliest_available_datetime())
                range_end = min(self.options['end_at'], predisp_const_totals_handler.latest_available_datetime())
                self._get_report_range(result_ts=load_ts, report_handler=predisp_const_totals_handler,
                                       parser_format=ParserFormat.load, range_start=range_start, range_end=range_end)
        else:
            LOGGER.warn('No valid options were supplied.')
        return load_ts
Example No. 53
0
    def get_lmp(self, node_id='HB_HUBAVG', **kwargs):
        self.handle_options(data='lmp', node_id=node_id, **kwargs)

        if self.options['market'] == self.MARKET_CHOICES.fivemin:
            report_name = 'rt5m_lmp'
        elif self.options['market'] == self.MARKET_CHOICES.dam:
            report_name = 'dam_hrly_lmp'
        elif self.options['market'] == self.MARKET_CHOICES.hourly:
            raise NotImplementedError(
                'ERCOT does not produce real-time hourly prices')

        self.now = datetime.now(pytz.utc)

        if 'start_at' in self.options:
            # get start and end days in local time
            tz = pytz.timezone(self.TZ_NAME)
            start = tz.normalize(self.options['start_at'])
            end = tz.normalize(self.options['end_at'])

            pieces = []
            if self.options['market'] == self.MARKET_CHOICES.fivemin:
                # warning, this could take a long time
                fivemin_periods = int(
                    (end - start).total_seconds() / (60 * 5)) + 1
                p_list = [
                    end - timedelta(minutes=5 * x)
                    for x in range(fivemin_periods)
                ]

                for period in p_list:
                    try:
                        report = self._request_report(report_name, date=period)
                        pieces.append(report)
                    except ValueError:
                        pass

            else:
                start = datetime(start.year,
                                 start.month,
                                 start.day,
                                 tzinfo=start.tzinfo)
                days_list = [
                    end - timedelta(days=x)
                    for x in range((end - start).days + 1)
                ]
                for day in days_list:
                    try:
                        report = self._request_report(report_name, day)
                        pieces.append(report)
                    except ValueError:
                        pass

            # combine pieces, if any
            if len(pieces) > 0:
                report = pd.concat(pieces)
            else:
                LOGGER.warn('No ERCOT LMP found for %s' % self.options)
                return []
        else:
            report = self._request_report(report_name, self.now)
            if report is None:
                report = self._request_report(report_name,
                                              self.now - timedelta(days=1))
        df = self.format_lmp(report)

        # strip unneeded times
        df = self.slice_times(df)

        # strip out unwanted nodes
        if node_id:
            if not isinstance(node_id, list):
                node_id = [node_id]
            reg = re.compile('|'.join(node_id))
            df = df.loc[df['node_id'].str.contains(reg)]

        return df.to_dict(orient='records')
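The node filter at the end relies on pandas accepting a compiled regex in str.contains; a small self-contained illustration with made-up node names:

    import re
    import pandas as pd

    df = pd.DataFrame({'node_id': ['HB_HUBAVG', 'HB_NORTH', 'LZ_SOUTH']})
    reg = re.compile('|'.join(['HB_HUBAVG', 'LZ_SOUTH']))
    filtered = df.loc[df['node_id'].str.contains(reg)]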
Example No. 54
0
    def no_forecast_warn(self):
        if not self.options['latest'] and self.options[
                'start_at'] >= pytz.utc.localize(datetime.utcnow()):
            LOGGER.warn(
                "SVERI does not have forecast data. There will be no data for the chosen time frame."
            )
Example No. 55
0
    def _generation_historical(self):
        # set up storage
        parsed_data = []

        # collect data
        request_date = self.options['start_at'].astimezone(self.ca_tz).date()
        local_end_at = self.options['end_at'].astimezone(self.ca_tz).date()
        while request_date <= local_end_at:
            # set up request
            url_file = request_date.strftime('%Y%m%d_DailyRenewablesWatch.txt')
            url = self.base_url_gen + url_file

            # carry out request
            response = self.request(url)
            if not response:
                request_date += timedelta(days=1)
                continue

            dst_error_text = 'The supplied DateTime represents an invalid time.  For example, when the clock is ' \
                             'adjusted forward, any time in the period that is skipped is invalid.'
            header_idx = 1
            for part in [1, 2]:  # process both halves of page (i.e. two parts)
                num_data_rows = 24

                # The day transitioning to daylight saving time adds extra erroneous lines of text.
                if part == 1 and dst_error_text in response.text:
                    num_data_rows = 29

                df = self.parse_to_df(response.text,
                                      nrows=num_data_rows,
                                      header=header_idx,
                                      delimiter='\t+')

                # The day transitioning to daylight saving time has errors in part two of the file that need removal.
                if part == 2:
                    df = df[df.THERMAL.map(str) != '#VALUE!']

                # combine date with hours to index
                try:
                    indexed = self.set_dt_index(df, request_date, df['Hour'])
                except Exception as e:
                    LOGGER.error(e)
                    continue

                # original header is fuel names
                indexed.rename(columns=self.fuels, inplace=True)

                # remove non-fuel cols
                fuel_cols = list(
                    set(self.fuels.values()) & set(indexed.columns))
                subsetted = indexed[fuel_cols]

                # pivot
                pivoted = self.unpivot(subsetted)
                pivoted.rename(columns={
                    'level_1': 'fuel_name',
                    0: 'gen_MW'
                },
                               inplace=True)

                # slice times
                sliced = self.slice_times(pivoted)

                # store
                parsed_data += self.serialize(
                    sliced,
                    header=['timestamp', 'fuel_name', 'gen_MW'],
                    extras={
                        'ba_name': self.NAME,
                        'market': self.MARKET_CHOICES.hourly,
                        'freq': self.FREQUENCY_CHOICES.hourly
                    })

                # If processing the first part, set the header index for second part.
                if part == 1:
                    header_idx = num_data_rows + 3

            # finish day
            request_date += timedelta(days=1)

        # return
        return parsed_data
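The 'level_1'/0 column names in the rename above suggest that unpivot is a plain stack-and-reset over the wide fuel frame; a hedged reconstruction with invented fuel names and values:

    import pandas as pd

    wide = pd.DataFrame(
        {'solar': [10.0, 12.0], 'wind': [5.0, 6.0]},
        index=pd.DatetimeIndex(['2015-06-01 00:00', '2015-06-01 01:00'],
                               name='timestamp'))
    long = wide.stack().reset_index()  # columns: timestamp, level_1, 0
    long.rename(columns={'level_1': 'fuel_name', 0: 'gen_MW'}, inplace=True)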
Example No. 56
0
    def request(self,
                url,
                mode='get',
                retry_sec=5,
                retries_remaining=5,
                **kwargs):
        """
        Get or post to a URL with the provided kwargs.
        Returns the response, or None if an error was encountered.
        If the mode is not 'get' or 'post', raises ValueError.
        """
        # check args
        allowed_modes = ['get', 'post']
        if mode not in allowed_modes:
            raise ValueError('Invalid request mode %s' % mode)

        # check for session
        try:
            session = getattr(self, 'session')
        except AttributeError:
            self.session = requests.Session()
            session = self.session

        # carry out request
        try:
            response = getattr(session, mode)(url,
                                              verify=False,
                                              timeout=self.timeout_seconds,
                                              **kwargs)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            # eg max retries exceeded
            msg = '%s: connection error for %s, %s:\n%s' % (self.NAME, url,
                                                            kwargs, e)
            LOGGER.error(msg)
            return None

        if response.status_code == 200:
            # success
            LOGGER.debug('%s: request success for %s, %s with cache hit %s' %
                         (self.NAME, url, kwargs,
                          getattr(response, 'from_cache', None)))

        elif response.status_code == 429:
            if retries_remaining > 0:
                # retry on throttle
                LOGGER.warn(
                    '%s: retrying in %d seconds (%d retries remaining), throttled for %s, %s'
                    % (self.NAME, retry_sec, retries_remaining, url, kwargs))
                sleep(retry_sec)
                retries_remaining -= 1
                return self.request(url,
                                    mode=mode,
                                    retry_sec=retry_sec * 2,
                                    retries_remaining=retries_remaining,
                                    **kwargs)
            else:
                # exhausted retries
                LOGGER.warn('%s: exhausted retries for %s, %s' %
                            (self.NAME, url, kwargs))
                return None

        else:
            # non-throttle error
            LOGGER.error('%s: request failure with code %s for %s, %s' %
                         (self.NAME, response.status_code, url, kwargs))

        return response
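The 429 branch doubles the delay on every retry; a stripped-down standalone sketch of the same backoff pattern using plain requests (URL and timeout are placeholders):

    from time import sleep
    import requests

    def get_with_backoff(url, retry_sec=5, retries_remaining=5):
        response = requests.get(url, timeout=30)
        if response.status_code == 429 and retries_remaining > 0:
            sleep(retry_sec)  # wait, then retry with a doubled delay
            return get_with_backoff(url, retry_sec=retry_sec * 2,
                                    retries_remaining=retries_remaining - 1)
        return response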