Exemplo n.º 1
0
    def _fetch_queries(self):
        """Fetch the aggregated queries and feed each outcome to its processor.

        ``self._data_queries`` is passed through ``aggregate_queries``; every
        resulting query is fetched once with ``fetch_query`` and the returned
        frame is filtered per dimension set. Each query info then has its
        processor's ``calculate`` coroutine run to completion on the current
        event loop, with either the retrieved data or a failure result.
        """
        aggregated = aggregate_queries(self._data_queries)

        # Only the query maps are needed; the outer keys are not used.
        for dataset_queries in aggregated.values():
            for dataset_query in dataset_queries.values():
                frame = fetch_query(dataset_query)
                for dimensions, infos in dataset_query['queries'].items():
                    if not valid_dimensions(dimensions, frame):
                        # Dimensions rejected for this frame: store an empty series.
                        for info in infos:
                            info.data = Series()
                        continue

                    filtered = frame.query(build_query_string(dimensions))
                    for info in infos:
                        measure = info.query.coordinate.measure
                        # A measure is either a plain string or an object exposing ``.value``.
                        column = measure if isinstance(measure, str) else measure.value
                        info.data = filtered[column]

        for query_info in self._data_queries:
            loop = asyncio.get_event_loop()
            has_data = query_info.data is not None and len(query_info.data) != 0
            if has_data:
                outcome = ProcessorResult(True, query_info.data)
            else:
                outcome = ProcessorResult(
                    False, f'No data found for '
                    f'Coordinate {query_info.query.coordinate}')
            loop.run_until_complete(
                query_info.processor.calculate(query_info.attr, outcome,
                                               self.rule_cache))
Exemplo n.º 2
0
    def process(self, entity: Entity) -> ProcessorResult:
        """Resolve ``self.field`` for the given entity.

        The field is first read from the entity dictionary; if that yields a
        falsy value, the first entry of the entity's ``identifiers`` list whose
        ``type`` equals the field is used instead.

        :param entity: entity providing ``get_entity()`` and ``get_marquee_id()``
        :return: a successful result holding the value, or a failed result
            with a message when nothing was found or a ``ValueError`` occurred
        """
        try:
            entity_dict = entity.get_entity()

            # Preferred source: the value stored directly on the entity.
            field_value = get(entity_dict, self.field)
            if field_value:
                return ProcessorResult(True, field_value)

            # Fallback: the first identifier whose type matches the field.
            matched = next(
                (candidate
                 for candidate in entity_dict.get('identifiers', [])
                 if candidate['type'] == self.field), None)
            if matched:
                return ProcessorResult(True, matched['value'])

            # Neither the entity nor its identifiers carry the field.
            return ProcessorResult(
                False,
                f'Unable to find {self.field} in identifiers for entity {entity.get_marquee_id()}'
            )
        except ValueError:
            return ProcessorResult(False, "Could not get field on entity")
Exemplo n.º 3
0
    def process(self):
        """Restrict the 'a' series to the configured date range.

        Reads the 'a' child result and stores the outcome in ``self.value``:

        * a failed result when 'a' is missing or unsuccessful;
        * a failed result when ``self.weekdays_only`` is not a boolean;
        * otherwise the output of ``date_range`` over the 'a' data.

        Side effects: ``self.start`` / ``self.end`` are filled in from the
        series' ``date`` column when they are ``None``, and ``RelativeDate``
        values are replaced by the result of ``apply_rule()``.
        """
        a_data = self.children_data.get('a')
        if not isinstance(a_data, ProcessorResult):
            self.value = ProcessorResult(
                False, "DateRangeProcessor does not have 'a' series yet")
            return
        if not a_data.success:
            self.value = ProcessorResult(
                False,
                "DateRangeProcessor does not have 'a' series values yet")
            return

        date_array = a_data.data.reset_index()['date']
        if self.end is None:
            self.end = max(date_array)
        if self.start is None:
            self.start = min(date_array)

        if not isinstance(self.weekdays_only, bool):
            self.value = ProcessorResult(
                False,
                "DateRangeProcessor requires weekdays_only argument to be a boolean."
            )
            # Stop here: previously execution continued into date_range and
            # the failure above was overwritten (or an error was raised).
            return

        yesterday = date.today() - timedelta(days=1)
        with PricingContext(pricing_date=yesterday):
            # for EOD datasets latest datapoint is T-1,
            # relative dates will be evaluated using yesterday as base_date
            if isinstance(self.end, RelativeDate):
                self.end = self.end.apply_rule()
            if isinstance(self.start, RelativeDate):
                self.start = self.start.apply_rule()

        result = date_range(a_data.data,
                            start_date=self.start,
                            end_date=self.end,
                            weekdays_only=self.weekdays_only)
        self.value = ProcessorResult(True, result)
Exemplo n.º 4
0
 def process(self):
     """Combine the minimum, maximum and marker child results into one dict.

     On success ``self.value`` holds ``{'min', 'max', 'markers'}``, where the
     min and max are read with ``.get(0)`` from their child data. Each
     successful, non-empty marker is checked with ``validate_markers_data``;
     markers that fail the check are kept but gain an ``invalidReason`` entry.
     A failed result is stored when min/max are missing or unsuccessful.
     """
     min_data = self.children_data.get('minimum')
     max_data = self.children_data.get('maximum')
     markers_data = [
         self.children_data.get(marker.name) for marker in self.markers
     ]

     both_present = isinstance(min_data, ProcessorResult) and isinstance(
         max_data, ProcessorResult)
     if not both_present:
         self.value = ProcessorResult(
             False, "Processor does not have min, max data yet")
         return
     if not (min_data.success and max_data.success):
         self.value = ProcessorResult(
             False, "Processor does not have min, max values yet")
         return

     lower = min_data.data.get(0)
     upper = max_data.data.get(0)
     markers = []
     for marker_data in markers_data:
         # Skip markers that are missing, failed or carry no data.
         if not (marker_data and marker_data.success and marker_data.data):
             continue
         valid, reason = validate_markers_data(lower, upper,
                                               marker_data.data)
         if valid:
             markers.append(marker_data.data)
         else:
             markers.append({**marker_data.data, 'invalidReason': reason})

     self.value = ProcessorResult(True, {
         'min': lower,
         'max': upper,
         'markers': markers
     })
Exemplo n.º 5
0
    def _fetch_queries(self):
        """Fetch the aggregated queries and feed each outcome to its processor.

        ``self._data_queries`` is passed through ``aggregate_queries``; every
        resulting query is fetched once with ``fetch_query`` and the returned
        frame is filtered per dimension set. Each query info's processor is
        then called with either the retrieved data or a failure result.
        """
        aggregated = aggregate_queries(self._data_queries)

        # Only the query maps are needed; the outer keys are not used.
        for dataset_queries in aggregated.values():
            for dataset_query in dataset_queries.values():
                frame = fetch_query(dataset_query)
                for dimensions, infos in dataset_query['queries'].items():
                    if not valid_dimensions(dimensions, frame):
                        # Dimensions rejected for this frame: store an empty series.
                        for info in infos:
                            info.data = Series()
                        continue

                    filtered = frame.query(build_query_string(dimensions))
                    for info in infos:
                        info.data = filtered[info.query.coordinate.measure]

        for query_info in self._data_queries:
            has_data = query_info.data is not None and len(query_info.data) != 0
            if has_data:
                outcome = ProcessorResult(True, query_info.data)
            else:
                outcome = ProcessorResult(
                    False, f'No data found for '
                    f'Coordinate {query_info.query.coordinate}')
            query_info.processor.calculate(query_info.attr, outcome)
Exemplo n.º 6
0
    def process(self):
        """Express the latest move of the 'a' series in standard deviations.

        The latest move is the last value of ``returns`` over the final two
        points. It is divided by the last value of ``std`` over the returns of
        the series without its final point, using ``self.w`` as the window
        (``Window(None, 0)`` when ``self.w`` is ``None``).

        :return: ``self.value`` - a one-element series on success, or a failed
            result when 'a' is missing/unsuccessful or the ratio is undefined
            (NaN move, NaN deviation or zero deviation)
        """
        a_data = self.children_data.get('a')
        if not isinstance(a_data, ProcessorResult):
            self.value = ProcessorResult(
                False, "StdMoveProcessor does not have 'a' series yet")
            return self.value
        if not a_data.success:
            self.value = ProcessorResult(
                False,
                "StdMoveProcessor does not have 'a' series values yet")
            return self.value

        data_series = a_data.data
        change = returns(data_series.tail(2)).iloc[-1]

        # The deviation uses every point except the last one, so the latest
        # move is not part of the history it is compared against.
        returns_series = returns(data_series.head(-1))
        std_result = std(
            returns_series,
            w=Window(None, 0) if self.w is None else self.w).iloc[-1]

        # ``change is not None`` was always true for a float; test for NaN
        # explicitly so an undefined ratio is reported as a failure.
        ratio_defined = (pd.notna(change) and pd.notna(std_result)
                         and std_result != 0)
        if ratio_defined:
            self.value = ProcessorResult(True,
                                         pd.Series([change / std_result]))
        else:
            self.value = ProcessorResult(False,
                                         "StdMoveProcessor returns a NaN")
        return self.value
Exemplo n.º 7
0
    def process(self):
        """Compute percentiles of the 'a' series, optionally against 'b'.

        When a 'b' child is configured and its result has arrived, the
        percentiles are computed from 'a' and 'b' together; a failed 'b'
        result produces a failed result. In every other case the percentiles
        are computed from 'a' alone.

        :return: ``self.value`` - the percentiles result, or a failed result
            when 'a' is missing/unsuccessful or 'b' is not a valid series
        """
        a_data = self.children_data.get('a')
        if not isinstance(a_data, ProcessorResult):
            self.value = ProcessorResult(
                False, "PercentilesProcessor does not have 'a' series yet")
            return self.value
        if not a_data.success:
            self.value = ProcessorResult(
                False,
                "PercentilesProcessor does not have 'a' series values yet")
            return self.value

        b_data = self.children_data.get('b')
        # Need to check if the child node b was set in the first place.
        if self.children.get('b') and isinstance(b_data, ProcessorResult):
            if b_data.success:
                result = percentiles(a_data.data, b_data.data, w=self.w)
                self.value = ProcessorResult(True, result)
            else:
                # An unusable 'b' is a failure, not a successful value.
                self.value = ProcessorResult(
                    False, 'PercentilesProcessor: b is not a valid series.')
        else:
            # Only reached without a usable 'b'; previously this ran
            # unconditionally and overwrote the a/b result above.
            result = percentiles(a_data.data, w=self.w)
            self.value = ProcessorResult(True, result)

        return self.value
Exemplo n.º 8
0
 def process(self):
     """Look up the configured dimension on the 'a' child coordinate.

     :return: a successful result with the dimension value when the 'a' child
         exists and holds a truthy value for the dimension key, otherwise a
         failed result naming the key
     """
     if isinstance(self.dimension, Enum):
         key: str = self.dimension.value
     else:
         key: str = self.dimension

     coordinate = self.children.get('a')
     if coordinate:
         dimension_value = coordinate.dimensions.get(key)
         if dimension_value:
             return ProcessorResult(True, dimension_value)

     return ProcessorResult(False, f'Dimension {key} not in given coordinate')
Exemplo n.º 9
0
 def process(self):
     """Apply ``diff`` with ``self.obs`` to the 'a' series.

     The outcome is stored in ``self.value``: the diff result on success, or
     a failed result when 'a' is missing or unsuccessful.
     """
     a_data = self.children_data.get('a')
     if not isinstance(a_data, ProcessorResult):
         self.value = ProcessorResult(False, "DiffProcessor does not have 'a' series yet")
         return
     if not a_data.success:
         self.value = ProcessorResult(False, "DiffProcessor does not have 'a' series values yet")
         return

     self.value = ProcessorResult(True, diff(a_data.data, self.obs))
Exemplo n.º 10
0
 def process(self):
     """Apply ``volatility`` to the 'a' series.

     ``self.w`` and ``self.returns_type`` are forwarded positionally. The
     outcome is stored in ``self.value``; a failed result is stored when 'a'
     is missing or unsuccessful.
     """
     a_data = self.children_data.get('a')
     if not isinstance(a_data, ProcessorResult):
         self.value = ProcessorResult(False, 'Processor does not have data')
         return
     if not a_data.success:
         self.value = ProcessorResult(False, 'Could not compute volatility')
         return

     computed = volatility(a_data.data, self.w, self.returns_type)
     self.value = ProcessorResult(True, computed)
Exemplo n.º 11
0
 def update(self, result: ProcessorResult) -> None:
     """Store the cell value derived from a processor result.

     A non-empty ``Series`` is reduced to its last element, an empty
     ``Series`` becomes a failed result, and any other data is stored as-is
     in a successful result. ``self.updated_time`` is refreshed afterwards.

     :param result: processor result whose ``data`` is inspected
     """
     payload = result.data
     if not isinstance(payload, Series):
         self.value = ProcessorResult(True, payload)
     elif payload.empty:
         self.value = ProcessorResult(False, 'Empty series as a result of processing.')
     else:
         self.value = ProcessorResult(True, payload.iloc[-1])
     self.updated_time = get_utc_now()
Exemplo n.º 12
0
 def process(self):
     """Correlate the 'a' series with the 'benchmark' series.

     ``self.w`` and ``self.type_`` are forwarded to ``correlation``. The
     outcome is stored in ``self.value``; the same failed result is stored
     whenever either input is missing or unsuccessful.
     """
     a_data = self.children_data.get('a')
     benchmark_data = self.children_data.get('benchmark')

     inputs_ready = (isinstance(a_data, ProcessorResult)
                     and isinstance(benchmark_data, ProcessorResult)
                     and a_data.success
                     and benchmark_data.success)
     if inputs_ready:
         computed = correlation(a_data.data, benchmark_data.data, w=self.w, type_=self.type_)
         self.value = ProcessorResult(True, computed)
     else:
         self.value = ProcessorResult(False, "Processor does not have A and Benchmark data yet")
Exemplo n.º 13
0
 def process(self) -> None:
     """Concatenate the 'b' data after the 'a' data.

     The combined object is stored in ``self.value``; a failed result is
     stored when either input is missing or unsuccessful.
     """
     # Imported locally so the method does not rely on a module-level alias.
     import pandas as pd

     a_data = self.children_data.get('a')
     b_data = self.children_data.get('b')

     inputs_ready = (isinstance(a_data, ProcessorResult)
                     and isinstance(b_data, ProcessorResult)
                     and a_data.success
                     and b_data.success)
     if inputs_ready:
         # ``Series.append`` was removed in pandas 2.0; ``pd.concat`` is the
         # documented replacement and keeps both indexes unchanged.
         result = pd.concat([a_data.data, b_data.data])
         self.value = ProcessorResult(True, result)
     else:
         self.value = ProcessorResult(False, "Processor does not have A and B data yet")
Exemplo n.º 14
0
 def process(self) -> None:
     """Store the minimum of the 'a' series as a one-element series.

     A failed result is stored when 'a' is missing, unsuccessful, or its data
     is not a ``Series``.
     """
     a = self.children_data.get('a')
     if not isinstance(a, ProcessorResult):
         self.value = ProcessorResult(False,
                                      "Processor does not have series yet")
         return

     usable = a.success and isinstance(a.data, Series)
     if not usable:
         self.value = ProcessorResult(
             False, "Processor does not data series yet")
         return

     smallest = min(a.data)
     self.value = ProcessorResult(True, pd.Series(smallest))
Exemplo n.º 15
0
 def process(self) -> None:
     """Compute returns for the 'a' series and store them in ``self.value``.

     With ``self.observations`` set, ``returns`` is called with the
     observations and ``self.type_``. Otherwise a single simple return over
     the whole series is produced: ``(last - first) / first``. A series with
     fewer than two points yields a failed result.

     ``self.value`` is left untouched when 'a' is missing or unsuccessful.
     """
     a_data = self.children_data.get('a')
     if not (isinstance(a_data, ProcessorResult) and a_data.success):
         return

     data = a_data.data
     if self.observations is not None:
         value = returns(data, self.observations, self.type_)
         self.value = ProcessorResult(True, value)
     elif len(data) > 1:
         first, last = data.iloc[0], data.iloc[-1]
         # A simple return is measured relative to the starting value; the
         # previous code divided by the final value instead.
         self.value = ProcessorResult(True, Series([(last - first) / first]))
     else:
         # Too few points is a failure, not a successful value.
         self.value = ProcessorResult(False, 'Series has is less than 2.')
Exemplo n.º 16
0
 def process(self):
     """Apply ``zscores`` to the 'a' series with a length-capped window.

     A truthy ``self.w`` is used as the window but never exceeds the series
     length; otherwise ``None`` is passed. The outcome is stored in
     ``self.value``; a failed result is stored when 'a' is missing or
     unsuccessful.
     """
     a_data = self.children_data.get('a')
     if not isinstance(a_data, ProcessorResult):
         self.value = ProcessorResult(False, "ZscoresProcessor does not have 'a' series yet")
         return
     if not a_data.success:
         self.value = ProcessorResult(False, "ZscoresProcessor does not have 'a' series values yet")
         return

     series_length = len(a_data.data)
     window = None
     if self.w:
         # Cap the window at the number of available points.
         if self.w <= series_length:
             window = self.w
         else:
             window = series_length

     self.value = ProcessorResult(True, zscores(a_data.data, w=window))
Exemplo n.º 17
0
    def process(self, cross1: Entity) -> ProcessorResult:
        """Compute the FX implied correlation between ``cross1`` and ``self.cross2``.

        The calculation runs inside a ``DataContext`` spanning ``self.start``
        to ``self.end``. Any exception raised there is converted into a
        failed result carrying the exception text.

        :param cross1: first cross; must be a ``Cross`` instance
        :return: ``self.value`` - the correlation result or a failed result
        """
        crosses_valid = isinstance(cross1, Cross) and isinstance(
            self.cross2, Cross)
        if not crosses_valid:
            self.value = ProcessorResult(
                False, "Processor does not have valid crosses as inputs")
            return self.value

        try:
            with DataContext(self.start, self.end):
                correlation_series = fx_implied_correlation(
                    cross1, self.cross2, self.tenor)
                self.value = ProcessorResult(True, correlation_series)
        except Exception as e:
            self.value = ProcessorResult(False, str(e))

        return self.value
Exemplo n.º 18
0
 def process(self):
     """Build the marker dictionary from the 'a' child result.

     On success ``self.value`` holds ``{'name', 'value', 'shape'}``, where the
     value is read with ``.get(0)`` from the 'a' data. A failed result is
     stored when 'a' is missing or unsuccessful.
     """
     a_data = self.children_data.get('a')
     if not isinstance(a_data, ProcessorResult):
         self.value = ProcessorResult(False, 'Processor does not have data')
         return
     if not a_data.success:
         self.value = ProcessorResult(False,
                                      'Could not compute pipe marker')
         return

     marker = {
         'name': self.name,
         'value': a_data.data.get(0),
         'shape': self.shape.value
     }
     self.value = ProcessorResult(True, marker)
Exemplo n.º 19
0
    async def update(self,
                     attribute: str,
                     result: ProcessorResult,
                     rdate_entity_map: Dict[str, date],
                     pool: ProcessPoolExecutor = None):
        """ Handle the update of a single coordinate and recalculate the value

        :param attribute: Attribute aligning to data coordinate in the processor
        :param result: Processor result including success and series from data query
        :param rdate_entity_map: mapping forwarded to the date-range handling step
        :param pool: optional executor; when given, ``process`` runs in it and
            its return value becomes the processor value
        """
        self.__handle_date_range(result, rdate_entity_map)
        self.children_data[attribute] = result

        # Anything that is not a ProcessorResult is recorded but not processed.
        if not isinstance(result, ProcessorResult):
            return

        # A failed child result is propagated unchanged.
        if not result.success:
            self.value = result
            return

        try:
            if pool:
                loop = asyncio.get_running_loop()
                self.value = await loop.run_in_executor(pool, self.process)
            else:
                self.process()
            self.post_process()
        except Exception as e:
            self.value = ProcessorResult(False,
                                         f'Error Calculating processor {self.__class__.__name__}  due to {e}')
Exemplo n.º 20
0
    def __init__(self,
                 name: str,
                 processor: BaseProcessor,
                 entity: Entity,
                 dimension_overrides: List[Override],
                 column_index: int,
                 row_index: int,
                 row_group: str = None):
        """Create a cell with its own copy of the root processor.

        :param name: name stored on the cell
        :param processor: root processor; deep-copied so the cell owns unique
            processor and child objects
        :param entity: entity stored on the cell
        :param dimension_overrides: overrides stored on the cell
        :param column_index: column position stored on the cell
        :param row_index: row position stored on the cell
        :param row_group: optional row group stored on the cell
        """
        # Identity and placement of the cell
        self.cell_id = str(uuid.uuid4())
        self.name: str = name
        self.entity: Entity = entity
        self.column_index = column_index
        self.row_index = row_index
        self.row_group = row_group
        self.dimension_overrides = dimension_overrides

        # Independent copy of the root processor and its children
        self.processor: BaseProcessor = copy.deepcopy(processor)

        # Nothing has been calculated yet
        self.value: ProcessorResult = ProcessorResult(False, DATA_CELL_NOT_CALCULATED)
        self.updated_time: Optional[str] = None

        # Data queries collected for this cell
        self.data_queries: List[DataQueryInfo] = []
Exemplo n.º 21
0
 def process(self):
     """ Store the n-th last element of the 'a' series as the processor value

     The element is selected by position (``-self.n``) and wrapped in a
     one-element series. A failed result is stored when 'a' is missing,
     unsuccessful, or its data is not a ``Series``.

     :return: ``self.value``
     """
     a_data = self.children_data.get('a')
     usable = (isinstance(a_data, ProcessorResult) and a_data.success
               and isinstance(a_data.data, Series))
     if usable:
         index = -1 * self.n
         # ``.iloc`` makes the lookup explicitly positional; plain ``[]`` with
         # an integer is label-based and only falls back to position on
         # non-integer indexes (a deprecated fallback).
         self.value = ProcessorResult(True,
                                      pd.Series(a_data.data.iloc[index]))
     else:
         self.value = ProcessorResult(
             False, "NthLastProcessor does not have 'a' series values yet")
     return self.value
Exemplo n.º 22
0
 def process(self):
     """Apply ``percentile`` to the 'a' series with a length-capped window.

     ``self.n`` is forwarded as the percentile argument. A truthy ``self.w``
     is used as the window but never exceeds the series length; otherwise
     ``None`` is passed. A non-``Series`` outcome is wrapped in a ``pd.Series``.
     A failed result is stored when 'a' is missing or unsuccessful.
     """
     a_data = self.children_data.get('a')
     if not isinstance(a_data, ProcessorResult):
         self.value = ProcessorResult(False, "PercentileProcessor does not have 'a' series yet")
         return
     if not a_data.success:
         self.value = ProcessorResult(False, "PercentileProcessor does not have 'a' series values yet")
         return

     series_length = len(a_data.data)
     window = None
     if self.w:
         # Cap the window at the number of available points.
         if self.w <= series_length:
             window = self.w
         else:
             window = series_length

     computed = percentile(a_data.data, self.n, w=window)
     if not isinstance(computed, pd.Series):
         computed = pd.Series(computed)
     self.value = ProcessorResult(True, computed)
Exemplo n.º 23
0
    def process(self):
        """ Store the trailing ``[-1:]`` slice of the 'a' series as the processor value

        ``self.value`` is left unchanged when 'a' is missing, unsuccessful, or
        its data is not a ``Series``.

        :return: ``self.value``
        """
        a_data = self.children_data.get('a')
        usable = (isinstance(a_data, ProcessorResult) and a_data.success
                  and isinstance(a_data.data, Series))
        if usable:
            tail_slice = a_data.data[-1:]
            self.value = ProcessorResult(True, pd.Series(tail_slice))

        return self.value
Exemplo n.º 24
0
 def __init__(self):
     """Initialise an unlinked processor with no children and no value."""
     # Unique id made of the concrete class name and a random UUID
     self.id = f'{self.__class__.__name__}-{str(uuid.uuid4())}'

     # Links to the owning processor and cell; not set at construction time
     self.parent: Optional[BaseProcessor] = None
     self.parent_attr: Optional[str] = None
     self.data_cell = None

     # Child definitions and the results collected for them, keyed by name
     self.children: Dict[str, Union[DataCoordinateOrProcessor, DataQueryInfo]] = {}
     self.children_data: Dict[str, ProcessorResult] = {}

     # Placeholder until a value is set
     self.value: ProcessorResult = ProcessorResult(False, 'Value not set')
Exemplo n.º 25
0
 def process(self):
     """Divide the 'a' data by ``self.dividend`` or by the 'b' data.

     A failed 'a' or 'b' result is propagated as the processor value. A
     truthy ``self.dividend`` takes precedence over 'b'. ``self.value`` is
     left unchanged while 'a' (or, when needed, 'b') has not arrived.
     """
     a_data = self.children_data.get('a')
     if not isinstance(a_data, ProcessorResult):
         return

     if not a_data.success:
         self.value = a_data
     elif self.dividend:
         quotient = a_data.data.div(self.dividend)
         self.value = ProcessorResult(True, quotient)
     else:
         b_data = self.children_data.get('b')
         if isinstance(b_data, ProcessorResult):
             if not b_data.success:
                 self.value = b_data
             else:
                 quotient = a_data.data.div(b_data.data)
                 self.value = ProcessorResult(True, quotient)
Exemplo n.º 26
0
    def process(self):
        """Build the start/end marker dictionary from the child results.

        On success ``self.value`` holds ``{'name', 'start', 'end', 'shape'}``,
        with start and end read via ``.get(0)`` from their child data. A
        failed result is stored when either child is missing or unsuccessful.

        :return: ``self.value``
        """
        start = self.children_data.get('start')
        end = self.children_data.get('end')

        both_present = isinstance(start, ProcessorResult) and isinstance(end, ProcessorResult)
        if not both_present:
            self.value = ProcessorResult(False, "Processor does not have start and end data yet")
            return self.value
        if not (start.success and end.success):
            self.value = ProcessorResult(False, "Processor does not have start and end values yet")
            return self.value

        marker = {
            'name': self.name,
            'start': start.data.get(0),
            'end': end.data.get(0),
            'shape': self.shape.value
        }
        self.value = ProcessorResult(True, marker)
        return self.value
Exemplo n.º 27
0
    def process(self):
        """Apply ``zscores`` to the 'a' series.

        ``self.w`` is used as the window, or ``Window(None, 0)`` when it is
        ``None``. A failed result is stored when 'a' is missing or unsuccessful.

        :return: ``self.value``
        """
        a_data = self.children_data.get('a')
        if not isinstance(a_data, ProcessorResult):
            self.value = ProcessorResult(
                False, "ZscoresProcessor does not have 'a' series yet")
            return self.value
        if not a_data.success:
            self.value = ProcessorResult(
                False,
                "ZscoresProcessor does not have 'a' series values yet")
            return self.value

        if self.w is None:
            window = Window(None, 0)
        else:
            window = self.w
        self.value = ProcessorResult(True, zscores(a_data.data, w=window))
        return self.value
Exemplo n.º 28
0
    def process(self):
        """Store the last two remaining points of the 'a' series.

        The label ``index[-1].date()`` is dropped from the series (ignored if
        absent) and the final two remaining values are kept. A failed 'a'
        result is propagated; any other shortfall yields a failed result.

        :return: ``self.value``
        """
        a_data = self.children_data.get('a')
        last_two = None

        if isinstance(a_data, ProcessorResult):
            if not a_data.success:
                self.value = a_data
                return self.value

            series = a_data.data
            if len(series) >= 2:
                # presumably removes a date-only entry for the latest day - TODO confirm
                latest_day = series.index[-1].date()
                remaining = series.drop(latest_day, errors='ignore')
                if len(remaining) >= 2:
                    last_two = remaining[-2:]

        if last_two is None:
            self.value = ProcessorResult(
                False, 'Not enough values given to OneDayProcessor.')
        else:
            self.value = ProcessorResult(True, last_two)
        return self.value
    def process(self):
        """Apply ``change`` to the 'a' series.

        ``self.value`` is left unchanged when 'a' is missing or unsuccessful.

        :return: ``self.value``
        """
        a_data = self.children_data.get('a')
        ready = isinstance(a_data, ProcessorResult) and a_data.success
        if ready:
            self.value = ProcessorResult(True, change(a_data.data))

        return self.value
Exemplo n.º 30
0
 def process(self,
             w: Union[Window, int] = Window(None, 0),
             type_: SeriesType = SeriesType.PRICES):
     """Correlate the 'a' series with the 'benchmark' series.

     The outcome is stored in ``self.value``; the same failed result is
     stored whenever either input is missing or unsuccessful.

     NOTE(review): ``w`` and ``type_`` are accepted but never read - the
     window comes from ``self.w`` and the series type is always
     ``SeriesType.PRICES``. Confirm whether callers expect them to apply.

     :param w: unused
     :param type_: unused
     """
     a_data = self.children_data.get('a')
     benchmark_data = self.children_data.get('benchmark')

     inputs_ready = (isinstance(a_data, ProcessorResult)
                     and isinstance(benchmark_data, ProcessorResult)
                     and a_data.success
                     and benchmark_data.success)
     if not inputs_ready:
         self.value = ProcessorResult(
             False, "Processor does not have A and Benchmark data yet")
         return

     computed = correlation(a_data.data,
                            benchmark_data.data,
                            w=self.w,
                            type_=SeriesType.PRICES)
     self.value = ProcessorResult(True, computed)