class RPSLObject(metaclass=RPSLObjectMeta):
    """
    Base class for RPSL objects.

    To clean an RPSL object in string form, the best option is not to
    instantiate this or a subclass, but instead to call rpsl_object_from_text(),
    which automatically derives the correct class.

    This class should not be instantiated directly - instead subclasses should
    be made for each RPSL type with the appropriate fields defined. Note that
    any subclasses should also be added to OBJECT_CLASS_MAPPING.
    """
    fields: Dict[str, RPSLTextField] = OrderedDict()
    rpsl_object_class: str
    pk_fields: List[str] = []
    attrs_allowed: List[str] = []
    attrs_required: List[str] = []
    attrs_multiple: List[str] = []
    ip_first: Optional[IP] = None
    ip_last: Optional[IP] = None
    asn_first: Optional[int] = None
    asn_last: Optional[int] = None
    prefix: Optional[IP] = None
    prefix_length: Optional[int] = None
    rpki_status: RPKIStatus = RPKIStatus.not_found
    scopefilter_status: ScopeFilterStatus = ScopeFilterStatus.in_scope
    pk_asn_segment: Optional[str] = None
    default_source: Optional[str] = None  # noqa: E704 (flake8 bug)
    # Whether this object has a relation to RPKI ROA data, and therefore RPKI
    # checks should be performed in certain scenarios. Enabled for route/route6.
    rpki_relevant = False
    # Fields whose values are discarded during parsing
    discarded_fields: List[str] = []
    # Fields that are ignored in validation even
    # for authoritative objects (see #587 for example).
    ignored_validation_fields: List[str] = ['last-modified']

    _re_attr_name = re.compile(r'^[a-z0-9_-]+$')

    def __init__(self, from_text: Optional[str] = None, strict_validation: bool = True,
                 default_source: Optional[str] = None) -> None:
        """
        Create a new RPSL object, optionally instantiated from a string.

        Optionally, you can set/unset strict validation. This means all
        attribute values are validated, and attribute presence/absence is
        verified. Non-strict validation is limited to primary and lookup keys.

        If you set default_source and strict_validation is False, the parser
        will accept objects without a source attribute, and treat them as if
        their source was default_source.
        """
        self.messages = RPSLParserMessages()
        self._object_data: TypeRPSLObjectData = []
        self.strict_validation = strict_validation
        if default_source:
            self.default_source = default_source.strip().upper()

        if from_text:
            self._extract_attributes_values(from_text)
            self._validate_object()

    def pk(self) -> str:
        """Get the primary key value of an RPSL object. The PK is always converted to uppercase."""
        if len(self.pk_fields) == 1:
            return self.parsed_data.get(self.pk_fields[0], "").upper()
        composite_values = []
        for field in self.pk_fields:
            composite_values.append(self.parsed_data.get(field, ""))
        return ''.join(composite_values).upper()

    def source(self) -> str:
        """Shortcut to retrieve the object's source attribute."""
        try:
            return self.parsed_data['source']
        except KeyError:
            raise ValueError('RPSL object has no known source')

    def ip_version(self) -> Optional[int]:
        """
        Get the IP version to which this object relates,
        or None for e.g. person or as-block objects.
        """
        if self.ip_first:
            return self.ip_first.version()
        return None
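    # Illustrative sketch of pk() and source() (object text and values are
    # hypothetical; exact parsing depends on the subclass): for a class with
    # pk_fields = ['route', 'origin'], pk() concatenates the parsed values
    # and uppercases the result:
    #
    #     obj = rpsl_object_from_text(
    #         'route: 192.0.2.0/24\norigin: AS65530\nsource: EXAMPLE\n',
    #         strict_validation=False,
    #     )
    #     obj.pk()      # '192.0.2.0/24AS65530'
    #     obj.source()  # 'EXAMPLE'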
    def referred_strong_objects(self) -> List[Tuple[str, List, List]]:
        """
        Get all objects that this object refers to (e.g. an admin-c attribute
        on this object, that refers to person/role), along with the data this
        object has for that reference. This information can be used to check
        whether all references from an object are valid.

        Only references which have strong=True are returned; weak references
        are not returned, as they should not be included in reference validation.

        Returns a list of tuples, with each tuple having:
        - the field name on this object (e.g. 'admin-c')
        - the RPSL object class names of objects referred (e.g. ['role', 'person'])
        - the values this object has for that field (e.g. ['A-RIPE', 'B-RIPE'])
        """
        result = []
        for field_name, referred_objects in self.referring_strong_fields:  # type: ignore
            data = self.parsed_data.get(field_name)
            if not data:
                continue
            result.append((field_name, referred_objects, data))
        return result

    def references_strong_inbound(self) -> Set[str]:
        """
        Get a set of field names under which other objects refer to this object.
        E.g. for a person object, this would typically return
        {'zone-c', 'admin-c', 'tech-c'}.
        """
        result = set()
        from irrd.rpsl.rpsl_objects import OBJECT_CLASS_MAPPING
        for rpsl_object in OBJECT_CLASS_MAPPING.values():
            for field_name, field in rpsl_object.fields.items():
                if self.rpsl_object_class in getattr(field, 'referring', []) and getattr(field, 'strong'):
                    result.add(field_name)
        return result

    def render_rpsl_text(self, last_modified: Optional[datetime.datetime] = None) -> str:
        """
        Render the RPSL object as an RPSL string.
        If last_modified is provided and self.source() is authoritative,
        removes any existing last-modified attributes and adds a new one
        with that timestamp.
        """
        output = ""
        authoritative = get_setting(f'sources.{self.source()}.authoritative')
        for attr, value, continuation_chars in self._object_data:
            if authoritative and last_modified and attr == 'last-modified':
                continue
            attr_display = f'{attr}:'.ljust(RPSL_ATTRIBUTE_TEXT_WIDTH)
            value_lines = list(splitline_unicodesafe(value))
            if not value_lines:
                output += f'{attr}:\n'
            for idx, line in enumerate(value_lines):
                if idx == 0:
                    output += attr_display + line
                else:
                    continuation_char = continuation_chars[idx - 1]
                    # Override the continuation char for empty lines #298
                    if not line:
                        continuation_char = '+'
                    output += continuation_char + (RPSL_ATTRIBUTE_TEXT_WIDTH - 1) * ' ' + line
                output += '\n'
        if authoritative and last_modified:
            output += 'last-modified:'.ljust(RPSL_ATTRIBUTE_TEXT_WIDTH)
            output += last_modified.replace(microsecond=0).isoformat().replace('+00:00', 'Z')
            output += '\n'
        return output

    def generate_template(self) -> str:
        """Generate a template in text form of the main attributes of all fields."""
        template = ""
        max_name_width = max(len(k) for k in self.fields.keys())
        for name, field in self.fields.items():
            mandatory = '[optional] ' if field.optional else '[mandatory]'
            single = '[multiple]' if field.multiple else '[single] '
            metadata = []
            if field.primary_key and field.lookup_key:
                metadata.append('primary/look-up key')
            elif field.primary_key:
                metadata.append('primary key')
            elif field.lookup_key:
                metadata.append('look-up key')
            if getattr(field, 'referring', []):
                reference_type = 'strong' if getattr(field, 'strong') else 'weak'
                metadata.append(f'{reference_type} references ' + '/'.join(field.referring))
            metadata_str = ', '.join(metadata)
            name_padding = (max_name_width - len(name)) * ' '
            template += f'{name}: {name_padding} {mandatory} {single} [{metadata_str}]\n'
        return template

    def clean(self) -> bool:
        """Additional cleaning steps for some objects."""
        return True

    def clean_for_create(self) -> bool:
        """Additional cleaning steps for creations only."""
        return True
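    # A minimal rendering sketch, assuming obj was parsed from an authoritative
    # source (the timestamp is hypothetical):
    #
    #     import datetime
    #     ts = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)
    #     text = obj.render_rpsl_text(last_modified=ts)
    #
    # Any existing last-modified lines are dropped, and a single new one is
    # appended, rendered as '2024-01-01T00:00:00Z'.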
    def _extract_attributes_values(self, text: str) -> None:
        """
        Extract all attributes and associated values from the input string.

        This is mostly straightforward, except for the tricky feature of line
        continuation. An attribute's value can be continued on the next lines,
        which is distinct from an attribute occurring multiple times.

        The parse result is internally stored in self._object_data. This is a
        list of 3-tuples, where each tuple contains the attribute name,
        attribute value, and the continuation characters. The continuation
        characters are needed to reconstruct the original object into a string.
        """
        continuation_chars = (' ', '+', '\t')
        current_attr = None
        current_value = ""
        current_continuation_chars: List[str] = []

        for line_no, line in enumerate(splitline_unicodesafe(text.strip())):
            if not line:
                self.messages.error(f'Line {line_no+1}: encountered empty line in the middle of object: [{line}]')
                return

            if not line.startswith(continuation_chars):
                if current_attr and current_attr not in self.discarded_fields:
                    # Encountering a new attribute requires saving the previous attribute data first, if any,
                    # which can't be done earlier as line continuation means we can't know earlier whether
                    # the attribute is finished.
                    self._object_data.append((current_attr, current_value, current_continuation_chars))
                if ':' not in line:
                    self.messages.error(f'Line {line_no+1}: line is neither continuation nor valid attribute [{line}]')
                    return
                current_attr, current_value = line.split(':', maxsplit=1)
                current_attr = current_attr.lower()
                current_value = current_value.strip()
                current_continuation_chars = []

                if current_attr not in self.attrs_allowed and not self._re_attr_name.match(current_attr):
                    self.messages.error(f'Line {line_no+1}: encountered malformed attribute name: [{current_attr}]')
                    return
            else:
                # Whitespace between the continuation character and the start of the data is not significant.
                current_value += '\n' + line[1:].strip()
                current_continuation_chars.append(line[0])

        if current_attr and current_attr not in self.discarded_fields:
            self._object_data.append((current_attr, current_value, current_continuation_chars))

    def _validate_object(self) -> None:
        """
        Validate an object. The strictness depends on self.strict_validation
        (see the docstring for __init__).
        """
        self.parsed_data: Dict[str, Any] = {}  # values are str or List[str]
        if not self.messages.errors():
            self._validate_attribute_counts()
        self._parse_attribute_data(allow_invalid_metadata=bool(self.messages.errors()))

        if self.strict_validation and not self.messages.errors():
            self.clean()
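    # To illustrate _extract_attributes_values above (hypothetical input): the text
    #
    #     remarks: first line
    #     + second line
    #      third line
    #
    # yields a single _object_data entry, preserving the continuation characters
    # so render_rpsl_text() can reproduce the original layout:
    #
    #     ('remarks', 'first line\nsecond line\nthird line', ['+', ' '])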
""" attrs_present = Counter([attr[0] for attr in self._object_data]) if self.strict_validation: for attr_name, count in attrs_present.items(): if attr_name in self.ignored_validation_fields: continue if attr_name not in self.attrs_allowed: self.messages.error( f'Unrecognised attribute {attr_name} on object {self.rpsl_object_class}' ) if count > 1 and attr_name not in self.attrs_multiple: self.messages.error( f'Attribute "{attr_name}" on object {self.rpsl_object_class} occurs multiple times, but is ' f'only allowed once') for attr_required in self.attrs_required: if attr_required not in attrs_present: self.messages.error( f'Mandatory attribute "{attr_required}" on object {self.rpsl_object_class} is missing' ) else: required_fields = self.pk_fields if not self.default_source: required_fields = required_fields + ['source'] for attr_pk in required_fields: if attr_pk not in attrs_present: self.messages.error( f'Primary key attribute "{attr_pk}" on object {self.rpsl_object_class} is missing' ) def _parse_attribute_data(self, allow_invalid_metadata=False) -> None: """ Clean the data stored in attributes. If self.strict_validation is not set, only checks primary and lookup keys, as they need to be indexed. All parsed values (e.g. without comments) are stored in self.parsed_data - stored in upper case unless a field is marked case sensitive. If allow_invald_metadata is set, the parser will accept invalid metadata being stored, but still make a best effort to extract data. This should only be used if it is already known the object is invalid, and will never be stored. """ for idx, (attr_name, value, continuation_chars) in enumerate(self._object_data): field = self.fields.get(attr_name) if field: normalised_value = self._normalise_rpsl_value(value) # We always parse all fields, but only care about errors if we're running # in strict validation mode, if the field is primary or lookup, or if it's # the source field. In all other cases, the field parsing is best effort. # In all these other cases we pass a new parser messages object to the # field parser, so that we basically discard any errors. raise_errors = self.strict_validation or field.primary_key or field.lookup_key or attr_name == 'source' field_messages = self.messages if raise_errors else RPSLParserMessages( ) parsed_value = field.parse(normalised_value, field_messages, self.strict_validation) if parsed_value: parsed_value_str = parsed_value.value if parsed_value_str != normalised_value: # Note: this replacement can be incomplete: if the normalised value is not contained in the # parsed value as single string, the replacement will not occur. This is not a great concern, # as this is purely cosmetic, and self.parsed_data will have the correct normalised value. 
    def _parse_attribute_data(self, allow_invalid_metadata: bool = False) -> None:
        """
        Clean the data stored in attributes.

        If self.strict_validation is not set, only checks primary and lookup
        keys, as they need to be indexed. All parsed values (e.g. without
        comments) are stored in self.parsed_data - in upper case, unless a
        field is marked case sensitive.

        If allow_invalid_metadata is set, the parser will accept invalid
        metadata being stored, but still make a best effort to extract data.
        This should only be used if it is already known that the object is
        invalid, and will never be stored.
        """
        for idx, (attr_name, value, continuation_chars) in enumerate(self._object_data):
            field = self.fields.get(attr_name)
            if field:
                normalised_value = self._normalise_rpsl_value(value)
                # We always parse all fields, but only care about errors if we're running
                # in strict validation mode, if the field is primary or lookup, or if it's
                # the source field. In all other cases, the field parsing is best effort.
                # In all these other cases we pass a new parser messages object to the
                # field parser, so that we basically discard any errors.
                raise_errors = self.strict_validation or field.primary_key or field.lookup_key or attr_name == 'source'
                field_messages = self.messages if raise_errors else RPSLParserMessages()
                parsed_value = field.parse(normalised_value, field_messages, self.strict_validation)

                if parsed_value:
                    parsed_value_str = parsed_value.value
                    if parsed_value_str != normalised_value:
                        # Note: this replacement can be incomplete: if the normalised value is not contained in the
                        # parsed value as a single string, the replacement will not occur. This is not a great concern,
                        # as this is purely cosmetic, and self.parsed_data will have the correct normalised value.
                        new_value = value.replace(normalised_value, parsed_value_str)
                        self._object_data[idx] = attr_name, new_value, continuation_chars

                    values_list = parsed_value.values_list
                    if values_list:
                        if not field.keep_case:
                            values_list = list(map(str.upper, values_list))
                        if attr_name in self.parsed_data:
                            self.parsed_data[attr_name] += values_list
                        else:
                            self.parsed_data[attr_name] = values_list
                    else:
                        if not field.keep_case:
                            parsed_value_str = parsed_value_str.upper()
                        if field.multiple:
                            if attr_name in self.parsed_data:
                                self.parsed_data[attr_name].append(parsed_value_str)
                            else:
                                self.parsed_data[attr_name] = [parsed_value_str]
                        else:
                            if attr_name in self.parsed_data:
                                self.parsed_data[attr_name] += '\n' + parsed_value_str
                            else:
                                self.parsed_data[attr_name] = parsed_value_str

                    # Some fields provide additional metadata about the resources to
                    # which this object pertains.
                    if field.primary_key or field.lookup_key:
                        for attr in 'ip_first', 'ip_last', 'asn_first', 'asn_last', 'prefix', 'prefix_length':
                            attr_value = getattr(parsed_value, attr, None)
                            if attr_value:
                                existing_attr_value = getattr(self, attr, None)
                                if existing_attr_value and not allow_invalid_metadata:  # pragma: no cover
                                    raise ValueError(
                                        f'Parsing of {parsed_value.value} reads {attr_value} for {attr}, '
                                        f'but value {existing_attr_value} is already set.')
                                setattr(self, attr, attr_value)

        if 'source' not in self.parsed_data and self.default_source:
            self.parsed_data['source'] = self.default_source

    def _normalise_rpsl_value(self, value: str) -> str:
        """
        Normalise an RPSL attribute value to its significant parts
        in a consistent format.

        For example, the following is valid in RPSL:
            inetnum: 192.0.2.0 # comment1
            +- # comment 2
            +192.0.2.1 # comment 3
            + # comment 4

        This value will be normalised by this method to:
            192.0.2.0,-,192.0.2.1
        to be used for further validation and extraction of primary keys.
        """
        normalized_lines = []
        # The shortcuts below are functionally inconsequential, but significantly improve performance,
        # as most values are single line without comments, and this method is called extremely often.
        if '\n' not in value:
            if '#' in value:
                return value.split('#')[0].strip()
            return value.strip()
        for line in splitline_unicodesafe(value):
            parsed_line = line.split('#')[0].strip('\n\t, ')
            if parsed_line:
                normalized_lines.append(parsed_line)
        return ','.join(normalized_lines)

    def _update_attribute_value(self, attribute, new_values) -> None:
        """
        Update the value of an attribute in the internal state and in
        parsed_data.

        This is used for key-cert objects, where e.g. owner lines are derived
        from other data in the object.

        All existing occurrences of the attribute are removed; new items are
        always inserted at line 2 of the object.
        """
        if isinstance(new_values, str):
            new_values = [new_values]
        self.parsed_data[attribute] = '\n'.join(new_values)

        self._object_data = list(filter(lambda a: a[0] != attribute, self._object_data))

        insert_idx = 1
        for new_value in new_values:
            self._object_data.insert(insert_idx, (attribute, new_value, []))
            insert_idx += 1

    def __repr__(self):
        source = self.parsed_data.get('source', '')
        return f'{self.rpsl_object_class}/{self.pk()}/{source}'

    def __key(self):
        return self.rpsl_object_class, self.pk(), json.dumps(self.parsed_data, sort_keys=True)

    def __hash__(self):
        return hash(self.__key())

    def __eq__(self, other):
        return isinstance(self, type(other)) and self.__key() == other.__key()
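# A minimal end-to-end usage sketch (object text and values are hypothetical;
# rpsl_object_from_text is the entry point recommended in the class docstring,
# assumed here to live alongside OBJECT_CLASS_MAPPING in irrd.rpsl.rpsl_objects):
#
#     from irrd.rpsl.rpsl_objects import rpsl_object_from_text
#
#     obj = rpsl_object_from_text(
#         'person: Test Person\n'
#         'nic-hdl: TEST-EX\n'
#         'source: EXAMPLE\n',
#         strict_validation=False,
#     )
#     if obj.messages.errors():
#         ...  # reject or log the invalid object
#     else:
#         print(repr(obj))  # e.g. 'person/TEST-EX/EXAMPLE'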