class BovespaCompanyFile(CustomDjangoCassandraModel):
    """A file delivered by a company, stored in ``bovespa_company_file``.

    The partition key is ``ccvm``; rows are clustered by ``doc_type``,
    then ``fiscal_date`` (descending) and ``version`` (descending).
    """

    __table_name__ = "bovespa_company_file"

    # --- Primary key -----------------------------------------------------

    # Identifier of the company in B3.
    ccvm = columns.Text(partition_key=True)

    # Document type; checked against DOC_TYPES in validate().
    doc_type = columns.Text(max_length=3, primary_key=True)

    # Fiscal date the file refers to.
    fiscal_date = columns.Date(primary_key=True, clustering_order="DESC")

    # File version. A company may present several versions of the files
    # for the same fiscal period.
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # --- Processing state ------------------------------------------------

    # Processing status; checked against FILE_STATUSES in validate().
    status = columns.Text(default=FILE_STATUS_NOT_PROCESSED)

    # Creation and last-modification timestamps.
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Whether the entity has been deleted, and why.
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # --- Delivery information --------------------------------------------

    # Protocol code associated with the file.
    protocol = columns.Text(required=True)

    # When the documents were delivered.
    delivery_date = columns.DateTime(required=True)

    # Why the files were delivered.
    delivery_type = columns.Text(required=True)

    # Official name of the company.
    company_name = columns.Text(required=True)

    # CNPJ of the company.
    company_cnpj = columns.Text(required=True)

    # --- Fiscal period, decomposed ---------------------------------------

    # Year of the balance sheet. Ex. 2015
    fiscal_date_y = columns.SmallInt()

    # Day of the year of the balance sheet. Ex. 90
    fiscal_date_yd = columns.SmallInt()

    # Quarter of the balance sheet. Ex. 1
    fiscal_date_q = columns.SmallInt()

    # Month of the balance sheet. Ex. 1
    fiscal_date_m = columns.SmallInt()

    # Day of the month of the balance sheet. Ex. 1
    fiscal_date_md = columns.SmallInt()

    # Week of the year. Ex. 1
    fiscal_date_w = columns.SmallInt()

    # Day of the week. Ex. 1
    fiscal_date_wd = columns.SmallInt()

    # YEAR-QUARTER combination in the form 2018-Q1, so results can be
    # faceted per quarter.
    fiscal_date_yq = columns.Text()

    # YEAR-MONTH combination in the form 2018-01, so results can be
    # faceted per month.
    fiscal_date_ym = columns.Text()

    # --- File location ---------------------------------------------------

    # URL of the file in Bovespa; this is the URL used to download the
    # file from the source.
    source_url = columns.Text(required=True)

    # URL of the file in our own repository. The file has already been
    # downloaded and persisted there, so the source is no longer needed.
    file_url = columns.Text()

    # Internal name of the file.
    file_name = columns.Text()

    # Extension of the file name.
    file_extension = columns.Text()

    # Each key represents the name of a file inside the delivered archive.
    # The value is the original content converted into JSON - when
    # possible - and persisted as Text.
    # content = KeyEncodedMap(
    #     key_type=columns.Text, value_type=columns.Text)

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the inherited validation, then check doc type and status.

        Raises:
            ValidationError: if ``doc_type`` is not in ``DOC_TYPES`` or
                ``status`` is not in ``FILE_STATUSES``.
        """
        super().validate()

        doc_type_known = self.doc_type in DOC_TYPES
        if not doc_type_known:
            template = "Invalid doc type [{0}]. Valid types are: {1}."
            raise ValidationError(
                template.format(self.doc_type, DOC_TYPES))

        status_known = self.status in FILE_STATUSES
        if not status_known:
            template = "Invalid file status [{0}]. Valid statuses are: {1}."
            raise ValidationError(
                template.format(self.status, FILE_STATUSES))
class Allv4Datatypes(UserType):
    """User-defined type with one field per column type below.

    Fields cover ``Date``, ``SmallInt``, ``Time`` and ``TinyInt``.
    """

    a = columns.Date()
    b = columns.SmallInt()
    c = columns.Time()
    d = columns.TinyInt()
class v4DatatypesModel(Model):
    """Model keyed by an integer ``id`` with one column per type below.

    Columns cover ``Date``, ``SmallInt``, ``Time`` and ``TinyInt``.
    """

    # Sole primary-key column.
    id = columns.Integer(primary_key=True)

    a = columns.Date()
    b = columns.SmallInt()
    c = columns.Time()
    d = columns.TinyInt()
class Task(CustomDjangoCassandraModel):
    """
    A task, which can be either an on-demand task or a batch task.

    Args:
        task_id: the task id; it is the unique partition key.
        user: the user that asked for the task, if it is an on-demand
            task.
        created_at: the date the task was created.
        updated_at: the date the task was last updated.
        is_deleted: controls whether the data is deleted.
        status: the current status of the task, one of:
                - 0 (Created)
                - 1 (Queued)
                - 2 (In Progress)
                - 3 (Finished)
                - 4 (Faulty)
                - 5 (Unknown)
        kind: the name of the crawler that will execute the task.
        params: the set of params used to execute the crawler command,
            saved as Text.
        params_map: exactly the same content as `params`, saved in a way
            that can be searched using solr (KeyEncodedMap).
        options: the set of options used to guide the crawler during the
            execution, saved as Text.
        options_map: exactly the same content as `options`, saved in a
            way that can be searched using solr (KeyEncodedMap).
        times_performed: keeps track of how many times the task was run.
        type: the type of the task, either OnDemand(1) or Batch(2).
    """

    __table_name__ = "davinci_task"

    _cassandra_consistency_level_read = ConsistencyLevel.ONE
    _cassandra_consistency_level_write = ConsistencyLevel.ALL

    # Partition key: forces all the values of a task to reside in the
    # same node of the cluster.
    task_id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data (see `user` in the class docstring).
    user = columns.Text()

    # Creation timestamp (clustering key, newest first) and
    # last-modification timestamp.
    created_at = columns.DateTime(
        default=timezone.now,
        primary_key=True,
        clustering_order="DESC",
    )
    updated_at = columns.DateTime(default=timezone.now)

    # Controls if the entity is active or has been deleted.
    is_deleted = columns.Boolean(default=False)

    # Checked against ALL_STATUS in validate().
    status = columns.SmallInt(default=STATUS_CREATED)

    kind = columns.Text(required=True)

    # Crawler parameters: searchable map plus the raw text form.
    params_map = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)
    params = columns.Text(required=True)

    # Crawler options: searchable map plus the raw text form.
    options_map = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)
    options = columns.Text(required=False)

    times_performed = columns.SmallInt(default=0)

    # Checked against ALL_TASK_TYPES in validate().
    type = columns.SmallInt(default=ON_DEMAND_TASK)

    # List of TaskMoreInfo user-defined type values.
    more_info = columns.List(value_type=UserDefinedType(TaskMoreInfo))

    # NOTE(review): the fields below presumably describe what changed
    # compared with a previous version of the data - confirm with the
    # code that fills them.
    differences_from_last_version = columns.Text()
    inserted_fields = columns.List(value_type=columns.Text)
    updated_fields = columns.List(value_type=columns.Text)
    deleted_fields = columns.List(value_type=columns.Text)
    changed_fields = columns.List(value_type=columns.Text)

    logging_task = columns.Boolean(default=False)

    class Meta:
        get_pk_field = "task_id"

    def validate(self):
        """Run the inherited validation, then check type and status.

        Raises:
            ValidationError: if ``type`` is not in ``ALL_TASK_TYPES`` or
                ``status`` is not in ``ALL_STATUS``.
        """
        super().validate()

        type_known = self.type in ALL_TASK_TYPES
        if not type_known:
            template = "Invalid task type [{0}]. Valid types are: {1}."
            raise ValidationError(
                template.format(self.type, ALL_TASK_TYPES))

        status_known = self.status in ALL_STATUS
        if not status_known:
            template = "Invalid task status [{0}]. Valid status are: {1}."
            raise ValidationError(
                template.format(self.status, ALL_STATUS))
class ApiAccess(CustomDjangoCassandraModel):
    """
    A model to persist every access made through the API.
    """

    __table_name__ = "caravaggio_api_access"

    # Partition key. The combination of year and month of the timestamp
    # associated with the request. Ex. 201901. Used as the row key: each
    # row contains the access logs made during that month.
    year_month = columns.Text(partition_key=True)

    # Clustering key (descending) used to sort data within one row.
    # NOTE(review): the field name suggests milliseconds, while the
    # original note on this field said microseconds - confirm the unit.
    time_ms = columns.Integer(primary_key=True, clustering_order="DESC")

    # Clustering key. Defaults to a uuid4 value, NOT a time-based UUID1.
    id = columns.UUID(primary_key=True, default=uuid.uuid4)

    # The user that made the request.
    user = columns.UUID(required=True)

    # When the entity was created.
    created_at = columns.DateTime(default=timezone.now)

    # The IP address of the user doing the request.
    remote_address = InetAddress(required=True, index=True)

    # The name of the host that is processing the request.
    server_hostname = columns.Text(required=True)

    # The method of the request.
    request_method = columns.Text(required=True)

    # The absolute path of the request.
    request_path = columns.Text(required=True)

    # All the query params informed in the request, saved as a map.
    # The caravaggio KeyEncodedMap appends the field name to each of the
    # keys in order to make them indexable by the Search Indexer.
    request_query_params = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)

    # The body of the request made by the user.
    request_body = columns.Bytes(required=True)

    response_status = columns.SmallInt(required=True)

    # The JSON the server responded to the client. If the response is
    # not a JSON response, the body is replaced by a <<<Streaming>>> text
    # if the request is in streaming, or by <<<Not JSON>>> otherwise.
    response_body = columns.Text(required=True)

    run_time = columns.Integer(required=True)

    latitude = columns.Float()
    longitude = columns.Float()
    coordinates = columns.Text()

    class Meta:
        get_pk_field = "year_month"

    def validate(self):
        """Run the inherited validation; no extra checks are added."""
        super().validate()