class V1(StoredObject):
    """Schema with a string ``_id`` and a handful of string/float fields.

    ``_meta`` flags the schema as ``optimistic`` and tags it as version 1.
    """

    _id = fields.StringField(_primary_key=True, index=True)
    my_string = fields.StringField()
    my_float = fields.FloatField()
    my_number = fields.FloatField()
    my_null = fields.StringField(required=False)

    # The 'optimistic' key used to be listed twice in this literal; the
    # duplicate was redundant (same value) and has been dropped.
    _meta = {
        'optimistic': True,
        'version': 1,
    }
class Foo(StoredObject):
    """Stored object with an integer primary key and one field per basic type."""

    # Primary key
    _id = fields.IntegerField(primary=True)

    # Scalar fields
    integer_field = fields.IntegerField()
    string_field = fields.StringField()
    datetime_field = fields.DateTimeField()
    float_field = fields.FloatField()

    # Integer field declared with ``list=True``
    list_field = fields.IntegerField(list=True)
class Schema1(StoredObject):
    """Version-1 schema: string primary key, an integer and a float field."""

    _id = fields.StringField(primary=True)
    number = fields.IntegerField()
    deleted = fields.FloatField()

    # Schema options read from ``_meta``.
    _meta = {
        'version': 1,
        'optimistic': True,
    }
class Tag(StoredObject):
    """Stored object keyed on ``value`` that exercises many field options.

    Covers defaults, validators, list fields, ``auto_now`` and indexing flags.
    """

    # Primary key (explicitly not indexed)
    value = fields.StringField(index=False, primary=True)

    # String fields with defaults
    count = fields.StringField(index=True, validate=True, default='c')
    misc = fields.StringField(default='')
    misc2 = fields.StringField(default='')

    # Timestamps; ``modified`` is declared with ``auto_now=True``
    created = fields.DateTimeField(validate=True)
    modified = fields.DateTimeField(auto_now=True, validate=True)

    # List of strings validated by a min-length and a max-length validator
    keywords = fields.StringField(
        list=True,
        default=['keywd1', 'keywd2'],
        validate=[MinLengthValidator(5), MaxLengthValidator(10)],
    )

    # Remaining scalar fields
    mybool = fields.BooleanField(default=False)
    myint = fields.IntegerField()
    myfloat = fields.FloatField(default=4.5, required=True)
    myurl = fields.StringField(validate=URLValidator())
class Document(StoredObject):
    """Abstract base for stored documents whose text can be extracted.

    Subclasses must implement :meth:`read`; this class is marked abstract
    through ``_meta``.
    """

    _id = fields.StringField(default=make_oid)
    document_type = fields.StringField()
    filepath = fields.StringField()
    extract_filepath = fields.StringField()
    url = fields.StringField()
    verification_score = fields.FloatField()
    extract_path = fields.StringField()
    extracted = fields.BooleanField()

    _meta = {
        'abstract': True,
    }

    def read(self):
        """Return the document's text; must be overridden by subclasses.

        :raises NotImplementedError: Always, in this base class
        """
        raise NotImplementedError

    def text_file_name(self):
        """Return the base name of ``filepath`` with its extension swapped
        for ``.txt``.
        """
        _, tail = os.path.split(self.filepath)
        root, _ = os.path.splitext(tail)
        return '{}.txt'.format(root)

    def save_extract(self, text, save=True, **kwargs):
        """Write extracted text to disk and record where it was stored.

        The target directory is ``EXTRACT_SAVE_DIRS[self.document_type]``
        (created if it does not exist) and the file name comes from
        :meth:`text_file_name`. The text is written UTF-8 encoded.

        :param text: Extracted text; passed through ``to_unicode`` before
            being encoded
        :param bool save: Call ``self.save()`` after writing the file
        :param kwargs: Additional attributes to set on the document before
            the file is written
        """
        path = EXTRACT_SAVE_DIRS[self.document_type]
        if not os.path.exists(path):
            mkdir_p(path)
        filepath = os.path.join(path, self.text_file_name())

        self.extract_filepath = filepath
        self.extracted = True
        for key, value in kwargs.items():
            setattr(self, key, value)

        # Use a context manager so the handle is flushed and closed even if
        # the write raises, instead of relying on garbage collection.
        with open(filepath, 'w') as fp:
            fp.write(to_unicode(text).encode('utf-8'))

        if save:
            self.save()

    def verify(self, threshold, overwrite=False):
        """Verify that the document matches the target article: checks that
        the document contains a minimum fraction of words in the article
        abstract.

        A freshly computed score is stored in ``verification_score`` and the
        document is saved. A score that is already stored is reused unless
        ``overwrite`` is true.

        :param float threshold: Minimum fraction of abstract words present
        :param bool overwrite: Recalculate existing verification score
        :return bool: Article is verified
        """
        # Return stored. ``is not None`` so a stored score of 0.0 counts as
        # "already computed", and ``>=`` to agree with the comparison used
        # for a freshly computed score below.
        if self.verification_score is not None and not overwrite:
            return self.verification_score >= threshold

        text = self.read()

        # Load target article
        try:
            article = self.article__scraped[0]
        except IndexError:
            return False

        # AB -> Abstract
        abstract = article.record.get('AB')

        if not text or not abstract:
            return False

        text = text.lower()

        # Drop empty strings produced by leading/trailing whitespace: an
        # empty token is trivially "contained" in any text and would inflate
        # the score.
        abstract_tokens = [
            token
            for token in re.split(r'\s+', abstract.lower())
            if token
        ]
        if not abstract_tokens:
            return False

        # Substring containment, not whole-word matching.
        contained_count = sum(1 for token in abstract_tokens if token in text)

        # Force true division; integer division would truncate the fraction
        # to 0 or 1 on Python 2.
        prop_contained = float(contained_count) / len(abstract_tokens)

        self.verification_score = prop_contained
        self.save()

        return prop_contained >= threshold
class OsfStorageFileVersion(StoredObject):
    """One version of a stored file, tracked through its upload lifecycle.

    ``status`` holds one of the ``status_map`` values; the state-changing
    methods are wrapped in ``check_status`` with the statuses they are
    declared for.

    NOTE(review): none of the method bodies use ``signature`` directly;
    presumably ``check_status`` consumes it -- confirm against the decorator.
    """

    _id = oid_primary_key
    creator = fields.ForeignField('user', required=True)

    status = fields.StringField(required=True, validate=validate_status)
    signature = fields.StringField()

    date_created = fields.DateTimeField(auto_now_add=True)
    date_resolved = fields.DateTimeField()
    # Lambda (rather than ``time.time`` itself) so the clock is looked up at
    # call time.
    last_ping = fields.FloatField(default=lambda: time.time())

    # Dictionary specifying all information needed to locate file on backend
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField()

    # Dictionary containing raw metadata from upload service response
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    date_modified = fields.DateTimeField()

    @property
    def pending(self):
        """True while the version's status is anything other than COMPLETE."""
        return self.status != status_map['COMPLETE']

    @property
    def expired(self):
        """A version is expired if it is in the UPLOADING state and has not
        received a ping from the upload service within the last
        `PING_TIMEOUT` seconds.
        """
        if self.status != status_map['UPLOADING']:
            return False
        return time.time() > (self.last_ping + settings.PING_TIMEOUT)

    @property
    def location_hash(self):
        """The ``'object'`` entry of ``location``, or None if no location
        is set.
        """
        return self.location['object'] if self.location else None

    def is_duplicate(self, other):
        """Return True if this version has a non-empty location hash equal
        to that of ``other``.

        :param other: Object exposing ``location_hash``
        :return bool: Both versions point at the same backend object
        """
        return (
            bool(self.location_hash) and
            self.location_hash == other.location_hash
        )

    @check_status(status_map['UPLOADING'])
    def ping(self, signature):
        """Update last ping time and save.

        NOTE(review): signature verification is not performed in this body;
        presumably it is handled by ``check_status`` -- confirm.

        :param str signature: Signature used in signed URL
        """
        self.last_ping = time.time()
        self.save()

    @check_status(status_map['UPLOADING'])
    def set_cached(self, signature):
        """Set the status to CACHED and save.

        :param str signature: Signature used in signed URL
        """
        self.status = status_map['CACHED']
        self.save()

    @check_status(status_map['UPLOADING'], status_map['CACHED'])
    def resolve(self, signature, location, metadata):
        """Mark the version COMPLETE and record its location and metadata.

        Every key in ``metadata_fields`` must be present in ``metadata``;
        each value is passed through the associated parser and stored on the
        attribute of the same name.

        :param str signature: Signature used in signed URL
        :param dict location: Location of the file on the backend
        :param dict metadata: Raw metadata from the upload service
        :raises errors.MissingFieldError: If a key listed in
            ``metadata_fields`` is absent from ``metadata``
        """
        # Parse every required field before touching the instance, so a
        # missing key cannot leave it flagged COMPLETE with partial data.
        parsed_fields = []
        for key, parser in metadata_fields.iteritems():
            try:
                value = metadata[key]
            except KeyError:
                raise errors.MissingFieldError
            parsed_fields.append((key, parser(value)))

        self.status = status_map['COMPLETE']
        self.date_resolved = datetime.datetime.utcnow()
        self.location = location
        self.metadata = metadata
        for key, value in parsed_fields:
            setattr(self, key, value)
        self.save()

    @check_status()
    def update_metadata(self, signature, metadata):
        """Merge ``metadata`` into the stored metadata dictionary and save.

        :param str signature: Signature used in signed URL
        :param dict metadata: Entries to add to or overwrite in
            ``self.metadata``
        """
        self.metadata.update(metadata)
        self.save()

    @check_status(status_map['UPLOADING'])
    def cancel(self, signature):
        """No-op body; only the ``check_status`` wrapper runs.

        :param str signature: Signature used in signed URL
        """
        pass