示例#1
0
    def store_annotations(self, user_id, data):
        """Create or update one user's annotation of a document and save it.

        The document is looked up by the filename in ``data['doc']``. The
        annotation is looked up by ``user_id`` plus that document's filename,
        and a new one is created when the lookup yields nothing truthy.

        Args:
            user_id: Identifier stored on, and used to find, the annotation.
            data: Mapping providing the keys ``doc``, ``approved``,
                ``concepts``, ``relations``, ``arg_units`` and ``notes``.

        NOTE(review): nothing here guards against the document lookup coming
        back empty; ``doc.filename`` would then fail.
        """
        doc = Document.all().filter("filename =", data['doc']).get()

        annotation_query = DocumentAnnotation.all()
        annotation_query = annotation_query.filter("user_id =", user_id)
        annotation_query = annotation_query.filter("document =", doc.filename)
        doc_annotation = annotation_query.get() or DocumentAnnotation(
            user_id=user_id, document=doc.filename)

        doc_annotation.approved = data["approved"]

        # Each list-valued field is stored as a list of Text items.
        for list_field in ("concepts", "relations", "arg_units"):
            wrapped = [Text(entry) for entry in data[list_field]]
            setattr(doc_annotation, list_field, wrapped)

        doc_annotation.notes = Text(data['notes'])

        # Keep the log entry on one line by replacing newlines in the notes.
        approved_tag = "[Approved]" if doc_annotation.approved else ""
        flat_notes = doc_annotation.notes.encode("utf-8").replace("\n", " NL ")
        log("Storing annotations " + approved_tag + " for " +
            str(doc.filename) + ": " + str(doc_annotation.arg_units) +
            " - notes: " + flat_notes)

        # Save, then fetch the entity back by the key that put() returned.
        stored_key = doc_annotation.put()
        db.get(stored_key)
示例#2
0
    def value_for_db(self, value, field):
        """Convert ``value`` into the form stored for ``field``'s database type.

        The database type is whatever ``field.db_type(self.connection)``
        returns. ``None`` is passed through unchanged, and so is any value
        whose database type is not handled below.

        Args:
            value: The Python value to convert.
            field: The model field; ``max_digits`` and ``decimal_places`` are
                read from it for decimal types.

        Returns:
            The converted value, or ``None`` for ``None`` and for empty
            sized values of a list/set type.
        """
        if value is None:
            return None

        db_type = field.db_type(self.connection)

        if db_type in ('integer', 'long'):
            # Only floats are rounded first: round() yields a float, which has
            # a smaller maximum than an int, so ints must skip that step.
            if isinstance(value, float):
                return long(round(value))
            return long(value)

        if db_type == 'float':
            return float(value)

        if db_type == 'string':
            return coerce_unicode(value)

        if db_type == 'text':
            return Text(coerce_unicode(value))

        if db_type == 'bytes':
            # BlobField, DictField and EmbeddedModelField values become Blobs.
            return Blob(value)

        if db_type == 'decimal':
            return self.adapt_decimalfield_value(
                value, field.max_digits, field.decimal_places)

        if db_type in ('list', 'set'):
            if hasattr(value, "__len__") and not value:
                # Empty collections are stored as None.
                return None
            if hasattr(value, "__iter__"):
                # Sets (and any other iterable) are stored as lists.
                return list(value)

        return value
示例#3
0
    def convert_value_for_db(self, db_type, value):
        """Convert a Python value into the form stored for ``db_type``.

        The conversion runs in two passes. The first normalises the value by
        its Python type (string subclasses, list fields, decimals); the second
        applies the conversion selected by ``db_type``.

        Args:
            db_type: Database type name. ``'ListField:<subtype>'`` converts
                each element with ``<subtype>``; ``'decimal:<args>'`` passes
                the evaluated ``<args>`` to ``value_to_db_decimal``.
            value: The value to convert.

        Returns:
            The converted value. ``'gae_key'`` values are returned right after
            the first pass.
        """
        if isinstance(value, unicode):
            # Re-wrapping yields a plain unicode object even when value is an
            # instance of a unicode subclass.
            value = unicode(value)
        elif isinstance(value, str):
            value = str(value)
        elif isinstance(value, (list, tuple)) and len(value) and \
                db_type.startswith('ListField:'):
            db_sub_type = db_type.split(':', 1)[1]
            value = [
                self.convert_value_for_db(db_sub_type, subvalue)
                for subvalue in value
            ]
        elif isinstance(value,
                        decimal.Decimal) and db_type.startswith("decimal:"):
            # NOTE(security): eval() executes whatever follows "decimal:".
            # This is only safe while db_type strings are produced by trusted
            # code and never derived from external input.
            value = self.connection.ops.value_to_db_decimal(
                value, *eval(db_type[8:]))

        if db_type == 'gae_key':
            return value
        elif db_type == 'longtext':
            # long text fields cannot be indexed on GAE so use GAE's database
            # type Text
            # A conditional expression is used instead of ``cond and a or b``
            # so that an empty byte string is decoded as well.
            value = Text(
                value.decode('utf-8') if isinstance(value, str) else value)
        elif db_type == 'text':
            value = value.decode('utf-8') if isinstance(value, str) else value
        elif db_type == 'blob':
            value = Blob(value)
        elif type(value) is str:
            # always store unicode strings
            value = value.decode('utf-8')
        elif db_type == 'date' or db_type == 'time' or db_type == 'datetime':
            # here we have to check the db_type because GAE always stores datetimes
            value = to_datetime(value)
        return value
示例#4
0
    def convert_from_legacy_arg_units(self, anno):
        """
        Upgrade ``anno.arg_units`` from older serialisations to the current one.

        Each unit is converted to ``str``, has the legacy type names replaced,
        gets a default confidence inserted as its second comma-separated field
        when that field is not already a confidence level, and is stored back
        as ``Text``. ``anno.arg_units`` is replaced only after every unit has
        been converted.

        A unit without any comma has no second field and raises IndexError.
        """
        confidence_levels = ["\"high\"", "\"medium\"", "\"low\""]
        # (old, new) pairs, applied strictly in this order.
        renames = (
            # As of Nov 18, 2013, pre- and post-claim premises are distinct;
            # legacy units are treated as pre-claim.
            ("\"support\"", "\"support-pre\""),
            ("\"rebutter\"", "\"rebutter-pre\""),
            # As of Nov 22, 2013, rebutters are called attack.
            ("rebutter", "attack"),
        )

        upgraded = []
        for legacy_unit in anno.arg_units:
            unit_text = str(legacy_unit)
            for old, new in renames:
                unit_text = unit_text.replace(old, new)

            # As of Nov 20, 2013, units carry a confidence level in field 1.
            fields = unit_text.split(",")
            if fields[1] not in confidence_levels:
                fields.insert(1, DEFAULT_CONFIDENCE)

            upgraded.append(Text(",".join(fields)))

        anno.arg_units = upgraded
示例#5
0
文件: base.py 项目: meizon/djangae
    def value_for_db(self, value, field):
        """Convert ``value`` into the form stored for ``field``'s database type.

        The database type comes from ``self.connection.creation.db_type(field)``.
        ``None`` and values of an unhandled database type are returned
        unchanged.

        Args:
            value: The Python value to convert.
            field: The model field; ``max_digits`` and ``decimal_places`` are
                read from it for decimal types.

        Returns:
            The converted value.

        Raises:
            DatabaseError: A byte string for a string/text type is not valid
                UTF-8.
        """
        if value is None:
            return None

        db_type = self.connection.creation.db_type(field)

        if db_type in ('string', 'text'):
            if isinstance(value, str):
                try:
                    value = value.decode('utf-8')
                except UnicodeDecodeError:
                    raise DatabaseError("Bytestring is not encoded in utf-8")
            return Text(value) if db_type == 'text' else value

        if db_type == 'bytes':
            # BlobField, DictField and EmbeddedModelField values become Blobs.
            return Blob(value)

        if db_type in ('date', 'datetime', 'time'):
            # Dispatches to value_to_db_date / _datetime / _time by name.
            converter = getattr(self, 'value_to_db_' + db_type)
            return converter(value)

        if db_type == 'decimal':
            return self.value_to_db_decimal(
                value, field.max_digits, field.decimal_places)

        return value
示例#6
0
文件: base.py 项目: kyasui/djangae
    def value_for_db(self, value, field):
        """Convert ``value`` into the form stored for ``field``'s database type.

        The database type is whatever ``field.db_type(self.connection)``
        returns. ``None`` and values of an unhandled database type are
        returned unchanged.

        Args:
            value: The Python value to convert.
            field: The model field; ``max_digits`` and ``decimal_places`` are
                read from it for decimal types.

        Returns:
            The converted value, or ``None`` for ``None`` and for empty
            sized values of a list/set type.
        """
        if value is None:
            return None

        kind = field.db_type(self.connection)

        if kind == 'string':
            return coerce_unicode(value)
        if kind == 'text':
            return Text(coerce_unicode(value))
        if kind == 'bytes':
            # BlobField, DictField and EmbeddedModelField values become Blobs.
            return Blob(value)
        if kind == 'date':
            return self.value_to_db_date(value)
        if kind == 'datetime':
            return self.value_to_db_datetime(value)
        if kind == 'time':
            return self.value_to_db_time(value)
        if kind == 'decimal':
            return self.value_to_db_decimal(
                value, field.max_digits, field.decimal_places)
        if kind in ('list', 'set'):
            is_empty_sized = hasattr(value, "__len__") and not value
            if is_empty_sized:
                # Empty collections are stored as None.
                return None
            if hasattr(value, "__iter__"):
                # Sets (and any other iterable) are stored as lists.
                return list(value)

        return value
示例#7
0
文件: base.py 项目: hahnicity/Morsels
    def _value_for_db(self, value, field, field_kind, db_type, lookup):
        """
        Convert a value to something the GAE datastore can hold.

        The datastore stores a restricted set of Python types and has its
        own types (Key, Text, Blob) for some cases. ``None`` is returned
        as-is; every other value first goes through the parent class's
        ``_value_for_db`` and is then adjusted by ``field_kind`` and
        ``db_type``.

        Raises DatabaseError when a value cannot be turned into a key.

        TODO: Consider moving empty list handling here (from insert).
        """
        # None stays None so nullable fields, keys included, can be stored.
        if value is None:
            return None

        # Iterable fields and Django wrappers are handled by the parent.
        parent = super(DatabaseOperations, self)
        value = parent._value_for_db(value, field, field_kind, db_type, lookup)

        # Decimals are stored as strings that preserve ordering.
        if field_kind == 'DecimalField':
            value = decimal_to_string(value, field.max_digits,
                                      field.decimal_places)

        if db_type == 'key':
            # The key kind is the db_table of field.model.
            # NOTE(review): for ForeignKeys and other relations the kind
            # should presumably be the related model's table -- confirm.
            try:
                return key_from_path(field.model._meta.db_table, value)
            except (BadArgumentError, BadValueError):
                raise DatabaseError("Only strings and positive integers "
                                    "may be used as keys on GAE.")

        if db_type in ('string', 'text'):
            # All strings are stored as unicode; 'text' is wrapped in Text.
            if isinstance(value, str):
                value = value.decode('utf-8')
            return Text(value) if db_type == 'text' else value

        # Dates and times are stored as datetimes, padded with a default
        # time or date respectively.
        if db_type == 'date':
            return datetime.datetime.combine(value, self.DEFAULT_TIME)
        if db_type == 'time':
            return datetime.datetime.combine(self.DEFAULT_DATE, value)

        # BlobField, DictField and EmbeddedModelField values become Blobs.
        if db_type == 'bytes':
            return Blob(value)

        return value
示例#8
0
    def annotation_from_json(self, annotation_data, doc):
        """Build a DocumentAnnotation from parsed annotation data.

        The annotation is populated and returned; this method does not save
        it.

        Args:
            annotation_data: Mapping that must contain ``annotator`` and
                either ``arg_units`` or the older ``propositions`` key (a
                KeyError is raised when both are missing). ``approved``,
                ``notes``, ``relations`` and ``concepts`` are optional.
            doc: Document whose ``filename`` is recorded on the annotation.

        Returns:
            The populated DocumentAnnotation, with its arg_units passed
            through ``convert_from_legacy_arg_units``.
        """
        anno = DocumentAnnotation()

        if "approved" in annotation_data:
            anno.approved = self.bool_from_string(annotation_data["approved"])
        else:
            anno.approved = False

        anno.user_id = annotation_data["annotator"]

        if "notes" in annotation_data:
            anno.notes = annotation_data["notes"]
        else:
            anno.notes = ""

        # "propositions" is the fallback key when "arg_units" is absent.
        if "arg_units" in annotation_data:
            units_key = "arg_units"
        else:
            units_key = "propositions"
        anno.arg_units = [Text(unit) for unit in annotation_data[units_key]]

        # These lists are only set when present in the input.
        for optional_key in ("relations", "concepts"):
            if optional_key in annotation_data:
                wrapped = [Text(entry) for entry in annotation_data[optional_key]]
                setattr(anno, optional_key, wrapped)

        self.convert_from_legacy_arg_units(anno)
        anno.document = doc.filename
        return anno