Пример #1
0
def query_wikipedia(*,
                    query: str,
                    logger: logging.Logger,
                    sentences: int = 10) -> None:
    """Log a Wikipedia summary of length `sentences` for the given `query` or
       raise an error"""
    try:
        logger.debug("Searching Wikipedia for %s",
                     query,
                     extra={"traceback": get_traceback()})
        # The wikipedia package has not configured bs4 correctly and causes a warning
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            summary = wikipedia.summary(query, sentences=sentences)
    except (
            wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError,
    ) as e:
        logger.error(
            "Encountered an error searching Wikipedia for %s: %s",
            query,
            e,
            extra={"traceback": get_traceback()},
        )
        UNKNOWN.add(query)
    else:
        logger.info(
            "Found summary information for %s: %s",
            query,
            summary,
            extra={"traceback": get_traceback()},
        )
        KNOWN.add(query)
Пример #2
0
def analyse(text: str, outpath: pathlib.Path) -> None:
    """Runs all text-related tasks"""
    logger = get_logger(__name__, outpath)

    logger.debug("Analysing orbit text with spaCy",
                 extra={"traceback": get_traceback()})
    doc = NLP(text)

    for ent in doc.ents:
        if (ent.label_ == "ORG"
            ):  # Most of the nouns we care about get classified as 'ORG'
            logger.debug(f"Found {ent.text}",
                         extra={"traceback": get_traceback()})
            ORGS.add(ent.text)

            safe_text = ent.text.lower()

            if safe_text in KNOWN or safe_text in words.words():
                logger.debug(
                    "I know what %s means, I have seen it before",
                    ent.text,
                    extra={"traceback": get_traceback()},
                )
                continue
            elif safe_text in UNKNOWN:
                logger.debug(
                    "I have seen %s before but can't work out what it means",
                    ent.text,
                    extra={"traceback": get_traceback()},
                )
                continue
            else:
                query_wikipedia(query=safe_text, logger=logger)
                if safe_text in UNKNOWN:  # Wikipedia was not helpful, try Wiktionary
                    query_wiktionary(query=safe_text, logger=logger)
Пример #3
0
def analyse_orbit(orbit: dict) -> None:
    """Analyses the orbit data and logs it to the correct file"""

    directory = pathlib.Path(str(orbit["number"]))
    outpath = directory.joinpath("log.html")

    if not os.path.exists(directory):
        os.makedirs(directory)

    with open(outpath, "w") as fh:
        date_string = format_date_string(orbit["start"], orbit["finish"])
        intro_string = f"""
        <section>
            <h2>Orbit {orbit['number']} — {date_string}</h2>"""
        orbit_notes = format_orbit_notes(orbit["notes"])
        fh.write(intro_string + orbit_notes)

    logger = get_logger(__name__, outpath)
    logger.debug(
        "Analysing the text for orbit %s",
        orbit["number"],
        extra={"traceback": get_traceback()},
    )
    analyse_text(orbit["notes"], outpath)
    analyse_images(orbit, outpath)

    with open(outpath, "a") as fh:
        fh.write("</section>")
Пример #4
0
def query_wiktionary(*, query: str, logger: logging.Logger) -> None:
    """Log a definition from Wiktionary for a given `query`"""
    logger.debug("Searching Wiktionary for %s",
                 query,
                 extra={"traceback": get_traceback()})
    word = PARSER.fetch(query)

    if len(word[0]["definitions"]):
        logger.info(
            "Found definitions for %s: %s",
            query,
            [definition["text"] for definition in word[0]["definitions"]],
            extra={"traceback": get_traceback()},
        )
        KNOWN.add(query)
    else:
        logger.info(
            "Found nothing in Wiktionary for %s",
            query,
            extra={"traceback": get_traceback()},
        )
        UNKNOWN.add(query)
Пример #5
0
    def read(self, request, creator_id=None, annotation_id=None, *args, **kwargs):
        def create_date(s):
            reg = (
                r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})"
                "T(?P<hour>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})Z"
            )

            match = re.match(reg, s)
            mch = lambda d: int(match.group(d))

            return datetime.datetime(
                year=mch("year"), month=mch("month"), day=mch("day"), hour=mch("hour"), minute=mch("minutes")
            )

        includeDeletions = True if request.GET.get("includeDeletions", 0) == "true" else False

        active = Annotation.annotations.active()

        annotations = active if includeDeletions else active.filter(deleted=False)

        def filter_by_creator_id(request, qs, creator_id):
            log.info("Filter by creator id")

            if request.user.is_authenticated():
                # exclude from queryset annotations which are private
                # and are not owned by registered user
                qs = qs.exclude(~Q(author=request.user), private=True)

            else:
                # if user is not logged in exclude all private annotations
                qs = qs.exclude(private=True)

            # NOTE: Django user id has low priority before drupal user id
            # Drupal user? we need to remove this.
            if creator_id:
                # user = (get_object_or_None(Profile, username=self.fake_username(creator_id)) or get_object_or_None(Profile, id=creator_id))
                user = get_object_or_None(Profile, username=self.fake_username(creator_id)) or get_object_or_None(
                    Profile, id=creator_id
                )

                if not user:
                    raise NotFoundError("User not found")

                qs = qs.filter(author=user) if user else qs
            return qs

        def filter_by_annotation_id(request, qs, annotation_id, filtered_by_creator=False, creator_id=None):
            log.info("Filter by annotation id")

            """This function recieves queryset filtered by user and filters it by anntation id."""
            if not annotation_id:
                return qs

            if not filtered_by_creator and creator_id:
                qs = filter_by_creator_id(request, qs, creator_id)

            kw = dict(id=annotation_id)

            if includeDeletions:
                kw.setdefault("deleted", True)

            if bool(request.GET.get("nested", False)):
                annotation = qs.get(**kw)
                return qs.filter(
                    target__url__in=[t.url for t in annotation.target.all()],
                    has_answers=False,
                    deleted=True if includeDeletions else False,
                ).exclude(id=annotation.id)
            return qs.filter(**kw)

        #
        # Filter by constraint
        # date: oldest, newest
        # targetUri: URI
        # start: number
        # limit: number
        #
        def filter_by_constraint(request, qs):

            constraints = {
                "oldest": lambda v: ("creation_date__lte", create_date(v)),
                "newest": lambda v: ("creation_date__gte", create_date(v)),
                "targetUri": lambda v: ("target__url__icontains", v),
            }

            q = dict(q_rep(request.GET.get(con)) for con, q_rep in constraints.items() if con in request.GET)

            if q:
                qs = qs.filter(**q)

            return qs

        def filter_by_limit(request, qs):

            try:
                start = int(request.GET.get("start", 0))
                limit = int(request.GET.get("limit", request.GET.get("rows", 50)))
                end = start + limit

            except ValueError:
                start = 0
                end = 50

            return qs.order_by("creation_date")[start:end]

        def inGET(name):
            return name in request.GET

        try:

            if creator_id:
                annotations = filter_by_creator_id(request, annotations, creator_id)

            if annotation_id:
                annotations = filter_by_annotation_id(request, annotations, annotation_id)

            annotations = filter_by_constraint(request, annotations)

            return filter_by_limit(request, annotations)

        except NotFoundError as e:

            log.info(u"Error in AnnotationHandler.read(). Error was " + get_traceback())
            raise e

        except Exception as e:

            log.info(u"Error in AnnotationHandler.read(). Error was " + get_traceback())
            raise e
Пример #6
0
    def create(self, request, creator_id, **kwargs):
        def get_user(request, creator=None):

            if creator and request.user.is_anonymous():
                return Profile.objects.get_or_create(drupal_uid=creator, username=self.fake_username(creator))[0]

            elif request.user.is_authenticated():
                return request.user

            else:
                return {
                    "error": (
                        "Can not create annotation with fully annonymous user. Please login or provide drupal_uid."
                    )
                }

        #
        # Saves constraints and returns saved constraints instances.
        # argument "field_name" define where constraints are locate in request.POST
        #
        def save_constraints(request, annotation, field_name="ranges"):

            if field_name in request.POST:
                print "savin' constraints {0}".format(request.POST.get(field_name, "No ranges in request.POST"))

                # save constraints if exists
                def save_one(data):
                    form = ConstraintForm(data)
                    if form.is_valid():
                        const = form.save(commit=False)
                        const.annotation = annotation
                        const.save()
                        const.target = annotation.target.all()
                        return const

                    else:
                        return

                return [save_one(i) for i in request.POST[field_name] if isinstance(i, dict)]

        try:

            creator_id = get_user(request, creator_id)
            annotation = request.form.save(commit=False)
            annotation.author = creator_id
            annotation.private = 0 if creator_id.drupal_uid else request.form.cleaned_data["private"]
            annotation.save()
            request.form.save_m2m()

            constraints = save_constraints(request, annotation)

            qs = Annotation.objects.filter(
                target__url__in=[t.url for t in annotation.target.all()], has_answers=False, deleted=False
            ).exclude(id=annotation.id)

            if annotation.type == "Reply" and qs.count():
                qs.update(has_answers=True)
            return annotation
        except Exception as e:
            log.error(
                "Errow while creating new one annotation. Error was {0}, traceback: {1}".format(e, get_traceback())
            )
            return e