Exemplo n.º 1
0
def fetch_data_from_url(url, content):
    """Collect metadata for a page given its URL and raw content.

    The returned dict always contains ``url``. Further keys are added as
    follows:

    * ``title`` and ``abstract`` -- when ``Readability`` processes the page
      without raising ``ReadabilityException``;
    * ``media_formats`` -- when ``url`` matches ``VIDEO_URL_RE``;
    * ``license_type`` plus ``license_cc`` or ``license_cc_old`` -- when a
      URL found in ``content`` matches ``CC_LICENSE_URL_RE`` (if several
      match, the last one wins).
    """
    def _clean(text):
        # Same helper chain the abstract candidates both go through.
        return reduce_whitespace(unescape_entities(strip_tags(text).strip()))

    data = {"url": url}
    try:
        readable = Readability(url, content)
        data["title"] = reduce_whitespace(
            unescape_entities(readable.get_article_title())
        )
        # The meta description is preferred; the article text is only
        # consulted when the description comes out empty.
        summary = (_clean(readable.get_meta_description())
                   or _clean(readable.get_article_text()))
        data["abstract"] = truncate_words(summary, 200)
    except ReadabilityException:
        # Title/abstract are optional; keep whatever was gathered so far.
        pass

    if VIDEO_URL_RE.search(url):
        data["media_formats"] = MediaFormat.objects.filter(name="Video")

    found_urls = URL_RE.findall(content)
    # The first entry of CC_OLD_LICENSES is skipped, as in the slice below.
    old_license_urls = [entry[0] for entry in CC_OLD_LICENSES[1:]]

    for candidate in found_urls:
        if not CC_LICENSE_URL_RE.match(candidate):
            continue
        candidate = candidate.lower()
        if candidate in old_license_urls:
            data.update(license_type="cc-old", license_cc_old=candidate)
        else:
            data.update(license_type="cc", license_cc=candidate)

    return data
Exemplo n.º 2
0
 def prepare_value(self, value):
     """Convert ``value`` into the ``{"url": ..., "name": ...}`` dict form.

     A falsy ``value`` yields an empty dict. An object exposing ``_meta`` is
     replaced by its ``serializable_value("url")``; anything else is used
     as the URL directly. The name is looked up through the ``License``
     manager only when the URL matches ``CC_LICENSE_URL_RE``, otherwise it
     is an empty string.
     """
     if not value:
         return {}

     if hasattr(value, '_meta'):
         value = value.serializable_value("url")

     if CC_LICENSE_URL_RE.match(value):
         license_name = License.objects.get_cc_license_name_from_url(value)
     else:
         license_name = u""

     return {"url": value, "name": license_name}
Exemplo n.º 3
0
 def clean_license_cc(self):
     """Validate the ``license_cc`` value against the chosen license type.

     Returns an empty string whenever ``license_type`` is not ``"cc"``.
     Otherwise the value must be non-empty and match ``CC_LICENSE_URL_RE``;
     it is then returned unchanged.

     Raises ``forms.ValidationError`` when the type is ``"cc"`` and the
     value is missing or does not match the pattern.
     """
     # Read the value first so a missing key fails before anything else.
     license_url = self.cleaned_data["license_cc"]

     if self.cleaned_data.get("license_type") != "cc":
         return u""

     if not license_url:
         raise forms.ValidationError(u"You should select the license.")
     if not CC_LICENSE_URL_RE.match(license_url):
         raise forms.ValidationError(u"Invalid license URL.")

     return license_url
Exemplo n.º 4
0
def issue(request):
    """Issue a CC license from the answers posted in ``request``.

    Only POST requests are accepted; anything else raises ``Http404``.
    One answer is read per field returned by
    ``License.objects.get_cc_issue_fields()``, from the POST key
    ``cc-question-<field id>`` (empty string when absent).

    Returns a dict. On failure it holds ``status="error"`` and a
    ``message``. On success it holds ``status="success"``, every key of the
    issued result, and ``license_classes`` -- the ``cc_type`` group of the
    result URL split on ``"-"``.
    """
    if request.method != "POST":
        raise Http404()

    answers = dict(
        (field["id"], request.POST.get("cc-question-%s" % field["id"], u""))
        for field in License.objects.get_cc_issue_fields()
    )

    result = License.objects.issue(answers)
    if not result:
        return {
            "status": "error",
            "message": u"Unable to get license information from CreativeCommons.org. Try again later.",
        }

    # ``status`` is set before merging so the result may override it.
    response = {"status": "success"}
    response.update(result)
    url_match = CC_LICENSE_URL_RE.search(result["url"])
    response["license_classes"] = url_match.groupdict()["cc_type"].split("-")
    return response
Exemplo n.º 5
0
    def render(self, name, value, attrs=None):
        """Render the license widget template for ``value``.

        ``value`` is a dict read through its ``url`` and ``name`` keys.
        When the URL matches ``CC_LICENSE_URL_RE`` the ``cc_type`` group
        decides the preselected radio options:

        * derivatives: ``"n"`` if it contains ``nd``, else ``"sa"`` if it
          contains ``sa``, else ``"y"``;
        * commercial: ``"n"`` if it contains ``nc``, else ``"y"``.

        When it does not match, the URL, name and both selections are
        rendered empty. ``attrs`` is accepted but not used here.
        """
        license_url = value.get("url", u"")
        license_name = value.get("name", u"")

        url_match = CC_LICENSE_URL_RE.search(license_url)
        if not url_match:
            # Not a recognised CC URL: blank everything out.
            cc_type = ""
            derivatives = u""
            commercial = u""
            license_url = u""
            license_name = u""
        else:
            cc_type = url_match.groupdict()["cc_type"]
            if "nd" in cc_type:
                derivatives = "n"
            elif "sa" in cc_type:
                derivatives = "sa"
            else:
                derivatives = "y"
            if "nc" in cc_type:
                commercial = "n"
            else:
                commercial = "y"

        def render_radio(field_name, selected, choices):
            # A fresh widget per call; disabled when the field is read-only.
            radio_attrs = {"disabled": "disabled"} if self.readonly else None
            return forms.RadioSelect(attrs=radio_attrs).render(
                field_name, selected, choices=choices
            )

        context = {
            "name": name,
            "cc_type": cc_type,
            "name_widget": forms.HiddenInput().render("%s_name" % name, license_name),
            "url_widget": forms.HiddenInput().render("%s_url" % name, license_url),
            "derivatives_widget": render_radio(
                "%s_derivatives" % name, derivatives, self.DERIVATIVES_CHOICES
            ),
            "commercial_widget": render_radio(
                "%s_commercial" % name, commercial, self.COMMERCIAL_CHOICES
            ),
        }
        html = render_to_string("authoring/forms/license-widget.html", context)
        return mark_safe(html)
Exemplo n.º 6
0
    def post(self, request, *args, **kwargs):
        """Handle submission of the CSV import form.

        Stages, in order:

        1. Bind ``ValidateCSVForm`` to the request. If the form is invalid
           or reports ``validation_errors``, show an error message and
           re-render through ``self.get``.
        2. If ``"validate"`` is present in the request, report that the
           data looks valid and stop without importing.
        3. Otherwise create or update one ``self.form.model`` instance per
           CSV row. A row that fails is recorded in
           ``self.validation_errors`` as ``(row number, u"", message)``
           and the import moves on to the next row.
        4. Roll back when any row failed or ``"dry_run"`` was requested;
           otherwise commit and re-index every imported object.

        Every path returns ``self.get(request, *args, **kwargs)``.

        NOTE(review): this relies on explicit ``transaction.commit()`` /
        ``transaction.rollback()`` calls, so the view is presumably run
        under manual transaction management -- confirm at the class or
        URL level. It also assumes ``self.validation_errors`` is
        initialised elsewhere before rows are appended to it.
        """
        self.form = ValidateCSVForm(request.POST, request.FILES)

        if not self.form.is_valid():
            messages.error(request, u"Data is not valid, see below.")
            return self.get(request, *args, **kwargs)

        if self.form.validation_errors:
            self.validation_errors = self.form.validation_errors
            messages.error(request, u"Data is not valid, see below.")
            return self.get(request, *args, **kwargs)

        if "validate" in request.REQUEST:
            messages.success(request, u"Data appears to be valid.")
            return self.get(request, *args, **kwargs)

        model = self.form.model
        if model == Course:
            field_name_prefix = "CR_"
            simple_fields = SIMPLE_FIELDS + COURSE_SIMPLE_FIELDS
            m2m_fields = M2M_FIELDS + COURSE_M2M_FIELDS
        elif model == Library:
            field_name_prefix = "LIB_"
            simple_fields = SIMPLE_FIELDS + LIBRARY_SIMPLE_FIELDS
            m2m_fields = M2M_FIELDS + LIBRARY_M2M_FIELDS
        # NOTE(review): any other model leaves the three names above unset;
        # presumably the form only ever yields Course or Library -- confirm.

        # Index the model's fields by name once. A field the model does not
        # have resolves to None and the matching import step is skipped.
        m2m_by_name = dict((f.name, f) for f in model._meta.many_to_many)
        authors_field = m2m_by_name.get("authors")
        keywords_field = m2m_by_name.get("keywords")

        fields_by_name = dict((f.name, f) for f in model._meta.fields)
        institution_field = fields_by_name.get("institution")
        collection_field = fields_by_name.get("collection")
        license_field = fields_by_name.get("license")
        derived_from_field = fields_by_name.get("derived_from")

        # (model attribute, CSV title column, CSV URL column) for each of
        # the pre-/post-requisite slots, processed in this order.
        requisite_specs = (
            ("prerequisite_1", "PREREQ_TITLE1", "PREREQ_URL1"),
            ("prerequisite_2", "PREREQ_TITLE2", "PREREQ_URL2"),
            ("postrequisite_1", "POSTREQ_TITLE1", "POSTREQ_URL1"),
            ("postrequisite_2", "POSTREQ_TITLE2", "POSTREQ_URL2"),
        )

        imported_objects = []

        for row_index, row in enumerate(self.form.csv_data):

            # Map the row's values to header names with the model-specific
            # prefix stripped (e.g. "CR_URL" -> "URL").
            data = {}
            for field_index, value in enumerate(row):
                field_name = self.form.header[field_index][len(field_name_prefix):]
                data[field_name] = value

            try:
                check_for_unique_url = True
                try:
                    obj = model.objects.get(url=data["URL"])
                    new_url = data.get("NEW_URL")
                    if new_url:
                        obj.url = new_url
                    else:
                        # Updating in place: the URL is already this
                        # object's own, so no uniqueness check is needed.
                        check_for_unique_url = False
                except MultipleObjectsReturned:
                    self.validation_errors.append(
                        (row_index + 1, u"", u"URL '%s' is registered multiple times, can't find an object to update." % data["URL"])
                    )
                    continue
                except model.DoesNotExist:
                    obj = model(creator=request.user)
                    obj.url = data["URL"]
                    obj.workflow_state = IMPORTED_STATE

                # Do not re-index the object until the transaction is finished
                obj.skip_indexing = True

                if check_for_unique_url and model.objects.filter(url=obj.url).exists():
                    self.validation_errors.append(
                        (row_index + 1, u"", u"URL '%s' is registered in database already." % obj.url)
                    )
                    continue

                for csv_field_name, obj_field_name in simple_fields:
                    if csv_field_name in data:
                        setattr(obj, obj_field_name, data[csv_field_name])

                if institution_field and "INSTITUTION" in data:
                    institution_field.save_form_data(obj, dict(name=data["INSTITUTION"]))

                if collection_field and "COLLECTION" in data:
                    collection_field.save_form_data(obj, dict(name=data["COLLECTION"]))

                if license_field and ("COU_TITLE" in data or "COU_URL" in data):
                    url = data.get("COU_URL", u"")
                    name = data.get("COU_TITLE", u"")
                    if url:
                        # A recognised license URL overrides the CSV title.
                        if CC_LICENSE_URL_RE.match(url):
                            name = License.objects.get_cc_license_name_from_url(url)
                        elif PUBLIC_DOMAIN_URL_RE.match(url):
                            name = PUBLIC_DOMAIN_NAME
                        elif GNU_FDL_URL_RE.match(url):
                            name = GNU_FDL_NAME
                    description = data.get("COU_DESCRIPTION", u"")
                    copyright_holder = data.get("COU_COPYRIGHT_HOLDER", u"")
                    license_field.save_form_data(obj,
                        dict(url=url, name=name, description=description,
                             copyright_holder=copyright_holder)
                    )

                # An empty title clears the slot; a non-empty one stores the
                # title together with its (optional) URL.
                for attr_name, title_key, url_key in requisite_specs:
                    requisite_field = fields_by_name.get(attr_name)
                    if requisite_field and title_key in data:
                        title = data[title_key]
                        if title:
                            requisite_field.save_form_data(obj,
                                dict(title=title, url=data.get(url_key, u""))
                            )
                        else:
                            setattr(obj, attr_name, None)

                if derived_from_field:
                    # Comparison kept as ``== False`` on purpose: a missing
                    # column (None) must not clear the relation here.
                    if data.get("PARENT_MODIFIED") == False:
                        obj.derived_from = None
                    else:
                        title = data.get("PARENT_TITLE", u"")
                        if not title:
                            obj.derived_from = None
                        else:
                            url = data.get("PARENT_URL", u"")
                            description = data.get("PARENT_CHANGES", u"")
                            derived_from_field.save_form_data(obj,
                                dict(title=title, url=url, description=description)
                            )

                # The object is saved before its many-to-many relations
                # are cleared and repopulated below.
                obj.save()

                for csv_field_name, obj_field_name, field_model, field_model_key in m2m_fields:
                    if csv_field_name in data:
                        relation = getattr(obj, obj_field_name)
                        relation.clear()
                        for item in data[csv_field_name]:
                            relation.add(field_model.objects.get(**{field_model_key: item}))

                if authors_field and "AUTHOR_NAME" in data:
                    author_names = data["AUTHOR_NAME"]
                    author_emails = data.get("AUTHOR_EMAIL", [])
                    author_countries = data.get("AUTHOR_COUNTRY", [])
                    obj.authors.clear()
                    authors_data = []
                    # E-mail and country lists may be shorter than the
                    # name list; missing entries fall back to defaults.
                    for i, name in enumerate(author_names):
                        try:
                            email = author_emails[i]
                        except IndexError:
                            email = u""
                        try:
                            country = Country.objects.get(slug=author_countries[i])
                        except IndexError:
                            country = None
                        authors_data.append(dict(name=name, email=email,
                                                 country=country))
                    authors_field.save_form_data(obj, authors_data)

                if keywords_field and "KEYWORDS" in data:
                    obj.keywords.clear()
                    keywords_data = [dict(name=name) for name in data["KEYWORDS"]]
                    keywords_field.save_form_data(obj, keywords_data)

                imported_objects.append(obj)

            except Exception as exc:
                # Only ordinary errors become row-level messages;
                # KeyboardInterrupt/SystemExit are left to propagate.
                transaction.rollback()
                if settings.DEBUG:
                    raise
                self.validation_errors.append(
                    (row_index + 1, u"", unicode(exc))
                )

        if self.validation_errors:
            transaction.rollback()
            self.is_valid = False
            messages.error(request, u"There were some errors, see below.")
        else:
            if "dry_run" in request.REQUEST:
                transaction.rollback()
                messages.success(request, u"Data appears to be valid. "
                    "It is not imported because 'Dry run' option is selected.")
            else:
                transaction.commit()

                # Indexing was suppressed during the import; run it now.
                for imported in imported_objects:
                    imported.skip_indexing = False
                    reindex(imported)

                # Second commit kept from the original flow, after reindexing.
                transaction.commit()
                messages.success(request, u"Data was imported successfully.")

        return self.get(request, *args, **kwargs)
Exemplo n.º 7
0
 def to_python(self, value):
     """Convert a submitted license URL into a ``{"url", "name"}`` dict.

     A falsy ``value`` yields ``None``. A value matching
     ``CC_LICENSE_URL_RE`` is returned together with the name obtained
     from ``License.objects.get_cc_license_name_from_url``.

     Raises ``forms.ValidationError`` with the field's ``"invalid"``
     message for any other value.
     """
     if not value:
         return None

     if CC_LICENSE_URL_RE.match(value):
         license_name = License.objects.get_cc_license_name_from_url(value)
         return {"url": value, "name": license_name}

     raise forms.ValidationError(self.default_error_messages["invalid"])