Example #1
def download(args):
    """
    Function for downloading all examples in AudioSet containing labels for given classes
    :param args:
    :return:
    """
    print("Downloading classes from AudioSet.")

    for class_name in args.classes:
        utils.download(class_name, args)
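A minimal sketch of how this command might be invoked; the positional `classes` argument and the `utils` module are assumptions, since the original snippet shows neither the argument parser nor the helper:

import argparse
import utils  # assumed helper module exposing utils.download(class_name, args)

def main():
    # Hypothetical CLI wiring: each positional argument is one AudioSet class.
    parser = argparse.ArgumentParser(
        description="Download AudioSet examples for the given classes.")
    parser.add_argument("classes", nargs="+", help="AudioSet class labels to fetch")
    args = parser.parse_args()
    download(args)

if __name__ == "__main__":
    main()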
def download_aug(path, overwrite=False):
    _AUG_DOWNLOAD_URLS = [(
        "http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz",
        "7129e0a480c2d6afb02b517bb18ac54283bfaa35",
    )]
    makedirs(path)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        filename = download(url,
                            path=path,
                            overwrite=overwrite,
                            sha1_hash=checksum)
        # extract
        with tarfile.open(filename) as tar:
            tar.extractall(path=path)
            shutil.move(
                os.path.join(path, "benchmark_RELEASE"),
                os.path.join(path, "VOCaug"),
            )
            filenames = ["VOCaug/dataset/train.txt", "VOCaug/dataset/val.txt"]
            # generate trainval.txt
            with open(os.path.join(path, "VOCaug/dataset/trainval.txt"),
                      "w") as outfile:
                for fname in filenames:
                    fname = os.path.join(path, fname)
                    with open(fname) as infile:
                        for line in infile:
                            outfile.write(line)
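The snippets above call `download` and `makedirs` helpers whose definitions are not shown. A stdlib-only sketch of what they plausibly look like, assuming `sha1_hash` is used both to skip an already-verified file and to validate a fresh download (this mirrors common dataset-script helpers rather than any particular library's implementation):

import hashlib
import os
import urllib.request

def makedirs(path):
    # Create the directory tree, tolerating the case where it already exists.
    os.makedirs(path, exist_ok=True)

def _sha1_of(fname):
    # Stream the file in 1 MiB chunks so large archives do not fill memory.
    sha1 = hashlib.sha1()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha1.update(chunk)
    return sha1.hexdigest()

def download(url, path=".", overwrite=False, sha1_hash=None):
    # Fetch url into path and return the local filename, skipping the
    # download when a file with a matching checksum is already present.
    fname = os.path.join(path, url.split("/")[-1])
    if os.path.exists(fname) and not overwrite and (
            sha1_hash is None or _sha1_of(fname) == sha1_hash):
        return fname
    makedirs(path)
    urllib.request.urlretrieve(url, fname)
    if sha1_hash is not None and _sha1_of(fname) != sha1_hash:
        raise ValueError("Checksum mismatch for {}".format(fname))
    return fname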
def download_coco(path, overwrite=False):
    _DOWNLOAD_URLS = [
        (
            "http://images.cocodataset.org/zips/train2017.zip",
            "10ad623668ab00c62c096f0ed636d6aff41faca5",
        ),
        (
            "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
            "8551ee4bb5860311e79dace7e79cb91e432e78b3",
        ),
        (
            "http://images.cocodataset.org/zips/val2017.zip",
            "4950dc9d00dbe1c933ee0170f5797584351d2a41",
        ),
        # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
        # '46cdcf715b6b4f67e980b529534e79c2edffe084'),
        # test2017.zip, for those who want to enter the competition.
        # ('http://images.cocodataset.org/zips/test2017.zip',
        #  '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in _DOWNLOAD_URLS:
        filename = download(url,
                            path=path,
                            overwrite=overwrite,
                            sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(filename) as zf:
            zf.extractall(path=path)
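A typical call, assuming a gluoncv-style dataset root (the exact path is only an illustration):

import os

# Hypothetical target directory; the COCO archives land and unpack here.
download_coco(os.path.expanduser("~/.mxnet/datasets/coco"))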
Example #4
    def handle(self, *args, **kwargs):
        _, _, content = download(expand_gdrive_download_url(kwargs["infile"]))

        current_flags_count = Flag.objects.count()
        flags = []
        if content is None:
            self.stderr.write("Cannot download file using url {}".format(
                kwargs["infile"]))

            return

        scores = defaultdict(float)

        for l in content.splitlines():
            d = json.loads(l)
            try:
                pep = Person.objects.get(pk=int(d.get("id_PEP", 0)))
                rule = Rule.objects.get(pk=d.get("Rule"))
            except Person.DoesNotExist:
                self.stderr.write("PEP with id '{}' doesn't exists".format(
                    d.get("id_PEP")))
                continue
            except Rule.DoesNotExist:
                self.stderr.write("Rule with id '{}' doesn't exists".format(
                    d.get("Rule")))
                continue

            scores[pep.pk] += rule.weight
            flags.append(
                Flag(person=pep, rule=rule, data=d.get("flag_data", {})))

        if len(flags) < current_flags_count * 0.9 and not kwargs["force"]:
            self.stderr.write(
                "Major decrease in number of flags (was {}, now {}), aborting".
                format(current_flags_count, len(flags)))

            return

        Flag.objects.all().delete()
        Flag.objects.bulk_create(flags)

        max_score = max(scores.values())

        for pep_id, score in scores.items():
            if score == max_score:
                self.stdout.write("Max score is {} (pep {})".format(
                    score, pep_id))
                break

        if max_score < 1. and not kwargs["force"]:
            self.stderr.write(
                "Max score is {} which is too low".format(max_score))

            return

        Rule.objects.update(scale=10 / max_score)
        self.stdout.write("Import is complete, {} new flags added".format(
            len(flags)))
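The command above relies on `expand_gdrive_download_url`, which is not shown. A plausible sketch: rewrite a Google Drive share link into its direct-download form, where the regex and the `uc?export=download` URL template are assumptions based on how such links are commonly converted:

import re

def expand_gdrive_download_url(url):
    # Turn .../file/d/<id>/... or ...?id=<id> share links into direct
    # downloads; anything that is not a Drive link passes through unchanged.
    match = re.search(r"(?:/d/|id=)([\w-]+)", url)
    if "drive.google.com" in url and match:
        return "https://drive.google.com/uc?export=download&id=" + match.group(1)
    return url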
Example #5
def download_sbu(path, overwrite=False):
    _DOWNLOAD_URLS = [
        'http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip',
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url in _DOWNLOAD_URLS:
        filename = download(url, path=download_dir, overwrite=overwrite)
        # extract
        with zipfile.ZipFile(filename, "r") as zf:
            zf.extractall(path=path)
        print("Extracted", filename)
def download_voc(path, overwrite=False):
    _DOWNLOAD_URLS = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
         '34ed68851bce2a36e2a223fa52c661d592c66b3c'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
         '41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')]
    makedirs(path)
    for url, checksum in _DOWNLOAD_URLS:
        filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(filename) as tar:
            tar.extractall(path=path)
def download_ade(path, overwrite=False):
    _AUG_DOWNLOAD_URLS = [
        ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
         '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
        ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
         'e05747892219d10e9243933371a497e905a4860c'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        filename = download(url,
                            path=download_dir,
                            overwrite=overwrite,
                            sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(filename, "r") as zip_ref:
            zip_ref.extractall(path=path)
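Downloaders like these are typically dispatched from a small command-line entry point; a sketch, in which the flag names are assumptions:

import argparse

DOWNLOADERS = {
    "ade": download_ade,
    "aug": download_aug,
    "coco": download_coco,
    "sbu": download_sbu,
    "voc": download_voc,
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download a segmentation dataset.")
    parser.add_argument("dataset", choices=sorted(DOWNLOADERS))
    parser.add_argument("--path", default="datasets", help="target directory")
    parser.add_argument("--overwrite", action="store_true")
    args = parser.parse_args()
    DOWNLOADERS[args.dataset](args.path, overwrite=args.overwrite)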
Example #8
    def handle(self, *args, **options):
        peklun = User.objects.get(username="******")

        wks = get_spreadsheet().sheet1

        for i, l in enumerate(wks.get_all_records()):
            # reopen it from time to time to avoid being disconnected by timeout
            if i % 2000 == 0 and i:
                wks = get_spreadsheet().sheet1

            self.stdout.write('Processing line #{}'.format(i))

            company_ipn = l.get("ІПН", "")
            company_name = l.get("Назва", "")
            person_id = l.get("id персони", "")
            company_id = l.get("id компанії", "")
            photo_url = l.get("Фото", "")

            person = None
            # First let's search for appropriate company
            company = self.process_company(company_id, company_ipn,
                                           company_name)

            # No company — no go
            if company is None:
                continue

            # Let's write the company id back to the spreadsheet for further use
            if company.pk != company_id:
                company_id = company.pk
                wks.update_cell(i + 2, len(l.keys()), company.pk)

            person_name = l.get("ПІБ", "").strip()
            position = l.get("Посада", "").strip()
            person_dob = str(l.get("Дата народження", "")).strip()
            person_from = parse_date(l.get("Дата призначення", ""))
            person_to = parse_date(l.get("Дата звільнення", ""))

            doc_received = parse_date(l.get("Дата відповіді", ""))
            docs = l.get("Лінк на відповідь", "").strip()
            website = l.get("лінк на сайт", "").strip()

            # Now let's search for the person
            if person_name:
                last_name, first_name, patronymic, _ = parse_fullname(
                    person_name)

                if not last_name:
                    continue

                # First we search by person_id (if it's present)
                if person_id:
                    try:
                        person = Person.objects.get(pk=person_id)
                    except Person.DoesNotExist:
                        pass

                # If nothing is found, we search by name (for now)
                if not person:
                    try:
                        person = Person.objects.get(
                            first_name_uk__iexact=first_name,
                            last_name_uk__iexact=last_name,
                            patronymic_uk__iexact=patronymic)
                    except Person.MultipleObjectsReturned:
                        self.stderr.write(
                            "Double person {}!".format(person_name))
                    except Person.DoesNotExist:
                        pass

                # If nothing is found, let's create a record for that person
                if not person:
                    person = Person()
                    self.stderr.write(
                        "Created new person {}".format(person_name))

                person.first_name_uk = first_name
                person.last_name_uk = last_name
                person.patronymic_uk = patronymic

                Ua2RuDictionary.objects.get_or_create(term=first_name)
                Ua2RuDictionary.objects.get_or_create(term=last_name)
                Ua2RuDictionary.objects.get_or_create(term=patronymic)

                person.first_name_en = translitua(first_name)
                person.last_name_en = translitua(last_name)
                person.patronymic_en = translitua(patronymic)

                person.is_pep = True
                person.imported = True
                person.type_of_official = 1

                # Parsing date (can be a full date or just a year or
                # year/month)
                if person_dob:
                    person.dob = parse_date(person_dob)
                    if len(person_dob) == 4:
                        person.dob_details = 2  # Only year

                    if len(person_dob) > 4 and len(person_dob) < 7:
                        person.dob_details = 1  # month and year

                # Let's download the photo (if any)
                if not person.photo and photo_url:
                    photo_name, photo_san_name, photo_content = download(
                        photo_url, translitua(person_name))

                    if photo_name:
                        person.photo.save(photo_san_name,
                                          ContentFile(photo_content))
                    else:
                        self.stdout.write("Cannot download image %s for %s" %
                                          (photo_url, person_name))

                person.save()

                # Let's write the person id back to the table.
                if person.pk != person_id:
                    person_id = person.pk
                    wks.update_cell(i + 2, len(l.keys()) - 1, person.pk)

                # Now let's download all supporting docs
                docs_downloaded = []
                first_doc_name = False

                # There might be many of them
                for doc in docs.split(", "):
                    doc_instance = None

                    # we cannot download folders from google docs, so let's
                    # skip them

                    if doc and "folderview" not in doc \
                            and "drive/#folders" not in doc:
                        doc = expand_gdrive_download_url(doc)
                        doc_hash = sha1(doc.encode("utf-8")).hexdigest()

                        # Check if this doc has already been downloaded
                        try:
                            doc_instance = Document.objects.get(hash=doc_hash)
                        except Document.DoesNotExist:
                            self.stdout.write(
                                'Downloading file {}'.format(doc))
                            doc_name, doc_san_name, doc_content = download(doc)
                            doc_san_name = translitua(doc_san_name)

                            if doc_name:
                                doc_instance = Document(name_uk=doc_name,
                                                        uploader=peklun,
                                                        hash=doc_hash)

                                doc_instance.doc.save(doc_san_name,
                                                      ContentFile(doc_content))
                                doc_instance.save()
                            else:
                                self.stdout.write(
                                    'Cannot download file {}'.format(doc))

                        if doc_instance:
                            first_doc_name = doc_instance.name_uk
                            docs_downloaded.append(doc_instance.doc.url)

                # Now let's set up links between the person and companies
                links = Person2Company.objects.filter(
                    (Q(date_established=person_from)
                     | Q(date_established=mangle_date(person_from))
                     | Q(date_established__isnull=True)),
                    (Q(date_finished=person_to)
                     | Q(date_finished=mangle_date(person_to))
                     | Q(date_finished__isnull=True)),
                    from_person=person,
                    to_company=company)

                # Delete duplicated links, including cases where dates were
                # imported incorrectly because of parse_date
                if len(links) > 1:
                    links.delete()

                link, _ = Person2Company.objects.update_or_create(
                    from_person=person,
                    to_company=company,
                    date_established=person_from,
                    date_established_details=0,
                    date_finished=person_to,
                    date_finished_details=0)

                if not link.relationship_type:
                    link.relationship_type = position

                # And queue the position title for translation
                Ua2EnDictionary.objects.get_or_create(
                    term=lookup_term(position))

                # oh, and add links to supporting docs
                all_docs = docs_downloaded + website.split(", ")
                if all_docs:
                    link.proof = ", ".join(filter(None, all_docs))

                    if first_doc_name:
                        link.proof_title = first_doc_name

                link.date_confirmed = doc_received
                link.is_employee = True

                link.save()
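Both this command and `load_peps` below lean on a `parse_date` helper that must accept a full date, a month/year pair, or a bare year, since the `dob_details` logic above distinguishes exactly those cases. A minimal sketch, assuming day-first dot-separated input and returning None when nothing parses:

from datetime import date

def parse_date(value):
    # Accepts "DD.MM.YYYY", "MM.YYYY", or "YYYY"; missing day/month default
    # to 1, which matches the dob_details flags set by the callers above.
    value = str(value or "").strip()
    parts = value.split(".")
    try:
        if len(parts) == 3:
            return date(int(parts[2]), int(parts[1]), int(parts[0]))
        if len(parts) == 2:
            return date(int(parts[1]), int(parts[0]), 1)
        if len(value) == 4:
            return date(int(value), 1, 1)
    except ValueError:
        pass
    return None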
def load_peps(apps, schema_editor):
    User = apps.get_model("auth", "User")
    Company = apps.get_model("core", "Company")
    Person = apps.get_model("core", "Person")
    Person2Company = apps.get_model("core", "Person2Company")
    Document = apps.get_model("core", "Document")

    peklun = User.objects.get(username="******")

    with open("core/dicts/new_peps.csv", "r") as fp:
        r = DictReader(fp, errors="ignore")

        for i, l in enumerate(r):
            print(i)
            company_ipn = l.get("ІПН", "")
            company_name = l.get("Назва", "")

            company = None

            if not company_ipn and not company_name:
                continue

            # Search by IPN first (if it's present)
            if company_ipn:
                try:
                    company = Company.objects.get(edrpou=company_ipn)
                except Company.DoesNotExist:
                    pass

            # then search by name (if it's present)
            if company_name:
                if company is None:
                    try:
                        company = Company.objects.get(name=company_name)
                    except Company.DoesNotExist:
                        pass

            if company is None:
                company = Company(state_company=True)

            # Set missing params
            if not company.name:
                company.name = company_name

            if not company.edrpou:
                company.edrpou = company_ipn

            company.save()

            person_name = l.get("ПІБ", "").strip()
            position = l.get("Посада", "").strip()
            person_dob = l.get("Дата народження", "").strip()
            person_from = l.get("Дата призначення", "").strip()
            person_to = l.get("Дата звільнення", "").strip()

            doc_received = l.get("Дата відповіді", "").strip()
            doc = l.get("Лінк на відповідь", "").strip()
            website = l.get("лінк на сайт", "").strip()

            if person_name:
                chunks = person_name.split(" ")
                # Two-word names carry no patronymic; start empty so the
                # lookup below cannot hit an unassigned name
                patronymic = ""
                if len(chunks) == 2:
                    last_name = title(chunks[0])
                    first_name = title(chunks[1])
                else:
                    last_name = title(" ".join(chunks[:-2]))
                    first_name = title(chunks[-2])
                    patronymic = title(chunks[-1])

                # Kind of get_or_create
                try:
                    person = Person.objects.get(first_name__iexact=first_name,
                                                last_name__iexact=last_name,
                                                patronymic__iexact=patronymic)
                except Person.DoesNotExist:
                    person = Person(first_name=first_name,
                                    last_name=last_name,
                                    patronymic=patronymic)

                person.is_pep = True
                person.type_of_official = 1
                if person_dob:
                    person.dob = parse_date(person_dob)
                    if len(person_dob) == 4:
                        person.dob_details = 2  # Only year

                    if len(person_dob) > 4 and len(person_dob) < 7:
                        person.dob_details = 1  # month and year

                person.save()

                doc_instance = None
                if doc and "folderview" not in doc \
                        and "drive/#folders" not in doc:
                    print(doc)
                    doc = expand_gdrive_download_url(doc)
                    doc_hash = sha1(doc.encode("utf-8")).hexdigest()

                    try:
                        doc_instance = Document.objects.get(hash=doc_hash)
                    except Document.DoesNotExist:
                        doc_name, doc_san_name, doc_content = download(doc)
                        doc_san_name = translitua(doc_san_name)

                        if doc_name:
                            doc_instance = Document(name=doc_name,
                                                    uploader=peklun,
                                                    hash=doc_hash)

                            doc_instance.doc.save(doc_san_name,
                                                  ContentFile(doc_content))
                            doc_instance.save()

                link, _ = Person2Company.objects.update_or_create(
                    from_person=person,
                    to_company=company,
                    date_established=parse_date(person_from),
                    date_finished=parse_date(person_to))

                if not link.relationship_type:
                    link.relationship_type = position

                if doc_instance is not None:
                    link.proof_title = doc_instance.name
                    link.proof = doc_instance.doc.url

                link.date_confirmed = parse_date(doc_received)
                if not doc and website:
                    link.proof = website

                link.save()
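Where `handle` above uses a `parse_fullname` helper, `load_peps` splits names by hand. A sketch of such a helper, assuming the "Last First Patronymic" order used throughout these records; the fourth return value is unused by the caller, so it is left as an empty placeholder:

def parse_fullname(person_name):
    # Split "Last First Patronymic"; any extra leading words are folded
    # into the last name, and two-word names get an empty patronymic.
    chunks = person_name.strip().split()
    if len(chunks) >= 3:
        last_name = " ".join(chunks[:-2])
        first_name, patronymic = chunks[-2], chunks[-1]
    elif len(chunks) == 2:
        last_name, first_name, patronymic = chunks[0], chunks[1], ""
    else:
        last_name, first_name, patronymic = person_name.strip(), "", ""
    return last_name.title(), first_name.title(), patronymic.title(), ""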