Пример #1
0
    def __init__(self,):
        """Download, unpack and load the "san_francisco_airbnb" unit-test data.

        The archive ``san_francisco_airbnb_for_unit_tests.zip`` is fetched
        from ``get_repo_url()`` into ``get_data_home_dir()``, unpacked with
        ``protected_zip_extraction`` into ``self._path``, and ``train.csv`` /
        ``test.csv`` are read into ``self._train_df`` / ``self._test_df``.
        Every column listed in ``self.image_columns`` is rewritten with
        ``path_expander`` using ``<self._path>/images`` as the base folder.
        Both frames end up with a fresh 0..n-1 index.
        """
        sha1sum_id = "652a17f1315ec0961336aa140cf983776400c933"
        dataset = "san_francisco_airbnb"
        file_name = f"{dataset}_for_unit_tests.zip"
        url = get_repo_url() + file_name
        save_path = os.path.join(get_data_home_dir(), file_name)
        self._path = os.path.join(get_data_home_dir(), dataset)
        download(
            url=url,
            path=save_path,
            sha1_hash=sha1sum_id,
        )
        protected_zip_extraction(
            save_path,
            sha1_hash=sha1sum_id,
            folder=self._path,
        )
        self._train_df = pd.read_csv(os.path.join(self._path, 'train.csv'), index_col=0)
        self._test_df = pd.read_csv(os.path.join(self._path, 'test.csv'), index_col=0)

        # The base folder does not change per column or per row, so build it
        # (and the expander) once instead of inside every lambda call.
        image_folder = os.path.join(self._path, "images")

        def expand(ele):
            return path_expander(ele, base_folder=image_folder)

        for img_col in self.image_columns:
            self._train_df[img_col] = self._train_df[img_col].apply(expand)
            self._test_df[img_col] = self._test_df[img_col].apply(expand)
            # Show the first row by position. The index still comes from the
            # CSV here (index_col=0), so a label lookup with [0] raises
            # KeyError whenever no row happens to be labelled 0.
            print(self._train_df[img_col].iloc[0])
            print(self._test_df[img_col].iloc[0])

        self._train_df.reset_index(drop=True, inplace=True)
        self._test_df.reset_index(drop=True, inplace=True)

        print(f"train sample num: {len(self._train_df)}")
        print(f"test sample num: {len(self._test_df)}")
Пример #2
0
    def __init__(self,):
        """Download, unpack and load the "hateful_memes" unit-test data.

        The archive ``hateful_memes_for_unit_tests.zip`` is fetched from
        ``get_repo_url()`` into ``get_data_home_dir()``, unpacked with
        ``protected_zip_extraction`` into ``self._path``, and ``train.csv`` /
        ``test.csv`` are read into ``self._train_df`` / ``self._test_df``.
        Every column listed in ``self.image_columns`` is rewritten with
        ``path_expander`` using ``<self._path>/images`` as the base folder.
        Both frames end up with a fresh 0..n-1 index.
        """
        sha1sum_id = "2aae657b786f505004ac2922b66097d60a540a58"
        dataset = "hateful_memes"
        file_name = f"{dataset}_for_unit_tests.zip"
        url = get_repo_url() + file_name
        save_path = os.path.join(get_data_home_dir(), file_name)
        self._path = os.path.join(get_data_home_dir(), dataset)
        download(
            url=url,
            path=save_path,
            sha1_hash=sha1sum_id,
        )
        protected_zip_extraction(
            save_path,
            sha1_hash=sha1sum_id,
            folder=self._path,
        )
        self._train_df = pd.read_csv(os.path.join(self._path, 'train.csv'), index_col=0)
        self._test_df = pd.read_csv(os.path.join(self._path, 'test.csv'), index_col=0)

        # The base folder is the same for every column and row: compute it
        # once and share a single expander between both frames.
        image_folder = os.path.join(self._path, "images")

        def expand(ele):
            return path_expander(ele, base_folder=image_folder)

        for img_col in self.image_columns:
            self._train_df[img_col] = self._train_df[img_col].apply(expand)
            self._test_df[img_col] = self._test_df[img_col].apply(expand)
            # Positional access: the frames are still indexed by the CSV's
            # first column at this point, so the label-based [0] would raise
            # KeyError if that column contains no 0.
            print(self._train_df[img_col].iloc[0])
            print(self._test_df[img_col].iloc[0])
        self._train_df.reset_index(drop=True, inplace=True)
        self._test_df.reset_index(drop=True, inplace=True)

        print(f"train sample num: {len(self._train_df)}")
        print(f"test sample num: {len(self._test_df)}")
def parse_spec(provider, spec):
    """Resolve a (provider, spec) pair into its ref, org and repo URL.

    Returns:
        A 3-tuple ``(ref, org, repo_url)`` produced by ``get_ref``,
        ``get_org`` and ``get_repo_url`` respectively, each called with
        ``(provider, spec)``.
    """
    # NOTE: repo_url must be unique, i.e. it must be the same for specs such
    # as "1-Nameless-1/Lign167.git/master" and "1-Nameless-1/Lign167/master",
    # so repo URLs are generated here instead of in create_repo_table.py.
    return (
        get_ref(provider, spec),
        get_org(provider, spec),
        get_repo_url(provider, spec),
    )
Пример #4
0
def get_repo_options(request, lang):
    """Returns a list of the names and titles of the launched repositories.

    Args:
        request: The current request; forwarded to utils.get_repo_url() to
            build each repository's URL.
        lang: Language code used to pick the localized repository title.

    Returns:
        A list of utils.Struct objects with the fields repo, title, url and
        test_mode, one per repository yielded by model.Repo.list_launched().
    """
    options = []
    for repo in model.Repo.list_launched():
        titles = config.get_for_repo(repo, 'repo_titles', {})
        # Fallback title: the first configured title, or '?' when there is
        # none. next(iter(...)) works for Python 2 lists and Python 3 dict
        # views alike; indexing titles.values() raises TypeError on Python 3.
        default_title = next(iter(titles.values()), '?')
        # Prefer the requested language, then English, then the fallback.
        title = titles.get(lang, titles.get('en', default_title))
        url = utils.get_repo_url(request, repo)
        test_mode = config.get_for_repo(repo, 'test_mode')
        options.append(
            utils.Struct(repo=repo, title=title, url=url, test_mode=test_mode))
    return options
Пример #5
0
def get_repo_options(request, lang):
    """Returns a list of the names and titles of the launched repositories.

    Args:
        request: The current request; forwarded to utils.get_repo_url() to
            build each repository's URL.
        lang: Language code used to pick the localized repository title.

    Returns:
        A list of utils.Struct objects with the fields repo, title, url and
        test_mode, one per repository yielded by model.Repo.list_launched().
    """
    options = []
    for repo in model.Repo.list_launched():
        titles = config.get_for_repo(repo, 'repo_titles', {})
        # Any configured title serves as the last-resort default; '?' is used
        # when no title is configured. Subscripting titles.values() only
        # works on Python 2, so take the first item through an iterator.
        default_title = next(iter(titles.values()), '?')
        # Lookup order: requested language, English, last-resort default.
        title = titles.get(lang, titles.get('en', default_title))
        url = utils.get_repo_url(request, repo)
        test_mode = config.get_for_repo(repo, 'test_mode')
        options.append(utils.Struct(repo=repo, title=title, url=url,
                                    test_mode=test_mode))
    return options
Пример #6
0
    def __init__(self,):
        """Download, unpack and load a reduced "petfinder" unit-test dataset.

        The archive ``petfinder_for_unit_tests.zip`` is fetched from
        ``get_repo_url()`` into ``get_data_home_dir()``, unpacked with
        ``protected_zip_extraction`` into ``self._path``, and ``train.csv`` /
        ``test.csv`` are read into ``self._train_df`` / ``self._test_df``.
        Every column listed in ``self.image_columns`` is rewritten with
        ``path_expander`` using ``<self._path>/images`` as the base folder.

        Each frame is then replaced by the second value returned from
        ``train_test_split(..., test_size=0.1)``, stratified on
        ``self.label_columns[0]`` (presumably the 10% held-out part; confirm
        against the imported ``train_test_split``), and re-indexed 0..n-1.
        """
        sha1sum_id = "72cb19612318bb304d4a169804f525f88dc3f0d0"
        dataset = "petfinder"
        file_name = f"{dataset}_for_unit_tests.zip"
        url = get_repo_url() + file_name
        save_path = os.path.join(get_data_home_dir(), file_name)
        self._path = os.path.join(get_data_home_dir(), dataset)
        download(
            url=url,
            path=save_path,
            sha1_hash=sha1sum_id,
        )
        protected_zip_extraction(
            save_path,
            sha1_hash=sha1sum_id,
            folder=self._path,
        )
        self._train_df = pd.read_csv(os.path.join(self._path, 'train.csv'), index_col=0)
        self._test_df = pd.read_csv(os.path.join(self._path, 'test.csv'), index_col=0)

        # The base folder is identical for every column and row, so it is
        # computed once and shared by a single expander.
        image_folder = os.path.join(self._path, "images")

        def expand(ele):
            return path_expander(ele, base_folder=image_folder)

        for img_col in self.image_columns:
            self._train_df[img_col] = self._train_df[img_col].apply(expand)
            self._test_df[img_col] = self._test_df[img_col].apply(expand)
            # Positional access: the index is still the CSV's first column
            # here, so the label-based [0] would raise KeyError if no row is
            # labelled 0.
            print(self._train_df[img_col].iloc[0])
            print(self._test_df[img_col].iloc[0])

        # Fixed seeds keep the selected subset reproducible across runs.
        _, self._train_df = train_test_split(
            self._train_df,
            test_size=0.1,
            random_state=np.random.RandomState(123),
            stratify=self._train_df[self.label_columns[0]],
        )
        _, self._test_df = train_test_split(
            self._test_df,
            test_size=0.1,
            random_state=np.random.RandomState(123),
            stratify=self._test_df[self.label_columns[0]],
        )
        self._train_df.reset_index(drop=True, inplace=True)
        self._test_df.reset_index(drop=True, inplace=True)

        print(f"train sample num: {len(self._train_df)}")
        print(f"test sample num: {len(self._test_df)}")
Пример #7
0
    def repo_options(self):
        """This is different from env.repo_options because this contains all
        repositories including deactivated ones.

        This is defined as a property so that it is evaluated lazily only
        when necessary.

        Returns:
            A list of utils.Struct objects with the fields repo and url, one
            per entry of sorted(model.Repo.list()), where url is the
            repository URL with '/admin' appended. Returns None if building
            the list raised an exception (the exception is logged).
        """
        try:
            return [
                utils.Struct(repo=repo,
                             url=utils.get_repo_url(self.request, repo) +
                             '/admin') for repo in sorted(model.Repo.list())
            ]
        except Exception:
            # Log the exception here because exceptions thrown during template
            # variable evaluation are silently ignored. Note that
            # logging.exception() logs the current exception by default.
            # Exception (rather than a bare except) lets SystemExit and
            # KeyboardInterrupt propagate instead of being swallowed.
            logging.exception('Exception thrown')
            return None
Пример #8
0
    def repo_options(self):
        """This is different from env.repo_options because this contains all
        repositories including deactivated ones.

        This is defined as a property so that it is evaluated lazily only
        when necessary.

        Returns:
            A list of utils.Struct objects with the fields repo and url, one
            per entry of sorted(model.Repo.list()), where url is the
            repository URL with '/admin' appended. Returns None if building
            the list raised an exception (the exception is logged).
        """
        try:
            return [
                utils.Struct(
                    repo=repo,
                    url=utils.get_repo_url(self.request, repo) + '/admin')
                for repo in sorted(model.Repo.list())]
        except Exception:
            # Log the exception here because exceptions thrown during template
            # variable evaluation are silently ignored. Note that
            # logging.exception() logs the current exception by default.
            # Catching Exception instead of using a bare except keeps
            # SystemExit and KeyboardInterrupt from being swallowed.
            logging.exception('Exception thrown')
            return None
Пример #9
0
    def __init__(self):
        """Download the cross-lingual Amazon review archive and load samples.

        The archive is fetched from ``get_repo_url()`` and unpacked into
        ``get_data_home_dir()``. ``en_train.tsv`` and ``en_test.tsv`` are read
        as header-less tab-separated files with the columns ``label`` and
        ``text``; 1000 training rows and 200 test rows are sampled with
        ``random_state=123`` and stored, re-indexed from zero, in
        ``self._train_en_df`` and ``self._test_en_df``.
        """
        checksum = "9c701aa6fc42ec3fe429bfe85a8dac4532ab9fcd"
        name = "amazon_review_sentiment_cross_lingual"
        archive_name = f"{name}.zip"
        archive_url = get_repo_url() + archive_name
        archive_path = os.path.join(get_data_home_dir(), archive_name)
        self._path = os.path.join(get_data_home_dir(), name)

        download(url=archive_url, path=archive_path, sha1_hash=checksum)
        protected_zip_extraction(
            archive_path, sha1_hash=checksum, folder=get_data_home_dir())

        def read_sample(tsv_name, num_rows):
            # Both splits share the same layout: no header row, tab-separated.
            frame = pd.read_csv(
                os.path.join(self._path, tsv_name),
                sep="\t",
                header=None,
                names=["label", "text"],
            )
            return frame.sample(num_rows, random_state=123)

        self._train_en_df = read_sample("en_train.tsv", 1000)
        self._test_en_df = read_sample("en_test.tsv", 200)

        # Drop the original row labels left behind by sampling.
        for frame in (self._train_en_df, self._test_en_df):
            frame.reset_index(drop=True, inplace=True)

        print(f"train sample num: {len(self._train_en_df)}")
        print(f"test sample num: {len(self._test_en_df)}")
Пример #10
0
    def __init__(self,):
        """Download the "ae" unit-test archive and load its train/test CSVs.

        The archive is fetched from ``get_repo_url()`` into
        ``get_data_home_dir()`` and unpacked into ``self._path``;
        ``train.csv`` and ``test.csv`` are read (first column as index) into
        ``self._train_df`` and ``self._test_df``, then re-indexed from zero.
        """
        checksum = "8c2a25555c49ef2b30545004488022465808d03f"
        name = "ae"
        archive_name = f"{name}_for_unit_tests.zip"
        archive_url = get_repo_url() + archive_name
        archive_path = os.path.join(get_data_home_dir(), archive_name)
        self._path = os.path.join(get_data_home_dir(), name)

        download(url=archive_url, path=archive_path, sha1_hash=checksum)
        protected_zip_extraction(
            archive_path, sha1_hash=checksum, folder=self._path)

        self._train_df, self._test_df = (
            pd.read_csv(os.path.join(self._path, csv_name), index_col=0)
            for csv_name in ('train.csv', 'test.csv')
        )

        # Replace the CSV-provided index with a plain 0..n-1 range.
        for frame in (self._train_df, self._test_df):
            frame.reset_index(drop=True, inplace=True)

        print(f"train sample num: {len(self._train_df)}")
        print(f"test sample num: {len(self._test_df)}")
Пример #11
0
def setup_env(request):
    """Constructs the 'env' object, which contains various template variables
    that are commonly used by most handlers.

    The Struct is filled in this order: the repo/action parsed from the
    request and the matching configuration; the analytics_id, amp_gtm_id and
    maps_api_key settings; charset and language settings; the resource
    bundle; URL components of the request; option lists for templates; the
    UI mode ('small', 'light', 'default' or whatever the 'ui' parameter
    says) and the feature flags derived from it; the back-chevron glyph; the
    JavaScript-dependent switches (maps, analytics, translate); the admin
    environment; and finally repository-specific values, which are only set
    when env.repo is truthy.

    Args:
        request: The incoming request. This function reads request.url,
            request.cookies and request.get(), passes the request to several
            helpers, and assigns request.charset as a side effect.

    Returns:
        The populated utils.Struct.
    """
    env = utils.Struct()
    env.repo, env.action = get_repo_and_action(request)
    env.config = config.Configuration(env.repo or '*')

    env.analytics_id = config.get('analytics_id')
    env.amp_gtm_id = config.get('amp_gtm_id')
    env.maps_api_key = config.get('maps_api_key')

    # Internationalization-related stuff.
    env.charset = select_charset(request)
    env.lang = select_lang(request, env.config)
    env.rtl = env.lang in const.LANGUAGES_BIDI
    env.virtual_keyboard_layout = const.VIRTUAL_KEYBOARD_LAYOUTS.get(env.lang)

    # Used for parsing query params. This must be done before accessing any
    # query params which may have multi-byte value, such as "given_name" below
    # in this function.
    request.charset = env.charset

    # Determine the resource bundle to use.
    env.default_resource_bundle = config.get('default_resource_bundle', '1')
    env.resource_bundle = (request.cookies.get('resource_bundle', '')
                           or env.default_resource_bundle)

    # Information about the request.
    env.url = utils.set_url_param(request.url, 'lang', env.lang)
    env.scheme, env.netloc, env.path, _, _ = urlparse.urlsplit(request.url)
    env.force_https = False
    env.domain = env.netloc.split(':')[0]
    env.global_url = utils.get_repo_url(request, 'global')

    # Commonly used information that's rendered or localized for templates.
    env.language_options = get_language_options(request, env.config, env.lang)
    env.repo_options = get_repo_options(request, env.lang)
    env.expiry_options = [
        utils.Struct(value=value, text=const.PERSON_EXPIRY_TEXT[value])
        for value in sorted(const.PERSON_EXPIRY_TEXT.keys(), key=int)
    ]
    env.status_options = [
        utils.Struct(value=value, text=const.NOTE_STATUS_TEXT[value])
        for value in pfif.NOTE_STATUS_VALUES
        if (value != 'believed_dead' or not env.config
            or env.config.allow_believed_dead_via_ui)
    ]
    env.hidden_input_tags_for_preserved_query_params = (
        get_hidden_input_tags_for_preserved_query_params(request))

    ui_param = request.get('ui', '').strip().lower()

    # Interprets "small" and "style" parameters for backward compatibility.
    # TODO(ichikawa): Delete these in near future when we decide to drop
    # support of these parameters.
    small_param = request.get('small', '').strip().lower()
    style_param = request.get('style', '').strip().lower()
    if not ui_param and small_param == 'yes':
        ui_param = 'small'
    elif not ui_param and style_param:
        ui_param = style_param

    if ui_param:
        env.ui = ui_param
    elif user_agents.is_jp_tier2_mobile_phone(request):
        env.ui = 'light'
    else:
        env.ui = 'default'

    # UI configurations.
    #
    # Enables features which require JavaScript.
    env.enable_javascript = True
    # Enables operations which requires Captcha.
    env.enable_captcha = True
    # Enables photo upload.
    env.enable_photo_upload = True
    # Enables to flag/unflag notes as spam, and to reveal spam notes.
    env.enable_spam_ops = True
    # Enables duplicate marking mode.
    env.enable_dup_mode = True
    # Shows a logo on top of the page.
    env.show_logo = True
    # Shows language menu.
    env.show_language_menu = True
    # Uses short labels for buttons.
    env.use_short_buttons = False
    # Optional "target" attribute for links to non-small pages.
    env.target_attr = ''
    # Shows record IDs in the results page.
    env.show_record_ids_in_results = True
    # Shows non AMP HTML pages by default.
    env.amp = False

    if env.ui == 'small':
        env.show_logo = False
        env.target_attr = ' target="_blank" '

    elif env.ui == 'light':
        # Disables features which requires JavaScript. Some feature phones
        # doesn't support JavaScript.
        env.enable_javascript = False
        # Disables operations which requires Captcha because Captcha requires
        # JavaScript.
        env.enable_captcha = False
        # Uploading is often not supported in feature phones.
        env.enable_photo_upload = False
        # Disables spam operations because it requires JavaScript and
        # supporting more pages on ui=light.
        env.enable_spam_ops = False
        # Disables duplicate marking mode because it doesn't support
        # small screens and it requires JavaScript.
        env.enable_dup_mode = False
        # Hides the logo on the top to save the space. Also, the logo links
        # to the global page which doesn't support small screens.
        env.show_logo = False
        # Hides language menu because the menu in the current position is
        # annoying in feature phones.
        # TODO(ichikawa): Consider layout of the language menu.
        env.show_language_menu = False
        # Too long buttons are not fully shown in some feature phones.
        env.use_short_buttons = True
        # To make it simple.
        env.show_record_ids_in_results = False

    env.back_chevron = u'\xab'
    back_chevron_in_charset = True
    try:
        env.back_chevron.encode(env.charset)
    except UnicodeEncodeError:
        # u'\xab' is not in the charset (e.g. Shift_JIS).
        back_chevron_in_charset = False
    if not back_chevron_in_charset or env.ui == 'light':
        # Use ASCII characters on ui=light too because some feature phones
        # support UTF-8 but don't render UTF-8 symbols such as u'\xab'.
        env.back_chevron = u'<<'

    env.enable_maps = (env.enable_javascript
                       and not env.config.zero_rating_mode
                       and env.maps_api_key)
    env.enable_analytics = (env.enable_javascript
                            and not env.config.zero_rating_mode
                            and env.analytics_id)
    env.enable_translate = (env.enable_javascript
                            and not env.config.zero_rating_mode
                            and env.config.translate_api_key)

    env.admin = AdminEnv(request)

    # Repo-specific information.
    if env.repo:
        # repo_url is the root URL for the repository.
        env.repo_url = utils.get_repo_url(request, env.repo)
        # start_url is like repo_url but preserves parameters such as 'ui'.
        env.start_url = utils.get_url(request, env.repo, '')
        # URL of the link in the heading. The link on ui=small links to the
        # normal UI.
        env.repo_title_url = (env.repo_url
                              if env.ui == 'small' else env.start_url)
        # URL to force default UI. Note that we show ui=light version in some
        # user agents when ui parameter is not specified.
        env.default_ui_url = utils.get_url(request, env.repo, '', ui='default')
        env.repo_path = urlparse.urlsplit(env.repo_url)[2]
        env.repo_title = get_localized_message(env.config.repo_titles,
                                               env.lang, '?')
        env.start_page_custom_html = get_localized_message(
            env.config.start_page_custom_htmls, env.lang, '')
        env.results_page_custom_html = get_localized_message(
            env.config.results_page_custom_htmls, env.lang, '')
        env.view_page_custom_html = get_localized_message(
            env.config.view_page_custom_htmls, env.lang, '')
        env.seek_query_form_custom_html = get_localized_message(
            env.config.seek_query_form_custom_htmls, env.lang, '')
        env.footer_custom_html = get_localized_message(
            env.config.footer_custom_htmls, env.lang, '')
        # If the repository is deactivated, we should not show test mode
        # notification.
        env.repo_test_mode = (env.config.test_mode
                              and not env.config.deactivated)
        env.force_https = env.config.force_https

        env.params_full_name = request.get('full_name', '').strip()
        if not env.params_full_name:
            # Preformat the name from 'given_name' and 'family_name' parameters.
            given_name = request.get('given_name', '').strip()
            family_name = request.get('family_name', '').strip()
            env.params_full_name = utils.get_full_name(given_name, family_name,
                                                       env.config)

    return env
Пример #12
0
def setup_env(request):
    """Constructs the 'env' object, which contains various template variables
    that are commonly used by most handlers.

    The Struct is filled in this order: the repo/action parsed from the
    request and the matching configuration; the local test_mode flag (only
    truthy for requests from 127.0.0.1 carrying a 'test_mode' parameter);
    analytics_id and maps_api_key, each set to None when the remote address
    falls in the corresponding configured IP networks and read with
    get_secret() otherwise; charset and language settings; the resource
    bundle; URL components of the request; option lists for templates; the
    UI mode ('small', 'light', 'default' or whatever the 'ui' parameter
    says) and the feature flags derived from it; the back-chevron glyph; and
    finally repository-specific values, which are only set when env.repo is
    truthy.

    Args:
        request: The incoming request. This function reads
            request.remote_addr, request.url, request.cookies and
            request.get(), passes the request to several helpers, and
            assigns request.charset as a side effect.

    Returns:
        The populated utils.Struct.
    """
    env = utils.Struct()
    env.repo, env.action = get_repo_and_action(request)
    env.config = config.Configuration(env.repo or '*')
    # TODO(ryok): Rename to local_test_mode or something alike to disambiguate
    # better from repository's test_mode.
    env.test_mode = (request.remote_addr == '127.0.0.1' and
                     request.get('test_mode'))

    # We sometimes want to disable analytics/maps for requests from a specific
    # mobile carrier (specified by IP ranges).
    # In this way, we can avoid requests to sites outside google.org, and
    # allow the carrier to zero-rate access to Person Finder.
    # TODO(ichikawa): Add server test for this feature.
    #
    # TODO(kpy): Make these global config settings and get rid of get_secret().

    if utils.is_ip_address_in_one_of_networks(
            request.remote_addr, env.config.ip_networks_to_disable_analytics):
        env.analytics_id = None
    else:
        env.analytics_id = get_secret('analytics_id')

    if utils.is_ip_address_in_one_of_networks(
            request.remote_addr, env.config.ip_networks_to_disable_maps):
        env.maps_api_key = None
    else:
        env.maps_api_key = get_secret('maps_api_key')

    # Internationalization-related stuff.
    env.charset = select_charset(request)
    env.lang = select_lang(request, env.config)
    env.rtl = env.lang in django_setup.LANGUAGES_BIDI
    env.virtual_keyboard_layout = const.VIRTUAL_KEYBOARD_LAYOUTS.get(env.lang)

    # Used for parsing query params. This must be done before accessing any
    # query params which may have multi-byte value, such as "given_name" below
    # in this function.
    request.charset = env.charset

    # Determine the resource bundle to use.
    env.default_resource_bundle = config.get('default_resource_bundle', '1')
    env.resource_bundle = (request.cookies.get('resource_bundle', '') or
                           env.default_resource_bundle)

    # Information about the request.
    env.url = utils.set_url_param(request.url, 'lang', env.lang)
    env.scheme, env.netloc, env.path, _, _ = urlparse.urlsplit(request.url)
    env.force_https = False
    env.domain = env.netloc.split(':')[0]
    env.global_url = utils.get_repo_url(request, 'global')

    # Commonly used information that's rendered or localized for templates.
    env.language_options = get_language_options(request, env.config)
    env.repo_options = get_repo_options(request, env.lang)
    env.expiry_options = [
        utils.Struct(value=value, text=const.PERSON_EXPIRY_TEXT[value])
        for value in sorted(const.PERSON_EXPIRY_TEXT.keys(), key=int)
    ]
    env.status_options = [
        utils.Struct(value=value, text=const.NOTE_STATUS_TEXT[value])
        for value in pfif.NOTE_STATUS_VALUES
        if (value != 'believed_dead' or
            not env.config or env.config.allow_believed_dead_via_ui)
    ]
    env.hidden_input_tags_for_preserved_query_params = (
        get_hidden_input_tags_for_preserved_query_params(request))

    ui_param = request.get('ui', '').strip().lower()

    # Interprets "small" and "style" parameters for backward compatibility.
    # TODO(ichikawa): Delete these in near future when we decide to drop
    # support of these parameters.
    small_param = request.get('small', '').strip().lower()
    style_param = request.get('style', '').strip().lower()
    if not ui_param and small_param == 'yes':
        ui_param = 'small'
    elif not ui_param and style_param:
        ui_param = style_param

    if ui_param:
        env.ui = ui_param
    elif user_agents.is_jp_tier2_mobile_phone(request):
        env.ui = 'light'
    else:
        env.ui = 'default'

    # UI configurations.
    #
    # Enables features which require JavaScript.
    env.enable_javascript = True
    # Enables operations which requires Captcha.
    env.enable_captcha = True
    # Enables photo upload.
    env.enable_photo_upload = True
    # Enables to flag/unflag notes as spam, and to reveal spam notes.
    env.enable_spam_ops = True
    # Enables duplicate marking mode.
    env.enable_dup_mode = True
    # Shows a logo on top of the page.
    env.show_logo = True
    # Shows language menu.
    env.show_language_menu = True
    # Uses short labels for buttons.
    env.use_short_buttons = False
    # Optional "target" attribute for links to non-small pages.
    env.target_attr = ''
    # Shows record IDs in the results page.
    env.show_record_ids_in_results = True

    if env.ui == 'small':
        env.show_logo = False
        env.target_attr = ' target="_blank" '

    elif env.ui == 'light':
        # Disables features which requires JavaScript. Some feature phones
        # doesn't support JavaScript.
        env.enable_javascript = False
        # Disables operations which requires Captcha because Captcha requires
        # JavaScript.
        env.enable_captcha = False
        # Uploading is often not supported in feature phones.
        env.enable_photo_upload = False
        # Disables spam operations because it requires JavaScript and
        # supporting more pages on ui=light.
        env.enable_spam_ops = False
        # Disables duplicate marking mode because it doesn't support
        # small screens and it requires JavaScript.
        env.enable_dup_mode = False
        # Hides the logo on the top to save the space. Also, the logo links
        # to the global page which doesn't support small screens.
        env.show_logo = False
        # Hides language menu because the menu in the current position is
        # annoying in feature phones.
        # TODO(ichikawa): Consider layout of the language menu.
        env.show_language_menu = False
        # Too long buttons are not fully shown in some feature phones.
        env.use_short_buttons = True
        # To make it simple.
        env.show_record_ids_in_results = False

    env.back_chevron = u'\xab'
    back_chevron_in_charset = True
    try:
        env.back_chevron.encode(env.charset)
    except UnicodeEncodeError:
        # u'\xab' is not in the charset (e.g. Shift_JIS).
        back_chevron_in_charset = False
    if not back_chevron_in_charset or env.ui == 'light':
        # Use ASCII characters on ui=light too because some feature phones
        # support UTF-8 but don't render UTF-8 symbols such as u'\xab'.
        env.back_chevron = u'<<'

    # Repo-specific information.
    if env.repo:
        # repo_url is the root URL for the repository.
        env.repo_url = utils.get_repo_url(request, env.repo)
        # start_url is like repo_url but preserves parameters such as 'ui'.
        env.start_url = utils.get_url(request, env.repo, '')
        # URL of the link in the heading. The link on ui=small links to the
        # normal UI.
        env.repo_title_url = (
            env.repo_url if env.ui == 'small' else env.start_url)
        # URL to force default UI. Note that we show ui=light version in some
        # user agents when ui parameter is not specified.
        env.default_ui_url = utils.get_url(request, env.repo, '', ui='default')
        env.repo_path = urlparse.urlsplit(env.repo_url)[2]
        env.repo_title = get_localized_message(
            env.config.repo_titles, env.lang, '?')
        env.start_page_custom_html = get_localized_message(
            env.config.start_page_custom_htmls, env.lang, '')
        env.results_page_custom_html = get_localized_message(
            env.config.results_page_custom_htmls, env.lang, '')
        env.view_page_custom_html = get_localized_message(
            env.config.view_page_custom_htmls, env.lang, '')
        env.seek_query_form_custom_html = get_localized_message(
            env.config.seek_query_form_custom_htmls, env.lang, '')
        env.footer_custom_html = get_localized_message(
            env.config.footer_custom_htmls, env.lang, '')
        # If the repository is deactivated, we should not show test mode
        # notification.
        env.repo_test_mode = (
            env.config.test_mode and not env.config.deactivated)
        env.force_https = env.config.force_https

        env.params_full_name = request.get('full_name', '').strip()
        if not env.params_full_name:
            # Preformat the name from 'given_name' and 'family_name' parameters.
            given_name = request.get('given_name', '').strip()
            family_name = request.get('family_name', '').strip()
            env.params_full_name = utils.get_full_name(
                given_name, family_name, env.config)

    return env