def __init__(self,):
    """Download, extract and load the ``san_francisco_airbnb`` test data.

    The archive ``san_francisco_airbnb_for_unit_tests.zip`` is fetched from
    the location returned by ``get_repo_url()`` into the directory returned
    by ``get_data_home_dir()`` and extracted into a ``san_francisco_airbnb``
    sub-folder (stored as ``self._path``).  ``train.csv`` and ``test.csv``
    are read with their first column as index, every column listed in
    ``self.image_columns`` is passed through ``path_expander`` with the
    extracted ``images`` folder as base, and both frames finally get a
    fresh 0..n-1 index.

    Side effects: network download, writes under the data home directory,
    and progress output on stdout.
    """
    sha1sum_id = "652a17f1315ec0961336aa140cf983776400c933"
    dataset = "san_francisco_airbnb"
    file_name = f"{dataset}_for_unit_tests.zip"
    url = get_repo_url() + file_name
    data_home = get_data_home_dir()
    save_path = os.path.join(data_home, file_name)
    self._path = os.path.join(data_home, dataset)
    download(
        url=url,
        path=save_path,
        sha1_hash=sha1sum_id,
    )
    protected_zip_extraction(
        save_path,
        sha1_hash=sha1sum_id,
        folder=self._path,
    )
    self._train_df = pd.read_csv(os.path.join(self._path, 'train.csv'), index_col=0)
    self._test_df = pd.read_csv(os.path.join(self._path, 'test.csv'), index_col=0)

    # The image folder does not depend on the row, so build it once instead
    # of once per element inside the apply callback.
    image_dir = os.path.join(self._path, "images")

    def expand(ele):
        return path_expander(ele, base_folder=image_dir)

    for img_col in self.image_columns:
        self._train_df[img_col] = self._train_df[img_col].apply(expand)
        self._test_df[img_col] = self._test_df[img_col].apply(expand)
        # Show the first row positionally.  The index still comes from the
        # CSV at this point, so a label lookup with ``[0]`` would fail when
        # no row happens to be labelled 0.
        print(self._train_df[img_col].iloc[0])
        print(self._test_df[img_col].iloc[0])

    self._train_df.reset_index(drop=True, inplace=True)
    self._test_df.reset_index(drop=True, inplace=True)
    print(f"train sample num: {len(self._train_df)}")
    print(f"test sample num: {len(self._test_df)}")
def __init__(self,):
    """Download, extract and load the ``hateful_memes`` test data.

    The archive ``hateful_memes_for_unit_tests.zip`` is fetched from the
    location returned by ``get_repo_url()`` into the directory returned by
    ``get_data_home_dir()`` and extracted into a ``hateful_memes``
    sub-folder (stored as ``self._path``).  ``train.csv`` and ``test.csv``
    are read with their first column as index, every column listed in
    ``self.image_columns`` is passed through ``path_expander`` with the
    extracted ``images`` folder as base, and both frames finally get a
    fresh 0..n-1 index.

    Side effects: network download, writes under the data home directory,
    and progress output on stdout.
    """
    sha1sum_id = "2aae657b786f505004ac2922b66097d60a540a58"
    dataset = "hateful_memes"
    file_name = f"{dataset}_for_unit_tests.zip"
    url = get_repo_url() + file_name
    data_home = get_data_home_dir()
    save_path = os.path.join(data_home, file_name)
    self._path = os.path.join(data_home, dataset)
    download(
        url=url,
        path=save_path,
        sha1_hash=sha1sum_id,
    )
    protected_zip_extraction(
        save_path,
        sha1_hash=sha1sum_id,
        folder=self._path,
    )
    self._train_df = pd.read_csv(os.path.join(self._path, 'train.csv'), index_col=0)
    self._test_df = pd.read_csv(os.path.join(self._path, 'test.csv'), index_col=0)

    # The image folder does not depend on the row, so build it once instead
    # of once per element inside the apply callback.
    image_dir = os.path.join(self._path, "images")

    def expand(ele):
        return path_expander(ele, base_folder=image_dir)

    for img_col in self.image_columns:
        self._train_df[img_col] = self._train_df[img_col].apply(expand)
        self._test_df[img_col] = self._test_df[img_col].apply(expand)
        # Show the first row positionally.  The index still comes from the
        # CSV at this point, so a label lookup with ``[0]`` would fail when
        # no row happens to be labelled 0.
        print(self._train_df[img_col].iloc[0])
        print(self._test_df[img_col].iloc[0])

    self._train_df.reset_index(drop=True, inplace=True)
    self._test_df.reset_index(drop=True, inplace=True)
    print(f"train sample num: {len(self._train_df)}")
    print(f"test sample num: {len(self._test_df)}")
def parse_spec(provider, spec):
    """Split ``spec`` for ``provider`` into its ref, org and repo URL.

    Returns:
        A ``(ref, org, repo_url)`` tuple produced by ``get_ref``,
        ``get_org`` and ``get_repo_url`` respectively, each called with
        ``(provider, spec)``.
    """
    # NOTE: repo_url must be unique, e.g. it must be the same for specs
    # such as "1-Nameless-1/Lign167.git/master" and
    # "1-Nameless-1/Lign167/master", so repo_urls are generated here
    # instead of in create_repo_table.py.
    return (
        get_ref(provider, spec),
        get_org(provider, spec),
        get_repo_url(provider, spec),
    )
def get_repo_options(request, lang):
    """Returns a list of the names and titles of the launched repositories.

    Args:
        request: The current request; passed to utils.get_repo_url() to
            build each repository's URL.
        lang: Language code used to pick the repository title.

    Returns:
        A list with one utils.Struct per repository yielded by
        model.Repo.list_launched(), with the attributes:
            repo: the repository name.
            title: the title for `lang`, falling back to the 'en' title,
                then to the first configured title, then to '?'.
            url: the repository URL.
            test_mode: the repository's 'test_mode' config value.
    """
    options = []
    for repo in model.Repo.list_launched():
        titles = config.get_for_repo(repo, 'repo_titles', {})
        # Take the first configured title without indexing the result of
        # values(): on Python 3 that is a view object which does not
        # support [0].  next() with a default yields the same value on
        # Python 2 and covers the "no titles" case as well.
        default_title = next(iter(titles.values()), '?')
        title = titles.get(lang, titles.get('en', default_title))
        url = utils.get_repo_url(request, repo)
        test_mode = config.get_for_repo(repo, 'test_mode')
        options.append(
            utils.Struct(repo=repo, title=title, url=url,
                         test_mode=test_mode))
    return options
def get_repo_options(request, lang):
    """Returns a list of the names and titles of the launched repositories.

    Args:
        request: The current request; passed to utils.get_repo_url() to
            build each repository's URL.
        lang: Language code used to pick the repository title.

    Returns:
        A list with one utils.Struct per repository yielded by
        model.Repo.list_launched(), with the attributes:
            repo: the repository name.
            title: the title for `lang`, falling back to the 'en' title,
                then to the first configured title, then to '?'.
            url: the repository URL.
            test_mode: the repository's 'test_mode' config value.
    """
    options = []
    for repo in model.Repo.list_launched():
        titles = config.get_for_repo(repo, 'repo_titles', {})
        # Take the first configured title without indexing the result of
        # values(): on Python 3 that is a view object which does not
        # support [0].  next() with a default yields the same value on
        # Python 2 and covers the "no titles" case as well.
        default_title = next(iter(titles.values()), '?')
        title = titles.get(lang, titles.get('en', default_title))
        url = utils.get_repo_url(request, repo)
        test_mode = config.get_for_repo(repo, 'test_mode')
        options.append(utils.Struct(repo=repo,
                                    title=title,
                                    url=url,
                                    test_mode=test_mode))
    return options
def __init__(self,):
    """Download, extract, load and subsample the ``petfinder`` test data.

    The archive ``petfinder_for_unit_tests.zip`` is fetched from the
    location returned by ``get_repo_url()`` into the directory returned by
    ``get_data_home_dir()`` and extracted into a ``petfinder`` sub-folder
    (stored as ``self._path``).  ``train.csv`` and ``test.csv`` are read
    with their first column as index and every column listed in
    ``self.image_columns`` is passed through ``path_expander`` with the
    extracted ``images`` folder as base.

    Each frame is then reduced to the second partition returned by
    ``train_test_split`` with ``test_size=0.1``, stratified on the first
    entry of ``self.label_columns`` and seeded with ``RandomState(123)``,
    before both frames get a fresh 0..n-1 index.

    Side effects: network download, writes under the data home directory,
    and progress output on stdout.
    """
    sha1sum_id = "72cb19612318bb304d4a169804f525f88dc3f0d0"
    dataset = "petfinder"
    file_name = f"{dataset}_for_unit_tests.zip"
    url = get_repo_url() + file_name
    data_home = get_data_home_dir()
    save_path = os.path.join(data_home, file_name)
    self._path = os.path.join(data_home, dataset)
    download(
        url=url,
        path=save_path,
        sha1_hash=sha1sum_id,
    )
    protected_zip_extraction(
        save_path,
        sha1_hash=sha1sum_id,
        folder=self._path,
    )
    self._train_df = pd.read_csv(os.path.join(self._path, 'train.csv'), index_col=0)
    self._test_df = pd.read_csv(os.path.join(self._path, 'test.csv'), index_col=0)

    # The image folder does not depend on the row, so build it once instead
    # of once per element inside the apply callback.
    image_dir = os.path.join(self._path, "images")

    def expand(ele):
        return path_expander(ele, base_folder=image_dir)

    for img_col in self.image_columns:
        self._train_df[img_col] = self._train_df[img_col].apply(expand)
        self._test_df[img_col] = self._test_df[img_col].apply(expand)
        # Show the first row positionally.  The index still comes from the
        # CSV at this point, so a label lookup with ``[0]`` would fail when
        # no row happens to be labelled 0.
        print(self._train_df[img_col].iloc[0])
        print(self._test_df[img_col].iloc[0])

    # Keep only the small (10%) partition of each stratified split; the
    # large partition is discarded.
    _, self._train_df = train_test_split(
        self._train_df,
        test_size=0.1,
        random_state=np.random.RandomState(123),
        stratify=self._train_df[self.label_columns[0]],
    )
    _, self._test_df = train_test_split(
        self._test_df,
        test_size=0.1,
        random_state=np.random.RandomState(123),
        stratify=self._test_df[self.label_columns[0]],
    )
    self._train_df.reset_index(drop=True, inplace=True)
    self._test_df.reset_index(drop=True, inplace=True)
    print(f"train sample num: {len(self._train_df)}")
    print(f"test sample num: {len(self._test_df)}")
def repo_options(self):
    """This is different from env.repo_options because this contains all
    repositories including deactivated ones.

    This is defined as a property so that it is evaluated lazily only
    when necessary.

    Returns:
        A list of utils.Struct(repo=..., url=...) for every repository
        from model.Repo.list(), sorted, where url is the repository URL
        with '/admin' appended; or None if building the list raised an
        exception (the exception is logged).
    """
    try:
        return [
            utils.Struct(
                repo=repo,
                url=utils.get_repo_url(self.request, repo) + '/admin')
            for repo in sorted(model.Repo.list())
        ]
    except Exception:
        # Logs the exception here because exceptions thrown during template
        # variable evaluation are silently ignored. Note that
        # logging.exception() logs the current exception by default.
        #
        # Only Exception is caught: a bare "except:" would also swallow
        # SystemExit, KeyboardInterrupt and other BaseException subclasses
        # that are meant to propagate.
        logging.exception('Exception thrown')
        return None
def repo_options(self):
    """This is different from env.repo_options because this contains all
    repositories including deactivated ones.

    This is defined as a property so that it is evaluated lazily only
    when necessary.

    Returns:
        A list of utils.Struct(repo=..., url=...) for every repository
        from model.Repo.list(), sorted, where url is the repository URL
        with '/admin' appended; or None if building the list raised an
        exception (the exception is logged).
    """
    try:
        return [
            utils.Struct(
                repo=repo,
                url=utils.get_repo_url(self.request, repo) + '/admin')
            for repo in sorted(model.Repo.list())]
    except Exception:
        # Logs the exception here because exceptions thrown during template
        # variable evaluation are silently ignored. Note that
        # logging.exception() logs the current exception by default.
        #
        # Only Exception is caught: a bare "except:" would also swallow
        # SystemExit, KeyboardInterrupt and other BaseException subclasses
        # that are meant to propagate.
        logging.exception('Exception thrown')
        return None
def __init__(self):
    """Fetch the cross-lingual Amazon review archive and load English samples.

    ``amazon_review_sentiment_cross_lingual.zip`` is downloaded from the
    location returned by ``get_repo_url()`` and extracted directly into the
    data home directory; the files are then read from the
    ``amazon_review_sentiment_cross_lingual`` sub-folder (``self._path``).
    ``en_train.tsv`` and ``en_test.tsv`` are parsed as header-less,
    tab-separated files with columns ``label`` and ``text`` and reduced to
    1000 and 200 randomly sampled rows (seed 123), then re-indexed.

    Side effects: network download, writes under the data home directory,
    and progress output on stdout.
    """
    dataset = "amazon_review_sentiment_cross_lingual"
    sha1sum_id = "9c701aa6fc42ec3fe429bfe85a8dac4532ab9fcd"
    file_name = f"{dataset}.zip"
    url = get_repo_url() + file_name
    save_path = os.path.join(get_data_home_dir(), file_name)
    self._path = os.path.join(get_data_home_dir(), dataset)

    download(url=url, path=save_path, sha1_hash=sha1sum_id)
    # Extraction targets the data home directory itself, not self._path.
    protected_zip_extraction(
        save_path, sha1_hash=sha1sum_id, folder=get_data_home_dir()
    )

    def load_sample(tsv_name, num_rows):
        # Read one label/text TSV and draw a reproducible random subset.
        full_frame = pd.read_csv(
            os.path.join(self._path, tsv_name),
            sep="\t",
            header=None,
            names=["label", "text"],
        )
        return full_frame.sample(num_rows, random_state=123)

    self._train_en_df = load_sample("en_train.tsv", 1000)
    self._test_en_df = load_sample("en_test.tsv", 200)

    for frame in (self._train_en_df, self._test_en_df):
        frame.reset_index(drop=True, inplace=True)

    print(f"train sample num: {len(self._train_en_df)}")
    print(f"test sample num: {len(self._test_en_df)}")
def __init__(self,):
    """Fetch the ``ae`` test archive and load its train/test tables.

    ``ae_for_unit_tests.zip`` is downloaded from the location returned by
    ``get_repo_url()`` into the data home directory and extracted into an
    ``ae`` sub-folder (``self._path``).  ``train.csv`` and ``test.csv`` are
    read with their first column as index, after which both frames are
    re-indexed 0..n-1.

    Side effects: network download, writes under the data home directory,
    and progress output on stdout.
    """
    dataset = "ae"
    sha1sum_id = "8c2a25555c49ef2b30545004488022465808d03f"
    file_name = f"{dataset}_for_unit_tests.zip"
    url = get_repo_url() + file_name
    save_path = os.path.join(get_data_home_dir(), file_name)
    self._path = os.path.join(get_data_home_dir(), dataset)

    download(url=url, path=save_path, sha1_hash=sha1sum_id)
    protected_zip_extraction(save_path, sha1_hash=sha1sum_id, folder=self._path)

    train_csv = os.path.join(self._path, 'train.csv')
    test_csv = os.path.join(self._path, 'test.csv')
    self._train_df = pd.read_csv(train_csv, index_col=0)
    self._test_df = pd.read_csv(test_csv, index_col=0)

    # Drop the CSV-provided index in favour of a plain positional one.
    for frame in (self._train_df, self._test_df):
        frame.reset_index(drop=True, inplace=True)

    print(f"train sample num: {len(self._train_df)}")
    print(f"test sample num: {len(self._test_df)}")
def setup_env(request):
    """Constructs the 'env' object, which contains various template variables
    that are commonly used by most handlers.

    Args:
        request: The incoming request. Its url, cookies and query parameters
            (via request.get) are read. As a side effect, request.charset is
            set to the selected charset.

    Returns:
        A utils.Struct carrying, among others: the repo and action, the
        configuration and API keys, internationalization settings (charset,
        lang, rtl), parts of the request URL, option lists for templates,
        the effective UI mode ('small', 'light', 'default' or whatever the
        'ui'/'style' parameter specified) with its feature flags, and, when
        env.repo is truthy, repo-specific URLs, titles and custom HTML.
    """
    env = utils.Struct()
    env.repo, env.action = get_repo_and_action(request)
    # Falls back to the '*' configuration when there is no repo.
    env.config = config.Configuration(env.repo or '*')
    env.analytics_id = config.get('analytics_id')
    env.amp_gtm_id = config.get('amp_gtm_id')
    env.maps_api_key = config.get('maps_api_key')

    # Internationalization-related stuff.
    env.charset = select_charset(request)
    env.lang = select_lang(request, env.config)
    env.rtl = env.lang in const.LANGUAGES_BIDI
    env.virtual_keyboard_layout = const.VIRTUAL_KEYBOARD_LAYOUTS.get(env.lang)

    # Used for parsing query params. This must be done before accessing any
    # query params which may have multi-byte value, such as "given_name" below
    # in this function.
    request.charset = env.charset

    # Determine the resource bundle to use. A non-empty 'resource_bundle'
    # cookie takes precedence over the configured default.
    env.default_resource_bundle = config.get('default_resource_bundle', '1')
    env.resource_bundle = (request.cookies.get('resource_bundle', '') or
                           env.default_resource_bundle)

    # Information about the request.
    env.url = utils.set_url_param(request.url, 'lang', env.lang)
    env.scheme, env.netloc, env.path, _, _ = urlparse.urlsplit(request.url)
    # Default; overwritten from the repo config below when there is a repo.
    env.force_https = False
    # The netloc without the port.
    env.domain = env.netloc.split(':')[0]
    env.global_url = utils.get_repo_url(request, 'global')

    # Commonly used information that's rendered or localized for templates.
    env.language_options = get_language_options(request, env.config, env.lang)
    env.repo_options = get_repo_options(request, env.lang)
    # The keys of PERSON_EXPIRY_TEXT are ordered by their integer value.
    env.expiry_options = [
        utils.Struct(value=value, text=const.PERSON_EXPIRY_TEXT[value])
        for value in sorted(const.PERSON_EXPIRY_TEXT.keys(), key=int)
    ]
    # 'believed_dead' is offered only when there is no config or the config
    # allows it via the UI.
    env.status_options = [
        utils.Struct(value=value, text=const.NOTE_STATUS_TEXT[value])
        for value in pfif.NOTE_STATUS_VALUES
        if (value != 'believed_dead' or
            not env.config or env.config.allow_believed_dead_via_ui)
    ]
    env.hidden_input_tags_for_preserved_query_params = (
        get_hidden_input_tags_for_preserved_query_params(request))

    ui_param = request.get('ui', '').strip().lower()

    # Interprets "small" and "style" parameters for backward compatibility.
    # They are only consulted when "ui" itself is not given.
    # TODO(ichikawa): Delete these in near future when we decide to drop
    # support of these parameters.
    small_param = request.get('small', '').strip().lower()
    style_param = request.get('style', '').strip().lower()
    if not ui_param and small_param == 'yes':
        ui_param = 'small'
    elif not ui_param and style_param:
        ui_param = style_param

    # An explicit parameter wins; otherwise the user agent decides between
    # 'light' and 'default'.
    if ui_param:
        env.ui = ui_param
    elif user_agents.is_jp_tier2_mobile_phone(request):
        env.ui = 'light'
    else:
        env.ui = 'default'

    # UI configurations. These are the defaults; some of them are overridden
    # below for ui=small and ui=light.
    #
    # Enables features which require JavaScript.
    env.enable_javascript = True
    # Enables operations which require Captcha.
    env.enable_captcha = True
    # Enables photo upload.
    env.enable_photo_upload = True
    # Enables to flag/unflag notes as spam, and to reveal spam notes.
    env.enable_spam_ops = True
    # Enables duplicate marking mode.
    env.enable_dup_mode = True
    # Shows a logo on top of the page.
    env.show_logo = True
    # Shows language menu.
    env.show_language_menu = True
    # Uses short labels for buttons.
    env.use_short_buttons = False
    # Optional "target" attribute for links to non-small pages.
    env.target_attr = ''
    # Shows record IDs in the results page.
    env.show_record_ids_in_results = True
    # Shows non AMP HTML pages by default.
    env.amp = False

    if env.ui == 'small':
        env.show_logo = False
        env.target_attr = ' target="_blank" '
    elif env.ui == 'light':
        # Disables features which require JavaScript. Some feature phones
        # don't support JavaScript.
        env.enable_javascript = False
        # Disables operations which require Captcha because Captcha requires
        # JavaScript.
        env.enable_captcha = False
        # Uploading is often not supported in feature phones.
        env.enable_photo_upload = False
        # Disables spam operations because it requires JavaScript and
        # supporting more pages on ui=light.
        env.enable_spam_ops = False
        # Disables duplicate marking mode because it doesn't support
        # small screens and it requires JavaScript.
        env.enable_dup_mode = False
        # Hides the logo on the top to save the space. Also, the logo links
        # to the global page which doesn't support small screens.
        env.show_logo = False
        # Hides language menu because the menu in the current position is
        # annoying in feature phones.
        # TODO(ichikawa): Consider layout of the language menu.
        env.show_language_menu = False
        # Too long buttons are not fully shown in some feature phones.
        env.use_short_buttons = True
        # To make it simple.
        env.show_record_ids_in_results = False

    env.back_chevron = u'\xab'
    back_chevron_in_charset = True
    try:
        # The encoded result is discarded; this only probes whether the
        # character can be represented in the selected charset.
        env.back_chevron.encode(env.charset)
    except UnicodeEncodeError:
        # u'\xab' is not in the charset (e.g. Shift_JIS).
        back_chevron_in_charset = False
    if not back_chevron_in_charset or env.ui == 'light':
        # Use ASCII characters on ui=light too because some feature phones
        # support UTF-8 but don't render UTF-8 symbols such as u'\xab'.
        env.back_chevron = u'<<'

    # Each of the following is truthy only with JavaScript enabled, zero
    # rating mode off, and the corresponding ID/key present. Note that the
    # value is the result of the "and" chain, not necessarily a bool.
    env.enable_maps = (env.enable_javascript and
                       not env.config.zero_rating_mode and
                       env.maps_api_key)
    env.enable_analytics = (env.enable_javascript and
                            not env.config.zero_rating_mode and
                            env.analytics_id)
    env.enable_translate = (env.enable_javascript and
                            not env.config.zero_rating_mode and
                            env.config.translate_api_key)

    env.admin = AdminEnv(request)

    # Repo-specific information.
    if env.repo:
        # repo_url is the root URL for the repository.
        env.repo_url = utils.get_repo_url(request, env.repo)
        # start_url is like repo_url but preserves parameters such as 'ui'.
        env.start_url = utils.get_url(request, env.repo, '')
        # URL of the link in the heading. The link on ui=small links to the
        # normal UI.
        env.repo_title_url = (env.repo_url
                              if env.ui == 'small' else env.start_url)
        # URL to force default UI. Note that we show ui=light version in some
        # user agents when ui parameter is not specified.
        env.default_ui_url = utils.get_url(request, env.repo, '', ui='default')
        # Index 2 of the urlsplit() result is the path component.
        env.repo_path = urlparse.urlsplit(env.repo_url)[2]
        env.repo_title = get_localized_message(env.config.repo_titles,
                                               env.lang, '?')
        env.start_page_custom_html = get_localized_message(
            env.config.start_page_custom_htmls, env.lang, '')
        env.results_page_custom_html = get_localized_message(
            env.config.results_page_custom_htmls, env.lang, '')
        env.view_page_custom_html = get_localized_message(
            env.config.view_page_custom_htmls, env.lang, '')
        env.seek_query_form_custom_html = get_localized_message(
            env.config.seek_query_form_custom_htmls, env.lang, '')
        env.footer_custom_html = get_localized_message(
            env.config.footer_custom_htmls, env.lang, '')
        # If the repository is deactivated, we should not show test mode
        # notification.
        env.repo_test_mode = (env.config.test_mode and
                              not env.config.deactivated)
        env.force_https = env.config.force_https

        env.params_full_name = request.get('full_name', '').strip()
        if not env.params_full_name:
            # Preformat the name from 'given_name' and 'family_name' parameters.
            given_name = request.get('given_name', '').strip()
            family_name = request.get('family_name', '').strip()
            env.params_full_name = utils.get_full_name(given_name, family_name,
                                                       env.config)

    return env
def setup_env(request):
    """Constructs the 'env' object, which contains various template variables
    that are commonly used by most handlers.

    Args:
        request: The incoming request. Its remote_addr, url, cookies and
            query parameters (via request.get) are read. As a side effect,
            request.charset is set to the selected charset.

    Returns:
        A utils.Struct carrying, among others: the repo and action, the
        configuration, the analytics ID and maps API key (None when the
        client address is in the corresponding disabled networks),
        internationalization settings (charset, lang, rtl), parts of the
        request URL, option lists for templates, the effective UI mode
        ('small', 'light', 'default' or whatever the 'ui'/'style' parameter
        specified) with its feature flags, and, when env.repo is truthy,
        repo-specific URLs, titles and custom HTML.
    """
    env = utils.Struct()
    env.repo, env.action = get_repo_and_action(request)
    # Falls back to the '*' configuration when there is no repo.
    env.config = config.Configuration(env.repo or '*')

    # Only honored for requests whose remote address is 127.0.0.1.
    # TODO(ryok): Rename to local_test_mode or something alike to disambiguate
    # better from repository's test_mode.
    env.test_mode = (request.remote_addr == '127.0.0.1' and
                     request.get('test_mode'))

    # We sometimes want to disable analytics/maps for requests from a specific
    # mobile carrier (specified by IP ranges).
    # In this way, we can avoid requests to sites outside google.org, and
    # allow the carrier to zero-rate access to Person Finder.
    # TODO(ichikawa): Add server test for this feature.
    #
    # TODO(kpy): Make these global config settings and get rid of get_secret().
    if utils.is_ip_address_in_one_of_networks(
            request.remote_addr, env.config.ip_networks_to_disable_analytics):
        env.analytics_id = None
    else:
        env.analytics_id = get_secret('analytics_id')

    if utils.is_ip_address_in_one_of_networks(
            request.remote_addr, env.config.ip_networks_to_disable_maps):
        env.maps_api_key = None
    else:
        env.maps_api_key = get_secret('maps_api_key')

    # Internationalization-related stuff.
    env.charset = select_charset(request)
    env.lang = select_lang(request, env.config)
    env.rtl = env.lang in django_setup.LANGUAGES_BIDI
    env.virtual_keyboard_layout = const.VIRTUAL_KEYBOARD_LAYOUTS.get(env.lang)

    # Used for parsing query params. This must be done before accessing any
    # query params which may have multi-byte value, such as "given_name" below
    # in this function.
    request.charset = env.charset

    # Determine the resource bundle to use. A non-empty 'resource_bundle'
    # cookie takes precedence over the configured default.
    env.default_resource_bundle = config.get('default_resource_bundle', '1')
    env.resource_bundle = (request.cookies.get('resource_bundle', '') or
                           env.default_resource_bundle)

    # Information about the request.
    env.url = utils.set_url_param(request.url, 'lang', env.lang)
    env.scheme, env.netloc, env.path, _, _ = urlparse.urlsplit(request.url)
    # Default; overwritten from the repo config below when there is a repo.
    env.force_https = False
    # The netloc without the port.
    env.domain = env.netloc.split(':')[0]
    env.global_url = utils.get_repo_url(request, 'global')

    # Commonly used information that's rendered or localized for templates.
    env.language_options = get_language_options(request, env.config)
    env.repo_options = get_repo_options(request, env.lang)
    # The keys of PERSON_EXPIRY_TEXT are ordered by their integer value.
    env.expiry_options = [
        utils.Struct(value=value, text=const.PERSON_EXPIRY_TEXT[value])
        for value in sorted(const.PERSON_EXPIRY_TEXT.keys(), key=int)
    ]
    # 'believed_dead' is offered only when there is no config or the config
    # allows it via the UI.
    env.status_options = [
        utils.Struct(value=value, text=const.NOTE_STATUS_TEXT[value])
        for value in pfif.NOTE_STATUS_VALUES
        if (value != 'believed_dead' or
            not env.config or env.config.allow_believed_dead_via_ui)
    ]
    env.hidden_input_tags_for_preserved_query_params = (
        get_hidden_input_tags_for_preserved_query_params(request))

    ui_param = request.get('ui', '').strip().lower()

    # Interprets "small" and "style" parameters for backward compatibility.
    # They are only consulted when "ui" itself is not given.
    # TODO(ichikawa): Delete these in near future when we decide to drop
    # support of these parameters.
    small_param = request.get('small', '').strip().lower()
    style_param = request.get('style', '').strip().lower()
    if not ui_param and small_param == 'yes':
        ui_param = 'small'
    elif not ui_param and style_param:
        ui_param = style_param

    # An explicit parameter wins; otherwise the user agent decides between
    # 'light' and 'default'.
    if ui_param:
        env.ui = ui_param
    elif user_agents.is_jp_tier2_mobile_phone(request):
        env.ui = 'light'
    else:
        env.ui = 'default'

    # UI configurations. These are the defaults; some of them are overridden
    # below for ui=small and ui=light.
    #
    # Enables features which require JavaScript.
    env.enable_javascript = True
    # Enables operations which require Captcha.
    env.enable_captcha = True
    # Enables photo upload.
    env.enable_photo_upload = True
    # Enables to flag/unflag notes as spam, and to reveal spam notes.
    env.enable_spam_ops = True
    # Enables duplicate marking mode.
    env.enable_dup_mode = True
    # Shows a logo on top of the page.
    env.show_logo = True
    # Shows language menu.
    env.show_language_menu = True
    # Uses short labels for buttons.
    env.use_short_buttons = False
    # Optional "target" attribute for links to non-small pages.
    env.target_attr = ''
    # Shows record IDs in the results page.
    env.show_record_ids_in_results = True

    if env.ui == 'small':
        env.show_logo = False
        env.target_attr = ' target="_blank" '
    elif env.ui == 'light':
        # Disables features which require JavaScript. Some feature phones
        # don't support JavaScript.
        env.enable_javascript = False
        # Disables operations which require Captcha because Captcha requires
        # JavaScript.
        env.enable_captcha = False
        # Uploading is often not supported in feature phones.
        env.enable_photo_upload = False
        # Disables spam operations because it requires JavaScript and
        # supporting more pages on ui=light.
        env.enable_spam_ops = False
        # Disables duplicate marking mode because it doesn't support
        # small screens and it requires JavaScript.
        env.enable_dup_mode = False
        # Hides the logo on the top to save the space. Also, the logo links
        # to the global page which doesn't support small screens.
        env.show_logo = False
        # Hides language menu because the menu in the current position is
        # annoying in feature phones.
        # TODO(ichikawa): Consider layout of the language menu.
        env.show_language_menu = False
        # Too long buttons are not fully shown in some feature phones.
        env.use_short_buttons = True
        # To make it simple.
        env.show_record_ids_in_results = False

    env.back_chevron = u'\xab'
    back_chevron_in_charset = True
    try:
        # The encoded result is discarded; this only probes whether the
        # character can be represented in the selected charset.
        env.back_chevron.encode(env.charset)
    except UnicodeEncodeError:
        # u'\xab' is not in the charset (e.g. Shift_JIS).
        back_chevron_in_charset = False
    if not back_chevron_in_charset or env.ui == 'light':
        # Use ASCII characters on ui=light too because some feature phones
        # support UTF-8 but don't render UTF-8 symbols such as u'\xab'.
        env.back_chevron = u'<<'

    # Repo-specific information.
    if env.repo:
        # repo_url is the root URL for the repository.
        env.repo_url = utils.get_repo_url(request, env.repo)
        # start_url is like repo_url but preserves parameters such as 'ui'.
        env.start_url = utils.get_url(request, env.repo, '')
        # URL of the link in the heading. The link on ui=small links to the
        # normal UI.
        env.repo_title_url = (
            env.repo_url if env.ui == 'small' else env.start_url)
        # URL to force default UI. Note that we show ui=light version in some
        # user agents when ui parameter is not specified.
        env.default_ui_url = utils.get_url(request, env.repo, '', ui='default')
        # Index 2 of the urlsplit() result is the path component.
        env.repo_path = urlparse.urlsplit(env.repo_url)[2]
        env.repo_title = get_localized_message(
            env.config.repo_titles, env.lang, '?')
        env.start_page_custom_html = get_localized_message(
            env.config.start_page_custom_htmls, env.lang, '')
        env.results_page_custom_html = get_localized_message(
            env.config.results_page_custom_htmls, env.lang, '')
        env.view_page_custom_html = get_localized_message(
            env.config.view_page_custom_htmls, env.lang, '')
        env.seek_query_form_custom_html = get_localized_message(
            env.config.seek_query_form_custom_htmls, env.lang, '')
        env.footer_custom_html = get_localized_message(
            env.config.footer_custom_htmls, env.lang, '')
        # If the repository is deactivated, we should not show test mode
        # notification.
        env.repo_test_mode = (
            env.config.test_mode and not env.config.deactivated)
        env.force_https = env.config.force_https

        env.params_full_name = request.get('full_name', '').strip()
        if not env.params_full_name:
            # Preformat the name from 'given_name' and 'family_name' parameters.
            given_name = request.get('given_name', '').strip()
            family_name = request.get('family_name', '').strip()
            env.params_full_name = utils.get_full_name(
                given_name, family_name, env.config)

    return env