def create_es(org_label, project_label, view_data, view_id=None):
    """
    Create an ElasticSearch view inside a project.

    :param org_label: Label of the organization the view will belong to
    :param project_label: Label of the project the view will belong to
    :param view_data: Mapping data required for ElasticSearch indexing, given
        either as a dictionary or as a JSON string (decoded with json.loads)
    :param view_id: OPTIONAL id to create the view under. When given, the view
        is sent with PUT to that id; otherwise it is sent with POST and the id
        is left to the server.
    :return: The payload returned by the request (per the API, the Nexus
        metadata of the view)
    """
    # every label placed in the URL path is URL-encoded first
    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)
    path = "/".join(("/views", encoded_org, encoded_project))

    # a JSON string is accepted in place of a dictionary
    if isinstance(view_data, str):
        view_data = json.loads(view_data)

    # supply the ElasticSearch view types only when the caller set none
    if "@type" not in view_data:
        view_data["@type"] = ["View", ELASTIC_TYPE, "Alpha"]

    if view_id is not None:
        target = path + "/" + url_encode(view_id)
        return http_put(target, body=view_data, use_base=True)

    return http_post(path, body=view_data, use_base=True)
def index_generator(self, folder_ids, add_non_nsw_files: bool,
                    add_nsw_files_without_title_id: bool,
                    success: str = None):
    """
    Append an index entry for every eligible file of the given folders.

    :param folder_ids: Iterable of folder ids passed one by one to
        ``self.gdrive_service.get_files_in_folder_id``
    :param add_non_nsw_files: When true, files are kept whatever their
        extension; otherwise only names ending in .nsp/.nsz/.xci/.xcz are kept
    :param add_nsw_files_without_title_id: When true, files are kept even if
        their URL-encoded name holds no ``%5B<16 hex digits>%5D`` marker
    :param success: OPTIONAL value stored under the "success" key of the index
    """
    nsw_extensions = (".nsp", ".nsz", ".xci", ".xcz")

    for folder_id in folder_ids:
        folder_files = self.gdrive_service.get_files_in_folder_id(folder_id)
        for file_id, file_details in folder_files.items():
            raw_name = file_details["name"]
            if not (add_non_nsw_files or raw_name[-4:] in nsw_extensions):
                continue

            # the title id is looked up in the fully URL-encoded name, where
            # the square brackets appear as %5B and %5D
            encoded_name = url_encode(raw_name, safe="")
            if not (add_nsw_files_without_title_id or regex_search(
                    r"\%5B[0-9A-Fa-f]{16}\%5D", encoded_name)):
                continue

            entry = {
                "url": "gdrive:{file_id}#{file_name}".format(
                    file_id=file_id, file_name=encoded_name),
                "size": int(file_details["size"]),
            }
            self.index["files"].append(entry)

    if success is not None:
        self.index.update({"success": success})
def create(self, org_label, project_label, data, schema_id=None,
           resource_id=None):
    """
    Create a resource, with PUT when an id is supplied and POST otherwise.

    :param org_label: The label of the organization that the resource belongs to
    :param project_label: The label of the project that the resource belongs to
    :param data: Dictionary containing the data to store in the new resource
    :param schema_id: OPTIONAL schema constraining the data. None selects the
        non-constraining default schema "_"
    :param resource_id: OPTIONAL id to force for the new resource. When
        omitted, the id is left to the server.
    :return: The payload returned by the request (per the API, the Nexus
        metadata of the resource)
    """
    # "_" is the default schema that constrains nothing
    effective_schema = "_" if schema_id is None else schema_id

    # every element composing the URL path is URL-encoded
    segments = [
        "/resources",
        url_encode(org_label),
        url_encode(project_label),
        url_encode(effective_schema),
    ]

    if resource_id is None:
        return self._http.post("/".join(segments), data, use_base=True)

    segments.append(url_encode(resource_id))
    return self._http.put("/".join(segments), data, use_base=True)
def create(self, org_label, project_label, schema_obj, schema_id=None):
    """
    Create a new schema, with PUT when an id is supplied and POST otherwise.

    :param org_label: Label of the organization in which to create the schema
    :param project_label: Label of the project in which to create the schema
    :param schema_obj: Schema, as a dictionary or as a JSON string (decoded
        with json.loads)
    :param schema_id: OPTIONAL id to create the schema under. When omitted,
        the id is left to the server.
    :return: The payload returned by the request (per the API, partial and
        limited to the Nexus metadata of the schema)
    """
    # a JSON string is accepted in place of a dictionary
    if isinstance(schema_obj, str):
        schema_obj = json.loads(schema_obj)

    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)
    path = "/".join(("/schemas", encoded_org, encoded_project))

    if schema_id is not None:
        target = path + "/" + url_encode(schema_id)
        return self._http.put(target, schema_obj, use_base=True)

    return self._http.post(path, schema_obj, use_base=True)
def update(org_label: str, project_label: str, filepath: str, file_id: str,
           rev: int, storage_id: Optional[str] = None,
           filename: Optional[str] = None,
           content_type: Optional[str] = None) -> Dict:
    """
    Update an existing file resource with a new binary attachment.

    :param org_label: The label of the organization that the file belongs to
    :param project_label: The label of the project that the file belongs to
    :param filepath: The path of the file to upload
    :param file_id: The id of the file resource to update
    :param rev: The revision to update from
    :param storage_id: OPTIONAL The id of the storage backend, forwarded as
        the ``storage`` argument of the request
    :param filename: OPTIONAL Overrides the filename, which otherwise is the
        part of ``filepath`` after its last "/"
    :param content_type: OPTIONAL Overrides the content type, forwarded to
        ``_content_type`` together with ``filepath``
    :return: The payload returned by the PUT request (per the API, the Nexus
        metadata of the updated file)
    """
    # the elements composing the query URL need to be URL-encoded
    org_label = url_encode(org_label)
    project_label = url_encode(project_label)
    path = [SEGMENT, org_label, project_label, url_encode(file_id)]

    if filename is None:
        filename = filepath.split("/")[-1]

    # The file must stay open for the duration of the upload and be closed
    # afterwards, including when the request raises.
    with open(filepath, "rb") as file_handle:
        file_obj = {
            "file": (filename, file_handle,
                     _content_type(filepath, content_type))
        }
        return http_put(path, body=file_obj, data_type="file", rev=rev,
                        storage=storage_id)
def fetch(org_label, project_label, view_id, rev=None, tag=None):
    """
    Fetch a view and return the payload of the GET request.

    :param org_label: The label of the organization that the view belongs to
    :param project_label: The label of the project that the view belongs to
    :param view_id: id of the view
    :param rev: OPTIONAL revision to fetch, appended as ``?rev=``
        (default: None)
    :param tag: OPTIONAL tag to fetch, appended as ``?tag=`` (default: None)
    :return: The payload returned by the GET request
    :raises Exception: when both rev and tag are provided
    """
    both_given = rev is not None and tag is not None
    if both_given:
        raise Exception(
            "The arguments rev and tag are mutually exclusive. One or the other must be chosen."
        )

    # every element composing the URL path is URL-encoded
    segments = (
        "/views",
        url_encode(org_label),
        url_encode(project_label),
        url_encode(view_id),
    )
    path = "/".join(segments)

    # at most one of the two can be set at this point
    if rev is not None:
        path += "?rev=" + str(rev)
    elif tag is not None:
        path += "?tag=" + str(tag)

    return http_get(path, use_base=True)
def list(self, org_label, project_label, pagination_from=0,
         pagination_size=20, deprecated=None, full_text_search_query=None):
    """
    List the schemas of a project.

    :param org_label: Label of the organization whose schemas are listed
    :param project_label: Label of the project whose schemas are listed
    :param pagination_from: OPTIONAL The pagination index to start from
        (default: 0)
    :param pagination_size: OPTIONAL The maximum number of elements to return
        at once (default: 20)
    :param deprecated: OPTIONAL True sends ``deprecated=true``, False sends
        ``deprecated=false``; None (default) leaves the filter out
    :param full_text_search_query: OPTIONAL text sent URL-encoded as ``q``
        when non-empty
    :return: The payload returned by the GET request
    """
    base = "/".join(
        ("/schemas", url_encode(org_label), url_encode(project_label)))

    # the query string is assembled by hand, in a fixed order
    query_parts = [
        "from=" + str(pagination_from),
        "size=" + str(pagination_size),
    ]
    if deprecated is not None:
        query_parts.append("deprecated=" + ("true" if deprecated else "false"))
    if full_text_search_query:
        query_parts.append("q=" + url_encode(full_text_search_query))

    return self._http.get(base + "?" + "&".join(query_parts), use_base=True)
def create(org_label, project_label, filepath, file_id=None):
    """
    Upload a file, with PUT when a file id is supplied and POST otherwise.

    :param org_label: The label of the organization that the file belongs to
    :param project_label: The label of the project that the file belongs to
    :param filepath: Path of the file to upload
    :param file_id: OPTIONAL id used to identify the file. When omitted, the
        id is left to the server.
    :return: The payload returned by the request (per the API, the Nexus
        metadata of the file)
    """
    # the elements composing the query URL need to be URL-encoded
    org_label = url_encode(org_label)
    project_label = url_encode(project_label)
    path = "/files/" + org_label + "/" + project_label

    # The file must stay open for the duration of the upload and be closed
    # afterwards, including when the request raises.
    with open(filepath, "rb") as file_handle:
        file_obj = {"file": file_handle}

        if file_id is None:
            return http_post(path, body=file_obj, data_type="file",
                             use_base=True)

        path = path + "/" + url_encode(file_id)
        return http_put(path, use_base=True, body=file_obj, data_type="file")
def search_all_dork(query, site=None):
    """
    Run a Google search through the Facebook developers echo tool and return
    the decoded response text.

    The names ``cookie`` and ``proxies_http`` are not parameters; they are
    read from the enclosing scope.

    Args:
        query - search string, URL-encoded before being placed in the URL
        site - optional site; when given, the search is ``site:<site>+<query>``
               and no ``start``/``filter`` arguments are added
    """
    encoded_query = url_encode(query)

    if site:
        google_url = ('https://www.google.com/search?q=site:' + site + '+' +
                      encoded_query)
    else:
        google_url = ('https://www.google.com/search?q=' + encoded_query +
                      '&start=0&filter=True')

    # the whole Google URL is encoded again so it can travel as the value of
    # the echo tool's q argument
    escaped = url_encode(google_url)

    headers = {
        'Host': 'developers.facebook.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'deflate',
        'Connection': 'keep-alive',
        'Cookie': cookie,
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0',
        'TE': 'Trailers'
    }

    echo_url = 'https://developers.facebook.com/tools/debug/echo/?q=%s' % escaped
    response = requests.get(echo_url, headers=headers, proxies=proxies_http)
    return decode_html(response.text)
def search_all(query, linea, cookie, page=0, full=False):
    """
    Run a Google search through the Facebook developers echo tool and return
    the parsed results.

    Args:
        query - value placed after ``site:`` in the Google query
        linea - value appended to the Google query after ``site:<query>+``
        cookie - facebook cookie, sent in the Cookie header
        page - search result page number (optional); converted to the Google
               ``start`` offset as ``page * 10``
        full - optional flag sent as the Google ``filter`` argument
               (1 when true, 0 when false)
    """
    offset = page * 10
    # Honor the caller's flag. The previous expression tested the constant
    # False, so the value was always 0 whatever was passed.
    full = 1 if full else 0

    escaped = url_encode(
        'https://www.google.com/search?q=site:%s+%s&start=%i&filter=%i' %
        (url_encode(query), url_encode(linea), offset, full))

    headers = {
        'Host': 'developers.facebook.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'deflate',
        'Connection': 'keep-alive',
        'Cookie': cookie,
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0',
        'TE': 'Trailers'
    }

    response = requests.get(
        'https://developers.facebook.com/tools/debug/echo/?q=%s' % escaped,
        headers=headers)
    cleaned_response = decode_html(response.text)
    return parse(cleaned_response)
def encode_twitter_key(*, consumer_key: str, consumer_secret: str):
    """
    Build a ``Basic`` authorization value from a consumer key and secret.

    Both values are URL-encoded, joined with a colon, base64-encoded and
    prefixed with "Basic ".

    :param consumer_key: The consumer key (keyword-only)
    :param consumer_secret: The consumer secret (keyword-only)
    :return: The string "Basic <base64 of key:secret>"
    """
    credentials = "%s:%s" % (url_encode(consumer_key),
                             url_encode(consumer_secret))
    token = b64encode(credentials.encode("ascii")).decode("ascii")
    return "Basic " + token
def fetch(self, org_label, project_label, resource_id, schema_id="_",
          rev=None, tag=None):
    """
    Fetch a resource and return the payload of the GET request.

    :param org_label: The label of the organization that the resource belongs to
    :param project_label: The label of the project that the resource belongs to
    :param resource_id: id of the resource
    :param schema_id: OPTIONAL id of the schema (default: "_")
    :param rev: OPTIONAL revision to fetch, appended as ``?rev=``
        (default: None)
    :param tag: OPTIONAL tag to fetch, appended as ``?tag=`` (default: None)
    :return: The payload returned by the GET request
    :raises Exception: when both rev and tag are provided
    """
    both_given = rev is not None and tag is not None
    if both_given:
        raise Exception("The arguments rev and tag are mutually exclusive. One or the other must be chosen.")

    # every element composing the URL path is URL-encoded; the schema comes
    # before the resource id in the path
    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)
    encoded_schema = url_encode(schema_id)
    encoded_resource = url_encode(resource_id)
    path = "/".join(("/resources", encoded_org, encoded_project,
                     encoded_schema, encoded_resource))

    # at most one of the two can be set at this point
    if rev is not None:
        path += "?rev=" + str(rev)
    elif tag is not None:
        path += "?tag=" + str(tag)

    return self._http.get(path, use_base=True)
def update_link(org_label: str, project_label: str, filename: str,
                filepath: str, media_type: str, rev: int, file_id: str,
                storage_id: Optional[str] = None) -> Dict:
    """
    Update a file resource so that it points at a linked file.

    :param org_label: The label of the organization that the file belongs to
    :param project_label: The label of the project that the file belongs to
    :param filename: The filename, sent as "filename" in the request body
    :param filepath: The path of the file to link, sent as "path"
    :param media_type: The linked file's media type, sent as "mediaType"
    :param rev: The previous file revision
    :param file_id: The id of the file resource to update
    :param storage_id: OPTIONAL The id of the storage backend, forwarded as
        the ``storage`` argument of the request
    :return: The payload returned by the PUT request (per the API, the Nexus
        metadata of the linked file)
    """
    # every element composing the request path is URL-encoded
    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)

    link_description = dict(filename=filename, path=filepath,
                            mediaType=media_type)
    target = [SEGMENT, encoded_org, encoded_project, url_encode(file_id)]

    return http_put(target, body=link_description, rev=rev,
                    storage=storage_id)
def fetch(org_label, project_label, schema_id, rev=None, tag=None):
    """
    Fetch a schema and return the payload of the GET request.

    :param org_label: The label of the organization that the schema belongs to
    :param project_label: The label of the project that the schema belongs to
    :param schema_id: id of the schema
    :param rev: OPTIONAL revision to fetch, appended as ``?rev=``
        (default: None)
    :param tag: OPTIONAL tag to fetch, appended as ``?tag=`` (default: None)
    :return: The payload returned by the GET request
    :raises Exception: when both rev and tag are provided
    """
    both_given = rev is not None and tag is not None
    if both_given:
        raise Exception(
            "The arguments rev and tag are mutually exclusive. One or the other must be chosen."
        )

    # every element composing the URL path is URL-encoded
    segments = (
        "/schemas",
        url_encode(org_label),
        url_encode(project_label),
        url_encode(schema_id),
    )
    path = "/".join(segments)

    # at most one of the two can be set at this point
    if rev is not None:
        path += "?rev=" + str(rev)
    elif tag is not None:
        path += "?tag=" + str(tag)

    return http_get(path, use_base=True)
def fetch(self, org_label: str, project_label: str, file_id: str,
          rev: Optional[int] = None, tag: Optional[str] = None,
          out_filepath: Optional[str] = None) -> Dict:
    """
    Fetch the metadata of a file and, optionally, write its binary to disk.

    The argument `out_filepath` can take three forms:
    - out_filepath=None (default): the binary is not fetched
    - out_filepath="./some/folder/": the binary is fetched and written in this
      directory under the "_filename" value of the metadata
    - out_filepath="./somefile.jpg": the binary is fetched and written under
      this exact filename

    :param org_label: The label of the organization that the file belongs to
    :param project_label: The label of the project that the file belongs to
    :param file_id: id of the file
    :param rev: OPTIONAL revision to fetch (default: None)
    :param tag: OPTIONAL tag to fetch (default: None)
    :param out_filepath: OPTIONAL file or existing directory to write the
        binary to (default: None)
    :return: The metadata payload returned by the first GET request
    :raises Exception: when both rev and tag are provided
    """
    if rev is not None and tag is not None:
        raise Exception(
            "The arguments rev and tag are mutually exclusive. One or the other must be chosen."
        )

    # the elements composing the query URL need to be URL-encoded
    org_label = url_encode(org_label)
    project_label = url_encode(project_label)
    file_id = url_encode(file_id)
    path = [self.segment, org_label, project_label, file_id]

    response_metadata = self._http.get(path, rev=rev, tag=tag)

    if out_filepath is not None:
        # The binary is requested only when it is going to be written, so
        # that a metadata-only call does not open a streamed download.
        response_binary = self._http.get(path, get_raw_response=True,
                                         accept="all", stream=True, rev=rev,
                                         tag=tag)

        if os.path.isdir(out_filepath):
            out_filepath = os.path.join(out_filepath,
                                        response_metadata["_filename"])

        # the streamed response is written to the file chunk by chunk
        with open(out_filepath, "wb") as f:
            for chunk in response_binary.iter_content(chunk_size=4096):
                f.write(chunk)

    return response_metadata
def list(org_label, project_label, pagination_from=0, pagination_size=20,
         deprecated=None, type=None, rev=None, schema=None, created_by=None,
         updated_by=None, resource_id=None):
    """
    List the resources of a given organization and project.

    Every filter is forwarded as a query parameter, including those left to
    None.

    :param org_label: The label of the organization that the resources belong to
    :param project_label: The label of the project that the resources belong to
    :param pagination_from: OPTIONAL The pagination index to start from
        (default: 0), sent as "from"
    :param pagination_size: OPTIONAL The maximum number of elements to return
        at once (default: 20), sent as "size"
    :param deprecated: OPTIONAL deprecation filter, sent as "deprecated"
    :param type: OPTIONAL type filter, sent as "type"
    :param rev: OPTIONAL revision filter, sent as "rev"
    :param schema: OPTIONAL schema filter, sent as "schema"
    :param created_by: OPTIONAL creator filter, sent as "created_by"
    :param updated_by: OPTIONAL updater filter, sent as "updated_by"
    :param resource_id: OPTIONAL resource id filter, sent as "id"
    :return: The payload returned by the GET request
    """
    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)
    # the schema is sent as a query parameter, not as a path segment
    path = "/".join(("/resources", encoded_org, encoded_project))

    params = {"from": pagination_from, "size": pagination_size}
    params["type"] = type
    params["deprecated"] = deprecated
    params["rev"] = rev
    params["schema"] = schema
    params["created_by"] = created_by
    params["updated_by"] = updated_by
    params["id"] = resource_id

    return http_get(path, use_base=True, params=params)
def create(self, org_label: str, project_label: str, filepath: str,
           storage_id: Optional[str] = None, file_id: Optional[str] = None,
           filename: Optional[str] = None,
           content_type: Optional[str] = None) -> Dict:
    """
    Create a file resource from a binary attachment, with PUT when a file id
    is supplied and POST otherwise.

    :param org_label: The label of the organization that the file belongs to
    :param project_label: The label of the project that the file belongs to
    :param filepath: Path of the file to upload
    :param storage_id: OPTIONAL The id of the storage backend, forwarded as
        the ``storage`` argument of the request
    :param file_id: OPTIONAL id used to identify the file. When omitted, the
        id is left to the server.
    :param filename: OPTIONAL Overrides the filename, which otherwise is the
        part of ``filepath`` after its last "/"
    :param content_type: OPTIONAL Overrides the content type, forwarded to
        ``self._content_type`` together with ``filepath``
    :return: The payload returned by the request (per the API, the Nexus
        metadata of the file)
    """
    # the elements composing the query URL need to be URL-encoded
    org_label = url_encode(org_label)
    project_label = url_encode(project_label)
    path = [self.segment, org_label, project_label]

    if filename is None:
        filename = filepath.split("/")[-1]

    # The file must stay open for the duration of the upload and be closed
    # afterwards, including when the request raises.
    with open(filepath, "rb") as file_handle:
        file_obj = {
            "file": (filename, file_handle,
                     self._content_type(filepath, content_type))
        }

        if file_id is None:
            return self._http.post(path, body=file_obj, data_type="file",
                                   storage=storage_id)

        path.append(url_encode(file_id))
        return self._http.put(path, body=file_obj, data_type="file",
                              storage=storage_id)
def query_sparql(org_label, project_label, query):
    """
    Send a SPARQL query to the "graph/sparql" endpoint of a project.

    :param org_label: Label of the organization to perform the query on
    :param project_label: Label of the project to perform the query on
    :param query: Query as a string, sent as the request body
    :return: The payload returned by the POST request
    """
    # the labels are URL-encoded before being placed in the path
    segments = (
        "/views",
        url_encode(org_label),
        url_encode(project_label),
        "graph",
        "sparql",
    )
    endpoint = "/".join(segments)
    return http_post(endpoint, body=query, data_type="sparql", use_base=True)
def list(self, org_label: str, project_label: str, pagination_from: int = 0,
         pagination_size: int = 20, deprecated: Optional[bool] = None,
         type: Optional[str] = None, rev: Optional[int] = None,
         schema: Optional[str] = None, created_by: Optional[str] = None,
         updated_by: Optional[str] = None,
         file_id: Optional[str] = None) -> Dict:
    """
    List the files of a given organization and project.

    Every filter is forwarded as a query parameter, including those left to
    None.

    :param org_label: The label of the organization that the files belong to
    :param project_label: The label of the project that the files belong to
    :param pagination_from: OPTIONAL The pagination index to start from
        (default: 0), sent as "from"
    :param pagination_size: OPTIONAL The maximum number of elements to return
        at once (default: 20), sent as "size"
    :param deprecated: OPTIONAL deprecation filter, sent as "deprecated"
    :param type: OPTIONAL type filter, sent as "type"
    :param rev: OPTIONAL revision filter, sent as "rev"
    :param schema: OPTIONAL schema filter, sent as "schema"
    :param created_by: OPTIONAL creator filter, sent as "created_by"
    :param updated_by: OPTIONAL updater filter, sent as "updated_by"
    :param file_id: OPTIONAL file id filter, sent as "id"
    :return: The payload returned by the GET request
    """
    # the labels are URL-encoded before being placed in the request path
    request_path = [
        self.segment,
        url_encode(org_label),
        url_encode(project_label),
    ]

    params = {"from": pagination_from, "size": pagination_size}
    params["type"] = type
    params["deprecated"] = deprecated
    params["rev"] = rev
    params["schema"] = schema
    params["created_by"] = created_by
    params["updated_by"] = updated_by
    params["id"] = file_id

    return self._http.get(request_path, params=params)
def index_generator(
    self,
    folder_ids,
    add_non_nsw_files: bool,
    add_nsw_files_without_title_id: bool,
    success: str = None,
) -> None:
    """
    Append an index entry for every eligible file of the given folders.

    :param folder_ids: Iterable of folder ids passed one by one to
        ``self.gdrive_service.get_files_in_folder_id``
    :param add_non_nsw_files: When true, files are kept whatever their
        extension; otherwise only names ending in .nsp/.nsz/.xci/.xcz are kept
    :param add_nsw_files_without_title_id: When true, files are kept even if
        their URL-encoded name holds no ``%5B<16 hex digits>%5D`` marker
    :param success: OPTIONAL value stored under the "success" key of the index
    """
    # the title id is searched in the URL-encoded name, where the square
    # brackets appear as %5B and %5D
    title_id_pattern = r"\%5B[0-9A-Fa-f]{16}\%5D"
    pattern = regex_compile(title_id_pattern)
    nsw_extensions = (".nsp", ".nsz", ".xci", ".xcz")

    for folder_id in folder_ids:
        files = self.gdrive_service.get_files_in_folder_id(folder_id)
        for (file_id, file_details) in files.items():
            if not (add_non_nsw_files
                    or file_details["name"][-4:] in nsw_extensions):
                continue

            file_name = url_encode(file_details["name"], safe="")

            # A compiled pattern takes the string to scan as the first
            # argument of search(); passing the pattern text there and the
            # file name as the start position raises a TypeError.
            if add_nsw_files_without_title_id or pattern.search(file_name):
                size = int(file_details["size"])
                self.index["files"].append({
                    "url": f"gdrive:{file_id}#{file_name}",
                    "size": size,
                })

    if success is not None:
        self.index.update({"success": success})
def build_filter_query(self, start_date, end_date, sender, message_subject,
                       has_attachments):
    """
    Build the "$filter" portion of the query string used to search messages.

    :param start_date: epoch time in milliseconds; only messages received at
        or after it are matched
    :param end_date: epoch time in milliseconds; only messages received at or
        before it are matched
    :param sender: email address of sender to search for
    :param message_subject: search for emails containing this string in the
        "subject" of email
    :param has_attachments: boolean flag indicating to search for emails with
        or without attachments; None leaves the criterion out
    :return: $filter portion of the query string, or the empty string when no
        criterion was supplied
    """

    def to_utc_timestamp(epoch_ms):
        # The formatted value carries a "Z" (UTC) suffix, so the conversion
        # must be done in UTC and not in the local time zone of the host.
        return datetime.datetime.utcfromtimestamp(
            epoch_ms / 1000).strftime('%Y-%m-%dT%H:%M:%SZ')

    # Initialize $filter query string
    filter_query = u'?$filter='
    filter_query_start_len = len(filter_query)

    if start_date:
        start_date_query = u'(receivedDateTime%20ge%20{0})'.format(
            to_utc_timestamp(start_date))
        filter_query = self.append_query_to_query_url(
            filter_query, start_date_query)

    if end_date:
        end_date_query = u'(receivedDateTime%20le%20{0})'.format(
            to_utc_timestamp(end_date))
        filter_query = self.append_query_to_query_url(
            filter_query, end_date_query)

    if sender:
        sender_query = u"(from/emailAddress/address%20eq%20'{0}')".format(
            sender)
        filter_query = self.append_query_to_query_url(
            filter_query, sender_query)

    if has_attachments is not None:
        has_attachments_query = u'(hasAttachments%20eq%20{0})'.format(
            str(has_attachments).lower())
        filter_query = self.append_query_to_query_url(
            filter_query, has_attachments_query)

    if message_subject:
        # OData query requires single quotes be replaced by 2 single quotes
        # when using $filter (not $search). The subject is URL-encoded first
        # and each encoded single quote (%27) is then replaced by 2 single
        # quotes (not URL-encoded).
        url_encoded_subject = url_encode(message_subject.encode('utf8'))
        url_encoded_subject = url_encoded_subject.replace("%27", "''")
        subject_query = u"(contains(subject,'{0}'))".format(
            url_encoded_subject)
        filter_query = self.append_query_to_query_url(
            filter_query, subject_query)

    # If nothing was added, then return the empty string.
    if len(filter_query) == filter_query_start_len:
        return ""

    return filter_query
def encode_post_data(*args, **kargs):
    """
    URL-encode form data and return it as ASCII bytes.

    All arguments are forwarded unchanged to ``urllib.parse.urlencode``. When
    that import fails, ``stopped`` is called with an explanatory message.

    :return: The encoded query string, as bytes
    """
    try:
        from urllib.parse import urlencode as build_query
    except ImportError:
        build_query = None
        stopped("Unsupported version of Python. You need Version 3 :<")

    encoded_text = build_query(*args, **kargs)
    return encoded_text.encode('ascii')
def append_form(self, obj, headers=None):
    """
    Helper method to add a URL-encoded form field.

    Parameters
    ----------
    obj : `mapping` of (`str`, `Any`) items, `sequence` of `tuple` (`str`, `Any`) items
        The object, what should be percent encoded for a post request.
    headers : `None` or ``imultidict`` of (`str`, `str`) items, Optional
        Optional headers for the URL-encoded field.

    Returns
    -------
    payload : ``StringPayload``
        The created string payload.

    Raises
    ------
    RuntimeError
        - The `payload`'s content has unknown content-encoding.
        - The `payload`'s content has unknown content-transfer-encoding.
        (As stated by the original documentation; presumably raised while the
        payload is appended.)
    """
    if hasattr(obj.__class__, 'items'):
        # mapping type: encode its (key, value) pairs
        obj = list(obj.items())

    data = url_encode(obj, doseq=True)

    # The registered media type for form data is spelled "urlencoded", with
    # no underscore.
    kwargs = {'content_type': 'application/x-www-form-urlencoded'}
    if headers is not None:
        kwargs['headers'] = headers

    payload = StringPayload(data, kwargs)
    self.append_payload(payload)
    return payload
def publish(html_file, name='', short=False, bypass=False):
    """
    Print an itty.bitty.site URL embedding the content of an HTML file.

    The file content is LZMA-compressed (FORMAT_ALONE, preset 9), base64
    encoded and placed after the URL-encoded name in the URL fragment.

    :param html_file: Path of the HTML file to publish. When it does not exist
        or is a directory, the error is printed and the program exits.
    :param name: Name placed in the URL fragment before the data
    :param short: When true, the URL is passed to ``shorten.shorten`` and the
        result is printed instead of the URL
    :param bypass: When true, the 2048 byte limit is not enforced
    """
    name = url_encode(name)

    try:
        with open(html_file, 'r') as source:
            html = source.read()
    except (FileNotFoundError, IsADirectoryError) as error:
        print(error)
        exit()

    compressed = lzma.compress(html.encode("utf-8"),
                               format=lzma.FORMAT_ALONE, preset=9)
    data = base64.b64encode(compressed)
    url = f"https://itty.bitty.site/#{name}/{data.decode('utf-8')}"
    size = len(url.encode("utf-8"))

    too_big = size > 2048 and not bypass
    if too_big:
        print(
            "Size too big to be published at https://itty.bitty.site/ (Maximum allowed: 2048 bytes)"
        )
    elif short:
        from shorten import shorten
        print(shorten(url))
    else:
        print(url)

    print(f"{size} Bytes")
def scan_folder(self, folder_id: str, files_progress_bar: tqdm,
                recursion: bool, add_nsw_files_without_title_id: bool,
                add_non_nsw_files: bool):
    """
    Scan a folder for files and add the eligible ones to the instance index.

    :param folder_id: Id of the folder, forwarded to
        ``self.gdrive_service.get_all_files_in_folder``
    :param files_progress_bar: Progress bar forwarded to the same call
    :param recursion: Recursion flag forwarded to the same call
    :param add_nsw_files_without_title_id: When true, files are kept even if
        their URL-encoded name holds no ``%5B<16 hex digits>%5D`` marker
    :param add_non_nsw_files: When true, files are kept whatever their
        extension; otherwise only names ending in .nsp/.nsz/.xci/.xcz are kept
    """
    title_id_pattern = r"\%5B[0-9A-Fa-f]{16}\%5D"
    nsw_suffixes = (".nsp", ".nsz", ".xci", ".xcz")

    folder_files = self.gdrive_service.get_all_files_in_folder(
        folder_id, recursion, files_progress_bar)
    title_id_regex = regex_compile(title_id_pattern)

    for file_id, file_details in folder_files.items():
        # both checks run on the fully URL-encoded name
        encoded_name = url_encode(file_details["name"], safe="")
        extension_ok = add_non_nsw_files or encoded_name[-4:] in nsw_suffixes
        title_id_ok = (add_nsw_files_without_title_id
                       or title_id_regex.search(encoded_name))
        if not (title_id_ok and extension_ok):
            continue

        entry = {
            "url": f"gdrive:{file_id}#{encoded_name}",
            "size": int(file_details["size"]),
        }

        # an entry already present in the index is not added a second time
        if entry in self.index["files"]:
            continue

        self.index["files"].append(entry)
        self.files_shared_status.update({file_id: file_details["shared"]})
def create_link(self, org_label: str, project_label: str, filename: str,
                filepath: str, media_type: str,
                storage_id: Optional[str] = None,
                file_id: Optional[str] = None) -> Dict:
    """
    Create a file resource from a link to an existing binary, with PUT when a
    file id is supplied and POST otherwise.

    :param org_label: The label of the organization that the file belongs to
    :param project_label: The label of the project that the file belongs to
    :param filename: The filename, sent as "filename" in the request body
    :param filepath: The path of the file to link, sent as "path"
    :param media_type: The media type of the linked file, sent as "mediaType"
    :param storage_id: OPTIONAL The id of the storage backend, forwarded as
        the ``storage`` argument of the request
    :param file_id: OPTIONAL The id of the created resource. When omitted, the
        id is left to the server.
    :return: The payload returned by the request (per the API, the Nexus
        metadata of the linked file)
    """
    # every element composing the request path is URL-encoded
    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)

    link_description = dict(filename=filename, path=filepath,
                            mediaType=media_type)
    target = [self.segment, encoded_org, encoded_project]

    if file_id is None:
        return self._http.post(target, body=link_description,
                               storage=storage_id)

    target.append(url_encode(file_id))
    return self._http.put(target, body=link_description, storage=storage_id)
def list(self, org_label, project_label, pagination_from=0,
         pagination_size=20, deprecated=None, type=None, rev=None,
         schema=None, created_by=None, updated_by=None, view_id=None):
    """
    List the views of a given organization and project, of all kinds.

    Every filter is forwarded as a query parameter, including those left to
    None.

    :param org_label: The label of the organization that the views belong to
    :param project_label: The label of the project that the views belong to
    :param pagination_from: OPTIONAL The pagination index to start from
        (default: 0), sent as "from"
    :param pagination_size: OPTIONAL The maximum number of elements to return
        at once (default: 20), sent as "size"
    :param deprecated: OPTIONAL deprecation filter, sent as "deprecated"
    :param type: OPTIONAL view type filter, sent as "type"
    :param rev: OPTIONAL revision filter, sent as "rev"
    :param schema: OPTIONAL schema filter, sent as "schema"
    :param created_by: OPTIONAL creator filter, sent as "created_by"
    :param updated_by: OPTIONAL updater filter, sent as "updated_by"
    :param view_id: OPTIONAL view id filter, sent as "id"
    :return: The payload returned by the GET request
    """
    encoded_org = url_encode(org_label)
    encoded_project = url_encode(project_label)
    path = "/".join(("/views", encoded_org, encoded_project))

    params = {"from": pagination_from, "size": pagination_size}
    params["type"] = type
    params["deprecated"] = deprecated
    params["rev"] = rev
    params["schema"] = schema
    params["created_by"] = created_by
    params["updated_by"] = updated_by
    params["id"] = view_id

    return self._http.get(path, use_base=True, params=params)
def on_change(self):
    """
    Rebuild the query string of the environ from the current items.

    Every key and value is encoded to UTF-8, the pairs are URL-encoded, and
    the result is stored under "QUERY_STRING". The pair (self, query string)
    is stored under "webob._parsed_query_vars".
    """
    encoded_pairs = [
        (key.encode("utf8"), value.encode("utf8"))
        for key, value in self.items()
    ]
    query_string = url_encode(encoded_pairs)

    self.env["QUERY_STRING"] = query_string
    self.env["webob._parsed_query_vars"] = (self, query_string)
def __init__(self, query, index):
    """
    Initialize a search object with a query and a target index.

    The request URI is the base URL followed by the index name and the
    URL-encoded query; the result frame starts out empty.

    :param query: The search query
    :param index: The name of the index to search
    """
    self.query = query
    self.index = index

    # cursors are available only for the indexes listed on the class
    self._cursor_supported = index in self._cursored_indexes

    uri_parts = (self._base_url, self.index, '?query=',
                 url_encode(self.query))
    self._uri = ''.join(uri_parts)

    self.results_df = pd.DataFrame()
def __init__(self, query, index, keygen):
    """
    Initialize a search object with a query, a target index and a keygen.

    The request URI is the base URL followed by the index name and the
    URL-encoded query; the status code starts out as None.

    :param query: The search query
    :param index: The name of the index to search
    :param keygen: Stored as-is on the instance
    """
    self.query = query
    self.index = index
    self.keygen = keygen
    self.status_code = None

    # cursors are available only for the indexes listed on the class
    self._cursor_supported = index in self._cursored_indexes

    uri_parts = (self._base_url, self.index, '?query=',
                 url_encode(self.query))
    self._uri = ''.join(uri_parts)
def get_definition(self, word: str, allow_cache: bool=True, store_in_cache: bool=True) -> Definition or None:
    """
    Gets the definition of some word from SSKJ.

    The word is looked up in the cache first (when allowed and the cached
    entry is younger than ``self.max_age``). Otherwise the results page is
    requested and parsed into a Definition object.

    :param word: The keyword to search for; converted with str() first
    :type word: str
    :param allow_cache: Indicates if you want to check the cache for already-fetched definitions
    :type allow_cache: bool
    :param store_in_cache: Indicates if you want to save the definition in the cache after getting it
    :type store_in_cache: bool
    :return: Definition object
    :raises NotFound: when the keyword cannot be located in the results page
    """
    word = str(word)

    # Return from cache if valid
    if allow_cache and self._in_cache(word):
        if self._cache_age(word) < self.max_age:
            log.debug("Using cache for '{}'".format(word))
            return self.cache.get(word)

    log.debug("Requesting definition for '{}'".format(word))

    encoded = BASE_ENDPOINT.format(url_encode(word))
    html = self.req.get(encoded)

    bs_html = BeautifulSoup(html, "html.parser").find("div", {"class": "list-group results"})

    try:
        keyword = bs_html.find("span", {"class": "font_xlarge"}).text
    except AttributeError:
        # Raise NotFound as the word cannot be found (a find() call returned
        # None, so the attribute access failed)
        raise NotFound("no result: {}".format(word))

    attributes = bs_html.find("span", {"data-group": "header"}).text

    # Find out if there are multiple definitions
    try:
        sub = bs_html.find("ol", {"class": "manual"}).find_all("li")
    except AttributeError:
        # No <ol class="manual"> element: treated as a single definition
        sub = None

    if sub:
        # Multiple definitions
        definitions = [parse_encoding(a).capitalize() for a in [remove_num(a.text) for a in sub]]

        # Last item also includes terminology and slang so we filter it
        last_definition = definitions.pop().split("●")
        definitions.append(last_definition[0])

        # And define terminology and slang
        # (the text after "●" is split on "♦": terminology first, slang second)
        terminology = last_definition[1].split("♦")[0]
        slang = last_definition[1].split("♦")[1]

    else:
        # Only one definition
        # The entry text is taken after the leading keyword + attributes, and
        # any further occurrence of the attributes text is removed.
        paragraph = str(bs_html.find("div", {"class": "list-group-item entry"})
                        .text[len(keyword + attributes):]).replace(attributes, "")

        # A split that yields a single piece means the marker is absent.
        if len(paragraph.split(Sc.SLANG)) == 1:
            slang = None

            if len(paragraph.split(Sc.TERMINOLOGY)) == 1:
                terminology = None
                definitions = [str(paragraph)]
            else:
                terminology = paragraph.split(Sc.TERMINOLOGY)[1]
                definitions = [paragraph.split(Sc.TERMINOLOGY)[0]]

        else:
            definitions = [paragraph.split(Sc.SLANG)[0]]

            if len(paragraph.split(Sc.TERMINOLOGY)) == 1:
                terminology = None
                slang = paragraph.split(Sc.SLANG)[1]
            else:
                terminology = paragraph.split(Sc.TERMINOLOGY)[1]
                # slang is the text between the slang and terminology markers
                slang = paragraph.split(Sc.SLANG)[1].split(Sc.TERMINOLOGY)[0]

        definitions = [parse_encoding(a) for a in definitions]

    # Create the Definition object
    timestamp = time.time()

    obj = Definition(
        base_word=word,
        keyword=keyword,
        attributes=attributes,
        summary=clean(definitions[0]),
        definitions=[clean(d) for d in definitions],
        terminology=clean(terminology),
        slang=clean(slang),
        html=html,
        timestamp=timestamp
    )

    # Store in cache if allowed, together with the time it was fetched
    if store_in_cache:
        self.cache[str(word)] = obj
        self.def_ages[str(word)] = timestamp

    return obj