def validate(self):
    """Validate the metadata before it is used for an upload.

    Three checks are run, in this order:

    1. ``self._check_minimal()``;
    2. ``self._check_license_availability()``;
    3. ``jsonschema.validate`` of ``self._metadata`` against
       ``self._schema``, which covers the type of each entry and the
       dependencies between entries declared in the schema (for example,
       *upload_type* set to *publication* implying the presence of
       *publication_type*).

    Raises
    ------
    ZenodoMetadataException
        if jsonschema reports a ``ValidationError``. Exceptions raised by
        the two preliminary checks are propagated untouched.
    """
    # Minimal set of information, then license validity (open/embargoed)
    self._check_minimal()
    self._check_license_availability()

    try:
        jsonschema.validate(self._metadata, self._schema)
    except jsonschema.exceptions.ValidationError as exc:
        message = 'ValidationError: {}'.format(exc.message)
        logger.error(message)
        raise ZenodoMetadataException(message)
    else:
        logger.info('Metadata should be ok to use for upload')
def get_files_path(fname):
    """Return the path(s) of the file(s) designated by *fname*.

    Parameters
    ----------
    fname: str
        Name of a single file, or of a directory which is walked
        recursively.

    Returns
    -------
    list of str
        ``[fname]`` when *fname* is an existing file, otherwise the path of
        every file found under the directory *fname* (sub-directories
        included).

    Raises
    ------
    DatalightException
        if *fname* is not an existing file and no file can be found by
        walking it as a directory.
    """
    # A plain file is returned as a one-element list.
    if os.path.isfile(fname):
        return [fname]

    # Otherwise crawl through the directory and its sub-directories.
    # os.walk yields nothing for a path which does not exist, so a missing
    # path and an empty directory both end up with an empty list.
    file_paths = []
    for root, _directories, files in os.walk(fname):
        file_paths.extend(os.path.join(root, filename) for filename in files)

    if not file_paths:
        message = 'File or directory: {} to upload does not exist.'.format(
            fname)
        logger.error(message)
        raise DatalightException(message)

    return file_paths
def _check_minimal(self):
    """Check that the minimal set of metadata needed for Zenodo is present.

    ``title``, ``upload_type``, ``description`` and ``creators`` are
    mandatory. ``access_right`` and ``license`` are added to the metadata
    with the values ``open`` and ``cc-by-4.0`` when they are absent, and a
    warning is logged for each key added.

    Returns
    -------
    bool
        True when the check succeeded.

    Raises
    ------
    ZenodoMetadataException
        if the metadata is None or if one of the mandatory keys is missing
        (the first missing key, in the order above, is reported).
    """
    metadata = self._metadata
    if metadata is None:
        message = 'Metadata not provided'
        logger.error(message)
        raise ZenodoMetadataException(message)

    for required in ('title', 'upload_type', 'description', 'creators'):
        if required in metadata:
            continue
        error = 'Missing metadata information: {}'.format(required)
        logger.error(error)
        raise ZenodoMetadataException(error)

    # Default values. TODO: should be done in the schema.
    defaults = (
        ('access_right', 'open',
         'Add metadata: "access_right" set to default value "open"'),
        ('license', 'cc-by-4.0',
         'Add metadata: "license" set to default value "cc-by-4.0"'),
    )
    for key, value, warning in defaults:
        if key not in metadata:
            metadata[key] = value
            logger.warning(warning)

    return True
def _check_status_code(status_code):
    """Test that a request went as expected.

    Parameters
    ----------
    status_code: int
        status code returned by the request (requests.status_code)

    Returns
    -------
    int
        *status_code* itself when it is lower than 400.

    Raises
    ------
    ZenodoException
        if *status_code* is 400 or more. The message starts with
        "Server connection failed" for 500 and above, and with
        "Request failed" for 400 to 499.
    """
    if status_code < 400:
        logger.debug('Request succeed '
                     'with status code: {}'.format(status_code))
        return status_code

    if status_code >= 500:
        message = 'Server connection failed with error: {}'.format(
            status_code)
    elif status_code >= 400:
        message = 'Request failed with error: {}'.format(status_code)
    else:
        # Only reachable for a value which does not compare with numbers
        # in the usual way; nothing is raised, as in the plain
        # fall-through case.
        return None

    logger.error(message)
    raise ZenodoException(message)
def get_field(self, item):
    """
    Extract the fields of one product entry and schedule it for saving.

    Nothing is extracted past the URL when ``exist_by_urlmd5`` does not
    return ``False`` for the MD5 of the product URL. Any exception raised
    while extracting is logged and swallowed.

    :param item: node of one product entry, queried with relative XPath
        expressions (presumably an lxml element -- confirm with the caller)
    :return: None
    """
    try:
        pic_box = item.xpath(u'div[@class="pic-box J_MouseEneterLeave J_PicBox"]//*')[0]

        # Product URL and its MD5 (used as the de-duplication key)
        href = pic_box.xpath(u'div[@class="pic"]//a')[0].attrib.get("href")
        url = self.is_startswith(href)
        url_md5 = hashlib.md5(url).hexdigest()

        # Only insert what the database does not hold yet
        if exist_by_urlmd5(url_md5) is not False:
            return

        # Cover picture: "src" when set, "data-src" otherwise
        img = pic_box.xpath(u'div[@class="pic"]//img')[0].attrib
        cover = self.is_startswith(img.get("src") or img.get("data-src"))

        # "Same style" and "similar" links; both default to ""
        same_style_url = ""
        similar_url = ""
        similars = pic_box.xpath(u'div[@class="similars"]//a')
        if similars:
            same_href = similars[0].attrib.get("href")
            if same_href is not None:
                same_style_url = "https://s.taobao.com" + same_href
            if len(similars) > 1:
                similar_href = similars[1].attrib.get("href")
                if similar_href is not None:
                    similar_url = "https://s.taobao.com" + similar_href

        rows = item.xpath(u'div[@class="ctx-box J_MouseEneterLeave J_IconMoreNew"]/div')

        # Price of the product
        price = rows[0].xpath(u'div[@class="price g_price g_price-highlight"]/strong')[0].text

        # Number of buyers: keep the digits only, 0 when there is no text
        deal_text = rows[0].xpath(u'div[@class="deal-cnt"]')[0].text
        if deal_text is None:
            sale_num = 0
        else:
            sale_num = "".join(ch for ch in deal_text if ch.isdigit())

        # Product name: serialise the link, then strip the HTML tags
        title_markup = etree.tounicode(rows[1].xpath(u'a')[0])
        title = re.sub('<[^>]+>', "", title_markup).strip()

        # Shop name and shop address
        shop_name = rows[2].xpath(u'div/a/span')[1].text
        addr = rows[2].xpath(u'div')[1].text

        # Tmall shop flag
        tianmao = rows[3].xpath(u'div/ul/li//span[@class="icon-service-tianmao"]')
        is_tmall = 1 if tianmao else 0

        data = {
            "url": url,
            "title": title,
            "cover": cover,
            "price": price,
            "sale_num": sale_num,
            "shop_name": shop_name,
            "addr": addr,
            "is_tmall": is_tmall,
            "url_md5": url_md5,
            "same_style_url": same_style_url,
            "similar_url": similar_url,
        }
        self.pool.spawn(self.save, data)
    except Exception as e:
        logger.error("获取字段异常: " + str(e), exc_info=True)
def save(self, data):
    """
    Save one record to the database.

    The record is handed to ``save_tb``; any exception it raises is logged
    with its traceback and swallowed.

    :param data: record passed as-is to ``save_tb``
    :return: None
    """
    try:
        save_tb(data)
    except Exception as e:
        message = "保存到数据库异常: " + str(e)
        logger.error(message, exc_info=True)
def req_url(self, url):
    """
    Request a URL and schedule the parsing of the page.

    The page is loaded with ``self.browser``; its source is decoded as
    UTF-8 (undecodable bytes are ignored) and handed to ``self.parse_html``
    through ``self.pool.spawn``. Any exception is logged with its traceback
    and swallowed.

    :param url: address to load
    :return: None
    """
    try:
        self.browser.get(url)
        html = self.browser.page_source.decode('utf-8', 'ignore')
        self.pool.spawn(self.parse_html, html)
    except Exception as e:
        message = "请求url异常: " + str(e)
        logger.error(message, exc_info=True)
def _verify_token(self):
    """Test that a token has been defined.

    Only the presence of the token is checked here, not its validity.

    Raises
    ------
    ZenodoException
        if the token is not defined (``self.token`` is None).
    """
    if self.token is not None:
        return

    message = 'No Zenodo token provided'
    logger.error(message)
    raise ZenodoException(message)
def set_schema(self, schema):
    """Set the schema from a file name, or set/extend it from a dictionary.

    Parameters
    ----------
    schema: str or dict
        * str: name of the schema file. It is read with
          ``self._read_schema`` and replaces the current schema.
        * dict: used as the schema when ``self._schema`` is None, otherwise
          merged into the current schema with ``dict.update``.

    Raises
    ------
    ZenodoMetadataException
        if *schema* is neither a str nor a dict.
    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # such as OrderedDict are accepted too.
    if isinstance(schema, str):
        logger.info('Schema file use: {}'.format(schema))
        self._schema = self._read_schema(schema)
    elif isinstance(schema, dict):
        logger.info('Schema provided through dictionary object')
        if self._schema is None:
            self._schema = schema
        else:
            self._schema.update(schema)
    else:
        message = 'Something is wrong with the schema: {}.'.format(schema)
        logger.error(message)
        raise ZenodoMetadataException(message)
def _read_metadata(fmetadata):
    """Read a Zenodo metadata file.

    Parameters
    ----------
    fmetadata: str
        Name of the YAML file which contains the metadata.

    Returns
    -------
    _metadata:
        Content of the file as returned by ``yaml.load`` (None for an empty
        file). When it is a dictionary with a ``communities`` entry, the
        ``identifier`` of every community is converted to lower case.

    Raises
    ------
    ZenodoMetadataException
        if the file does not exist.
    """
    logger.info('Read metadata from: {}'.format(fmetadata))
    try:
        with open(fmetadata) as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects from the file, and PyYAML >= 6
            # rejects the call. Consider yaml.safe_load once confirmed that
            # metadata files only hold plain YAML.
            _metadata = yaml.load(f)
    except FileNotFoundError as err:
        message = 'Metadata file not found: {}'.format(fmetadata)
        logger.error(message)
        raise ZenodoMetadataException(message) from err

    # Change communities identifier in lower case (only format accepted by
    # zenodo). The isinstance guard keeps an empty file (loaded as None)
    # from failing here with a TypeError.
    if isinstance(_metadata, dict) and 'communities' in _metadata:
        for _com in _metadata['communities']:
            _com['identifier'] = _com['identifier'].lower()

    return _metadata
def delete(self, _id=None):
    """Delete a deposition.

    Parameters
    ----------
    _id: int
        deposition id of the record to delete. When given, it is stored in
        ``self.deposition_id``; when None, the current
        ``self.deposition_id`` is used.

    .. note:: it works only if the deposition is not published.

    Raises
    ------
    ZenodoException
        if the token is not defined (token = None) or if the request
        returns a status >= 400. The status of the request is kept in
        ``self.status_code``.
    """
    # Test if token was defined
    self._verify_token()

    if _id is not None:
        self.deposition_id = _id

    request_url = self.depositions_url + '/{}'.format(self.deposition_id)
    logger.info('Delete url: {}'.format(request_url))

    response = requests.delete(request_url,
                               params={'access_token': self.token})
    self.status_code = response.status_code
    logger.debug('Status code: {}'.format(self.status_code))

    if self.status_code < 400:
        return

    message = 'Request_url does not exist or bad token. ' \
              'Error: {}'.format(self.status_code)
    logger.error(message)
    raise ZenodoException(message)
def _get_opendefinition_file():
    """Download the definition file of the licenses from opendefinition.org.

    Returns
    -------
    licenses: dict
        Content of
        https://licenses.opendefinition.org/licenses/groups/all.json as
        decoded by ``json.load``.

    Raises
    ------
    ZenodoMetadataException
        if the url cannot be opened (``urllib.error.URLError``).
    """
    url = 'https://licenses.opendefinition.org/licenses/groups/all.json'
    try:
        with urllib.request.urlopen(url) as response:
            licenses = json.load(response)
            logger.info(
                'open licenses file use for validation: {}'.format(url))
            return licenses
    except urllib.error.URLError:
        message = ('Not possible to access to the list '
                   '(internet connection problem?): {}'.format(url))
        logger.error(message)
        raise ZenodoMetadataException(message)
def get_deposition_id(self):
    """Obtain the deposition id needed to upload documents to Zenodo.

    An empty deposition is created with a POST request on
    ``self.depositions_url``. The status of the request is stored in
    ``self.status_code`` and the ``id`` entry of the JSON answer in
    ``self.deposition_id``.

    Attributes
    ----------
    deposition_id: int
        Deposition id given by the Zenodo deposition api, to be used to
        upload files and metadata.

    Raises
    ------
    ZenodoException
        if the token is not defined (token = None) or if the request
        returns a status >= 400.
    """
    headers = {'Content-Type': 'application/json'}

    # Test if the token is defined before contacting Zenodo
    self._verify_token()

    request = requests.post(self.depositions_url,
                            params={'access_token': self.token},
                            json={},
                            headers=headers)
    self.status_code = request.status_code
    logger.debug('Status code: {}'.format(self.status_code))
    logger.debug('deposition url: {}'.format(self.depositions_url))

    # Test that the request succeeded
    if self.status_code >= 400:
        message = ('Deposition id not obtain, '
                   'error: {}'.format(self.status_code))
        logger.error(message)
        raise ZenodoException(message)

    self.deposition_id = request.json()['id']
    logger.info('Deposition id: {}'.format(self.deposition_id))
    # Log the url of the new deposition; the id alone is logged just above.
    logger.info('Deposition url: {}/{}'.format(self.depositions_url,
                                               self.deposition_id))
def _read_schema(fschema):
    """Read the schema.

    Parameters
    ----------
    fschema: str
        Name of the YAML file which contains the definition of the schema.

    Returns
    -------
    _schema: dict
        Content of the file as returned by ``yaml.load``; it is the schema
        used to validate the metadata.

    Raises
    ------
    ZenodoMetadataException
        if the file does not exist.
    """
    logger.info('Read schema from: {}'.format(fschema))
    try:
        with open(fschema) as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects from the file, and PyYAML >= 6
            # rejects the call. Consider yaml.safe_load once confirmed that
            # schema files only hold plain YAML.
            _schema = yaml.load(f)
    except FileNotFoundError as err:
        message = 'Schema file not found: {}'.format(fschema)
        logger.error(message)
        raise ZenodoMetadataException(message) from err

    return _schema
def connection(self):
    """Test that the connection with the Zenodo website is working.

    A GET request is sent on ``self.depositions_url`` with the token and
    its status is stored in ``self.status_code``. When Zenodo answers with
    an error, ``self.token`` is reset to None before the exception is
    raised.

    Raises
    ------
    ZenodoException
        if the token is not defined (token = None) or if the request
        returns a status >= 400.
    """
    # Test if the token is defined before contacting Zenodo
    self._verify_token()

    response = requests.get(self.depositions_url,
                            params={'access_token': self.token})
    self.status_code = response.status_code
    logger.debug('Status code: {}'.format(self.status_code))

    if self.status_code < 400:
        return

    # Error from Zenodo (status >= 400)
    message = ('Access token not accepted by Zenodo. '
               'Error: {}'.format(self.status_code))
    logger.error(message)
    self.token = None
    raise ZenodoException(message)
async def get_phone(sem, id_):
    """Scrape the contact details of *id_* and store them in ``t_info``.

    Nothing is fetched when ``t_info`` already holds a row for *id_*.
    Otherwise the page is downloaded, the ``cpy-info``, ``cpy-tel`` and
    ``cpy-url`` blocks are read from it and a new row is inserted.

    :param sem: async context manager entered around the HTTP request
        (presumably an ``asyncio.Semaphore`` bounding the concurrency --
        confirm with the caller)
    :param id_: identifier used both in the page URL and as ``id_`` value
        in ``t_info``
    :return: True when the row already exists or has been inserted, False
        when anything failed while fetching, parsing or inserting (the
        failure is logged)
    """
    # NOTE(review): `engine` is assumed to be a SQLAlchemy engine (the
    # `.execute(...).fetchmany()` usage matches its API) -- confirm.
    # Bound parameters replace the f-string SQL: the inserted text comes
    # from a scraped page, so a quote in it broke the statement and opened
    # it to SQL injection.
    from sqlalchemy import text

    count_query = text('select count(1) from t_info where id_ = :id_')
    result = engine.execute(count_query, {'id_': id_}).fetchmany()[0][0]
    if result != 0:
        return True

    url = f'http://lxbjs.baidu.com/cb/url/show?f=55&id={id_}'
    async with ClientSession() as session:
        async with sem:
            try:
                async with session.get(url, headers=headers, timeout=10) as respone:
                    page = await respone.text()
                    soup = BeautifulSoup(page, 'html.parser')
                    info = soup.find('div', class_='cpy-info').text
                    tel = soup.find('div', class_='cpy-info cpy-tel').text
                    site = soup.find('div', class_='cpy-info cpy-url').find('a').text
                    insert_query = text(
                        'insert into t_info values(:id_, :info, :tel, :url)')
                    engine.execute(
                        insert_query,
                        {'id_': id_, 'info': info, 'tel': tel, 'url': site})
                    logger.info(f'爬取信息成功: [{id_}] {info}')
                    return True
            except Exception as e:
                # Best effort: a missing block in the page (find() returning
                # None) or a network/database error only fails this id.
                logger.error(f'爬取信息失败: [{id_}] {type(e)}: {str(e)}')
                return False
def main(args=None):
    """Run datalight scripts to upload file on data repository

    Command line::

        Usage: datalight [-h | --help] <files>... (-m <metadata> | --metadata=<metadata>) [options]

        Options:
        -m FILE --metadata=FILE     File which contains the metadata information
        -z zipname --zipname=FILE   Name of the zip file which will be uploaded [default: data.zip]
        --nozip                     Do not create zip file containing the data to upload
        -r NAME --repository=NAME   Name of a data repository [default: zenodo]
        -p --publish                If present publish the data
        -s --sandbox                If present, datalight will use the sandbox data repository
        -k --keep                   Keep zip file created
        -h --help                   Print this help
        -v --version                Print version of the software

        Examples:
        datalight file1 file2
        datalight directory --metadata=metadata.yml --repository=zenodo
        datalight file -m metadata.yml

    Parameters
    ----------
    args: list of str, optional
        Command line arguments handed to docopt as ``argv``. With None,
        docopt reads the arguments of the running process.

    Raises
    ------
    SystemExit
        if the file or the folder to treat is not available.
    KeyError
        if no key found for the data repository wanted
    ImportError
        if the not possible to import the data repository wanted
    """
    # NOTE: this docstring is parsed by docopt; keep the layout of the
    # command line part (one blank line after the Usage line, at least two
    # spaces between an option and its description).
    arguments = docopt(main.__doc__, argv=args, version=__version__)

    # Get the list of the paths of the files to upload
    files = []
    try:
        for fname in arguments['<files>']:
            files += get_files_path(fname)
    except DatalightException:
        logger.error('Problem with the files to upload.')
        # Non-zero status: this is a failure, like the metadata check below
        sys.exit(1)

    # Name of the metadata file
    metadata = arguments['--metadata']
    if not os.path.exists(metadata):
        logger.error('Metadata file: {} does not exist.'.format(metadata))
        sys.exit(1)

    # Choice of repository, Zenodo by default
    repository = arguments['--repository']
    if repository is None:
        repository = 'zenodo'
    # Refuse an unknown repository before creating any zip file, instead of
    # failing later on an undefined repository class.
    if repository != 'zenodo':
        message = 'No data repository named: {}'.format(repository)
        logger.error(message)
        raise KeyError(message)

    # If sandbox is present the version of the repository used will be the
    # sandbox one
    sandbox = arguments['--sandbox']

    # Directory handed to upload_files; defined here so that it also exists
    # when --nozip is given.
    # NOTE(review): '.' assumes upload_files resolves each file relative to
    # this path, as it does for the zip file -- confirm.
    directory = '.'

    # Zip the data in an archive (to keep paths)
    if not arguments['--nozip']:
        zipname = arguments['--zipname']
        zipdata(files, zipname)
        # The zip file created is now the only file to upload
        files = [zipname]

    try:
        from .zenodo import Zenodo as DataRepo
        from .zenodo import ZenodoException as DataRepoException
    except ImportError:
        from zenodo import Zenodo as DataRepo
        from zenodo import ZenodoException as DataRepoException

    # Read the zenodo token file from the home directory
    tokenfile = os.path.join(home, '.zenodo')
    zenoconfig = configparser.ConfigParser()
    zenoconfig.read(tokenfile)
    section = 'sandbox.zenodo.org' if sandbox else 'zenodo.org'
    try:
        token = zenoconfig[section]['lightform']
    except KeyError:
        token = input('Provide Zenodo token: ')
        # Save the token in the token file: the new section is appended
        config = configparser.ConfigParser()
        config[section] = {'lightform': token}
        with open(tokenfile, 'a', encoding="utf-8") as configfile:
            config.write(configfile)

    datarepo = DataRepo(token=token, sandbox=sandbox)
    datarepo.get_deposition_id()
    datarepo.upload_files(files, path=directory)
    datarepo.set_metadata(metadata)
    datarepo.upload_metadata()
    if arguments['--publish']:
        datarepo.publish()

    # Remove the zip file created, unless asked to keep it
    if not arguments['--nozip'] \
            and not arguments['--keep'] \
            and len(files) == 1:
        logger.info('Remove created zip file: {}'.format(files[0]))
        os.remove(files[0])

    logger.info("Finished " + logger.name)
def _check_license_availability(self, flicenses=None, opendefinition=False):
    """Verify the license given in the metadata.

    When *access_right* is ``open`` or ``embargoed``, the *license*
    keyword has to be part of the list provided by the `Open Definition
    License Service <https://licenses.opendefinition.org/>`_. The list is
    taken either from a json file on disk or from the file published by
    the service:
    https://licenses.opendefinition.org/licenses/groups/all.json

    The license found in the metadata is converted to upper case (the
    metadata is modified in place) and is accepted when at least one
    license identifier of the list starts with it.

    .. important::
        The file on disk **could** be out-dated. Since the upload of the
        data on Zenodo will do the verification it is not a major problem
        but the user has to be careful.

    Parameters
    ----------
    flicenses: str, optional
        Name of the json file which contains the licenses. With None, the
        file ``schemas/zenodo/opendefinition-licenses.json`` located under
        ``_dir`` is used. When the file does not exist, the list is
        downloaded instead.
    opendefinition: bool, optional
        If True, the list is downloaded and *flicenses* is ignored.

    Returns
    -------
    True when no check is needed (*access_right* is neither ``open`` nor
    ``embargoed``) or when the license is validated; None when the
    metadata has no *license* keyword.

    Raises
    ------
    ZenodoMetadataException
        if the license is not in the list accepted by Zenodo as open.

    TODO: modify method to use file on disk before and if license not there,
    TODO: look at the file on internet and retest it.
    """
    access_right = self._metadata['access_right']

    # Other access rights do not need an open compliant license
    if access_right not in ('open', 'embargoed'):
        logger.info('No need to check license for Zenodo upload.')
        return True

    if opendefinition:
        # Licenses information from the opendefinition website
        licenses = self._get_opendefinition_file()
    else:
        # Licenses information from the input file or the default file
        if flicenses is None:
            flicenses = os.path.join(_dir, 'schemas', 'zenodo',
                                     'opendefinition-licenses.json')
        try:
            with open(flicenses) as f:
                licenses = json.load(f)
                logger.info(
                    'Use file: {} to validate license'.format(flicenses))
        except FileNotFoundError:
            licenses = self._get_opendefinition_file()

    if 'license' not in self._metadata:
        return None

    mlicense = self._metadata['license'].upper()
    self._metadata['license'] = mlicense
    logger.info('License present in metadata file: '
                '"{}"'.format(mlicense))
    logger.info('access_right: "{}"'.format(access_right))

    # First identifier of the list which starts with the license
    matched = next((lic for lic in licenses if lic.startswith(mlicense)),
                   None)
    if matched is not None:
        logger.info('license: "{}" validated.'.format(matched))
        return True

    message = ('license: "{}" is not listed as '
               'open by Zenodo'.format(self._metadata['license']))
    logger.error(message)
    raise ZenodoMetadataException(message)
def run_monitoring_tool():
    """Poll NiceHash forever; e-mail rig status changes and the balance.

    Each iteration (one every 60 seconds):

    * the connected rigs are fetched and compared with ``c.RIG_HOSTNAMES``;
      an e-mail is sent when a monitored rig goes from connected to missing
      or from missing to connected (every rig is considered connected at
      start-up);
    * when more than four hours have passed since the last report, the
      unpaid balance in BTC and its conversion in each currency of
      ``c.REFERENCE_FIAT_CURRENCY`` are e-mailed.

    An exception raised during an iteration is logged and the loop goes on
    after the usual sleep. The function never returns.
    """
    mailer = AlertEmailSender(c.MAIL.GMAIL_USERNAME,
                              c.MAIL.GMAIL_PASSWORD,
                              c.MAIL.NOTIFICATION_EMAIL,
                              c.MAIL.EMAIL_SUBJECT)

    # Part online - offline
    nice_hash_client = NiceHashClient(c.BITCOIN_WALLET_PUBLIC_ID)
    polling_interval_sec = 60  # 1 minute
    monitored_rigs = c.RIG_HOSTNAMES
    previous_statuses = [True] * len(monitored_rigs)  # initial statuses
    current_statuses = list(previous_statuses)

    # Part balance
    balance_report_period_sec = 60 * 60 * 4  # in seconds
    last_balance_report = 0

    while True:
        logger.debug('run_monitoring_tool() - RUNNING')
        try:
            # Online - offline inspection
            rig_names, _speeds, _up_times, _locations, _algo_ids = \
                nice_hash_client.get_mining_rigs()
            connected = set(rig_names)
            for idx, rig in enumerate(monitored_rigs):
                if rig in connected:
                    logger.debug('{} is connected.'.format(rig))
                    current_statuses[idx] = True
                    if previous_statuses[idx] is False:
                        mailer.send_email(
                            email_content='[{}] host successfully connected.'.format(rig))
                else:
                    logger.debug('{} is down.'.format(rig))
                    current_statuses[idx] = False
                    if previous_statuses[idx] is True:
                        mailer.send_email(
                            email_content='[{}] host is down. Please check.'.format(rig))
            # Only reached when the whole inspection went through
            previous_statuses = list(current_statuses)

            # Balance report
            if (time() - last_balance_report) > balance_report_period_sec:
                currencies = c.REFERENCE_FIAT_CURRENCY
                currencies = (currencies.split(',') if ',' in currencies
                              else [currencies])

                unpaid_btc = nice_hash_client.get_unpaid_balance_btc()
                fiat_amounts = []
                for currency in currencies:
                    rate = get_btc_usd_rate(currency)
                    if rate is None:
                        # No rate available: -1.0 is used instead
                        rate = -1.0
                    fiat_amounts.append(unpaid_btc * rate)

                summary = ', '.join(
                    '{0:.2f} {1}'.format(amount, currency)
                    for amount, currency in zip(fiat_amounts, currencies))
                body = 'Your unpaid balance is now {0:.8f} BTC ({1} approx).'.format(
                    unpaid_btc, summary)
                mailer.send_email(email_content=body)
                last_balance_report = time()
        except Exception as e:
            logger.error(e)

        logger.debug('Going to sleep for {} seconds.'.format(polling_interval_sec))
        sleep(polling_interval_sec)