def post(self, uri, data=None, files=None, verify=False):
    """
    Sends a POST request.

    @param uri: URI of the service API endpoint.
    @param data: Request payload, serialized as JSON. Default: None
    @param files: Files to upload. Default: None
    @param verify: Whether to verify the server's TLS certificate. Default: False

    @raise NetworkAPIClientError: Client failed to access the API.
    """
    request = None
    try:
        request = requests.post(
            self._url(uri),
            data=json.dumps(data),
            files=files,
            auth=self._auth_basic(),
            headers=self._header(),
            verify=verify
        )
        request.raise_for_status()
        return self._parse(request.text)
    except HTTPError:
        error = self._parse(request.text)
        self.logger.error(error)
        raise NetworkAPIClientError(error.get('detail', ''))
    finally:
        self.logger.info('URI: %s', uri)
        # Guard against requests.post() itself raising (e.g. a connection
        # error), in which case 'request' was never assigned.
        if request is not None:
            self.logger.info('Status Code: %s', request.status_code)
            self.logger.info('X-Request-Id: %s', request.headers.get('x-request-id'))
            self.logger.info('X-Request-Context: %s', request.headers.get('x-request-context'))
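# Usage sketch for the post() method above. Hypothetical: assumes a
# NetworkAPIClient class that defines post() together with the _url(),
# _auth_basic(), _header() and _parse() helpers it relies on; the URI and
# payload below are illustrative only.
client = NetworkAPIClient()
try:
    result = client.post('api/vlan/', data={'name': 'VLAN_10'})
except NetworkAPIClientError as exc:
    print('API request failed: %s' % exc)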
def perform_rest_action(self, endpoint, hdrs, params, regions):
    """Construct and submit the POST or GET request."""
    if params:
        endpoint += '?' + urllib.parse.urlencode(params)
    data = None

    # Rate limiting: after reqs_per_sec requests, sleep out the remainder
    # of the current one-second window before continuing.
    if self.req_count >= self.reqs_per_sec:
        delta = time.time() - self.last_req
        if delta < 1:
            time.sleep(1 - delta)
        self.last_req = time.time()
        self.req_count = 0

    # Submit the POST or GET request to the Ensembl REST API server and
    # let raise_for_status() surface any error returned by the server.
    if regions:
        request = requests.post(self.server + endpoint, headers=hdrs,
                                data=json.dumps(regions))
    else:
        request = requests.get(self.server + endpoint, headers=hdrs)
    if not request.ok:
        request.raise_for_status()

    data = request.json()
    self.req_count += 1
    return data
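# Usage sketch for perform_rest_action(). Hypothetical: assumes a client
# object exposing server, reqs_per_sec, req_count and last_req attributes,
# e.g. with server='https://rest.ensembl.org'.
hdrs = {'Content-Type': 'application/json'}
# GET: look up a single gene symbol
gene = client.perform_rest_action(
    '/lookup/symbol/homo_sapiens/BRCA2', hdrs, params=None, regions=None)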
def get_html_text(url):
    """Fetch a page, retrying indefinitely on timeouts and connection errors."""
    while True:
        try:
            my_header = {
                "User-Agent": random.choice(my_headers.common_headers)
            }
            request = requests.get(url, headers=my_header, timeout=10)
            request.raise_for_status()
            request.encoding = request.apparent_encoding
            return request.text
        # 'except A and B' only catches B; a tuple catches both exceptions.
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            print('timeout. waiting retry....')
        except Exception as e:
            print(e)
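# Usage sketch for get_html_text(). Hypothetical: assumes a my_headers
# module providing a common_headers list of User-Agent strings.
html = get_html_text('https://example.com')
print(html[:200])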
def url_retry(url, num_retries=3):
    """Fetch and parse a page, retrying up to num_retries times on HTTP errors."""
    try:
        request = requests.get(url, timeout=60)
        # raise_for_status() raises HTTPError for non-200 responses
        request.raise_for_status()
        soup = get_soup(url)
    except requests.exceptions.HTTPError as e:
        soup = None
        with open('log.txt', 'a') as f:
            f.write(str(e) + '\n')
        if num_retries > 0:
            # On a non-200 response, recurse with a decremented retry count
            # and fetch the soup data again.
            return url_retry(url, num_retries - 1)
    except requests.exceptions.ConnectionError:
        # A nonexistent URL raises ConnectionError; no retry in that case.
        return None
    except requests.exceptions.Timeout:
        return None
    return soup
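# Usage sketch for url_retry(). Hypothetical: assumes a get_soup() helper
# that parses the page, e.g. with BeautifulSoup.
soup = url_retry('https://example.com')
if soup is not None:
    print(soup.title)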
def ensure(self):
    # Create the parent directory if it does not exist yet
    if not self._path.parent.exists():
        logging.info("Creating directory '%s'", self._path.parent)
        # Python 3.5+: exist_ok makes the exists() check above unnecessary
        # self._path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
        self._path.parent.mkdir(mode=0o755, parents=True)

    # If the file exists, validate it
    if self._path.exists():
        if self._md5sum is not None:
            hasher = hashlib.md5()
            with self._path.open(mode='rb') as f:
                # Hash in 1 MiB chunks; an empty read marks end of file
                # (file objects have no eof() method)
                for chunk in iter(lambda: f.read(1024 * 1024), b''):
                    hasher.update(chunk)
            file_md5sum = hasher.hexdigest()
            if file_md5sum == self._md5sum:
                # All is well
                logging.info("Hash ok for '%s', not downloading", self._path)
                return
            else:
                # There should not be any mismatches here, strange.
                logging.warning(
                    "Hash mismatch for '%s' (expected: %s, got: %s), removing file",
                    self._path, self._md5sum, file_md5sum)
                self._path.unlink()

    # Make sure that we don't use a link to a url shortener service
    self._unshorten()

    # Download the file
    headers = {}
    if self._md5sum is not None:
        # 'etag' is a response header; the standard conditional-request
        # header would be 'If-None-Match'.
        headers['etag'] = self._md5sum
    logging.info("Downloading %s", self._url)
    request = requests.get(self._url, headers=headers)
    if request.status_code != 200:
        logging.error("Unable to download url '%s'", self._url)
        request.raise_for_status()

    # Write the file, and check the md5 checksum while doing it
    hasher = hashlib.md5()
    with self._path.open(mode='wb') as f:
        hasher.update(request.content)
        f.write(request.content)
    file_md5sum = hasher.hexdigest()

    # Check the ETag, if we got one from the server
    # etag = request.headers.get('etag')
    etag = None
    if etag is not None and etag != file_md5sum:
        self._path.unlink()
        raise DownloadException(
            "ETag mismatch for '%s' (expected: %s, got: %s), removing file"
            % (self._url, etag, file_md5sum))

    # Check the md5sum, if we got one from the caller
    if self._md5sum is not None and file_md5sum != self._md5sum:
        # The server did not serve the expected file, raise an error
        self._path.unlink()
        raise DownloadException(
            "Hash mismatch for '%s' (expected: %s, got: %s), removing file"
            % (self._url, self._md5sum, file_md5sum))
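# Standalone sketch of the chunked-MD5 technique used in ensure() above:
# reading in 1 MiB blocks keeps memory flat regardless of file size.
# Hypothetical helper, not part of the original class.
import hashlib

def md5_of_file(path, chunk_size=1024 * 1024):
    hasher = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()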
def main():
    # Log file setting ('now' instead of 'time' avoids shadowing the time module)
    now = datetime.datetime.now()
    logFileName = now.strftime('%d%m%y%H%M%S') + '.log'
    logging.basicConfig(filename=logFileName,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')
    logger = logging.getLogger(__name__)

    # Config.json location
    configFile = 'config.json'
    with open(configFile) as globalSettings:
        config = json.load(globalSettings)

    # Loading global settings
    teamNum = str(config['team'])
    state = str(config['state']).lower()
    ACCESS_KEY = str(config['AWSAccess'])
    SECRET_KEY = str(config['AWSSecret'])
    link = str(config['link'])
    email = str(config['notificationEmail'])
    assignNum = 1

    # Variables
    bucketName = 'team' + teamNum + state + 'assignment' + str(assignNum)

    # Connect to S3
    s3Session = boto3.Session(
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )
    s3 = s3Session.resource('s3')
    logger.info('S3 Connected.')

    # Create a bucket (name must not contain uppercase letters).
    # First check whether the bucket already exists.
    isCreated = False
    bucket = None
    for bucket in s3.buckets.all():
        if bucket.name == bucketName:
            isCreated = True
            print('Skip: Bucket(' + bucketName + ') Already Created.')
            logger.warning('Bucket(' + bucketName + ') Already Created.')
            break
    if not isCreated:
        bucket = s3.create_bucket(Bucket=bucketName,
                                  CreateBucketConfiguration={
                                      'LocationConstraint': 'us-west-2'})
        logger.info('Bucket(' + bucketName + ') Created')
        bucket.Acl().put(ACL='public-read')
        logger.info('Bucket Set to Public')

    # Access LCD
    print('###### Trying to Access LCD DataSet ######')
    try:
        request = requests.get(link)
        # Check the status before touching the body
        request.raise_for_status()
        content = request.content
    except requests.exceptions.HTTPError as e:
        print(e)
        logger.error(e)
        logger.error('No LCD DataSet Found, DataIngestion Stopped.')
        print('###### No LCD DataSet Found, DataIngestion Stopped. ######')
        return

    data = pd.read_csv(io.StringIO(content.decode('utf-8')), dtype=str, sep=',')
    logger.info('Reading DataSet from URL')
    stationId = str(data.loc[0, 'STATION'])  # .ix is deprecated; use .loc
    station, id = stationId.split(':')
    date = now.date().strftime('%d%m%Y')
    fileName = str(config['state']) + '_' + date + '_' + station + '_' + id + '.csv'
    print('###### LCD DataSet Loading Completed ######')

    # Upload file to S3, first checking whether it already exists
    logger.info('Checking Whether File Exists on S3...')
    isExist = check_file(bucket, fileName)
    if not isExist:
        # Download to the local file system
        logger.info('No Data on S3: Try Downloading DataSet from URL')
        urllib.request.urlretrieve(link, fileName)
        logger.info('Download Completed')
        # Upload today's data to S3
        logger.info('Starting Upload Data till Today to S3')
        with open(fileName, 'rb') as body:
            s3.Object(bucketName, fileName).put(Body=body)
        s3.Object(bucketName, fileName).Acl().put(ACL='public-read')
        print('Upload: Success')
        logger.info('Data till Today Upload Succeed')
    else:
        logger.warning('File(' + fileName + ') Already Exists on S3.')

    # Program end
    logger.info('###### DataIngestion Finished ######')
    print('###### Find logs in ddmmyyHHMMSS.log ######')
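# Hypothetical entry-point guard (not shown in the original snippet): run
# the ingestion when the script is executed directly.
if __name__ == '__main__':
    main()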