def __init__(self, filename="/run/media/eea1ee1d-e5c4-4534-9e0b-24308315e271/pynews/stream/clusteringData.db"):
    logger.info("Start building " + self.__class__.__name__)
    self.__mutex = threading.Semaphore()
    data = shelve.open(filename, protocol=-1, flag="r")
    langDetect = LangDetect.instance()
    vectors = [features(item["text"]) for digest, item in data.items()
               if item["text"] and item["text"] != "ERROR" and langDetect.detect(item["text"]) == "en"]
    self.__maxV = calcDiv(vectors)
    # vectors = normalize(vectors, self.__maxV)
    means = [array([10, 40, 0, 1]), array([30, 340, 2, 30]), array([120, 1500, 15, 50])]
    self.__clusterer = cluster.KMeansClusterer(3, euclidean_distance, initial_means=means,
                                               avoid_empty_clusters=True)
    self.__clusterer.cluster(vectors)
    klassIdToSize = {"0": 0, "1": 0, "2": 0}
    klassIdToWordsCount = {"0": 0, "1": 0, "2": 0}
    for item in data.itervalues():
        text = item["text"]
        if text and text != "ERROR":
            feat = features(text)
            # feat = normalize(feat, self.__maxV)
            klass = str(self.__clusterer.classify(feat))
            klassIdToSize[klass] += 1
            klassIdToWordsCount[klass] += len(text.split())
    data.close()
    results = []
    for klassId in ["0", "1", "2"]:
        meanWordsInKlass = klassIdToWordsCount[klassId] / klassIdToSize[klassId] if klassIdToSize[klassId] != 0 else 0
        results.append({"klass": klassId, "mean": meanWordsInKlass})
    logger.info("Clustering results: " + str(results))
    # Sort clusters by mean document length so labels map from shortest to longest.
    sortedKlass = sorted(results, key=lambda x: x["mean"])
    self.__klassIdToLabel = {item["klass"]: label
                             for item, label in zip(sortedKlass, ["short", "medium", "long"])}
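# The constructor above calls features() and calcDiv(), which are not shown in
# this snippet. The sketch below is only an assumption about what they might do,
# inferred from the four-component initial means; it is not the original code.
from numpy import array

def features(text):
    """Hypothetical 4-dimensional feature vector for a document."""
    words = text.split()
    sentences = [s for s in text.split(".") if s.strip()]
    return array([len(sentences),                          # sentence count
                  len(words),                              # word count
                  sum(1 for w in words if w.istitle()),    # capitalised words
                  len(set(words)) // 10])                  # rough vocabulary size

def calcDiv(vectors):
    """Hypothetical per-component maximum, usable as a normalisation divisor."""
    maxV = vectors[0].copy()
    for v in vectors[1:]:
        maxV = array([max(a, b) for a, b in zip(maxV, v)])
    return maxV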
def run_standalone_test():
    run("service couchbase-server stop", warn_only=True)
    with shell_env(LD_LIBRARY_PATH="{}/forestdb/build".format(args.remote_workdir)):
        with cd(args.remote_workdir):
            run("rm -rf data/")
            run("mkdir data")
            run("ldd ./{}".format(prog_name))
            run("./{}".format(prog_name))
            run("cat incrementalsecondary.txt")

            # Now for internal processing and posting to showfast
            output_text = run("cat incrementalsecondary.txt")
            groups = re.search(
                r"initial index build time[^\d]*(\d*).*?seconds", output_text)
            initial_time = int(groups.group(1))

            groups = re.search(
                r"incrmental index build time[^\d]*(\d*).*?seconds", output_text)
            incremental_time = int(groups.group(1))

            logger.info("Grepped initial build time {}".format(initial_time))
            logger.info("Grepped incremental build time {}".format(incremental_time))

            if initial_time:
                post_initial(initial_time)
            if incremental_time:
                post_incremental(incremental_time)
def initialize_project(self):
    logger.info('Initializing local worker environment')
    with quiet():
        local('virtualenv -p python2.7 env')
        local('PATH=/usr/lib/ccache:/usr/lib64/ccache/bin:$PATH '
              'env/bin/pip install '
              '--download-cache /tmp/pip -r requirements.txt')
def consume(self):
    _, password = self.cluster_spec.rest_credentials
    for master in self.cluster_spec.yield_masters():
        host = master.split(':')[0]
        for bucket in self.test_config.buckets:
            logger.info(
                'Reading data via UPR from {}/{}'.format(host, bucket)
            )
            upr_client = UprClient(host=host, port=11210)
            upr_client.sasl_auth_plain(username=bucket, password=password)
            mcd_client = MemcachedClient(host=host, port=11210)
            mcd_client.sasl_auth_plain(user=bucket, password=password)

            op = upr_client.open_producer("stream")
            response = op.next_response()
            if response['status'] != SUCCESS:
                logger.interrupt('Failed to open producer')

            for vb in range(1024):
                vb_stats = mcd_client.stats('vbucket-seqno {}'.format(vb))
                uuid = long(vb_stats['vb_{}:uuid'.format(vb)])
                high_seqno = long(vb_stats['vb_{}:high_seqno'.format(vb)])

                op = upr_client.stream_req(vb=vb,
                                           flags=0,
                                           start_seqno=0,
                                           end_seqno=high_seqno,
                                           vb_uuid=uuid,
                                           high_seqno=high_seqno)
                while op.has_response():
                    response = op.next_response()
                    if response['opcode'] != CMD_STREAM_REQ:
                        break
                upr_client.close_stream(vbucket=vb)
            upr_client.shutdown()
def initialize_project(self):
    for worker, master in zip(self.cluster_spec.workers,
                              self.cluster_spec.yield_masters()):
        state.env.host_string = worker
        run('killall -9 celery', quiet=True)
        for bucket in self.buckets:
            logger.info('Initializing remote worker environment')

            qname = '{}-{}'.format(master.split(':')[0], bucket)
            temp_dir = '{}-{}'.format(self.temp_dir, qname)

            r = run('test -d {}'.format(temp_dir), warn_only=True, quiet=True)
            if r.return_code == 0:
                if self.reuse_worker == 'true':
                    return
                logger.error('Worker env exists, but reuse not specified')
                sys.exit(1)

            run('mkdir {}'.format(temp_dir))
            with cd(temp_dir):
                run('git clone {}'.format(REPO))
            with cd('{}/perfrunner'.format(temp_dir)):
                run('virtualenv -p python2.7 env')
                run('PATH=/usr/lib/ccache:/usr/lib64/ccache/bin:$PATH '
                    'env/bin/pip install '
                    '--download-cache /tmp/pip -r requirements.txt')
def on_step_run(self):
    # `get_step_options` is provided by the `StepOptionsController` mixin.
    options = self.get_step_options()
    if options.run_pstrace and options.script:
        app_values = self.get_app_values()
        exe_path = path(app_values['pstrace_exe'])
        script = path(options.script)
        if not exe_path.isfile():
            logger.error('[PSTraceLauncher] invalid exe-path: %s'
                         % exe_path.abspath())
        elif not script.isfile():
            logger.error('[PSTraceLauncher] invalid script-path: %s'
                         % script.abspath())
        elif os.name != 'nt':
            logger.error('[PSTraceLauncher] This plugin is only supported '
                         'on Windows')
        else:
            pstrace_processes = [p for p in psutil.process_iter()
                                 if safe_psutil_attr(p, 'exe') == exe_path.abspath()]
            if not pstrace_processes:
                if options.delay_ms > 0:
                    logger.info('[PSTraceLauncher] delay: %s ms', options.delay_ms)
                    gtk.timeout_add(options.delay_ms, self._execute, exe_path, script)
                else:
                    self._execute(exe_path, script)
            else:
                logger.info('[PSTraceLauncher] skipping, since PSTrace is '
                            'already running as process %s',
                            [p.pid for p in pstrace_processes])
    self.complete_step()
def upload_and_delete_file(path):
    """Upload the given file to S3, then remove it from the local filesystem."""
    upload_file(path)
    logger.info('Upload complete. Deleting %s' % path)
    os.remove(path)
def wait_for_indexes_to_become_online(self, host, index_name=None):
    # Poll until the indexes become online
    url = 'http://{}:8093/query/service'.format(host)
    data = {
        'statement': 'SELECT * FROM system:indexes'
    }
    if index_name is not None:
        data = {
            'statement': 'SELECT * FROM system:indexes WHERE name = "{}"'.format(index_name)
        }

    ready = False
    while not ready:
        time.sleep(10)
        resp = requests.Session().post(url=url, data=data)
        if resp.json()['status'] == 'success':
            results = resp.json()['results']
            for result in results:
                if result['indexes']['state'] == 'online':
                    ready = True
                else:
                    ready = False
                    break
        else:
            logger.error('Query:{} => Did not return a success!'.format(data['statement']))

    if index_name is None:
        logger.info('All Indexes: ONLINE')
    else:
        logger.info('Index:{} is ONLINE'.format(index_name))
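# Hedged usage sketch: `rest` stands for whatever object exposes these REST
# helpers, and the host and index definition are illustrative assumptions only.
rest.exec_n1ql_stmnt('10.1.1.1', 'CREATE INDEX by_city ON `bucket-1`(city) USING GSI')
rest.wait_for_indexes_to_become_online('10.1.1.1', index_name='by_city')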
def set_data_path(self, host_port, data_path, index_path):
    logger.info('Configuring data paths: {}'.format(host_port))
    api = 'http://{}/nodes/self/controller/settings'.format(host_port)
    data = {
        'path': data_path, 'index_path': index_path
    }
    self.post(url=api, data=data)
def get_server_groups(self, host_port):
    logger.info('Getting server groups')
    api = 'http://{}/pools/default/serverGroups'.format(host_port)
    return {
        g['name']: g['addNodeURI']
        for g in self.get(url=api).json()['groups']
    }
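# Hedged usage sketch of the mapping returned above; the host and group name
# are made-up values for illustration.
groups = rest.get_server_groups('10.1.1.1:8091')
# e.g. {'Group 1': '/pools/default/serverGroups/0/addNode', ...}
add_node_uri = groups['Group 1']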
def exec_n1ql_stmnt(self, host, stmnt):
    logger.info('Executing: {}'.format(stmnt))
    api = 'http://{}:8093/query/service'.format(host)
    data = {
        'statement': '{0}'.format(stmnt)
    }
    return self.post(url=api, data=data)
def create_bucket(self, host_port, name, ram_quota, replica_number,
                  replica_index, eviction_policy, threads_number,
                  password, proxy_port=None):
    logger.info('Adding new bucket: {}'.format(name))

    api = 'http://{}/pools/default/buckets'.format(host_port)

    data = {
        'name': name,
        'bucketType': 'membase',
        'ramQuotaMB': ram_quota,
        'evictionPolicy': eviction_policy,
        'flushEnabled': 1,
        'replicaNumber': replica_number,
        'replicaIndex': replica_index,
    }
    if proxy_port is None:
        data.update({
            'authType': 'sasl',
            'saslPassword': password,
        })
    else:
        data.update({
            'authType': 'none',
            'proxyPort': proxy_port,
        })
    logger.info('bucket specification: {}'.format(data))

    if threads_number:
        data.update({'threadsNumber': threads_number})

    self.post(url=api, data=data)
def get_version(self, host_port):
    logger.info('Getting Couchbase Server version')
    api = 'http://{}/pools/'.format(host_port)
    r = self.get(url=api).json()
    return r['implementationVersion'] \
        .replace('-rel-enterprise', '').replace('-community', '')
def __call__(self, environ, start_response):
    """
    If the wsgi PATH_INFO starts with the static contents location,
    it will be returned. Otherwise the wrapped application will be called.
    """
    if environ['REQUEST_METHOD'] == 'GET' and environ['PATH_INFO'].startswith('/%s/' % self._location):
        logger.info('GET from %s: %s' % (environ.get('REMOTE_ADDR', 'unknown'),
                                         environ['PATH_INFO']))
        prefix = "/usr/share/skdrepo/"
        path = prefix + environ['PATH_INFO'][1:]
        try:
            f = open(path, 'r')
            data = f.read()
            f.close()
            (mime, encoding) = guess_type(path)
            status = '200 OK'
            response_headers = [('Content-Type', mime)]
            response_body = [data]
        except IOError, e:
            logger.warning('failed to open file: %s' % path)
            status = '404 Not Found'
            response_headers = [('Content-Type', 'text/plain')]
            response_body = ['404 Not Found - \'%s\'' % path]
        start_response(status, response_headers)
        logger.debug('response to %s: %s, %s' % (environ['REMOTE_ADDR'], status,
                                                 str(response_headers)))
        return response_body
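# Hedged wiring sketch for the middleware above: the class name `StaticContent`
# and its constructor signature are assumptions; only the wrapped-application
# behaviour is implied by the docstring.
from wsgiref.simple_server import make_server

def hello_app(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return ['hello\n']

app = StaticContent(hello_app, location='static')  # assumed signature
make_server('', 8000, app).serve_forever()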
def grouphandler(self, data):
    # logger.debug(data)
    # content_list = data['content'][1]
    content_list = data['content']  # a list that contains cface and text pieces
    content = ''
    for piece in content_list:
        if type(piece) == list:
            continue
        else:
            content += piece
    content = content.strip()
    if len(content) == 0:  # cface without text
        pass
    else:
        # logger.debug(content)
        if content[0] == '@':
            re = u'命令行可以使用'  # "the command line can be used"
            cmdcontent = content[1:]
            cmds = cmdcontent.split(':', 2)
            h = self.findHandler(cmds[0])
            if h is not None:
                h.grouphandler(data['from_uin'], cmds[1])
            else:
                self.send_group_msg(data['from_uin'], u'你能说人话么, 我怎么听不懂')
            # re = u"命令:" + cmds[0] + '内容:' + cmds[1]
            # self.send_group_msg(data['from_uin'], re)
        else:
            re = self.bot.reply(content)
            self.send_group_msg(data['from_uin'], re)
            logger.info("IN:%s\nreply group:%s" % (content, re))
def send_friend_msg(self, reply_content, uin, msg_id, fail_times=0):
    fix_content = str(reply_content.replace("\\", "\\\\\\\\")
                      .replace("\n", "\\\\n")
                      .replace("\t", "\\\\t"))
    rsp = ""
    try:
        req_url = "http://d1.web2.qq.com/channel/send_buddy_msg2"
        data = (
            ('r', '{{"to":{0}, "face":594, "content":"[\\"{4}\\", [\\"font\\", {{\\"name\\":\\"Arial\\", \\"size\\":\\"10\\", \\"style\\":[0, 0, 0], \\"color\\":\\"000000\\"}}]]", "clientid":{1}, "msg_id":{2}, "psessionid":"{3}"}}'.format(
                uin, self.client_id, msg_id, self.psessionid, fix_content)),
            ('clientid', self.client_id),
            ('psessionid', self.psessionid)
        )
        rsp = self.client.post(req_url, data, self.smart_qq_refer)
        rsp_json = json.loads(rsp)
        if 'errCode' in rsp_json and rsp_json['errCode'] != 0:
            raise ValueError("reply pmchat error" + str(rsp_json['retcode']))
        logger.info("RUNTIMELOG Reply successfully.")
        logger.debug("RESPONSE Reply response: " + str(rsp))
        return rsp_json
    except:
        if fail_times < 5:
            logger.warning("RUNTIMELOG Response Error.Wait for 2s and Retrying." + str(fail_times))
            logger.debug("RESPONSE " + str(rsp))
            time.sleep(2)
            # Return the retry's result so the caller sees the final outcome.
            return self.send_friend_msg(reply_content, uin, msg_id, fail_times + 1)
        else:
            logger.warning("RUNTIMELOG Response Error over 5 times.Exit.reply content:" + str(reply_content))
            return False
def run_pre_configuration_commands(itf=None):
    """Runs iptables commands to enable VRRP."""
    command_templ_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                      KEEPALIVED_PRE_COMMANDS_FILE)
    cmd_templ = None
    cmds = None
    if os.path.exists(command_templ_file):
        logger.info("command file exists")
    else:
        logger.debug("command file doesn't exist")
        exit(1)
    with open(command_templ_file, "r") as f:
        cmd_templ = Template(f.read())
    if itf:
        cmds = cmd_templ.substitute(interface=itf)
    else:
        logger.debug("Interface is None")
    if cmds:
        cmds = cmds.splitlines()
        for line in cmds:
            line = line.rstrip()  # rstrip() returns a new string, so keep the result
            try:
                cmd = line.split()
                print('cmd:', cmd)
                subprocess.check_call(cmd)
            except:
                logger.debug("ERROR running iptables command")
    else:
        logger.debug("ERROR getting cmds")
def get_friend_info(self, tuin):
    """
    Get detailed info for a friend (get_friend_info).
    Example response:
    {"retcode":0,"result":{"face":0,"birthday":{"month":1,"year":1989,"day":30},"occupation":"","phone":"","allow":1,"college":"","uin":3964575484,"constel":1,"blood":3,"homepage":"http://blog.lovewinne.com","stat":20,"vip_info":0,"country":"中国","city":"","personal":"","nick":" 信","shengxiao":5,"email":"*****@*****.**","province":"山东","gender":"male","mobile":"158********"}}
    :return: dict
    """
    uin = str(tuin)
    if uin not in self.friend_uin_list:
        logger.info("RUNTIMELOG Requesting the account info by uin: {}".format(uin))
        info = json.loads(self.client.get(
            'http://s.web2.qq.com/api/get_friend_info2?tuin={0}&vfwebqq={1}&clientid={2}&psessionid={3}&t={4}'.format(
                uin, self.vfwebqq, self.client_id, self.psessionid, self.client.get_timestamp()
            )
        ))
        logger.debug("get_friend_info2 html: {}".format(str(info)))
        if info['retcode'] != 0:
            logger.warning('get_friend_info2 retcode unknown: {}'.format(info))
            return None
        info = info['result']
        info['account'] = self.uin_to_account(uin)
        info['longnick'] = self.get_friend_longnick(uin)
        self.friend_uin_list[uin] = info

    try:
        return self.friend_uin_list[uin]
    except:
        logger.warning("RUNTIMELOG get_friend_info return fail.")
        logger.debug("RUNTIMELOG now uin list: " + str(self.friend_uin_list[uin]))
def send_group_msg(self, reply_content, group_code, msg_id, fail_times=0):
    fix_content = str(reply_content.replace("\\", "\\\\\\\\")
                      .replace("\n", "\\\\n")
                      .replace("\t", "\\\\t"))
    rsp = ""
    try:
        logger.info("Starting send group message: %s" % reply_content)
        req_url = "http://d1.web2.qq.com/channel/send_qun_msg2"
        data = (
            ('r', '{{"group_uin":{0}, "face":564,"content":"[\\"{4}\\",[\\"font\\",{{\\"name\\":\\"Arial\\",\\"size\\":\\"10\\",\\"style\\":[0,0,0],\\"color\\":\\"000000\\"}}]]","clientid":{1},"msg_id":{2},"psessionid":"{3}"}}'.format(
                group_code, self.client_id, msg_id, self.psessionid, fix_content)),
            ('clientid', self.client_id),
            ('psessionid', self.psessionid)
        )
        rsp = self.client.post(req_url, data, self.smart_qq_refer)
        rsp_json = json.loads(rsp)
        if 'retcode' in rsp_json and rsp_json['retcode'] not in MESSAGE_SENT:
            raise ValueError("RUNTIMELOG reply group chat error" + str(rsp_json['retcode']))
        logger.info("RUNTIMELOG send_qun_msg: Reply '{}' successfully.".format(reply_content))
        logger.debug("RESPONSE send_qun_msg: Reply response: " + str(rsp))
        return rsp_json
    except:
        logger.warning("RUNTIMELOG send_qun_msg fail")
        if fail_times < 5:
            logger.warning("RUNTIMELOG send_qun_msg: Response Error.Wait for 2s and Retrying." + str(fail_times))
            logger.debug("RESPONSE send_qun_msg rsp:" + str(rsp))
            time.sleep(2)
            # Return the retry's result so the caller sees the final outcome.
            return self.send_group_msg(reply_content, group_code, msg_id, fail_times + 1)
        else:
            logger.warning("RUNTIMELOG send_qun_msg: Response Error over 5 times.Exit.reply content:" + str(reply_content))
            return False
def uin_to_account(self, tuin):
    """
    Convert a uin to the user's QQ number.
    :param tuin:
    :return: str, the user's QQ number
    """
    uin_str = str(tuin)
    try:
        logger.info("RUNTIMELOG Requesting the account by uin: " + str(tuin))
        info = json.loads(
            self.client.get(
                'http://s.web2.qq.com/api/get_friend_uin2?tuin={0}&type=1&vfwebqq={1}&t={2}'.format(
                    uin_str, self.vfwebqq, self.client.get_timestamp()
                ),
                self.smart_qq_refer
            )
        )
        logger.debug("RESPONSE uin_to_account html: " + str(info))
        if info['retcode'] != 0:
            raise TypeError('uin_to_account retcode error')
        info = info['result']['account']
        return info
    except Exception:
        logger.exception("RUNTIMELOG uin_to_account fail")
        return None
def get_online_friends_list(self):
    """
    Get the list of online friends (get_online_buddies2).
    :return: list
    """
    logger.info("RUNTIMELOG Requesting the online buddies.")
    response = self.client.get(
        'http://d1.web2.qq.com/channel/get_online_buddies2?vfwebqq={0}&clientid={1}&psessionid={2}&t={3}'.format(
            self.vfwebqq,
            self.client_id,
            self.psessionid,
            self.client.get_timestamp(),
        )
    )
    # {"result":[],"retcode":0}
    logger.debug("RESPONSE get_online_buddies2 html:{}".format(response))
    try:
        online_buddies = json.loads(response)
    except ValueError:
        logger.warning("get_online_buddies2 response decode as json fail.")
        return None
    if online_buddies['retcode'] != 0:
        logger.warning('get_online_buddies2 retcode is not 0. returning.')
        return None
    online_buddies = online_buddies['result']
    return online_buddies
def async_reader(conn, loop):
    data = conn.recv()
    logger.debug("RECV DATA: %s", data)
    if data is None:
        return
    if data == b'':
        # Peer closed the connection: tear down both sides of the proxy pair.
        if conn in FD_MAP:
            forward_server = FD_MAP[conn]
            loop.remove_reader(forward_server.fd)
            forward_server.shutdown()
            del FD_MAP[forward_server]
            del FD_MAP[conn]
        loop.remove_reader(conn.fd)
        conn.shutdown()
        return
    if conn not in FD_MAP:
        target_host = find_host(data)
        if target_host is None:
            return
        forward_server = ProxySocket.get_client(host=target_host, port=80)
        forward_server.connect()
        if forward_server and forward_server.fd != -1:
            FD_MAP[conn] = forward_server
            FD_MAP[forward_server] = conn
            loop.add_reader(forward_server.fd, async_reader, forward_server, loop)
            logger.info("Create a connection to %s", target_host)
        else:
            logger.error("FAIL to connect to target host {0}".format(target_host))
            return  # without a forward socket there is nothing to relay to
    FD_MAP[conn].sendall(data)
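# Hypothetical sketch of the find_host() helper assumed by async_reader(); the
# real implementation is not shown here. It pulls the Host header out of a
# plain-HTTP request so the proxy knows where to open the forward connection.
import re

def find_host(data):
    match = re.search(rb'^Host:\s*([^\r\n]+)', data, re.IGNORECASE | re.MULTILINE)
    if match is None:
        return None
    return match.group(1).decode('ascii', errors='replace').strip()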
def add_snapshot(self, name, ts_from, ts_to):
    logger.info("Adding snapshot: {}".format(name))
    url = self.base_url + "/add_snapshot/"
    data = {"cluster": self.settings.cluster,
            "name": name,
            "ts_from": ts_from,
            "ts_to": ts_to}
    self.post(url, data)
def __init__(self, Finder, Amapping, NSmapping, SOAmapping, servers):
    logger.info("[MapResolver] Init.")
    self.Finder = Finder
    self.Amapping = Amapping
    self.NSmapping = NSmapping
    self.SOAmapping = SOAmapping
    client.Resolver.__init__(self, servers=servers)
def main():
    size = 10000  # length of vector v
    ITER = 50     # number of iterations to run for each report

    comm = MPI.COMM_WORLD
    # size = comm.Get_size()

    # Configure and connect to the Kafka data backbone server.
    # This is done only once by the rank 0 MPI process.
    #
    if comm.rank == MASTER:
        # Parse the command line
        #
        try:
            options = parse_commandline()
        except Exception, ex:
            logger.error("Command line parsing error: %s", str(ex))
            comm.Abort(-1)

        # Connect to Kafka
        #
        kw_cfg = KafkaWriterConfig(Host=options.KafkaHost, Topic=options.KafkaTopic)
        try:
            kw = KafkaWriter(kw_cfg)
            kw.connect()
            logger.info('connecting to Kafka backbone via %s', kw_cfg.Host)
        except Exception, ex:
            logger.error("Error connecting to Kafka backbone via %s: %s",
                         kw_cfg.Host, str(ex))
            comm.Abort(-1)
def upload_shot(screenshot):
    while True:
        try:
            get_upload_form(screenshot)
            opened_file = open(screenshot.filename, 'rb')
            file_length = len(open(screenshot.filename, 'rb').read())
            logger.debug('%s: length %d' % (screenshot.filename, file_length))
            file_arg = {'file': opened_file}
            r = requests.post(url=screenshot.form.url,
                              data=screenshot.form.payload,
                              files=file_arg,
                              verify=False)
            opened_file.close()
            logger.debug(r.request.headers)
            logger.debug(r.headers)
            screenshot.uploaded = True
            logger.info("Done uploading %s" % (screenshot.filename))
            return
        except AttributeError as e:
            logger.debug(e)
            logger.info("Could not upload %s - need upload credentials" % screenshot.filename)
            logger.info("Waiting 10 seconds then retrying upload")
            sleep(10)
        except requests.ConnectionError as e:
            logger.info("Could not upload %s - connection error %s " % (screenshot.filename, e))
            logger.info("Waiting 10 seconds then retrying upload")
            sleep(10)
def start_server(self):
    logger.info('Starting Couchbase Server')
    getosname = run('uname -a|cut -c1-6')
    if getosname.find("CYGWIN") != -1:
        run('net start CouchbaseServer')
    else:
        run('/etc/init.d/couchbase-server start')
def __init__(self, resource_obj, ready_to_transfer_input_queue,
             ready_to_exec_q, ready_to_transfer_output_q, done_q, failed_q):
    """Initialize the worker process and wire up its queues."""
    # Multiprocessing stuff
    multiprocessing.Process.__init__(self)
    self.daemon = True
    self._stop = False

    # The resource object binds the worker to the public API & callbacks
    self._res_obj = resource_obj

    # BigJob handles
    self._pilot_job = None
    self._pilot_service = None
    self._physical_tasks = []

    # All queues an InputFileTransferWorker can access
    self._tasks_done_q = done_q
    self._tasks_failed_q = failed_q
    self._tasks_ready_to_exec_q = ready_to_exec_q
    self._tasks_ready_to_transfer_output_q = ready_to_transfer_output_q
    self._tasks_ready_to_transfer_input_q = ready_to_transfer_input_queue

    logger.info("Starting BigJobWorker using BigJob version %s" % pilot.version)
def upload_forever():
    while True:
        screenshot = upload_queue.get()
        if screenshot == messages.QUIT:
            logger.info("Uploader is quitting...")
            sys.exit(0)
        upload_shot(screenshot)
def start_samplers(self):
    logger.info('Creating seriesly dbs')
    seriesly = Seriesly(host='{}'.format(self.test_config.gateload_settings.seriesly_host))
    for i, _ in enumerate(self.remote.gateways, start=1):
        seriesly.create_db('gateway_{}'.format(i))
        seriesly.create_db('gateload_{}'.format(i))
    self.remote.start_sampling()
def add_entry(self, key, service):
    if not self.get_entry(key):
        logger.info(f"DATABASE:ADD {key}, {service}")
        self.entries.append([key, json.dumps(service)])
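# add_entry() above and remove_entry() below both rely on a get_entry() lookup
# that is not shown here. A minimal sketch, assuming entries are stored as
# [key, json_string] pairs as add_entry() suggests:
def get_entry(self, key):
    for entry in self.entries:
        if entry[0] == key:
            return entry
    return None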
def get_s3_client():
    """Return an S3 client. If your credentials are not set via a .aws folder
    or another mechanism, set them here."""
    logger.info("Getting an S3 client...")
    s3_client = boto3.client('s3')
    return s3_client
def parse(self, response):
    logger.info("SCRAPING '%s' " % response.url)
    logger.info("RESPONSE: %s" % response.status)
    hxs = Selector(response)

    # Map URL substrings to merchant parsers; checked in order, so amazon.co.uk
    # is matched before amazon.com (same behaviour as the original if/elif chain).
    merchants = [
        ("amazon.co.uk", AmazonUK), ("amazon.com", Amazon),
        ("keds.com", Keds), ("6pm.com", SixPM),
        ("forever21.com", ForeverTwentyOne), ("harrods.com", Harrods),
        ("karmaloop.com", Karmaloop), ("adidas.com", Adidas),
        ("macys.com", Macys), ("eastbay.com", Eastbay),
        ("footaction.com", FootAction), ("walmart.com", Walmart),
        ("yoox.com", Yoox), ("zappos.com", Zappos),
        ("bowling.com", Bowling), ("nike.com", Nike),
        ("athleta.gap.com", Athleta), ("nordstrom.com", Nordstrom),
        ("sportsdirect.com", SportsDirect), ("carters.com", Carters),
        ("oshkosh.com", OshKosh), ("babytula.com", BabyTula),
        ("case-mate.com", CaseMate),
    ]

    merchant = None
    item = None  # keep `item` defined even if extraction never happens
    try:
        for domain, merchant_cls in merchants:
            if domain in response.url:
                merchant = merchant_cls(hxs, response.url)
                break

        if merchant is not None:
            item = merchant.extractProductData()

            # Parse item
            _encoder = ScrapyJSONEncoder()
            jsonString = json.loads(_encoder.encode(item))
            keywords = jsonString['keywords']

            # Create a CSV file for product-category data
            # with open('train_data.csv', 'ab') as csvfile:
            #     trainwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
            #     for keyword in keywords:
            #         trainwriter.writerow([jsonString['category'], str(''.join(keyword['title'])).lower()])

            # Create a CSV file for training data
            with open('train_data.csv', 'ab') as csvfile:
                trainwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
                trainwriter.writerow([
                    jsonString['category'],
                    str(jsonString['keywords']).lower()
                ])
        return item
    except Exception, e:
        print ' '
        print '--------------------BEGIN ERROR-------------------'
        print('Error: %s' % e.message)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback_details = {
            'filename': exc_traceback.tb_frame.f_code.co_filename,
            'lineno': exc_traceback.tb_lineno,
            'name': exc_traceback.tb_frame.f_code.co_name,
            'type': exc_type.__name__,
            'message': exc_value.message,
        }
        del (exc_type, exc_value, exc_traceback)
        print traceback.format_exc()
        print traceback_template % traceback_details
        print '--------------------END ERROR-------------------'
        print ' '
        return item
l = val_label.asnumpy()
pred = []
ff = []
tt = []
fff = []
ttf = []
ttc = []
allf = []
the = 0.90  # confidence threshold
for i in range(len(p)):
    pp = p[i]
    mm = pp.argmax()
    if mm != l[i]:
        allf.append(pp[mm])
    if mm != l[i] and pp[mm] < the:
        ff.append(pp[mm])
    else:
        if pp[mm] < the:
            tt.append(pp[mm])
            ttc.append(mm)

# logger.info() expects a format string first, so format the counts explicitly.
logger.info("%d %d %d %d %d", len(tt), len(ff), len(ttf), len(fff), len(allf))
logger.info("%f", (len(tt) + len(ff)) / res_final.shape[0])
logger.info("%f", (len(allf) - len(ff)) / res_final.shape[0])

preds = res_final.asnumpy()
preds = preds.argmax(axis=1)

from sklearn.metrics import confusion_matrix, classification_report, f1_score, recall_score, accuracy_score

cm = confusion_matrix(l, preds)
cm_norm = cm / cm.sum(axis=1)[:, np.newaxis]

from pprint import pprint
logger.info(cm_norm)
logger.info(classification_report(y_pred=preds, y_true=l))
def predict():
    # initialize the data dictionary that will be returned from the view
    data = {"success": False}
    dt = strftime("[%Y-%b-%d %H:%M:%S]")

    # ensure the expected passenger fields were posted to our endpoint
    if flask.request.method == "POST":
        request_json = flask.request.get_json()
        if request_json["embarked"]:
            embarked = request_json['embarked']
        if request_json["sex"]:
            sex = request_json['sex']
        if request_json["pclass"]:
            pclass = request_json['pclass']
        if request_json["age"]:
            age = request_json['age']
        if request_json["sibsp"]:
            sibsp = request_json['sibsp']
        if request_json["parch"]:
            parch = request_json['parch']
        if request_json["fare"]:
            fare = request_json['fare']
        if request_json["fullname"]:
            fullname = request_json['fullname']

        logger.info(
            f'{dt} Data: fullname={fullname} embarked={embarked}, sex={sex}, pclass={pclass}, '
            f'age={age}, sibsp={sibsp}, parch={parch}, fare={fare}'
        )
        try:
            preds = model.predict(
                pd.DataFrame({
                    "Embarked": [embarked],
                    "Sex": [sex],
                    "Pclass": [pclass],
                    "Age": [age],
                    "SibSp": [sibsp],
                    "Parch": [parch],
                    "Fare": [fare]
                }))
        except AttributeError as e:
            logger.warning(f'{dt} Exception: {str(e)}')
            data['predictions'] = str(e)
            data['success'] = False
            return flask.jsonify(data)

        data["predictions"] = str(preds[0])
        # indicate that the request was a success
        data["success"] = True

    # return the data dictionary as a JSON response
    return flask.jsonify(data)
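# Hedged client-side example for the view above; the /predict route and port
# are assumptions, only the JSON field names come from the handler itself.
import requests

payload = {
    "fullname": "John Doe", "embarked": "S", "sex": "male", "pclass": 3,
    "age": 22, "sibsp": 1, "parch": 0, "fare": 7.25,
}
r = requests.post("http://localhost:5000/predict", json=payload)
print(r.json())  # e.g. {"predictions": "0", "success": true}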
def recv(self, asBytes=False): if self.sessionKey: iv = self._recvBytes(self.blockSize) logger.info("{}: iv received: {}".format(prefix, iv)) cipher = Cipher(algorithms.AES(self.sessionKey), modes.CBC(iv), backend=default_backend()) decryptor = cipher.decryptor() ct = b'' blk = self._recvBytes(self.blockSize) ct += blk dblk = decryptor.update(blk) gen = self._blkGenerator(dblk) headerBytes, state = self._recvHeaderBytes(generator=gen) logger.debug("state: {}".format(state)) while state != None: blk = self._recvBytes(self.blockSize) ct += blk dblk = decryptor.update(blk) gen = self._blkGenerator(dblk) temp, state = self._recvHeaderBytes(generator=gen) headerBytes += temp logger.debug("HeaderBytes: {}".format(headerBytes)) header = Header(self.blockSize) header.update(headerBytes) body = b'' try: while True: body += next(gen) except StopIteration: pass leftoverBodySize = header.getSize() - len(body) + header.getPaddingSize() leftoverBodyEncrypted = self._recvBytes(leftoverBodySize) ct += leftoverBodyEncrypted body += decryptor.update(leftoverBodyEncrypted) + decryptor.finalize() logger.info("{}: cipher text received: {}".format(prefix, ct)) checkHmac = self._recvBytes(self.hashSize) logger.info("{}: received cipher text hmac: {}".format(prefix, checkHmac)) h = hmac.HMAC(self.sessionKey, hashes.SHA256(), backend=default_backend()) h.update(ct) h.verify(checkHmac) logger.info("{}: complete received plain-text: {}".format(prefix, headerBytes + body)) body = body[:len(body) - header.getPaddingSize()] if not asBytes: body = body.decode() return body else: header = Header() (headerBytes, _) = self._recvHeaderBytes() header.update(headerBytes) body = b'' totalSize = header.getSize() while len(body) < totalSize: body += self.conn.recv(int(totalSize - len(body))) if not asBytes: return body.decode() return body
def remove_entry(self, key):
    entry = self.get_entry(key)
    if entry:
        logger.info(f"DATABASE:REMOVE {key}")
        self.entries.remove(entry)
def __on_event_callback(self, event_json):
    logger.info('Received event')
    decoded_event = json.loads(event_json)
    logger.info('Publishing event {}'.format(decoded_event))
def _estimate_internal(
        self, append_system_fn: Callable[['SystemTransition'], None]) -> None:
    count_triviality = 0
    count_with_unknowns = 0
    self.dump_system('start estimation')
    some_transitions_removed = True
    while some_transitions_removed:
        logger.debug("Start analyse from last transition")
        some_transitions_removed = False
        for x in range(len(self.__transitions) - 1, -1, -1):
            transition = self.__transitions[x]
            logger.debug("Analyse transition {}".format(transition))
            left = transition.get_left_side()
            right = transition.get_right_side()
            assert len(left) > 0 and len(right) > 0
            # neither side has unknowns
            if not left.contains_unknown() and not right.contains_unknown():
                count_triviality += 1
                logger.debug("Transition {} is trivial".format(transition))
                continue
            is_left_non_zero = self._is_side_non_zero(left)
            is_right_non_zero = self._is_side_non_zero(right)
            if is_left_non_zero and is_right_non_zero:
                logger.debug("Both sides '{}' and '{}' are not zero".format(left, right))
                # do nothing, just increase counter
                count_with_unknowns += 1
                continue
            elif is_left_non_zero and not is_right_non_zero:
                logger.debug("Left side '{}' is NON ZERO. Right is undefined '{}'".format(left, right))
                # fixed left side - not a fork
                nz = Condition.create_non_zero_condition(right.copy())
                logger.debug("Create non zero condition '{}'".format(nz))
                self._conds_non_zero.append(nz)
                count_with_unknowns += 1
                continue
            elif not is_left_non_zero and is_right_non_zero:
                logger.debug("Right side '{}' is NON ZERO. Left is undefined '{}'".format(right, left))
                # fixed right side - not a fork
                nz = Condition.create_non_zero_condition(left.copy())
                logger.debug("Create non zero condition: '{}'".format(nz))
                self._conds_non_zero.append(nz)
                count_with_unknowns += 1
                continue
            else:
                fork = False
                # both sides are undefined
                # check whether they contain unknowns
                if not left.contains_unknown() or not right.contains_unknown():
                    logger.info('Left or right side does not contain UNKNOWN')
                    # need to divide into two cases: zero and not zero
                    # zero case
                else:
                    logger.info("Left and right sides contain UNKNOWN and are undefined; "
                                "'Fork' will be processed")
                    fork = True
                new_system = self.__clone(is_fork=fork)

                # create non zero conditions and add them to the new system
                # logger.debug("Creating new conditions for non zero case")
                left_nzc = Condition.create_non_zero_condition(left.copy())
                right_nzc = Condition.create_non_zero_condition(right.copy())
                logger.debug("New non zero conditions '{}' and '{}'".format(left_nzc, right_nzc))
                new_system._conds_non_zero.append(left_nzc)
                new_system._conds_non_zero.append(right_nzc)
                append_system_fn(new_system)
                logger.debug("New system with id {} added to queue".format(new_system._id))

                # logger.debug("Creating new conditions for zero case")
                left_zc = Condition.create_zero_condition(left.copy())
                right_zc = Condition.create_zero_condition(right.copy())
                logger.debug("New zero conditions '{}' and '{}'".format(left_zc, right_zc))
                self._use_and_append_zero_cond(left_zc)
                self._use_and_append_zero_cond(right_zc)

                some_transitions_removed = False
                res = 1
                while res > 0:
                    res = self._remove_empty_transitions()
                    res += self._analyse_and_generate_new_conditions()
                    some_transitions_removed |= bool(res)
                if some_transitions_removed:
                    break
                continue

    self._mark = None
    for tr in self.__transitions:
        if self._mark is None:
            self._mark = Symbol(tr.get_probability())
        else:
            self._mark *= Symbol(tr.get_probability())

    N = Symbol('N')
    amount = self._count_special_equal_conds()
    for i in range(amount):
        if self._mark is None:
            self._mark = N
        else:
            self._mark *= N

    if self._mark is not None:
        collector.make_node_leaf(self._node, self._mark)
        self.save_node(self._node)
    self.dump_system('Estimated with mark {}'.format(str(self._mark)))
    return
def _lambda_handler(event, context): logger.info('Event:' + json.dumps(event)) records = event['Records'] now = datetime.datetime.utcnow() ddb_deserializer = StreamTypeDeserializer() cnt_insert = cnt_modify = cnt_remove = 0 for record in records: # Handle both native DynamoDB Streams or Streams data from Kinesis (for manual replay) if record.get('eventSource') == 'aws:dynamodb': ddb = record['dynamodb'] ddb_table_name = get_table_name_from_arn(record['eventSourceARN']) doc_seq = ddb['SequenceNumber'] elif record.get('eventSource') == 'aws:kinesis': ddb = json.loads(base64.b64decode(record['kinesis']['data'])) ddb_table_name = ddb['SourceTable'] doc_seq = record['kinesis']['sequenceNumber'] else: logger.error('Ignoring non-DynamoDB event sources: %s', record.get('eventSource')) continue # Compute DynamoDB table, type and index for item doc_table = DOC_TABLE_FORMAT.format( ddb_table_name.lower()) # Use formatter doc_type = DOC_TYPE_FORMAT.format( ddb_table_name.lower()) # Use formatter doc_index = compute_doc_index(ddb['Keys'], ddb_deserializer) # Dispatch according to event TYPE event_name = record['eventName'].upper() # INSERT, MODIFY, REMOVE items, LastEvaluatedKey = general_storage.scan_items( general_storage.get_dynamodb_table('client_configuration'), Attr('table_name').eq(doc_table)) cf = general_config.create_configuration(items[0]) # Treat events from a Kinesis stream as INSERTs if event_name == 'AWS:KINESIS:RECORD': event_name = 'INSERT' # Update counters if event_name == 'INSERT': cnt_insert += 1 elif event_name == 'MODIFY': cnt_modify += 1 elif event_name == 'REMOVE': cnt_remove += 1 else: logger.warning('Unsupported event_name: %s', event_name) # If DynamoDB INSERT only, send 'item' to RDS if event_name == 'INSERT': if 'NewImage' not in ddb: logger.warning( 'Cannot process stream if it does not contain NewImage') continue # Deserialize DynamoDB type to Python types doc_fields = ddb_deserializer.deserialize({'M': ddb['NewImage']}) # Now only store own post and replies if doc_fields['object_type'] == 'post' and str( doc_fields['user_id']) != str(cf.twitter_user_id): continue # Now only store own post and replies if doc_fields['object_type'] == 'comment' and str( doc_fields['asset_id']) != str(cf.twitter_user_id): continue # Normalize DynamoDB object to Mysql object and write to RDS normalizer_mysql.insert_dynamodb_item_into_mysql(cf, doc_fields)
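# Hypothetical sketch of the get_table_name_from_arn() helper the handler above
# relies on; the real implementation is not shown here. A DynamoDB stream ARN
# looks like arn:aws:dynamodb:us-east-1:123456789012:table/MyTable/stream/2020-01-01T00:00:00.000
def get_table_name_from_arn(arn):
    # "table/MyTable/stream/..." -> "MyTable"
    return arn.split(':', 5)[5].split('/')[1]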
def print(self, idx):
    logger.info("Sentence %d, Correctness: %.2f, Loss: %.2f",
                idx, self.correct_count / self.total_count * 100, self.total_loss)
    self.reset()
        # logger.debug(features.get_params().keys())
        # pipe.fit(X_train, y_train)
        # scores = cross_val_score(pipe, X_train, y_train, cv=5, scoring='accuracy')
        # logger.info("Validation Accuracy: {:.3f} ± {:.3f}".format(np.mean(scores), 2 * np.std(scores)))
        # clf = pipe.steps[1][1]
        # self.print_importances(clf, X_train)
        # logger.info(format_as_text(explain_weights_lightgbm(lgb=clf, vec=features)))
        return pipe

    def predict(self, pipe, X_test, k=3):
        """
        :param pipe: fitted pipeline
        :param X_test: test features
        :return: predicted labels
        """
        y_test = pipe.predict(X_test)
        return y_test


if __name__ == '__main__':
    fn = "https://raw.githubusercontent.com/amueller/scipy-2017-sklearn/master/notebooks/datasets/titanic3.csv"
    model = NewPipeline()
    X_train, X_test, y_train, y_test = model.get_data(fn)
    pipe = model.get_pipe()
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    logger.info(accuracy_score(y_pred, y_test))
async def create(self, file: UploadFile, subdir: str = ''):
    '''Creates a file using an UploadFile object from fastapi.

    Expected arguments:
    :param file: An UploadFile object
    :type file: UploadFile
    :param subdir: The optional sub directory for the file.
    :type subdir: str
    :return: (status.done.value, filename) if the file was created, or
             (status.exists.value, filename) if it already exists.
    :rtype: tuple(str, str)
    '''
    file_type = file.filename.split('.')[-1]

    temp_folder_name = 'tempfolder'
    temp_folder_path = os.path.join(subdir, temp_folder_name)
    logger.info('temp folder path:\n{0}\n'.format(temp_folder_path))
    temp_folder = Folder(temp_folder_path)
    temp_folder.prepare()

    temp_file_name = 'tempfile'
    temp_file_path = os.path.join(temp_folder_path, temp_file_name)
    logger.info('temp file path:\n{0}\n'.format(temp_file_path))
    temp_file = open(temp_file_path, 'wb')

    # Stream the upload to disk while computing its MD5 hash.
    hash_code = md5()
    while True:
        chunk = await file.read(20000)
        if not chunk:
            break
        hash_code.update(chunk)
        temp_file.write(chunk)
    temp_file.close()

    the_hash = hash_code.hexdigest()
    filename = '.'.join([the_hash, file_type])
    sub_folder_name = filename[:2]
    sub_folder_path = os.path.join(subdir, sub_folder_name)
    logger.info('sub folder path:\n{0}\n'.format(sub_folder_path))

    # Does the file already exist?
    if sub_folder_name in os.listdir(subdir):
        if filename in os.listdir(sub_folder_path):
            shutil.rmtree(temp_folder_path)
            return status.exists.value, filename

    # Rename the existing temp file:
    renamed_file_path = os.path.join(temp_folder_path, filename)
    logger.info('renamed file path:\n{0}\n'.format(renamed_file_path))
    os.rename(os.path.join(temp_folder_path, temp_file_name), renamed_file_path)

    # Rename the temp folder:
    shutil.move(temp_folder_path, sub_folder_path)
    logger.info('renamed dir path:\n{0}\n'.format(sub_folder_path))
    return status.done.value, filename
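# A hedged wiring sketch for the coroutine above: the FastAPI app, the /upload
# route, and the `FileStorage` class name are assumptions made for illustration;
# the snippet itself does not show how create() is exposed.
from fastapi import FastAPI, File, UploadFile

app = FastAPI()
storage = FileStorage()  # assumed owner of the create() coroutine above

@app.post('/upload')
async def upload(file: UploadFile = File(...)):
    result, filename = await storage.create(file, subdir='uploads')
    return {'status': result, 'filename': filename}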
def _use_and_append_zero_cond(self, condition: Condition) -> None: self.__apply_condition(condition) self.dump_system("System after apply condition {}".format(condition), with_main_conds=False) for nzcondition in self._conds_non_zero: if nzcondition.update_with(condition): self.dump_system( "Conditions after apply condition {}".format(condition), with_trans=False) if not nzcondition.is_correct(): func = inspect.currentframe().f_back.f_code raise ConditionException( "[{}:{}] Contradiction detected '{}'".format( func.co_name, func.co_firstlineno, nzcondition)) new_zero_conds = [] useless_econds = [] for econdition in self._conds_equals: if econdition.update_with(condition): self.dump_system( "[_use_and_append_zero_cond] Conditions after apply condition {}" .format(condition), with_trans=False) if not econdition.is_correct(): func = inspect.currentframe().f_back.f_code raise ConditionException( "[{}:{}] Contradiction detected '{}'".format( func.co_name, func.co_firstlineno, econdition)) if econdition.get_state() == ConditionState.IS_ZERO: # condition was converted new_zero_conds.append(econdition) if econdition.is_useless(): useless_econds.append(econdition) logger.debug("useless: Condition {} true ".format(econdition)) else: logger.debug("useless: Condition {} false ".format(econdition)) if condition.get_state() == ConditionState.IS_EQUAL: self._conds_equals.append(condition) else: self._conds_zero.append(condition) self.dump_system("Added condition {}".format(condition), with_trans=False) # remove useless condition if len(useless_econds): for ucondition in useless_econds: self._conds_equals.remove(ucondition) logger.info( "_use_and_append_zero_cond: remove useless condition from _conds_equals '{}'" .format(ucondition)) self.dump_system("Clear _conds_equals", with_trans=False) # remove and apply new zero conditions if len(new_zero_conds) > 0: for zcondition in new_zero_conds: self._conds_equals.remove(zcondition) logger.info( "_use_and_append_zero_cond: remove new zero condition from _conds_equals '{}'" .format(zcondition)) self.dump_system("Clear _conds_equals 2", with_trans=False) for zcondition in new_zero_conds: self._use_and_append_zero_cond(zcondition)
def connect_display(self, id):
    print(self.displays[id])
    logger.info("Connected to remote display " + str(id))
def measure(self, client, kvclient, metric, bucket): if self.n1ql_op == 'create': self.curr_items += 1 key, ttl = self.new_keys.next(curr_items=self.curr_items) key = "stat" + key[5:] elif self.n1ql_op == 'delete' or self.n1ql_op == 'update': self.curr_items += 1 key, ttl = self.new_keys.next(curr_items=self.curr_items) key = "stat" + key[5:] doc = self.new_docs.next(key) doc['key'] = key doc['bucket'] = bucket kvclient.create(key, doc) ddoc_name, view_name, query = self.new_queries.next(doc) _, latency = client.query(ddoc_name, view_name, query=query) return 1000 * latency # s -> ms elif self.n1ql_op == 'rangeupdate': if self.smallcappedinit == False: logger.info( "Initiating load for rangeupdate latency collection") for i in range(100): key, ttl = self.new_keys.next(curr_items=self.curr_items) key = "stat" + key[5:] doc = self.new_docs.next(key) doc['key'] = key doc['bucket'] = bucket doc['capped_small'] = "stat" kvclient.create(key, doc) self.curr_items += 1 self.smallcappedinit = True logger.info( "Completed load for rangeupdate latency collection") return 0 key, ttl = self.new_keys.next(curr_items=self.curr_items) key = "stat" + key[5:] doc = self.new_docs.next(key) doc['capped_small'] = "stat" ddoc_name, view_name, query = self.new_queries.next(doc) query[ 'statement'] = "UPDATE `bucket-1` SET name = name||'' WHERE capped_small=$1;" del query['prepared'] _, latency = client.query(ddoc_name, view_name, query=query) return 1000 * latency # s -> ms elif self.n1ql_op == 'rangedelete': if self.smallcappedinit == False: logger.info( "Initiating load for range update latency collection") for i in range(10000): key, ttl = self.new_keys.next(curr_items=self.curr_items) key = "stat" + key[5:] doc = self.new_docs.next(key) doc['key'] = key doc['bucket'] = bucket doc['capped_small'] = "stat" + str(i / 100) kvclient.create(key, doc) self.curr_items += 1 self.smallcappedinit = True logger.info( "Completed load for range delete latency collection") return 0 key, ttl = self.new_keys.next(curr_items=self.curr_items) key = "stat" + key[5:] doc = self.new_docs.next(key) doc['capped_small'] = "stat" + str(self.cappedcounter) ddoc_name, view_name, query = self.new_queries.next(doc) _, latency = client.query(ddoc_name, view_name, query=query) self.cappedcounter += 1 return 1000 * latency # s -> ms elif self.n1ql_op == 'merge': doc = {} ddoc_name, view_name, query = self.new_queries.next(doc) _, latency = client.query(ddoc_name, view_name, query=query) flushpath = '/pools/default/buckets/bucket-2/controller/doFlush' self.post_http(path=flushpath) return latency # s -> ms else: key = self.existing_keys.next(curr_items=self.items, curr_deletes=0) doc = self.new_docs.next(key) doc['key'] = key doc['bucket'] = bucket ddoc_name, view_name, query = self.new_queries.next(doc) _, latency = client.query(ddoc_name, view_name, query=query) return 1000 * latency # s -> ms
def terminate(self):
    logger.info('Terminating Celery workers')
    local.kill_process('celery')
def share_display(self, id):
    print(self.displays[id])
    logger.info("Shared display " + str(id))
def start(self):
    logger.info('Starting local Celery worker')
    local.start_celery_worker(queue=self.next_worker())
def add_new_display(self): # cannot have more than 5 sessions if len(self.displays) >= 5: logger.warning("You have already 5 displays") return display_win = QDisplayDialog(list(self.displays.keys())) display_win.setModal(True) if display_win.exec() != 1: return display_hor_layout = QHBoxLayout() display_hor_layout.setContentsMargins(0, 2, 0, 2) display_hor_layout.setSpacing(2) display_ver_layout = QVBoxLayout() display_ver_layout.setContentsMargins(0, 0, 0, 0) display_ver_layout.setSpacing(0) display_ver_layout.addLayout(display_hor_layout) display_widget = QWidget() display_widget.setLayout(display_ver_layout) id = display_win.display_name print(id) name = QLabel() name.setText(str(id)[:16]) display_hor_layout.addWidget(name) status = QLabel() status.setText("Pending...") display_hor_layout.addWidget(status) time = QLabel() time.setText("24H") display_hor_layout.addWidget(time) resources = QLabel() resources.setText("1 Node") display_hor_layout.addWidget(resources) connect_btn = QPushButton() connect_btn.setIcon(self.connect_ico) connect_btn.setToolTip('Connect to the remote display') connect_btn.clicked.connect(lambda: self.connect_display(id)) display_hor_layout.addWidget(connect_btn) share_btn = QPushButton() share_btn.setIcon(self.share_ico) share_btn.setToolTip('Share the remote display via file') share_btn.clicked.connect(lambda: self.share_display(id)) display_hor_layout.addWidget(share_btn) kill_btn = QPushButton() kill_btn.setIcon(self.kill_ico) kill_btn.setToolTip('Kill the remote display') kill_btn.clicked.connect(lambda: self.kill_display(id)) display_hor_layout.addWidget(kill_btn) separator = QFrame() separator.setFrameShape(QFrame.HLine) separator.setFrameShadow(QFrame.Sunken) display_ver_layout.addWidget(separator) self.rows_ver_layout.addWidget(display_widget) self.displays[id] = display_widget logger.info("Added new display")
def wait_for_workers(self):
    logger.info('Waiting for all tasks to finish')
    for async_result in self.async_results:
        async_result.get()
    logger.info('All tasks are done')
def abort(self):
    logger.info('Interrupting Celery workers')
    os.kill(self.pid, signal.SIGTERM)
    self.wait_for_workers()
def terminate(self):
    logger.info('Terminating local Celery workers')
    with quiet():
        local('killall -9 celery')
        for db in self.SQLITE_DBS:
            local('rm -fr {}'.format(db))
def terminate(self):
    logger.info('Terminating Celery workers')
    if self.dynamic_infra:
        self.remote.terminate_client_pods(self.worker_path)
    else:
        self.remote.terminate_client_processes()
def get(self, q=""): self.q = q logger.info("query for '%s'" % q) self.set_header('Content-Type', 'application/json') self.write(json.dumps(self.get_papers()))
def start(self):
    logger.info('Initializing remote worker environment')
    if self.dynamic_infra:
        self.start_kubernetes_workers()
    else:
        self.start_remote_workers()
def update_offerings(self, slug, justwatch_id): if slug not in self.films: logger.warning('Could not update "%s", not in watchlist' % (slug)) return None # Get offerings logger.info('Getting offerings for "%s" using JustWatch id=%s' % (slug, justwatch_id)) try: providers = { p['id']: p['clear_name'] for p in self.justwatch.get_providers() } justwatch = self.justwatch.get_title(title_id=justwatch_id) print dumps(justwatch, indent=4) offers = justwatch.get('offers', []) justwatch_id = justwatch['id'] justwatch_url = justwatch.get('full_paths', {}).get('MOVIE_DETAIL_OVERVIEW') except: logger.exception( 'No offerings found for "%s" using JustWatch id=%s' % (slug, justwatch_id)) return {} # if not offers: # logger.error('No offerings found for "%s" using JustWatch id=%s' % (slug, justwatch_id)) # return {} # Parse JustWatch data try: # Offerings offerings = {} for offer in offers: if offer.get('provider_id') not in offerings: offerings[offer.get('provider_id')] = { 'name': providers.get(offer.get('provider_id')), 'offers': [], 'offer_types': [], } offerings[offer.get('provider_id')]['offers'].append({ 'date_created': offer.get('date_created'), 'monetization_type': offer.get('monetization_type'), 'presentation_type': offer.get('presentation_type'), # 'provider_id': offer.get('provider_id'), 'urls': offer.get('urls', {}), 'price': offer.get('retail_price'), 'currency': offer.get('currency'), }) if offer.get('monetization_type') not in offerings[offer.get( 'provider_id')]['offer_types']: offerings[offer.get('provider_id')]['offer_types'].append( offer.get('monetization_type')) # Scoring tomato_id = None scoring = {} average_score = None scores = [] for score in justwatch.get('scoring', []): if ':id' not in score['provider_type']: key = score['provider_type'].replace(':', '_') scoring[key] = score['value'] if key == 'imdb_score': scores.append(float(score['value'])) if key == 'tmdb_score': scores.append(float(score['value'])) if key == 'tomato_score': scores.append((float(score['value']) / 10)) if key == 'metacritic_score': scores.append((float(score['value']) / 10)) if score['provider_type'] == 'tomato:id': tomato_id = score['value'] # Calculate average if len(scores) > 0: average_score = (float(sum(scores)) / len(scores)) average_score = round(average_score, 2) except: logger.exception('Could not parse metadata for %s' % (slug)) return {} # Update film logger.info('Updating offerings for "%s"' % (slug)) self.films[slug]['ids']['justwatch'] = justwatch_id self.films[slug]['ids']['tomato'] = tomato_id self.films[slug]['offerings'] = offerings self.films[slug]['offerings_updated'] = time() self.films[slug]['offerings_updated_str'] = datetime.now().strftime( '%Y-%m-%d') self.films[slug]['justwatch_url'] = justwatch_url self.films[slug]['scoring'] = scoring self.films[slug]['scoring']['average'] = average_score self.save() return offerings
def sendMsg(html, receiver_email, failure_num=1, is_path=True, is_send=True):
    """
    Send an email report.
    :param html: path to the HTML file used as the message body, or the HTML itself
    :param receiver_email: path to the txt file that holds the recipients' addresses
    :param failure_num: number of failed test cases
    :param is_path: bool, True means `html` is a file path, False means it is the HTML string
    :param is_send: bool, whether to send the email; only used to retry once after a first failure
    :return:
    """
    flag = 0
    is_email = int(cfg.getConfig('is_email'))
    if is_email:
        if is_email == 1:
            flag = 1
        if is_email == 2:
            if failure_num > 0:
                flag = 1
            else:
                logger.info('所有用例执行成功,不发送邮件,已跳过。')  # all cases passed, skip sending
        if is_email == 3:
            if failure_num == 0:
                flag = 1
            else:
                logger.info('有执行失败的用例,不发送邮件,已跳过。')  # some cases failed, skip sending
    else:
        logger.info('设置为不自动发送邮件,已跳过。')  # automatic email disabled, skip sending

    if flag:
        try:
            receive_name = re.findall('email_(.*?).txt', receiver_email)[0]  # extract the recipient's name
            with open(receiver_email, 'r', encoding='utf-8') as f:
                sends = f.readlines()
            subject = sends[0]
            send_to = sends[1]
            logger.info('开始发送邮件,收件人{}'.format(send_to))  # start sending, recipients ...

            message = MIMEMultipart()
            message['From'] = Header(cfg.getConfig('sender_name'))   # sender's display name
            message['To'] = Header(receive_name)                     # recipient's display name
            message['Subject'] = Header(subject, 'utf-8')            # mail subject

            if is_path:
                with open(html, 'r', encoding='utf-8') as f:
                    fail_case = f.read()
            else:
                fail_case = html
            email_text = MIMEText(fail_case, 'html', 'utf-8')
            message.attach(email_text)  # attach the mail body

            try:
                server = smtplib.SMTP_SSL(cfg.getConfig('smtp'), 465)
                # server.connect(cfg.getConfig('smtp'))
            except Exception as err:
                logger.error(err)
                server = smtplib.SMTP(cfg.getConfig('smtp'), 25)
                # server.connect(cfg.getConfig('smtp'))
            server.login(cfg.getConfig('sender_email'), '123456')  # log in to the mailbox
            server.sendmail(cfg.getConfig('sender_email'), send_to.split(','), message.as_string())  # send
            server.quit()
            del fail_case, email_text, message, server
            logger.info('邮件发送成功')  # mail sent successfully
        except Exception as err:
            logger.error(err)
            if is_send:
                sendMsg(html, receiver_email, is_path=is_path, is_send=False)  # retry once after a failure
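# Example of the recipient file sendMsg() parses (grounded in the code above):
# the filename encodes the recipient's display name, the first line is the
# subject, and the second line is a comma-separated list of addresses.
#
#   email_zhangsan.txt:
#       Performance test report
#       [email protected],[email protected]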
def update_metadata(self, slug, tmdb_id): if slug not in self.films: logger.warning('Could not update "%s", not in watchlist' % (slug)) return None # Get metadata logger.info('Getting metadata for "%s" using TMDb id=%s' % (slug, tmdb_id)) details = self.tmdb.details(tmdb_id) if not details or details.get('status_code'): raise Exception('No metadata found for %s' % (slug)) # Parse TMDb details try: # Details year = details.get('release_date') year = int(year.split('-')[0]) if year else None credits = details.get('credits', {}) crew = credits.get('crew', []) metadata = { 'title': details.get('title'), 'original_title': details.get('original_title'), 'year': year, 'overview': details.get('overview'), 'genres': [g['name'] for g in details.get('genres', [])], 'runtime': details.get('runtime'), 'original_language': details.get('original_language'), 'spoken_languages': [l['name'] for l in details.get('spoken_languages', [])], 'directors': [p['name'] for p in crew if p['job'] == 'Director'], 'writers': [p['name'] for p in crew if p['job'] == 'Writer'], } # Images if details.get('backdrop_path') and not path.isfile( path.join(BACKDROPS_PATH, '%s.jpg' % (slug))): try: backdrop_url = TMDB_BACKDROP_URL % ( details.get('backdrop_path')) logger.info('Fetching backdrop for "%s", url=%s' % (slug, backdrop_url)) r = get(backdrop_url, stream=True) r.raise_for_status() with open(path.join(BACKDROPS_PATH, '%s.jpg' % (slug)), 'wb') as f: r.raw.decode_content = True copyfileobj(r.raw, f) except: logger.exception('Could not save backdrop image') else: logger.warning('No backdrop found for "%s"' % (slug)) except: logger.exception('TMDb parse error') raise Exception('Could not parse metadata for %s' % (slug)) # Update film logger.info('Updating metadata for "%s"' % (slug)) self.films[slug]['ids']['tmdb'] = details.get('id') self.films[slug]['ids']['imdb'] = details.get('imdb_id') self.films[slug]['metadata'] = metadata self.save() return metadata
def sync(self): logger.info('Syncing watchlist') results = {'new': {}, 'removed': []} # Fetch Letterboxd watchlist logger.info('> Existing films: %d' % (len(self.films.keys()))) lb_watchlist = self.letterboxd.watchlist() logger.info('> Got %d films from Letterboxd' % (len(lb_watchlist.keys()))) logger.info('Updating watchlist') for slug, metadata in lb_watchlist.iteritems(): if slug in self.films: # Update self.films[slug]['ids']['letterboxd'] = metadata['id'] self.films[slug]['title'] = metadata['title'] # self.films[slug]['year'] = metadata['year'] else: # Create self.films[slug] = { 'ids': { 'letterboxd': metadata['id'] }, 'title': metadata['title'], # 'year': metadata['year'], } results['new'][slug] = self.films[slug] logger.info('> Added %s' % (slug)) # Find removed removed = [ f for f in self.films.keys() if f not in lb_watchlist.keys() ] for slug in removed: logger.info('> Removed %s' % (slug)) del self.films[slug] results['removed'] = removed # Save self.save() return results