def get_3rd_party_campaign_phones(camp_info_today):
    running_3rd_camp_phones, running_3rd_camp_ids = set(), Utils.get_running_3rd_party_campaign_ids(camp_info_today)
    for run_id in running_3rd_camp_ids:
        camp_file = Utils.get_raw_campaign_file(config.CAMPAIGN_DIR, run_id)
        running_3rd_camp_phones.update(helper.read_phones_file(camp_file))
    return running_3rd_camp_phones
def filter(self, data, config):
    sellVolume = self.getSellVolume(data)
    sellTrend = Utils.trendline(sellVolume['volume'])
    # print('sellVolume and trend', sellVolume['volume'], sellTrend)
    buyVolume = self.getBuyVolume(data)
    buyTrend = Utils.trendline(buyVolume['volume'])
    # print('buyVolume and trend', buyVolume['volume'], buyTrend)
    return sellTrend < 0 and buyTrend > 0
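# Illustrative, self-contained sketch (assumption): Utils.trendline is treated here as
# returning the slope of an ordinary least-squares fit over the sample index, so the
# filter above passes only when sell volume is trending down and buy volume is trending up.
def trendline_sketch(values):
    n = len(values)
    xs = range(n)
    mean_x = sum(xs) / float(n)
    mean_y = sum(values) / float(n)
    num = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, values))
    den = sum((x - mean_x) ** 2 for x in xs)
    return num / den  # slope of the fitted line

print(trendline_sketch([120, 110, 90, 70]) < 0)   # True: falling volume
print(trendline_sketch([50, 60, 80, 95]) > 0)     # True: rising volume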
def build_spend_message(to_spend, pk, sequence, txouts):
    spend_msg = Utils.sha256d(
        Utils.serialize(to_spend) + str(sequence) +
        binascii.hexlify(pk).decode() + Utils.serialize(txouts)).encode()
    return spend_msg
def checkToken(token):
    if token is None:
        return Utils.JsonMessage("Token parameter not found!", 500)
    if not LoginService.validateJWTToken(token):
        return Utils.JsonMessage("Unauthorized", 401)
    return None
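# Usage sketch (hypothetical handler, not part of the original module): checkToken
# returns an error response when validation fails and None when the token is valid,
# so a caller can return early on any non-None result. The request object and the
# way the token is read are assumptions for illustration only.
def protected_endpoint(request):
    error = checkToken(request.args.get('token'))  # hypothetical token lookup
    if error is not None:
        return error
    return Utils.JsonMessage("OK", 200)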
def canTradeToday(self):
    if Utils.isTodayOneDayBeforeWeeklyExpiryDay():
        logging.info('%s: Today is one day before weekly expiry date hence going to trade this strategy', self.getName())
        return True
    if Utils.isTodayWeeklyExpiryDay():
        logging.info('%s: Today is weekly expiry day hence going to trade this strategy today', self.getName())
        return True
    logging.info('%s: Today is neither day before expiry nor expiry day. Hence NOT going to trade this strategy today', self.getName())
    return False
def loadAndUpdateStrikesFromFile(strikesFilepath):
    if not os.path.exists(strikesFilepath):
        logging.warning('loadAndUpdateStrikesFromFile() strikes filepath %s does not exist', strikesFilepath)
        return
    OptionBuyingStrategy.trades = []
    OptionBuyingStrategy.updatedTrades = []
    tFile = open(strikesFilepath, 'r')
    tradesData = json.loads(tFile.read())
    if not tradesData:
        return
    else:
        while True:
            isDataUpdate = False
            OptionBuyingStrategy.trades = []
            tFile = open(strikesFilepath, 'r')
            tradesData = json.loads(tFile.read())
            for tr in tradesData:
                trade = OptionBuyingStrategy.convertJSONToTrade(tr)
                if not trade.isTradeLive:
                    # logging.info("As trade is not live, low will be verified")
                    optionQuote = Quotes.getOptionBuyingQuote(trade.tradingSymbol, True)
                    if OptionBuyingStrategy.isWithinTradingRange(optionQuote.low):
                        # Check whether the low is still within range, else it has to be updated
                        if trade.low != optionQuote.low:
                            isDataUpdate = True
                            message = "OptionBuyingStrategy: Low is updated for {0}".format(trade.tradingSymbol)
                            Utils.sendMessageTelegramBot(message)
                            logging.info(message)
                            trade.low = optionQuote.low
                            OptionBuyingStrategy.updatedTrades.append(trade)
                    else:
                        isDataUpdate = True
                        newStrikeTrade = OptionBuyingStrategy.getUpdatedStrike(
                            int(trade.tradingSymbol[-7:-2]), trade.tradingSymbol[-2:])
                        message = "OptionBuyingStrategy: Strike is updated from {0} to {1}".format(
                            trade.tradingSymbol, newStrikeTrade.tradingSymbol)
                        Utils.sendMessageTelegramBot(message)
                        logging.info(message)
                        trade = newStrikeTrade
                OptionBuyingStrategy.trades.append(trade)
            if len(OptionBuyingStrategy.trades) != 0 and isDataUpdate:
                # OptionBuyingStrategy.trades = []
                # OptionBuyingStrategy.trades = Test.updatedTrades
                OptionBuyingStrategy.writeStrikesToFile(OptionBuyingStrategy.trades)
            time.sleep(60)
def get_all_disks(self):
    for disk in self.allDisks.split():
        self.disksList.update({
            disk: Disk(
                Utils.produce_command("sudo fdisk -l | grep /dev/sda: |awk '{print $3$4}'"),
                Utils.produce_command("cat /sys/block/sd?/device/vendor"))
        })
def __init__(self, bot, client, torrentsQueue: Queue, ytQueue, megaQueue):
    self.bot = bot
    self.client = client
    self.ytQueue = ytQueue
    self.megaQueue = megaQueue
    self.torrentsQueue = torrentsQueue
    self.utils = Utils()
    self.db = DB()
def test_no_details_login(self):
    login_page = LoginPage(self.driver)
    login_page.login('', '')
    utils = Utils(self.driver)
    utils.wait_for_element(login_page.errorMessage)
    # The error animation may still be running and the explicit wait does not cover it,
    # so a small extra sleep is added even though it is not ideal.
    sleep(1)
    assert 'We didn\'t recognize that email and/or password.' in login_page.get_error_message_text()
def __init__(self, bot, client, torrentsQueue: Queue, megaQueue: Queue, ytQueue: Queue):
    self.bot = bot
    self.client = client
    self.torrentsQueue = torrentsQueue
    self.megaQueue = megaQueue
    self.ytQueue = ytQueue
    self.utils = Utils()
    self.db = DB()
    self.logger = logging.getLogger(' Admin Conv ')
def _generate_filtered_promotion_phones_source(self, cid, camp_info_today, today, directory=config.PROMOTION_DIR):
    out_dir = os.path.join(directory, "id{id_num}/{current_day}".format(id_num=cid, current_day=today))  # /path/to/id###/yyyymmdd
    Utils.mkdirs_if_not_exist(out_dir)
    output_file = os.path.join(out_dir, "id{id_num}.filtered.csv".format(id_num=cid))
    promotion_phones = self._filter(today, cid, camp_info_today, directory)
    self._logger.debug("write to file :{0}".format(output_file))
    Utils.write_to_file(promotion_phones, output_file)
    return output_file
def command(string, fromConsole):
    if string.lower() == 'stop':
        logger.log('info', 'Stopping proxy...')
        Utils.killServer()
    elif string.lower() == '':
        pass
    elif string.lower() == 'help':
        logger.log('info', '/stop: Stops the proxy')
    else:
        logger.log('error', str(base.get("invalidCommand")))
def setTradeToCompleted(trade, exit, exitReason=None):
    trade.tradeState = TradeState.COMPLETED
    trade.exit = exit
    trade.exitReason = exitReason if trade.exitReason is None else trade.exitReason
    trade.endTimestamp = Utils.getEpoch()
    trade = Utils.calculateTradePnl(trade)
    logging.info(
        'TradeManager: setTradeToCompleted strategy = %s, symbol = %s, qty = %d, entry = %f, exit = %f, pnl = %f, exit reason = %s',
        trade.strategy, trade.tradingSymbol, trade.filledQty, trade.entry,
        trade.exit, trade.pnl, trade.exitReason)
def list(self, page, size, proName, userName, modName, limitTime, currentUser):
    """ Query the list of assigned task records """
    start = (int(page) - 1) * int(size)
    try:
        sql = """
            SELECT task.*, member.PRO_NAME, member.USER_NAME
            FROM man_pro_task AS task
            LEFT JOIN man_pro_member AS member
            ON task.PRO_ID = member.PRO_ID AND task.USER_ID = member.USER_ID
            WHERE 1=1 {}{}{}{}{}
            ORDER BY MODULE_END ASC, CREATE_TIME DESC
            LIMIT {start}, {size};
        """.format(
            ' AND PRO_NAME like "%{}%"'.format(proName) if proName else '',
            ' AND USER_NAME like "%{}%"'.format(userName) if userName else '',
            ' AND MODULE_NAME like "%{}%"'.format(modName) if modName else '',
            ' AND MODULE_END <= "{}"'.format(limitTime) if limitTime else '',
            ' AND USER_CREATOR = "{}"'.format(currentUser) if currentUser else '',
            start=start, size=size)
        Utils.log('SQL for querying the task list', sql)
        list = PySQL.get(sql)
        sqlTotal = """
            SELECT COUNT(1)
            FROM man_pro_task AS task
            LEFT JOIN man_pro_member AS member
            ON task.PRO_ID = member.PRO_ID AND task.USER_ID = member.USER_ID
            WHERE 1=1 {}{}{}{}{}
        """.format(
            ' AND PRO_NAME like "%{}%"'.format(proName) if proName else '',
            ' AND USER_NAME like "%{}%"'.format(userName) if userName else '',
            ' AND MODULE_NAME = "{}"'.format(modName) if modName else '',
            ' AND MODULE_END <= "{}"'.format(limitTime) if limitTime else '',
            ' AND USER_CREATOR = "{}"'.format(currentUser) if currentUser else '',
        )
        total = PySQL.count(sqlTotal)
        return list, total
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return [], 0
def status(self, bugId, status, currentUser):
    """ Update the bug status and insert a handling record """
    try:
        bugSQL = """
            UPDATE man_pro_bug
            SET BUG_STATUS='{status}', HANDLE_BY='{handleBy}', HANDLE_TIME='{handleTime}'
            WHERE BUG_ID='{bugId}';
        """.format(status='created' if status == 'reset' else status,
                   handleBy=currentUser,
                   handleTime=Utils.getLocalTime(),
                   bugId=bugId)
        recordSQL = """
            INSERT INTO man_bug_record(
                ID, BUG_ID, BUG_STATUS, HANDLE_BY, HANDLE_TIME
            )VALUES(
                '{}','{}','{}','{}','{}'
            );
        """.format(Utils.makeId(), bugId, status, currentUser, Utils.getLocalTime())
        Utils.log('SQL for updating the bug status', bugSQL)
        Utils.log('SQL for inserting the handling record', recordSQL)
        return PySQL.execute(bugSQL) > 0 and PySQL.execute(recordSQL) > 0
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return False
def __init__(self, vmc_file, user_agent, is_file=False):
    self.STORAGE_CERT_DIR = Config.STORAGE_CERT_DIR
    self.vmc_file = vmc_file
    self.Utils = Utils()
    self.vmc_response = {
        "status": False,
        "errors": [],
        "vmc_link": vmc_file
    }
    self.is_file = is_file
    self.user_agent = user_agent
def subProcessTask(df_today, result, start, sm, stockManager, engine, setting, todayStr):
    for index, row in df_today.iterrows():
        # sleep(0.1)
        code = row['code']
        print('thread %s run %s' % (threading.current_thread().name, code))

        def cb(**kw):
            return ts.get_k_data(kw['kw']['code'], start=kw['kw']['start'])

        df_3m = Utils.queryData('k_data_' + code, 'code', engine, cb,
                                forceUpdate=setting.get_updateToday(),
                                code=code, start=start)
        if df_3m is None:
            continue
        # Skip when there are fewer than 180 days of data
        if df_3m.empty or len(df_3m) < setting.get_trendPeriod():
            continue
        # Append today's row if it is not already present
        if df_3m.iloc[-1].get('date') != todayStr:
            today_df = pd.DataFrame([[
                todayStr, row['open'], row['trade'], row['high'], row['low'],
                row['volume'] / 100, code
            ]], columns=list([
                'date', 'open', 'close', 'high', 'low', 'volume', 'code'
            ]))
            df_3m = df_3m.append(today_df, ignore_index=True)
        # df_3m = df_3m[::-1]
        # Compute indicators using roughly half a year of daily data
        Utils.macd(df_3m)
        Utils.myKdj(df_3m)
        # ---------- start the configured strategy calculation ----------
        data = {
            'code': code,
            'df_3m': df_3m,
            'df_realTime': row,
            'engine': engine,
            'macd': None,
            'kdj': None,
            'ma': None,
            'turnover': None,
            'volume': None,
            'bigMoney': None,
            'concept': None
        }
        if sm.start(code, setting.get_Strategy(), data, setting):
            # buildStockModels(code, data, stockManager)
            result.append(code)
def getList(self, page, size, proId, userCreator, userId, userName):
    """ Query the user list from the database """
    start = (int(page) - 1) * int(size)
    try:
        sql = """
            SELECT user.* FROM man_auth_user AS user
            WHERE (user.USER_CREATOR='{userCreator}' OR user.USER_ID='{userCreator}') {id}{name}
            AND user.USER_ID NOT IN (
                SELECT member.USER_ID FROM man_pro_member AS member
                WHERE member.USER_CREATOR='{userCreator}' AND member.PRO_ID='{proId}'
            )
            ORDER BY USER_ID
            LIMIT {start}, {size};
        """.format(
            userCreator=userCreator,
            proId=proId,
            id=' AND USER_ID like "%{}%"'.format(userId) if userId else '',
            name=' AND USER_NAME like "%{}%"'.format(userName) if userName else '',
            start=start, size=size)
        sqlTotal = """
            SELECT COUNT(1) FROM man_auth_user AS user
            WHERE (user.USER_CREATOR='{userCreator}' OR user.USER_ID='{userCreator}') {id}{name}
            AND user.USER_ID NOT IN (
                SELECT member.USER_ID FROM man_pro_member AS member
                WHERE member.USER_CREATOR='{userCreator}' AND member.PRO_ID='{proId}'
            )
            ORDER BY USER_ID;
        """.format(
            userCreator=userCreator,
            proId=proId,
            id=' AND USER_ID like "%{}%"'.format(userId) if userId else '',
            name=' AND USER_NAME like "%{}%"'.format(userName) if userName else '')
        Utils.log('Query the project member candidate list {}'.format(sql))
        list = PySQL.get(sql)
        total = PySQL.count(sqlTotal)
        return list, total
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return [], 0
def __init__(self, svg_file, user_agent, is_file=False):
    self.RNG_SCHEMA_FILE = Config.RNG_SCHEMA_FILE
    self.STORAGE_SVG_DIR = Config.STORAGE_SVG_DIR
    self.svg_file = svg_file
    self.Utils = Utils()
    self.svg_response = {
        "status": False,
        "errors": [],
        "svg_link": svg_file
    }
    self.is_file = is_file
    self.user_agent = user_agent
def _generate_filtered_3rd_party_campaign_source(self, cid, given_date, directory=config.CAMPAIGN_DIR):
    output_directory = os.path.join(directory, "id{id_num}/{current_day}".format(id_num=cid, current_day=given_date))
    Utils.mkdirs_if_not_exist(output_directory)
    output_file = output_directory + "/" + "id{id_num}.filtered.csv".format(id_num=cid)
    filtered_phones = helper.read_phones_file(Utils.get_raw_campaign_file(config.CAMPAIGN_DIR, cid))
    if time.strftime("%Y%m%d") == given_date:
        self._logger.info("[stats]campaign id{0} has {1} base subs, {2} eligible subs".format(
            cid, len(filtered_phones), len(filtered_phones)))
    Utils.write_to_file(filtered_phones, output_file)
    return output_file
def getList(self):
    """ Query the list of project types """
    try:
        sql = "SELECT CONVERT(TYPE_ID, char) AS TYPE_ID, TYPE_TEXT FROM man_pro_type;"
        list = PySQL.get(sql)
        return list
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return []
def updatePwd(self, id, pwd, new):
    """ Verify the login credentials and update the account password """
    try:
        sql = "UPDATE man_auth_user SET USER_PWD='{2}' WHERE USER_ID='{0}' AND USER_PWD='{1}';".format(
            id, pwd, new)
        return PySQL.execute(sql)
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return 0
def __init__(self, domain):
    self.approved_nameservers = None
    self.approved_mx_hostnames = None
    self.skip_tls = True
    self.include_dmarc_tag_descriptions = False
    self.nameservers = None
    self.timeout = 2.0
    self.parked = False
    # self.wait = 0.0
    self.Utils = Utils()
    self.domain_results = {}
    self.domain = domain.rstrip(".\r\n").strip().lower()
    self.base_domain = checkdmarc.get_base_domain(self.domain)
def getList(self, page, size, userId, userName, userAuth, userLogin, userCreator):
    """ Query the user list from the database """
    start = (int(page) - 1) * int(size)
    try:
        sql = """
            SELECT USER_ID,USER_NAME,USER_AUTH,USER_LOGIN,USER_CREATOR,USER_CRE_TIME
            FROM man_auth_user
            WHERE 1=1 {}{}{}{}{}{}
            ORDER BY USER_AUTH, USER_ID
            LIMIT {start}, {size};
        """.format(
            ' AND USER_ID like "%{}%"'.format(userId) if userId else '',
            ' AND USER_Name like "%{}%"'.format(userName) if userName else '',
            ' AND USER_AUTH = "{}"'.format(userAuth) if userAuth else '',
            ' AND USER_LOGIN = "******"'.format(userLogin) if userLogin else '',
            ' AND USER_CREATOR = "{}"'.format(userCreator) if userCreator and userCreator != 'admin' else '',
            ' OR USER_ID = "{}"'.format(userCreator) if userCreator and userCreator != 'admin' else '',
            start=start, size=size)
        Utils.log('SQL for querying the user list', sql)
        list = PySQL.get(sql)
        sqlTotal = """
            SELECT COUNT(1) FROM man_auth_user
            WHERE 1=1 {}{}{}{}{}{};
        """.format(
            ' AND USER_ID like "%{}%"'.format(userId) if userId else '',
            ' AND USER_Name like "%{}%"'.format(userName) if userName else '',
            ' AND USER_AUTH = "{}"'.format(userAuth) if userAuth else '',
            ' AND USER_LOGIN = "******"'.format(userLogin) if userLogin else '',
            ' AND USER_CREATOR = "{}"'.format(userCreator) if userCreator and userCreator != 'admin' else '',
            ' OR USER_ID = "{}"'.format(userCreator) if userCreator and userCreator != 'admin' else '',
        )
        total = PySQL.count(sqlTotal)
        return list, total
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return [], 0
def __init__(self, vmc_file, user_agent, svg_link='', is_file=False):
    self.STORAGE_CERT_DIR = Config.STORAGE_CERT_DIR
    self.vmc_file = vmc_file
    self.Utils = Utils()
    self.vmc_response = {
        "status": False,
        "errors": [],
        "vmc_link": vmc_file
    }
    self.is_file = is_file
    self.user_agent = user_agent
    self.parsed_vmc = None
    self.svg_link = svg_link
    self.pem_file_path = None
def getInfo(self, page, size, proId, userId, userName):
    """ Query the project-user list from the database """
    start = (int(page) - 1) * int(size)
    try:
        sql = """
            SELECT mem.*, task.MODULE_ID, task.MODULE_NAME
            FROM man_pro_member AS mem
            LEFT JOIN man_pro_task AS task
            ON mem.PRO_ID = task.PRO_ID AND mem.USER_ID = task.USER_ID
            WHERE mem.PRO_ID='{proId}' {id}{name}
            ORDER BY mem.USER_ID
            LIMIT {start}, {size};
        """.format(
            proId=proId,
            id=' AND mem.USER_ID like "%{}%"'.format(userId) if userId else '',
            name=' AND mem.USER_NAME like "%{}%"'.format(userName) if userName else '',
            start=start, size=size)
        sqlTotal = """
            SELECT COUNT(1)
            FROM man_pro_member AS mem
            LEFT JOIN man_pro_task AS task
            ON mem.PRO_ID = task.PRO_ID AND mem.USER_ID = task.USER_ID
            WHERE mem.PRO_ID='{proId}' {id}{name}
            ORDER BY mem.USER_ID
            LIMIT {start}, {size};
        """.format(
            proId=proId,
            id=' AND mem.USER_ID like "%{}%"'.format(userId) if userId else '',
            name=' AND mem.USER_NAME like "%{}%"'.format(userName) if userName else '',
            start=start, size=size)
        list = PySQL.get(sql)
        total = PySQL.count(sqlTotal)
        Utils.log('Query the modules assigned to project members {}'.format(sql))
        return list, total
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
        return [], 0
def detection(self, session):
    r = session.get(self.logined_url, headers=self.headers3)
    Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)
    if r.status_code == 200:
        r.encoding = 'utf-8'
        html = etree.HTML(r.text)
        name = html.xpath('//*[@id="profile"]/div/div[2]/div[1]/div/div/text()[1]')
        if not name:
            return 1
        else:
            return 0
    else:
        return 1
def testOrders():
    orderManager = ZerodhaOrderManager()
    exchange = 'NSE'
    tradingSymbol = 'SBIN'
    lastTradedPrice = Quotes.getCMP(exchange + ':' + tradingSymbol)
    logging.info(tradingSymbol + ' CMP = %f', lastTradedPrice)

    limitPrice = lastTradedPrice - lastTradedPrice * 1 / 100
    limitPrice = Utils.roundToNSEPrice(limitPrice)
    qty = 1
    direction = 'LONG'

    # place order
    origOrderId = orderManager.placeOrder(tradingSymbol, limitPrice, qty, direction)
    logging.info('Original order Id %s', origOrderId)

    # sleep for 10 seconds then modify order
    time.sleep(10)
    newPrice = lastTradedPrice
    if origOrderId:
        orderManager.modifyOrder(origOrderId, newPrice)

    # sleep for 10 seconds and then place SL order
    time.sleep(10)
    slPrice = newPrice - newPrice * 1 / 100
    slPrice = Utils.roundToNSEPrice(slPrice)
    slDirection = 'SHORT' if direction == 'LONG' else 'LONG'
    slOrderId = orderManager.placeSLOrder(tradingSymbol, slPrice, qty, slDirection)
    logging.info('SL order Id %s', slOrderId)

    # sleep for 10 seconds and then place target order
    time.sleep(10)
    targetPrice = newPrice + newPrice * 2 / 100
    targetPrice = Utils.roundToNSEPrice(targetPrice)
    targetDirection = 'SHORT' if direction == 'LONG' else 'LONG'
    targetOrderId = orderManager.placeOrder(tradingSymbol, targetPrice, qty, targetDirection)
    logging.info('Target order Id %s', targetOrderId)

    # sleep for 10 seconds and cancel target order
    time.sleep(10)
    if targetOrderId:
        orderManager.cancelOrder(targetOrderId)
        logging.info('Cancelled Target order Id %s', targetOrderId)

    logging.info("Algo done executing all orders. Check your orders and positions in the broker terminal.")
def validInsert(self, userId):
    """ Check whether the user name already exists """
    count = 0
    try:
        sql = """
            SELECT COUNT(1) FROM man_auth_user WHERE USER_ID="{}";
        """.format(userId)
        count = PySQL.count(sql)
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
    return count > 0
def run():
    helptool = HelpTool()
    cookieList = helptool.getCookie()
    base_url = 'https://movie.douban.com/top250'
    headers = {'User-Agent': random.choice(constants.USER_AGENT)}
    doubanIds = []
    savePath = 'data/top250id.txt'
    # index of the cookie currently in use
    index = 0
    # fetch the Douban Top 250 id data
    r = requests.get(base_url, headers=headers, cookies=cookieList[index])
    r.encoding = 'utf-8'
    while True:
        html = etree.HTML(r.text)
        result = html.xpath('//div[@class="item"]')
        # If the fetched data is empty, delay to ease the load on the target server and skip this round.
        if not result:
            continue
        for item in result:
            doubanid = item.xpath('div/div[@class="hd"]/a/@href')
            if doubanid:
                tmp = doubanid[0].strip().split('/')
                if tmp[-1] == '':
                    value = tmp[-2]
                else:
                    value = tmp[-1]
                doubanIds.append(value)
                print('----------movie id ' + value + ': scraped successfully' + '----------')
        nextUrl = html.xpath('//span[@class="next"]/a/@href')
        if not nextUrl:
            break
        url = base_url + nextUrl[0]
        index += 1
        if index >= constants.UserNum:
            index = 0
        Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)
        r = requests.get(url, headers=headers, cookies=cookieList[index])
        r.encoding = 'utf-8'
    if doubanIds:
        helptool.storeFailData(savePath, doubanIds)
def mine(cls, block: Block, mine_interrupt: threading.Event = None) -> Union[Block, None]:
    start = time.time()
    nonce = 0
    target = (1 << (256 - block.bits))
    mine_interrupt.clear()
    logger.info(f'[consensus] mining after block {block.prev_block_hash}')

    while int(Utils.sha256d(block.header(nonce)), 16) >= target:
        nonce += 1
        if mine_interrupt.is_set():
            logger.info(f'[consensus] mining interrupted +++ {nonce}')
            mine_interrupt.clear()
            return None

    block = block._replace(nonce=nonce)
    duration = max(time.time() - start, 0.0001)
    khs = (block.nonce // duration) // 1000
    logger.info(
        f'[consensus] mining block found at nonce={nonce} using {round(duration, 4)} s at rate {khs} KH/s: {block.id}'
    )
    return block
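# Self-contained worked example of the target arithmetic used in mine() (the numbers are
# illustrative, not taken from any real chain): with bits = 20 the target is
# 1 << (256 - 20), so an accepted hash must have at least 20 leading zero bits, and each
# extra bit of difficulty halves the acceptable range.
bits = 20                                    # hypothetical difficulty
target = 1 << (256 - bits)                   # same formula as in mine()
example_hash = int('00000a' + 'f' * 58, 16)  # 256-bit value with 20 leading zero bits
print(example_hash < target)                 # True: this hash would satisfy the proof of work
print(target // (1 << (256 - (bits + 1))))   # 2: one more bit of difficulty halves the target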
def __init__(self):
    QtCore.QThread.__init__(self)
    self.isExiting = False
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
    self.csvWriter = Csv("nisbets.csv")
    self.mainUrl = "http://www.nisbets.co.uk"
    csvHeaderList = [
        "URL",
        "Product Code",
        "Product Technical Specifications",
        "Product Name",
        "Brand",
        "Product Price",
        "Product Short Description",
        "Product Long Description",
        "Image File Name",
        "User Manual File Name",
        "Exploded View File Name",
        "Spares Code",
        "Accessories",
        "Product Status",
        "Category1",
        "Category2",
        "Category3",
        "Category4",
    ]
    if "URL" not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvHeaderList)
        self.dupCsvRows.append(csvHeaderList[0])
    self.utils = Utils()
def delete(self, fileId, userCreator):
    """ Delete a file """
    count = 0
    try:
        sql = """
            DELETE FROM man_pro_file WHERE ID='{}';
        """.format(fileId)
        Utils.log("SQL for deleting a project file", sql)
        count = PySQL.execute(sql)
    except Exception as e:
        print('ERROR {}'.format(e))
        Utils.log('ERROR {}'.format(e))
    return count > 0
def getTigerCodeList():
    def cb(**kw):
        return ts.inst_tops()

    df_tiger = Utils.queryData('tiger', 'code', engine, cb, forceUpdate=True)
    df_tiger = df_tiger[df_tiger['net'] > 0]
    return df_tiger['code'].tolist()
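# Minimal, self-contained illustration of the filtering step above (the frame and its
# values are made up for demonstration): a boolean mask drops rows with a non-positive
# 'net', and only the surviving 'code' values are returned as a plain list.
import pandas as pd

df = pd.DataFrame({'code': ['600000', '000001', '300750'], 'net': [1200.0, -350.0, 80.5]})
codes = df[df['net'] > 0]['code'].tolist()
print(codes)  # ['600000', '300750']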
def get_whitelist_name_and_source_path(cid, camp_info_date, given_date):
    credit_list = camp_info_date[cid]
    min_credit, max_credit = credit_list[0], credit_list[1]
    promotion_filter = PromotionFilter(logger)
    campaign_filter = CampaignFilter(logger)
    if Utils.is_promotion(min_credit, max_credit):
        return (config.PROMOTION_SET,
                promotion_filter.get_promotion_source_path(cid, camp_info_date, given_date))
    return (config.CAMPAIGN_SET,
            campaign_filter.get_3rd_party_campaign_sourse_path(cid, camp_info_date, given_date))
def get_update_info(update_ids, camp_info_yesterday, camp_info_today, today, yesterday):
    tuples_list = list()
    for cid in update_ids:
        set_name, yesterday_file = get_whitelist_name_and_source_path(cid, camp_info_yesterday, yesterday)
        set_name, today_file = get_whitelist_name_and_source_path(cid, camp_info_today, today)
        today_set, yesterday_set = set(), set()
        try:
            today_set = helper.read_phones_file(today_file)
            yesterday_set = helper.read_phones_file(yesterday_file)
        except IOError as ie:
            logger.error("file does not exist:{0}".format(ie))
        directory = config.PROMOTION_DIR if set_name == config.PROMOTION_SET else config.CAMPAIGN_DIR
        out_dir = os.path.join(directory, "id{0}/{1}/".format(cid, today))
        output_add, output_remove = os.path.join(out_dir, "add.csv"), os.path.join(out_dir, "remove.csv")
        Utils.write_to_file(today_set - yesterday_set, output_add)
        Utils.write_to_file((yesterday_set - today_set), output_remove)
        id_tuple = (cid, set_name, output_add, output_remove)
        tuples_list.append(id_tuple)
        logger.debug(id_tuple)
    return tuples_list
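# Self-contained illustration of the add/remove diff computed above (the phone numbers
# are made up): entries only in today's set go to add.csv, entries only in yesterday's
# set go to remove.csv, and the overlap is written to neither file.
today_set = {'13800000001', '13800000002', '13800000003'}
yesterday_set = {'13800000002', '13800000003', '13800000004'}
print(sorted(today_set - yesterday_set))   # ['13800000001']  -> add.csv
print(sorted(yesterday_set - today_set))   # ['13800000004']  -> remove.csv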
def __init__(self):
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 0)
    self.csvWriter = Csv('cs_product.csv')
    self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
    self.utils = Utils()
    self.csvWriter.writeCsvRow(
        ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price',
         'Discount', 'Product Short Description', 'Product Long Description',
         'Product Technical Specifications', 'Warranty', 'Delivery', 'Product Image',
         'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Brand Image'])
    self.totalProducts = 0
def startStt(self, audioFd):
    # logging
    log.startLogging(sys.stdout)

    hostname = "stream.watsonplatform.net"
    headers = {}
    # authentication header
    if self.configData["tokenauth"]:
        headers['X-Watson-Authorization-Token'] = Utils.getAuthenticationToken(
            "https://" + hostname, 'speech-to-text',
            self.configData["user"], self.configData["password"])
    else:
        string = self.configData["user"] + ":" + self.configData["password"]
        headers["Authorization"] = "Basic " + base64.b64encode(string)

    # create a WS server factory with our protocol
    url = "wss://" + hostname + "/speech-to-text/api/v1/recognize?model=" + self.configData["model"]
    summary = {}
    factory = WSInterfaceFactory(audioFd, summary, self.CONTENT_TYPE,
                                 self.configData["model"], url, headers, debug=False)
    factory.setListeners(self.listeners)
    factory.protocol = WSInterfaceProtocol

    if factory.isSecure:
        contextFactory = ssl.ClientContextFactory()
    else:
        contextFactory = None

    connectWS(factory, contextFactory)
    reactor.run()
address_obj.phone("647-788-3901")
address_obj.recipientName("Jane Doe")
profile_obj1 = Profile(None)
profile_obj1.id(response_object.id)
address_obj.profile(profile_obj1)
response_object2 = optimal_obj.customer_vault_service_handler().create_address(address_obj)

# Create EFT Bank Account
eftbankaccount_obj = EFTBankAccount(None)
eftbankaccount_obj.nickName("Sally's Bank of Montreal Account")
eftbankaccount_obj.accountHolderName("Sally")
eftbankaccount_obj.accountNumber(RandomTokenGenerator().generateNumber())
eftbankaccount_obj.transitNumber("25039")
eftbankaccount_obj.institutionId("001")
eftbankaccount_obj.billingAddressId(response_object2.id)
profile_obj2 = Profile(None)
profile_obj2.id(response_object.id)
eftbankaccount_obj.profile(profile_obj2)
response_object3 = optimal_obj.customer_vault_service_handler().create_eft_bank_account(eftbankaccount_obj)

print('\nProfile Id : ', response_object.id)
print('\nAddress Id : ', response_object2.id)
print("\nResponse Values ==========> ")
Utils.print_response(response_object3)
class NisbetProduct(QtCore.QThread):
    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
        self.csvWriter = Csv("nisbets.csv")
        self.mainUrl = "http://www.nisbets.co.uk"
        csvHeaderList = [
            "URL",
            "Product Code",
            "Product Technical Specifications",
            "Product Name",
            "Brand",
            "Product Price",
            "Product Short Description",
            "Product Long Description",
            "Image File Name",
            "User Manual File Name",
            "Exploded View File Name",
            "Spares Code",
            "Accessories",
            "Product Status",
            "Category1",
            "Category2",
            "Category3",
            "Category4",
        ]
        if "URL" not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])
        self.utils = Utils()

    def run(self):
        self.scrapData()

    def stop(self):
        self.isExiting = True

    def scrapData(self):
        if self.isExiting:
            return
        self.scrapProductData.emit("<font color=green><b>Main URL: </b>%s</font>" % self.mainUrl)
        self.logger.debug("===== URL [" + self.mainUrl + "] =====")
        data = self.spider.fetchData(self.mainUrl)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category1Chunk = self.regex.getAllSearchedData('(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            if category1Chunk:
                for category1Data in category1Chunk:
                    category1 = self.regex.getSearchedData('(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
                    category2Chunk = self.regex.getAllSearchedData(
                        '(?i)<li><a href="([^"]*)">([^<]*)</a>', category1Data)
                    if category2Chunk:
                        for category2Data in category2Chunk:
                            self.scrapCategory2Data(self.mainUrl + category2Data[0], category1, category2Data[1])
        self.scrapProductData.emit("<font color=red><b>Finish Scraping Product data from %s</b></font>" % self.mainUrl)

    def scrapCategory2Data(self, url, category1, category2):
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 2 URL: </b>%s" % url)
        self.logger.debug("== Category 2 URL [" + url + "] ==")
        data = self.spider.fetchData(url)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category3Chunks = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
            if category3Chunks:
                category3Chunk = self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<', category3Chunks)
                if category3Chunk:
                    for category3Data in category3Chunk:
                        self.scrapCategory3Data(self.mainUrl + category3Data[0], category1, category2, category3Data[1])

    def scrapCategory3Data(self, url, category1, category2, category3):
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 3 URL: </b>%s" % url)
        self.logger.debug("== Category 3 URL [" + url + "] ==")
        data = self.spider.fetchData(url)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category4Chunks = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
            if category4Chunks:
                category4Chunk = self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<', category4Chunks)
                if category4Chunk:
                    for category4Data in category4Chunk:
                        category4Url = self.mainUrl + category4Data[0]
                        self.scrapCategory4Data(category4Url, category1, category2, category3, category4Data[1])

    def scrapCategory4Data(self, url, category1, category2, category3, category4):
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 4 URL: </b>%s" % url)
        self.logger.debug("== Category 4 URL [" + url + "] ==")
        data = self.spider.fetchData(url)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            categoryChunk = self.regex.getAllSearchedData(
                '(?i)<div class="product-list-row clear-after">(.*?)</fieldset>', data)
            if categoryChunk:
                for categoryData in categoryChunk:
                    if self.isExiting:
                        return
                    productInfo = self.regex.getSearchedDataGroups(
                        '(?i)<h3 class="product-name"> <a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryData)
                    productUrl = self.mainUrl + productInfo.group(1)
                    productName = productInfo.group(2)
                    if productUrl not in self.dupCsvRows:
                        self.dupCsvRows.append(productUrl)
                    else:
                        self.scrapProductData.emit(
                            "<font color=green><b>Already exists this item in csv Skip it</b></font>")
                        self.logger.debug("========= Already exists this item Skip it ===========")
                        return
                    productImageInfo = self.regex.getSearchedDataGroups(
                        '(?i)<img class="primaryImage" src="([^"]*)" alt="([^"]*)"', categoryData)
                    image = self.regex.replaceData("(?i)medium", "xlarge", str(productImageInfo.group(1)))
                    productImageUrl = self.mainUrl + image
                    productImage = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_.]*)$", image)
                    self.utils.downloadFile(productImageUrl, "images/" + productImage)
                    productCode = productImageInfo.group(2)
                    productTechSpecs = self.regex.getSearchedData('(?i)<p class="description">([^<]*)</p>', categoryData)
                    brandName = self.regex.getSearchedData(
                        '(?i)<img class="brand-image" src="[^"]*" alt="([^"]*)"', categoryData)
                    price = self.regex.getSearchedData(
                        '(?i)<div class="reduced-price"> <span class="[^"]*">([^<]*)</span>', categoryData)
                    if price:
                        price = price.strip()[1:]
                    productStatus = self.regex.getSearchedData(
                        '(?i)<div class="availibility"> <img alt="([^"]*)"', categoryData)
                    productDesc = ""
                    productLongDesc = ""
                    spareCodes = ""
                    accessoryCode = ""
                    userManual = ""
                    explodedView = ""
                    self.scrapProductData.emit(
                        "<br /><font color=green><b>Product Details URL: </b>%s</font>" % productUrl)
                    productChunk = self.spider.fetchData(productUrl)
                    if productChunk:
                        productChunk = self.regex.reduceNewLine(productChunk)
                        productChunk = self.regex.reduceBlankSpace(productChunk)
                        productDesc = self.regex.getSearchedData(
                            '(?i)<div class="productDesc"> <h1 class="[^"]*"[^>]*?>[^<]*?</h1>.*?<p>([^<]*)</p>',
                            productChunk)
                        productLongDesc = self.regex.getSearchedData(
                            '(?i)<div class="info-product[^>]*?>(.*?)</div>', productChunk)
                        otherUrl = self.regex.getSearchedData("(?i)(^.*?/)[a-zA-Z0-9._-]*?$", productUrl)
                        self.logger.debug("== Common Product URL [" + otherUrl + "] ==")
                        sparesUrl = otherUrl + "AjaxProductSpares.raction"
                        self.logger.debug("== Spares URL [" + sparesUrl + "] ==")
                        spares = self.spider.fetchData(sparesUrl)
                        if spares:
                            spares = self.regex.getAllSearchedData(
                                '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', spares)
                            if spares:
                                spareCodes = ", ".join(spares)
                        accessoriesUrl = otherUrl + "AjaxProductAccessories.raction"
                        self.logger.debug("== Accessories URL [" + accessoriesUrl + "] ==")
                        accessories = self.spider.fetchData(accessoriesUrl)
                        if accessories:
                            accessories = self.regex.getAllSearchedData(
                                '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', accessories)
                            if accessories:
                                accessoryCode = ", ".join(accessories)
                        docUrl = otherUrl + "AjaxProductDocuments.raction"
                        self.logger.debug("== Document URL[" + docUrl + "] ==")
                        userManuals = self.spider.fetchData(docUrl)
                        if userManuals:
                            userManual = self.regex.getSearchedData(
                                '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download User Manual</a>',
                                userManuals)
                            self.logger.debug("Manual URL: " + userManual)
                            if userManual:
                                userManualUrl = self.mainUrl + self.regex.replaceData(" ", "%20", userManual)
                                self.logger.debug("User Manual URL: " + userManualUrl)
                                self.scrapProductData.emit("<b>User Manual PDF URL: </b>%s" % userManualUrl)
                                userManual = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_. ]*)$", userManual)
                                userManual = self.regex.replaceData("\s+", "_", userManual.strip())
                                self.scrapProductData.emit(
                                    "<font color=green><b>Downloading User Manual: </b>%s <b>Please Wait...</b>"
                                    % userManual)
                                self.utils.downloadFile(userManualUrl, "user_manual/" + userManual)
                            explodedView = self.regex.getSearchedData(
                                '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download Exploded Diagram</a>',
                                userManuals)
                            if explodedView:
                                explodedViewUrl = self.mainUrl + self.regex.replaceData(" ", "%20", explodedView)
                                self.scrapProductData.emit("<b>Exploded Diagram PDF URL: </b>%s" % explodedViewUrl)
                                explodedView = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_. ]*)$", explodedView)
                                explodedView = self.regex.replaceData("\s+", "_", explodedView.strip())
                                self.scrapProductData.emit(
                                    "<font color=green><b>Downloading Exploded Diagram: </b>%s <b>Please Wait...</b>"
                                    % explodedView)
                                self.utils.downloadFile(explodedViewUrl, "exploded_view/" + explodedView)
                    csvData = [
                        productUrl, productCode, productTechSpecs, productName, brandName,
                        price.strip(), productDesc, productLongDesc, productImage, userManual,
                        explodedView, spareCodes, accessoryCode, productStatus,
                        category1, category2, category3, category4,
                    ]
                    self.csvWriter.writeCsvRow(csvData)
                    self.logger.debug("Scraped data " + str(csvData))
                    self.scrapProductData.emit("<div><b>Scraped Data: </b>%s<br /></div>" % str(csvData))
def __init__(self):
    constants.__init__(self)
    Utils.__init__(self)
class CsProduct(QThread):
    notifyProduct = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 0)
        self.csvWriter = Csv('cs_product.csv')
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.utils = Utils()
        self.csvWriter.writeCsvRow(
            ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price',
             'Discount', 'Product Short Description', 'Product Long Description',
             'Product Technical Specifications', 'Warranty', 'Delivery', 'Product Image',
             'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Brand Image'])
        self.totalProducts = 0

    def run(self):
        self.scrapProduct()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All products.</b></font>')

    def scrapProduct(self):
        self.logger.debug('Main URL: ' + self.mainUrl)
        self.notifyProduct.emit('<font color=green><b>Main URL: %s</b></font>' % self.mainUrl)
        data = self.spider.fetchData(self.mainUrl)
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        self.notifyProduct.emit('<b>Try to scrap all categories.</b>')
        categories = self.regex.getAllSearchedData('(?i)<a href="([^"]*)" class="level-top" title="([^"]*)"', data)
        if categories and len(categories) > 0:
            self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
            for category in categories:
                category1Name = unicode(category[1]).strip()
                self.scrapCategory1Data(str(category[0]).strip(), category1Name)

    def scrapCategory1Data(self, url, category1Name):
        self.logger.debug('Category 1 URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % category1Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        categories = self.regex.getAllSearchedData(
            '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
        if categories and len(categories) > 0:
            self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
            for category in categories:
                self.scrapCategory2Data(category[0], category1Name, category[1])

    def scrapCategory2Data(self, url, category1Name, category2Name):
        self.logger.debug('Category 2 URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % category2Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        categories = self.regex.getAllSearchedData(
            '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
        if categories and len(categories) > 0:
            for category in categories:
                print 'category2: ' + category[0]
                self.scrapCategory3Data(category[0], category1Name, category2Name, category[1])

    def scrapCategory3Data(self, url, category1Name, category2Name, category3Name):
        self.logger.debug('Category 3 URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % category3Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            categories = self.regex.getAllSearchedData(
                '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
            if categories and len(categories) > 0:
                for category in categories:
                    print [category1Name, category2Name, category3Name, category[1]]
                    self.scrapProductsDetails(category[0], category1Name, category2Name, category3Name, category[1])

    def scrapProductsDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        self.logger.debug('Product Details URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all products under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url + '?limit=10000&mode=list')
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            products = self.regex.getAllSearchedData('(?i)<div class="listing-item[^"]*?">(.*?)</div>', data)
            if products and len(products) > 0:
                self.totalProducts += len(products)
                self.notifyProduct.emit('<font color=green><b>Total Products Found [%s]</b></font>' % unicode(self.totalProducts))
                for product in products:
                    productDetailUrl = self.regex.getSearchedData('(?i)<a href="([^"]*)"', product)
                    if productDetailUrl not in self.dupCsvRows:
                        self.scrapProductDetails(productDetailUrl, category1Name, category2Name, category3Name, category4Name)
                    else:
                        self.notifyProduct.emit(
                            '<font color=green><b>Already Exists This Product Under Category[%s]. Skip It.</b></font>'
                            % category4Name)

    def scrapProductDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        self.logger.debug('Product Detail URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap product details under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Product Detail URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            manufacturer = self.regex.getSearchedData(
                '(?i)<span class="manufacturer-box-label">Manufacturer:</span>([^<]*)</p>', data)
            productCode = self.regex.getSearchedData(
                '(?i)<span class="manufacturer-box-label">Model No:</span>([^<]*)</p>', data)
            productName = self.regex.getSearchedData('(?i)<div class="product-name"> <h1>([^<]*)</h1>', data)
            productTechnicalDesc = self.regex.getSearchedData('(?i)<div class="product-short-description">([^<]*)</div>', data)
            productDescriptions = self.regex.getSearchedData('(?i)<div class="product-specs">(.*?)</div>', data)
            productShortDesc = ''
            productFullDesc = ''
            if productDescriptions and len(productDescriptions) > 0:
                print 'desc: ' + productDescriptions
                productShortDesc = self.regex.getSearchedData('(?i)<p>(.*?)</p>', productDescriptions)
                productFullDesc = '\n'.join(
                    self.regex.getAllSearchedData('(?i)<li>([^<]*)</li>', productDescriptions))
            listPriceChunk = self.regex.getSearchedData('(?i)<div class="rrp-price regular-price">(.*?)</div>', data)
            listPrice = ''
            if listPriceChunk and len(listPriceChunk) > 0:
                listPrice = self.regex.getSearchedData('(?i)([0-9,.]+)', listPriceChunk)
            savePriceChunk = self.regex.getSearchedData('(?i)<div class="regular-price saving-price">(.*?)</div>', data)
            savePrice = ''
            if savePriceChunk and len(savePriceChunk) > 0:
                savePrice = self.regex.getSearchedData('(?i)([0-9%]+)', savePriceChunk)
            priceChunk = self.regex.getSearchedData('(?i)<div class="[^"]*" id="product-price-\d+">(.*?)</div>', data)
            price = ''
            if priceChunk and len(priceChunk) > 0:
                price = self.regex.getSearchedData('(?i)([0-9,.]+)', priceChunk)
            deliveryChunk = self.regex.getSearchedData('(?i)<div class="delivery">(.*?)</div>', data)
            delivery = ''
            if deliveryChunk and len(deliveryChunk) > 0:
                delivery = self.regex.getSearchedData('(?i)<p>([^<]*)</p>', deliveryChunk)
            warrantyChunk = self.regex.getSearchedData('(?i)<div class="warranty">(.*?)</div>', data)
            warranty = ''
            if warrantyChunk and len(warrantyChunk) > 0:
                warranty = self.regex.getSearchedData('(?i)<p>([^<]*)</p>', warrantyChunk)

            ## Download and save product images
            productImageUrl = self.regex.getSearchedData(
                '(?i)src="(http://assets.cs-catering-equipment.co.uk/media/catalog/product/cache/1/image/256x/[^"]*)"',
                data)
            print productImageUrl
            productImage = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_.]*)$', productImageUrl)
            if productImage and len(productImage) > 0:
                print productImage
                self.notifyProduct.emit('<b>Downloading Product Image [%s]. Please wait...</b>' % productImage)
                self.utils.downloadFile(productImageUrl, 'product_image/' + productImage)

            ## Download and save brand images
            brandImageUrl = self.regex.getSearchedData(
                '(?i)<div class="manufacturer-box-left"><a href="[^"]*"[^>]*?><img src="([^"]*)"', data)
            brandImage = ''
            if brandImageUrl and len(brandImageUrl) > 0:
                brandImageUrl = self.regex.replaceData('(?i)logo/', '', brandImageUrl)
                brandImage = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_.]*)$', brandImageUrl)
                if brandImage and len(brandImage) > 0:
                    self.notifyProduct.emit('<b>Downloading Brand Image [%s]. Please wait...</b>' % brandImage)
                    self.utils.downloadFile(brandImageUrl, 'brand_image/' + brandImage)
            csvData = [url, productCode, productName, manufacturer, listPrice, price, savePrice,
                       productShortDesc, productFullDesc, productTechnicalDesc, warranty, delivery,
                       productImage, category1Name, category2Name, category3Name, category4Name, brandImage]
            self.csvWriter.writeCsvRow(csvData)
            self.logger.debug(unicode(csvData))
            self.notifyProduct.emit('<b>Product Details: %s</b>' % unicode(csvData))
def radioParser(self):
    pages_array = []
    all_genres = self.genresParser()
    for item in range(len(all_genres)):
        radio_url_genres = 'http://vtuner.com/setupapp/guide/asp/BrowseStations/BrowsePremiumStations.asp?sCategory=' + all_genres[item] + '&sBrowseType=Format&sViewBy=&sSortby=&sWhatList=&sNiceLang=&iCurrPage=1'
        url_clean = urllib.urlopen(radio_url_genres)
        soup = BeautifulSoup(url_clean, "lxml")
        pages = soup.findAll('div')
        for row in pages:
            y = row.findAll('a', {"class": "paging"})
            for row_1 in y:
                k = row_1.findAll('b')
                for row_2 in k:
                    l = row_2.getText()
                    pages_array.append(l)
        for number in range(len(pages_array)):
            radio_urls = 'http://vtuner.com/setupapp/guide/asp/BrowseStations/BrowsePremiumStations.asp?sCategory=' + all_genres[item] + '&sBrowseType=Format&sViewBy=&sSortby=&sWhatList=&sNiceLang=&iCurrPage=' + pages_array[number]
            m3u_url = 'http://vtuner.com/setupapp/guide/asp/'
            url_ready = urllib.urlopen(radio_urls)
            soup_radios = BeautifulSoup(url_ready, "lxml")
            main_table = soup_radios.find('table', id='table1').findAll('tr')
            for tab in main_table:
                table = tab.findAll('table')
                for tagz in table:
                    oi = tagz.findAll('tr')
                    # Some URLs can be broken, so the whole loop over rows is wrapped in try/except.
                    try:
                        for tr in oi:
                            station_url = ''
                            station_name = ''
                            station_location = ''
                            station_country = ''
                            station_genre = ''
                            station_quality = ''
                            station_updated = datetime.datetime.now()
                            alTds = tr.findAll('td')
                            if len(alTds) < 5:
                                continue
                            all_td_string = ''.join([str(x) for x in alTds])
                            bg_tag = 'bgcolor="#FFFFFF"'
                            strong_tag = '<strong>Station Name</strong>'
                            if bg_tag in all_td_string or strong_tag in all_td_string:
                                continue
                            if len(alTds) > 0:
                                allTdLinks = alTds[0].findAll('a')
                                if len(allTdLinks) > 0:
                                    station_url = m3u_url + allTdLinks[0]['href']
                                    print "DEBUG URL #1 ::::> " + station_url
                                    station_url = station_url.replace('../', '')
                                    print "DEBUG URL #2 ::::> " + station_url
                                    station_url = Utils.parse_m3u_file(station_url)
                                    real_station_url = station_url[0]
                                    clean_url = station_url[1]
                                    print "STATION URL: " + str(real_station_url)
                                    print "CLEAN URL: " + str(clean_url)
                                    logging.info('\n')
                                    logging.info('--- Radio block starts here ---')
                                    logging.info("URL of Radio: " + str(real_station_url))
                            if len(alTds) > 1:
                                allTdLinks = alTds[1].findAll('a')
                                if len(allTdLinks) > 0:
                                    station_name = allTdLinks[0].getText()
                                    logging.info("Name of Radio: " + station_name)
                            if len(alTds) > 2:
                                station_location = alTds[2].getText()
                                station_country = self.countryParseObj.get_country(station_location)
                                logging.info("Location of Radio: " + station_location)
                                logging.info("Country of Radio: " + station_country)
                            if len(alTds) > 3:
                                allTdLinks = alTds[3].findAll('a')
                                if len(allTdLinks) > 0:
                                    station_genre = allTdLinks[0].getText()
                                    logging.info("Genre of Radio: " + station_genre)
                            if len(alTds) > 4:
                                station_quality = alTds[4].getText()
                                logging.info("Quality of Radio: " + station_quality)
                            logging.info('--- Radio block ends here ---')
                            # Remove quotes for MySQL inserts
                            station_name = self.utilsObj.replace_quots(station_name)
                            # Check whether the station already exists in the DB
                            check_station = "SELECT id from `radio_station_stream_urls` where url REGEXP ('" + clean_url + "') LIMIT 1;"
                            check_station_result = self.mysql_obj.make_select(check_station)
                            logging.info("Station ID is: " + str(check_station_result))
                            if not check_station_result:
                                query_radio = "INSERT INTO `radio_stations`(`name`, `location`, `country`, `updated`) VALUES ('" + station_name + "'," + "'" + station_location + "'," + "'" + str(station_country) + "'," + "'" + str(station_updated) + "');"
                                insert_id = self.mysql_obj.make_insert(query_radio)
                                if insert_id != -1:
                                    station_quality = re.sub("\D", "", station_quality)
                                    query_url_and_bitrate = "INSERT INTO `radio_station_stream_urls`(`station_id`, `url`, `bitrate`) VALUES('" + str(insert_id) + "'," + "'" + real_station_url + "'," + "'" + station_quality + "');"
                                    self.mysql_obj.make_insert(query_url_and_bitrate)
                                    sep = "/"
                                    genre = station_genre.split(sep, 1)[0]
                                    query_get_genre_id = "SELECT `id` from `music_genres` WHERE `name`= " + "'" + genre + "'" + ";"
                                    result_genre_id = self.mysql_obj.make_select(query_get_genre_id)
                                    if not result_genre_id:
                                        query_insert_genre = "INSERT INTO `music_genres` (`name`) VALUES ('" + str(genre) + "');"
                                        id_genre_is = self.mysql_obj.make_insert(query_insert_genre)
                                        logging.info("Result is NONE, adding the new genre!")
                                    else:
                                        print "GENRE ID: " + str(result_genre_id[0]['id']) + "\n"
                                        id_genre_is = str(result_genre_id[0]['id'])
                                    query_insert_id_of_genre = "INSERT into `radio_station_genres` (`station_id`, `genre_id`) VALUES ('" + str(insert_id) + "','" + str(id_genre_is) + "');"
                                    self.mysql_obj.make_insert(query_insert_id_of_genre)
                            else:
                                print "Radio station - ALREADY EXIST!" + "\n"
                    except StandardError, e:
                        print e