def create_objects(collection_name, objects, limit=None, offset=None,
                   *args, **kwargs):
    """Insert one or more objects into ``collection_name``.

    Nothing is inserted when the lookup by ``_id`` finds any document that
    already uses one of the given ids.

    Args:
        collection_name: Name of the target collection.
        objects: A single object or a list of objects. Each object must
            offer ``.get`` so that its ``_id`` can be read.
        limit: Not applied here; echoed back in the response.
        offset: Not applied here; echoed back in the response.

    Returns:
        The ``createResponse200()`` payload with its ``result`` entry filled
        in, or the ``createResponse401(...)`` payload carrying the error
        message when anything in the body raised.
    """
    try:
        response = createResponse200()
        mongo = Mongo()
        mongo.connect()
        mongo.use_db(database_name)

        if not isinstance(objects, list):
            objects = [objects]

        # Refuse the whole batch when any _id is already stored.
        object_ids = [obj.get('_id', None) for obj in objects]
        existing = mongo.find(collection_name, {'_id': {'$in': object_ids}})
        if existing:
            raise Exception("Some objects already exist in '%s' (same _id). Trying using UPDATE method. No objects created." % collection_name)

        # No limit or offset is applied on creation.
        created = mongo.insert(collection_name, objects)
        result = response['result']
        result['objects'] = created
        result['count'] = len(created)
        result['limit'] = limit
        result['offset'] = offset
    except Exception as e:
        response = createResponse401(str(e))
    # The original built the response and then dropped it.
    return response
def testFindCountry(self):
    """Print each country seen in the 'data' records with its count.

    Records without a 'country' key are ignored. Output is ordered from the
    highest count to the lowest.
    """
    store = Mongo('config.ini')
    tally = {}
    for record in store.find('data'):
        if 'country' in record:
            for name in record['country']:
                tally[name] = tally.get(name, 0) + 1
    ranked = sorted(tally, key=tally.get, reverse=True)
    for name in ranked:
        print(name, ',', tally[name])
class Index:
    """Push documents read from a Mongo table to an ``ESIndex`` in batches."""

    def __init__(self, conf, xmlConf=""):
        """Build the ES and Mongo clients from the configuration.

        Args:
            conf: Passed to ``Config``; must provide the "es" section
                (host, batch, type) and the "mongo" section (host, port,
                db, table, step).
            xmlConf: Optional argument for ``IndexXmlParser``. When empty,
                no field mapping is loaded.
        """
        config = Config(conf)

        es_host = config.get("es", "host")
        es_batch = config.get("es", "batch")
        es_type = config.get("es", "type")
        self.es = ESIndex(es_host, es_batch, es_type)

        mongo_host = config.get("mongo", "host")
        mongo_port = int(config.get("mongo", "port"))
        mongo_db = config.get("mongo", "db")
        mongo_table = config.get("mongo", "table")
        self.mongo = Mongo(db=mongo_db, host=mongo_host, port=mongo_port,
                           table=mongo_table)
        # Number of documents fetched per batch in run().
        self.step = int(config.get("mongo", "step"))

        self.fields = IndexXmlParser(xmlConf) if xmlConf != "" else None

    def setIndex(self):
        """Create the index and put one mapping per configured doc type.

        When no field configuration was loaded, only the index is created.
        """
        self.es.create()
        if self.fields is None:
            return
        # Read the instance's own fields; the original relied on a global
        # variable named ``index``.
        for doc_type in self.fields.keys():
            mapping = {}
            for field in self.fields[doc_type]:
                mapping[field.name] = field.attrs()
            self.es.putMapping({"properties": mapping}, doc_type=doc_type)

    def run(self):
        """Send every Mongo document to ES, ``self.step`` documents at a time.

        A batch that raises is reported with its traceback and skipped.

        Raises:
            ValueError: If ``self.step`` is not positive, since the loop
                could never advance.
        """
        if self.step <= 0:
            raise ValueError(
                "mongo step must be a positive integer, got %r" % self.step)
        size = self.mongo.size()
        fetched = 0
        while fetched < size:
            dataset = self.fetchMongo(skip=fetched, limit=self.step)
            fetched += self.step
            try:
                for row in self.es.bulk(list(dataset)):
                    # row[0] is the success flag, row[1] the item details.
                    if not row[0]:
                        print("%s write to es falied" % (
                            row[1]["index"]["_id"]))
            except Exception:
                print(traceback.format_exc())

    def fetchMongo(self, skip=0, limit=0):
        """Return the Mongo query result restricted by ``skip`` and ``limit``."""
        return self.mongo.find().skip(skip).limit(limit)
class Index:
    """Push documents read from a Mongo table to an ``ESIndex`` in batches."""

    def __init__(self, conf, xmlConf=""):
        """Build the ES and Mongo clients from the configuration.

        Args:
            conf: Passed to ``Config``; must provide the "es" section
                (host, batch, type) and the "mongo" section (host, port,
                db, table, step).
            xmlConf: Optional argument for ``IndexXmlParser``. When empty,
                no field mapping is loaded.
        """
        config = Config(conf)

        es_host = config.get("es", "host")
        es_batch = config.get("es", "batch")
        es_type = config.get("es", "type")
        self.es = ESIndex(es_host, es_batch, es_type)

        mongo_host = config.get("mongo", "host")
        mongo_port = int(config.get("mongo", "port"))
        mongo_db = config.get("mongo", "db")
        mongo_table = config.get("mongo", "table")
        self.mongo = Mongo(db=mongo_db, host=mongo_host, port=mongo_port,
                           table=mongo_table)
        # Number of documents fetched per batch in run().
        self.step = int(config.get("mongo", "step"))

        if xmlConf == "":
            self.fields = None
        else:
            self.fields = IndexXmlParser(xmlConf)

    def setIndex(self):
        """Create the index and put one mapping per configured doc type.

        When no field configuration was loaded, only the index is created.
        """
        self.es.create()
        if self.fields is None:
            return
        # Use the instance's own fields instead of a global ``index`` name.
        for doc_type in self.fields.keys():
            properties = {}
            for field in self.fields[doc_type]:
                properties[field.name] = field.attrs()
            self.es.putMapping({"properties": properties}, doc_type=doc_type)

    def run(self):
        """Send every Mongo document to ES, ``self.step`` documents at a time.

        A batch that raises is reported with its traceback and skipped.

        Raises:
            ValueError: If ``self.step`` is not positive, since the loop
                could never advance.
        """
        if self.step <= 0:
            raise ValueError(
                "mongo step must be a positive integer, got %r" % self.step)
        size = self.mongo.size()
        position = 0
        while position < size:
            dataset = self.fetchMongo(skip=position, limit=self.step)
            position += self.step
            try:
                for row in self.es.bulk(list(dataset)):
                    # row[0] is the success flag, row[1] the item details.
                    if not row[0]:
                        print("%s write to es falied" % (
                            row[1]["index"]["_id"]))
            except Exception:
                print(traceback.format_exc())

    def fetchMongo(self, skip=0, limit=0):
        """Return the Mongo query result restricted by ``skip`` and ``limit``."""
        return self.mongo.find().skip(skip).limit(limit)
def testFindKeywords(self):
    """Render a word cloud of all keywords in the 'data' records.

    Records without a 'keywords' key are ignored. The image is written to
    "cloud.png".
    """
    store = Mongo('config.ini')
    collected = []
    for record in store.find('data'):
        if 'keywords' in record:
            collected.extend(record['keywords'])
    text = ','.join(collected)

    ignored = set(STOPWORDS)
    ignored.add("said")
    cloud = WordCloud(
        background_color="white",
        max_words=2000,
        stopwords=ignored,
        min_font_size=10,
        width=800,
        height=800,
    )
    cloud.generate(text)
    cloud.to_file("cloud.png")
def testTime(self):
    """Print a histogram built from the 'time' field of the 'data' records.

    For each record, the first whitespace-separated token starting with
    '20' is taken. Its text before the first ':' and then before the first
    '-' is the bucket key (presumably the year; confirm against the data).
    """
    store = Mongo('config.ini')
    buckets = {}
    for record in store.find('data'):
        if 'time' not in record:
            continue
        tokens = record['time'].split()
        stamp = next((tok for tok in tokens if tok.startswith('20')), None)
        if stamp is None:
            continue
        key = stamp.split(':')[0].split('-')[0]
        buckets[key] = buckets.get(key, 0) + 1
    print(buckets)
def testGenWord(self):
    """Render a masked word cloud from keywords of records tagged 'CN'.

    Only records that have both 'keywords' and 'country', and whose
    'country' contains 'CN', contribute. The mask is read from 'china.png'
    and the image is written to "word_cn.png".
    """
    store = Mongo('config.ini')
    collected = []
    for record in store.find('data'):
        if ('keywords' in record and 'country' in record
                and 'CN' in record['country']):
            collected.extend(record['keywords'])
    text = ','.join(collected)

    ignored = set(STOPWORDS)
    ignored.add("said")
    shape = np.array(Image.open('china.png'))
    cloud = WordCloud(
        background_color="white",
        max_words=2000,
        stopwords=ignored,
        min_font_size=10,
        mask=shape,
    )
    cloud.generate(text)
    cloud.to_file("word_cn.png")
def read_objects(collection_name, query=None, limit=0, offset=0,
                 sort=settings.DEFAULT_SORT, *args, **kwargs):
    """Query ``collection_name`` and return a paginated response.

    Args:
        collection_name: Name of the collection to read.
        query: Filter passed to ``mongo.count`` and ``mongo.find``. ``None``
            (the default) means an empty filter.
        limit: Page size passed to ``mongo.find`` and echoed back.
        offset: Starting position passed to ``mongo.find`` and echoed back.
        sort: Sort specification passed to both calls.

    Returns:
        The ``createResponse200()`` payload whose ``result`` holds ``total``,
        ``has_more``, ``objects``, ``count``, ``limit`` and ``offset``, or
        the ``createResponse401(...)`` payload carrying the error message
        when anything in the body raised.
    """
    # A fresh dict per call avoids sharing one mutable default.
    if query is None:
        query = {}
    try:
        response = createResponse200()
        mongo = Mongo()
        mongo.connect()
        mongo.use_db(database_name)

        # The total ignores paging, so limit and offset are forced to 0.
        count = mongo.count(collection_name, query, limit=0, offset=0,
                            sort=sort)
        found = mongo.find(collection_name, query, limit=limit,
                           offset=offset, sort=sort)

        result = response['result']
        result['total'] = count
        # NOTE(review): if limit=0 means "no limit" in the Mongo wrapper,
        # has_more is overstated for the default call. Confirm.
        result['has_more'] = limit + offset < count
        result['objects'] = found
        result['count'] = len(found)
        result['limit'] = limit
        result['offset'] = offset
    except Exception as e:
        response = createResponse401(str(e))
    # The original built the response and then dropped it.
    return response
def create_objects(collection_name, objects, limit=None, offset=None,
                   *args, **kwargs):
    """Insert one or more objects into ``collection_name``.

    Nothing is inserted when the lookup by ``_id`` finds any document that
    already uses one of the given ids.

    Args:
        collection_name: Name of the target collection.
        objects: A single object or a list of objects. Each object must
            offer ``.get`` so that its ``_id`` can be read.
        limit: Not applied here; echoed back in the response.
        offset: Not applied here; echoed back in the response.

    Returns:
        The ``createResponse200()`` payload with its ``result`` entry filled
        in, or the ``createResponse401(...)`` payload carrying the error
        message when anything in the body raised.
    """
    try:
        response = createResponse200()
        mongo = Mongo()
        mongo.connect()
        mongo.use_db(database_name)

        if not isinstance(objects, list):
            objects = [objects]

        # Refuse the whole batch when any _id is already stored.
        candidate_ids = [item.get('_id', None) for item in objects]
        query = {'_id': {'$in': candidate_ids}}
        existing = mongo.find(collection_name, query)
        if existing:
            raise Exception(
                "Some objects already exist in '%s' (same _id). Trying using UPDATE method. No objects created."
                % collection_name)

        # No limit or offset is applied on creation.
        created = mongo.insert(collection_name, objects)
        result = response['result']
        result['objects'] = created
        result['count'] = len(created)
        result['limit'] = limit
        result['offset'] = offset
    except Exception as e:
        response = createResponse401(str(e))
    # The original built the response and then dropped it.
    return response
def read_objects(collection_name, query=None, limit=0, offset=0,
                 sort=settings.DEFAULT_SORT, *args, **kwargs):
    """Query ``collection_name`` and return a paginated response.

    Args:
        collection_name: Name of the collection to read.
        query: Filter passed to ``mongo.count`` and ``mongo.find``. ``None``
            (the default) means an empty filter.
        limit: Page size passed to ``mongo.find`` and echoed back.
        offset: Starting position passed to ``mongo.find`` and echoed back.
        sort: Sort specification passed to both calls.

    Returns:
        The ``createResponse200()`` payload whose ``result`` holds ``total``,
        ``has_more``, ``objects``, ``count``, ``limit`` and ``offset``, or
        the ``createResponse401(...)`` payload carrying the error message
        when anything in the body raised.
    """
    # A fresh dict per call avoids sharing one mutable default.
    if query is None:
        query = {}
    try:
        response = createResponse200()
        mongo = Mongo()
        mongo.connect()
        mongo.use_db(database_name)

        # The total ignores paging, so limit and offset are forced to 0.
        total = mongo.count(collection_name, query, limit=0, offset=0,
                            sort=sort)
        found = mongo.find(collection_name, query, limit=limit,
                           offset=offset, sort=sort)

        result = response['result']
        result['total'] = total
        # NOTE(review): if limit=0 means "no limit" in the Mongo wrapper,
        # has_more is overstated for the default call. Confirm.
        result['has_more'] = limit + offset < total
        result['objects'] = found
        result['count'] = len(found)
        result['limit'] = limit
        result['offset'] = offset
    except Exception as e:
        response = createResponse401(str(e))
    # The original built the response and then dropped it.
    return response
str(i) + '_panelBlocLieuxExec"]').text if (Type in section) and (Ville in Villes): wait("//*[@id=\"ctl0_CONTENU_PAGE_resultSearch_tableauResultSearch_ctl" + str(i) + "_panelAction\"]/a[1]") Navigateur.find_element_by_xpath( "//*[@id=\"ctl0_CONTENU_PAGE_resultSearch_tableauResultSearch_ctl" + str(i) + "_panelAction\"]/a[1]").click() #check if exist in mongo wait( '//*[@id="ctl0_CONTENU_PAGE_idEntrepriseConsultationSummary_reference"]' ) reference = Navigateur.find_element_by_xpath( '//*[@id="ctl0_CONTENU_PAGE_idEntrepriseConsultationSummary_reference"]' ).text result = mydb.find('offres-info', '_id', reference) if result.count() == 0: #if not exist insert to db & continue #collect elements to insert lien = Navigateur.current_url offre = {'_id': reference, 'lien': lien} #add reference to .txt file f.write(str(reference + " ")) #inserting mydb.insert(offre, 'offres-info') #continue action wait("//*[@id=\"ctl0_CONTENU_PAGE_linkDownloadDce\"]") Navigateur.find_element_by_xpath( "//*[@id=\"ctl0_CONTENU_PAGE_linkDownloadDce\"]").click()
def save_screw_otc():
    """Save fund NAV data for every non-empty '场外代码' in 'threshold'.

    Reads the 'threshold' collection with only the '场外代码' field
    requested, keeps the truthy values, and passes them to
    ``save_fund_nav`` together with the Mongo handle.
    """
    store = Mongo()
    rows = store.find('threshold', {}, {'场外代码': True})
    codes = []
    for row in rows:
        code = row['场外代码']
        if code:
            codes.append(code)
    save_fund_nav(codes, store)
class Server:
    """Telegram bot server that registers chats and relays queued messages.

    Incoming private messages store the sender's chat id in the user
    collection. ``start`` polls the message collection and sends each queued
    message to the chat registered for its ``chat_user``.
    """

    def __init__(self):
        """Load the configuration, create the bot and start the message loop.

        Raises:
            TeleException: If the Telegram TOKEN is missing from the config.
        """
        self.__config = Config()
        self.__TOKEN = self.__config.getValue('Telegram', 'TOKEN')
        if self.__TOKEN is None:
            raise TeleException(Type.NoneException, 'TOKEN is none')
        self.__bot = telepot.Bot(self.__TOKEN)
        # One getMe() call supplies both values.
        me = self.__bot.getMe()
        self.__id = me['id']
        self.__user = me['username']
        self.__mongo = Mongo()
        self.__db_msg = self.__config.getValue('Config', 'DB_MSG')
        self.__db_user = self.__config.getValue('Config', 'DB_USER')
        self.messageProcessing()

    def getBot(self):
        """Return the underlying bot object."""
        return self.__bot

    def getId(self):
        """Return the bot's id as reported by ``getMe()``."""
        return self.__id

    def getUserName(self):
        """Return the bot's username as reported by ``getMe()``."""
        return self.__user

    def messageProcessing(self):
        """Start a background message loop that registers private chats."""

        def handle(msg):
            # msg['text'] is deliberately not read: it was unused, and a
            # message without that key would raise KeyError here.
            msg_chat_id = msg['chat']['id']
            # NOTE(review): presumably 'username' can be absent for some
            # chats; that would still raise KeyError. Confirm.
            msg_username = msg['chat']['username']
            msg_date = msg['date']
            msg_type = msg['chat']['type']
            if msg_type == 'private':
                self.__mongo.saveUpdateOne(
                    {'chat_user': msg_username},
                    {
                        '$set': {
                            'chat_id': msg_chat_id,
                            'chat_type': msg_type,
                            'chat_date': msg_date
                        }
                    },
                    self.__db_user)
                # NOTE(review): the reply is sent for private chats only;
                # confirm this matches the intended nesting.
                self.__bot.sendMessage(msg_chat_id,
                                       "I have receieved your message!")

        MessageLoop(self.__bot, handle).run_as_thread()

    def start(self):
        """Poll the message collection forever and deliver queued messages.

        Each cycle sleeps for the configured SLEEP_TIME, sends every queued
        message to the chat registered for its ``chat_user``, then deletes
        the sent messages. A ``TeleException`` (for example an unknown
        user) is logged and ends the loop.
        """
        try:
            print('server started...')
            while True:
                time.sleep(int(self.__config.getValue('Config', 'SLEEP_TIME')))
                print('process msgs')
                unsent_msgs = self.__mongo.find(self.__db_msg)
                idList = []
                for msg in unsent_msgs:
                    user_list = self.__mongo.find(
                        self.__db_user, {'chat_user': msg['chat_user']})
                    if len(user_list) == 0:
                        # The original used an undefined name ``username``
                        # here, which raised NameError instead.
                        raise TeleException(
                            Type.NoneException,
                            'No such user!' + str(msg['chat_user']))
                    self.__bot.sendMessage(user_list[0]['chat_id'],
                                           msg['chat_body'],
                                           parse_mode='HTML')
                    idList.append(msg['_id'])
                self.__mongo.deleteMany(idList, self.__db_msg)
        except TeleException as te:
            LOGGER.error(str(te))