示例#1
0
文件: mim.py 项目: duilio/Ming
 def command(self, command,
             value=1, check=True, allowable_errors=None, **kwargs):
     """Emulate MongoDB's ``command`` endpoint for this in-memory mock.

     :param command: command name (str) or a prepared command document
     :param value: value paired with the command name when *command* is a str
     :param check, allowable_errors: accepted for API compatibility; unused
     :param kwargs: merged into the command document when *command* is a str
     :return: a dict mimicking the server's command reply
     :raises OperationFailure: findandmodify with no match and no upsert
     :raises NotImplementedError: for any unsupported command
     """
     if isinstance(command, basestring):
         command = {command:value}
         command.update(**kwargs)
     if 'filemd5' in command:
         # Checksum all GridFS chunks in chunk order ('n').
         checksum = md5()
         for chunk in self.chef.file.chunks.find().sort('n'):
             checksum.update(chunk['data'])
         return dict(md5=checksum.hexdigest())
     elif 'findandmodify' in command:
         coll = self._collections[command['findandmodify']]
         before = coll.find_one(command['query'], sort=command.get('sort'))
         upsert = False
         if before is None:
             upsert = True
             if command.get('upsert'):
                 before = dict(command['query'])
                 coll.insert(before)
             else:
                 # Fix: call-style raise is valid on both py2 and py3;
                 # the old "raise E, msg" comma form is a py3 SyntaxError.
                 raise OperationFailure('No matching object found')
         coll.update(command['query'], command['update'])
         if command.get('new', False) or upsert:
             return dict(value=coll.find_one(dict(_id=before['_id'])))
         return dict(value=before)
     elif 'mapreduce' in command:
         collection = command.pop('mapreduce')
         return self._handle_mapreduce(collection, **command)
     elif 'distinct' in command:
         collection = self._collections[command['distinct']]
         key = command['key']
         return list(set(_lookup(d, key) for d in collection.find()))
     else:
         raise NotImplementedError(repr(command))
示例#2
0
def pymongo_query(query, logtype, local=False):
    """ generic Django view for netshed, takes in query dictionary
        and type of log and returns the resulting lines of logs """

    loglines = []
    collections = []

    # Bug fix: `limit` was only bound when the query contained a 'limit'
    # key, raising NameError at .limit(limit) below otherwise.
    # Cursor.limit(0) means "no limit" in pymongo, so 0 is a safe default.
    limit = int(query.get('limit', 0))

    # pymongo raw query
    if 'date' in query:

        if query['date'] == 'all':
            collections = connect_db(logtype, local).collection_names()
        else:
            collections.append(logtype + '_' + query['date'])
            try:
                # search two days if start time > end time
                if span_two_days(query['start_hr'], query['start_min'], query['end_hr'], query['end_min']):
                    collections.append(logtype + increment_date(query['date']))
            except KeyError:
                pass

        # get collection object for each collection to search on and query
        for collection in collections:
            collection = connect_collection(logtype, collection, local)

            results = [log for log in collection.find(format_query_input(query)).limit(limit)]
            results = sorted(results, key=lambda log: log['time'])
            loglines += [log['line'] for log in results]

    return loglines
示例#3
0
def get_available_rooms_db(bookedDates, startTime, endTime ):
    """Return the names of rooms that have a booking exactly matching
    the given date, start time and end time."""
    slot = {'date': bookedDates, 'startTime': startTime, 'endTime': endTime}
    matches = collection.find({"booked": {'$elemMatch': slot}})
    return [match["roomName"] for match in matches]
def func():
    """Poll Twitter's GraphQL UserByScreenName endpoint for every stored
    user name and reschedule itself via threading.Timer.

    NOTE(review): depends on module-level globals (`collection`, `daili`,
    `ss`, `headerss`, `params`, `article`) defined elsewhere in the file.
    """
    # Reschedules itself every 2 seconds (threading.Timer below); the
    # original comment said "executed once every 2s".

    aa = ''
    # All user documents, with Mongo's _id projected out.
    datas = (list(collection.find({}, {"_id": 0})))
    for userid in datas:
        daili()
        try:
            time.sleep(5)
            # userid = 'trumpchinese1'
            # Replace the loop document with the screen name it holds.
            userid = userid['userName']
            paramsTwo = (('variables', '{"screen_name":"' + userid +
                          '","withHighlightedLabel":true}'), )
            paramss = dict(paramsTwo)
            response = ss.get(
                'https://twitter.com/i/api/graphql/hc-pka9A7gyS3xODIafnrQ/UserByScreenName',
                headers=headerss,
                params=paramss)
            content = response.content
            # Scrape the numeric user id out of the JSON response body.
            id = re.compile('"rest_id":"(.*?)",').findall(str(content))
            httpId = id[0]
            article(httpId, params, aa)
            # Schedule the next run; note this is per-user, so timers fan out.
            threading.Timer(2, func).start()
        except Exception as err:
            # NOTE(review): silently swallows every error for this user.
            pass
def set_features(collection_name):
    """Score every usable revision document in *collection_name* with the
    random-forest model and write the vandalism probability back to the
    document as ``f.forest_score``."""
    feature_rows = []
    labels = []
    doc_ids = []
    counter = Counter(100)
    collection = client.wiki[collection_name]  # type: collection.Collection
    for doc in collection.find({}):
        # Require reverted-word data.
        if "rwords" not in doc or len(doc["rwords"]) == 0:
            continue

        # Require more than one revision and a reasonably complete feature dict.
        if len(doc["revs"]) <= 1 or "f" not in doc or len(doc["f"]) < 25:
            continue

        # Every whitelisted feature must be present.
        if any(name not in doc["f"] for name in OK_FEATURES):
            continue

        feature_rows.append(
            [val for name, val in doc["f"].items() if name in OK_FEATURES])
        labels.append(1 if doc["vandal"] else 0)
        doc_ids.append(doc["_id"])
        counter.tick()

    # Column 1 of predict_proba is the positive-class (vandal) probability.
    scores = frst.predict_proba(feature_rows)
    for doc_id, score in zip(doc_ids, scores[:, 1]):
        collection.update_one({"_id": doc_id},
                              {"$set": {
                                  "f.forest_score": score
                              }})
def check_reference_is_valid(collection: pymongo.collection.Collection, id_: bson.ObjectId) -> bool:
    """
    Check if a given id exists within the given collection
    :param collection: The pymongo collection to search
    :param id_: The id to find
    :return: True if the id exists within the collection, false otherwise
    """
    # Fix: Cursor.count() was deprecated in pymongo 3.7 and removed in
    # 4.0; count_documents() is the supported server-side count.
    return collection.count_documents({'_id': id_}) > 0
示例#7
0
def lista_paises1():
    """Query the countries whose language list includes English or
    Spanish, then report success.

    Note: the query result itself is not returned, only a status dict.
    """
    language_filter = {
        '$or': [
            {"languages.eng": "English"},
            {"languages.eng": "Spanish"},
        ]
    }
    data = collection.find(language_filter)
    return {'mensaje': 'correcto'}
示例#8
0
    def command(self,
                command,
                value=1,
                check=True,
                allowable_errors=None,
                **kwargs):
        """Emulate MongoDB's ``command`` endpoint for this in-memory mock.

        :param command: command name (str) or a prepared command document
        :param value: paired with the command name when *command* is a str
        :param check, allowable_errors: accepted for API compatibility; unused
        :param kwargs: merged into the command doc when *command* is a str
        :return: a dict mimicking the server's command reply
        :raises OperationFailure: findandmodify with no match and no upsert
        :raises NotImplementedError: for any unsupported command
        """
        # Normalise the shorthand form {name: value, **kwargs}.
        if isinstance(command, six.string_types):
            command = {command: value}
            command.update(**kwargs)
        if 'filemd5' in command:
            # Checksum all GridFS chunks in chunk order ('n').
            checksum = md5()
            for chunk in self.chef.file.chunks.find().sort('n'):
                checksum.update(chunk['data'])
            return dict(md5=checksum.hexdigest())
        elif 'findandmodify' in command:
            coll = self._collections[command['findandmodify']]
            before = coll.find_one(command['query'], sort=command.get('sort'))
            upsert = False
            if before is None:
                upsert = True
                if command.get('upsert'):
                    # Seed the new document from the query itself.
                    before = dict(command['query'])
                    coll.insert(before)
                else:
                    raise OperationFailure('No matching object found')
            coll.update(command['query'], command['update'])
            # 'new': return the post-update document (always true for upserts).
            if command.get('new', False) or upsert:
                return dict(value=coll.find_one(dict(_id=before['_id'])))
            return dict(value=before)
        elif 'mapreduce' in command:
            collection = command.pop('mapreduce')
            return self._handle_mapreduce(collection, **command)
        elif 'distinct' in command:
            collection = self._collections[command['distinct']]
            key = command['key']
            filter = command.get('filter')
            return list(
                set(_lookup(d, key) for d in collection.find(filter=filter)))
        elif 'getlasterror' in command:
            # Always report success; this mock has no write errors to surface.
            return dict(connectionId=None, err=None, n=0, ok=1.0)
        elif 'collstats' in command:
            collection = self._collections[command['collstats']]

            # We simulate everything based on the first object size,
            # doesn't probably make sense to go through all the objects to compute this.
            # Also instead of evaluating their in-memory size we use pickle
            # as python stores references.
            first_object_size = len(
                pickle.dumps(next(iter(collection._data.values()), {})))
            return {
                "ns": '%s.%s' % (collection.database.name, collection.name),
                "count": len(collection._data),
                "size": first_object_size * len(collection._data),
                "avgObjSize": first_object_size,
                "storageSize": first_object_size * len(collection._data)
            }
        else:
            raise NotImplementedError(repr(command))
示例#9
0
 def basic_map_reduce_test(self):
     """Run the map.js / reduce.js map-reduce job over ``self.collection``
     and print every document of the resulting output collection."""
     # Fixes: context-managed file handles (were never closed), no
     # shadowing of the builtin map/reduce names, and py3-compatible
     # print() calls (were py2 print statements).
     with open('./map.js', 'r') as f:
         map_code = Code(f.read())
     with open('./reduce.js', 'r') as f:
         reduce_code = Code(f.read())
     result = self.collection.map_reduce(map_code, reduce_code, {"query": {}})
     # result = self.collection.map_reduce(map, reduce);
     print(result)
     out_collection = self.db[result["result"]]
     for item in out_collection.find():
         print(item)
示例#10
0
	def get_notebook_filenames(self, notebook):
		"""Return the would-be filenames, one ``<notebook>_<id>.note``
		entry per note id stored in the given notebook collection."""
		self.db = self.check_connection()
		collection = self.db[notebook]
		note_ids = collection.find().distinct('_id')
		return [notebook + '_' + str(note_id) + '.note' for note_id in note_ids]
示例#11
0
 def basic_map_reduce_test(self):
     """Run the map.js / reduce.js map-reduce job over ``self.collection``
     and print every document of the resulting output collection."""
     # Fixes: context-managed file handles (were never closed), no
     # shadowing of the builtin map/reduce names, stray semicolons
     # removed, and py3-compatible print() calls.
     with open('./map.js', 'r') as f:
         map_code = Code(f.read())
     with open('./reduce.js', 'r') as f:
         reduce_code = Code(f.read())
     result = self.collection.map_reduce(map_code, reduce_code, {"query": {}})
     # result = self.collection.map_reduce(map, reduce);
     print(result)
     out_collection = self.db[result["result"]]
     for item in out_collection.find():
         print(item)
示例#12
0
文件: demo_curd.py 项目: lei025/Notes
    def find(self, table, dic):
        """Query one collection of the database.

        :param table: str, name of the collection to query
        :param dic: dict, pymongo filter document
        :return: list of matching documents
        """
        return list(self.db[table].find(dic))
示例#13
0
def get_all_routes():
    """Fetch every route document, with each ``_id`` stringified for
    serialization. Returns None (after logging) on any error."""
    global client
    try:
        routes = list(client['route']['routeCollection'].find())
        for route in routes:
            route['_id'] = str(route['_id'])
        return routes
    except Exception as e:
        print('Exception in get all routes: ' + str(e))
示例#14
0
def index():
    """Connect to the local MongoDB and return, as a JSON string, all
    not-yet-pushed articles from ``article.article_list_zhongguoyouzheng``.

    NOTE(review): relies on module-level `json` and `JSONEncoder`
    (presumably handling ObjectId/datetime) -- verify they are defined.
    """
    client = MongoClient('localhost', 27017)
    print(client)  # if this prints, the connection succeeded
    # Connect to the ``article`` database (account/password auth is
    # commented out below; default admin database is not used).
    db = client.article

    # db.authenticate('root', 'your-password-here')

    # The collection to use (i.e. what is usually called a table).
    collection = db.article_list_zhongguoyouzheng
    # push_state == 0 means "not pushed yet"; projection drops the flags.
    datas = (list(collection.find({"push_state": 0}, {"push_state": 0, "only_id": 0})))
    data = json.dumps(datas, cls=JSONEncoder,ensure_ascii=False)
    return data
示例#15
0
def get_route(source_city: str, dest_city: str):
    """Return the first route document matching the given source and
    destination cities, or None when no route matches (or on error)."""
    global client
    try:
        collection = client['route']['routeCollection']
        query = {'source_city': source_city, 'dest_city': dest_city}
        # First match in natural order, None when the cursor is empty.
        return next(iter(collection.find(query)), None)
    except Exception as e:
        print('Exception in get route: ' + str(e))
示例#16
0
def get_all_flights_by_route():
    """Collect the flights of every known route into one flat list.
    Returns None (after logging) on error."""
    global client
    flights = []
    try:
        route_collection = client['route']['routeCollection']
        flight_collection = client['flight']['flightCollection']

        for route in route_collection.find():
            matching = flight_collection.find(
                {'route_id': ObjectId(route['_id'])})
            flights.extend(matching)

        # print(flights)
        return flights
    except Exception as e:
        print('Exception in get flights by route: ' + str(e))
示例#17
0
def set_features(collection_name):
    """Score documents with two logistic-regression text models and write
    ``f.t_biscore`` / ``f.t_biscore_opp`` back to each document.

    NOTE(review): relies on module-level `client`, `Counter`, `sign`,
    `check_rgb`, `fh` (feature hasher), `lr`, `lr2`, TEXT_FEATURE_KEY.
    """
    raw_list = []        # positive-signal feature dicts, one per doc
    raw_res = []         # vandal labels (collected but unused below)
    raw_ids = []         # document _ids, aligned with raw_list
    raw_list_opp = []    # negated negative-signal feature dicts
    counter = Counter(100)
    collection = client.wiki[collection_name]  # type: collection.Collection
    # Project only what is needed: id, text features, and the label.
    for raw in collection.find({}, {
            "_id": 1,
            TEXT_FEATURE_KEY: 1,
            "vandal": 1
    }):
        if TEXT_FEATURE_KEY not in raw:  # or len(raw[TEXT_FEATURE_KEY]) == 0:
            continue

        # Collapse each feature value to its sign.
        filtered = {x: sign(y)
                    for x, y in raw[TEXT_FEATURE_KEY].items()
                    }  #if not x.isdigit()
        # Positive features only, excluding colors and multi-word keys.
        filtered2 = {
            x: 1
            for x, y in filtered.items()
            if y > 0 and not check_rgb(x) and ' ' not in x
        }  #

        raw_list.append(filtered2)
        # Negative features, flipped positive, same exclusions.
        raw_list_opp.append({
            x: y * (-1)
            for x, y in filtered.items()
            if y < 0 and not check_rgb(x) and ' ' not in x
        })  #
        #raw_list.append(raw[TEXT_FEATURE_KEY])
        raw_res.append(1 if raw["vandal"] else 0)
        raw_ids.append(raw["_id"])
        counter.tick()

    # Hash the sparse dicts into fixed-width vectors, then score both models.
    pred = lr.predict_proba(fh.transform(raw_list))
    pred2 = lr2.predict_proba(fh.transform(raw_list_opp))
    for i, x in enumerate(pred[:, 1]):
        collection.update_one(
            {"_id": raw_ids[i]},
            {
                "$set": {
                    "f.t_biscore": x,  #max(x,pred2[i,1])
                    'f.t_biscore_opp': pred2[i, 1]
                }
            })
示例#18
0
def load_data_from_mongo(collection_name):
    """
    Load every document of the named collection from the mongo yelp
    database into a semi-flattened pandas DataFrame.

    Args:
        collection_name (string): Name of collection to load.

    Returns:
        Dataframe: Semi-flattened mongo collection.
    """
    docs = [doc for doc in access_specific_collection(collection_name).find({})]
    frame = pd.json_normalize(docs, errors="ignore")
    print(frame.head(5))
    return frame
示例#19
0
def request_cancel(flight_id: str, date: datetime, email: str):
    """Record a cancellation request for one of the user's bookings.

    Returns 1 on success, -1 if an identical request already exists,
    -2 when the flight is too close (less than ~2 days away), and None
    (after logging) on error or when no matching booking is found.

    NOTE(review): despite the ``datetime`` annotation, *date* is sliced
    like a string below (day = last 2 chars, month = chars -5..-3), so it
    presumably arrives as 'YYYY-MM-DD' text -- confirm against callers.
    """
    global client
    try:

        user_db = client['users']
        user_collection = user_db['usersCollection']
        usr = user_collection.find_one({'email': email})
        cancel_db = client['cancels']

        collection = cancel_db['cancelsCollection']

        # Find the booking matching this flight and date.
        for each in usr['bookings']:
            if (each['flight_id'] == flight_id and each['date'] == date):

                item = {
                    'user_id': usr['_id'],
                    'flight_id': flight_id,
                    'e_count': each['e_count'],
                    'b_count': each['b_count'],
                    'date': date
                }

                # NOTE(review): datetime.date.today() implies the module
                # (not the class) is imported as `datetime` -- verify.
                d = datetime.date.today()

                # Parse day-of-month and month out of the date string.
                x = int(date[len(date) - 2:])
                month = int(date[len(date) - 5:len(date) - 3])
                # Reject cancellations within ~2 days of departure,
                # including the month-boundary case below.
                if (x - d.day < 2 and month == d.month):
                    return -2
                if (d.month != month and x >= 30 and d.day <= 2):
                    return -2

                # Duplicate request already on file.
                if (collection.find(item).count() != 0):

                    return -1

                result = collection.insert_one(item)
                print('inserted into db',
                      collection.find_one({'flight_id': flight_id}))

                return 1

    except Exception as e:
        print('Exception in requesting cancel: ' + str(e))
示例#20
0
def get_flights_by_route(source_city: str, dest_city: str):
    """Return the flights of the route between the two given cities,
    with ObjectIds stringified; None when the route is unknown or on
    error."""
    global client
    route = get_route(source_city=source_city, dest_city=dest_city)
    if route is None:
        return
    try:
        flight_collection = client['flight']['flightCollection']
        flights = list(flight_collection.find({'route_id': route.get('_id')}))
        for flight in flights:
            flight['route_id'] = str(flight['route_id'])
            flight['_id'] = str(flight['_id'])
        return flights

    except Exception as e:
        print('Exception in get flights by route: ' + str(e))
示例#21
0
def get_route_from_flight_id(flight_id: str):
    """Resolve a flight id to its route as ``[source_city, dest_city]``.

    Returns an empty list when the flight or route is not found, and
    None (after logging) on error.
    """
    global client
    route = []
    route_id = "temp"

    try:
        route_collection = client['route']['routeCollection']
        flight_db = client['flight']
        collection = flight_db['flightCollection']
        for each in collection.find():
            if (str(each['_id']) == flight_id):
                route_id = each['route_id']
        # Bug fix: the original iterated the Collection object itself
        # (pymongo Collections are not iterable) and compared str(_id)
        # against a raw ObjectId, which never matched. Iterate find()
        # and compare both sides as strings.
        for each in route_collection.find():
            if (str(each['_id']) == str(route_id)):
                route.append(each['source_city'])
                route.append(each['dest_city'])
        print('route: ', route)
        return route
    except Exception as e:
        print('Exception in get route by flight id: ' + str(e))
示例#22
0
def load_data_from_mongo_in_batches(collection_name, batch_size=5000):
    """
    Load data from specified collection
    within mongo yelp database
    into pandas dataframe.
    One batch at a time to avoid memory use errors.

    Args:
        collection_name (string): Name of collection to load.
        batch_size (int): Number to records to load in at one time.
                          Defaults to 5000. Depends on memory usage.

    Returns:
        Dataframe: Semi-flattened mongo collection.
    """
    collection = access_specific_collection(collection_name)
    cursor = collection.find()
    # Bug fix: honor the batch_size parameter (the batch size was
    # hard-coded to 5000). Also build the frame with pd.concat --
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
    frames = [pd.DataFrame(batch) for batch in batched(cursor, batch_size)]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
示例#23
0
 def command(self,
             command,
             value=1,
             check=True,
             allowable_errors=None,
             **kwargs):
     """Emulate MongoDB's ``command`` endpoint for this in-memory mock.

     :param command: command name (str) or a prepared command document
     :param value: paired with the command name when *command* is a str
     :param check, allowable_errors: accepted for API compatibility; unused
     :param kwargs: merged into the command doc when *command* is a str
     :return: a dict mimicking the server's command reply
     :raises OperationFailure: findandmodify with no match and no upsert
     :raises NotImplementedError: for any unsupported command
     """
     if isinstance(command, basestring):
         command = {command: value}
         command.update(**kwargs)
     if 'filemd5' in command:
         # Checksum all GridFS chunks in chunk order ('n').
         checksum = md5()
         for chunk in self.chef.file.chunks.find().sort('n'):
             checksum.update(chunk['data'])
         return dict(md5=checksum.hexdigest())
     elif 'findandmodify' in command:
         coll = self._collections[command['findandmodify']]
         before = coll.find_one(command['query'], sort=command.get('sort'))
         upsert = False
         if before is None:
             upsert = True
             if command.get('upsert'):
                 before = dict(command['query'])
                 coll.insert(before)
             else:
                 # Fix: call-style raise is valid on both py2 and py3;
                 # the old "raise E, msg" comma form is a py3 SyntaxError.
                 raise OperationFailure('No matching object found')
         coll.update(command['query'], command['update'])
         if command.get('new', False) or upsert:
             return dict(value=coll.find_one(dict(_id=before['_id'])))
         return dict(value=before)
     elif 'mapreduce' in command:
         collection = command.pop('mapreduce')
         return self._handle_mapreduce(collection, **command)
     elif 'distinct' in command:
         collection = self._collections[command['distinct']]
         key = command['key']
         return list(set(_lookup(d, key) for d in collection.find()))
     elif 'getlasterror' in command:
         # Always report success; this mock has no write errors to surface.
         return dict(connectionId=None, err=None, n=0, ok=1.0)
     else:
         raise NotImplementedError(repr(command))
def set_features(collection_name):
    """For every document with multiple revisions and reverted-word data,
    compute its character distribution, store the Bhattacharyya distance
    to the known-good distribution as ``f.t_charscore``, and collect the
    distributions."""
    distributions = []
    doc_ids = []
    counter = Counter(100)
    collection = client.wiki[collection_name]  # type: collection.Collection
    for doc in collection.find({}):
        # Need at least two revisions to compare.
        if len(doc["revs"]) <= 1:
            continue

        # Need reverted-word data.
        if "rwords" not in doc:
            continue

        distr = calculate(doc)

        collection.update_one(
            {"_id": doc["_id"]},
            {"$set": {
                "f.t_charscore": BHdist(distr, good_distr)
            }})

        distributions.append(dict(distr))
        doc_ids.append(doc["_id"])
        counter.tick()
    '''
示例#25
0
    def update(self, ii, data_to_send):
        """Exchange data with other ranks through a per-time-step capped
        collection: publish this rank's payload, then busy-wait on a
        tailable cursor until every rank in ``self.outside_rank_list``
        has published, and store their payloads.

        :param ii: current time-step (used as the collection name)
        :param data_to_send: JSON-serialisable payload for this rank
        """

        # Access collection corresponding to the current time-step:
        collection_name = '%s' % ii
        try:
            # Capped so the tailable cursor below is allowed; creation
            # races with other ranks, hence the except fallback.
            collection = self.db.create_collection(
                collection_name, **{
                    'capped': True,
                    'size': 100000
                })
        except (pymongo.errors.OperationFailure,
                pymongo.errors.CollectionInvalid):
            collection = self.db[collection_name]

        # Push my data:
        collection.insert({
            "rank": self.rank(),
            'data': json.dumps(data_to_send)
        })

        #Get data:
        max_record = len(self.outside_rank_list)
        cursor = collection.find({'rank': {
            "$in": self.outside_rank_list
        }},
                                 cursor_type=CursorType.TAILABLE_AWAIT)
        result_dict = {}
        # Spin until one document per expected rank has arrived; the
        # tailable cursor keeps yielding as new documents are inserted.
        while len(result_dict) < max_record:
            try:
                found_document = cursor.next()
                result_dict[found_document['rank']] = found_document['data']
            except StopIteration:
                # Nothing new yet -- keep polling.
                pass

        for source_rank, payload in result_dict.items():
            self.received_message_dict_external[source_rank][ii] = json.loads(
                payload)
示例#26
0
def pymongo_query(query, logtype, local=False):
    """ generic Django view for netshed, takes in query dictionary
        and type of log and returns the resulting lines of logs """

    loglines = []
    collections = []

    # Bug fix: `limit` was only bound when the query contained a 'limit'
    # key, raising NameError at .limit(limit) below otherwise.
    # Cursor.limit(0) means "no limit" in pymongo, so 0 is a safe default.
    limit = int(query.get('limit', 0))

    # pymongo raw query
    if 'date' in query:

        if query['date'] == 'all':
            collections = connect_db(logtype, local).collection_names()
        else:
            collections.append(logtype + '_' + query['date'])
            try:
                # search two days if start time > end time
                if span_two_days(query['start_hr'], query['start_min'],
                                 query['end_hr'], query['end_min']):
                    collections.append(logtype + increment_date(query['date']))
            except KeyError:
                pass

        # get collection object for each collection to search on and query
        for collection in collections:
            collection = connect_collection(logtype, collection, local)

            results = [
                log for log in collection.find(format_query_input(
                    query)).limit(limit)
            ]
            results = sorted(results, key=lambda log: log['time'])
            loglines += [log['line'] for log in results]

    return loglines
示例#27
0
def check_collection(collection: pymongo.collection.Collection,
                     db_client: database.client.DatabaseClient):
    """
    Check all the entities in a collection: qualify any unqualified
    ``_type`` values in place, then try to deserialize and validate
    each entity, logging every failure.
    :param collection: the pymongo collection whose entities to check
    :param db_client: database client used to deserialize entities
    :return: None; problems are reported through the module logger
    """
    all_entities = collection.find()
    for s_entity in all_entities:
        # patch the entity type if appropriate
        if '.' not in s_entity['_type']:
            qual_types = database.entity_registry.find_potential_entity_classes(
                s_entity['_type'])
            # Only rewrite when exactly one candidate class matches.
            if len(qual_types) == 1 and qual_types[0] != s_entity['_type']:
                logging.getLogger(__name__).error(
                    "Entity {0} had unqualified type {1}".format(
                        s_entity['_id'], s_entity['_type']))
                collection.update_one({'_id': s_entity['_id']},
                                      {'$set': {
                                          '_type': qual_types[0]
                                      }})

        # Try and deserialize the entity, and validate it if we succeed
        try:
            entity = db_client.deserialize_entity(s_entity)
        except Exception:
            # Log and continue; one bad entity must not stop the sweep.
            entity = None
            logging.getLogger(__name__).error(
                "Exception occurred deserializing object {0}:\n{1}".format(
                    s_entity['_id'], traceback.format_exc()))

        if entity is not None and hasattr(entity, 'validate'):
            if not entity.validate():
                logging.getLogger(__name__).error(
                    "Entity {0} ({1}) failed validation".format(
                        entity.identifier, s_entity['_type']))
    '?1',
    'sec-fetch-dest':
    'document',
    'accept-language':
    'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6',
    'cookie':
    'xhsTrackerId=5994dcc9-8274-48c7-c897-954a0fc3b1f8; _ga=GA1.2.1321150109.1626320152; smidV2=202107271802381b04791812bfde1e5152773c38891bc800efd1442f4be7e20; xhsTracker=url=index&searchengine=baidu; timestamp2=20210802f5ec7988daec2eeaa389bbfb; timestamp2.sig=5bNd_MzQomdySNZPcQIAzx-IQjEN72xKzJX1QkBNbJ0; _gid=GA1.2.98589560.1627867537; extra_exp_ids=gif_exp1,ques_clt1; _gat=1',
}
# agentUrl = "http://192.168.1.26:16666/get/"
# res = requests.get(agentUrl)
#
# agenContent = res.content.decode("utf-8")
# dataip = re.compile('"proxy": "(.*?)",').findall(str(agenContent))
# ip = dataip[0]
# proxy = {
#     'https://' + ip,
# }
# requests.proxies = proxy
# Fetch the URLs of all not-yet-pushed articles, newest download first.
# NOTE(review): depends on module-level `collection`, `ss` (requests
# session) and `headersss` being defined earlier in the file.
datas = (list(
    collection.find({
        "push_state": 0
    }, {
        "url": 1,
        "_id": 0
    }).sort([("download_time", -1)])))

# Smoke-test request against a fixed xiaohongshu article page.
response = ss.get(
    'https://www.xiaohongshu.com/discovery/item/61011452000000002103e959',
    headers=headersss)
print(response.status_code)
def my_job():
    """Scrape xiaohongshu 'related notes' for every stored un-pushed URL,
    download each related article (text + images or video), and insert
    the results into the database via ``insertdb``.

    NOTE(review): depends on module-level `collection`, `ss` (requests
    session), `headersss`, `InsertOne`, `insertdb`, `re`, `datetime`.
    """
    # agentUrl = "http://192.168.1.26:16666/get/"
    # res = requests.get(agentUrl)
    #
    # agenContent = res.content.decode("utf-8")
    # dataip = re.compile('"proxy": "(.*?)",').findall(str(agenContent))
    # ip = dataip[0]
    # proxy = {
    #     'https://' + ip,
    # }
    # requests.proxies = proxy
    # URLs of un-pushed articles, newest publication first.
    datas = (list(
        collection.find({
            "push_state": 0
        }, {
            "url": 1,
            "_id": 0
        }).sort([("pub_time", -1)])))
    for ur in datas:
        try:
            print(ur['url'])
            response = ss.get(ur['url'], headers=headersss)
            # 461 presumably signals blocking/rate-limiting -- TODO confirm.
            if response.status_code == 461:
                print()

            content = response.content.decode('utf-8')
            # Slice out the "related notes ... view more" section, then
            # scrape ids, titles, like counts and note types from it.
            contentWEB = re.compile('相关笔记[\s\S]*?.查看更多').findall(str(content))
            id = re.compile('/item/(.*?)"').findall(str(contentWEB[0]))
            title = re.compile('<p class="desc" .*?>(.*?)</p>').findall(
                str(contentWEB[0]))
            likeCount = re.compile(
                '<span class="counts".*?>(.*?)</span>').findall(
                    str(contentWEB[0]))
            type = re.compile(' <i class="(.*?)"').findall(str(contentWEB[0]))

            for a, b, c, d in zip(id, title, type, likeCount):
                # Counts like "1.2万" (wan = 10,000) become plain integers.
                if '万' in str(d):
                    num = d.replace('万', '')
                    d = float(num) * 10000
                    d = int(d)
                try:
                    imgText = ''
                    videoText = ''
                    url = 'https://www.xiaohongshu.com/discovery/item/' + str(
                        a)
                    # url = 'https://www.xiaohongshu.com/discovery/item/60f8be50000000002103c8f8'
                    response = ss.get(
                        url,
                        headers=headersss,
                    )
                    content = response.content.decode('utf-8')
                    articleContent = re.compile(
                        '"description": "(.*?)",').findall(str(content))
                    # "发布于" = "published at"; normalised to a full timestamp.
                    time = re.compile('发布于 (.*?)</span>').findall(str(content))
                    pubTime = datetime.datetime.strptime(
                        time[0],
                        '%Y-%m-%d %H:%M').strftime('%Y-%m-%d %H:%M:%S')
                    if c == 'normal':
                        # Image note: collect all CSS background image URLs.
                        contentText = re.compile(
                            '<body>([\s\S]*?.)<h1').findall(str(content))
                        contentImg = re.compile(':url\((.*?)\?').findall(
                            str(contentText))

                        for i in contentImg:
                            # str.find != 0 means 'http' is not the prefix;
                            # only prefix matches are embedded as <img>.
                            if str.find(str(i), 'http'):
                                print()
                            else:
                                imgText += "<img src='" + str(i) + "'></br>"
                    else:
                        # Video note: extract and unescape the video source.
                        content = response.content.decode('utf-8')
                        content = re.compile(
                            '<video [\s\S]*?.src="(.*?)"').findall(
                                str(content))
                        ac = content[0].encode('utf-8').decode(
                            'unicode_escape')
                        aa = str(ac)
                        aa = aa.replace('&amp;', '&')
                        videoText += '<video src="' + aa + '" controls="controls"></br>'
                    contentText = imgText + videoText + articleContent[0]
                    site = "小红书"
                    siteId = 1048926
                    data = []
                    articleStatue = 0
                    downloadTime = datetime.datetime.now().strftime(
                        '%Y-%m-%d %H:%M:%S')
                    data.append(
                        InsertOne({
                            "url": url,
                            "title": b,
                            "pub_time": pubTime,
                            "content": contentText,
                            "download_time": downloadTime,
                            "site": site,
                            "site_id": siteId,
                            "aid": a,
                            'push_state': articleStatue,
                            'like_num': d
                        }))
                    insertdb(data)
                except Exception as err:
                    import traceback
                    traceback.print_exc()
                    pass
        except Exception as err:
            import traceback
            traceback.print_exc()
            pass
示例#30
0
def my_job():
    """Re-probe every stored URL whose last recorded HTTP status was not 200.

    Reads candidate records from the module-level ``collection``
    (documents with ``Code != 200``), fetches one proxy address from a
    proxy-pool service, then tries each URL over three schemes in order:
    ``http://<url>``, ``http://www.<url>``, ``https://www.<url>``.

    The first scheme that yields a response is persisted via ``insertdb``;
    a 404 response is stored with ``Code`` 0, any other status is stored
    as-is.  If all three attempts raise, the record is stored with
    ``Code`` 0 and the bare URL echoed back as ``xinUrl``.

    Returns nothing; all results are side effects on the database.
    """

    def _record(url_name, bare_url, final_url, code):
        # Persist one probe result; the original code mapped a 404 to
        # Code 0 in every branch, so normalize it here once.
        insertdb([
            InsertOne({
                "urlName": url_name,
                "url": bare_url,
                "xinUrl": final_url,
                "Code": 0 if code == 404 else code,
            })
        ])

    try:
        datas = list(collection.find({"Code": {"$ne": 200}}))

        # Fetch a single proxy address from the pool service.
        agent_url = "http://47.96.91.228:82/get/"
        res = requests.get(agent_url)
        agent_content = res.content.decode("utf-8")
        ip = re.compile('"proxy": "(.*?)",').findall(str(agent_content))[0]
        # NOTE(review): requests expects scheme keys like 'http'/'https',
        # not 'https://' — as written this mapping is ignored and requests
        # go out direct.  Kept byte-identical to preserve behavior; confirm
        # intent before "fixing", since activating the proxy changes
        # network behavior.
        proxy = {
            'https://': ip,
        }

        for line in datas:
            record = dict(line)
            bare_url = record['url'].replace("www.", "")
            url_name = record['urlName']

            # Try the three scheme/host variants in the original's order.
            candidates = (
                "http://" + bare_url,
                "http://www." + bare_url,
                "https://www." + bare_url,
            )
            for candidate in candidates:
                try:
                    res = requests.get(candidate,
                                       verify=False,
                                       proxies=proxy,
                                       allow_redirects=True,
                                       timeout=5)
                    _record(url_name, bare_url, res.url, res.status_code)
                    break
                except Exception:
                    # This scheme failed; fall through to the next one.
                    continue
            else:
                # All three attempts raised: store Code 0 with the bare
                # URL echoed as xinUrl (matches the original fallback).
                _record(url_name, bare_url, bare_url, 0)

    except Exception:
        # Top-level guard so a scheduler invoking my_job() never dies;
        # log the traceback instead of failing silently.
        traceback.print_exc()
'''
Created on May 24, 2018

@author: vikasy
'''
import pymongo
import pprint

from pymongo import MongoClient, collection
# One client for the whole script.  The original built a throwaway
# MongoClient() and immediately overwrote it with this explicit-URI one;
# the redundant (leaked) client object is removed.
client = MongoClient('mongodb://localhost:27017/')
db = client.meetingrooms
# NOTE(review): this assignment shadows the `collection` name imported
# from pymongo above; harmless here, but consider renaming (e.g. `rooms`).
collection = db.meetingRoomDetails


try:
    # Find rooms that already hold a booking for the exact slot
    # 2018-05-25 15:30-16:30.  $elemMatch requires all three fields to
    # match within the SAME element of the `booked` array.
    bookedRoomsDetails = collection.find({
        "booked": {
            '$elemMatch': {
                'date': '2018-05-25',
                'startTime': '15:30',
                'endTime': '16:30',
            }
        }
    })

    for posts in bookedRoomsDetails:
        pprint.pprint(posts)
    print("Rows fetched ")

except Exception as e:
    print(e)

    # Alternative query kept from the original for reference — unverified,
    # and it references undefined names (startTime, endTime):
    #bookedRoomsDetails =  collection.find({'$not':[{'availableSlots': {'$elemMatch': {'startTime':startTime , 'endTime':endTime, 'isAvailable' : 0}}}]})