def _analyze_users_similarity(args):
    """Worker: compare one user's products against a slice of all users.

    ``args`` is a single tuple ``(user, data_set, min_similarity, offset,
    limit)`` so this function can be dispatched via
    ``multiprocessing.Pool.map``.  Every pair whose similarity reaches
    ``min_similarity`` is persisted through the repository.
    """
    user, data_set, min_similarity, offset, limit = args
    logger.info("{} {}".format(offset, limit))
    repository = Repository(data_set=data_set)
    progress = Progress(limit - 1)
    candidate_batches = batch(
        repository.get_users_products(offset=offset, limit=limit - 1), 1000)
    for candidates in candidate_batches:
        for other in candidates:
            # Never compare the user against themselves.
            if user['_id'] == other['_id']:
                continue
            progress.advance()
            similarity, common, extra1, extra2 = calculate_products_similarity(
                user['products'], other['products'])
            if similarity >= min_similarity:
                record = dict(user1_id=user['user_id'],
                              user2_id=other['user_id'],
                              similarity=similarity,
                              common_products=common,
                              add_products1=extra1,
                              add_products2=extra2)
                repository.add_users_similarity(record)
        logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                            progress.get_estimated_time()))
def analyze_orders_similarity_multi(data_set, samples, orders, last_order_id, user_id):
    """Analyze similarity of one user's orders using a process pool.

    ``orders`` sizes the progress tracker; ``samples`` is the number of
    candidate orders to compare against, partitioned into one slice per
    worker process.  ``last_order_id`` is logged before and during the run
    so an interrupted job can be resumed from the logs.
    """
    repository = Repository(data_set=data_set)
    progress = Progress(orders)
    min_similarity = 0.2
    worker_count = 5
    pool = multiprocessing.Pool(processes=worker_count)
    slice_size = math.ceil(samples / worker_count)
    logger.info("Last order {}".format(last_order_id))
    # Note: the loop variable is deliberately NOT named ``orders`` to avoid
    # shadowing the parameter above.
    for order_chunk in batch(repository.get_orders_for_user(user_id=user_id), 10):
        tasks = []
        for order in order_chunk:
            progress.advance()
            last_order_id = order['_id']
            tasks.extend(
                (order, data_set, min_similarity, from_sample, slice_size)
                for from_sample in range(0, samples, slice_size))
        logger.info("Last order {}".format(last_order_id))
        pool.map(_analyze_orders_similarity, tasks)
        logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                            progress.get_estimated_time()))
    pool.close()
    pool.join()
def most_frequently_bought(data_set, user_id):
    """Log the 20 products with the highest global purchase counts.

    NOTE: ``user_id`` is currently unused; the ranking is global.
    """
    repository = Repository(data_set=data_set)
    global_counts = repository.get_products_bought_globally()
    top_products = sorted(global_counts,
                          key=lambda entry: entry['count'],
                          reverse=True)[:20]
    for entry in top_products:
        product = repository.get_product(entry['_id'])
        if product:
            logger.info("{} {}".format(product['product_name'], entry['count']))
def load_products(data_set):
    """Stream products from the reader into the repository in chunks of 100."""
    reader = Reader(data_set=data_set)
    repository = Repository(data_set=data_set)
    total = 0
    logger.info("Loading products")
    for chunk in batch(reader.load_products(), 100):
        repository.add_products(chunk)
        total += len(chunk)
        logger.info("Loaded products {}".format(total))
def analyze_users_similarity_multi(data_set, samples, user_id):
    """Fan out similarity analysis for one user across a process pool.

    The ``samples`` user population is split into ``step``-sized slices,
    one task per slice, all compared against the same target user.
    """
    repository = Repository(data_set=data_set)
    min_similarity = 0.2
    worker_count = 5
    pool = multiprocessing.Pool(processes=worker_count)
    step = math.ceil(samples / worker_count)
    target_user = repository.get_user_products(user_id=user_id)
    tasks = [(target_user, data_set, min_similarity, from_sample, step)
             for from_sample in range(0, samples, step)]
    pool.map(_analyze_users_similarity, tasks)
    pool.close()
    pool.join()
def load_orders(data_set):
    """Load orders in chunks, attaching each order's product list.

    Orders with no matching products are skipped entirely.
    """
    reader = Reader(data_set=data_set)
    repository = Repository(data_set=data_set)
    total = 0
    logger.info("Loading orders")
    for chunk in batch(reader.load_orders(), 100):
        enriched = []
        for order in chunk:
            # Copy each product record so mutations don't leak back into
            # whatever the repository returned.
            products = [p.copy()
                        for p in repository.find_order_products(order['order_id'])]
            if not products:
                continue
            order['products'] = products
            enriched.append(order)
        if enriched:
            repository.add_orders(enriched)
            total += len(enriched)
            logger.info("Loaded orders {}".format(total))
def most_frequently_bought_by_similar_users(data_set, user_id):
    """Log recommendations derived from users similar to ``user_id``.

    Products the similar user has that ``user_id`` lacks are weighted by
    the pair's similarity score; products both bought are tallied by count.
    """
    repository = Repository(data_set=data_set)
    similar_users = repository.get_similar_users_for_user(user_id=user_id)
    recommend_products = defaultdict(float)
    common_products = defaultdict(float)
    count = 0  # rebound by the reporting loops below
    for pair in similar_users:
        # Work out which side of the similarity record is the other user.
        if pair['user1_id'] == user_id:
            additional = pair['add_products2']
        elif pair['user2_id'] == user_id:
            additional = pair['add_products1']
        else:
            raise Exception()
        for product_id in additional:
            recommend_products[product_id] += pair['similarity']
        for product_id in pair['common_products']:
            common_products[product_id] += 1
    logger.info("-- Most frequent common products:")
    common_products = sorted(common_products.items(),
                             key=lambda item: item[1],
                             reverse=True)[:10]
    for product_id, count in common_products:
        product = repository.get_product(product_id)
        if product:
            logger.info("{} {}".format(product['product_name'], count))
    logger.info("-- Recommended products:")
    recommended = sorted(recommend_products.items(),
                         key=lambda item: item[1],
                         reverse=True)[:10]
    for product_id, count in recommended:
        product = repository.get_product(product_id)
        if product:
            logger.info("{} {}".format(product['product_name'], count))
def process(self, tweet):
    """Store a tweet, try to resolve its place, and return the stored record.

    Returns None if the repository rejects the tweet (e.g. already stored);
    otherwise returns ``Repository.read(tweet.id)`` whether or not a place
    was linked.
    """
    stored_tweet = Repository.create(tweet)
    if stored_tweet is None:
        return None
    username = tweet.user.username
    # Candidate places for this tweet, ordered by score.
    sorted_scores = self.get_potential_places(tweet)
    actual_places = self.places[username]
    actual_communes = self.communes[username]
    # Iterate over all the potential places by score to find a commune first.
    for potential_place, score in sorted_scores:
        potential_commune = potential_place
        # If the place is a commune, restrict all places to be inside that commune.
        if potential_commune in actual_communes:
            actual_places = actual_communes[potential_place]
            # We found a commune, do not look for any more.
            break
    # Iterate over all the potential places by score.
    for potential_place, score in sorted_scores:
        # Check if the potential place is in the actual places.
        if potential_place in actual_places:
            actual_place = actual_places[potential_place]
            # Create a relation between the tweet and the place.
            position = self.link_tweet_to_place(tweet, actual_place)
            # We found a place, so move on to the next tweet.
            break
    return Repository.read(tweet.id)
def test_stevens_info(self):
    """Test whole correct files: students, instructors and majors.

    BUG FIX: the original ended with ``self.assertTrue(major[1], ...)`` /
    ``self.assertTrue(major[2], ...)``.  ``assertTrue``'s second argument
    is the failure *message*, so those assertions passed for any truthy
    value and never compared against the expected data.  They are replaced
    with ``assertCountEqual`` (order-insensitive element comparison, which
    also tolerates list-vs-tuple containers).
    """
    stevens = Repository('Stevens')
    students_info = {
        '10103': ['10103', 'Baldwin, C', 'SFEN',
                  {'SSW 567': 'A', 'SSW 564': 'A-', 'SSW 687': 'B', 'CS 501': 'B'}],
        '10115': ['10115', 'Wyatt, X', 'SFEN',
                  {'SSW 567': 'A', 'SSW 564': 'B+', 'SSW 687': 'A', 'CS 545': 'A'}],
        '10172': ['10172', 'Forbes, I', 'SFEN',
                  {'SSW 555': 'A', 'SSW 567': 'A-'}],
        '10175': ['10175', 'Erickson, D', 'SFEN',
                  {'SSW 567': 'A', 'SSW 564': 'A', 'SSW 687': 'B-'}],
        '10183': ['10183', 'Chapman, O', 'SFEN', {'SSW 689': 'A'}],
        '11399': ['11399', 'Cordova, I', 'SYEN', {'SSW 540': 'B'}],
        '11461': ['11461', 'Wright, U', 'SYEN',
                  {'SYS 800': 'A', 'SYS 750': 'A-', 'SYS 611': 'A'}],
        '11658': ['11658', 'Kelly, P', 'SYEN', {'SSW 540': 'F'}],
        '11714': ['11714', 'Morton, A', 'SYEN',
                  {'SYS 611': 'A', 'SYS 645': 'C'}],
        '11788': ['11788', 'Fuller, E', 'SYEN', {'SSW 540': 'A'}]}
    instructors_info = {
        '98765': ['98765', 'Einstein, A', 'SFEN',
                  {'SSW 567': 4, 'SSW 540': 3}],
        '98764': ['98764', 'Feynman, R', 'SFEN',
                  {'SSW 564': 3, 'SSW 687': 3, 'CS 501': 1, 'CS 545': 1}],
        '98763': ['98763', 'Newton, I', 'SFEN',
                  {'SSW 555': 1, 'SSW 689': 1}],
        '98762': ['98762', 'Hawking, S', 'SYEN', {}],
        '98761': ['98761', 'Edison, A', 'SYEN', {}],
        '98760': ['98760', 'Darwin, C', 'SYEN',
                  {'SYS 800': 1, 'SYS 750': 1, 'SYS 611': 2, 'SYS 645': 1}]}
    majors_info = {
        'SFEN': ['SFEN', ('SSW 555', 'SSW 564', 'SSW 567', 'SSW 540'),
                 ('CS 513', 'CS 545', 'CS 501')],
        'SYEN': ['SYEN', ('SYS 800', 'SYS 612', 'SYS 671'),
                 ('SSW 565', 'SSW 810', 'SSW 540')]}
    # Collect the actual repository state as plain dicts.
    students_dic = {CWID: person.get_whole_info()
                    for CWID, person in stevens.students.items()}
    instructors_dic = {CWID: person.get_whole_info()
                       for CWID, person in stevens.instructors.items()}
    majors_dic = {major: major_info.get_whole_info()
                  for major, major_info in stevens.majors.items()}
    self.assertEqual(students_dic, students_info)
    self.assertEqual(instructors_dic, instructors_info)
    for item, major in majors_dic.items():
        self.assertEqual(major[0], majors_info[item][0])
        # assertCountEqual compares elements regardless of container type
        # and ordering (required vs elective course collections).
        self.assertCountEqual(major[1], majors_info[item][1])
        self.assertCountEqual(major[2], majors_info[item][2])
def test_student_courses_info(self):
    """Test students' completed, remaining-required and elective courses.

    BUG FIX: the original ended with ``self.assertTrue(courses_dic,
    courses_info)`` — ``assertTrue``'s second argument is the failure
    message, so the test passed whenever ``courses_dic`` was non-empty and
    never compared the data.  It now compares entry by entry, using
    ``assertCountEqual`` so list/tuple/set containers with the same
    elements are treated as equal.
    """
    stevens = Repository('Stevens')
    courses_info = {
        '10103': [['SSW 567', 'SSW 564', 'SSW 687', 'CS 501'],
                  ['SSW 540', 'SSW 555'], None],
        '10115': [['SSW 567', 'SSW 564', 'SSW 687', 'CS 545'],
                  ['SSW 540', 'SSW 555'], None],
        '10172': [['SSW 555', 'SSW 567'], ['SSW 540', 'SSW 564'],
                  ['CS 501', 'CS 513', 'CS 545']],
        '10175': [['SSW 567', 'SSW 564', 'SSW 687'], ['SSW 540', 'SSW 555'],
                  ['CS 501', 'CS 513', 'CS 545']],
        '10183': [['SSW 689'], ['SSW 540', 'SSW 555', 'SSW 564', 'SSW 567'],
                  ['CS 501', 'CS 513', 'CS 545']],
        '11399': [['SSW 540'], ['SYS 612', 'SYS 671', 'SYS 800'], None],
        '11461': [['SYS 800', 'SYS 750', 'SYS 611'], ['SYS 612', 'SYS 671'],
                  ['SSW 540', 'SSW 565', 'SSW 810']],
        '11658': [[], ['SYS 612', 'SYS 671', 'SYS 800'],
                  ['SSW 540', 'SSW 565', 'SSW 810']],
        '11714': [['SYS 611', 'SYS 645'], ['SYS 612', 'SYS 671', 'SYS 800'],
                  ['SSW 540', 'SSW 565', 'SSW 810']],
        '11788': [['SSW 540'], ['SYS 612', 'SYS 671', 'SYS 800'], None]}
    courses_dic = dict()
    for CWID, person in stevens.students.items():
        courses_dic[CWID] = stevens.majors[person.major].update_courses_info(
            person.courses)
    # Same set of students on both sides...
    self.assertEqual(set(courses_dic), set(courses_info))
    # ...and matching course collections for each student.
    for cwid, expected in courses_info.items():
        actual = courses_dic[cwid]
        for actual_part, expected_part in zip(actual, expected):
            if expected_part is None:
                self.assertIsNone(actual_part)
            else:
                self.assertCountEqual(actual_part, expected_part)
def analyze_products_by_user(data_set):
    """Aggregate each user's purchase counts and persist them in batches."""
    repository = Repository(data_set=data_set)
    users = repository.get_users()
    processed = 0
    total = len(users)
    for user_ids in batch(users, 100):
        chunk = []
        for user_id in user_ids:
            counts = repository.get_products_bought_by_user(user_id)
            chunk.append(dict(
                user_id=user_id,
                products=[dict(product_id=c['_id'], count=c['count'])
                          for c in counts]))
            processed += 1
            logger.info("{}/{}".format(processed, total))
        repository.add_user_products(chunk)
def analyze_orders_similarity(data_set, samples):
    """For each order, find its single most similar later order and store it.

    Performs a triangular pairwise scan over ``samples`` orders: order i is
    compared only against orders at positions > i (tracked via ``offset``),
    so total comparisons are samples*(samples-1)/2 — which is exactly what
    the Progress tracker is sized to.
    """
    repository = Repository(data_set=data_set)
    progress = Progress(math.ceil(((samples - 1) * samples) / 2))
    similarity_threshold = 0.2
    offset = 1
    for orders1 in batch(repository.get_orders(limit=samples - 1), 100):
        for o1 in orders1:
            # Seed the running max with the threshold so only matches
            # strictly above it are ever recorded.
            max_similarity = similarity_threshold
            similar = None
            count = 0
            for orders2 in batch(
                    repository.get_orders(offset=offset,
                                          limit=samples - offset), 100):
                for o2 in orders2:
                    progress.advance()
                    similarity, common, additional1, additional2 = calculate_products_similarity(
                        o1['products'], o2['products'])
                    if similarity > max_similarity:
                        # New best match for o1 — remember the full pair.
                        max_similarity = similarity
                        similar = dict(order1_id=o1['order_id'],
                                       user1_id=o1['user_id'],
                                       order2_id=o2['order_id'],
                                       user2_id=o2['user_id'],
                                       similarity=similarity,
                                       common_products=common,
                                       add_products1=additional1,
                                       add_products2=additional2)
                        logger.info("Similarity {} {} {}".format(
                            similar['user1_id'], similar['user2_id'],
                            similarity))
            # Persist only the best match found (if any) for this order.
            if similar is not None:
                repository.add_orders_similarity(similar)
            offset += 1
            logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                                progress.get_estimated_time()))
def analyze_users_similarity(data_set, samples):
    """For each user, find their single most similar later user and store it.

    Triangular pairwise scan over ``samples`` users (user i is compared only
    against users at positions > i via ``offset``), so the Progress tracker
    is sized to samples*(samples-1)/2 comparisons.
    """
    repository = Repository(data_set=data_set)
    progress = Progress(math.ceil(((samples - 1) * samples) / 2))
    offset = 1
    for user_products1 in batch(
            repository.get_user_products(limit=samples - 1), 100):
        for up1 in user_products1:
            # Minimum similarity for a pair to be recorded at all.
            max_similarity = 0.1
            similar = None
            for user_products2 in batch(
                    repository.get_user_products(offset=offset,
                                                 limit=samples - offset),
                    100):
                for up2 in user_products2:
                    progress.advance()
                    similarity, common, additional1, additional2 = calculate_products_similarity(
                        up1['products'], up2['products'])
                    if similarity > max_similarity:
                        # New best match for up1 — remember the full pair.
                        max_similarity = similarity
                        similar = dict(user1_id=up1['user_id'],
                                       user2_id=up2['user_id'],
                                       similarity=similarity,
                                       common_products=common,
                                       add_products1=additional1,
                                       add_products2=additional2)
                        logger.info("{} {} {}".format(similar['user1_id'],
                                                      similar['user2_id'],
                                                      similarity))
            # Persist only the best match found (if any) for this user.
            if similar is not None:
                repository.add_users_similarity(similar)
            offset += 1
            logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                                progress.get_estimated_time()))
def __init__(self, repo_client=None):
    """Initialize with an optional repository client.

    BUG FIX: the original default was ``repo_client=Repository(
    adapter=MongoRepository)``, which is evaluated once at function
    definition time — every instance created without an explicit client
    silently shared the same repository object, and the repository was
    constructed as a side effect of merely importing the module.  The
    default is now ``None`` (backward-compatible) and a fresh client is
    built per instance.
    """
    if repo_client is None:
        repo_client = Repository(adapter=MongoRepository)
    self.repo_client = repo_client
from database import Repository
from locator import PlaceExtractor
from locator import ScoreCalculator

# For every stored tweet, print each candidate place with its score.
tweets = Repository.all()
for tweet in tweets:
    print(tweet.content, "\n")
    candidate_places = PlaceExtractor(tweet).find_potential_places()
    for candidate in candidate_places:
        score = ScoreCalculator(tweet).for_word(candidate)
        print(candidate, score)
    print("\n\n")
def test_processor_creates_a_tweet_in_the_database(self):
    """Processing a tweet should make it retrievable by id afterwards."""
    Processor().process(self.tweet)
    persisted = Repository.read('6969')
    assert persisted.id == '6969'
def pushToImgBB(visionBase64):
    """Upload a base64-encoded image to ImgBB and return its public URL.

    NOTE(review): the API key is hard-coded in the URL — consider moving it
    to configuration or an environment variable.
    """
    payload = {'image': visionBase64}
    response = requests.request(
        "POST",
        "https://api.imgbb.com/1/upload?key=a4335073f815a159ee957016a7a2a65c",
        headers={},
        data=payload,
        files=[])
    body = response.json()
    return body['data']['url']


# --> Object Initiate Database
db = Repository()
stream_db = parse_json(db.get_all())


# Routing Root and Rendering Index ( SyncMode, Regstered Device )
@app.route('/')
def index():
    return render_template('index.html',
                           async_mode=socketio.async_mode,
                           devices=devices,
                           stream_db=stream_db,
                           stackholder=stackholder)


#---------------------- SOCKET
def missing_major_info(self):
    """ test missing major info when a student is in that major """
    # NOTE(review): the method name lacks the ``test_`` prefix, so unittest
    # discovery silently skips this case — likely should be renamed to
    # ``test_missing_major_info``.
    with self.assertRaises(ValueError):
        Repository('missed_major_info')
def missing_person(self):
    """ test no corresponding person is found based on grades.txt """
    # NOTE(review): the method name lacks the ``test_`` prefix, so unittest
    # discovery silently skips this case — likely should be renamed to
    # ``test_missing_person``.
    with self.assertRaises(ValueError):
        Repository('grade_person_not_match')
def analyze_products_totally(data_set):
    """Store each product's global purchase count on the product record."""
    repository = Repository(data_set=data_set)
    for entry in repository.get_products_bought_globally():
        repository.set_product_global(entry['_id'], entry['count'])
        logger.info("{} {}".format(entry['_id'], entry['count']))
def __init__(self):
    """Cache every user's known communes and places from the repository."""
    communes, places = Repository.all_users_with_places()
    self.communes = communes
    self.places = places
def link_tweet_to_place(self, tweet, place):
    """Persist a tweet-to-place relation and return the mapping record."""
    place_id = place.id
    return Repository.map_place_to_tweet(tweet, place_id)
# SERVERNAME = 'http://escoca.ap-1.evennode.com/' # --> Registerd Device ( chipID : name ) devices = {'951950972': "ESP-A", '805658940': "ESP-B", '000000000': "ESP-C"} # --> Registered User ( whatsapp : name ) stackholder = {'628561655028': "Lasida"} # --> Setup Flask app = Flask(__name__) app.config['SECRET_KEY'] = 'secreto!' app.config["DEBUG"] = True app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False # --> Object Initiate Database db = Repository() # Routing Root and Rendering Index ( SyncMode, Regstered Device ) @app.route('/') def index(): return render_template('index.html', devices=devices, stackholder=stackholder) def isset(data, key, typedata="str"): if typedata == "str": return str(data[key]) if data.get(key) else "" else:
def missing_info(self):
    """ test missing info in instructors.txt """
    # NOTE(review): the method name lacks the ``test_`` prefix, so unittest
    # discovery silently skips this case — likely should be renamed to
    # ``test_missing_info``.
    with self.assertRaises(ValueError):
        Repository('missed_info')