示例#1
0
	def start_requests(self):
		"""Collect prices for every product and store one document per product.

		Each record returned by ``self.getURLS()`` is read positionally as
		``[product_id, brand, productName, urlList, ...]``. Every URL in
		``urlList`` is downloaded and handed to ``self.parse`` and then
		``self.checkForNonZeroPrice``, both of which also receive the value
		returned by ``DBOperations.getCollectionProduct`` for that product.
		A URL that fails for any reason is reported on stdout and skipped, so
		a single bad page does not abort the run. The collected results are
		saved through ``DBOperations.mongoSaveDocument``.
		"""
		for items in self.getURLS():
			product_id = items[0]
			brand = items[1]
			productName = items[2]
			urlList = items[3]
			update_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
			current_dict = DBOperations.getCollectionProduct(self.priceClient, self.PriceCollection, product_id)
			priceList = []
			for url in urlList:
				print(url)
				try:
					# A timeout keeps one stalled server from hanging the whole
					# run; a timed-out URL is reported by the handler below.
					response = requests.get(url, timeout=5)
					priceDict = self.parse(response, current_dict)
					priceDict = self.checkForNonZeroPrice(priceDict, current_dict)
					priceList.append(priceDict)
				except Exception:
					# Best effort: log the failure and move on to the next URL.
					print("%s Error %s" % (traceback.format_exc(), url))
			print("%s %s" % ("Extracting Price for Produt: ", product_id))
			allProductPriceDict = {
				'product_id': product_id,
				'brand': brand,
				'productName': productName,
				'priceList': priceList,
				"update_time": update_time,
			}
			DBOperations.mongoSaveDocument(allProductPriceDict, self.PriceCollection, self.priceClient, 'product_id', False)
def upDateProductRecommendation():
	"""Copy product recommendations from the recommendation CSV files into MongoDB.

	Every row of every file returned by ``getProductRecommendationFile`` is
	split into ``(product_id, category, recommendedProducts)``. Rows whose
	recommendation list equals ``[]`` are skipped; the others are saved to
	the ``"allComp"`` collection.
	"""
	client = DBOperations.getMongoDBClient(ProductRecommendationDBName)
	for csvPath in getProductRecommendationFile(ProductRecommendationPath):
		for row in __getListFromCSV(csvPath):
			product_id, category, recommendedProducts = getProductElementsFromMasterFileRow(row)
			if recommendedProducts != []:
				record = {
					'product_id': product_id,
					'category': category,
					'recommendedProducts': recommendedProducts,
				}
				DBOperations.mongoSaveDocument(record, "allComp", client, 'product_id', False)
	def getURLS(self):
		"""Return one record per document in the ``allproducts`` master collection.

		Each record is ``[product_id, brand, product_name, url, category]``,
		where ``url`` is whatever ``self.__getOrderedURL`` returns for the
		document's ``product_urlList``.

		Raises:
			IOError/OSError: if the master CSV at ``self.ProductMasterFilePath``
				cannot be opened.
			KeyError: if a document lacks one of the fields read below.
		"""
		# The master CSV's contents are never read here (the data comes from
		# MongoDB). It is still opened so that a missing or unreadable file
		# fails exactly as before, but the handle is now closed right away
		# instead of being leaked.
		with open(self.ProductMasterFilePath):
			pass

		DBMasterClient = DBOperations.getMongoDBClient(self.ProductMasterDBName)
		masterCursor = DBOperations.getCollectionCursorObject(DBMasterClient, "allproducts")
		ProductList = []
		for doc in masterCursor:
			ProductList.append([
				doc['product_id'],
				doc['brand'],
				doc['product_name'],
				self.__getOrderedURL(doc['product_urlList']),
				doc['category'],
			])
		return ProductList
示例#4
0
def upDateProductRecommendation():
    """Store non-empty product recommendations in the "allReco" collection.

    Walks every row of every recommendation file, splits it with
    ``getProductElementsFromMasterFileRow`` and saves a document for each
    row whose recommendation list is not equal to ``[]``.
    """
    recoClient = DBOperations.getMongoDBClient(ProductRecommendationDBName)
    recoFiles = getProductRecommendationFile(ProductRecommendationPath)
    for recoFile in recoFiles:
        for csvRow in __getListFromCSV(recoFile):
            parsed = getProductElementsFromMasterFileRow(csvRow)
            product_id, category, recommendedProducts = parsed
            itemDict = dict(
                product_id=product_id,
                category=category,
                recommendedProducts=recommendedProducts,
            )
            if recommendedProducts != []:
                DBOperations.mongoSaveDocument(
                    itemDict, "allReco", recoClient, 'product_id', False)
	def parse(self, response, url, brand, productName, product_id, snapDealMatch, amazonMatch):
		"""Extract product details from a downloaded page and save them.

		The scraper is picked by substring match on ``url``. The three checks
		are independent (not ``elif``): if a URL mentions several stores,
		every matching scraper runs and the result of the last one is kept.
		When no store matches, only ``product_id`` and ``spec_url`` are saved.
		"""
		details = {}
		if "flipkart" in url:
			details = FlipKartScrapper().downloadProductDetails(response.content, productName, brand)
		if "snapdeal" in url:
			details = SnapDealScrapper().downloadProductDetails(response.content, productName, brand, snapDealMatch)
		if "amazon" in url:
			details = AmazonScrapper().downloadProductDetails(response.content, productName, brand, amazonMatch)

		# Tag the record with its id and the final URL reported by the response.
		details['product_id'] = product_id
		details['spec_url'] = response.url
		DBOperations.mongoSaveDocument(details, self.SpecificationCollection, self.specificationClient, "product_id", False)
示例#6
0
 def getURLS(self):
     """Return one record per document in the ``allproducts`` master collection.

     Each record is ``[product_id, brand, product_name, url, category]``,
     where ``url`` is whatever ``self.__getOrderedURL`` returns for the
     document's ``product_urlList``.

     Raises:
         IOError/OSError: if the master CSV at ``self.ProductMasterFilePath``
             cannot be opened.
         KeyError: if a document lacks one of the fields read below.
     """
     # The CSV contents are never read here (the data comes from MongoDB).
     # The open is kept so a missing or unreadable file fails as before,
     # but the handle is closed immediately instead of being leaked.
     with open(self.ProductMasterFilePath):
         pass

     DBMasterClient = DBOperations.getMongoDBClient(
         self.ProductMasterDBName)
     masterCursor = DBOperations.getCollectionCursorObject(
         DBMasterClient, "allproducts")
     ProductList = []
     for doc in masterCursor:
         ProductList.append([
             doc['product_id'],
             doc['brand'],
             doc['product_name'],
             self.__getOrderedURL(doc['product_urlList']),
             doc['category'],
         ])
     return ProductList
示例#7
0
	def getURLS(self):
		"""Return ``[product_id, brand, product_name, product_urlList]`` for every
		document found in the master database's ``allproducts`` collection.
		"""
		client = DBOperations.getMongoDBClient(self.ProductMasterDBName)
		return [
			[doc['product_id'], doc['brand'], doc['product_name'], doc['product_urlList']]
			for doc in client.allproducts.find()
		]
def getURLS():
    """Return ``[product_id, url]`` for every document in ``allproducts``.

    ``url`` is the value ``__getOrderedURL`` returns for the document's
    ``product_urlList``. Only the two fields that end up in the result are
    read, so a document without ``brand`` or ``product_name`` no longer
    raises ``KeyError`` for data that was never used.

    Raises:
        KeyError: if a document lacks ``product_id`` or ``product_urlList``.
    """
    productMasterClient = DBOperations.getMongoDBClient(ProductMasterDBName)
    return [
        [row["product_id"], __getOrderedURL(row["product_urlList"])]
        for row in productMasterClient.allproducts.find()
    ]
def getURLS():
    """Build the list of ``[product_id, url]`` pairs from the master database.

    Iterates the ``allproducts`` collection and resolves each document's
    ``product_urlList`` through ``__getOrderedURL``. The unused ``brand`` and
    ``product_name`` lookups were removed: they contributed nothing to the
    result and raised ``KeyError`` on documents missing those fields.

    Raises:
        KeyError: if a document lacks ``product_id`` or ``product_urlList``.
    """
    productMasterClient = DBOperations.getMongoDBClient(ProductMasterDBName)
    ProductList = []
    for row in productMasterClient.allproducts.find():
        product_id = row['product_id']
        url = __getOrderedURL(row['product_urlList'])
        ProductList.append([product_id, url])
    return ProductList
示例#10
0
class SpecificationScrapper():
    """Downloads product pages and stores their parsed specifications.

    ``start_requests`` relies on ``getURLS``, ``createMatchDict`` and
    ``parse`` being available on the instance.
    """

    # MongoDB client for the specification store; created once, when the
    # class body is executed.
    specificationClient = DBOperations.getMongoDBClient("ProductSpecification")

    ProductMasterDBName = "Productmaster"
    SpecificationCollection = "allproducts"
    # Master CSV inside the current user's home directory.
    ProductMasterFilePath = (
        "/home/" + getpass.getuser() +
        "/BazaarfundaSrapperFiles/ProductMaster/MasterFile_Overall.csv")

    def start_requests(self):
        """Scrape specifications for every product that is not stored yet.

        Records from ``self.getURLS()`` are read positionally as
        ``[product_id, brand, productName, url, category]``. For each
        category other than ``"Category"`` and ``''`` the SnapDeal and
        Amazon match dictionaries are loaded once from
        ``./SpecificationMatch/<category>/``. Products whose id is already
        present in the specification collection are skipped. Any failure
        for a single product is printed and the loop continues.
        """
        AppProducts = self.getURLS()
        categories = {record[4] for record in AppProducts}

        snapDealSpecificationMatchDict = {}
        amazonSpecificationMatchDict = {}
        for cat in categories:
            if cat != "Category" and cat != '':
                matchDir = './SpecificationMatch/' + cat
                snapDealSpecificationMatchDict[cat] = self.createMatchDict(
                    matchDir + '/SpecificationMatchSnapDeal.csv')
                amazonSpecificationMatchDict[cat] = self.createMatchDict(
                    matchDir + '/SpecificationMatchAmazon.csv')

        for items in AppProducts:
            product_id = items[0]
            brand = items[1]
            productName = items[2]
            url = items[3]
            category = items[4]
            try:
                # Categories skipped above have no match dictionaries; the
                # resulting KeyError is reported by the outer handler.
                snapDealMatch = snapDealSpecificationMatchDict[category]
                amazonMatch = amazonSpecificationMatchDict[category]
                print(product_id)
                if not DBOperations.isIdPresent(self.specificationClient,
                                                self.SpecificationCollection,
                                                "product_id", product_id):
                    try:
                        response = requests.get(url, timeout=5)
                        self.parse(response, url, brand, productName,
                                   product_id, snapDealMatch, amazonMatch)
                    except Exception:
                        # Best effort: report the failing URL and carry on.
                        print("%s Error %s %s" % (traceback.format_exc(), url, items))
            except Exception:
                print(traceback.format_exc())
示例#11
0
    def execute(trial=False):
        """Fetch tweets near Amman and store those with a geo value.

        ``trial`` is accepted but not used by this implementation.

        Returns:
            dict with the ``"start"`` and ``"end"`` datetimes of the run.
        """
        began = datetime.datetime.now()

        team = 'ojhamb_runtongy_sgullett_zybu'
        collection = 'ojhamb_runtongy_sgullett_zybu.Tweets'

        # Connect to the repo database and start from an empty collection.
        repo = dml.pymongo.MongoClient().repo
        repo.authenticate(team, team)
        repo.dropCollection("Tweets")
        repo.createCollection("Tweets")

        # Search criteria: at most 10000 tweets within 150mi of Amman.
        criteria = got.manager.TweetCriteria()
        criteria = criteria.setNear('Amman')
        criteria = criteria.setWithin('150mi')
        criteria = criteria.setMaxTweets(10000)
        tweets = got.manager.TweetManager.getTweets(criteria)

        def as_row(t):
            # Tuple layout: id, formatted date, retweets, favorites, text,
            # geo, hashtags. Position 5 (geo) is used by the filter below.
            return (t.id, t.date.strftime("%Y-%m-%d %H:%M"), t.retweets,
                    t.favorites, t.text, t.geo, t.hashtags)

        def has_geo(row):
            return row[5] != ''

        # NOTE(review): the rows handed to insert_many are whatever
        # dbo.project/dbo.select return for these tuples; confirm they are
        # acceptable documents for the collection.
        located = dbo.select(dbo.project(tweets, as_row), has_geo)
        print(located)

        repo.dropCollection("Tweets")
        repo.createCollection("Tweets")
        repo[collection].insert_many(located)
        repo[collection].metadata({'complete': True})
        print(repo[collection].metadata())

        repo.logout()

        finished = datetime.datetime.now()
        return {"start": began, "end": finished}
示例#12
0
    def parse(self, response, url, brand, productName, product_id,
              snapDealMatch, amazonMatch):
        """Run the store-specific scraper(s) for ``url`` and save the result.

        Store detection is a plain substring test on ``url``. The tests are
        deliberately independent: each matching store's scraper is run in
        the order flipkart, snapdeal, amazon, and the last result wins.
        The saved record always carries ``product_id`` and ``spec_url``.
        """
        page_url = url
        record = {}

        if "flipkart" in page_url:
            scraper = FlipKartScrapper()
            record = scraper.downloadProductDetails(
                response.content, productName, brand)

        if "snapdeal" in page_url:
            scraper = SnapDealScrapper()
            record = scraper.downloadProductDetails(
                response.content, productName, brand, snapDealMatch)

        if "amazon" in page_url:
            scraper = AmazonScrapper()
            record = scraper.downloadProductDetails(
                response.content, productName, brand, amazonMatch)

        record['product_id'] = product_id
        record['spec_url'] = response.url
        DBOperations.mongoSaveDocument(
            record, self.SpecificationCollection, self.specificationClient,
            "product_id", False)
示例#13
0
def submit_job(min, max, N, O, res_up, res_dw, path):
    """Serialize a job description and submit it to the database queue.

    The job string is a space-separated list of fields:
    ``min:<int> max:<int> N:<a,b,...> O:<a,b,...> res_up:<v> res_dw:<v> path:<path>``.

    Returns whatever ``DBOperations.submit_job_2_DBQueue`` returns.
    """
    fields = [
        "min:%d max:%d" % (min, max),      # node range
        "N:" + ",".join(map(str, N)),
        "O:" + ",".join(map(str, O)),
        "res_up:" + str(res_up),
        "res_dw:" + str(res_dw),
        "path:" + path,
    ]
    jobString = " ".join(fields)
    return DBOperations.submit_job_2_DBQueue(DB_path(), jobString)
	def start_requests(self):
		"""Scrape specifications for every product that is not stored yet.

		Records from ``self.getURLS()`` are read positionally as
		``[product_id, brand, productName, url, category]``. For each category
		other than ``"Category"`` and ``''`` the SnapDeal and Amazon match
		dictionaries are loaded once from ``./SpecificationMatch/<category>/``.
		Products whose id is already present in the specification collection
		are skipped. Any failure for a single product is printed and the loop
		continues.
		"""
		AppProducts = self.getURLS()
		categories = {record[4] for record in AppProducts}

		snapDealSpecificationMatchDict = {}
		amazonSpecificationMatchDict = {}
		for cat in categories:
			if cat != "Category" and cat != '':
				matchDir = './SpecificationMatch/' + cat
				snapDealSpecificationMatchDict[cat] = self.createMatchDict(matchDir + '/SpecificationMatchSnapDeal.csv')
				amazonSpecificationMatchDict[cat] = self.createMatchDict(matchDir + '/SpecificationMatchAmazon.csv')

		for items in AppProducts:
			product_id = items[0]
			brand = items[1]
			productName = items[2]
			url = items[3]
			category = items[4]
			try:
				# Categories skipped above have no match dictionaries; the
				# resulting KeyError is reported by the outer handler.
				snapDealMatch = snapDealSpecificationMatchDict[category]
				amazonMatch = amazonSpecificationMatchDict[category]
				print(product_id)
				if not DBOperations.isIdPresent(self.specificationClient, self.SpecificationCollection, "product_id", product_id):
					try:
						response = requests.get(url, timeout=5)
						self.parse(response, url, brand, productName, product_id, snapDealMatch, amazonMatch)
					except Exception:
						# Best effort: report the failing URL and carry on.
						print("%s Error %s %s" % (traceback.format_exc(), url, items))
			except Exception:
				print(traceback.format_exc())
示例#15
0
def get_a_job_from_DB():
    """Return the next job from the database queue, or ``None`` when
    ``get_job_queue_len()`` reports nothing queued.
    """
    return (DBOperations.get_Job_from_DBQueue(DB_path())
            if get_job_queue_len() > 0 else None)
  # Fragment of a larger routine (its enclosing definition is not visible here).
  # Builds or loads the rating model, evaluates it, then prints food
  # recommendations for user 63.

  # Stop training once 'val_loss' has not improved for 10 epochs.
  es = EarlyStopping('val_loss', mode="min", patience=10, verbose=1)
  model = Model([cust_input, food_input], out)
  model.compile('adam', 'mean_squared_error')

  # Reuse a previously saved model when one exists; otherwise train and save.
  if os.path.exists('latest1.h5'):
      model = load_model('latest1.h5')
  else:
      history = model.fit([train.userId, train.foodId], train.rating, epochs=100, verbose=1, validation_split=0.3, callbacks=[es]) # fit on the training split
      model.save('latest1.h5')
      # Loss curves are only plotted on this (training) path.
      plt.plot(history.history['val_loss'], label="Validation Loss")
      plt.xlabel("Epochs")
      plt.ylabel("Loss")

      plt.plot(history.history['loss'], label="Training Loss")
  # NOTE(review): legend()/show() run even when the model was loaded and
  # nothing was plotted above - confirm that is intended.
  plt.legend()
  plt.show()
  model.evaluate([test.userId, test.foodId], test.rating)
  # Predict for user 63 against every distinct foodId in df.
  foodData = np.array(list(set(df.foodId)))
  user = np.array([63 for i in range(len(foodData))])
  predictions = model.predict([user, foodData])
  predictions = np.array([a[0] for a in predictions])
  # Positions of the 10 highest predictions.
  # NOTE(review): argsort yields positions into foodData, not foodId values;
  # verify that db.getFoodsFromFoodIDs expects these positions.
  food_ids = (-predictions).argsort()[:10]

  eaten = (db.getEatenFoodsOfUser(63))
  new_foods = (db.getFoodsFromFoodIDs(food_ids))
  # Keep at most 5 candidates that are not in the eaten list.
  recomms = [food for food in new_foods if food not in eaten][:5]
  # NOTE(review): these are positions within new_foods, yet they index
  # `predictions` below - confirm the printed scores match `recomms`.
  indexes =  [id_ for id_, food in enumerate(new_foods) if food not in eaten][:5]
  print(eaten)
  print()
  print(recomms)
  print(predictions[indexes])
示例#17
0
def get_job_queue_len():
    """Return what ``DBOperations.get_DB_queue_len`` reports for the
    database located at ``DB_path()``.
    """
    queue_len = DBOperations.get_DB_queue_len(DB_path())
    return queue_len
示例#18
0
from Scheduler import *

# Module-level application state, created once at import time.

# Bottle application object; its error handler is taken from the `error`
# module (described by the original author as the router).
app = Bottle()
app.error_handler = error.handler

# Plain lists that accumulate system messages, warnings and errors;
# clear_messages() empties all three.
system_messages = []
system_warnings = []
system_errors = []

# Plans that are currently running.
current_actions = []

# Shared database access object.
db = DBOperations()

# Scheduler, constructed with the shared database object.
scheduler = Scheduler(db)

# Working directory at the moment this module was loaded.
original_dir = os.getcwd()


# function to clear the system messages list
def clear_messages():
    """Empty the module-level message, warning and error lists in place."""
    for bucket in (system_messages, system_warnings, system_errors):
        bucket.clear()


def check_current_builds():