def run_ingest(config, xmlfile):
    """Parse a downloaded mandi-prices XML file and store its rows in MongoDB.

    The file is parsed with ``xmltodict`` and every ``Table`` element under
    the SOAP envelope's ``NewDataSet`` is turned into one document. The
    commodity name is read from ``Commodity`` or, failing that, ``Column1``;
    rows that carry neither are skipped with a warning.

    Args:
        config: passed unchanged to ``MongoHelper``.
        xmlfile: path of the XML file to ingest.

    Raises:
        KeyError: if the expected envelope structure or one of the mandatory
            row fields is missing.
    """
    with open(xmlfile, 'rb') as f:
        xmldict = xmltodict.parse(f.read())

    tables = (xmldict["soap:Envelope"]["soap:Body"]["showResponse"]
              ["showResult"]["diffgr:diffgram"]["NewDataSet"]["Table"])
    # xmltodict returns a single mapping instead of a list when the parent
    # has exactly one <Table> child; normalise so the loop sees rows, not keys.
    if not isinstance(tables, list):
        tables = [tables]

    data = []
    for t in tables:
        obj = {
            "arrival_date": t["Arrival_Date"],
            "district": t["District"],
            "market": t["Market"],
            "max_price": t["Max_x0020_Price"],
            "min_price": t["Min_x0020_Price"],
            "modal_price": t["Modal_x0020_Price"],
            "state": t["State"],
            "variety": t["Variety"]
        }
        if "Commodity" in t:
            obj["commodity"] = t["Commodity"]
        elif "Column1" in t:
            obj["commodity"] = t["Column1"]
        else:
            # .get() so that a row without an id cannot crash the warning path.
            logger.warning("Commodity not found in %s" % t.get("@diffgr:id"))
            continue
        logger.debug("Inserted %s %s %s" %
                     (obj["commodity"], obj["market"], obj["arrival_date"]))
        data.append(obj)

    mongo_helper = MongoHelper(config)
    mongo_helper.rename_collection("mandi_prices")
    mongo_helper.save("mandi_prices", docs=data)
def __cics_geocode(self, state, district, market):
    """Query the CSIS census geocoder for a market.

    The state and district names are matched case-insensitively against
    ``self.states`` to obtain their ids, which are sent as the ``f`` filter
    parameter; the market name is sent as the ``q`` parameter.

    Args:
        state: state name.
        district: district name, looked up inside the matched state only.
        market: market name used as the search text.

    Returns:
        The ``marker`` entry found under ``markers`` in the response (parsed
        as XML with ``xmltodict``), or ``None`` when the response has none.
    """
    state_key = state.lower()
    district_key = district.lower()

    # Stop at the first matching state so the district filter always belongs
    # to the same state as the state filter.
    filter_parts = []
    state_entry = next(
        (st for st in self.states if st['name'].lower() == state_key), None)
    if state_entry is not None:
        filter_parts.append('"state":"%s"' % state_entry['id'])
        district_entry = next(
            (di for di in state_entry['districts']
             if di['name'].lower() == district_key), None)
        if district_entry is not None:
            filter_parts.append('"district":"%s"' % district_entry['id'])
    f_param = "{" + ", ".join(filter_parts) + "}"

    # Hand requests a dict: it form-encodes the fields itself (including
    # non-ASCII text) and sets the form Content-Type header.
    req = requests.post(
        "http://india.csis.u-tokyo.ac.jp/geocode-cgi/census_ajax_json.cgi",
        data={"q": market, "f": f_param})
    logger.debug("cics request for %s %s %s" % (state, district, market))

    xmldict = xmltodict.parse(req.text)
    if "markers" in xmldict:
        results = xmldict["markers"]
        if results and "marker" in results:
            return results["marker"]
    return None
def check_mandi_locations(self):
    """Make sure every market in ``mandi_prices`` has a location record.

    For each distinct (state, district, market) found in the
    ``mandi_prices`` collection (compared in lower case), a document is
    looked up in ``mandi_locations`` and inserted when absent. Documents
    lacking ``cics_geocode`` or ``nm_geocode`` are then sent to the
    respective geocoder and updated when it returns a non-empty result.
    """
    logger.debug("Check mandi locations")
    mandi_prices = self.mongo_helper.db["mandi_prices"]
    mandi_locations = self.mongo_helper.db["mandi_locations"]

    # Many price rows share one market. Handle each location once per run so
    # a location whose geocoding yields nothing is not re-requested per row.
    seen = set()
    for mp in mandi_prices.find():
        state = mp["state"].lower()
        district = mp["district"].lower()
        market = mp["market"].lower()

        key = (state, district, market)
        if key in seen:
            continue
        seen.add(key)

        query = {"state": state, "district": district, "market": market}
        doc = mandi_locations.find_one(query)
        if doc is None:
            doc = {"state": state, "district": district, "market": market}
            doc["_id"] = mandi_locations.insert(doc)
            # Logged here so the message is only emitted for a real insert.
            logger.debug("Inserted new mandi location for %s %s %s" %
                         (state, district, market))

        if "cics_geocode" not in doc:
            cics_data = self.__cics_geocode(state, district, market)
            if cics_data:
                mandi_locations.update(
                    {"_id": doc["_id"]},
                    {"$set": {"cics_geocode": cics_data}})

        if "nm_geocode" not in doc:
            nm_data = self.__nominatim_geocode(state, district, market)
            if nm_data:
                mandi_locations.update(
                    {"_id": doc["_id"]},
                    {"$set": {"nm_geocode": nm_data}})
def __nominatim_geocode(self, state, district, market):
    """Search the configured Nominatim endpoint for a market.

    The search text is "<market>, <district>, <state>" and the request asks
    for JSON output.

    Returns:
        The decoded JSON body of the response.
    """
    search_text = "%s, %s, %s" % (market, district, state)
    response = requests.get(
        NOMINATIM["api_url"],
        params={"format": "json", "q": search_text},
    )
    logger.debug("nm request for %s %s %s" % (state, district, market))
    return response.json()
def __nominatim_geocode(self, state, district, market):
    """Geocode a market via the Nominatim URL in ``NOMINATIM["api_url"]``.

    Args:
        state: state name, last component of the search text.
        district: district name, middle component of the search text.
        market: market name, first component of the search text.

    Returns:
        Whatever the response's JSON body decodes to.
    """
    # Most specific place first: "market, district, state".
    place = ", ".join(["%s" % market, "%s" % district, "%s" % state])
    request_params = {"format": "json", "q": place}
    reply = requests.get(NOMINATIM["api_url"], params=request_params)
    logger.debug("nm request for %s %s %s" % (state, district, market))
    decoded = reply.json()
    return decoded
def download_file():
    """Download the mandi-prices XML into ``data_dir`` under a dated name.

    The target name is "<url basename>_<yy-mm-dd>.xml". When that file is
    already present nothing is downloaded.

    Returns:
        The path of the newly written file, or ``False`` when a file for
        today's date already exists.

    Raises:
        requests.RequestException: if the request fails or the server
            answers with an HTTP error status.
    """
    dl_url = OGD["mandi_prices_xml_url"]
    base_name = os.path.splitext(os.path.basename(dl_url))[0]
    dl_file_name = os.path.join(
        data_dir,
        "%s_%s.xml" % (base_name, datetime.now().strftime("%y-%m-%d")))

    # Check first: no point fetching a file that will be thrown away.
    if os.path.exists(dl_file_name):
        logger.debug("File %s already exists, skipping download" % dl_file_name)
        return False

    try:
        req = requests.get(dl_url)
        # An error page saved as today's file would block any retry today.
        req.raise_for_status()
    except requests.RequestException:
        logger.critical("Error while downloading %s" % dl_url)
        raise

    # Write the raw bytes so the XML keeps the encoding it declares; the
    # ingest step reads the file back in binary mode.
    with open(dl_file_name, "wb") as dlf:
        dlf.write(req.content)
    return dl_file_name
def download_file():
    """Fetch the mandi-prices XML and save it in ``data_dir``.

    The file is named "<url basename>_<yy-mm-dd>.xml". If a file with that
    name already exists the download is skipped.

    Returns:
        The path of the file that was written, or ``False`` when today's
        file already exists.

    Raises:
        requests.RequestException: on a connection failure or an HTTP error
            status from the server.
    """
    dl_url = OGD["mandi_prices_xml_url"]
    stem = os.path.splitext(os.path.basename(dl_url))[0]
    date_tag = datetime.now().strftime("%y-%m-%d")
    dl_file_name = os.path.join(data_dir, "%s_%s.xml" % (stem, date_tag))

    # Decide before touching the network whether a download is needed.
    if os.path.exists(dl_file_name):
        logger.debug("File %s already exists, skipping download" % dl_file_name)
        return False

    try:
        req = requests.get(dl_url)
        # Refuse to store an HTTP error body as if it were the data file.
        req.raise_for_status()
    except requests.RequestException:
        logger.critical("Error while downloading %s" % dl_url)
        raise

    # Binary write keeps the payload byte-for-byte as served.
    with open(dl_file_name, "wb") as dlf:
        dlf.write(req.content)
    return dl_file_name
def check_mandi_locations(self):
    """Create and geocode ``mandi_locations`` records for known markets.

    Walks the ``mandi_prices`` collection and, for each distinct lower-cased
    (state, district, market), finds or inserts the matching document in
    ``mandi_locations``. A document without ``cics_geocode`` or
    ``nm_geocode`` is passed to the corresponding geocoder and updated when
    that geocoder returns a non-empty result.
    """
    logger.debug("Check mandi locations")
    mandi_prices = self.mongo_helper.db["mandi_prices"]
    mandi_locations = self.mongo_helper.db["mandi_locations"]

    # Price rows repeat the same market many times; process each location
    # once per run to avoid repeated lookups and geocoder requests.
    handled = set()
    for mp in mandi_prices.find():
        state = mp["state"].lower()
        district = mp["district"].lower()
        market = mp["market"].lower()

        location_key = (state, district, market)
        if location_key in handled:
            continue
        handled.add(location_key)

        doc = mandi_locations.find_one(
            {"state": state, "district": district, "market": market})
        if doc is None:
            doc = {"state": state, "district": district, "market": market}
            doc["_id"] = mandi_locations.insert(doc)
            # Emit the message only when a record was really inserted.
            logger.debug("Inserted new mandi location for %s %s %s" %
                         (state, district, market))

        if "cics_geocode" not in doc:
            cics_data = self.__cics_geocode(state, district, market)
            if cics_data:
                mandi_locations.update(
                    {"_id": doc["_id"]},
                    {"$set": {"cics_geocode": cics_data}})

        if "nm_geocode" not in doc:
            nm_data = self.__nominatim_geocode(state, district, market)
            if nm_data:
                mandi_locations.update(
                    {"_id": doc["_id"]},
                    {"$set": {"nm_geocode": nm_data}})
def run_ingest(config, xmlfile):
    """Load a mandi-prices XML file and save its rows to ``mandi_prices``.

    Each ``Table`` element under the SOAP envelope's ``NewDataSet`` becomes
    one document. The commodity is taken from ``Commodity`` or, if absent,
    from ``Column1``; a row with neither is logged and skipped. The existing
    collection is renamed through ``MongoHelper`` before the new documents
    are saved.

    Args:
        config: passed unchanged to ``MongoHelper``.
        xmlfile: path of the XML file to ingest.

    Raises:
        KeyError: if the envelope structure or a mandatory row field is
            missing.
    """
    with open(xmlfile, 'rb') as f:
        xmlstr = f.read()
    xmldict = xmltodict.parse(xmlstr)

    dataset = xmldict["soap:Envelope"]["soap:Body"]["showResponse"][
        "showResult"]["diffgr:diffgram"]["NewDataSet"]
    tables = dataset["Table"]
    # A lone <Table> child is parsed as one mapping rather than a list of
    # mappings; wrap it so iteration yields rows instead of field names.
    if not isinstance(tables, list):
        tables = [tables]

    data = []
    for t in tables:
        obj = {
            "arrival_date": t["Arrival_Date"],
            "district": t["District"],
            "market": t["Market"],
            "max_price": t["Max_x0020_Price"],
            "min_price": t["Min_x0020_Price"],
            "modal_price": t["Modal_x0020_Price"],
            "state": t["State"],
            "variety": t["Variety"]
        }
        if "Commodity" in t:
            obj["commodity"] = t["Commodity"]
        elif "Column1" in t:
            obj["commodity"] = t["Column1"]
        else:
            # Tolerate a missing id so the warning itself cannot raise.
            logger.warning("Commodity not found in %s" % t.get("@diffgr:id"))
            continue
        logger.debug("Inserted %s %s %s" %
                     (obj["commodity"], obj["market"], obj["arrival_date"]))
        data.append(obj)

    mongo_helper = MongoHelper(config)
    mongo_helper.rename_collection("mandi_prices")
    mongo_helper.save("mandi_prices", docs=data)
def __cics_geocode(self, state, district, market):
    """Look up a market with the CSIS census geocoding service.

    ``self.states`` is searched case-insensitively for the state and, within
    that state, for the district. The ids found are sent as the ``f`` filter
    parameter alongside the market name in ``q``.

    Args:
        state: state name.
        district: district name, matched only within the matched state.
        market: market name used as the search text.

    Returns:
        The ``marker`` entry under ``markers`` in the response (parsed as
        XML with ``xmltodict``), or ``None`` if the response has none.
    """
    wanted_state = state.lower()
    wanted_district = district.lower()

    # First match wins, and the district is only searched inside that state,
    # so the two filter ids can never come from different states.
    filters = []
    for st in self.states:
        if st['name'].lower() != wanted_state:
            continue
        filters.append('"state":"%s"' % st['id'])
        for di in st['districts']:
            if di['name'].lower() == wanted_district:
                filters.append('"district":"%s"' % di['id'])
                break
        break
    f_param = "{" + ", ".join(filters) + "}"

    # Passing a dict lets requests form-encode the fields (non-ASCII text
    # included) and set the form Content-Type header.
    req = requests.post(
        "http://india.csis.u-tokyo.ac.jp/geocode-cgi/census_ajax_json.cgi",
        data={"q": market, "f": f_param})
    logger.debug("cics request for %s %s %s" % (state, district, market))

    xmldict = xmltodict.parse(req.text)
    if "markers" in xmldict:
        results = xmldict["markers"]
        if results and "marker" in results:
            return results["marker"]
    return None