def process_row(self, row, task):
    """Convert one PA spud report row into a PA_Spud item plus feed output.

    The row is loaded into a ``PA_Spud`` item. If the item has both a
    ``Well_API__`` and a ``SPUD_Date``, ``self.db.loadItem`` is asked for a
    record with that pair: a truthy result only bumps the
    ``_existing_count`` stat, otherwise ``_new_count`` is bumped and the
    item is yielded. Unless ``task`` has a truthy ``no_alert`` entry, a
    ``FeedEntry`` and its tags are yielded as well, provided the entry has
    both ``lat`` and ``lng``.

    Args:
        row: mapping of source column names (``OGO_NUM``, ``SPUD_DATE``,
            ``COUNTY``, ``CONFIGURATION``, ``API``, ...) to values.
        task: mapping read for ``no_alert``, ``target_url`` and
            ``about_url``.

    Yields:
        The ``PA_Spud`` item, then optionally the ``FeedEntry`` followed by
        the objects returned by ``self.create_tag``.
    """

    def first_20_chars(values):
        # Input processor: keep at most 20 characters of every value.
        return [value[:20] for value in values]

    loader = ItemLoader(PA_Spud())
    for field in ("County", "Municipality", "Created_By", "Modified_By", "Well_Type"):
        setattr(loader, field + "_in", first_20_chars)

    # Earlier revisions read OPERATOR_OGO_NUM, WELL_NUM, MARCELLUS_IND,
    # HORIZONTAL_WELL_IND, CREATED_DATE/CREATED_BY, MODIFIED_DATE/MODIFIED_BY,
    # WELL_TYPE and PERMIT_NUMBER; the columns below replaced them.
    loader.add_value("OGO__", row["OGO_NUM"])
    loader.add_value("SPUD_Date", self.parse_date(row["SPUD_DATE"]))
    loader.add_value("County", row["COUNTY"])
    loader.add_value("Municipality", row["MUNICIPALITY"])
    loader.add_value("Operator_s_Name", row["OPERATOR"])
    loader.add_value("Farm_Name", row["FARM_NAME"])
    loader.add_value("Well_Number", "")  # Now included in FARM_NAME
    loader.add_value("Latitude", row["LATITUDE"])
    loader.add_value("Longitude", row["LONGITUDE"])

    configuration = row["CONFIGURATION"]
    if configuration in ("Horizontal Well", "Deviated Well"):
        horiz = "Y"
    else:
        horiz = "N"
        if configuration != "Vertical Well":
            self.log("Unknown PA Configuration: {0}.".format(configuration), log.INFO)
    loader.add_value("Horizontal_Ind_", horiz)

    loader.add_value("Well_Type", row["WELL_CODE_DESC"])
    loader.add_value("Unconventional", row["UNCONVENTIONAL"])
    loader.add_value("Region", row["REGION"])
    loader.add_value("Well_API__", "37-%s-00-00" % row["API"])
    item = loader.load_item()

    # A field the loader never populated is absent from the item, so use
    # .get() here: subscripting would raise KeyError instead of skipping.
    if not (item.get("Well_API__") and item.get("SPUD_Date")):
        return

    stats = self.crawler.stats
    existing_item = self.db.loadItem(
        item, {"Well_API__": item["Well_API__"], "SPUD_Date": item["SPUD_Date"]}
    )
    if existing_item:
        stats.inc_value("_existing_count", spider=self)
        return

    # NOTE(review): nesting was inferred from a whitespace-collapsed source;
    # confirm that only rows without an existing record are emitted/alerted.
    stats.inc_value("_new_count", spider=self)
    yield item

    # Escaped string form of every declared field, for template substitution.
    params = dict(item)
    for f in item.fields:
        params[f] = escape("%s" % params.get(f, ""))

    if task.get("no_alert"):
        return

    # create a new feed item
    feed_loader = ItemLoader(FeedEntry())
    url = "%s/%s/%s" % (task["target_url"], item["Well_API__"], item["SPUD_Date"])
    feed_entry_id = self.db.uuid3_str(name=url.encode("ASCII"))
    feed_loader.add_value("id", feed_entry_id)
    feed_loader.add_value(
        "title",
        "%s Reports Drilling Started (SPUD) in %s Township"
        % (item.get("Operator_s_Name"), item.get("Municipality")),
    )
    feed_loader.add_value("incident_datetime", item.get("SPUD_Date"))
    feed_loader.add_value("link", task["about_url"])
    feed_loader.add_value("summary", self.summary_template().substitute(params))
    feed_loader.add_value("content", self.content_template().substitute(params))
    feed_loader.add_value("lat", item.get("Latitude"))
    feed_loader.add_value("lng", item.get("Longitude"))
    feed_loader.add_value("source_id", 5)
    feed_item = feed_loader.load_item()

    # An entry without coordinates is dropped together with all of its tags.
    if not (feed_item.get("lat") and feed_item.get("lng")):
        return

    yield feed_item
    for tag in ("PADEP", "frack", "spud", "drilling"):
        yield self.create_tag(feed_entry_id, tag)
    well_type = item.get("Well_Type")
    if well_type:
        yield self.create_tag(feed_entry_id, well_type.lower())
    if item.get("Unconventional") == "Yes":
        yield self.create_tag(feed_entry_id, "unconventional")
def process_row(self, row, task):
    """Load one PA spud row into a PA_Spud item and emit it with its alert.

    Nothing is yielded when the loaded item lacks ``Well_API__`` or
    ``SPUD_Date``, or when ``self.db.loadItem`` returns a truthy record for
    that pair (only the ``_existing_count`` stat is incremented). Otherwise
    ``_new_count`` is incremented and the item is yielded; if ``task`` has
    no truthy ``no_alert`` entry, a ``FeedEntry`` with ``lat`` and ``lng``
    is yielded too, followed by its tags.

    Args:
        row: mapping keyed by source column name (``OGO_NUM``,
            ``SPUD_DATE``, ``OPERATOR``, ``CONFIGURATION``, ``API``, ...).
        task: mapping providing ``target_url`` and ``about_url`` and an
            optional ``no_alert`` flag.

    Yields:
        The ``PA_Spud`` item, then optionally the ``FeedEntry`` and the
        results of ``self.create_tag`` for each tag.
    """

    def clip(values):
        # Shared input processor: truncate each value to 20 characters.
        return [value[:20] for value in values]

    spud_loader = ItemLoader(PA_Spud())
    spud_loader.County_in = clip
    spud_loader.Municipality_in = clip
    spud_loader.Created_By_in = clip
    spud_loader.Modified_By_in = clip
    spud_loader.Well_Type_in = clip

    # Source columns used by earlier revisions and since replaced:
    # OPERATOR_OGO_NUM, WELL_NUM, MARCELLUS_IND, HORIZONTAL_WELL_IND,
    # CREATED_DATE, CREATED_BY, MODIFIED_DATE, MODIFIED_BY, WELL_TYPE,
    # PERMIT_NUMBER.
    spud_loader.add_value('OGO__', row['OGO_NUM'])
    spud_loader.add_value('SPUD_Date', self.parse_date(row['SPUD_DATE']))
    spud_loader.add_value('County', row['COUNTY'])
    spud_loader.add_value('Municipality', row['MUNICIPALITY'])
    spud_loader.add_value('Operator_s_Name', row['OPERATOR'])
    spud_loader.add_value('Farm_Name', row['FARM_NAME'])
    spud_loader.add_value('Well_Number', '')  # Now included in FARM_NAME
    spud_loader.add_value('Latitude', row['LATITUDE'])
    spud_loader.add_value('Longitude', row['LONGITUDE'])

    well_config = row['CONFIGURATION']
    if well_config in ("Horizontal Well", "Deviated Well"):
        horiz = 'Y'
    else:
        horiz = 'N'
        # Anything that is neither horizontal/deviated nor vertical is
        # logged but still recorded as non-horizontal.
        if well_config != "Vertical Well":
            self.log("Unknown PA Configuration: {0}.".format(well_config),
                     log.INFO)
    spud_loader.add_value('Horizontal_Ind_', horiz)

    spud_loader.add_value('Well_Type', row['WELL_CODE_DESC'])
    spud_loader.add_value('Unconventional', row['UNCONVENTIONAL'])
    spud_loader.add_value('Region', row['REGION'])
    spud_loader.add_value('Well_API__', '37-%s-00-00' % row['API'])
    item = spud_loader.load_item()

    # Use .get(): a field the loader left unset is missing from the item and
    # subscripting it would raise KeyError rather than skip the row.
    if not (item.get('Well_API__') and item.get('SPUD_Date')):
        return

    stats = self.crawler.stats
    lookup_key = {'Well_API__': item['Well_API__'],
                  'SPUD_Date': item['SPUD_Date']}
    if self.db.loadItem(item, lookup_key):
        stats.inc_value('_existing_count', spider=self)
        return

    # NOTE(review): block nesting was inferred from a whitespace-collapsed
    # source; confirm that alerts are meant for new records only.
    stats.inc_value('_new_count', spider=self)
    yield item

    # Template parameters: every declared field, stringified and escaped.
    params = dict(item)
    for f in item.fields:
        params[f] = escape("%s" % params.get(f, ''))

    if task.get('no_alert'):
        return

    # create a new feed item
    entry_loader = ItemLoader(FeedEntry())
    url = "%s/%s/%s" % (task['target_url'], item['Well_API__'],
                        item['SPUD_Date'])
    feed_entry_id = self.db.uuid3_str(name=url.encode('ASCII'))
    entry_loader.add_value('id', feed_entry_id)
    entry_loader.add_value(
        'title',
        "%s Reports Drilling Started (SPUD) in %s Township"
        % (item.get('Operator_s_Name'), item.get('Municipality')))
    entry_loader.add_value('incident_datetime', item.get('SPUD_Date'))
    entry_loader.add_value('link', task['about_url'])
    entry_loader.add_value('summary',
                           self.summary_template().substitute(params))
    entry_loader.add_value('content',
                           self.content_template().substitute(params))
    entry_loader.add_value('lat', item.get('Latitude'))
    entry_loader.add_value('lng', item.get('Longitude'))
    entry_loader.add_value('source_id', 5)
    feed_item = entry_loader.load_item()

    # No coordinates means no feed entry and no tags.
    if not (feed_item.get('lat') and feed_item.get('lng')):
        return

    yield feed_item
    for tag in ('PADEP', 'frack', 'spud', 'drilling'):
        yield self.create_tag(feed_entry_id, tag)
    well_type = item.get('Well_Type')
    if well_type:
        yield self.create_tag(feed_entry_id, well_type.lower())
    if item.get('Unconventional') == 'Yes':
        yield self.create_tag(feed_entry_id, 'unconventional')
def process_row(self, row, task):
    """Build a PA_Spud item from one report row and emit it if it is new.

    Steps, in order:

    1. Load the row into a ``PA_Spud`` item (selected text fields are cut
       to 20 characters; ``Horizontal_Ind_`` is derived from the
       ``CONFIGURATION`` column).
    2. Stop if the item has no ``Well_API__`` or no ``SPUD_Date``.
    3. Stop, after incrementing ``_existing_count``, if
       ``self.db.loadItem`` returns a truthy record for that pair.
    4. Increment ``_new_count`` and yield the item.
    5. Unless ``task`` has a truthy ``no_alert`` entry, build a
       ``FeedEntry``; if it has both ``lat`` and ``lng``, yield it and its
       tags.

    Args:
        row: mapping from source column name to value.
        task: mapping read for ``no_alert``, ``target_url`` and
            ``about_url``.

    Yields:
        The ``PA_Spud`` item, then optionally the ``FeedEntry`` and the
        objects returned by ``self.create_tag``.
    """

    def truncate_all(values):
        # Input processor shared by the length-limited fields.
        return [value[:20] for value in values]

    item_loader = ItemLoader(PA_Spud())
    limited_fields = ('County', 'Municipality', 'Created_By', 'Modified_By',
                      'Well_Type')
    for field_name in limited_fields:
        setattr(item_loader, '%s_in' % field_name, truncate_all)

    # Item field -> source column. Columns used by earlier revisions
    # (OPERATOR_OGO_NUM, WELL_NUM, MARCELLUS_IND, HORIZONTAL_WELL_IND,
    # CREATED_*/MODIFIED_*, WELL_TYPE, PERMIT_NUMBER) are no longer read.
    item_loader.add_value('OGO__', row['OGO_NUM'])
    item_loader.add_value('SPUD_Date', self.parse_date(row['SPUD_DATE']))
    item_loader.add_value('County', row['COUNTY'])
    item_loader.add_value('Municipality', row['MUNICIPALITY'])
    item_loader.add_value('Operator_s_Name', row['OPERATOR'])
    item_loader.add_value('Farm_Name', row['FARM_NAME'])
    item_loader.add_value('Well_Number', '')  # Now included in FARM_NAME
    item_loader.add_value('Latitude', row['LATITUDE'])
    item_loader.add_value('Longitude', row['LONGITUDE'])

    config = row['CONFIGURATION']
    is_horizontal = config in ("Horizontal Well", "Deviated Well")
    if not is_horizontal and config != "Vertical Well":
        # Unrecognised values are logged and treated as non-horizontal.
        self.log("Unknown PA Configuration: {0}.".format(config), log.INFO)
    item_loader.add_value('Horizontal_Ind_', 'Y' if is_horizontal else 'N')

    item_loader.add_value('Well_Type', row['WELL_CODE_DESC'])
    item_loader.add_value('Unconventional', row['UNCONVENTIONAL'])
    item_loader.add_value('Region', row['REGION'])
    item_loader.add_value('Well_API__', '37-%s-00-00' % row['API'])
    item = item_loader.load_item()

    # .get() instead of subscripting: an unset item field raises KeyError
    # on item[...], which would crash here instead of skipping the row.
    api_number = item.get('Well_API__')
    spud_date = item.get('SPUD_Date')
    if not (api_number and spud_date):
        return

    stats = self.crawler.stats
    existing_item = self.db.loadItem(item, {
        'Well_API__': api_number,
        'SPUD_Date': spud_date
    })
    if existing_item:
        stats.inc_value('_existing_count', spider=self)
        return

    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; confirm that only previously unseen rows are emitted.
    stats.inc_value('_new_count', spider=self)
    yield item

    # Escaped string value for every declared field, used by the templates.
    params = dict(item)
    for f in item.fields:
        params[f] = escape("%s" % params.get(f, ''))

    if task.get('no_alert'):
        return

    # create a new feed item
    feed_loader = ItemLoader(FeedEntry())
    url = "%s/%s/%s" % (task['target_url'], api_number, spud_date)
    feed_entry_id = self.db.uuid3_str(name=url.encode('ASCII'))
    title = "%s Reports Drilling Started (SPUD) in %s Township" % (
        item.get('Operator_s_Name'), item.get('Municipality'))
    feed_loader.add_value('id', feed_entry_id)
    feed_loader.add_value('title', title)
    feed_loader.add_value('incident_datetime', item.get('SPUD_Date'))
    feed_loader.add_value('link', task['about_url'])
    feed_loader.add_value('summary',
                          self.summary_template().substitute(params))
    feed_loader.add_value('content',
                          self.content_template().substitute(params))
    feed_loader.add_value('lat', item.get('Latitude'))
    feed_loader.add_value('lng', item.get('Longitude'))
    feed_loader.add_value('source_id', 5)
    feed_item = feed_loader.load_item()

    # Entries without both coordinates are dropped along with their tags.
    if not (feed_item.get('lat') and feed_item.get('lng')):
        return

    yield feed_item
    for tag in ('PADEP', 'frack', 'spud', 'drilling'):
        yield self.create_tag(feed_entry_id, tag)
    well_type = item.get('Well_Type')
    if well_type:
        yield self.create_tag(feed_entry_id, well_type.lower())
    if item.get('Unconventional') == 'Yes':
        yield self.create_tag(feed_entry_id, 'unconventional')