def test_dev2_feed(self):
    """
    Test that a feed file parsed by DEv2JobFeed reports the expected
    jsid, job source name, and job count.
    """
    filepath = download_feed_file(self.buid_id)
    results = DEv2JobFeed(filepath)
    jobs = results.jobparse()
    self.assertEqual(results.jsid, self.buid_id)
    self.assertEqual(results.job_source_name, self.businessunit.title)
    self.assertEqual(len(jobs), self.numjobs)
def test_salt_date(self):
    """
    Test to ensure that job postings show up in a quasi-random fashion
    by sorting on the `salted_date` attribute in the index instead of
    strictly by `date_new`.
    """
    filepath = download_feed_file(self.buid_id)
    jobs = DEv2JobFeed(filepath)
    solrjobs = jobs.solr_jobs()
    self.conn.add(solrjobs)
    results = self.conn.search(q="*:*", sort="salted_date asc")
    self.assertEqual(self.numjobs, results.hits)
    # We can't reliably test for inequality between the two orderings,
    # since results.docs will sometimes equal results2.docs. Instead,
    # verify that both sorts return the same set of documents.
    results2 = self.conn.search(q="*:*", sort="date_new asc")
    self.assertItemsEqual(results2.docs, results.docs)
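# A minimal sketch of where the quasi-random ordering above could come
# from, assuming `salted_date` is `date_new` shifted by a bounded random
# offset. The actual derivation lives in the indexing code and may differ;
# `salt_date` and `max_offset_days` are illustrative names, not part of
# this module.
import datetime
import random

def salt_date(date_new, max_offset_days=30):
    # Shift the real posting date by a random number of seconds so that
    # sorting on salted_date interleaves jobs quasi-randomly while the
    # document set itself stays identical to a date_new sort.
    offset_seconds = random.randint(0, max_offset_days * 24 * 3600)
    return date_new + datetime.timedelta(seconds=offset_seconds)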
def test_zipcode(self):
    """
    Test to ensure proper behavior of the zipcode field when jobs are
    entered in Solr.
    """
    filepath = download_feed_file(self.buid_id)
    dbresults = DEv2JobFeed(filepath)
    solrresults = dbresults.solr_jobs()
    zips_from_feedfile = ['30269', '30269', '48332', '30269', '30269',
                          '30269', '30269', '30269', '48332', '48332',
                          '30269', None, '30269', '30269']
    solrzips = [i['zipcode'] for i in solrresults]
    self.assertItemsEqual(zips_from_feedfile, solrzips)
def test_mocids(self):
    """
    Tests that mocid fields exist when jobs are imported from a feed and
    added to a Solr connection.
    """
    filepath = download_feed_file(self.buid_id)
    results = DEv2JobFeed(filepath)
    jobs = results.solr_jobs()
    # Since we're going to be adding/updating data in the Solr index, we're
    # hardcoding in the local Solr instance so that we don't accidentally
    # alter production data.
    self.conn.add(jobs)
    num_hits = self.conn.search(q="*:*",
                                fq="buid:%s -mocid:[* TO *]" % self.buid_id)
    self.assertEqual(num_hits.hits, self.numjobs)
    for job in jobs:
        self.assertTrue('mocid' in job)
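# Note on the filter query above: in Solr syntax, `mocid:[* TO *]` matches
# documents where the `mocid` field is present, and the leading `-` negates
# it. The fq therefore selects jobs for this buid that were indexed with no
# mocid value:
#
#     self.conn.search(q="*:*", fq="buid:%s -mocid:[* TO *]" % self.buid_id)
#
# So the hit-count assertion checks that none of the imported jobs carried
# an indexed mocid, while the loop checks that the 'mocid' key is still
# present on each job document before it is indexed.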
def test_date_updated(self):
    """
    Test to ensure proper behavior of the date_updated field when added
    to Solr.
    """
    filepath = download_feed_file(self.buid_id)
    jobs = DEv2JobFeed(filepath)
    solrjobs = jobs.solr_jobs()
    self.conn.add(solrjobs)
    dates_updated = [datetime.datetime.strptime("4/16/2015 11:35:13 PM",
                                                "%m/%d/%Y %I:%M:%S %p"),
                     datetime.datetime.strptime("4/16/2015 11:35:14 PM",
                                                "%m/%d/%Y %I:%M:%S %p"),
                     datetime.datetime.strptime("4/16/2015 11:35:15 PM",
                                                "%m/%d/%Y %I:%M:%S %p")]
    solr_dates = [i['date_updated'] for i in solrjobs]
    for solr_date in solr_dates:
        self.assertIn(solr_date, dates_updated)
def seoxml_to_mongo(buid, data_dir=DATA_DIR):
    filepath = download_feed_file(buid, data_dir=data_dir)
    jobfeed = DEv2JobFeed(filepath, jsid=buid, markdown=False, company=None)
    # If the feed file did not pass validation, log the failure and raise a
    # FeedImportError carrying the error details.
    if jobfeed.errors:
        error = jobfeed.error_messages
        logging.error("BUID:%s - Feed file has failed validation on line %s. "
                      "Exception: %s" % (buid, error['line'],
                                         error['exception']))
        raise FeedImportError(error)
    # Parse the feed and build the Solr-style job documents.
    jobfeed.jobparse()
    jobs = jobfeed.solr_jobs()
    # Upsert each job into the MongoDB 'jobs' collection, keyed on guid.
    collection = connect_db().db.jobs
    bulk = collection.initialize_unordered_bulk_op()
    for job in jobs:
        bulk.find({'guid': job['guid']}).upsert().replace_one(job)
    bulk.execute()
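# A hedged usage sketch for seoxml_to_mongo. `import_single_buid` and the
# buid value are made-up illustrations; the error dict shape
# ({'line': ..., 'exception': ...}) follows the logging call above.
def import_single_buid(buid):
    try:
        seoxml_to_mongo(buid)
    except FeedImportError as e:
        # The raised error wraps jobfeed.error_messages; log and move on
        # rather than aborting a batch of imports.
        logging.warning("Skipping buid %s after failed import: %s"
                        % (buid, e))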
def _get_feedfile(self):
    # Download the 'real' feed file, back it up alongside itself, then
    # copy the empty feed file into its place.
    realfeed = download_feed_file(self.buid_id)
    shutil.copyfile(realfeed, "%s.bak" % realfeed)
    shutil.copyfile(self.emptyfeed, realfeed)