def test_join(self):
    """Join the main and auxiliary datasets on 'food_type'.

    The result of the join is expected to be a Dataset instance; it is
    handed to the cleanup helper once the check has passed.
    """
    self._create_aux_dataset_from_file()
    self.wait()

    joined = Dataset.join(
        self.dataset,
        self.aux_dataset,
        'food_type',
        connection=self.connection
    )

    is_dataset = isinstance(joined, Dataset)
    self.assertTrue(is_dataset)
    self._cleanup(joined)
def test_join_bad_on(self):
    """Joining on the column name 'BAD' must produce a falsy result."""
    self._create_aux_dataset_from_file()
    self.wait()

    outcome = Dataset.join(
        self.dataset,
        self.aux_dataset,
        'BAD',
        connection=self.connection
    )

    self.assertFalse(outcome)
def test_join(self):
    """Check that joining on 'food_type' returns a Dataset.

    The auxiliary dataset is created first, then joined with the main
    one using the test connection; the joined dataset is passed to the
    cleanup helper after the type check.
    """
    self._create_aux_dataset_from_file()
    self.wait()

    left, right = self.dataset, self.aux_dataset
    merged = Dataset.join(left, right, 'food_type',
                          connection=self.connection)

    self.assertTrue(isinstance(merged, Dataset))
    self._cleanup(merged)
def test_join_default_connection(self):
    """Join two CSV-backed datasets without passing a connection to join().

    Both datasets are built with ``self.default_connection``; the join
    call itself receives no ``connection`` argument.
    """
    left = Dataset(path=self.CSV_FILE, connection=self.default_connection)
    right = Dataset(path=self.AUX_CSV_FILE,
                    connection=self.default_connection)
    self.wait()

    joined = Dataset.join(left, right, 'food_type')
    self.wait()

    self.assertTrue(isinstance(joined, Dataset))

    # Hand every dataset created here to the cleanup helper, in creation order.
    for created in (left, right, joined):
        self._cleanup(created)
def test_join_default_connection(self):
    """Verify join() works when no explicit connection is given to it.

    The two source datasets are created from the CSV fixtures using
    ``self.default_connection``; the joined result must be a Dataset.
    """
    default_conn_kwargs = {'connection': self.default_connection}
    main_ds = Dataset(path=self.CSV_FILE, **default_conn_kwargs)
    aux_ds = Dataset(path=self.AUX_CSV_FILE, **default_conn_kwargs)
    self.wait()

    joined_ds = Dataset.join(main_ds, aux_ds, 'food_type')
    self.wait()

    self.assertTrue(isinstance(joined_ds, Dataset))

    # Pass the datasets to the cleanup helper: sources first, then the join.
    self._cleanup(main_ds)
    self._cleanup(aux_ds)
    self._cleanup(joined_ds)
def test_join_bad_other_dataset(self):
    """join() must raise PyBambooException when the right operand is not a Dataset."""
    # Any non-Dataset object will do; a plain Exception instance is used.
    not_a_dataset = Exception()

    with self.assertRaises(PyBambooException):
        Dataset.join(
            self.dataset,
            not_a_dataset,
            'food_type',
            connection=self.connection
        )
def main():
    """Run the full pipeline: download, clean, submit, then join datasets.

    Steps, in order:

    1. For every form in ``FORMS``, download its formhub CSV export to
       ``<form>.csv`` unless that file already exists.
    2. Run ``cleanup_ethiosis`` on the first form's CSV, writing
       ``<form>_clean.csv``.
    3. Generate the formhub submissions from the cleaned CSV, once; the
       ``submissions_done`` marker file prevents a second run.
    4. Append every ID found in ``EXISTING`` to ``AVAILABLES`` (no duplicates).
    5. For every form after the first, run ``generate_fh_steps`` once; a
       ``step<N>_done`` marker file prevents a second run.
    6. Look up the bamboo dataset ID of each form in ``NEW_FORMS`` and join
       them one after another on the ``barcode`` column.

    Progress is reported with ``print``; nothing is returned.
    """
    # Download CSV for all forms.
    for form in FORMS:
        form_csv = path(u'%s.csv' % form)
        if not form_csv.isfile():
            print(u"Downloading CSV for %s" % form)
            url = u"https://www.formhub.org/atasoils/forms/%s/data.csv" % form
            form_csv_tmp = path(download_formhub(url,
                                                 login=FH_LOGIN,
                                                 password=FH_PASSWORD))
            shutil.copy(form_csv_tmp, u'%s.csv' % form)
        print(form_csv, form_csv.isfile())

    # Parse EthioSIS and build a cleaned-up version
    cleanup_ethiosis(csv_in=u'%s.csv' % FORMS[0],
                     csv_out=u'%s_clean.csv' % FORMS[0])
    print(u"Cleanup done.")
    print(u"\n")

    # Generate FH submissions for each cleaned sample.
    submissions_done = path('submissions_done')
    if not submissions_done.isfile():
        print(u"Generating FH submissions")
        generate_fh_submission(csv_in=u'%s_clean.csv' % FORMS[0],
                               form=NEW_FORMS[0])
        submissions_done.touch()

    # flat list of available IDs to pop out
    for id_list in EXISTING.values():
        for soil_id in id_list:
            if soil_id not in AVAILABLES:
                AVAILABLES.append(soil_id)

    # Parse Steps 1-6, cleanup (duplicates), clean PC names
    for findex, form in enumerate(FORMS):
        if findex == 0:
            # The first form is the EthioSIS one handled above, not a step.
            continue

        step = u'step%d' % findex
        step_done = path(u'%s_done' % step)
        if not step_done.isfile():
            print(u"Generating STEP %d submissions" % findex)
            generate_fh_steps(csv_in=u'%s.csv' % form, form=form, step=step)
            step_done.touch()

    # join the datasets
    print(u"Joining datasets")
    joined_dataset = None
    bamboo_conn = Connection(BAMBOO_URL)
    for form in NEW_FORMS:
        # Best-effort lookup: a form whose dataset ID cannot be fetched or
        # parsed is skipped.  ``Exception`` (rather than a bare ``except``)
        # keeps Ctrl-C and SystemExit working.
        try:
            response = requests.get(PUBLIC_API_URL % {'form': form})
            form_dataset = json.loads(response.text)['bamboo_dataset']
        except Exception:
            form_dataset = u''

        if not form_dataset:
            continue

        print(u"%s: %s" % (form, form_dataset))

        # The first dataset found only seeds the chain; there is nothing to
        # join it with yet.
        if not joined_dataset:
            joined_dataset = form_dataset
            continue

        print(u"Joined dataset: %s" % joined_dataset)
        ds_joined = Dataset(connection=bamboo_conn, dataset_id=joined_dataset)
        ds_form = Dataset(connection=bamboo_conn, dataset_id=form_dataset)
        dataset = Dataset.join(left_dataset=ds_joined,
                               right_dataset=ds_form,
                               on=u'barcode',
                               connection=bamboo_conn)
        # Pause before using the freshly joined dataset in the next round.
        time.sleep(10)
        joined_dataset = dataset.id
        print(u"Merged dataset: %s" % dataset.id)

    # ``joined_dataset`` equals the last ``dataset.id`` whenever a join ran,
    # and is still defined (single ID or None) when fewer than two datasets
    # were available -- ``dataset`` itself would be unbound in that case.
    print(u"Ultimate dataset: %s" % joined_dataset)
def test_join_bad_on(self):
    """A join on the column name 'BAD' is expected to return a falsy value."""
    self._create_aux_dataset_from_file()
    self.wait()

    left, right = self.dataset, self.aux_dataset
    join_result = Dataset.join(left, right, 'BAD',
                               connection=self.connection)

    self.assertFalse(join_result)
def test_join_bad_other_dataset(self):
    """Passing a non-Dataset as the second dataset raises PyBambooException."""
    # A bare Exception instance stands in for "something that is not a Dataset".
    bogus_dataset = Exception()

    with self.assertRaises(PyBambooException):
        Dataset.join(self.dataset, bogus_dataset, 'food_type',
                     connection=self.connection)