Exemplo n.º 1
0
 def test_join(self):
     """Joining the main and aux datasets on 'food_type' yields a Dataset."""
     # Presumably this helper populates self.aux_dataset, which is read
     # below -- confirm against the helper's definition.
     self._create_aux_dataset_from_file()
     self.wait()
     result = Dataset.join(self.dataset,
                           self.aux_dataset,
                           'food_type',
                           connection=self.connection)
     # assertIsInstance has the same pass/fail outcome as
     # assertTrue(isinstance(...)) but reports the offending value's type
     # when it fails.
     self.assertIsInstance(result, Dataset)
     self._cleanup(result)
Exemplo n.º 2
0
 def test_join_bad_on(self):
     """Joining on the column name 'BAD' must give back a falsy result."""
     self._create_aux_dataset_from_file()
     self.wait()
     # 'BAD' is the join column under test; the expectation is only that
     # the returned value is falsy.
     self.assertFalse(
         Dataset.join(self.dataset, self.aux_dataset, 'BAD',
                      connection=self.connection))
Exemplo n.º 3
0
 def test_join(self):
     """Joining the main and aux datasets on 'food_type' yields a Dataset."""
     # Presumably this helper populates self.aux_dataset, which is read
     # below -- confirm against the helper's definition.
     self._create_aux_dataset_from_file()
     self.wait()
     result = Dataset.join(self.dataset,
                           self.aux_dataset,
                           'food_type',
                           connection=self.connection)
     # assertIsInstance has the same pass/fail outcome as
     # assertTrue(isinstance(...)) but reports the offending value's type
     # when it fails.
     self.assertIsInstance(result, Dataset)
     self._cleanup(result)
Exemplo n.º 4
0
 def test_join_default_connection(self):
     """Join two datasets without passing ``connection`` to Dataset.join.

     Both inputs are built with ``self.default_connection``; the join call
     itself receives no explicit connection. The result must be a Dataset.
     """
     dataset = Dataset(path=self.CSV_FILE,
                       connection=self.default_connection)
     aux_dataset = Dataset(path=self.AUX_CSV_FILE,
                           connection=self.default_connection)
     result = None
     try:
         self.wait()
         result = Dataset.join(dataset, aux_dataset, 'food_type')
         self.wait()
         self.assertIsInstance(result, Dataset)
     finally:
         # Run the cleanup even when the join raises or the assertion
         # fails, so the datasets created above are not left behind.
         self._cleanup(dataset)
         self._cleanup(aux_dataset)
         # Only a real Dataset is handed to _cleanup; a failed join may
         # leave ``result`` as None or another non-Dataset value.
         if isinstance(result, Dataset):
             self._cleanup(result)
Exemplo n.º 5
0
 def test_join_default_connection(self):
     """Join two datasets without passing ``connection`` to Dataset.join.

     Both inputs are built with ``self.default_connection``; the join call
     itself receives no explicit connection. The result must be a Dataset.
     """
     dataset = Dataset(path=self.CSV_FILE,
                       connection=self.default_connection)
     aux_dataset = Dataset(path=self.AUX_CSV_FILE,
                           connection=self.default_connection)
     result = None
     try:
         self.wait()
         result = Dataset.join(dataset, aux_dataset, 'food_type')
         self.wait()
         self.assertIsInstance(result, Dataset)
     finally:
         # Run the cleanup even when the join raises or the assertion
         # fails, so the datasets created above are not left behind.
         self._cleanup(dataset)
         self._cleanup(aux_dataset)
         # Only a real Dataset is handed to _cleanup; a failed join may
         # leave ``result`` as None or another non-Dataset value.
         if isinstance(result, Dataset):
             self._cleanup(result)
Exemplo n.º 6
0
 def test_join_bad_other_dataset(self):
     """Dataset.join must raise PyBambooException for a non-Dataset operand."""
     # A plain Exception instance stands in for the second dataset.
     not_a_dataset = Exception()
     with self.assertRaises(PyBambooException):
         Dataset.join(self.dataset, not_a_dataset, 'food_type',
                      connection=self.connection)
Exemplo n.º 7
0
def main():
    """Download, clean, submit and join the form data sets.

    Steps, in order:

    1. Download the CSV export of every form in ``FORMS`` unless a local
       ``<form>.csv`` already exists.
    2. Clean the first form's CSV with ``cleanup_ethiosis``.
    3. Generate submissions from the cleaned CSV, once; the
       ``submissions_done`` marker file records completion.
    4. Flatten the ID lists held in ``EXISTING`` into ``AVAILABLES``.
    5. Generate step submissions for every form after the first, each
       guarded by its own ``stepN_done`` marker file.
    6. Join the bamboo datasets of all ``NEW_FORMS`` on ``barcode`` and
       print the ID of the final dataset.
    """
    # Download CSV for all forms.
    for form in FORMS:
        form_csv = path(u'%s.csv' % form)
        if not form_csv.isfile():
            print(u"Downloading CSV for %s" % form)
            url = u"https://www.formhub.org/atasoils/forms/%s/data.csv" % form
            form_csv_tmp = path(download_formhub(url,
                                                 login=FH_LOGIN,
                                                 password=FH_PASSWORD))
            shutil.copy(form_csv_tmp, u'%s.csv' % form)
        print(form_csv, form_csv.isfile())

    # Parse EthioSIS and build a cleaned-up version
    cleanup_ethiosis(csv_in=u'%s.csv' % FORMS[0],
                     csv_out=u'%s_clean.csv' % FORMS[0])
    print(u"Cleanup done.")
    print(u"\n")

    # Generate FH submissions for each cleaned sample.
    # The marker file makes this step run only once across invocations.
    submissions_done = path('submissions_done')
    if not submissions_done.isfile():
        print(u"Generating FH submissions")
        generate_fh_submission(csv_in=u'%s_clean.csv' % FORMS[0],
                               form=NEW_FORMS[0])
        submissions_done.touch()

    # flat list of available IDs to pop out
    for id_list in EXISTING.values():
        for soil_id in id_list:
            if soil_id not in AVAILABLES:
                AVAILABLES.append(soil_id)

    # Parse Steps 1-6, cleanup (duplicates), clean PC names
    for findex, form in enumerate(FORMS):
        if findex == 0:
            # The first form was already handled by the cleanup above.
            continue
        step = u'step%d' % findex
        step_done = path(u'%s_done' % step)
        if not step_done.isfile():
            print(u"Generating STEP %d submissions" % findex)
            generate_fh_steps(csv_in=u'%s.csv' % form,
                              form=form,
                              step=step)
            step_done.touch()

    # join the datasets
    print(u"Joining datasets")
    joined_dataset = None
    bamboo_conn = Connection(BAMBOO_URL)
    for form in NEW_FORMS:
        # Best effort: a form whose dataset ID cannot be fetched or parsed
        # is skipped. ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating.
        try:
            response = requests.get(PUBLIC_API_URL % {'form': form})
            form_dataset = json.loads(response.text)['bamboo_dataset']
        except Exception:
            form_dataset = u''

        if not form_dataset:
            continue

        print(u"%s: %s" % (form, form_dataset))

        # The first dataset found seeds the chain; nothing to join yet.
        if not joined_dataset:
            joined_dataset = form_dataset
            continue

        print(u"Joined dataset: %s" % joined_dataset)

        ds_joined = Dataset(connection=bamboo_conn, dataset_id=joined_dataset)
        ds_form = Dataset(connection=bamboo_conn, dataset_id=form_dataset)
        # NOTE(review): if Dataset.join can return a falsy value on failure,
        # the ``.id`` accesses below raise AttributeError -- confirm and
        # decide how a failed join should be reported.
        dataset = Dataset.join(left_dataset=ds_joined,
                               right_dataset=ds_form,
                               on=u'barcode',
                               connection=bamboo_conn)
        time.sleep(10)
        joined_dataset = dataset.id

        print(u"Merged dataset: %s" % dataset.id)

    # ``joined_dataset`` equals the last ``dataset.id`` whenever a join ran,
    # and unlike ``dataset`` it is also bound when fewer than two forms had
    # a dataset (the original raised a NameError in that case).
    print(u"Ultimate dataset: %s" % joined_dataset)
Exemplo n.º 8
0
 def test_join_bad_on(self):
     """Joining on the column name 'BAD' must give back a falsy result."""
     self._create_aux_dataset_from_file()
     self.wait()
     # 'BAD' is the join column under test; the expectation is only that
     # the returned value is falsy.
     self.assertFalse(
         Dataset.join(self.dataset,
                      self.aux_dataset,
                      'BAD',
                      connection=self.connection))
Exemplo n.º 9
0
 def test_join_bad_other_dataset(self):
     """Dataset.join must raise PyBambooException for a non-Dataset operand."""
     # A plain Exception instance stands in for the second dataset.
     not_a_dataset = Exception()
     with self.assertRaises(PyBambooException):
         Dataset.join(self.dataset,
                      not_a_dataset,
                      'food_type',
                      connection=self.connection)