Пример #1
0
def test_cleanvalue():
    """ see what we can do with cleaning strings"""
    assert abs(10.54 - csu.cleanvalue("10.54%")) < 0.01
    assert csu.cleanvalue("Did NOt Collect") == "did not collect"
    assert csu.cleanvalue("<0.2") == "< 0.2"
    assert csu.cleanvalue(" ") is None
    assert csu.cleanvalue("NA") == "n/a"
    assert abs(csu.cleanvalue("10") - 10.0) < 0.0001
    csu.cleanvalue("QQQ")
    assert "QQQ" in csu.CLEANVALUE_COMPLAINED
Пример #2
0
 val = worksheet.get_cell_value(1, col)
 # print("considering col:%s val:'%s'" % (col, repr(val)))
 if val is None:
     continue
 if val.lower().replace(" ", "").find("plotid") == 0:
     plotidcol = col
     # print("Plot ID column is: %s val: %s" % (plotidcol, val))
 if val.find("AGR") != 0:
     continue
 varname = val.split()[0]
 for row in range(3, worksheet.rows+1):
     plotid = worksheet.get_cell_value(row, plotidcol)
     if plotid is None:
         continue
     inval = worksheet.get_cell_value(row, col)
     val = util.cleanvalue(inval)
     if inval is not None and val is None:
         print(("harvest_agronomic found None. site: %s year: %s "
                " row: %s col: %s varname: %s"
                ) % (siteid, YEAR, row, col, varname))
     # print row, col, plotid, varname, YEAR, val
     try:
         pcursor.execute("""
             INSERT into agronomic_data
             (uniqueid, plotid, varname, year, value)
             values (%s, %s, %s, %s, %s) RETURNING value
             """, (siteid, plotid, varname, YEAR, val))
         if pcursor.rowcount == 1:
             newvals += 1
     except Exception, exp:
         print('HARVEST_AGRONOMIC TRACEBACK')
Пример #3
0
 # if depth not in allowed_depths:
 #    print 'site: %s year: %s has illegal depth: %s' % (siteid, YEAR,
 #                                                       depth)
 #    continue
 if plotid is None or depth is None:
     continue
 subsample = "1"
 for col in range(3, worksheet.cols + 1):
     if worksheet.get_cell_value(1, col) is None:
         print(
             ('harvest_soil_texture Year: %s Site: %s Col: %s is null')
             % (YEAR, siteid, col))
         continue
     varname = worksheet.get_cell_value(1, col).strip().split()[0]
     inval = worksheet.get_cell_value(row, col)
     val = util.cleanvalue(inval)
     if inval is not None and val is None:
         print(("harvest_soil_texture found None. site: %s year: %s "
                " row: %s col: %s varname: %s") %
               (siteid, YEAR, row, col, varname))
     if varname == 'subsample':
         subsample = "%.0f" % (float(val), )
         continue
     elif varname[:4] != 'SOIL':
         print(('Invalid varname: %s site: %s year: %s') %
               (worksheet.get_cell_value(1, col).strip(), siteid, YEAR))
         continue
     # if subsample != "1":
     #    continue
     try:
         pcursor.execute(
Пример #4
0
def main():
    """Go"""
    config = util.get_config()

    pgconn = psycopg2.connect(
        database="sustainablecorn", host=config["database"]["host"]
    )
    pcursor = pgconn.cursor()

    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)

    res = (
        drive_client.files()
        .list(q="title contains 'Soil Nitrate Data'")
        .execute()
    )

    # Load up what data we have for this year
    current = {}
    pcursor.execute(
        """
        SELECT uniqueid, plotid, varname, depth, subsample, sampledate
        from soil_data WHERE year = %s and varname in %s
    """,
        (YEAR, tuple(DOMAIN)),
    )
    for row in pcursor:
        key = "%s|%s|%s|%s|%s|%s" % row
        current[key] = True

    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        worksheet = spreadsheet.worksheets.get(YEAR)
        if worksheet is None:
            continue
        worksheet.get_cell_feed()
        siteid = item["title"].split()[0]
        if worksheet.get_cell_value(1, 1) != "plotid":
            print(
                ('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping')
                % (siteid, YEAR, worksheet.get_cell_value(1, 1))
            )
            continue
        startcol = 3
        if worksheet.get_cell_value(1, 2) == "depth":
            depthcol = 2
        elif worksheet.get_cell_value(1, 3) == "depth":
            depthcol = 3
            startcol = 4
        if worksheet.get_cell_value(1, 2) == "location":
            locationcol = 2
        else:
            locationcol = None

        for row in range(3, worksheet.rows + 1):
            plotid = worksheet.get_cell_value(row, 1)
            depth = worksheet.get_cell_value(row, depthcol)
            if depth.find(" to ") == -1:
                print(
                    ("harvest_soil_nitrate found invalid depth: %s %s %s")
                    % (depth, siteid, YEAR)
                )
                continue
            if plotid is None or depth is None:
                continue
            subsample = "1"
            if locationcol is not None:
                subsample = worksheet.get_cell_value(row, locationcol)

            for col in range(startcol, worksheet.cols + 1):
                if worksheet.get_cell_value(1, col) is None:
                    print(
                        ("h_soil_nitrate site: %s year: %s col: %s is null")
                        % (siteid, YEAR, col)
                    )
                    continue
                colheading = worksheet.get_cell_value(1, col).strip()
                if not colheading.startswith("SOIL"):
                    print(
                        ("Invalid colheading: %s site: %s year: %s")
                        % (colheading, siteid, YEAR)
                    )
                    continue
                # Attempt to tease out the sampledate
                tokens = colheading.split()
                varname = tokens[0]
                datetest = tokens[1]
                if len(datetest.split("/")) == 3:
                    date = datetime.datetime.strptime(datetest, "%m/%d/%Y")
                else:
                    if row == 3:
                        print(
                            ("h_soil_nitrate %s[%s] unknown sample date %s")
                            % (siteid, YEAR, repr(colheading))
                        )
                    date = DUMMY_DATES.get(datetest, None)
                    if date is None and row == 3:
                        print(
                            (
                                "FIXME h_soil_nitrate %s[%s] "
                                "double unknown date %s"
                            )
                            % (siteid, YEAR, repr(colheading))
                        )
                inval = worksheet.get_cell_value(row, col)
                val = util.cleanvalue(inval)
                if inval is not None and val is None:
                    print(
                        (
                            "harvest_soil_nitrate found None. "
                            "site: %s year: %s "
                            " row: %s col: %s varname: %s"
                        )
                        % (siteid, YEAR, row, col, varname)
                    )
                if varname not in DOMAIN:
                    print(
                        (
                            "harvest_soil_nitrate %s[%s] "
                            "found additional var: %s"
                        )
                        % (siteid, YEAR, varname)
                    )
                    DOMAIN.append(varname)
                key = ("%s|%s|%s|%s|%s|%s") % (
                    siteid,
                    plotid,
                    varname,
                    depth,
                    subsample,
                    date if date is None else date.strftime("%Y-%m-%d"),
                )
                if key in current:
                    del current[key]
                    continue
                try:
                    pcursor.execute(
                        """
                        INSERT into soil_data(uniqueid, plotid, varname, year,
                        depth, value, subsample, sampledate)
                        values (%s, %s, %s, %s, %s, %s, %s, %s)
                        """,
                        (
                            siteid,
                            plotid,
                            varname,
                            YEAR,
                            depth,
                            val,
                            subsample,
                            date,
                        ),
                    )
                except Exception as exp:
                    print(
                        ("site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK")
                        % (siteid, YEAR)
                    )
                    print(exp)
                    print(
                        ("%s %s %s %s %s %s")
                        % (siteid, plotid, varname, depth, date, val)
                    )
                    sys.exit()

    for key in current:
        (siteid, plotid, varname, depth, subsample, date) = key.split("|")
        if date != "None":
            datesql = " and sampledate = '%s' " % (date,)
        else:
            datesql = " and sampledate is null "
        print(
            ("h_soil_nitrate rm %s %s %s %s %s %s %s")
            % (YEAR, siteid, plotid, varname, depth, subsample, date)
        )
        pcursor.execute(
            """
            DELETE from soil_data where uniqueid = %s and
            plotid = %s and varname = %s and year = %s and depth = %s and
            subsample = %s """
            + datesql
            + """
        """,
            (siteid, plotid, varname, YEAR, depth, subsample),
        )

    pcursor.close()
    pgconn.commit()
    pgconn.close()
Пример #5
0
 def test_cleanvalue(self):
     """ see what we can do with cleaning strings"""
     self.assertAlmostEquals(10.54, cleanvalue("10.54%"), 2)
     self.assertEquals(cleanvalue('Did NOt Collect'), 'did not collect')
     self.assertEquals(cleanvalue('<0.2'), '< 0.2')
     self.assertTrue(cleanvalue(' ') is None)
Пример #6
0
def test_cleanvalue():
    """ see what we can do with cleaning strings"""
    assert abs(10.54 - cleanvalue("10.54%")) < 0.01
    assert cleanvalue('Did NOt Collect') == 'did not collect'
    assert cleanvalue('<0.2') == '< 0.2'
    assert cleanvalue(' ') is None
Пример #7
0
def main():
    """Go"""
    config = util.get_config()

    pgconn = psycopg2.connect(database='sustainablecorn',
                              host=config['database']['host'])
    pcursor = pgconn.cursor()

    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)

    res = drive_client.files(
            ).list(q="title contains 'Soil Nitrate Data'").execute()

    # Load up what data we have for this year
    current = {}
    pcursor.execute("""
        SELECT uniqueid, plotid, varname, depth, subsample, sampledate
        from soil_data WHERE year = %s and varname in %s
    """, (YEAR, tuple(DOMAIN)))
    for row in pcursor:
        key = "%s|%s|%s|%s|%s|%s" % row
        current[key] = True

    for item in res['items']:
        if item['mimeType'] != 'application/vnd.google-apps.spreadsheet':
            continue
        spreadsheet = util.Spreadsheet(spr_client, item['id'])
        spreadsheet.get_worksheets()
        worksheet = spreadsheet.worksheets.get(YEAR)
        if worksheet is None:
            # print("Missing Year: %s from %s" % (YEAR, spreadsheet.title))
            continue
        worksheet.get_cell_feed()
        siteid = item['title'].split()[0]
        # print 'Processing %s Soil Nitrate Year %s' % (siteid, YEAR),
        if worksheet.get_cell_value(1, 1) != 'plotid':
            print(('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping'
                   ) % (siteid, YEAR, worksheet.get_cell_value(1, 1)))
            continue
        startcol = 3
        if worksheet.get_cell_value(1, 2) == 'depth':
            depthcol = 2
        elif worksheet.get_cell_value(1, 3) == 'depth':
            depthcol = 3
            startcol = 4
        if worksheet.get_cell_value(1, 2) == 'location':
            locationcol = 2
        else:
            locationcol = None

        for row in range(3, worksheet.rows+1):
            plotid = worksheet.get_cell_value(row, 1)
            depth = worksheet.get_cell_value(row, depthcol)
            if depth.find(" to ") == -1:
                print(("harvest_soil_nitrate found invalid depth: %s %s %s"
                       ) % (depth, siteid, YEAR))
                continue
            if plotid is None or depth is None:
                continue
            subsample = "1"
            if locationcol is not None:
                subsample = worksheet.get_cell_value(row, locationcol)

            for col in range(startcol, worksheet.cols+1):
                if worksheet.get_cell_value(1, col) is None:
                    print(("h_soil_nitrate site: %s year: %s col: %s is null"
                           ) % (siteid, YEAR, col))
                    continue
                colheading = worksheet.get_cell_value(1, col).strip()
                if not colheading.startswith('SOIL'):
                    print(('Invalid colheading: %s site: %s year: %s'
                           ) % (colheading, siteid, YEAR))
                    continue
                # Attempt to tease out the sampledate
                tokens = colheading.split()
                varname = tokens[0]
                datetest = tokens[1]
                if len(datetest.split("/")) == 3:
                    date = datetime.datetime.strptime(datetest, '%m/%d/%Y')
                else:
                    if row == 3:
                        print(("h_soil_nitrate %s[%s] unknown sample date %s"
                               ) % (siteid, YEAR, repr(colheading)))
                    date = DUMMY_DATES.get(datetest, None)
                    if date is None and row == 3:
                        print(("FIXME h_soil_nitrate %s[%s] "
                               "double unknown date %s"
                               ) % (siteid, YEAR, repr(colheading)))
                inval = worksheet.get_cell_value(row, col)
                val = util.cleanvalue(inval)
                if inval is not None and val is None:
                    print(("harvest_soil_nitrate found None. "
                           "site: %s year: %s "
                           " row: %s col: %s varname: %s"
                           ) % (siteid, YEAR, row, col, varname))
                if varname not in DOMAIN:
                    print(("harvest_soil_nitrate %s[%s] "
                           "found additional var: %s"
                           ) % (siteid, YEAR, varname))
                    DOMAIN.append(varname)
                key = ("%s|%s|%s|%s|%s|%s"
                       ) % (siteid, plotid, varname, depth, subsample,
                            date
                            if date is None
                            else date.strftime("%Y-%m-%d"))
                if key in current:
                    del current[key]
                    continue
                try:
                    pcursor.execute("""
                        INSERT into soil_data(uniqueid, plotid, varname, year,
                        depth, value, subsample, sampledate)
                        values (%s, %s, %s, %s, %s, %s, %s, %s)
                        """, (siteid, plotid, varname, YEAR, depth, val,
                              subsample, date))
                except Exception as exp:
                    print(('site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK'
                           ) % (siteid, YEAR))
                    print(exp)
                    print(('%s %s %s %s %s %s'
                           ) % (siteid, plotid, varname, depth,
                                date, val))
                    sys.exit()

    for key in current:
        (siteid, plotid, varname, depth, subsample, date) = key.split("|")
        if date != 'None':
            datesql = " and sampledate = '%s' " % (date, )
        else:
            datesql = " and sampledate is null "
        print(('h_soil_nitrate rm %s %s %s %s %s %s %s'
               ) % (YEAR, siteid, plotid, varname, depth, subsample,
                    date))
        pcursor.execute("""
            DELETE from soil_data where uniqueid = %s and
            plotid = %s and varname = %s and year = %s and depth = %s and
            subsample = %s """ + datesql + """
        """, (siteid, plotid, varname, YEAR, depth, subsample))

        # print "...done"
    pcursor.close()
    pgconn.commit()
    pgconn.close()
Пример #8
0
 def test_cleanvalue(self):
     """ see what we can do with cleaning strings"""
     self.assertAlmostEquals(10.54, cleanvalue("10.54%"), 2)
     self.assertEquals(cleanvalue('Did NOt Collect'), 'did not collect')
     self.assertEquals(cleanvalue('<0.2'), '< 0.2')
     self.assertTrue(cleanvalue(' ') is None)