Пример #1
0
def write_json(dat_path, target_path=None):
    """Convert the space-delimited measles data file into a JSON dataset.

    Every row of ``dat_path`` must hold exactly two fields: a time label
    and an integer value. Rows are keyed by the time label (a repeated
    label keeps its last value) and emitted in sorted label order.

    Args:
        dat_path: path of the ASCII, space-delimited input file.
        target_path: path of the JSON file to write.
    """
    with open(dat_path, "r", newline="", encoding="ascii") as source:
        pairs = list(
            clevercsv.reader(
                source, delimiter=" ", quotechar="", escapechar=""
            )
        )

    counts = {}
    for stamp, count in pairs:
        counts[stamp] = int(count)

    # Labels are strings, so this ordering is lexicographic.
    time = sorted(counts)
    series = [
        {
            "label": "V1",
            "type": "int",
            "raw": [counts[stamp] for stamp in time],
        }
    ]

    time_block = {
        "type": "string",
        "format": "%Y-%F",
        "index": list(range(len(time))),
        "raw": time,
    }
    data = {
        "name": "measles",
        "longname": "Measles cases (England & Wales)",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": time_block,
        "series": series,
    }

    with open(target_path, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #2
0
def main():
    """Extract the "Iran, Islamic Rep." row of the input CSV into a JSON dataset.

    The first four rows of the input are skipped and the fifth is used as
    the header. In the selected country row, every column whose header
    parses as an integer and whose cell is non-empty becomes one
    ``(year, value)`` observation.

    Raises:
        ValueError: if no row has "Country Name" equal to
            "Iran, Islamic Rep.".
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="UTF-8-SIG") as fp:
        reader = clevercsv.reader(
            fp, delimiter=",", quotechar='"', escapechar=""
        )
        rows = list(reader)
    # Drop the four leading rows; the next row is the header.
    rows = rows[4:]
    header = rows.pop(0)

    as_dicts = [dict(zip(header, row)) for row in rows]

    iran = next(
        (d for d in as_dicts if d["Country Name"] == "Iran, Islamic Rep."),
        None,
    )
    if iran is None:
        # Fail with a clear message instead of a TypeError from iterating None.
        raise ValueError(
            "No row with Country Name 'Iran, Islamic Rep.' found in %r"
            % (args.input_file,)
        )

    tuples = []
    for key, cell in iran.items():
        try:
            ikey = int(key)
        except ValueError:
            # Not a year column (e.g. a descriptive column); skip it.
            continue
        if not cell:
            # No observation recorded for this year.
            continue
        tuples.append((ikey, float(cell)))

    name = "gdp_iran"
    longname = "GDP Iran"
    time = [str(year) for year, _ in tuples]
    time_fmt = "%Y"
    series = [
        {
            "label": "GDP (constant LCU)",
            "type": "float",
            "raw": [value for _, value in tuples],
        }
    ]

    data = {
        "name": name,
        "longname": longname,
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": time_fmt,
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")
Пример #3
0
 def test_read_dict_fieldnames_from_file(self):
     """DictReader accepts field names taken from the file by a plain reader."""
     with tempfile.TemporaryFile("w+") as handle:
         handle.write("f1,f2,f3\r\n1,2,abc\r\n")
         handle.seek(0)
         # Consume the first row with a plain reader and use it as the names.
         names = next(clevercsv.reader(handle))
         dict_reader = clevercsv.DictReader(handle, fieldnames=names)
         self.assertEqual(dict_reader.fieldnames, ["f1", "f2", "f3"])
         self.assertEqual(
             next(dict_reader), {"f1": "1", "f2": "2", "f3": "abc"}
         )
Пример #4
0
    def test_with_gen(self):
        """The reader accepts a generator of lines as its input."""
        lines = ["line,1", "line,2", "line,3"]
        parsed = clevercsv.reader(line for line in lines)
        for expected in (["line", "1"], ["line", "2"], ["line", "3"]):
            self.assertEqual(next(parsed), expected)
Пример #5
0
 def test_read_linenum(self):
     """line_num starts at 0, grows by one per row, and holds after exhaustion."""
     reader = clevercsv.reader(["line,1", "line,2", "line,3"])
     self.assertEqual(reader.line_num, 0)
     for count, suffix in enumerate("123", start=1):
         self.assertEqual(next(reader), ["line", suffix])
         self.assertEqual(reader.line_num, count)
     # Hitting the end must not advance the counter.
     self.assertRaises(StopIteration, next, reader)
     self.assertEqual(reader.line_num, 3)
Пример #6
0
    def shelf(self):
        """Load the scraped Speculative Fiction Database catalog.

        Reads ``isfdb_catalog.csv`` (relative to the current working
        directory) and returns a list of title/author pairs built from the
        first two fields of every row that has at least two fields.

        Returns:
            list: ``[first_field, second_field]`` lists, in file order.
        """
        # newline="" leaves line-ending handling to the CSV parser, so
        # newlines embedded in fields are not translated by open().
        with open(
            "isfdb_catalog.csv", "r", newline="", encoding="UTF-8"
        ) as catalog_file:
            reader = clevercsv.reader(catalog_file)
            # Rows with fewer than two fields cannot form a pair; skip them.
            return [[row[0], row[1]] for row in reader if len(row) > 1]
Пример #7
0
    def get_data(self):
        """Parse ``self.data`` with ``self.dialect`` and dump it to a temp CSV.

        Missing cells (NaN) are replaced by empty strings, the frame is
        written without index or header to a freshly created temporary
        ``clevercsv_*.csv`` file, and the file path is printed to stdout.

        Raises:
            AIAssistantInfo: if ``self.dialect`` is None; the message comes
                from ``tie_break_message``.
        """
        buf = io.StringIO(self.data)
        if self.dialect is None:
            raise AIAssistantInfo(tie_break_message(buf))

        records = list(clevercsv.reader(buf, self.dialect))
        frame = pd.DataFrame.from_records(records).replace(
            np.nan, "", regex=True
        )

        fd, out_path = tempfile.mkstemp(prefix="clevercsv_", suffix=".csv")
        with os.fdopen(fd, "w") as out:
            frame.to_csv(out, index=False, header=False)

        # Flush right after printing the path.
        print(out_path)
        sys.stdout.flush()
Пример #8
0
 def get_rows(
         self, text: str,
         processed_cmd: ProcessedCommand) -> (List[Any], Mapping[int, str]):
     """Split delimited ``text`` into data rows and an optional header row.

     The dialect is sniffed from the first 10000 characters of ``text``,
     restricted to ``processed_cmd.delimiter`` when one is set. Blank lines
     are dropped and surrounding whitespace is stripped from every line
     before parsing.

     Returns:
         A ``(rows, headers)`` pair. ``headers`` is the first parsed row
         when ``processed_cmd.has_header`` is truthy and at least one row
         exists, otherwise an empty list.
     """
     # NOTE(review): the annotation promises a Mapping for the second item,
     # but a list is what is actually returned.
     delimiters = None
     if processed_cmd.delimiter:
         delimiters = [processed_cmd.delimiter]
     dialect = clevercsv.Sniffer().sniff(text[:10000], delimiters=delimiters)

     stripped = (line.strip() for line in text.split("\n"))
     raw_lines = [line for line in stripped if line]

     rows = list(clevercsv.reader(raw_lines, dialect=dialect))
     if processed_cmd.has_header and rows:
         return rows[1:], rows[0]
     return rows, []
Пример #9
0
def main():
    """Convert the run-log CSV given on the command line into a JSON dataset.

    The first row is treated as a header and discarded. Column 0 supplies
    the timestamp (trailing "Z" removed, "T" replaced by a space), column 3
    the pace and column 4 the distance.
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as src:
        rows = list(
            clevercsv.reader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Discard the header row.
    rows.pop(0)

    time = [row[0].rstrip("Z").replace("T", " ") for row in rows]
    pace = [float(row[3]) for row in rows]
    distance = [float(row[4]) for row in rows]

    series = [
        {"label": "Pace", "type": "float", "raw": pace},
        {"label": "Distance", "type": "float", "raw": distance},
    ]
    time_block = {
        "type": "string",
        "format": "%Y-%m-%d %H:%M:%S",
        "index": list(range(len(time))),
        "raw": time,
    }
    data = {
        "name": "run_log",
        "longname": "Run Log",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": time_block,
        "series": series,
    }

    with open(args.output_file, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #10
0
def find_header(iostream, **kwargs):
    """From an open csv file object, locate the header and return the data from there.

    Args:
        iostream (_io.TextIOWrapper): fileobj containing csv data.
        **kwargs (dict): forwarded unchanged to ``analyze_csv_format``. An
            optional ``cleaning_func`` entry is also applied to every cell
            of a record before it is compared with the raw headers.

    Returns:
        iterable: csv records starting at the header line. This is a plain
        ``csv.reader`` when no raw headers were supplied but a header was
        detected, otherwise a chain of the matched header record followed
        by the remaining records.

    Raises:
        csv.Error: no raw headers were supplied and none were detected.
        ValueError: raw headers were supplied but no record contains them.
    """
    delimiter, has_header, raw_headers = analyze_csv_format(iostream, **kwargs)

    if not raw_headers:
        # The user did not provide the headers but the sniffer found some.
        if has_header:
            return csv.reader(iostream, delimiter=delimiter)
        # No user-provided headers and the sniffer found none either, so
        # the header line cannot be located. Both sentences are a single
        # message; passing them as two arguments would render as a tuple.
        raise csv.Error(
            'csv.Sniffer() could not detect file headers and modelmapper was not provided the raw headers. '
            'Please add a subset of the raw headers to the `identify_header_by_column_names` key in your setup.toml.'
        )

    records = csv.reader(iostream, delimiter=delimiter)
    cleaning_func = kwargs.get('cleaning_func') or do_nothing
    for record in records:
        # A record is the header line when the raw headers are a subset of
        # its cleaned cells (presumably raw_headers is a set; verify
        # against analyze_csv_format).
        if record and raw_headers <= set(map(cleaning_func, record)):
            # Put the consumed header record back in front of the rest.
            return chain([record], records)
    raise ValueError(
        'Could not find the headers line. Please double check the identify_header_by_column_names that were provided.'
    )
Пример #11
0
def read_csv(csv_file):
    """Sum the ``HR`` column per ``yearID`` for rows whose ``lgID`` is "AL".

    Args:
        csv_file: path of an ASCII, comma-delimited file whose first row is
            the header.

    Returns:
        dict: integer year -> summed integer ``HR`` value, with keys
        inserted in sorted order of the original ``yearID`` strings.
    """
    with open(csv_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        rows = list(reader)

    header = rows.pop(0)

    # Accumulate in one pass instead of re-filtering every row per year.
    totals = {}
    for row in rows:
        record = dict(zip(header, row))
        if record["lgID"] != "AL":
            continue
        year = record["yearID"]
        totals[year] = totals.get(year, 0) + int(record["HR"])

    return {int(year): totals[year] for year in sorted(totals)}
Пример #12
0
def write_json(csv_path, target_path=None):
    """Convert the global CO2 CSV into a JSON dataset.

    The first row is the header. Every ``SAMPLE``-th data row is kept, its
    ``datetime`` column is passed through ``reformat_time`` and its
    ``data_mean_global`` column is parsed as a float. Entries whose year
    component sorts before "1600" are dropped, and the rest are written in
    sorted time order.

    Args:
        csv_path: path of the ASCII, comma-delimited input file.
        target_path: path of the JSON file to write.
    """
    with open(csv_path, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        rows = list(reader)

    header = rows.pop(0)
    rows = [r for i, r in enumerate(rows) if i % SAMPLE == 0]

    as_dicts = [dict(zip(header, row)) for row in rows]
    by_date = {
        reformat_time(d["datetime"]): float(d["data_mean_global"])
        for d in as_dicts
    }

    # Trim off anything before 1600.
    # NOTE(review): this compares year strings lexicographically, which
    # assumes reformat_time yields zero-padded four-digit years - confirm.
    by_date = {k: v for k, v in by_date.items() if k.split("-")[0] >= "1600"}

    time = sorted(by_date.keys())
    values = [by_date[t] for t in time]

    name = "global_co2"
    longname = "Global CO2"
    time_fmt = "%Y-%m-%d"
    series = [{"label": "Mean", "type": "float", "raw": values}]

    # `time` is always a list here, so the "time" entry is always present.
    data = {
        "name": name,
        "longname": longname,
        "n_obs": len(values),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": time_fmt,
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    with open(target_path, "w") as fp:
        json.dump(data, fp, indent="\t")
Пример #13
0
def main():
    """Convert the Shanghai license CSV given on the command line into JSON.

    The header row is dropped. Column 0 goes through ``reformat_time`` to
    form the time axis and the last column is parsed as an integer. The
    "2008-01" observation is halved and duplicated as a new "2008-02"
    entry directly after it.
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as src:
        rows = list(
            clevercsv.reader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Discard the header row.
    rows.pop(0)

    time = [reformat_time(row[0]) for row in rows]
    values = [int(row[-1]) for row in rows]

    # Manually split Jan-08 into two, see readme for details.
    # NOTE(review): true division makes these two entries floats although
    # the series is labelled "int" - confirm this is intended.
    split_at = time.index("2008-01")
    half = values[split_at] / 2
    values[split_at] = half
    values.insert(split_at + 1, half)
    time.insert(split_at + 1, "2008-02")

    series = [{"label": "No. of Applicants", "type": "int", "raw": values}]
    time_block = {
        "type": "string",
        "format": "%Y-%m",
        "index": list(range(len(time))),
        "raw": time,
    }
    data = {
        "name": "shanghai_license",
        "longname": "Shanghai License",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": time_block,
        "series": series,
    }

    with open(args.output_file, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #14
0
def main():
    """Convert the Brent spot price CSV given on the command line into JSON.

    The first five rows are skipped and the remainder is reversed. Every
    ``SAMPLE``-th row is kept, and everything before the first kept row
    whose date field ends in "2000" is discarded. Column 0 goes through
    ``date_to_iso`` and column 1 is parsed as a float.
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as src:
        rows = list(
            clevercsv.reader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Skip the five leading rows, then flip the row order.
    rows = rows[5:][::-1]

    sampled = []
    for position, row in enumerate(rows):
        if position % SAMPLE == 0:
            sampled.append(row)

    # Start at the first sampled row dated in 2000; StopIteration
    # propagates when there is no such row.
    start = next(
        i for i, row in enumerate(sampled) if row[0].endswith("2000")
    )
    kept = sampled[start:]

    time = [date_to_iso(row[0]) for row in kept]
    values = [float(row[1]) for row in kept]

    series = [{"label": "Dollars/Barrel", "type": "float", "raw": values}]
    time_block = {
        "type": "string",
        "format": "%Y-%m-%d",
        "index": list(range(len(time))),
        "raw": time,
    }
    data = {
        "name": "brent_spot",
        "longname": "Brent Spot Price",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": time_block,
        "series": series,
    }

    with open(args.output_file, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #15
0
def write_json(csv_path, target_path=None):
    """Convert the Ratner Group stock CSV into a JSON dataset.

    The header row is dropped, every ``SAMPLE``-th data row is kept, and
    only the first 600 of those are used. Column 0 supplies the time label
    and column 4 the closing price, parsed as a float.

    Args:
        csv_path: path of the ASCII, comma-delimited input file.
        target_path: path of the JSON file to write.
    """
    with open(csv_path, "r", newline="", encoding="ascii") as src:
        rows = list(
            clevercsv.reader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Discard the header row.
    rows.pop(0)

    sampled = []
    for position, row in enumerate(rows):
        if position % SAMPLE == 0:
            sampled.append(row)

    # Take the first 600 rows.
    kept = sampled[:600]

    time = [row[0] for row in kept]
    values = [float(row[4]) for row in kept]

    series = [{"label": "Close Price", "type": "float", "raw": values}]
    time_block = {
        "type": "string",
        "format": "%Y-%m-%d",
        "index": list(range(len(time))),
        "raw": time,
    }
    data = {
        "name": "ratner_stock",
        "longname": "Ratner Group Stock Price",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": time_block,
        "series": series,
    }

    with open(target_path, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #16
0
def write_json(txt_path, target_path=None):
    """Convert the occupancy text file into a JSON dataset.

    The ``date`` column forms the time axis. The ``Temperature``,
    ``Humidity``, ``Light`` and ``CO2`` columns become float series
    labelled ``V1`` .. ``V4``. Both the time axis and each series keep
    only every ``SAMPLE``-th observation.

    Args:
        txt_path: path of the ASCII, comma-delimited input file.
        target_path: path of the JSON file to write.
    """
    with open(txt_path, "r", newline="", encoding="ascii") as src:
        rows = list(
            clevercsv.reader(
                src, delimiter=",", quotechar='"', escapechar=""
            )
        )

    # An "id" name is prepended to the header before pairing it with the
    # rows (presumably data rows carry an extra leading column; TODO confirm).
    header = rows.pop(0)
    header.insert(0, "id")
    records = [dict(zip(header, row)) for row in rows]

    var_include = ["Temperature", "Humidity", "Light", "CO2"]

    all_times = [record["date"] for record in records]
    time = [all_times[i] for i in range(0, len(all_times), SAMPLE)]

    series = []
    for number, column in enumerate(var_include, start=1):
        # Convert the full column first, then subsample it.
        column_values = [float(record[column]) for record in records]
        series.append(
            {
                "label": "V%i" % number,
                "type": "float",
                "raw": [
                    column_values[i]
                    for i in range(0, len(column_values), SAMPLE)
                ],
            }
        )

    data = {
        "name": "occupancy",
        "longname": "Occupancy",
        "n_obs": len(time),
        "n_dim": len(var_include),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d %H:%M:%S",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    with open(target_path, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #17
0
def main():
    """Convert the ozone emissions CSV given on the command line into JSON.

    The header row is dropped and only rows whose first field is
    "Total emissions" are used. Column 2 supplies the time label and the
    last column the value, parsed as an integer.
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as src:
        rows = list(
            clevercsv.reader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Discard the header row.
    rows.pop(0)

    totals = [row for row in rows if row[0] == "Total emissions"]
    time = [row[2] for row in totals]
    values = [int(row[-1]) for row in totals]

    series = [{"label": "Total Emissions", "type": "int", "raw": values}]
    time_block = {
        "type": "string",
        "format": "%Y",
        "index": list(range(len(time))),
        "raw": time,
    }
    data = {
        "name": "ozone",
        "longname": "Ozone-Depleting Emissions",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": time_block,
        "series": series,
    }

    with open(args.output_file, "w") as sink:
        json.dump(data, sink, indent="\t")
Пример #18
0
Created on Tue Feb 18 15:27:26 2020

@author: ar3
"""

# Code generated with CleverCSV version 0.5.5

import clevercsv
import time
import os.path

# Today's date as YYYYMMDD; used as the prefix of every expected HTML file.
date = time.strftime('%Y%m%d')

# Load all (vehicle, url) rows up front so the input file is closed early.
with open("vehicle_urls.txt", "r", newline="", encoding="ascii") as fp:
    reader = clevercsv.reader(fp, delimiter=",", quotechar="\"", escapechar="")
    rows = list(reader)

for row in rows:
    # Each row must contain exactly two fields; `url` is not used yet.
    # TODO: replace the body below with a function call.
    vehicle, url = row
    html_file = date + "_" + vehicle + ".html"
    if os.path.exists(html_file):
        print(html_file + " exists")
        # The context manager closes the handle even if read() raises.
        with open(html_file) as file:
            html = file.read()
        print(html)
    else:
        print(html_file + " not exists")
Пример #19
0
 def _read_test(self, input, expect, **kwargs):
     """Parse ``input`` with ``clevercsv.reader`` and assert the rows equal ``expect``.

     Extra keyword arguments are forwarded to ``clevercsv.reader``.
     """
     parsed_rows = list(clevercsv.reader(input, **kwargs))
     self.assertEqual(parsed_rows, expect)