def generate_filepaths(metadata_file, output_dir):
    """Give every metadata row an ``'output_path'`` and create it on disk.

    For each row read from ``metadata_file`` the path
    ``output_dir + "/" + row['filename']`` is stored under the row's
    ``'output_path'`` key, and a directory is created at that path when
    nothing exists there yet.

    Args:
        metadata_file: Location handed to ``metadata.read_csv``.
        output_dir: Prefix for the generated paths; it is concatenated with
            ``"/"``, so it is expected to be a string.

    Returns:
        The object returned by ``metadata.read_csv``, after each of its rows
        has been assigned an ``'output_path'`` entry.
    """
    data = metadata.read_csv(metadata_file)
    for row in data:
        output_path = output_dir + "/" + row['filename']
        try:
            # Create unconditionally rather than testing os.path.exists()
            # first: the separate check could race with another process
            # creating the same path between the test and the makedirs call.
            os.makedirs(output_path)
        except FileExistsError:
            # Something is already there; keep it, as the exists() check did.
            pass
        row['output_path'] = output_path
    return data
Exemplo n.º 2
0
def read_all_metadata(columns, path):
    """Gather the requested columns from every metadata file for ``path``.

    The files are listed by ``find_metadata_files(path)`` and each one is
    parsed with ``metadata.read_csv`` after being joined onto ``path``.

    Args:
        columns: Keys to copy out of each row.
        path: Directory passed to ``find_metadata_files`` and used as the
            prefix for every file name it returns.

    Returns:
        A list with one dict per kept row, mapping each entry of ``columns``
        to the row's value. Rows that raise ``KeyError`` for a requested
        column are reported with ``print`` and left out.
    """
    collected = []
    for name in find_metadata_files(path):
        rows = metadata.read_csv(os.path.join(path, name))
        for row in rows:
            try:
                selected = {col: row[col] for col in columns}
            except KeyError as err:
                print('Missing column: {}- {}'.format(row, err))
            else:
                collected.append(selected)
    return collected
Exemplo n.º 3
0
def get_common_columns(path):
    """Return the column names that appear in every metadata file.

    The files are listed by ``find_metadata_files(path)``; for each one the
    first element of ``metadata.read_csv(...)`` is taken as its header.
    Names are compared verbatim (no case or whitespace normalisation).

    Args:
        path: Directory passed to ``find_metadata_files`` and used as the
            prefix for every file name it returns.

    Returns:
        A list of the names whose occurrence count equals the number of
        metadata files.
    """
    metadata_files = find_metadata_files(path)

    # First row of each file, keyed by file name.
    first_rows = {}
    for name in metadata_files:
        first_rows[name] = metadata.read_csv(os.path.join(path, name))[0]

    # Tally how often each column name occurs across those first rows.
    occurrences = defaultdict(int)
    for columns in first_rows.values():
        for col in columns:
            occurrences[col] += 1

    return [
        name
        for name, seen in occurrences.items()
        if seen == len(metadata_files)
    ]
def main():
    """Write a POS-reduced copy of every file listed in the input metadata.

    Reads the metadata at ``settings.input``, and for each row loads the
    text of ``row['filename']``, tags it with ``wf.tag_pos`` for
    ``settings.language``, passes the result through ``reduce`` with
    ``settings.pos_list`` and writes it to the path given by
    ``get_new_path(filename, settings.output_dir)``. The row's
    ``'filename'`` is updated to that new path.

    Rows whose file is missing (reported with ``print``) or is a directory
    are skipped and keep their original ``'filename'``. Finally the metadata
    is written with ``m.write_csv`` to the path derived from
    ``settings.input`` by ``get_new_path``.
    """
    settings = get_settings()
    metadata = m.read_csv(settings.input)
    # exist_ok avoids the race between a separate exists() test and the
    # directory creation.
    os.makedirs(settings.output_dir, exist_ok=True)
    for row in metadata:
        filename = row['filename']
        try:
            text = wf.get_text_from_file(filename)
        except FileNotFoundError as err:
            print('Skipping: {}'.format(err))
            continue
        except IsADirectoryError:
            continue  # bad metadata, ignore
        # NOTE(review): the result is never used; the call is kept only in
        # case wf.output_filename has side effects -- confirm and remove.
        outfile = wf.output_filename(settings.output_dir, filename)
        pos_tagged_text = wf.tag_pos(text, settings.language)
        reduced_text = reduce(pos_tagged_text, settings.pos_list)
        new_filename = get_new_path(filename, settings.output_dir)
        row['filename'] = new_filename
        # The context manager closes the handle even if write() raises.
        with open(new_filename, 'w') as fh:
            fh.write(reduced_text)
    new_filename = get_new_path(settings.input, settings.output_dir)
    m.write_csv(new_filename, metadata)
Exemplo n.º 5
0
def main():
    """Filter the input metadata by date range and write the result.

    Stores the value of ``get_settings()`` in the module-level ``settings``,
    reads ``settings.input`` with ``metadata.read_csv``, passes the rows
    through ``filter_dates`` with ``settings.start`` and ``settings.end``,
    and writes what it returns to ``settings.output``.
    """
    global settings
    settings = get_settings()
    records = metadata.read_csv(settings.input)
    in_range = filter_dates(records, settings.start, settings.end)
    metadata.write_csv(settings.output, in_range)
Exemplo n.º 6
0
def main():
    """Load the input metadata and hand it to ``delete_files``.

    The value of ``get_settings()`` is stored in the module-level
    ``settings`` before ``settings.input`` is read with
    ``metadata.read_csv``.
    """
    global settings
    settings = get_settings()
    records = metadata.read_csv(settings.input)
    delete_files(records)
Exemplo n.º 7
0
def main():
    """Entry point: read the metadata named in the settings and delete files.

    Publishes the result of ``get_settings()`` as the module-level
    ``settings``, parses ``settings.input`` via ``metadata.read_csv`` and
    passes the parsed rows to ``delete_files``.
    """
    global settings
    settings = get_settings()
    rows = metadata.read_csv(settings.input)
    delete_files(rows)
def main():
    """Entry point: keep only metadata rows inside the configured dates.

    Publishes the result of ``get_settings()`` as the module-level
    ``settings``, parses ``settings.input`` via ``metadata.read_csv``,
    applies ``filter_dates`` with ``settings.start`` and ``settings.end``,
    and saves the outcome to ``settings.output`` via ``metadata.write_csv``.
    """
    global settings
    settings = get_settings()
    rows = metadata.read_csv(settings.input)
    selected = filter_dates(rows, settings.start, settings.end)
    metadata.write_csv(settings.output, selected)