def write_metadata(context, metadata: dp.DataFrame, to_tag):
    """
    Write available metadata, including GPS tags, to high-res JPEGs

    ``metadata`` is modified in place: missing values are replaced by empty
    strings, "Source ID" is upper-cased and then becomes the index. Every
    path in ``to_tag`` ending in ".jpg" is matched to a row through the
    upper-cased part of its file name before the first dot.

    :param context: execution context; ``context.solid_config`` is handed to
        ExifTool as its executable
    :param metadata: DataFrame with the columns "Source ID", "Date",
        "Creator", "Title", "Description (Portuguese)", "Type", "Depicts",
        "Latitude" and "Longitude"
    :param to_tag: paths of the files to tag; non-".jpg" entries are skipped
    :return: ``to_tag``, passed through for the next step
    :raises KeyError: if a JPEG has no matching "Source ID" row
    """
    metadata.fillna(value="", inplace=True)
    metadata["Source ID"] = metadata["Source ID"].str.upper()
    metadata.set_index("Source ID", inplace=True)

    jpegs = [item for item in to_tag if item.endswith(".jpg")]
    if jpegs:
        # One ExifTool process serves the whole batch instead of one per file.
        with ExifTool(executable_=context.solid_config) as et:
            for item in tqdm(jpegs, "Embedding metadata in files..."):
                name = os.path.basename(item).split(".")[0]
                args = [
                    arg.encode(encoding="utf-8")
                    for arg in _exiftool_args(metadata, name)
                ]
                # All assignments go out in a single command: the file is
                # rewritten once, and repeated Keywords assignments only
                # accumulate into a list when issued together.
                et.execute(*args, item.encode(encoding="utf-8"))

    return to_tag


def _exiftool_args(metadata, name):
    """Build the exiftool tag assignments for the record called ``name``."""
    key = name.upper()

    def field(column):
        return metadata.loc[key, column]

    args = [
        "-IPTC:Source=Instituto Moreira Salles/IMS",
        "-IPTC:CopyrightNotice=This image is in the Public Domain.",
        "-IPTC:City=Rio de Janeiro",
        "-IPTC:Province-State=RJ",
        "-IPTC:Country-PrimaryLocationName=Brasil",
        "-GPSLatitudeRef=S",
        "-GPSLongitudeRef=W",
        # The altitude and image-direction references are written although
        # the corresponding values are not embedded.
        "-GPSAltitudeRef=0",
        "-GPSImgDirectionRef=T",
        f"-IPTC:DateCreated={field('Date')}",
        f"-IPTC:By-line={field('Creator')}",
        f"-IPTC:ObjectName={name}",
        f"-IPTC:Headline={field('Title')}",
        f"-IPTC:Caption-Abstract={field('Description (Portuguese)')}",
        f"-IPTC:ObjectTypeReference={field('Type')}",
    ]
    # "Depicts" holds "||"-separated keywords. Each one gets its own
    # assignment; interpolating the split list would embed its Python repr.
    args.extend(
        f"-IPTC:Keywords={keyword}"
        for keyword in field("Depicts").split("||")
        if keyword
    )
    args.append(f"-GPSLatitude={field('Latitude')}")
    args.append(f"-GPSLongitude={field('Longitude')}")
    return args
def transform_table_desc_df(context, table_desc: DataFrame) -> DataFrame:
    """
    Strip comment rows and the helper column from a table description

    The frame is modified in place and the same object is returned.

    :param context: execution context
    :param table_desc: pandas DataFrame describing the Postgres database
        table; must have an ``ignore`` column
    :return: ``table_desc`` without the rows whose ``ignore`` value contains
        ``#`` and without the ``ignore`` column
    :rtype: pandas.DataFrame
    """
    table_desc.fillna('', inplace=True)

    # Rows flagged with '#' in the ignore column are comment lines.
    is_comment = table_desc['ignore'].str.contains('#')
    comment_labels = table_desc.loc[is_comment].index

    # Remove the comment rows and the now-redundant marker column together.
    table_desc.drop(index=comment_labels, columns=['ignore'], inplace=True)
    return table_desc
def organise_creator(_, quickstate: dp.DataFrame):
    """
    Resolve creator names to Wikidata QIDs and export QuickStatements

    The "P170" column of the incoming frame is overwritten with QIDs (an
    empty string for names that are not in the lookup table). The
    "date_accuracy" column is dropped, missing values become empty strings,
    and two command files are written:

    * ``data/output/quickstatements_create.txt`` for rows with an empty "qid"
    * ``data/output/quickstatements_edit.txt`` for rows with a "qid"

    :param _: execution context (unused)
    :param quickstate: DataFrame with at least the columns "qid", "P170" and
        "date_accuracy"; every other column is treated as a property
    :return: the processed DataFrame indexed by "qid"
    """
    quickstate["P170"] = quickstate["P170"].apply(_creator_qid)
    quickstate = quickstate.drop(columns="date_accuracy")
    # NOTE(review): plain attribute on the intermediate frame; presumably it
    # is not carried over to the frame returned by set_index below -- confirm
    # whether anything still reads it.
    quickstate.name = "import_wikidata"
    quickstate.fillna("", inplace=True)

    # Build both batches in a single pass over the frame.
    statements = _df_to_quickstatements(quickstate)
    # Labels and descriptions are Portuguese text, so the encoding is pinned
    # instead of depending on the platform default.
    with open(
        "data/output/quickstatements_create.txt", "w", encoding="utf-8"
    ) as f:
        f.write(statements["create"])
    with open(
        "data/output/quickstatements_edit.txt", "w", encoding="utf-8"
    ) as f:
        f.write(statements["edit"])

    return quickstate.set_index("qid")


# Creator display name -> Wikidata QID.
_CREATOR_QIDS = {
    "Augusto Malta": "Q16495239",
    "Anônimo": "Q4233718",
    "Marc Ferrez": "Q3180571",
    "Georges Leuzinger": "Q5877879",
    "José dos Santos Affonso": "Q63993961",
    "N. Viggiani": "Q65619909",
    "Archanjo Sobrinho": "Q64009665",
    "F. Basto": "Q55089601",
    "J. Faria de Azevedo": "Q97570600",
    "S. H. Holland": "Q65619918",
    "Augusto Monteiro": "Q65619921",
    "Jorge Kfuri": "Q63166336",
    "Camillo Vedani": "Q63109123",
    "Fritz Büsch": "Q63109492",
    "Armando Pittigliani": "Q19607834",
    "Braz": "Q97487621",
    "Stahl & Wahnschaffe": "Q63109157",
    "Gomes Junior": "Q86942676",
    "A. Ruelle": "Q97570551",
    "Guilherme Santos": "Q55088608",
    "Albert Frisch": "Q21288396",
    "José Baptista Barreira Vianna": "Q63166517",
    "Alfredo Krausz": "Q63166405",
    "Therezio Mascarenhas": "Q97570728",
    "Torres": "Q65619905",
    "Theodor Preising": "Q63109140",
    "Augusto Stahl": "Q4821327",
    "Luiz Musso": "Q89538832",
    "Carlos Bippus": "Q63109147",
    "Thiele": "Q64825643",
    "Revert Henrique Klumb": "Q3791061",
    "Juan Gutierrez": "Q10312614",
    "F. Manzière": "Q65619915",
    "Antonio Luiz Ferreira": "Q97570558",
    "Etienne Farnier": "Q97570575",
    "José Francisco Corrêa": "Q10309433",
    "Chapelin": "Q97570376",
    "J. Teixeira": "Q89642578",
    "F. Garcia": "Q97570588",
    # This key used to contain a stray line-separator character after "A. ".
    "A. de Barros Lobo": "Q97570363",
    "Bloch": "Q61041099",
}

# Properties whose values are rendered as quoted strings.
_STRING_PROPS = ("Lpt-br", "Dpt-br", "Den", "P217", "P7835")
# Labels and descriptions cannot carry a reference.
_UNREFERENCED_PROPS = ("Lpt-br", "Dpt-br", "Den")


def _creator_qid(name):
    """Return the Wikidata QID for a creator name, or "" when unknown."""
    # Collapsing whitespace lets names with stray control or line-separator
    # characters still match the table.
    return _CREATOR_QIDS.get(" ".join(f"{name}".split()), "")


def _df_to_quickstatements(df):
    """Render ``df`` as {"create": ..., "edit": ...} QuickStatements text."""
    # One retrieval date for the whole batch.
    retrieved = dt.now().strftime("%Y-%m-%dT00:00:00")
    reference = "|S248|Q64995339|S813|+{0}Z/11".format(retrieved)

    batches = {"create": [], "edit": []}
    for _, row in df.iterrows():
        values = dict(row)
        qid = values["qid"]
        # Existing items are addressed by QID; new ones are created first
        # and then addressed as LAST.
        subject = str(qid) if qid else "LAST"
        commands = [] if qid else ["CREATE"]
        inventory = values.get("P217")

        for key, value in values.items():
            # "qid" identifies the item; it is not a statement of its own.
            if key == "qid" or not value:
                continue
            if key in _STRING_PROPS:
                rendered = '"{0}"'.format(value)
            else:
                rendered = str(value)
            command = "|".join(
                [subject, str(key).replace("P31_a", "P31"), rendered]
            )
            if key == "P217":
                command += "|P195|Q71989864"
            # Qualify the collection with the raw inventory number, quoted
            # exactly once and only when there is one.
            if key == "P195" and inventory:
                command += '|P217|"{0}"'.format(inventory)
            if key not in _UNREFERENCED_PROPS:
                command += reference
            commands.append(command)

        if commands:
            batches["edit" if qid else "create"].append("||".join(commands))

    return {kind: "||".join(items) for kind, items in batches.items()}