# Convert each office document attached to the current item to HTML, upload
# the HTML back to the same item, then log a summary of the run.
# NOTE(review): the source was flattened onto one line; the nesting below is
# reconstructed from the statements themselves - confirm against the original.
for f in omeka_client.get_files_for_item(item['id']):
    # Keep only the final path component: the name comes from the remote
    # record, and a value containing directory parts (or a URL) would
    # otherwise point outside temp_dir or at a directory that does not exist.
    fname = os.path.basename(f['original_filename'])
    name, ext = os.path.splitext(fname)
    if ext.lower() not in (".docx", ".doc", ".odt", ".rtf"):
        continue

    num_docs_found += 1
    # `res` is not inspected here; only the payload is written to disk.
    res, data = omeka_client.get_file(f['file_urls']['original'])
    download_file = os.path.join(temp_dir, fname)
    # Context manager closes the handle even if the write raises.
    with open(download_file, 'wb') as out:
        out.write(data)

    logger.info("Converting office doc file %s to HTML", f['id'])
    html_file = os.path.join(temp_dir, name + ".html")
    # The three positional flags are passed through unchanged; their meaning
    # is defined by word2html.convert, which is not visible from here.
    word2html.convert(download_file, html_file, True, True, False)
    # A truthy return value is counted as a successful upload.
    if omeka_client.post_file_from_filename(html_file, item['id']):
        num_html_uploaded += 1
        logger.info("Uploaded %s successfully", f['id'])

logger.info("********************")
logger.info("SUMMARY:")
logger.info("Deleted %s HTML", num_html_deleted)
logger.info("Docs found: %s", num_docs_found)
logger.info("HTML files converted and added: %s", num_html_uploaded)
# Every document found should have produced exactly one uploaded HTML file.
if num_docs_found == num_html_uploaded:
    logger.info("No errors detected")
else:
    logger.error("Number of docs does not match number of HTML files uploaded")
# Finish the element-text list for this file, create or update the matching
# Omeka item, record its ID in id_map, and attach the file to the item.
# NOTE(review): the source was flattened onto one line; the nesting below is
# reconstructed from the statements themselves - confirm against the original.
element_texts.append(element_text)
# The file path is stored as the text of the element identified by title_id.
element_texts.append({"html": False, "text": file_path, "element": {"id": title_id}})
item_to_upload = {
    "collection": {"id": collection_id},
    "item_type": {"id": item_type_id},
    "public": args["public"],
}
item_to_upload["element_texts"] = element_texts
jsonstr = json.dumps(item_to_upload)

# An ID stored for this path means the item was uploaded before: try to
# update it in place first.
previous_id = id_map.get(file_path)
if previous_id is not None:
    # Two spaces reproduce the output of the original two-argument print.
    print("Re-uploading  %s" % (previous_id,))
    response, content = omeka_client.put("items", previous_id, jsonstr)
    # A 404 means the remembered item no longer exists; fall through to POST.
    if response['status'] == '404':
        previous_id = None
if previous_id is None:
    response, content = omeka_client.post("items", jsonstr)

# Parsed after both branches so new_item_id is bound whether the item was
# updated (PUT) or newly created (POST).
print(content)
new_item = json.loads(content)
new_item_id = new_item['id']
print("Item ID %s" % (new_item_id,))
id_map[file_path] = new_item_id

# Save ID map every time - make this an option
with open(file_stash, 'w') as outfile:
    json.dump(id_map, outfile)

print(omeka_client.post_file_from_filename(file_path, new_item_id))

# NOTE(review): second save kept from the original; it may have sat outside
# an enclosing loop as the final save - confirm before removing.
with open(file_stash, 'w') as outfile:
    json.dump(id_map, outfile)
# Unless conversion is disabled via args['do_not_convert'], turn each office
# document attached to the current item into HTML and upload it to that item;
# afterwards log a summary of the run.
# NOTE(review): the source was flattened onto one line; the nesting below
# (including the summary sitting outside the `if`) is reconstructed from the
# statements themselves - confirm against the original.
if not args['do_not_convert']:
    for f in omeka_client.get_files_for_item(item['id']):
        # Use only the final path component: the name is supplied by the
        # remote record, and directory parts (or a URL) would otherwise
        # escape temp_dir or reference a directory that does not exist.
        fname = os.path.basename(f['original_filename'])
        name, ext = os.path.splitext(fname)
        if ext.lower() not in (".docx", ".doc", ".odt", ".rtf"):
            continue

        num_docs_found += 1
        # `res` is not inspected here; only the payload is written to disk.
        res, data = omeka_client.get_file(f['file_urls']['original'])
        download_file = os.path.join(temp_dir, fname)
        # Context manager closes the handle even if the write raises.
        with open(download_file, 'wb') as out:
            out.write(data)

        logger.info("Converting office doc file %s to HTML", f['id'])
        html_file = os.path.join(temp_dir, name + ".html")
        # The three positional flags are forwarded unchanged; their meaning
        # is defined by word2html.convert, which is not visible from here.
        word2html.convert(download_file, html_file, True, True, False)
        # A truthy return value is counted as a successful upload.
        if omeka_client.post_file_from_filename(html_file, item['id']):
            num_html_uploaded += 1
            logger.info("Uploaded %s successfully", f['id'])

logger.info("********************")
logger.info("SUMMARY:")
logger.info("Deleted %s HTML", num_html_deleted)
logger.info("Docs found: %s", num_docs_found)
logger.info("HTML files converted and added: %s", num_html_uploaded)
# Every document found should have produced exactly one uploaded HTML file.
if num_docs_found == num_html_uploaded:
    logger.info("No errors detected")
else:
    logger.error("Number of docs does not match number of HTML files uploaded")