def _sql_quote(value):
    """Return *value* with single quotes doubled for use inside a SQL string literal."""
    return value.replace("'", "''")


def index_city(country_abbrev, state, county, city):
    """Return the JSON-encoded ``persons`` rows matching one city.

    The country abbreviation is upper-cased and the state, county and city
    are passed through ``common.capitalize`` before being compared against
    the ``country``, ``state``, ``county`` and ``city`` columns.

    The arguments are spliced into the SQL text, so embedded single quotes
    are doubled first; otherwise a value such as ``O'Brien`` would terminate
    the literal early and let the caller alter the statement.

    NOTE(review): if ``query`` can forward bound parameters to the database
    driver, prefer that over string building -- confirm against its definition.

    Args:
        country_abbrev: Country abbreviation (case-insensitive).
        state: State name.
        county: County name.
        city: City name.

    Returns:
        ``json.dumps`` of whatever ``query`` returns for the statement.
    """
    # String concatenation (rather than str.format) keeps Python 2 unicode
    # arguments working exactly as before.
    sql = (
        "SELECT * FROM persons "
        + "WHERE country = '" + _sql_quote(country_abbrev.upper()) + "' "
        + "AND state ='" + _sql_quote(common.capitalize(state)) + "' "
        + "AND county ='" + _sql_quote(common.capitalize(county)) + "' "
        + "AND city ='" + _sql_quote(common.capitalize(city)) + "';"
    )
    resp = query(sql)
    return json.dumps(resp)
def _gen_record_type(name, fields, private_fields, writer):
    """Emit the Java source of a serializable static nested record class.

    Args:
        name: Name of the generated Java class.
        fields: Sequence of ``(field_name, java_type)`` pairs; each gets a
            member, a getter, a constructor parameter and a ``toString`` entry.
        private_fields: Sequence of ``(field_name, java_type)`` pairs that
            only get a member declaration.
        writer: Callable receiving each chunk of generated text.
    """
    emit = writer

    emit(" public static class {} implements java.io.Serializable {{\n".format(name))
    emit(" private static final long serialVersionUID = 1L;\n")

    # Public fields: declaration followed by its getter.
    for field, java_type in fields:
        emit(" /*private*/ {} {};\n".format(java_type, field))
        getter = " public {t} get{F}() {{ return {f}; }}\n".format(
            t=java_type, f=field, F=capitalize(field))
        emit(getter)

    # Bookkeeping members used by the data structure implementations.
    for field, java_type in private_fields:
        emit(" /*private*/ {} {};\n".format(java_type, field))

    # Constructor taking every public field, in declaration order.
    ctor_params = ", ".join(
        "{} {}".format(java_type, field) for field, java_type in fields)
    emit(" public {}({}) {{\n".format(name, ctor_params))
    for field, _ in fields:
        emit(" this.{f} = {f};\n".format(f=field))
    emit(" }\n")

    # toString(): "Name(f1=v1,f2=v2)" built through a StringBuilder chain.
    emit(" @Override\n")
    emit(" public String toString() {\n")
    emit(' return new StringBuilder().append("{}(")'.format(name))
    for position, (field, _) in enumerate(fields):
        if position > 0:
            emit(".append(',')")
        emit('.append("{}=")'.format(field))
        emit(".append({})".format(field))
    emit(".append(')').toString();\n")
    emit(" }\n")

    emit(" }\n")
def write(self, fields, queries, js="-", js_class="DataStructure", **kwargs):
    """Generate the JavaScript implementation of the data structure.

    The output consists of a usage-summary comment, the record type, any
    auxiliary types, the constructor, ``size``/``add``/``remove``, one
    ``update<Field>`` routine per field plus a bulk ``update``, and one
    callback-driven routine per query.

    Args:
        fields: Mapping of field name to field type.
        queries: Query objects exposing ``name``, ``vars`` (``(name, type)``
            pairs) and ``impl`` (the chosen implementation).
        js: Destination handed to ``open_maybe_stdout``.
        js_class: Name of the generated JavaScript class; the record class
            is named ``<js_class>Entry``.
        **kwargs: Ignored; accepted so all writers share a call signature.
    """
    with open_maybe_stdout(js) as f:
        writer = f.write

        RECORD_NAME = js_class + "Entry"

        writer("/*\n * USAGE SUMMARY\n")
        writer(" * initialization:\n * ds = new {}();\n".format(js_class))
        writer(" * get # of entries:\n * ds.size();\n")
        writer(" * add:\n * ds.add(new {}({}));\n".format(RECORD_NAME, ", ".join(fields)))
        writer(" * remove:\n * ds.remove(elem);\n")
        writer(" * update all fields:\n * ds.update(elem, {});\n".format(", ".join("new_{}".format(name) for name in fields)))
        for name in fields:
            writer(" * update {f}:\n * ds.update{F}(elem, new_{f});\n".format(f=name, F=capitalize(name)))
        writer(" * queries:\n")
        for q in queries:
            writer(" * ds.{n}({args}, function(elem) {{ ... }});\n".format(n=q.name, args=", ".join(a for a, t in q.vars)))
        writer(" * NOTE: Be careful not to add the same {} object more than once.\n".format(RECORD_NAME))
        writer(" * NOTE: Be careful not to remove or update an entry that is not in the data structure.\n")
        writer(" * NOTE: You may not make changes (add/remove/update) in query callbacks.\n")
        writer(" * NOTE: Elements can be removed in-place during iteration: if your query callback returns a truthy value, then the element is removed.\n")
        writer(" */\n\n\n")

        # record type
        private_members = []
        for q in queries:
            private_members += list((name, ty.gen_type(self)) for name, ty in q.impl.private_members())
        _gen_record_type(RECORD_NAME, list(fields.items()), private_members, writer)

        # auxiliary type definitions
        seen = set()
        for q in queries:
            for t in q.impl.auxtypes():
                _gen_aux_type(t, self, writer, seen)

        # Inside every generated method the structure itself is JavaScript `this`.
        this = TupleInstance("this")

        # constructor
        writer("function {}() {{\n".format(js_class))
        writer(indent(" ", "this.my_size = 0;\n"))
        for q in queries:
            writer(indent(" ", q.impl.construct(self, this)))
        writer("}\n")

        # get current size
        writer("{}.prototype.size = function() {{ return this.my_size; }};\n".format(js_class))

        # add routine
        writer("{}.prototype.add = function(x) {{\n".format(js_class))
        writer(" ++this.my_size;\n")
        for q in queries:
            writer(indent(" ", q.impl.gen_insert(self, "x", this)))
        writer("};\n")

        # remove routine
        writer("{}.prototype.remove = function(x) {{\n".format(js_class))
        writer(" --this.my_size;\n")
        for q in queries:
            writer(indent(" ", q.impl.gen_remove(self, "x", this)))
        writer("};\n")

        # update routines
        for name, ty in fields.items():
            writer("{}.prototype.update{} = function(__x, new_val) {{\n".format(js_class, capitalize(name)))
            writer(" if ({} != new_val) {{\n".format(self.get_field("__x", name)))
            for q in queries:
                writer(indent(" ", q.impl.gen_update(self, fields, "__x", {name: "new_val"}, this)))
            writer(" {} = new_val;\n".format(self.get_field("__x", name)))
            writer(" }\n")
            writer(" }\n")
        writer("{}.prototype.update = function(__x, {}) {{\n".format(js_class, ", ".join(name for name, ty in fields.items())))
        for q in queries:
            writer(indent(" ", q.impl.gen_update(self, fields, "__x", {name: name for name in fields}, this)))
        for name, ty in fields.items():
            writer(" {} = {};\n".format(self.get_field("__x", name), name))
        writer(" }\n")

        # query routines
        for q in queries:
            writer("{}.prototype.{} = function({}, __callback) {{\n".format(js_class, q.name, ", ".join(v for v, ty in q.vars)))
            proc, stateExps = q.impl.gen_query(self, q.vars, this)
            writer(indent(" ", proc))
            state = q.impl.state()
            # The iterator state lives in plain local variables of the routine.
            for (name, ty), e in zip(state, stateExps):
                writer(" var {} = {};\n".format(name, e))

            writer(" for (;;) {\n")
            proc, has_next = q.impl.gen_has_next(self, parent_structure=this, iterator=This())
            writer(indent(" ", proc))
            writer(" if (!({})) break;\n".format(has_next))
            proc, next_elem = q.impl.gen_next(self, parent_structure=this, iterator=This())
            writer(indent(" ", proc))

            # A truthy callback result removes the element.  Besides unlinking
            # it from this query's structure, the size counter and every other
            # query's structure must be updated too -- the same bookkeeping the
            # Java and C++ iterators' remove() perform.
            writer(" if (__callback({})) {{\n".format(next_elem))
            writer(" --this.my_size;\n")
            proc, removed = q.impl.gen_remove_in_place(self, parent_structure=this, iterator=This())
            writer(indent(" ", proc))
            for q2 in queries:
                if q2 != q:
                    writer(indent(" ", q2.impl.gen_remove(self, removed, parent_structure=this)))
            writer(" }\n")
            writer(" }\n")
            writer(" }\n")
def _box(ty):
    """Return the Java boxed type name for *ty*.

    ``int`` and ``char`` map to ``Integer`` and ``Character``; any other
    type name is passed through ``capitalize``.
    """
    irregular = (("int", "Integer"), ("char", "Character"))
    for primitive, boxed in irregular:
        if ty == primitive:
            return boxed
    return capitalize(ty)
def write(self, fields, queries, java_package=None, java_class="DataStructure", java="-", **kwargs):
    """Generate the Java implementation of the data structure.

    Emits an optional ``package`` line, then a serializable class holding
    the record type, auxiliary types, constructor, ``size``/``add``/
    ``remove``, per-field and bulk ``update`` methods and, for every query,
    an iterator class, an iterator-returning method and a ``<query>_1``
    single-result method.

    Args:
        fields: Mapping of field name to Java type.
        queries: Query objects exposing ``name``, ``vars`` and ``impl``.
        java_package: Package name; no ``package`` line is written if falsy.
        java_class: Name of the generated class.
        java: Destination handed to ``open_maybe_stdout``.
        **kwargs: Ignored; accepted so all writers share a call signature.
    """
    def typed_params(pairs):
        # Render (name, type) pairs as a Java parameter list: "T1 a, T2 b".
        return ", ".join("{} {}".format(ty, name) for name, ty in pairs)

    with open_maybe_stdout(java) as out:
        emit = out.write

        def emit_indented(text):
            # Every generated body goes through the same single-level indent.
            emit(indent(" ", text))

        if java_package:
            emit("package {};\n\n".format(java_package))

        emit("public class {} implements java.io.Serializable {{\n".format(java_class))

        # record type
        private_members = []
        record_name = self.record_type()
        for query in queries:
            private_members += list(
                (member, ty.gen_type(self))
                for member, ty in query.impl.private_members())
        _gen_record_type(record_name, list(fields.items()), private_members, emit)

        # auxiliary type definitions
        seen = set()
        for query in queries:
            for aux in query.impl.auxtypes():
                _gen_aux_type(aux, self, emit, seen)

        # constructor
        emit(" public {}() {{\n".format(java_class))
        for query in queries:
            emit_indented(query.impl.construct(self, This()))
        emit(" }\n")

        # get current size
        emit(" int my_size = 0;\n")
        emit(" int size() { return my_size; }\n")

        # add routine
        emit(" public void add({} x) {{\n".format(record_name))
        emit(" ++my_size;\n")
        for query in queries:
            emit_indented(query.impl.gen_insert(self, "x", This()))
        emit(" }\n")

        # remove routine
        emit(" public void remove({} x) {{\n".format(record_name))
        emit(" --my_size;\n")
        for query in queries:
            emit_indented(query.impl.gen_remove(self, "x", This()))
        emit(" }\n")

        # update routines: one per field, guarded so unchanged values are a no-op
        for field, field_type in fields.items():
            emit(" void update{}({} x, {} val) {{\n".format(
                capitalize(field), self.record_type(), field_type))
            emit(" if ({} != val) {{\n".format(self.get_field("x", field)))
            for query in queries:
                emit_indented(
                    query.impl.gen_update(self, fields, "x", {field: "val"}, This()))
            emit(" {} = val;\n".format(self.get_field("x", field)))
            emit(" }\n")
            emit(" }\n")

        # bulk update: every field is replaced by the same-named parameter
        emit(" void update({} x, {}) {{\n".format(
            self.record_type(), typed_params(fields.items())))
        for query in queries:
            emit_indented(
                query.impl.gen_update(
                    self, fields, "x", {field: field for field in fields}, This()))
        for field, field_type in fields.items():
            emit(" {} = {};\n".format(self.get_field("x", field), field))
        emit(" }\n")

        # query routines
        for query in queries:
            for member, ty in query.impl.fields():
                emit(" /*private*/ {} {};\n".format(ty.gen_type(self), member))

            it_name = "{}_iterator".format(query.name)
            emit(" /*private*/ static final class {} implements java.util.Iterator<{}> {{\n"
                 .format(it_name, record_name))
            state = query.impl.state()
            emit(" {} parent;\n".format(java_class))
            vars_needed = [(var, ty) for var, ty in query.vars
                           if query.impl.needs_var(var)]
            for var, ty in vars_needed:
                emit(" final {} {};\n".format(ty, var))
            for member, ty in state:
                emit(" {} {};\n".format(ty.gen_type(self), member))

            # iterator constructor: parent, then needed query vars, then state
            var_params = "".join(", {} {}".format(ty, var) for var, ty in vars_needed)
            state_params = "".join(
                ", {} {}".format(ty.gen_type(self), member) for member, ty in state)
            emit(" {}({} parent{}{}) {{\n".format(
                it_name, java_class, var_params, state_params))
            emit(" this.parent = parent;\n")
            for var, ty in vars_needed:
                emit(" this.{v} = {v};\n".format(v=var))
            for member, ty in state:
                emit(" this.{f} = {f};\n".format(f=member))
            emit(" }\n")

            emit(" @Override public boolean hasNext() {\n")
            body, value = query.impl.gen_has_next(
                self, parent_structure=TupleInstance("parent"), iterator=This())
            emit_indented(body)
            emit(" return {};\n".format(value))
            emit(" }\n")

            emit(" @Override public {} next() {{\n".format(record_name))
            body, value = query.impl.gen_next(
                self, parent_structure=TupleInstance("parent"), iterator=This())
            emit_indented(body)
            emit(" return {};\n".format(value))
            emit(" }\n")

            # remove(): unlink in place here, then from every other query
            emit(" @Override public void remove() {\n")
            emit(" --parent.my_size;\n")
            body, removed = query.impl.gen_remove_in_place(
                self, parent_structure=TupleInstance("parent"), iterator=This())
            emit_indented(body)
            for other in queries:
                if other != query:
                    emit_indented(
                        other.impl.gen_remove(
                            self, removed,
                            parent_structure=TupleInstance("parent")))
            emit(" }\n")
            emit(" }\n")

            # iterator-returning query method
            emit(" public java.util.Iterator<{}> {}({}) {{\n".format(
                record_name, query.name, typed_params(query.vars)))
            body, state_exps = query.impl.gen_query(self, query.vars, This())
            emit_indented(body)
            var_args = "".join(", {}".format(var) for var, ty in vars_needed)
            state_args = "".join(", {}".format(exp) for exp in state_exps)
            emit(" return new {}(this{}{});\n".format(it_name, var_args, state_args))
            emit(" }\n")

            # single-result query method
            emit(" public {} {}_1({}) {{\n".format(
                record_name, query.name, typed_params(query.vars)))
            body, value = query.impl.gen_query_one(self, query.vars, This())
            emit_indented(body)
            emit(" return {};\n".format(value))
            emit(" }\n")

        emit("}\n")
_NAMUS_RESULTS_URL = (
    "https://www.findthemissing.org/en/ajax/search_results?page={}"
    "&rows=100&sidx=DateLKA&sord=desc&_search=false"
)


def _fetch_results_page(browser, page):
    """Load one page of the JSON search results and return it decoded."""
    browser.get(_NAMUS_RESULTS_URL.format(page))
    return json.loads(browser.find_element_by_css_selector("body").text)


def _xpath_text(browser, xpath):
    """Return the text of the first element matching *xpath*."""
    return browser.find_element_by_xpath(xpath).text


def _upper_measurement(text):
    """Convert *text* to float, taking the part after "to" when it is a range."""
    if "to" in text:
        text = text.split("to")[1].strip()
    return float(text)


def parse_state(browser, state, missing_persons=None):
    """Scrape every findthemissing.org case listed for *state*.

    The search form is submitted for the state, the JSON result pages are
    walked, and each case's detail page (Case Information, Circumstances,
    Physical / Medical and Investigating Agency tabs) is read into a record
    created by ``common.create_new_record``.

    A case whose page shows an NCMEC number already present in
    *missing_persons* (key ``"NCMEC_<number>"``) is not added again; the
    existing record only receives the NamUs number.

    Args:
        browser: Selenium-style driver; it must also provide
            ``wait_until_visible``.
        state: State to select in the search form.
        missing_persons: Optional dict of already known records. It is
            updated in place; a new dict is created only when ``None``.

    Returns:
        The dict of records, new entries keyed ``"NAMUS_<namus_number>"``.
    """
    # `is None` (not truthiness) so a caller-supplied empty dict is filled
    # in place instead of being silently swapped for a new one.
    if missing_persons is None:
        missing_persons = {}

    browser.get("https://www.findthemissing.org/en")

    # search by state
    select_state(browser, state)
    browser.find_element_by_name("commit").click()

    # wait for new entries to show up
    browser.wait_until_visible("list", timeout=30)

    # Page 1 carries the page count; keep it so it is not downloaded twice.
    results = _fetch_results_page(browser, 1)
    pgs = int(results["total"])
    print("found {} pages".format(pgs))

    for pg in range(1, pgs + 1):
        if pg > 1:
            results = _fetch_results_page(browser, pg)
        print("page " + str(pg) + " of " + str(pgs))

        rows = results["rows"]
        for num, person in enumerate(rows):
            print("person " + str(num + 1) + " of " + str(len(rows)))

            new_person = common.create_new_record()
            cells = person["cell"]

            # organization
            new_person["namus_number"] = cells[0]
            new_person["org_name"] = "National Missing and Unidentified Persons System"
            new_person["org"] = "NAMUS"
            new_person["org_contact"] = "1-855-626-7600"

            # personal characteristics
            new_person["sex"] = common.capitalize(cells[4])
            new_person["race"] = cells[5]
            new_person["age"] = float(cells[6])

            case_path = person["id"].split("_")
            browser.get("https://www.findthemissing.org/en/cases/" + case_path[0] + "/" + case_path[1])
            time.sleep(10)

            # When row 6 is the NCMEC number, the date row shifts down by one.
            has_ncmec_lbl = _xpath_text(
                browser,
                "//div[@id='case_information']/div/table/tbody/tr[6]/td/label") == "NCMEC number"
            if has_ncmec_lbl:
                ncmec_key = "NCMEC_" + _xpath_text(
                    browser,
                    "//div[@id='case_information']/div/table/tbody/tr[6]/td[2]").strip()
                # The merge check only makes sense when a number was read, so
                # it lives inside the label check.
                if ncmec_key in missing_persons:
                    print("found " + ncmec_key + " so merging...")
                    missing_persons[ncmec_key]["namus_number"] = new_person["namus_number"]
                    continue

            # case info
            photo = browser.find_element_by_css_selector("dt.photo > img").get_attribute("src")
            if "no_photo" not in photo:
                new_person["photo"] = photo
            new_person["first_name"] = common.capitalize(_xpath_text(
                browser, "//div[@id='case_information']/div/table/tbody/tr[2]/td[2]"))
            new_person["middle_name"] = common.capitalize(_xpath_text(
                browser,
                "//div[@id='case_information']/div/table/tbody/tr[3]/td[2]").replace("\"", ""))
            new_person["last_name"] = common.capitalize(_xpath_text(
                browser, "//div[@id='case_information']/div/table/tbody/tr[4]/td[2]"))
            if has_ncmec_lbl:
                date = _xpath_text(
                    browser, "//div[@id='case_information']/div/table/tbody/tr[7]/td[2]")
            else:
                date = _xpath_text(
                    browser, "//div[@id='case_information']/div/table/tbody/tr[6]/td[2]")
            new_person["date"] = common.clean_date(date)

            # determining white or non-white hispanic
            if new_person["race"] in ("White", "Other"):
                ethnicity = _xpath_text(
                    browser, "//div[@id='case_information']/div[2]/table/tbody/tr[4]/td[2]")
                if ethnicity == "Hispanic/Latino":
                    if new_person["race"] == "White":
                        new_person["race"] = "White Hispanic/Latino"
                    else:
                        new_person["race"] = "Non-White Hispanic/Latino"
            new_person["race"] = common.clean_race(new_person["race"])

            new_person["height"] = _upper_measurement(_xpath_text(
                browser, "//div[@id='case_information']/div[2]/table/tbody/tr[6]/td[2]"))
            new_person["weight"] = _upper_measurement(_xpath_text(
                browser, "//div[@id='case_information']/div[2]/table/tbody/tr[7]/td[2]"))

            browser.find_element_by_link_text("Circumstances").click()
            time.sleep(3)

            # circumstance
            new_person["city"] = common.capitalize(browser.find_element_by_css_selector(
                "div.column1-unit > table > tbody > tr > td.view_field").text)
            new_person["state"] = common.capitalize(_xpath_text(
                browser, "//div[@id='circumstances']/div/table/tbody/tr[2]/td[2]"))
            new_person["county"] = common.capitalize(_xpath_text(
                browser, "//div[@id='circumstances']/div/table/tbody/tr[4]/td[2]"))
            new_person["country"] = "US"
            try:
                new_person["circumstance"] = browser.find_element_by_id("case_Circumstances").text
            except NoSuchElementException:
                # The element is absent on some case pages; store an empty text.
                new_person["circumstance"] = ""

            browser.find_element_by_link_text("Physical / Medical").click()
            time.sleep(3)

            # physical
            new_person["hair_color"] = common.clean_hair_color(_xpath_text(
                browser, "//div[@id='physical_characteristics']/div/table/tbody/tr/td[3]"))
            left_eye_color = _xpath_text(
                browser, "//div[@id='physical_characteristics']/div/table/tbody/tr[5]/td[3]")
            right_eye_color = _xpath_text(
                browser, "//div[@id='physical_characteristics']/div/table/tbody/tr[6]/td[3]")
            if left_eye_color == right_eye_color:
                new_person["eye_color"] = common.clean_eye_color(left_eye_color)
            else:
                new_person["eye_color"] = "Multicolor"

            browser.find_element_by_link_text("Investigating Agency").click()
            time.sleep(3)

            # Named agency_state so the `state` parameter is not overwritten.
            agency_state = _xpath_text(
                browser, "//div[@id='police_information']/div[2]/table/tbody/tr[6]/td[2]")
            state_paren = ""
            if agency_state:
                state_paren = " (" + agency_state + ")"
            new_person["agency_name"] = _xpath_text(
                browser,
                "//div[@id='police_information']/div[2]/table/tbody/tr[2]/td[2]") + state_paren
            new_person["agency_contact"] = _xpath_text(
                browser, "//div[@id='police_information']/div/table/tbody/tr[4]/td[2]")

            missing_persons["NAMUS_" + new_person["namus_number"]] = new_person

    return missing_persons
def write(self, fields, queries, cpp=None, cpp_header=None, cpp_class="DataStructure", cpp_record_class="Record", cpp_abstract_record=False, cpp_extra=None, cpp_namespace=None, **kwargs):
    """Generate the C++ implementation of the data structure.

    Everything -- declarations and the inline definitions -- is written to
    the header destination; the ``cpp`` destination is opened but receives
    no output.

    Side effects: stores ``cpp_record_class``, ``cpp_abstract_record``,
    ``fields`` and ``private_members`` on ``self``.

    Args:
        fields: Mapping of field name to C++ type.
        queries: Query objects exposing ``name``, ``vars`` and ``impl``.
        cpp: Destination handed to ``open_maybe_stdout`` (left empty).
        cpp_header: Destination of the generated header.
        cpp_class: Name of the generated data structure class.
        cpp_record_class: Name of the record class.
        cpp_abstract_record: When true, no record class is generated; a
            ``PrivateData`` struct and ``read_*`` accessor declarations that
            the client must implement are emitted instead.
        cpp_extra: Optional text written near the top of the header.
        cpp_namespace: Optional namespace wrapping the declarations.
        **kwargs: Ignored; accepted so all writers share a call signature.
    """
    self.cpp_record_class = cpp_record_class
    self.cpp_abstract_record = cpp_abstract_record
    self.fields = fields

    with open_maybe_stdout(cpp) as outfile:
        with open_maybe_stdout(cpp_header) as header_outfile:
            header_writer = header_outfile.write

            # ---------------------------------------------------------------------
            # HEADER

            guard = "HEADER_{}".format(fresh_name())
            header_writer("#ifndef {}\n".format(guard))
            header_writer("#define {} 1\n".format(guard))
            header_writer("\n")

            if cpp_extra:
                header_writer("{}\n".format(cpp_extra))

            header_writer("#include <cassert>\n")
            header_writer("#include <ctgmath>\n")
            # header_writer("#include <vector>\n")
            header_writer("#include <unordered_map>\n")
            header_writer("#include <map>\n")
            header_writer("#include <functional>\n")
            header_writer("#include <algorithm>\n")

            if self.with_qt:
                header_writer("#include <QHash>\n")

            # Support containers used by the generated code.  The preprocessor
            # line and the `//` comment must sit on their own lines.  myarr's
            # copy assignment releases the previous buffer (it used to leak),
            # and moves keep `length` consistent with `data`.
            header_writer("""
#include <cstdint>

template <class T>
class mystk {
    int32_t _end;
    static int32_t _cap;
    static T* _data;
public:
    mystk() : _end(-1) { }
    void reserve(size_t n) { }
    bool empty() { return _end < 0; }
    T& back() { return _data[_end]; }
    void push_back(const T& x) {
        ++_end;
        if (_end >= _cap) {
            _cap *= 2;
            T* newdata = new T[_cap];
            std::copy(_data, _data + _end, newdata);
            delete[] _data;
            _data = newdata;
        }
        // printf("inserting %p @ %d\\n", x, (int)_end);
        _data[_end] = x;
    }
    void pop_back() { --_end; }
};

template<class T> int32_t mystk<T>::_cap = 10;
template<class T> T* mystk<T>::_data = new T[10];

template <class T>
class myarr {
    T* data;
    int length;
public:
    myarr() : data(nullptr), length(0) { }
    myarr(int n) : data(new T[n]), length(n) { }
    myarr(const myarr& other) : data(new T[other.length]), length(other.length) {
        std::copy(other.data, other.data + other.length, data);
    }
    myarr(myarr&& other) : data(other.data), length(other.length) {
        other.data = nullptr;
        other.length = 0;
    }
    myarr& operator=(const myarr& other) {
        if (this != &other) {
            T* newdata = new T[other.length];
            std::copy(other.data, other.data + other.length, newdata);
            delete[] data;
            data = newdata;
            length = other.length;
        }
        return *this;
    }
    myarr& operator=(myarr&& other) {
        if (this != &other) {
            std::swap(length, other.length);
            std::swap(data, other.data);
        }
        return *this;
    }
    ~myarr() { if (data != nullptr) delete[] data; }
    T& operator[](int n) { return data[n]; }
    const T& operator[](int n) const { return data[n]; }
    int size() const { return length; }
    T* begin() { return data; }
    T* end() { return data + length; }
};

template <class T>
bool operator==(const myarr<T>& lhs, const myarr<T>& rhs) {
    if (lhs.size() != rhs.size()) return false;
    for (int i = 0; i < lhs.size(); ++i) {
        if (lhs[i] != rhs[i]) return false;
    }
    return true;
}

template <class T>
bool operator<(const myarr<T>& lhs, const myarr<T>& rhs) {
    if (lhs.size() < rhs.size()) return true;
    if (lhs.size() > rhs.size()) return false;
    for (int i = 0; i < lhs.size(); ++i) {
        if (lhs[i] < rhs[i]) return true;
        if (lhs[i] > rhs[i]) return false;
    }
    return false;
}

template <class T>
bool operator!=(const myarr<T>& lhs, const myarr<T>& rhs) { return !(lhs == rhs); }

template <class T>
bool operator>=(const myarr<T>& lhs, const myarr<T>& rhs) { return !(lhs < rhs); }

template <class T>
bool operator>(const myarr<T>& lhs, const myarr<T>& rhs) { return (lhs != rhs) && (lhs >= rhs); }

template <class T>
bool operator<=(const myarr<T>& lhs, const myarr<T>& rhs) { return !(lhs > rhs); }
""")

            header_writer("\n")
            if cpp_namespace is not None:
                header_writer("namespace {} {{\n".format(cpp_namespace))

            # forward decls
            header_writer("class {};\n".format(cpp_record_class))
            header_writer("class {};\n".format(cpp_class))
            header_writer("\n")

            # auxiliary type definitions
            seen = set()
            for q in queries:
                for t in q.impl.auxtypes():
                    _gen_aux_type_fwd_decl(t, self, header_writer, seen)
            seen = set()
            for q in queries:
                for t in q.impl.auxtypes():
                    _gen_aux_type_header(t, self, header_writer, cpp_class, seen)

            # record type
            private_members = []
            for q in queries:
                private_members += list((f, ty.gen_type(self)) for f, ty in q.impl.private_members())
            self.private_members = private_members
            if cpp_abstract_record:
                header_writer("struct PrivateData {\n")
                for name, ty in private_members:
                    header_writer(" {} {};\n".format(ty, name))
                header_writer("};\n")
                for name, ty in list(fields.items()):
                    header_writer("inline {}& read_{}({}); /* MUST BE IMPLEMENTED BY CLIENT */\n".format(ty, name, self.record_type()))
                header_writer("inline PrivateData& read_private_data({}); /* MUST BE IMPLEMENTED BY CLIENT */\n".format(self.record_type()))
            else:
                _gen_record_type(cpp_record_class, list(fields.items()), private_members, header_writer)
            header_writer("\n")

            header_writer("class {} {{\n".format(cpp_class))
            header_writer("public:\n")

            # constructor
            header_writer(" inline {}();\n".format(cpp_class))

            # get current size
            header_writer(" inline size_t size() const;\n")

            # add routine
            header_writer(" inline void add({} x);\n".format(self.record_type()))

            # remove routine
            header_writer(" inline void remove({} x);\n".format(self.record_type()))

            # update routines
            for f, ty in fields.items():
                header_writer(" inline void update{}({} x, {} val);\n".format(capitalize(f), self.record_type(), ty))
            header_writer(" inline void update({} x, {});\n".format(self.record_type(), ", ".join("{} {}".format(ty, f) for f, ty in fields.items())))

            # query routines
            for q in queries:
                it_name = "{}_iterator".format(q.name)
                vars_needed = [(v, ty) for v, ty in q.vars if q.impl.needs_var(v)]

                # iterator class
                header_writer(" class {} {{\n".format(it_name))
                header_writer(" friend class {};\n".format(cpp_class))
                header_writer(" public:\n")
                header_writer(" inline bool hasNext();\n")
                # Same return type expression as the out-of-class definition
                # below, so declaration and definition cannot disagree.
                header_writer(" inline {} next();\n".format(self.record_type()))
                header_writer(" inline void remove();\n")
                header_writer(" private:\n")
                state = q.impl.state()
                header_writer(" {}* parent;\n".format(cpp_class))
                for v, ty in vars_needed:
                    header_writer(" {} {};\n".format(ty, v))
                for f, ty in state:
                    header_writer(" {} {};\n".format(ty.gen_type(self), f))
                header_writer(" inline {}({}* parent{}{});\n".format(it_name, cpp_class, "".join(", {} {}".format(ty, v) for v, ty in vars_needed), "".join(", {} {}".format(ty.gen_type(self), f) for f, ty in state)))
                header_writer(" };\n")

                # query method
                header_writer(" inline {} {}({});\n".format(it_name, q.name, ", ".join("{} {}".format(ty, v) for v, ty in q.vars)))
                header_writer(" inline {} {}_1({});\n".format(self.record_type(), q.name, ", ".join("{} {}".format(ty, v) for v, ty in q.vars)))

            # debugging
            header_writer(" inline void checkRep();\n")

            # private members
            header_writer("private:\n")
            header_writer(" size_t my_size;\n")
            for q in queries:
                for f, ty in q.impl.fields():
                    header_writer(" {} {};\n".format(ty.gen_type(self), f))

            header_writer("};\n")

            if cpp_namespace is not None:
                header_writer("}\n")

            header_writer("\n")

            # ---------------------------------------------------------------------
            # CODE

            name = cpp_class if cpp_namespace is None else "{}::{}".format(cpp_namespace, cpp_class)

            # writer("#include \"DataStructure.hpp\"\n")
            # All definitions are `inline`, so they are emitted into the header
            # as well; `outfile` intentionally receives nothing.
            writer = header_writer

            # constructor
            writer("{}::{}() : my_size(0) {{\n".format(name, cpp_class))
            for q in queries:
                writer(indent(" ", q.impl.construct(self, This())))
            writer("}\n")

            # size
            writer("size_t {}::size() const {{ return my_size; }}\n".format(name))

            # add routine
            writer("void {}::add({} x) {{\n".format(name, self.record_type()))
            writer(" ++my_size;\n")
            for q in queries:
                writer(indent(" ", q.impl.gen_insert(self, "x", This())))
            writer("}\n")

            # remove routine
            writer("void {}::remove({} x) {{\n".format(name, self.record_type()))
            writer(" --my_size;\n")
            for q in queries:
                writer(indent(" ", q.impl.gen_remove(self, "x", This())))
            writer("}\n")

            # update routines
            for f, ty in fields.items():
                writer("void {}::update{}({} x, {} val) {{\n".format(name, capitalize(f), self.record_type(), ty))
                writer(" if ({} != val) {{\n".format(self.get_field("x", f)))
                for q in queries:
                    writer(indent(" ", q.impl.gen_update(self, fields, "x", {f: "val"}, This())))
                writer(" {} = val;\n".format(self.get_field("x", f)))
                writer(" }\n")
                writer("}\n")
            writer("void {}::update({} x, {}) {{\n".format(name, self.record_type(), ", ".join("{} {}".format(ty, f) for f, ty in fields.items())))
            for q in queries:
                writer(indent(" ", q.impl.gen_update(self, fields, "x", {f: f for f in fields}, This())))
            for f, ty in fields.items():
                writer(" {} = {};\n".format(self.get_field("x", f), f))
            writer("}\n")

            # query routines
            for q in queries:
                vars_needed = [(v, ty) for v, ty in q.vars if q.impl.needs_var(v)]
                state = q.impl.state()

                # query call
                writer("{prefix}::{q}_iterator {prefix}::{q}({}) {{\n".format(", ".join("{} {}".format(ty, v) for v, ty in q.vars), prefix=name, q=q.name))
                proc, stateExps = q.impl.gen_query(self, q.vars, This())
                writer(indent(" ", proc))
                writer(" return {}_iterator(this{}{});\n".format(q.name, "".join(", {}".format(v) for v, ty in vars_needed), "".join(", {}".format(e) for e in stateExps)))
                writer(" }\n")

                # iterator constructor
                writer("{prefix}::{q}_iterator::{q}_iterator({}* _parent{}{}) :\n".format(cpp_class, "".join(", {} _{}".format(ty, v) for v, ty in vars_needed), "".join(", {} _{}".format(ty.gen_type(self), f) for f, ty in state), prefix=name, q=q.name))
                writer(" parent(_parent){}{}\n".format("".join(", {f}(_{f})".format(f=v) for v, ty in vars_needed), "".join(", {f}(_{f})".format(f=v) for v, ty in state)))
                writer("{ }\n")

                # hasNext
                writer("bool {prefix}::{q}_iterator::hasNext() {{\n".format(prefix=name, q=q.name))
                proc, ret = q.impl.gen_has_next(self, parent_structure=TupleInstance("parent"), iterator=This())
                writer(indent(" ", proc))
                writer(" return {};\n".format(ret))
                writer("}\n")

                # next
                writer("{} {prefix}::{q}_iterator::next() {{\n".format(self.record_type(), prefix=name, q=q.name))
                proc, ret = q.impl.gen_next(self, parent_structure=TupleInstance("parent"), iterator=This())
                writer(indent(" ", proc))
                writer(" return {};\n".format(ret))
                writer("}\n")

                # remove: unlink in place here, then from every other query
                writer("void {prefix}::{q}_iterator::remove() {{\n".format(prefix=name, q=q.name))
                writer(" --(parent->my_size);\n")
                proc, removed = q.impl.gen_remove_in_place(self, parent_structure=TupleInstance("parent"), iterator=This())
                writer(indent(" ", proc))
                for q2 in queries:
                    if q2 != q:
                        writer(indent(" ", q2.impl.gen_remove(self, removed, parent_structure=TupleInstance("parent"))))
                writer("}\n")

                # singular query call
                writer("{rt} {prefix}::{q}_1({}) {{\n".format(", ".join("{} {}".format(ty, v) for v, ty in q.vars), rt=self.record_type(), prefix=name, q=q.name))
                writer(" if (my_size == 0) { return nullptr; }\n")
                proc, result = q.impl.gen_query_one(self, q.vars, This())
                writer(indent(" ", proc))
                writer(" return {};\n".format(result))
                writer("}\n")

            writer("void {}::checkRep() {{\n".format(name))
            for q in queries:
                writer(indent(" ", q.impl.check_rep(self, This())))
            writer("}\n")

            header_writer("#endif\n")
new_person["org_contact"] = detailed_person["orgContactInfo"] #skip unidentified remains cases if new_person["org_name"] == "NCMEC-Unidentified": continue if detailed_person["altContact"]: (agency_name, agency_phone) = common.extract_agency_info(detailed_person["altContact"]) new_person["agency_name"] = agency_name.replace(" ", " ") new_person["agency_contact"] = agency_phone #circumstance if "missingDate" in person.keys(): new_person["date"] = common.clean_date(person["missingDate"]) new_person["circumstance"] = detailed_person["circumstance"] new_person["city"] = common.capitalize(person["missingCity"]) new_person["county"] = common.capitalize(person["missingCounty"]) #skip US terrorities try: new_person["state"] = common.convert_state_abbrev(person["missingState"]) except KeyError: continue new_person["country"] = person["missingCountry"] #personal characteristics new_person["first_name"] = common.capitalize(person["firstName"]) middle_name = common.capitalize(person["middleName"]) if len(middle_name) == 1: middle_name += "." new_person["middle_name"] = middle_name
new_person["org_contact"] = detailed_person["orgContactInfo"] # skip unidentified remains cases if new_person["org_name"] == "NCMEC-Unidentified": continue if detailed_person["altContact"]: (agency_name, agency_phone) = common.extract_agency_info(detailed_person["altContact"]) new_person["agency_name"] = agency_name.replace(" ", " ") new_person["agency_contact"] = agency_phone # circumstance if "missingDate" in person.keys(): new_person["date"] = common.clean_date(person["missingDate"]) new_person["circumstance"] = detailed_person["circumstance"] new_person["city"] = common.capitalize(person["missingCity"]) new_person["county"] = common.capitalize(person["missingCounty"]) # skip US terrorities try: new_person["state"] = common.convert_state_abbrev(person["missingState"]) except KeyError: continue new_person["country"] = person["missingCountry"] # personal characteristics new_person["first_name"] = common.capitalize(person["firstName"]) middle_name = common.capitalize(person["middleName"]) if len(middle_name) == 1: middle_name += "." new_person["middle_name"] = middle_name
_NAMUS_RESULTS_URL = (
    "https://www.findthemissing.org/en/ajax/search_results?page={}"
    "&rows=100&sidx=DateLKA&sord=desc&_search=false"
)


def _fetch_results_page(browser, page):
    """Load one page of the JSON search results and return it decoded."""
    browser.get(_NAMUS_RESULTS_URL.format(page))
    return json.loads(browser.find_element_by_css_selector("body").text)


def _xpath_text(browser, xpath):
    """Return the text of the first element matching *xpath*."""
    return browser.find_element_by_xpath(xpath).text


def _upper_measurement(text):
    """Convert *text* to float, taking the part after "to" when it is a range."""
    if "to" in text:
        text = text.split("to")[1].strip()
    return float(text)


def parse_state(browser, state, missing_persons=None):
    """Scrape every findthemissing.org case listed for *state*.

    The search form is submitted for the state, the JSON result pages are
    walked, and each case's detail page (Case Information, Circumstances,
    Physical / Medical and Investigating Agency tabs) is read into a record
    created by ``common.create_new_record``.

    A case whose page shows an NCMEC number already present in
    *missing_persons* (key ``"NCMEC_<number>"``) is not added again; the
    existing record only receives the NamUs number.

    Args:
        browser: Selenium-style driver; it must also provide
            ``wait_until_visible``.
        state: State to select in the search form.
        missing_persons: Optional dict of already known records. It is
            updated in place; a new dict is created only when ``None``.

    Returns:
        The dict of records, new entries keyed ``"NAMUS_<namus_number>"``.
    """
    # `is None` (not truthiness) so a caller-supplied empty dict is filled
    # in place instead of being silently swapped for a new one.
    if missing_persons is None:
        missing_persons = {}

    browser.get("https://www.findthemissing.org/en")

    # search by state
    select_state(browser, state)
    browser.find_element_by_name("commit").click()

    # wait for new entries to show up
    browser.wait_until_visible("list", timeout=30)

    # Page 1 carries the page count; keep it so it is not downloaded twice.
    results = _fetch_results_page(browser, 1)
    pgs = int(results["total"])
    print("found {} pages".format(pgs))

    for pg in range(1, pgs + 1):
        if pg > 1:
            results = _fetch_results_page(browser, pg)
        print("page " + str(pg) + " of " + str(pgs))

        rows = results["rows"]
        for num, person in enumerate(rows):
            print("person " + str(num + 1) + " of " + str(len(rows)))

            new_person = common.create_new_record()
            cells = person["cell"]

            # organization
            new_person["namus_number"] = cells[0]
            new_person["org_name"] = "National Missing and Unidentified Persons System"
            new_person["org"] = "NAMUS"
            new_person["org_contact"] = "1-855-626-7600"

            # personal characteristics
            new_person["sex"] = common.capitalize(cells[4])
            new_person["race"] = cells[5]
            new_person["age"] = float(cells[6])

            case_path = person["id"].split("_")
            browser.get("https://www.findthemissing.org/en/cases/" + case_path[0] + "/" + case_path[1])
            time.sleep(10)

            # When row 6 is the NCMEC number, the date row shifts down by one.
            has_ncmec_lbl = _xpath_text(
                browser,
                "//div[@id='case_information']/div/table/tbody/tr[6]/td/label") == "NCMEC number"
            if has_ncmec_lbl:
                ncmec_key = "NCMEC_" + _xpath_text(
                    browser,
                    "//div[@id='case_information']/div/table/tbody/tr[6]/td[2]").strip()
                # The merge check only makes sense when a number was read, so
                # it lives inside the label check.
                if ncmec_key in missing_persons:
                    print("found " + ncmec_key + " so merging...")
                    missing_persons[ncmec_key]["namus_number"] = new_person["namus_number"]
                    continue

            # case info
            photo = browser.find_element_by_css_selector("dt.photo > img").get_attribute("src")
            if "no_photo" not in photo:
                new_person["photo"] = photo
            new_person["first_name"] = common.capitalize(_xpath_text(
                browser, "//div[@id='case_information']/div/table/tbody/tr[2]/td[2]"))
            new_person["middle_name"] = common.capitalize(_xpath_text(
                browser,
                "//div[@id='case_information']/div/table/tbody/tr[3]/td[2]").replace("\"", ""))
            new_person["last_name"] = common.capitalize(_xpath_text(
                browser, "//div[@id='case_information']/div/table/tbody/tr[4]/td[2]"))
            if has_ncmec_lbl:
                date = _xpath_text(
                    browser, "//div[@id='case_information']/div/table/tbody/tr[7]/td[2]")
            else:
                date = _xpath_text(
                    browser, "//div[@id='case_information']/div/table/tbody/tr[6]/td[2]")
            new_person["date"] = common.clean_date(date)

            # determining white or non-white hispanic
            if new_person["race"] in ("White", "Other"):
                ethnicity = _xpath_text(
                    browser, "//div[@id='case_information']/div[2]/table/tbody/tr[4]/td[2]")
                if ethnicity == "Hispanic/Latino":
                    if new_person["race"] == "White":
                        new_person["race"] = "White Hispanic/Latino"
                    else:
                        new_person["race"] = "Non-White Hispanic/Latino"
            new_person["race"] = common.clean_race(new_person["race"])

            new_person["height"] = _upper_measurement(_xpath_text(
                browser, "//div[@id='case_information']/div[2]/table/tbody/tr[6]/td[2]"))
            new_person["weight"] = _upper_measurement(_xpath_text(
                browser, "//div[@id='case_information']/div[2]/table/tbody/tr[7]/td[2]"))

            browser.find_element_by_link_text("Circumstances").click()
            time.sleep(3)

            # circumstance
            new_person["city"] = common.capitalize(browser.find_element_by_css_selector(
                "div.column1-unit > table > tbody > tr > td.view_field").text)
            new_person["state"] = common.capitalize(_xpath_text(
                browser, "//div[@id='circumstances']/div/table/tbody/tr[2]/td[2]"))
            new_person["county"] = common.capitalize(_xpath_text(
                browser, "//div[@id='circumstances']/div/table/tbody/tr[4]/td[2]"))
            new_person["country"] = "US"
            try:
                new_person["circumstance"] = browser.find_element_by_id("case_Circumstances").text
            except NoSuchElementException:
                # The element is absent on some case pages; store an empty text.
                new_person["circumstance"] = ""

            browser.find_element_by_link_text("Physical / Medical").click()
            time.sleep(3)

            # physical
            new_person["hair_color"] = common.clean_hair_color(_xpath_text(
                browser, "//div[@id='physical_characteristics']/div/table/tbody/tr/td[3]"))
            left_eye_color = _xpath_text(
                browser, "//div[@id='physical_characteristics']/div/table/tbody/tr[5]/td[3]")
            right_eye_color = _xpath_text(
                browser, "//div[@id='physical_characteristics']/div/table/tbody/tr[6]/td[3]")
            if left_eye_color == right_eye_color:
                new_person["eye_color"] = common.clean_eye_color(left_eye_color)
            else:
                new_person["eye_color"] = "Multicolor"

            browser.find_element_by_link_text("Investigating Agency").click()
            time.sleep(3)

            # Named agency_state so the `state` parameter is not overwritten.
            agency_state = _xpath_text(
                browser, "//div[@id='police_information']/div[2]/table/tbody/tr[6]/td[2]")
            state_paren = ""
            if agency_state:
                state_paren = " (" + agency_state + ")"
            new_person["agency_name"] = _xpath_text(
                browser,
                "//div[@id='police_information']/div[2]/table/tbody/tr[2]/td[2]") + state_paren
            new_person["agency_contact"] = _xpath_text(
                browser, "//div[@id='police_information']/div/table/tbody/tr[4]/td[2]")

            missing_persons["NAMUS_" + new_person["namus_number"]] = new_person

    return missing_persons