def test_from_query(db):
    """from_query accepts a raw dict filter, a list of Conditions, or a QueryDocument."""
    from dlx.marc import MarcSet, BibSet, AuthSet, QueryDocument, Condition

    # dict filter
    result = BibSet.from_query({'_id': {'$in': [1, 2]}})
    assert isinstance(result, (MarcSet, BibSet))
    assert result.count == 2
    assert isinstance(result.records, map)
    result.cache()
    assert isinstance(result.records, list)

    # skip/limit
    result = BibSet.from_query({}, skip=0, limit=1)
    assert result.count == 1
    for record in result:
        assert record.id == 1
    # iterating above exhausted the records map; count is unaffected
    assert len(list(result.records)) == 0
    assert result.count == 1

    # list of Conditions
    criteria = [
        Condition(tag='150', subfields={'a': 'Header'}),
        Condition(tag='200', modifier='not_exists'),
    ]
    result = AuthSet.from_query(criteria)
    assert isinstance(result, (MarcSet, AuthSet))
    assert result.count == 1
    assert isinstance(result.records, map)
    result.cache()
    assert isinstance(result.records, list)

    # QueryDocument
    doc = QueryDocument(Condition('245', modifier='exists'))
    result = BibSet.from_query(doc)
    assert isinstance(result, BibSet)
    assert result.count == 2
def test_xml_encoding():
    """Non-ASCII subfield text must survive serialization to MARCXML."""
    from dlx.marc import BibSet, Bib
    from xmldiff import main

    expected = '<collection><record><datafield ind1=" " ind2=" " tag="245"><subfield code="a">Title with an é</subfield></datafield></record></collection>'
    record_set = BibSet([Bib().set('245', 'a', 'Title with an é')])
    assert main.diff_texts(record_set.to_xml(), expected) == []
def show_xml856(path):
    """Serve MARCXML, with 856 file-link fields added, for bibs whose 191$a equals *path*.

    Relies on module-level names not visible in this chunk: QueryDocument,
    Condition, BibSet, add856, time, Response (Flask).
    """
    # exact match on the document symbol in 191$a
    query = QueryDocument(
        Condition(
            tag='191',
            #subfields={'a': re.compile('^'+path+'$')}
            subfields={'a': path}))
    #print(f" the imp query is -- {query.to_json()}")
    ts2 = time.time()
    # project only the fields needed for the export
    bibset = BibSet.from_query(query,
                               projection={
                                   '029': 1,
                                   '091': 1,
                                   '191': 1,
                                   '245': 1,
                                   '269': 1,
                                   '650': 1,
                                   '991': 1
                               })
    #add856
    # this is where we isnert 856 tags for files info
    print(f"time for query is {time.time()-ts2}")
    ts3 = time.time()
    # add856 is defined elsewhere; it evidently returns XML as bytes,
    # since the result is .decode()d below -- TODO confirm its contract
    xml = add856(bibset)
    print(f"total time for adding 856 is {time.time()-ts3}")
    #xml=bibset.to_xml()
    #decoding to string and emoving double space from the xml; creates pbs with the job number on ODS export
    # NOTE(review): both replace() arguments read as a single space here, which
    # would be a no-op; the comment above implies the first argument should be
    # two spaces -- verify against the original source
    xml = xml.decode("utf-8").replace(" ", " ")
    return Response(xml, mimetype='text/xml')
def test_iterate(db):
    """Iterating a record set yields instances of the matching Marc subclass."""
    from dlx.marc import Bib, BibSet, Auth, AuthSet

    assert all(isinstance(record, Bib) for record in BibSet.from_query({}))
    assert all(isinstance(record, Auth) for record in AuthSet.from_query({}))
def test_to_str(db):
    # to_str() renders every record as 'tag' / 'code: value' lines, with
    # records separated by a blank line.
    # NOTE(review): runs of leading whitespace inside the control string may
    # have been collapsed in transit -- verify the exact indentation against
    # dlx's actual to_str output before trusting a failure here.
    from dlx.marc import BibSet
    control = '000\n leader\n008\n controlfield\n245\n a: This\n b: is the\n c: title\n520\n a: Description\n520\n a: Another description\n a: Repeated subfield\n650\n a: Header\n710\n a: Another header\n\n000\n leader\n245\n a: Another\n b: is the\n c: title\n650\n a: Header\n'
    assert BibSet.from_query({}).to_str() == control
def test_from_excel():
    """from_excel parses the fixture workbook, formatting dates as requested."""
    from dlx.marc import BibSet

    workbook = os.path.join(os.path.dirname(__file__), 'marc.xlsx')
    marc_set = BibSet.from_excel(workbook, date_format='%Y-%m-%d')

    for record in marc_set.records:
        # 'subtitle' is 8 chars, so startswith == original [:8] comparison
        assert record.get_value('246', 'b').startswith('subtitle')
        assert record.get_values('269', 'c')[1] == 'repeated'
def test_from_table(db):
    """from_table builds records from tabular headers like '246a' / '1.246$b'."""
    from dlx.marc import BibSet
    from dlx.util import Table

    table = Table([
        ['246a', '1.246$b', '1.269c', '2.269c'],
        ['title', 'subtitle', '1999-12-31', 'repeated'],
        ['title2', 'subtitle2', '2000-01-01', 'repeated'],
    ])
    marc_set = BibSet.from_table(table)
    for record in marc_set.records:
        assert record.get_value('246', 'b').startswith('subtitle')
        assert record.get_values('269', 'c')[1] == 'repeated'

    # field_check: a row missing the named field must raise
    with pytest.raises(Exception):
        BibSet.from_table(Table([['245a'], ['This']]), field_check='245a')

    # auth_control: an auth-controlled subfield with an unresolvable value must raise
    with pytest.raises(Exception):
        BibSet.from_table(Table([['650a'], ['Should an int']]), auth_control=True)

    with pytest.raises(Exception):
        BibSet.from_table(Table([['650a'], ['Invalid']]),
                          auth_control=False, auth_flag=True)
def test_init(bibs, auths):
    """Constructing a set from a record list exposes it via .records and .count."""
    from dlx.marc import BibSet, Bib, AuthSet, Auth

    bib_set = BibSet([Bib(data) for data in bibs])
    assert isinstance(bib_set, BibSet)
    assert len(bib_set.records) == 2
    assert bib_set.count == 2

    auth_set = AuthSet([Auth(data) for data in auths])
    assert isinstance(auth_set, AuthSet)
    assert len(auth_set.records) == 2
    assert auth_set.count == 2
def xml(date):
    '''outputs records in MARCXML format for the date which is provided as a
    dynamic route in YYYYMMDD or YYYY-MM-DD formats

    /YYYYMMDD/xml?skip=n&limit=m
    skip=n URL parameter is used to skip n records. Default is 0.
    limit=m URL parameter is used to limit number of records returned. Default is 50.
    if the date is in wrong format the function returns today's records
    it uses DLX bibset.to_xml serialization function to output MARCXML
    '''
    # request.args.get returns None when the parameter is absent; int(None)
    # raises TypeError, int('garbage') raises ValueError -- supply defaults
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0
    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50
    print(f"skip is {skp} and limit is {limt}")

    str_date = date.replace('-', '')
    print(f"the original str_date is {str_date}")

    if len(str_date) != 8:
        # Malformed route date: fall back to today.  `date` is a route string,
        # so the old `str(date.year) + ...` raised AttributeError; strftime also
        # zero-pads month/day (string concatenation produced e.g. '202315' for
        # 2023-01-05, which never matched the 998$z prefix).
        str_date = datetime.now().strftime('%Y%m%d')
    print(f"the str_date is {str_date}")

    # bibs stamped 998$z on that date and carrying 029$a == 'JN'
    query = QueryDocument(
        Condition(tag='998', subfields={'z': re.compile('^' + str_date)}),
        Condition(tag='029', subfields={'a': 'JN'}))
    print(query.to_json())

    start_time = datetime.now()
    bibset = BibSet.from_query(query,
                               projection={
                                   '029': 1,
                                   '091': 1,
                                   '191': 1,
                                   '245': 1,
                                   '269': 1,
                                   '650': 1,
                                   '991': 1
                               },
                               skip=skp,
                               limit=limt)
    print(f"duration for 998z was {datetime.now()-start_time}")

    start_time_xml = datetime.now()
    xml = bibset.to_xml()
    # removing double space from the xml; creates pbs with the job number on ODS export
    # NOTE(review): the first replace() argument reads as a single space here --
    # verify it should be two spaces
    xml = xml.replace(" ", " ")
    print(f"duration for xml serialization was {datetime.now()-start_time_xml}")
    return Response(xml, mimetype='text/xml')
def jsonf(date):
    '''
    outputs records in native central DB schema json format for the date which
    is provided as a dynamic route inputed in YYYYMMDD or YYYY-MM-DD
    e.g. /YYYY-MM-DD/json
    e.g. /YYYYMMDD/json?skip=n&limit=m
    skip=n URL parameter is used to skip n records. Default is 0.
    limit=m URL parameter is used to limit number of records returned. Default is 50.
    if the date is in wrong format the function returns today's records
    it uses DLX's bibset.to_json serialization function to output json
    '''
    # missing/garbage query params fall back to defaults
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0
    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50
    print(f"skip is {skp} and limit is {limt}")

    str_date = date.replace('-', '')
    print(f"the original str_date is {str_date}")
    if len(str_date) != 8:
        # fall back to today; strftime zero-pads month/day (the old
        # str(date.month) gave '1' instead of '01', so the 998$z prefix
        # regex below never matched for single-digit months/days)
        str_date = datetime.datetime.now().strftime('%Y%m%d')
    print(f"the str_date is {str_date}")

    # bibs stamped 998$z on that date and carrying 029$a == 'JN'
    query = QueryDocument(
        Condition(tag='998', subfields={'z': re.compile('^' + str_date)}),
        Condition(tag='029', subfields={'a': 'JN'}))

    bibset = BibSet.from_query(query,
                               projection={
                                   '029': 1,
                                   '091': 1,
                                   '191': 1,
                                   '245': 1,
                                   '269': 1,
                                   '650': 1,
                                   '991': 1,
                                   '998': 1
                               },
                               skip=skp,
                               limit=limt)
    # serialize each record with DLX's native json serializer
    return jsonify([bib.to_json() for bib in bibset.records])
def symbols(date):
    '''
    outputs records in txt format for the date which is provided as a dynamic
    route in YYYYMMDD or YYYY-MM-DD formats
    e.g. /YYYYMMDD/symbols
    /YYYY-MM-DD/symbols?skip=n&limit=m
    skip=n URL parameter is used to skip n records. Default is 0.
    limit=m URL parameter is used to limit number of records returned. Default is 50.
    if the date is in wrong format the function returns today's records
    it uses DLX bibset.to_str serialization to output plain text
    '''
    # missing/garbage query params fall back to defaults
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0
    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50
    print(f"skip is {skp} and limit is {limt}")

    str_date = date.replace('-', '')
    print(f"the original str_date is {str_date}")
    if len(str_date) != 8:
        # fall back to today; strftime zero-pads month/day (the old
        # str(date.month) gave '1' instead of '01' and never matched 998$z)
        str_date = datetime.datetime.now().strftime('%Y%m%d')
    print(f"the str_date is {str_date}")

    # bibs stamped 998$z on that date and carrying 029$a == 'JN'
    query = QueryDocument(
        Condition(tag='998', subfields={'z': re.compile('^' + str_date)}),
        Condition(tag='029', subfields={'a': 'JN'}))

    bibset = BibSet.from_query(query,
                               projection={
                                   '029': 1,
                                   '191': 1
                               },
                               skip=skp,
                               limit=limt)
    # join is linear; repeated `+=` on a string is quadratic
    str_out = ''.join(bib.to_str() for bib in bibset.records)
    return Response(str_out, mimetype='text/plain')
def show_xml(path):
    """Serve MARCXML (selected fields only) for bibs whose 191$a equals *path*."""
    fields = {
        '029': 1,
        '091': 1,
        '191': 1,
        '245': 1,
        '269': 1,
        '650': 1,
        '856': 1,
        '991': 1
    }
    # exact match on the document symbol in 191$a
    symbol_query = QueryDocument(
        Condition(tag='191', subfields={'a': path}))
    records = BibSet.from_query(symbol_query, projection=fields)
    # removing double space from the xml; creates pbs with the job number on ODS export
    body = records.to_xml().replace(" ", " ")
    return Response(body, mimetype='text/xml')
def show_symbols(path):
    """Return a JSON list of 191$a document symbols that start with *path*,
    sorted by the digits embedded in each (url-quoted) symbol.

    Relies on module-level names: QueryDocument, Condition, BibSet,
    Regex (bson), quote (urllib.parse), jsonify (Flask).
    """
    # escape regex metacharacters so the route segment is matched literally
    escaped = re.escape(path)
    query = QueryDocument(
        Condition(
            tag='191',
            subfields={'a': Regex('^' + escaped)},
        ),
    )
    print(f" the query is -- {query.to_json()}")

    bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
    # fetch each record's symbol once (the original called get_value twice
    # per record and kept an unused local)
    symbols_en = [bib.get_value('191', 'a') for bib in bibset.records]

    # numeric sort on the concatenated digits, e.g. 'A/RES/72/1' -> 721
    return_data = sorted((quote(symbol) for symbol in symbols_en),
                         key=lambda s: int(''.join(c for c in s if c.isdigit())))
    return jsonify(return_data)
def show_txt(path):
    """Return, for bibs whose 191$a equals *path*, one JSON object per record
    mapping '<tag>__<code>' to that field/subfield's list of values.

    Relies on module-level names: QueryDocument, Condition, BibSet,
    jsonify (Flask).
    """
    # exact match on the document symbol in 191$a
    query = QueryDocument(
        Condition(
            tag='191',
            subfields={'a': path}))
    bibset = BibSet.from_query(query)

    # (tag, subfield-code) pairs to export.  Duplicate pairs from the original
    # list -- ('245','a') and ('260','a') appeared twice -- are removed; they
    # only rewrote the same dict key with the same values, so output is unchanged.
    out_list = [('089', 'b'), ('091', 'a'), ('191', 'a'), ('191', 'b'),
                ('191', 'c'), ('191', '9'), ('239', 'a'), ('245', 'a'),
                ('245', 'b'), ('249', 'a'), ('260', 'a'), ('260', 'b'),
                ('260', 'c'), ('269', 'a'), ('300', 'a'), ('500', 'a'),
                ('515', 'a'), ('520', 'a'), ('596', 'a'), ('598', 'a'),
                ('610', 'a'), ('611', 'a'), ('630', 'a'), ('650', 'a'),
                ('651', 'a'), ('710', 'a'), ('981', 'a'), ('989', 'a'),
                ('989', 'b'), ('989', 'c'), ('991', 'a'), ('991', 'b'),
                ('991', 'c'), ('991', 'd'), ('992', 'a'), ('993', 'a'),
                ('996', 'a')]

    jsonl = []
    for bib in bibset.records:
        jsonl.append({f'{tag}__{code}': bib.get_values(tag, code)
                      for tag, code in out_list})
        print(f"for the bib {bib.get_values('191','a')}")
    return jsonify(jsonl)
def fetch_bib_data(self, proj_dict):
    """Fetch all bibs for this body/session, matched via 191, 791, or a 930
    'ITP<body><session>' marker, and return them as a list of records.

    proj_dict: MongoDB-style projection passed through to BibSet.from_query.
    """
    session_query = QueryDocument(
        Or(
            Condition(
                tag='191',
                subfields={'b': self.body + '/', 'c': self.session}
            ),
            Condition(
                tag='791',
                subfields={'b': self.body + '/', 'c': self.session}
            ),
            Condition(
                tag='930',
                subfields={'a': 'ITP' + self.body + self.session}
            )
        )
    )
    result_set = BibSet.from_query(session_query,
                                   projection=proj_dict,
                                   skip=0,
                                   limit=0)
    records = list(result_set.records)
    print(f"bibset length is : {len(records)}")
    return records
def votes(topic):
    '''
    looks up UNBIS thesaurus labels and returns matching T codes ..
    skip=n URL parameter is used to skip n records. Default is 0.
    limit=m URL parameter is used to limit number of records returned. Default is 50.
    it uses DLX authset to output fields 035 and 150
    '''
    # int(None)/int('garbage') raise, so the try/except supplies the defaults
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0
    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50

    # BUGFIX: request.args.get() never raises for a missing key -- it returns
    # None -- so the old try/except pattern never applied these defaults and
    # cntry/vt ended up None.  Pass the defaults to get() instead.
    yr_from = request.args.get('year_from', '1980')
    yr_to = request.args.get('year_to', '2020')
    cntry = request.args.get('Country', 'CANADA')
    vt = request.args.get('Vote', 'A')

    print(f"skip is {skp} and limit is {limt}")
    print(f"year_from is {yr_from} and year_to is {yr_to}")
    print(f"Country is {cntry}")
    print(f"Vote is {vt}")

    # auths whose 191$d mentions the topic and whose symbol (191$a) starts with 'A'
    query = QueryDocument(
        Condition(tag='191', subfields={'d': re.compile(str(topic))}),
        Condition(tag='191', subfields={'a': re.compile('^A')}))
    print(query.to_json())

    dict_auth_ids = {}
    authset = AuthSet.from_query(query,
                                 projection={
                                     '001': 1,
                                     '191': 1
                                 },
                                 skip=skp,
                                 limit=limt)
    for auth in authset:
        dict_auth_ids[auth.get_value('191', 'a')] = auth.get_value('001')

    dict_bibs = {}
    str_bibs = ''
    votecountry = ''
    for key, value in dict_auth_ids.items():
        print(f"the id of {key} is {value}")
        # voting-data bibs whose 991$d points at this auth id
        query_bib = QueryDocument(
            Condition(tag='991', subfields={'d': int(value)}),
            Condition(tag='989',
                      subfields={'a': re.compile(str('Voting Data'))}))
        print(query_bib.to_json())
        bibset = BibSet.from_query(query_bib,
                                   projection={
                                       '001': 1,
                                       '791': 1,
                                       '967': 1
                                   },
                                   skip=skp,
                                   limit=limt)
        for bib in bibset:
            for field in bib.get_fields('967'):
                # 967$d = country, 967$e = vote
                votecountry = field.get_value("d") + field.get_value("e")
                # keep the entries matching input query parameters using AND logic
                if str(votecountry) == str(vt) + str(cntry):
                    dict_bibs[bib.get_value('791', 'a')] = bib.get_value('001')
                    str_bibs = str_bibs + ' OR 791:[' + bib.get_value(
                        '791', 'a') + ']'
    print(str_bibs)
    return jsonify(dict_bibs)
def jsons(date):
    '''
    outputs Security Council bib records in plain simple json format for the
    date which is provided as a dynamic route in YYYYMMDD or YYYY-MM-DD formats
    e.g. /YYYY-MM-DD/xml?skip=n&limit=m
    skip=n URL parameter is used to skip n records. Default is 0.
    limit=m URL parameter is used to limit number of records returned. Default is 50.
    if the date is in wrong format the function returns today's records
    it is used to publish S/ records for iSCAD+ in a plain json
    22 July added fields 049:a and 260:a
    '''
    # missing/garbage query params fall back to defaults
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0
    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50
    print(f"skip is {skp} and limit is {limt}")

    str_date = date.replace('-', '')
    print(f"the original str_date is {str_date}")
    if len(str_date) != 8:
        # fall back to today; strftime zero-pads month/day (str(date.month)
        # gave '1' instead of '01' and never matched the 998$z prefix)
        str_date = datetime.datetime.now().strftime('%Y%m%d')
    print(f"the str_date is {str_date}")

    # bibs stamped 998$z on that date whose symbol (191$b) starts with 'S/'
    query = QueryDocument(
        Condition(tag='998', subfields={'z': re.compile('^' + str_date)}),
        Condition(tag='191', subfields={'b': re.compile('^S\/')}))

    export_fields = {
        '089': 1, '091': 1, '191': 1, '239': 1, '245': 1, '249': 1,
        '260': 1, '269': 1, '300': 1, '500': 1, '515': 1, '520': 1,
        '596': 1, '598': 1, '610': 1, '611': 1,
        # BUGFIX: the original read "'630:1,' '650': 1" -- implicit string
        # concatenation produced the single bogus key '630:1,650', silently
        # dropping both 630 and 650 from the projection
        '630': 1, '650': 1,
        '651': 1, '710': 1, '981': 1, '989': 1, '991': 1, '992': 1,
        '993': 1, '996': 1
    }
    bibset = BibSet.from_query(query,
                               projection=export_fields,
                               skip=skp,
                               limit=limt)

    # (tag, subfield-code) pairs exported per record.  Duplicate pairs from
    # the original list (('245','a'), ('260','a')) are removed; they only
    # rewrote the same dict key with the same values.
    out_list = [('089', 'b'), ('091', 'a'), ('191', 'a'), ('191', 'b'),
                ('191', 'c'), ('191', '9'), ('239', 'a'), ('245', 'a'),
                ('245', 'b'), ('249', 'a'), ('260', 'a'), ('260', 'b'),
                ('260', 'c'), ('269', 'a'), ('300', 'a'), ('500', 'a'),
                ('515', 'a'), ('520', 'a'), ('596', 'a'), ('598', 'a'),
                ('610', 'a'), ('611', 'a'), ('630', 'a'), ('650', 'a'),
                ('651', 'a'), ('710', 'a'), ('981', 'a'), ('989', 'a'),
                ('989', 'b'), ('989', 'c'), ('991', 'a'), ('991', 'b'),
                ('991', 'c'), ('991', 'd'), ('992', 'a'), ('993', 'a'),
                ('996', 'a')]

    jsonl = [{f'{tag}__{code}': bib.get_values(tag, code)
              for tag, code in out_list}
             for bib in bibset.records]
    return jsonify(jsonl)
def test_to_mrc(db):
    # to_mrc() must serialize the two fixture bibs into ISO 2709 / MARC
    # exchange format (directory + field data).
    # NOTE(review): a real .mrc string contains the non-printable field and
    # record terminators (0x1e/0x1f/0x1d); they are not visible in this control
    # literal -- verify the bytes are intact before trusting this test.
    from dlx.marc import BibSet
    control = '00224r|||a2200097|||4500008001300000245002400013520001600037520004300053650001100096710001900107controlfield aThisbis thectitle aDescription aAnother descriptionaRepeated subfield aHeader aAnother header00088r|||a2200049|||4500245002700000650001100027 aAnotherbis thectitle aHeader'
    assert BibSet.from_query({}).to_mrc() == control
def test_to_mrk(db):
    # to_mrk() renders each record in the human-readable .mrk form:
    # 'tag  indicators$a value', records separated by a blank line.
    # NOTE(review): runs of spaces between tag and indicators may have been
    # collapsed in transit -- verify against dlx's actual to_mrk output.
    from dlx.marc import BibSet
    control = '000 leader\n008 controlfield\n245 \\\\$aThis$bis the$ctitle\n520 \\\\$aDescription\n520 \\\\$aAnother description$aRepeated subfield\n650 \\\\$aHeader\n710 \\\\$aAnother header\n\n000 leader\n245 \\\\$aAnother$bis the$ctitle\n650 \\\\$aHeader\n'
    assert BibSet.from_query({}).to_mrk() == control
def test_to_xml(db):
    # to_xml() must emit MARCXML for both fixture bibs, including the $0
    # authority-id subfields on the 650/710 fields.  xmldiff returns an empty
    # list when the two documents are semantically identical.
    from dlx.marc import BibSet
    from xmldiff import main
    control = '<collection><record><controlfield tag="000">leader</controlfield><controlfield tag="008">controlfield</controlfield><datafield ind1=" " ind2=" " tag="245"><subfield code="a">This</subfield><subfield code="b">is the</subfield><subfield code="c">title</subfield></datafield><datafield ind1=" " ind2=" " tag="520"><subfield code="a">Description</subfield></datafield><datafield ind1=" " ind2=" " tag="520"><subfield code="a">Another description</subfield><subfield code="a">Repeated subfield</subfield></datafield><datafield ind1=" " ind2=" " tag="650"><subfield code="a">Header</subfield><subfield code="0">1</subfield></datafield><datafield ind1=" " ind2=" " tag="710"><subfield code="a">Another header</subfield><subfield code="0">2</subfield></datafield></record><record><controlfield tag="000">leader</controlfield><datafield ind1=" " ind2=" " tag="245"><subfield code="a">Another</subfield><subfield code="b">is the</subfield><subfield code="c">title</subfield></datafield><datafield ind1=" " ind2=" " tag="650"><subfield code="a">Header</subfield><subfield code="0">1</subfield></datafield></record></collection>'
    assert main.diff_texts(BibSet.from_query({}).to_xml(), control) == []
def xmlupdated(date):
    '''
    outputs records in MARCXML format for the date which is provided as a
    dynamic route in YYYYMMDD or YYYY-MM-DD formats
    /YYYYMMDD/xml?skip=n&limit=m
    skip=n URL parameter is used to skip n records. Default is 0.
    limit=m URL parameter is used to limit number of records returned. Default is 50.
    if the date is in wrong format the function returns today's records
    it uses DLX bibset.to_xml serialization function to output MARCXML
    '''
    # missing/garbage query params fall back to defaults
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0
    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50
    print(f"skip is {skp} and limit is {limt}")

    str_date = date.replace('-', '')
    print(f"the original str_date is {str_date}")

    if len(str_date) != 8:
        # Malformed route date: use today, from midnight, so the whole day is
        # covered as the docstring promises.  BUGFIX: the rest of this function
        # uses `datetime` as the class (datetime.fromisoformat, datetime.now,
        # bare timedelta), so the original `datetime.datetime.now()` raised
        # AttributeError on this path.
        date_from = datetime.combine(datetime.now().date(), datetime.min.time())
        str_date = date_from.strftime('%Y%m%d')
    else:
        date_year = str_date[0:4]
        date_month = str_date[4:6]
        date_day = str_date[6:8]
        date_from = datetime.fromisoformat(date_year + "-" + date_month + "-" +
                                           date_day)
    print(f"date_from is {date_from}")

    # bibs whose 'updated' timestamp falls within [date_from, date_from + 1 day)
    # and that carry 029$a == 'JN'
    dict_query = {
        "$and": [{
            "updated": {
                "$gte": date_from,
                "$lt": date_from + timedelta(days=1)
            }
        }, {
            "029.subfields.value": "JN"
        }]
    }
    print(f"dict query is {dict_query}")

    start_time = datetime.now()
    bibset = BibSet.from_query(dict_query,
                               projection={
                                   '029': 1,
                                   '091': 1,
                                   '191': 1,
                                   '245': 1,
                                   '269': 1,
                                   '650': 1,
                                   '991': 1
                               },
                               skip=skp,
                               limit=limt)
    xml = bibset.to_xml()
    # removing double space from the xml; creates pbs with the job number on ODS export
    xml = xml.replace(" ", " ")
    print(f"duration for updated was {datetime.now()-start_time}")
    return Response(xml, mimetype='text/xml')
def show_txt(path):
    '''displays the text of the document '''
    # Looks up extracted document text in the `txts_coll` Mongo collection by
    # exact document symbol; on a miss, extracts the text from the EN PDF via
    # PDFExtract/tika and caches it into txts_coll before answering.
    # Relies on module-level names: txts_coll, QueryDocument, Condition, BibSet,
    # Regex (bson), PDFExtract, parser (tika), Txt, jsonify (Flask), re.
    data = ""
    return_data = ""
    doc_list = []
    #path=quote(path)
    # escape regex metacharacters so the symbol is matched literally
    path = re.escape(path)
    '''
    i2 = urllib.parse.quote(i.encode("utf-8")) #need to deal with special characters in each url
    uu2 = urllib.parse.urljoin(uu, i2) #create url
    '''
    print(f" this is compiled path -- {'^' + str(path)+'$'}")
    # exact-match lookup in the cached-texts collection
    doc_list = list(
        txts_coll.find({"doc_sym": {
            "$regex": "^" + str(path) + "$"
        }}))
    if len(doc_list) == 0 and path != 'favicon.ico':
        # cache miss: extract the text from the bib's EN file and store it
        print(f"no exact DS {str(path)} - generating one")
        bib_value = ''
        #doc_list=list(txts_coll.find({"doc_sym":{"$regex":path}}))
        ''' extract text from DB'''
        #build list of tuples (striped_doc_sum, url to the pdf in s3)
        query = QueryDocument(
            Condition(tag='191', subfields={'a': Regex('^' + path + '$')}))
        #)
        print(f" the imp query is -- {query.to_json()}")
        bibset = BibSet.from_query(query, skip=0, limit=3)
        a_res_en = []
        # only proceed when the symbol resolves to exactly one bib
        if bibset.count == 1:
            for bib in bibset.records:
                bib_value = bib.get_value('191', 'a')
                # (symbol, s3 url of the EN file) tuples
                a_res_en.append(
                    (bib.get_value('191', 'a'),
                     'http://' + ''.join(bib.files('EN'))))
            print(a_res_en)
            for url in a_res_en:
                #txt_name = url.split('/')[-1]
                #url is a tuple ; url[0] is a DS; url[1] is a s3 link to the pdf
                txt_name = url[0]  # e.g. ARES721
                #txt_name = txt_name.split('.')[0] +'.txt'
                #txt_name = txt_name +'.txt'
                #txt_loc='\\txts\\'+txt_name
                # a very short "url" means no real file link was found
                if len(url[1]) > 10:
                    print(f" - - the {url[0]} is {url[1]} - -")
                    pdf = PDFExtract(url[1])
                    # tika parses the downloaded PDF bytes into text
                    parsed = parser.from_buffer(
                        pdf.get_txt_from_url(url[1]))
                    print(f"0----PDFExtract----0")
                    txt = Txt(bib.get_value('191', 'a'))
                    print(txt.set_txt(parsed["content"]))
                    txt.title = bib.get_value('245', 'a')
                    #txt.title=bib.get_value('239','a')
                    ''' load text into txts'''
                    if txt.txt is not None:
                        # upsert the extracted text keyed by document symbol
                        query = {"doc_sym": txt.symbol}
                        txts_coll.replace_one(query, txt.to_bson(),
                                              upsert=True)
        # re-query the cache now that extraction may have populated it
        doc_list = []
        doc_list = list(
            txts_coll.find({"doc_sym": {
                "$regex": "^" + str(path) + "$"
            }}))
        print(f" this is compiled path -- {'^' + str(path)+'$'}")
    if len(doc_list) == 1:
        print(f"-- it's a hit- 1")
        if doc_list[0]['doc_sym'][0] != 'S':
            return_data = doc_list[0]['raw_txt']
        else:
            #for SC docs - temporary measure
            # drop the non-serializable ObjectId before returning the document
            doc_1 = doc_list[0].pop('_id')
            return_data = doc_list[0]
    elif len(doc_list) > 1:
        # several symbols share the prefix: return their symbols, sorted by
        # the digits embedded in each
        print(f"-- it's a hit- many")
        return_data = sorted([doc['doc_sym'] for doc in doc_list],
                             key=lambda x: int(''.join(c for c in x
                                                       if c.isdigit())))
        #return_data=sorted(["<a href="+doc['doc_sym']+">" for doc in doc_list])
        #return_data=sorted([url_for('/'+doc_list[0]['raw_txt']) for doc in doc_list])
    if return_data == "":
        return jsonify('text with document symbol:%s was not found' % path)
    #return(render_template('ds.html', data=return_data))
    #print(return_data)
    return jsonify(return_data)
# Script: list every bib that has a 191 field and a 269$a starting with '1975',
# print each id/symbol, then dump the whole set as MARCXML.
from bson import Regex
from dlx import DB
from dlx.marc import BibSet, QueryDocument, Condition
from config import Config

# connect the dlx layer to the central database
DB.connect(Config.connect_string)

query = QueryDocument(Condition(tag='191', modifier='exists'),
                      Condition(tag='269', subfields={'a': Regex('^1975')}))
print(query.to_json())

# skip=0, limit=0 -> fetch the full result set
bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
print('There are {} results'.format(bibset.count))

# cache() materializes the records so they can be iterated and re-serialized
bibset.cache()
for bib in bibset.records:
    print('id: {}, symbol: {}'.format(bib.id, bib.get_value('191', 'a')))

print(bibset.to_xml())
def index():
    # Daily-documents landing page: pulls gDoc metadata for one duty station
    # and date, groups the entries by symbol, then decorates each symbol with
    # PDF (dlx file store) and UNDL links.
    # Relies on module-level names: request, render_template (Flask), Gdoc,
    # secrets, MetadataObject, FileObject, DLXFile, Query, BibSet, Config,
    # datetime.
    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    # default to yesterday when no ?date= is given; str() because the default
    # is a datetime.date while the query param is already a string
    date = str(request.args.get('date', yesterday))
    date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    duty_station = request.args.get('dutyStation', 'NY')

    # query the gDoc service for that station/date, metadata only
    g = Gdoc(username=secrets["username"], password=secrets["password"])
    g.set_param('dutyStation', duty_station)
    g.set_param('dateFrom', date)
    g.set_param('dateTo', date)
    g.set_param('includeFiles', 'false')

    # prev/next navigation; never link past today
    next_date = date_obj.date() + datetime.timedelta(days=1)
    if next_date > today:
        next_date = None
    prev_date = date_obj.date() - datetime.timedelta(days=1)

    # group one MetadataObject per symbol, accumulating its FileObjects
    symbol_objects = {}
    for d in g.data:
        m = MetadataObject(d)
        f = FileObject(d)
        if m.symbol1 not in symbol_objects:
            m.files.append(f)
            symbol_objects[m.symbol1] = m
        else:
            symbol_objects[m.symbol1].files.append(f)

    for s in symbol_objects:
        symbol2 = symbol_objects[s].symbol2
        # look up the EN file by one or both symbols
        # NOTE(review): matching a list value in Mongo requires exact
        # order/content of the 'identifiers' array -- confirm this is intended
        if len(symbol2) > 0:
            returned_files = DLXFile.find({
                'identifiers': [{
                    'type': 'symbol',
                    'value': s
                }, {
                    'type': 'symbol',
                    'value': symbol2
                }],
                'languages': ['EN']
            })
        else:
            returned_files = DLXFile.find({
                'identifiers': [{
                    'type': 'symbol',
                    'value': s
                }],
                'languages': ['EN']
            })
        for f in returned_files:
            symbol_objects[s].links.append(('PDF', f"https://{f.uri}"))

        # add a UNDL link when a bib with this 191$a exists; IndexError from
        # res[0] on an empty result is swallowed on purpose (no link added)
        query = Query.from_string(f'191__a:{s}')
        res = list(BibSet.from_query(query.compile()))
        try:
            my_s = res[0].get_value('191', 'a')
            symbol_objects[s].links.append(
                ('UNDL', f"{Config.dlx_endpoint}records/bibs/{res[0].id}"))
        except:
            pass

    return render_template('index.html',
                           duty_stations=Config.duty_stations,
                           data=symbol_objects,
                           date=date,
                           duty_station=duty_station,
                           next_date=next_date,
                           prev_date=prev_date)