def createPasscode2ResourceIdsAndResourceIds2SubmisionIds(self, verbose):
    """Build the passcode/resource-id/submission-id lookup tables.

    Walks every resource object stored in ``self.__p2ris`` (a mapping from
    passcode to a list of resource objects) and derives two string ids for
    each one:

    * ``subid`` -- conference + year, separator, passcode;
    * ``rid``   -- ``subid`` followed by type, normalised name and
      normalised production status, all joined with ``self.__RIDSEP__``.

    The results are handed to ``set_p_2ris`` (passcode -> rids),
    ``set_p_2subs`` (passcode -> subids) and ``set_rid_2ris``
    (rid -> resource objects).  A rid or subid is attached only to the
    first passcode under which it is encountered, but every resource
    object is appended to its rid's list.

    :param verbose: when equal to 1 a progress line is printed.
    """
    name = "SerializeResource.createPasscode2ResourceIdsAndResourceIds2SubmisionIds"
    p2ri = collections.defaultdict(list)     # passcode -> resource ids
    p2s = collections.defaultdict(list)      # passcode -> submission ids
    rid2ris = collections.defaultdict(list)  # resource id -> resource objects
    # Sets instead of lists: the membership tests below run once per
    # resource, and list lookups made the whole pass quadratic.
    seen_rids = set()
    seen_subids = set()

    if verbose == 1:
        print("\tExecuting " + name)

    sep = self.__RIDSEP__
    for p, ris in self.__p2ris.iteritems():
        for ri in ris:
            status = pruneName(ri.get_prodstatus(), "")
            status = modifyString(status, "/", "_")
            pname = pruneName(ri.get_name(), "")
            pname = modifyString(pname, "/", "-")
            # NOTE(review): this call was a no-op (replacing "&" with "&");
            # restored to the XML escape it presumably was before an
            # entity-decoding accident -- confirm against history.
            pname = pname.replace("&", "&amp;")

            subid = ri.get_conf() + ri.get_year() + sep + ri.get_passcode()
            rid = subid + sep + ri.get_type() + sep + pname + sep + status

            if rid not in seen_rids:
                seen_rids.add(rid)
                p2ri[p].append(rid)
            # Every resource object is kept, including repeated rids.
            rid2ris[rid].append(ri)

            if subid not in seen_subids:
                seen_subids.add(subid)
                p2s[p].append(subid)

    self.set_p_2ris(p2ri)
    self.set_p_2subs(p2s)
    self.set_rid_2ris(rid2ris)
def serializeResourcesIntoManyFile(self, verbose):
    """Serialise every resource into its own RDF/XML file plus an index.

    An index file (``self.outfile + self.__INDEXOFRES__``) is started as a
    copy of ``self.headerfile`` and receives one ``rdf:Description`` entry
    per resource object.  For each resource object a separate file is
    written under ``self.outfile``, in a two-level directory derived from
    the first one and first three characters of ``shasum(rid)``.  That file
    is also started as a copy of the header file and then receives the
    resource individual, the classes for resource types not listed in
    ``self.__RTYPES__`` and the individuals for names, availabilities,
    statuses, modalities and uses collected for the current rid.

    Finally the per-type counters of the last processed rid are passed to
    ``set_newrtype_num_of_instances`` (the counters are reset for every
    rid, as in the original code).

    :param verbose: when equal to 1 progress/warning lines are printed.
    """
    name = "SerializeResource.serializeResourcesIntoManyFile"
    # Stays empty if there is nothing to serialise; rebuilt for every rid.
    newrtype = collections.defaultdict(set)

    if verbose == 1:
        print("\t\tExecuting " + name)

    t1 = self.__t1
    t2 = self.__t2

    def normalise(value, old):
        # Shared clean-up chain: project helpers, then XML-escape '&'.
        # NOTE(review): the escape was a no-op ("&" -> "&") before; restored
        # to "&amp;" so names containing '&' do not yield malformed XML.
        value = pruneName(value, "")
        value = modifyString(value, old, "-")
        return value.replace("&", "&amp;")

    def write_property(out, prop, value, known, collected, prefix):
        # Emit one <lremap:PROP rdf:resource=.../> line; values outside the
        # known vocabulary are remembered so an individual is declared later.
        if value not in known:
            collected.add(value)
            line = (t2 + "<lremap:" + prop + " rdf:resource=\"&ri;"
                    + prefix + "/" + value + "\"/>")
        else:
            line = (t2 + "<lremap:" + prop + " rdf:resource=\"&lremap;"
                    + value + "\"/>")
        out.write((line + "\n").encode('utf-8'))

    def write_individuals(out, values, start, end, rdf_type, prefix, tail):
        # Declare one owl:NamedIndividual of type RDF_TYPE per value, each
        # wrapped in its START/END marker pair.
        for value in values:
            out.write(start.encode('utf-8'))
            line = (t1 + "<owl:NamedIndividual rdf:about=\"&ri;"
                    + prefix + "/" + value + "\">")
            out.write((line + "\n").encode('utf-8'))
            line = t2 + "<rdf:type rdf:resource=\"&lremap;" + rdf_type + "\"/>"
            out.write((line + "\n").encode('utf-8'))
            line = t1 + "</owl:NamedIndividual>"
            out.write((line + tail).encode('utf-8'))
            out.write(end.encode('utf-8'))

    # Index file: header copy, then appended to.
    idxfile = self.outfile + self.__INDEXOFRES__
    copy2(self.headerfile, idxfile)
    iFile = codecs.open(idxfile, 'a', 'utf-8')
    try:
        iFile.write((self.__STARTRES__ + "\n").encode('utf-8'))

        for p, rids in self.get_p_2ris().iteritems():
            for rid in rids:
                # Per-rid accumulators for the declarations written below.
                rtype_subclassed = set()
                newrtype = collections.defaultdict(set)
                names_serialized = set()
                avails_serialized = set()
                status_serialized = set()
                mods_serialized = set()
                uses_serialized = set()

                ris = self.get_rid_2ris().get(rid)
                if ris is None:
                    continue

                for ri in ris:
                    # Target directory: <outfile>/<RES><h[:1]>/<h[:3]>
                    rid1 = shasum(rid)
                    first_order_folder = str(rid1[:1])
                    second_order_folder = str(rid1[0:3])
                    append_res = (self.__RES__ + first_order_folder
                                  + "/" + second_order_folder)
                    # One recursive makedirs on the directory that is
                    # actually written to.  The old code tested one
                    # first-level path but created a differently spelled
                    # one (extra "/").
                    target_dir = self.outfile + "/" + append_res
                    if not os.path.exists(target_dir):
                        os.makedirs(target_dir)

                    # Index entry for this resource.
                    iLine = (t1 + "<rdf:Description rdf:about=\"&ri;" + "/"
                             + append_res + "/" + str(rid1) + "\">")
                    iFile.write((iLine + "\n").encode('utf-8'))
                    label = "Conference: " + ri.get_conf() + " and "
                    label = label + "Year: " + ri.get_year() + " and "
                    label = label + "Passcode: " + ri.get_passcode() + " and "
                    label = label + "Type: " + ri.get_type() + " and "
                    label = (label + "Name: "
                             + ri.get_name().decode(encoding='UTF-8',
                                                    errors='strict')
                             + " ")
                    # The label is unicode after decode(); it is written
                    # without a further .encode(), as in the original.
                    iLine = t2 + "<rdfs:label>" + label + "</rdfs:label>"
                    iFile.write((iLine + "\n"))
                    iLine = t1 + "</rdf:Description>"
                    iFile.write((iLine + "\n"))

                    # Resource type: known types live in &lremap;, unknown
                    # ones are declared as new classes and counted.
                    rtype = normalise(ri.get_type(), "/")
                    if rtype not in self.__RTYPES__:
                        ns = "&ri;#"
                        if rtype not in rtype_subclassed:
                            rtype_subclassed.add(rtype)
                            newrtype[rtype].add(1)
                        else:
                            counts = newrtype.get(rtype)
                            if counts is not None:
                                # Replace the stored counter by counter+1.
                                for num in list(counts):
                                    newrtype[rtype] = set([num + 1])
                            elif verbose == 1:
                                # Was self.verbose; the parameter is used
                                # everywhere else in this method.
                                print("\t\t Warning resource type " + rtype
                                      + " NOT YET ENCOUNTERED")
                    else:
                        ns = "&lremap;"

                    # Per-resource file: header copy, then appended to.
                    dst = self.outfile + "/" + append_res + "/" + rid1
                    copy2(self.headerfile, dst)
                    oFile = codecs.open(dst, 'a', 'utf-8')
                    try:
                        line = (t1 + "<owl:NamedIndividual rdf:about=\"&ri;"
                                + append_res + "/" + str(rid1) + "\">")
                        oFile.write((line + "\n").encode('utf-8'))
                        line = (t2 + "<rdf:type rdf:resource=\""
                                + ns + rtype + "\"/>")
                        oFile.write((line + "\n").encode('utf-8'))

                        rname = normalise(ri.get_name(), "^")
                        names_serialized.add(rname)

                        avail = ri.get_avail()
                        if avail is not None:
                            write_property(oFile, "hasResourceAvailability",
                                           normalise(avail, "^"),
                                           self.__RAVAILS__,
                                           avails_serialized, append_res)

                        status = ri.get_prodstatus()
                        if (status is not None) and (status != "NoStatus"):
                            write_property(oFile, "hasResourceStatus",
                                           normalise(status, "^"),
                                           self.__RSTATUS__,
                                           status_serialized, append_res)

                        mod = ri.get_modality()
                        if mod is not None:
                            write_property(oFile, "hasResourceModality",
                                           normalise(mod, "^"),
                                           self.__RMODS__,
                                           mods_serialized, append_res)

                        use = ri.get_resourceusage()
                        if use is not None:
                            write_property(oFile, "hasResourceUse",
                                           normalise(use, "^"),
                                           self.__RUSES__,
                                           uses_serialized, append_res)

                        # Unicode line: written without .encode().
                        line = (t2 + "<lremap:hasResourceName "
                                "rdf:resource=\"&lremap;"
                                + rname.decode(encoding='UTF-8',
                                               errors='strict')
                                + "\"/>")
                        oFile.write(line + "\n")

                        # Submissions referenced through the passcode.
                        pids = self.get_p_2subs().get(p)
                        if pids is not None:
                            for pid in pids:
                                # "&sub;" entity restored; the source held
                                # the decoded character instead.
                                line = (t2 + "<dcterms:references "
                                        "rdf:resource=\"&sub;#"
                                        + str(pid) + "\"/>")
                                oFile.write((line + "\n").encode('utf-8'))

                        line = t1 + "</owl:NamedIndividual>"
                        oFile.write((line + "\n").encode('utf-8'))
                        oFile.write((self.__ENDRES__ + "\n").encode('utf-8'))

                        # Classes for resource types outside __RTYPES__.
                        oFile.write((self.__STARTRESCLS__).encode('utf-8'))
                        for rc in rtype_subclassed:
                            line = (t1 + "<owl:Class rdf:about=\"&ri;"
                                    + append_res + "/" + rc + "\">")
                            oFile.write((line + "\n").encode('utf-8'))
                            line = (t2 + "<rdfs:subClassOf "
                                    "rdf:resource=\"&lremap;ResourceType\"/>")
                            oFile.write((line + "\n").encode('utf-8'))
                            line = t1 + "</owl:Class>"
                            oFile.write((line + "\n").encode('utf-8'))
                        oFile.write((self.__ENDRESCLS__).encode('utf-8'))

                        # Resource-name individuals ("-" means no name).
                        # NOTE(review): START/END markers are emitted per
                        # individual; in the flattened source the START
                        # write directly follows the loop header -- confirm.
                        for known_name in names_serialized:
                            oFile.write(
                                (self.__STARTRESNAMES__).encode('utf-8'))
                            line = (t1 + "<owl:NamedIndividual "
                                    "rdf:about=\"&lremap;" + append_res + "/"
                                    + known_name.decode(encoding='UTF-8',
                                                        errors='strict')
                                    + "\">")
                            oFile.write(line + "\n")
                            if known_name != "-":
                                line = (t2 + "<rdf:type "
                                        "rdf:resource=\"&lremap;ResourceName\"/>")
                            else:
                                line = (t2 + "<rdf:type "
                                        "rdf:resource=\"&lremap;NoName\"/>")
                            oFile.write((line + "\n").encode('utf-8'))
                            line = t1 + "</owl:NamedIndividual>"
                            oFile.write((line + "\n").encode('utf-8'))
                            oFile.write(
                                (self.__ENDRESNAMES__).encode('utf-8'))

                        write_individuals(oFile, avails_serialized,
                                          self.__STARTRESAVAILS__,
                                          self.__ENDRESAVAILS__,
                                          "ResourceAvailability",
                                          append_res, "\n")
                        write_individuals(oFile, status_serialized,
                                          self.__STARTRESSTATUS__,
                                          self.__ENDRESSTATUS__,
                                          "ResourceStatus",
                                          append_res, "\n\n")
                        write_individuals(oFile, mods_serialized,
                                          self.__STARTRESMODS__,
                                          self.__ENDRESMODS__,
                                          "ResourceModality",
                                          append_res, "\n\n")
                        write_individuals(oFile, uses_serialized,
                                          self.__STARTRESUSES__,
                                          self.__ENDRESUSES__,
                                          "ResourceUse",
                                          append_res, "\n\n")

                        oFile.write(
                            (self.__CLOSELINE__ + "\n").encode('utf-8'))
                    finally:
                        # Previously never closed: flushing depended on
                        # garbage collection of the handle.
                        oFile.close()

        iFile.write((self.__ENDRESCLS__).encode('utf-8'))
        iFile.write((self.__CLOSELINE__).encode('utf-8'))
    finally:
        iFile.close()

    # Counters of the last processed rid (empty if nothing was processed).
    self.set_newrtype_num_of_instances(newrtype)