def __init__(self, gApi=None):
    """Obtain an API handle, prepare search state and run the update.

    Args:
        gApi: Optional value forwarded to ``defineApi``. When it is omitted,
            or when ``defineApi(gApi)`` returns ``None``, ``defineApi()`` is
            called without arguments until it returns a non-``None`` value.

    Side effects:
        Calls ``self.firstTime()``, creates an ``EcoLexSearch`` stored on
        ``self.search``, resets ``self.entriesAdded`` to 0 and finally calls
        ``self.enterNewResults()``.
    """
    # NOTE(review): presumably gApi is a Google API key and a bare
    # defineApi() asks the user for one -- confirm against defineApi.
    self.gApi = defineApi(gApi) if gApi is not None else None

    # Retry until defineApi hands back something other than None.
    while self.gApi is None:
        self.gApi = defineApi()

    self.firstTime()
    self.search = EcoLexSearch()
    self.entriesAdded = 0
    self.enterNewResults()
def __init__(self, entryurl, gApi=None):
    """Populate the entry's attributes from the table found at ``entryurl``.

    Args:
        entryurl: Location of the entry; stored on ``self.entryurl`` before
            ``self.getHTMLTable()`` is called.
        gApi: Optional value forwarded to ``defineApi``; when omitted,
            ``defineApi()`` is called without arguments (the original
            comment describes this as requesting the Google API key).

    Attributes set:
        ecolex_id, name, country, date, legtype, source, fulltext, abstract,
        keywords, language, abstractEN, keywordsEN. Any of them may be
        ``None`` when the corresponding field is missing.
    """
    # Check for google api key, request if necessary.
    if gApi is not None:
        self.gApi = defineApi(gApi)
    else:
        self.gApi = defineApi()

    self.entryurl = entryurl
    table = self.getHTMLTable()
    fieldList = self.getFieldList(table)
    entryList = self.getEntryList(table)

    # NOTE(review): several labels below are missing their final letter
    # ('Title of tex', 'Abstrac', ...). They are kept exactly as found;
    # presumably getEntry/getUrl match on a prefix -- confirm before editing.
    self.ecolex_id = self.getEntry('Legislation ID number', fieldList, entryList)
    self.name = self.getEntry('Title of tex', fieldList, entryList)
    self.country = self.getEntry('Country', fieldList, entryList)
    self.date = self.getEntry('Date of tex', fieldList, entryList)
    self.legtype = self.getEntry('Type of documen', fieldList, entryList)
    self.source = self.getEntry('Source', fieldList, entryList)
    self.fulltext = self.getUrl('Link to full tex', fieldList, entryList)
    self.abstract = self.getEntry('Abstrac', fieldList, entryList)

    # Concatenate keywords and subjects, skipping whichever is missing;
    # the result is None only when both are missing.
    keywordsA = self.getEntry('Keyword(s)', fieldList, entryList)
    keywordsB = self.getEntry('Subject(s)', fieldList, entryList)
    present = [kw for kw in (keywordsA, keywordsB) if kw is not None]
    self.keywords = '; '.join(present) if present else None

    # Without an abstract there is no text sample to detect the language
    # from, so nothing is translated.
    if self.abstract is None:
        self.language = None
        self.abstractEN = None
        self.keywordsEN = None
        return

    # Detect the language from the first five space-separated words.
    languageSample = ' '.join(self.abstract.split(' ')[0:5])
    self.language = identify(languageSample, self.gApi)

    if self.language == 'en':
        self.abstractEN = self.abstract
        self.keywordsEN = self.keywords
        return

    translationAB = translate(self.abstract, self.language, 'en', self.gApi)
    self.abstractEN = to_unicode(translationAB)

    # Translate the keywords themselves; the previous code translated the
    # abstract a second time and stored that as keywordsEN.
    if self.keywords is not None:
        translationKW = translate(self.keywords, self.language, 'en', self.gApi)
        self.keywordsEN = to_unicode(translationKW)
    else:
        self.keywordsEN = None