def route_compfileread(filepaths):
    """Decompress each .gz file in *filepaths* into PCFileFolder.

    Files that cannot be extracted are logged and recorded in the
    analysis-results file. Every input file is removed afterwards.
    """
    for filepath in filepaths:
        # BUG FIX: derive the names before the try block so they are
        # defined in the except handler even when opening the archive
        # fails (previously a NameError on compfilename was possible).
        compfilename = utility.filename_from_filepath(filepath)
        # extracting the original file name (strip the .gz suffix)
        filename = compfilename.split('.gz')[0]
        try:
            # extracting data from .gz file; `with` guarantees the handle
            # is closed even if read() raises
            with gzip.GzipFile(filepath, 'rb') as gzipfile:
                gzipdata = gzipfile.read()
            print(filename)
            # creating file and writing data
            with open(config.ConfigManager().PCFileFolder + '/' + filename,
                      'wb') as uncompfile:
                uncompfile.write(gzipdata)
        except BaseException as ex:
            utility.log_exception_with_filepath(ex, filepath)
            # writing to file the file names that cannot be extracted
            # using gzip
            utility.write_to_file(
                config.ConfigManager().PCDataAnalysisResultsFile, 'a',
                compfilename + ' cannot be extracted')
        # NOTE(review): original indentation was ambiguous; the source
        # file is assumed to be removed whether or not extraction
        # succeeded -- confirm against the companion scripts.
        os.remove(filepath)
def automate_processes():
    """Run the PromptCloud pipeline scripts in order, logging any failure."""
    utility.write_to_file(config.ConfigManager().PromptcloudLogFile, 'a',
                          'PromptCloudautomationscript running')
    # Pipeline stages, executed sequentially in this module's globals.
    pipeline = (
        # download files into PCCompData with in mnt/nlpdata, xml format
        'pc_download_crawldata_threading.py',
        # compress the PCCompdata folder
        'compress.py',
        # unzip files created in PCData folder; time stored in dataloadconfig
        'pc_unzip_gz.py',
        # download data into pcdataanalysisresults.ods
        'analyze_crawldata.py',
        # 'mailsend.py',  # for automatically sending emails (disabled)
        # store analysis file in s3 backup
        'pcdataanalysisbackup.py',
    )
    try:
        for script in pipeline:
            exec(open(script).read(), globals())
    except BaseException as ex:
        message = '\n' + 'Exception:' + str(datetime.datetime.now()) + '\n'
        message += 'File: ' + '\n'
        message += '\n' + str(ex) + '\n'
        message += '-' * 100  # .encode('utf8'))
        utility.write_to_file(config.ConfigManager().PromptcloudLogFile, 'a',
                              message)
def automate_processes():
    """Run the full master data-load pipeline, one script at a time."""
    utility.write_to_file(config.ConfigManager().LogFile, 'a',
                          ' master automationscript running')
    pipeline = (
        'st_master_supplier_data_read.py',  # Supplier master list load
        'stclientsdataread.py',             # Client master list load
        'currencydataread.py',              # Currency master list load
        'industrydataread.py',              # Industry master list load
        'stmspdataread.py',                 # MSP master list load
        'stratesdataread.py',     # Rates information transfer from Smart Track
        'prompt_cloud_automation.py',       # PromptCloud data load automation
        'staging_data_read.py',   # Staging collection -> masters collection
        'gen_docintgraph_from_db.py',       # Generating master integer graph
        'master_int_graph_transfer.py',     # Transferring file to webserver
        'knowledge_build_automation.py',    # Learning automation
    )
    try:
        utility.update_config_coll_process_started_date()
        for script in pipeline:
            exec(open(script).read(), globals())
    except BaseException as ex:
        utility.log_exception_file(config.ConfigManager().LogFile, ex)
def analyze_data(filepaths):
    """Parse each crawl XML file, store its rate records, then delete it.

    For every file path: read the XML tree, and (while the configured
    record limit has not been reached) convert each <page> element to a
    dict and hand it to pc_rates_data_storage(), which accumulates the
    running dbrecordcount. The file is removed after processing.

    NOTE(review): when PromptCloudRecordLimitSet != "Yes" no pages are
    processed at all, yet the file is still removed -- confirm this is
    the intended behaviour (there is no else branch).
    """
    # Analysis counters/maps shared with the other analysis helpers.
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict
    filecount = 0
    dbrecordcount = 0
    # looping through file paths
    for filepath in filepaths:
        filecount += 1
        print(filepath)
        print('Processing file number: ' + str(filecount))
        # getting xml tree from file
        tree = datareadfiletypes.read_xml_tree(filepath)
        # drilling xml to get the job info tag contents
        if config.ConfigManager().PromptCloudRecordLimitSet == "Yes":
            # The limit is checked per file, so the final file processed
            # may push the count past PromptCloudRecordLimit.
            if dbrecordcount < int(
                    config.ConfigManager().PromptCloudRecordLimit):
                for page in tree.getroot().findall('page'):
                    # dbrecordcount = job_info_analysis(page, filepath, dbrecordcount)
                    page_dict_object = utility.xml_to_dict(ET.tostring(page))
                    dbrecordcount = pc_rates_data_storage(
                        page_dict_object, filepath, dbrecordcount)
        print(str(datetime.datetime.now()))
        os.remove(filepath)
def insert_to_db(dict_object_record_list):
    """Bulk-insert the given record dicts into the staging collection.

    Uses the module-level MongoDB ``connection``; targets RatesDB /
    stagingCollection ("dummy collection PCRatesDataColl" per the
    original note).
    """
    # dummy collection PCRatesDataColl
    global connection
    custom.insert_data_to_DB_dBCollection(
        dict_object_record_list, config.ConfigManager().stagingCollection,
        connection, config.ConfigManager().RatesDB)
def nounphrase_generate():
    """Queue every intelligence document whose nounPhrases field is empty.

    Fetches matching documents (description + ids only) from the
    intelligence collection and puts a work item carrying the text, the
    ids, and a shared DB connection onto the module-level queue ``q``.
    """
    c = MongoClient(dcrconfig.ConfigManager().Datadb)
    db = c[config.ConfigManager().IntelligenceDb]
    col = db[config.ConfigManager().IntelligenceDataCollection]
    # Project only the fields the workers need.
    docs = col.find({'nounPhrases': ""}, {
        "description": 1,
        "doc_id": 1,
        "_id": 1
    })
    mongoport = int(config.ConfigManager().MongoDBPort)
    connection = dbmanager.mongoDB_connection(mongoport)
    for doc in docs:
        try:
            data = {}
            data['desc'] = doc['description']
            data['_id'] = doc['_id']
            data['doc_id'] = doc['doc_id']
            data['connection'] = connection
            q.put(data)
        except BaseException as ex:
            # BUG FIX: the timestamp was a dangling expression statement
            # (lost line-continuation) and never made it into the logged
            # message; concatenate it properly as the sibling loggers do.
            exception_message = '\n' + 'Exception:' + \
                str(datetime.datetime.now()) + '\n'
            exception_message += 'File: ' + '\n'
            exception_message += '\n' + str(ex) + '\n'
            exception_message += '-' * 100
            utility.write_to_file(
                dcrconfig.ConfigManager().SemanticGraphLogFile, 'a',
                exception_message)
def route_compfileread(filepaths):
    """Decompress each .gz file in *filepaths* into PCRatesFileFolder.

    Failures are logged with the file path; every input file is removed
    afterwards.
    """
    for filepath in filepaths:
        # Derive the output name up front; path parsing cannot depend on
        # the archive being readable.
        compfilename = utility.filename_from_filepath(filepath)
        # extracting the original file name (strip the .gz suffix)
        filename = compfilename.split('.gz')[0]
        try:
            # extracting data from .gz file; `with` guarantees the handle
            # is closed even if read() raises
            with gzip.GzipFile(filepath, 'rb') as gzipfile:
                gzipdata = gzipfile.read()
            print(filename)
            # creating file and writing data
            with open(
                    config.ConfigManager().PCRatesFileFolder + '/' + filename,
                    'wb') as uncompfile:
                uncompfile.write(gzipdata)
        except BaseException as ex:
            utility.log_exception_file_and_filepath(
                ex, config.ConfigManager().PromptcloudLogFile, filepath)
        # NOTE(review): original indentation was ambiguous; the source
        # file is assumed to be removed whether or not extraction
        # succeeded -- confirm against the companion scripts.
        os.remove(filepath)
def valid_records():
    """Log, per site, the count of valid records (totals minus invalid)."""
    global totaljobsdict
    global jobsitedict
    results_file = config.ConfigManager().PCDataAnalysisResultsFile
    # Subtract each site's invalid count from its total; sites with no
    # invalid entries default to zero.
    validjobsdict = {site: total - jobsitedict.get(site, 0)
                     for site, total in totaljobsdict.items()}
    utility.write_to_file(results_file, 'a',
                          'Total valid records per site: ')
    utility.write_to_file(results_file, 'a', str(validjobsdict))
def update_DB(configdocs, latestdate):
    """Persist *latestdate* as PClastDate on the first config document.

    ``configdocs`` is expected to be a non-empty sequence of config
    documents; only configdocs[0] is updated.

    NOTE(review): mutates the shared ``dictionaries`` module templates in
    place (UpdateTemplateSet/Where, DBSet); concurrent callers would race
    on them -- confirm single-threaded use.
    """
    connection = dbmanager.mongoDB_connection(
        int(config.ConfigManager().MongoDBPort))
    # Reset the shared templates before reuse.
    dictionaries.UpdateTemplateSet = {}
    dictionaries.UpdateTemplateWhere = {}
    dictionaries.UpdateTemplateSet['PClastDate'] = latestdate
    dictionaries.UpdateTemplateWhere['_id'] = configdocs[0]['_id']
    dictionaries.DBSet['$set'] = dictionaries.UpdateTemplateSet
    custom.update_data_to_Db_noupsert(int(config.ConfigManager().MongoDBPort),
                                      config.ConfigManager().DataCollectionDB,
                                      config.ConfigManager().ConfigCollection,
                                      dictionaries.UpdateTemplateWhere,
                                      dictionaries.DBSet, connection)
def nounphrase_generate():
    """Generate and store noun phrases for each matching document.

    Retrieves rows selected by the shared DBWhereConditon, extracts noun
    phrases from each row's description, and writes them back to the same
    document; per-row failures are logged and the loop continues.

    NOTE(review): reuses the shared ``dictionaries`` templates across
    iterations without resetting them -- confirm no concurrent user.
    """
    docs = custom.retrieve_rowdata_from_DB(
        int(config.ConfigManager().MongoDBPort),
        config.ConfigManager().DataCollectionDB,
        config.ConfigManager().DataCollectionDBCollection,
        dictionaries.DBWhereConditon)
    connection = dbmanager.mongoDB_connection(
        int(config.ConfigManager().MongoDBPort))
    description = ''
    for doc in docs:
        try:
            description = doc['description']
            noun_phrases = dcrnlp.extract_nounphrases_sentences(description)
            dictionaries.UpdateTemplateSet['nounPhrases'] = noun_phrases
            dictionaries.UpdateTemplateWhere['_id'] = doc['_id']
            dictionaries.DBSet['$set'] = dictionaries.UpdateTemplateSet
            custom.update_data_to_Db_con(
                int(config.ConfigManager().MongoDBPort),
                config.ConfigManager().DataCollectionDB,
                config.ConfigManager().DataCollectionDBCollection,
                dictionaries.UpdateTemplateWhere, dictionaries.DBSet,
                connection)
        except BaseException as ex:
            exception_message = '\n' + 'Exception:' + \
                str(datetime.datetime.now()) + '\n'
            exception_message += 'File: ' + '\n'
            exception_message += '\n' + str(ex) + '\n'
            exception_message += '-' * 100
            utility.write_to_file(config.ConfigManager().LogFile, 'a',
                                  exception_message)
def automate_processes():
    """Run the Smart Track data pipeline scripts sequentially."""
    utility.write_to_file(config.ConfigManager().LogFile, 'a',
                          'stautomationscript running')
    pipeline = (
        'stdataread.py',                 # Requirement/candidate data from ST
        'resume_extract.py',             # Extract candidate resumes
        'resumeread.py',                 # Update text into 'resumeText' field
        'resume_append.py',              # Append 'resumeText' to description
        'stnounphrase_generate.py',      # Noun phrases for candidate table
        'requirement_update_fastest.py',  # Requirements/rates for candidates
        'submission_status_update.py',   # Update changed candidate statuses
        'req_desc_file_extract.py',      # Extract requirement description files
        'req_desc_file_read.py',         # Update text into 'reqFileDesc' field
        'req_desc_file_append.py',       # Append 'reqFileDesc' to description
        'streqnounphrase_generate.py',   # Noun phrases for requirement tables
        'stsupplierdataread.py',         # Get supplier info
        'contactinfodetect.py',          # Candidate resume screening
        # Master list loads are run from the master automation instead:
        # 'stclientsdataread.py', 'currencydataread.py',
        # 'industrydataread.py', 'stmspdataread.py',
        'stcandidateCurrency_update_fastest.py',  # currency code update
    )
    try:
        for script in pipeline:
            exec(open(script).read(), globals())
    except BaseException as ex:
        message = '\n' + 'Exception:' + str(datetime.datetime.now()) + '\n'
        message += 'File: ' + '\n'
        message += '\n' + str(ex) + '\n'
        message += '-' * 100  # .encode('utf8'))
        utility.write_to_file(config.ConfigManager().LogFile, 'a', message)
def updateconfigcollection(docid, dateTime, whereID):
    """Record the last transferred doc_id and staging timestamp.

    Updates (no upsert) the rates config document identified by *whereID*
    with masterDocId=*docid* and stagingDateModified=*dateTime*.
    """
    port = int(config.ConfigManager().MongoDBPort)
    connection = dbmanager.mongoDB_connection(port)
    # Build the where/set clauses with the project's clean-dict helper.
    where_clause = utility.clean_dict()
    where_clause['_id'] = whereID
    set_clause = utility.clean_dict()
    set_clause['masterDocId'] = docid
    set_clause['stagingDateModified'] = dateTime
    update_spec = utility.clean_dict()
    update_spec['$set'] = set_clause
    custom.update_data_to_Db_noupsert(
        port, config.ConfigManager().RatesDB,
        config.ConfigManager().RatesConfigCollection, where_clause,
        update_spec, connection)
def send_config(self):
    """Prompt for a sending mode and a config file, then push the config
    to every copter currently selected in the model."""
    mode, ok = QInputDialog.getItem(self, "Select config sending mode",
                                    "Mode:", ("Modify", "Rewrite"), 0, False)
    # Bail out if the dialog was cancelled or returned no mode.
    if not ok or not mode:
        return
    path = QFileDialog.getOpenFileName(
        self, "Select configuration file",
        filter="Configs (*.ini *.txt *.cfg)")[0]
    if not path:
        return
    config = cfg.ConfigManager()
    config.load_only_config(path)
    # Only explicitly-set values are sent, not defaults.
    data = config.full_dict(include_defaults=False)
    logging.info(f"Loaded config from {path}")
    for copter in self.model.user_selected():
        copter.client.send_message(
            "config", kwargs={"config": data, "mode": mode.lower()})
def pc_rates_data_storage(page_dict_object, filepath, dbrecordcount):
    """Collect the records of one parsed <page> and bulk-insert them.

    Normalizes the page's record(s), skips oversized descriptions, and
    inserts whatever was accepted via insert_to_db(). Returns the updated
    running *dbrecordcount*.
    """
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict
    dict_object_record_list = []
    try:
        page_object_list = page_dict_object['page']
        records = page_object_list['record']
        # A page with a single record parses as a dict, not a list;
        # normalize so one loop handles both (removes duplicated branch).
        if not isinstance(records, list):
            records = [records]
        for record_object in records:
            record_object = pc_rates_add_fields(record_object, filepath)
            # Guard against oversized descriptions (13 MB size limit).
            if sys.getsizeof(record_object['description']) < 13000000:
                dict_object_record_list.append(record_object)
                dbrecordcount += 1
    except BaseException as ex:
        utility.log_exception_file(ex,
                                   config.ConfigManager().PromptcloudLogFile)
    if dict_object_record_list:
        insert_to_db(dict_object_record_list)
    # updating doc_id in config table
    return dbrecordcount
def main():
    """Parse CLI args, load config, and serve the stats gRPC service."""
    arg_parser = argparse.ArgumentParser(description="""
    Service to calculate statistics
    """)
    arg_parser.add_argument('--config', help='configuration file',
                            default=None)
    cli = arg_parser.parse_args()
    settings = config.ConfigManager()
    if cli.config is not None:
        with open(cli.config, "r") as conffile:
            settings.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, settings["LogLevel"].upper()))
    address = settings["address"]
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(settings["workers"]))
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=settings["workers"]))
    # Channels to the upstream services this servicer aggregates.
    objs = grpc.insecure_channel(settings["object_service"]["url"])
    data = grpc.insecure_channel(settings["data_service"]["url"])
    stats_pb2_grpc.add_StatsServiceServicer_to_server(
        StatsServiceServ(data, objs), server)
    server.add_insecure_port(address)
    server.start()
    try:
        # gRPC serves on worker threads; keep the main thread alive.
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
def modifygeodata():
    """Backfill a GeoJSON-style [longitude, latitude] 'coordinates' field
    on every master row flagged as having a city location."""
    for row in mastercoll.find({}):
        try:
            if row['cityLocationFlag'] == 1:
                # GeoJSON point order: longitude first, then latitude.
                coords = [float(row['cityLongitude']),
                          float(row['cityLatitude'])]
                row['coordinates'] = coords
                mastercoll.update({"doc_id": row['doc_id']},
                                  {"$set": {
                                      "coordinates": coords
                                  }})
        except BaseException as ex:
            # Per-row failures are logged and the scan continues.
            utility.log_exception_file(ex, config.ConfigManager().LogFile)
def main():
    """Parse CLI args, connect to Postgres, and serve the object service."""
    arg_parser = argparse.ArgumentParser(description="""
    Service to store objects
    """)
    arg_parser.add_argument('--config', help='configuration file',
                            default=None)
    cli = arg_parser.parse_args()
    settings = config.ConfigManager()
    if cli.config is not None:
        with open(cli.config, "r") as conffile:
            settings.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, settings["LogLevel"].upper()))
    address = settings["address"]
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(settings["workers"]))
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=settings["workers"]))
    dbconf = settings["database"]
    logging.info("Connecting to {} with username: {}, host: {}".format(
        dbconf["database"], dbconf["username"], dbconf["host"]))
    database = psycopg2.connect(dbname=dbconf["database"],
                                user=dbconf["username"],
                                password=dbconf["password"],
                                host=dbconf["host"])
    objects_pb2_grpc.add_ObjectServiceServicer_to_server(
        ObjectServiceServ(database), server)
    server.add_insecure_port(address)
    server.start()
    try:
        # Serve until interrupted; requests run on gRPC worker threads.
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
def main():
    """Parse CLI args, connect to MongoDB, and serve the user-info service."""
    arg_parser = argparse.ArgumentParser(description="""
    Service to store and process data companies
    """)
    arg_parser.add_argument('--config', help='configuration file',
                            default=None)
    cli = arg_parser.parse_args()
    settings = config.ConfigManager()
    if cli.config is not None:
        with open(cli.config, "r") as conffile:
            settings.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, settings["LogLevel"].upper()))
    address = settings["address"]
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(settings["workers"]))
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=settings["workers"]))
    # TODO: decide which wiring is better -- this or the "depend" style.
    mongo_client = MongoClient(settings["database"]["url"])
    user_db = UserDb(mongo_client["user_database"])
    users_pb2_grpc.add_UserInfoServiceServicer_to_server(
        UsersServiceServ(user_db), server)
    server.add_insecure_port(address)
    server.start()
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
def main():
    """Parse CLI args, wire the sensor-data service, and serve forever."""
    arg_parser = argparse.ArgumentParser(description="""
    Service to store data
    """)
    arg_parser.add_argument('--config', help='configuration file',
                            default=None)
    cli = arg_parser.parse_args()
    settings = config.ConfigManager()
    if cli.config is not None:
        with open(cli.config, "r") as conffile:
            settings.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, settings["LogLevel"].upper()))
    address = settings["address"]
    objs = grpc.insecure_channel(settings["objs"])
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(settings["workers"]))
    mongo_client = MongoClient(settings["database"]["url"])
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=settings["workers"]))
    data_pb2_grpc.add_DataServiceServicer_to_server(
        DataServiceServ(SensorDataModel(mongo_client), objs), server)
    server.add_insecure_port(address)
    server.start()
    # NOTE(review): run_consumer is invoked before the keep-alive loop;
    # if it blocks indefinitely the KeyboardInterrupt handling below is
    # never reached -- confirm it returns promptly.
    run_consumer(mongo_client, settings["rabbit"], objs)
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
def control_loop():
    """Poll the configured target directory and store detected changes.

    The target path, temp path, poll interval, and active flag are read
    from the config manager; interval, active state, and target path are
    re-read every cycle so configuration edits take effect without a
    restart. Returns immediately if the target directory is invalid.
    """
    configure = config.ConfigManager()
    dir_path = configure.get_target_path()
    temp_path = configure.get_temp_path()
    interval = configure.get_interval()
    try:
        manager = manage.FileManager(dir_path, temp_path)
    except manage.InvalidDirectoryError:
        return
    while True:
        time.sleep(interval)
        interval = configure.get_interval()
        active = configure.get_active()
        # Follow a re-configured target directory on the fly.
        if dir_path != configure.get_target_path():
            dir_path = configure.get_target_path()
            manager.set_target_directory(dir_path)
            print(f"Change to {dir_path}")
        if not active or not manager.has_changed():
            print(f"No changes (Active: {active})")
            continue
        print(manager.store_changes())
def automate_processes():
    """Run the PromptCloud analysis pipeline scripts in order."""
    utility.write_to_file(config.ConfigManager().LogFile, 'a',
                          'pcanalysisautomationscript running')
    pipeline = (
        'download_crawldata_threading.py',
        'unzip_gz.py',
        'analyze_crawldata.py',
    )
    try:
        for script in pipeline:
            exec(open(script).read(), globals())
    except BaseException as ex:
        message = '\n' + 'Exception:' + str(datetime.datetime.now()) + '\n'
        message += 'File: ' + '\n'
        message += '\n' + str(ex) + '\n'
        message += '-' * 100  # .encode('utf8'))
        utility.write_to_file(config.ConfigManager().LogFile, 'a', message)
def readstagingdata():
    """Stream changed staging rows into the work queue for processing.

    Finds rows modified after the stored stagingDateModified watermark,
    assigns each a new sequential doc_id, attaches geo lookup tables and
    transfer bookkeeping, and puts each row on the module queue ``q``.

    NOTE(review): the four geo dict lists are attached to every row, so
    each queue item carries references to the full lookup tables --
    confirm downstream consumers expect this.
    """
    utility.write_to_file(
        config.ConfigManager().LogFile, 'a',
        'Staging dataread running' + ' ' + str(datetime.datetime.now()))
    ratesConfigValues = ratesConfig.find({})
    # Watermark: only rows modified after this timestamp are transferred.
    ratesDate = ratesConfigValues[0]['stagingDateModified']
    ratesData = stagingcoll.find({'dateModified': {
        "$gt": ratesDate
    }}, no_cursor_timeout=True)
    doc_id = ratesConfigValues[0]['masterDocId']
    objectid = ratesConfigValues[0]['_id']
    dateModifiedList = []
    # Geo reference queries used to validate/enrich location fields.
    geoCountryQuery = "select distinct name,iso_alpha3, fips_code from geo_country order by name"
    geoStateQuery = "select ga1.name, gn.admin1, gn.latitude, gn.longitude from geo_admin1 ga1 inner join geo_name gn on ga1.geonameid = gn.geonameid"
    geoCityQuery = "select distinct sPlaceName, fLatitude, fLongitude from GeoPostal order by sPlaceName"
    geoZipCodeQuery = "select distinct sPostalCode, fLatitude, fLongitude from GeoPostal order by sPostalCode"
    countryDictList = custom.create_sql_dict_list(
        geoCountryQuery, config.ConfigManager().geographicalDataConnstr)
    stateDictList = custom.create_sql_dict_list(
        geoStateQuery, config.ConfigManager().geographicalDataConnstr)
    cityDictList = custom.create_sql_dict_list(
        geoCityQuery, config.ConfigManager().geographicalDataConnstr)
    zipCodeDictList = custom.create_sql_dict_list(
        geoZipCodeQuery, config.ConfigManager().geographicalDataConnstr)
    i = 0
    for row in ratesData:
        dateModifiedList.append(row['dateModified'])
        i += 1
        # Drop the staging _id so the master collection assigns its own.
        del row['_id']
        doc_id += 1
        row['doc_id'] = doc_id
        # Running maximum of the dates seen so far: the new watermark.
        row['stagingDateModified'] = max(dateModifiedList)
        row['i'] = i
        row['objectid'] = objectid
        row['countryDictList'] = countryDictList
        row['stateDictList'] = stateDictList
        row['cityDictList'] = cityDictList
        row['zipCodeDictList'] = zipCodeDictList
        q.put(row)
    ratesData.close()
    del ratesData
def process_staging_row(row):
    """Clean, enrich, and batch one staging row for transfer to masters.

    Appends the processed row to the module-level ``dataList`` and, every
    StagingMasterTransferStep rows, flushes the batch to the master
    collection and advances the rates-config high-water mark. Failures
    are logged and swallowed so other rows keep flowing.
    """
    try:
        global dataList
        # The original used bare string literals ("Step:1 ...") as
        # pseudo-comments; they were executed as no-op statements (and the
        # step numbers repeated), so they are real comments now.
        # Step 1: data scrubbing for email, phone, url and candidate name.
        row = dataclean(row)
        # Step 2: nounphrases generation.
        row = generatenounphrases(row)
        # Step 3: signature generation.
        row = signaturegraph(row)
        # Step 4: rates calculation (must precede the availability check).
        row = rates_calculation.billratescalculation(row)
        # Step 5: verification of rate availability.
        row = rate_available(row)
        # Geographical data check and additions.
        row = custom.geo_data_check(row, row['countryDictList'], 'country')
        row = custom.geo_data_check(row, row['stateDictList'], 'state')
        row = custom.geo_data_check(row, row['cityDictList'], 'city')
        row = custom.geo_data_check(row, row['zipCodeDictList'], 'zipCode')
        # Strip the shared lookup tables before persisting the row.
        del row['countryDictList']
        del row['stateDictList']
        del row['cityDictList']
        del row['zipCodeDictList']
        dataList.append(row)
        if row['i'] % int(
                config.ConfigManager().StagingMasterTransferStep) == 0:
            # NOTE(review): only this boundary row has its bookkeeping
            # keys (stagingDateModified/objectid/i) stripped before the
            # insert; earlier rows in dataList keep theirs -- confirm
            # this is intended.
            stagingDateModified = row['stagingDateModified']
            del row['stagingDateModified']
            objectid = row['objectid']
            del row['objectid']
            del row['i']
            # Flush the accumulated batch to the master collection.
            mastercoll.insert(dataList)
            dataList = []
            docid = row['doc_id']
            # Update config collection with doc_id and datetime watermark.
            updateconfigcollection(docid, stagingDateModified, objectid)
    except BaseException as ex:
        utility.log_exception_file(ex, config.ConfigManager().LogFile)
def write_fileinfo(filepath, dict_object):
    """Append (file name, record uniq_id) to the global uniqueids list."""
    # Strip the folder prefix so only the bare file name is recorded.
    filename = filepath.replace(config.ConfigManager().PCFileFolder + '/', '')
    uniq_id = (dict_object['record'])['uniq_id']
    listdata_uniqueids.append([filename, uniq_id])
def generatenounphrases(row):
    """Attach extracted noun phrases (and a description) to the row.

    ST and PromptCloud rows already carry a 'description'; for any other
    source the description is first assembled from the title/description/
    skills fields. Sets nounPhrases, nounPhraseFlag, and refreshes the
    created/modified timestamps. Returns the mutated row.
    """
    if row['source'] in (config.ConfigManager().ST,
                         config.ConfigManager().promptCloud):
        # The ST and promptCloud branches were byte-identical; merged.
        description = row['description']
    else:
        description = str(row['jobTitle']) + ' ' + str(
            row['jobDescription']) + ' ' + str(
                row['mandatorySkills']) + ' ' + str(row['desiredSkills'])
        row['description'] = description
    row['nounPhrases'] = dcrnlp.extract_nounphrases_sentences(description)
    row['nounPhraseFlag'] = 1
    row['dateCreated'] = datetime.datetime.utcnow()
    row['dateModified'] = datetime.datetime.utcnow()
    return row
def pc_rates_add_fields(record_object, filepath):
    """Normalize a raw crawl record in place and return it.

    Stamps audit fields, records the source/file name, and maps the
    crawl's lowercase keys (job_description/jobdescription, jobtitle,
    skills, postdate, sitename/site_name) onto the canonical camelCase
    fields, building a combined 'description'.
    """
    record_object['dateCreated'] = datetime.datetime.utcnow()
    record_object['dateModified'] = datetime.datetime.utcnow()
    record_object['createdUser'] = '******'
    record_object['modifiedUser'] = '******'
    record_object['source'] = config.ConfigManager().promptCloud
    record_object['fileName'] = filepath.replace(
        config.ConfigManager().PCRatesFileFolder + '/', '')
    record_object['description'] = ''
    # if 'rate_value' in record_object and record_object['rate_value'] != '':
    #     record_object['maxBillRate'] = record_object['rate_value']
    # The feeds use either 'job_description' or 'jobdescription'; the two
    # original branches (including the nested jobtitle handling) were
    # identical, so one loop covers both. When both keys are present the
    # later one wins, matching the original behaviour.
    for desc_key in ('job_description', 'jobdescription'):
        if desc_key in record_object and record_object[desc_key] != '':
            record_object['jobDescription'] = record_object[desc_key]
            record_object['description'] = record_object['jobDescription']
            if 'jobtitle' in record_object and record_object['jobtitle'] != '':
                record_object['jobTitle'] = record_object['jobtitle']
                record_object['description'] += record_object['jobTitle']
    if 'skills' in record_object and record_object['skills'] != '':
        record_object['description'] += record_object['skills']
    if 'postdate' in record_object and record_object['postdate'] != '':
        record_object['postDate'] = record_object['postdate']
    else:
        # Fall back to today's date when the crawl lacks a post date.
        record_object['postDate'] = datetime.datetime.today().strftime(
            '%Y-%m-%d')
    if 'sitename' in record_object and record_object['sitename'] != '':
        record_object['dataSource'] = record_object['sitename']
    if 'site_name' in record_object and record_object['site_name'] != '':
        record_object['dataSource'] = record_object['site_name']
    return record_object
def job_info_analysis_storage(page_dict_object, filepath, dbrecordcount):
    """Validate one page's job record and stage it for DB insertion.

    A record is accepted only when its 'jobdescription' is present, not
    None, non-blank, at least 20 characters, and not truncated (no
    trailing '...'). Accepted records are stamped with audit fields and
    inserted via insert_to_db(). Returns the updated dbrecordcount.
    """
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict
    dict_object_record_list = []
    try:
        dict_object = page_dict_object['page']
        # outer if check is jobdescription tag is in the xml
        if 'jobdescription' in (dict_object['record']):
            # checking if job description is none
            if ((dict_object['record'])['jobdescription'] is not None):
                incorrectjobdescription = 0
                # blank (whitespace-only) description
                if (((dict_object['record'])['jobdescription']).strip()) == '':
                    incorrectjobdescription = 1
                # too short to be a real description
                if (len(((dict_object['record'])['jobdescription'])) < 20):
                    incorrectjobdescription = 1
                # truncated description ending in '...'
                if (((dict_object['record'])['jobdescription']).strip()[-3:]
                        ) == '...':
                    incorrectjobdescription = 1
                if (incorrectjobdescription == 0):
                    # Stamp audit/source fields on the accepted record.
                    (dict_object['record']
                     )['dateCreated'] = datetime.datetime.now()
                    (dict_object['record']
                     )['dateModified'] = datetime.datetime.now()
                    (dict_object['record'])['createdUser'] = '******'
                    (dict_object['record'])['modifiedUser'] = '******'
                    (dict_object['record'])['source'] = 'PromptCloud'
                    (dict_object['record'])['Url'] = dict_object['pageurl']
                    (dict_object['record'])['fileName'] = filepath.replace(
                        config.ConfigManager().PCFileFolder + '/', '')
                    dict_object_record_list.append(dict_object['record'])
                    dbrecordcount += 1
    except BaseException as ex:
        utility.log_exception_file(
            ex, dcrconfig.ConfigManager().SemanticGraphLogFile)
    if dict_object_record_list:
        insert_to_db(dict_object_record_list)
    # updating doc_id in config table
    return dbrecordcount
def automate_processes():
    """Run the PromptCloud rates pipeline scripts sequentially."""
    utility.write_to_file(config.ConfigManager().PromptcloudLogFile, 'a',
                          'PromptCloudautomationscript running')
    pipeline = (
        # download files into PCCompData with in mnt/nlpdata, xml format
        'rates_pc_download_crawldata_threading.py',
        # compress the PCCompdata folder
        'compress.py',
        # unzip files created in PCData folder; time stored in dataloadconfig
        'pc_rates_unzip_gz.py',
        # download data into pcdataanalysisresults.ods
        'pc_rates_dataload.py',
        # 'mailsend.py',              # automatic email sending (disabled)
        # 'pcdataanalysisbackup.py',  # s3 backup of analysis file (disabled)
    )
    try:
        for script in pipeline:
            exec(open(script).read(), globals())
    except BaseException as ex:
        utility.log_exception_file(ex,
                                   config.ConfigManager().PromptcloudLogFile)
def datamasking(row):
    """Send the row to the masking API and copy the masked fields back.

    POSTs the JSON payload built by makingjsondata() to the configured
    job-server endpoint and, on success, overwrites the row's supplier/
    client/msp/dataSource fields from the response. Returns the row.
    """
    maskingText = json.dumps(makingjsondata(row))
    headers = {"Content-Type": "application/json"}
    conn = http.client.HTTPConnection(config.ConfigManager().Host,
                                      config.ConfigManager().Port)
    try:
        conn.request(config.ConfigManager().JobServerMethod,
                     config.ConfigManager().API, maskingText, headers)
        response = conn.getresponse()
        result = json.loads(response.read().decode('utf8'))
        try:
            row['supplierName'] = result['supplierName']
            row['clientId'] = result['clientId']
            row['mspId'] = result['mspId']
            row['dataSource'] = result['dataSource']
            # row['source'] = result['source']
        except BaseException as ex:
            print(ex)
            # BUG FIX: the original logged to the undefined name `file`
            # (a NameError in Python 3); log to the configured log file
            # as the sibling functions do.
            utility.log_exception_file(ex, config.ConfigManager().LogFile)
    finally:
        # Always release the HTTP connection, even if the request fails.
        conn.close()
    return row
def __init__(self, parent, manager):
    """
    Creates a new tab widget instance having the given parent and
    utilising the given manager.

    Arguments:
        parent (QWidget): parent of this widget
        manager (manage.FileManager): file management interface
    """
    super().__init__(parent, manager)
    # Shared configuration accessor used by this widget's setup code.
    self.configure = config.ConfigManager()
    # Keywords whose matching entries should be ignored; starts empty.
    self.ignore_keywords = []
    self.init_layout()