def runTestSuite():
  '''
  Load the URL of every entry in `versions` and archive the resulting CSV.

  For each version, `url_base + str(version)` is opened in the browser
  (page-load timeout set to `timeout`).  When the load raises, the file
  at `csv_file` is moved to CSVs/<version>.csv if it exists; otherwise
  the miss is logged as an error.
  '''
  logger = loggerFetch("info")
  logger.info("BEGIN PROCESSING...")

  display = displayInitialize(1)
  driver = driverInitialize(path='/home/mayank/.mozilla/firefox/4s3bttuq.default/', timeout=timeout)

  driver.set_page_load_timeout(timeout)

  for version in versions:
    url = url_base + str(version)
    # Computed up front: both branches of the handler below refer to it
    # (it used to be assigned only in the "file exists" branch, so the
    # "missed file" branch died with a NameError).
    version_file = 'CSVs/' + str(version) + '.csv'
    try:
      logger.info('Fetching URL[%s]' % url)
      driver.get(url)
      logger.info('After Fetch[%s]' % url)
    except Exception as e:
      logger.info("Warning %s", e)
      if os.path.exists(csv_file):
        logger.info('Writing %s' % version_file)
        os.rename(csv_file, version_file)
      else:
        logger.error('Missed file[%s] from URL[%s]' % (version_file, url))
    logger.info("CSV Fetched From [%s]" % url)

  driverFinalize(driver)
  displayFinalize(display)


  logger.info("...END PROCESSING")
def runTestSuite():
  '''
  Generate one report per entry in `mandals` and save it as CSV.

  Each report comes from generate_report(logger, driver, mandal,
  districts[mandal]) and is written, UTF-8 encoded, to
  ./mandals/<mandal>.csv.
  '''
  logger = loggerFetch("info")
  logger.info("BEGIN PROCESSING...")

  display = displayInitialize(0)
  driver = driverInitialize()

  for mandal in mandals:
    report = generate_report(logger, driver, mandal, districts[mandal])
    logger.info('Finally: \n%s' % report)

    filename = './mandals/' +mandal + '.csv'
    with open(filename, 'wb') as csv_file:
      logger.info("Writing to [%s]" % filename)
      csv_file.write(report.encode('utf-8'))

    # TODO: an unfinished post-processing stub was removed from here: an
    # `if` on the undefined name `final_report` with no body (which made
    # the whole module unparseable) and an unused split of the report
    # on '|'.  Re-add it once the intended check is known.

  driverFinalize(driver)
  displayFinalize(display)

  logger.info("...END PROCESSING")

  return

def main():
  '''Run the test suite, then exit the process with status 0.'''
  runTestSuite()
  exit(0)

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
示例#3
0
def main():
  '''
  Skeleton crawler: open the DB, display and browser, fetch one page, print it.

  Reads 'log_level', 'year', 'visible' and 'browser' from argsFetch().
  Whatever was started (database, display, browser) is finalized even
  when a step fails; on success the process exits with status 0.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  db = dbInitialize(db="biharPDS", charset="utf8")  # The rest is updated automatically in the function
  # (finalizer, resource) pairs; released in reverse order of creation so
  # the original driver -> display -> db shutdown order is kept.
  cleanup = [(dbFinalize, db)]
  try:
    cur=db.cursor()
    db.autocommit(True)
    #Query to set up Database to read Hindi Characters
    query="SET NAMES utf8"
    cur.execute(query)
    inyear=args['year']

    logger.info(inyear)
    display = displayInitialize(args['visible'])
    cleanup.append((displayFinalize, display))
    driver = driverInitialize(args['browser'])
    cleanup.append((driverFinalize, driver))

    #Start Program here
    url="http://www.google.com"
    driver.get(url)
    myhtml=driver.page_source
    # Function-call form prints the same text on Python 2 and is valid on Python 3.
    print(myhtml)
    # End program here
  finally:
    for finalize, resource in reversed(cleanup):
      finalize(resource)

  logger.info("...END PROCESSING")
  exit(0)
 def setUp(self):
     """Create the logger, the display and a Firefox driver; remember the target URL."""
     log = self.logger = loggerFetch('info')
     log.info('BEGIN PROCESSING')

     # Pass 0 for headless
     show_display = 1
     self.display = displayInitialize(show_display)

     # driverInitialize also takes a path= argument, e.g. path='/path/to/firefox/'
     self.driver = driverInitialize(browser='firefox')

     self.url = URL
示例#5
0
 def setUp(self):
     """Create the logger, the display (mode 0) and a headless driver with timeout=3."""
     log = self.logger = loggerFetch('info')
     log.info('BEGIN PROCESSING...')

     self.display = displayInitialize(0)

     # Alternatives tried before: driverInitialize(timeout=3) and
     # driverInitialize(path='/opt/firefox/', timeout=3)
     driver_settings = {'timeout': 3, 'options': '--headless'}
     self.driver = driverInitialize(**driver_settings)
示例#6
0
 def setUp(self):
     """Read the CLI args and prepare logger, display, driver, download command and URL."""
     cli_args = self.args = argsFetch()

     log = self.logger = loggerFetch('info')
     log.info('BEGIN PROCESSING')

     self.display = displayInitialize(cli_args['visible'])
     self.driver = driverInitialize(
         browser=cli_args['browser'],
         path='/home/mayank/.mozilla/firefox/4s3bttuq.default/',
     )

     # The output template is passed through literally, so its %(...)s
     # placeholders end up in the command string.
     output_template = '"%(title)s.%(ext)s"'
     self.cmd = 'nohup youtube-dl --username %s --password %s -o %s ' % (user, password, output_template)

     self.url = cli_args['url']
示例#7
0
def downloadJobcards(logger, db, cmd=None, directory=None, url=None, isVisible=None, isPushInfo=None, query=None, fetch_jobcard_details=None):
  '''
  Download the HTML of every jobcard returned by `query`.

  logger    -- logger used for progress messages
  db        -- open database connection; cursors are taken from it
  cmd       -- label used in the BEGIN/END log lines (default "Downloading")
  directory -- base directory; each jobcard is saved under
               <directory>/<panchayat> (default "./jobcards")
  url       -- only logged by this function (default: the Telangana
               household-search URL)
  isVisible -- passed to displayInitialize (default 0)
  isPushInfo, fetch_jobcard_details
            -- when either is truthy, downloaded HTML is handed to
               pushMusterInfo; a jobcard whose download yielded nothing
               gets isDownloaded reset to 0
  query     -- SQL yielding (jobcard, panchayat name, panchayat code) rows;
               defaults to jobcards whose downloadDate is at least a day old

  The browser and display are finalized even if a download raises.
  '''
  # Resolve the defaults before the first log line so that it reports the
  # effective command name instead of "None".
  if cmd is None:
    cmd="Downloading"

  if directory is None:
    directory = "./jobcards"

  if url is None:
    url = 'http://www.nrega.telangana.gov.in/Nregs/FrontServlet?requestType=HouseholdInf_engRH&actionVal=SearchJOB&JOB_No='

  if isVisible is None:
    isVisible = 0

  if isPushInfo is None:
    isPushInfo = False

  logger.info("BEGIN %s..." % cmd)
  logger.info("Command[%s] Directory[%s] URL[%s]" % (cmd, directory, url))

  if not query:
    # Mynk - use when b.name is not all 'Ghattu'  query = 'select j.jobcard, p.name, b.name from jobcardRegister j, panchayats p, blocks b where j.blockCode=p.blockCode and j.panchayatCode=p.panchayatCode  and j.blockCode=b.blockCode'
    query = 'select j.jobcard, p.name, p.panchayatCode from jobcardRegister j, panchayats p, blocks b where j.blockCode=p.blockCode and j.panchayatCode=p.panchayatCode  and j.blockCode=b.blockCode and DATE_SUB(NOW(), INTERVAL 1 DAY) >= downloadDate order by j.downloadDate'

  logger.info('Executing query: [%s]', query)
  cur = db.cursor()
  cur.execute(query)
  jobcard_details = cur.fetchall()
  if jobcard_details:
    logger.debug("Jobcard Details [%s]" % str(jobcard_details))

  display = displayInitialize(isVisible)
  try:
    driver = driverInitialize()
    try:
      for (jobcard, panchayat, panchayat_code) in jobcard_details:
        logger.info( "jobcard[%s] panchayat[%s] panchayat_code[%s]" % (jobcard, panchayat, panchayat_code))
        dirname = directory + '/' + panchayat
        html_source = downloadJobcardHTML(logger, driver, db, jobcard, dirname)

        if not (isPushInfo or fetch_jobcard_details):
          continue

        if html_source:
          pushMusterInfo(logger, db, html_source, jobcard, panchayat_code, fetch_jobcard_details)
        else:
          # Nothing was downloaded: flag the jobcard so it is picked up again.
          # NOTE(review): the value is interpolated into the SQL text; the
          # jobcard comes from our own table, but a parameterized query
          # would be safer - confirm the driver's paramstyle first.
          reset_query = 'update jobcardRegister set isDownloaded=0 where jobcard="%s"' % (jobcard) # Mynk
          logger.info('Executing query: [%s]', reset_query)
          cur = db.cursor()
          cur.execute(reset_query)
    finally:
      driverFinalize(driver)
  finally:
    displayFinalize(display)

  logger.info("...END %s" % cmd)
示例#8
0
def contextInitialize(self, logger, args):
  '''
  Attach logger, database, display and browser driver to `self`.

  The database is "mahabubnagar"; the display and the driver are
  configured from args['visible'] and args['browser'].
  '''
  self.logger = logger
  self.db = dbInitialize(db="mahabubnagar")

  show_browser = args['visible']
  self.display = displayInitialize(show_browser)
  # 5 when the browser is visible, 1 otherwise; not used further in this function.
  delay = 5 if show_browser else 1

  self.driver = driverInitialize(args['browser'])
  # contextRegister(self)
  logger.info("Context Initialized")
示例#9
0
 def setUp(self):
     """Prepare args, logger, display, driver, the youtube-dl command prefix and the URL."""
     self.args = argsFetch()
     options = self.args

     self.logger = loggerFetch('info')
     self.logger.info('BEGIN PROCESSING')

     self.display = displayInitialize(options['visible'])

     profile_dir = '/home/mayank/.mozilla/firefox/4s3bttuq.default/'
     self.driver = driverInitialize(browser=options['browser'], path=profile_dir)

     # Only the three %s slots are filled in; the quoted output template
     # keeps its own %(title)s / %(ext)s placeholders.
     command_format = 'nohup youtube-dl --username %s --password %s -o %s '
     self.cmd = command_format % (user, password, '"%(title)s.%(ext)s"')

     self.url = options['url']
示例#10
0
 def setUp(self):
     """Configure logging from the CLI args and start the browser unless parsing only."""
     args = argsFetch()

     # Fall back to 'info' when no log level was supplied.
     level = args['log_level'] or 'info'
     log = self.logger = loggerFetch(level)
     log.info('args: %s', str(args))
     log.info('BEGIN PROCESSING...')

     self.parse = args['parse']
     dirname = args['directory']  # read here but not used further in this method

     if self.parse:
         return

     # Display and driver are only created when not in parse mode.
     self.display = displayInitialize(args['visible'])
     # previously: driverInitialize(path='/opt/firefox/', timeout=3)
     self.driver = driverInitialize(timeout=3, options='--headless')
示例#11
0
    def __init__(self, logger=None, is_selenium=None):
        """Set up paths, optional Selenium objects, Google clients and lookup tables.

        logger      -- logger to use; when falsy one is created via
                       logger_fetch('info')
        is_selenium -- when truthy, a display and a driver are created too
        """
        logger = self.logger = logger if logger else logger_fetch('info')
        logger.info(f'Constructor({type(self).__name__})')

        # Draft-roll location, kept for reference:
        # 'http://ceo.karnataka.gov.in/draftroll_2020/'
        self.url = 'http://ceo.karnataka.gov.in/finalrolls_2020/'
        self.status_file = 'status.csv'

        language = 'English'  # 'Kannada' if KANNADA else 'English'
        self.dir = f'BBMP/{language}' if IS_DEBUG else f'../Data/BBMP/{language}'
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

        self.ignore = True
        self.is_selenium = is_selenium if is_selenium else False

        if self.is_selenium:
            self.display = displayInitialize(isDisabled=not is_virtual, isVisible=is_visible)
            # alternative: driverInitialize(path='/opt/firefox/', timeout=3)
            self.driver = driverInitialize(timeout=3)

        # The credentials path is exported before the Google clients are created.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'../private/aap.bangaluru.json'
        self.project = 'BBMP-OCR'
        self.bucket_name = 'aap_bangaluru'  # 'bbmp_bucket' # 'test_aap'
        self.storage_client = storage.Client()
        self.vision_client = vision.ImageAnnotatorClient()

        # Lookup tables keyed by assembly constituency (AC#) and by ward number.
        ward_table = pd.read_csv('Ward-AC-LS-Mapping.csv')
        ac_column = ward_table['AC#']
        self.ac2ls = dict(zip(ac_column, ward_table['LS#']))
        self.ls_name_of = {
            '23': 'Bangalore Rural',
            '24': 'Bangalore North',
            '25': 'Bangalore Central',
            '26': 'Bangalore South',
            '27': 'Chikkabalapura',
        }
        self.ac_name_of = dict(zip(ac_column, ward_table['Assembly constituency']))
        # AC 177 is entered by hand on top of what the CSV provides.
        self.ac_name_of[177] = 'Anekal'
        self.ac2ls[177] = 26

        ward_column = ward_table['Ward No']
        self.ward2ac = dict(zip(ward_column, ward_table['AC#']))
        self.ward_name_of = dict(zip(ward_column, ward_table['Ward Name']))

        booth_table = pd.read_csv('Booths2WardMapping.csv')
        self.part2ward = dict(zip(booth_table['Part'], booth_table['Ward']))
        self.ward_name_of[999] = 'Extra Data'
示例#12
0
def main():
  '''
  Download pending musters with the browser and store them on the model.

  Selects up to `limit` (default 1) Muster rows that are either not yet
  downloaded or incomplete with an attempt date older than
  musterTimeThreshold, loads each musterURL, validates the HTML with
  validateMusterHTML and either saves a cleaned-up HTML file on the
  muster or records the download error.  The browser and display are
  finalized even when a muster fails unexpectedly.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))
  logger.info("BEGIN PROCESSING...")
  limit = int(args['limit']) if args['limit'] else 1

  display = displayInitialize(args['visible'])
  try:
    driver = driverInitialize(args['browser'])
    try:
      url="http://nrega.nic.in/netnrega/sthome.aspx"
      driver.get(url)
      myMusters=Muster.objects.filter( Q(isDownloaded=False) | Q(musterDownloadAttemptDate__lt = musterTimeThreshold,isComplete=0) )[:limit]
      for eachMuster in myMusters:
        logger.info(eachMuster.musterURL)
        logger.info("Processing musterNo: %s FullblockCode: %s " % (eachMuster.musterNo,eachMuster.block.fullBlockCode))
        # NOTE(review): the page is requested twice on purpose in the
        # original; presumably the first load does not render fully - confirm.
        driver.get(eachMuster.musterURL)
        driver.get(eachMuster.musterURL)
        myhtml = driver.page_source
        error,musterTable,musterSummaryTable=validateMusterHTML(eachMuster,myhtml)
        if error is None:
          outhtml=''
          outhtml+=stripTableAttributes(musterSummaryTable,"musterSummary")
          outhtml+=stripTableAttributes(musterTable,"musterDetails")
          title="Muster: %s state:%s District:%s block:%s finyear:%s " % (eachMuster.musterNo,eachMuster.block.district.state.name,eachMuster.block.district.name,eachMuster.block.name,getFullFinYear(eachMuster.finyear))
          logger.info(title)
          # "align" was misspelled "aling", so the attribute had no effect.
          outhtml=htmlWrapperLocal(title=title, head='<h1 align="center">'+title+'</h1>', body=outhtml)
          try:
            outhtml=outhtml.encode("UTF-8")
          except (UnicodeError, AttributeError):
            # Best effort: content that cannot be (re-)encoded is stored as it is.
            pass
          filename="%s.html" % (eachMuster.musterNo)
          eachMuster.musterFile.save(filename, ContentFile(outhtml))
          eachMuster.musterDownloadAttemptDate=datetime.now()
          eachMuster.isDownloaded=True
          eachMuster.save()
        else:
          logger.info("Muster Download Erorr: %s " % (error))
          eachMuster.musterDownloadAttemptDate=datetime.now()
          eachMuster.downloadError=error
          eachMuster.save()
    finally:
      driverFinalize(driver)
  finally:
    displayFinalize(display)
  logger.info("...END PROCESSING")
  exit(0)
示例#13
0
def runTestSuite():
  '''
  Save the page behind every panchayat link on `url` to /tmp/<panchayat>.html.

  The listing page is loaded (twice), its 'normalRow'/'alternateRow'
  table rows are scanned for the panchayat name, and for each name the
  link with that text is clicked, the resulting page is written to disk
  and the browser goes back.  The browser and display are finalized even
  when a step fails.
  '''
  logger = loggerFetch("info")
  logger.info("BEGIN PROCESSING...")

  display = displayInitialize(1)
  try:
    driver = driverInitialize(path='/opt/firefox/')
    try:
      # Log before each request so the log shows what is in flight.
      logger.info("Fetching...[%s]" % url)
      driver.get(url)

      logger.info("Refreshing...[%s]" % url)
      driver.get(url)    # A double refresh required for the page to load

      html_source = driver.page_source.replace('<head>',
                                               '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
      logger.debug("HTML Fetched [%s]" % html_source)

      bs = BeautifulSoup(html_source, "html.parser")
      tr_list = bs.findAll('tr', attrs={'class':['normalRow', 'alternateRow']})
      logger.debug(str(tr_list))

      for tr in tr_list:
        # The panchayat name is in the <td> that follows the row's first <td>.
        first_td = tr.find('td')
        name_td = first_td.findNext('td') if first_td is not None else None
        if name_td is None:
          # Without the guard this raised AttributeError on None.
          logger.info("Skipping row without a panchayat cell")
          continue
        panchayat = name_td.text.strip()
        logger.info("Panchayat[%s]", panchayat)

        elem = driver.find_element_by_link_text(panchayat)
        elem.click()

        filename="/tmp/%s.html" % panchayat
        with open(filename, 'wb') as html_file:
          logger.info("Writing [%s]" % filename)
          html_file.write(driver.page_source.encode('utf-8'))

        driver.back()
    finally:
      driverFinalize(driver)
  finally:
    displayFinalize(display)


  logger.info("...END PROCESSING")
示例#14
0
def main():
  '''
  Dispatch one PDS task against the "surguja" database.

  The task is chosen from the args in this priority order: 'prev',
  'parse', 'work_allocation', 'fetch', otherwise the plain pdsFetch.
  A browser is started for every task except the parse-only one.
  When 'directory' is given, reports go to <directory>/<Month-Year>,
  which is created if missing.  Everything that was started is
  finalized even when the task raises.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")

  db = dbInitialize(db="surguja", charset="utf8")
  # (finalizer, resource) pairs; released in reverse order of creation so
  # the original driver -> display -> db shutdown order is kept.
  cleanup = [(dbFinalize, db)]
  try:
    # 'prev' wins over 'parse' in the dispatch below and needs the browser,
    # so start it in that case too (it used to hit a NameError on `driver`).
    needs_browser = bool(args['prev']) or not args['parse']
    driver = None
    if needs_browser:
      display = displayInitialize(args['visible'])
      cleanup.append((displayFinalize, display))
      driver = driverInitialize(args['browser'])
      cleanup.append((driverFinalize, driver))

    download_dir = args['directory']
    if download_dir:
      download_dir = download_dir + '/' + strftime('%B-%Y')
      logger.info('download_dir[%s]' % download_dir)
      if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    if args['prev']:
      pdsFetchPrev(logger, driver, db, download_dir, args['month'], args['year'])
    elif args['parse']:
      pdsReportParse(logger, db, download_dir)
    elif args['work_allocation']:
      downloadWorkAllocationHTML(driver, db, logger) # Mynk Fix Order
    elif args['fetch']:
      pdsFetchReports(logger, driver, db, download_dir)
    else:
      pdsFetch(logger, driver, db, download_dir)
  finally:
    for finalize, resource in reversed(cleanup):
      finalize(resource)

  logger.info("...END PROCESSING")
  exit(0)
示例#15
0
def runTestSuite():
  '''
  Generate a single report and write it, UTF-8 encoded, to `filename`.
  '''
  log = loggerFetch("info")
  log.info("BEGIN PROCESSING...")

  display = displayInitialize(0)
  driver = driverInitialize()

  report = generate_report(log, driver)
  log.info('Finally: \n%s' % report)

  with open(filename, 'wb') as out_file:
    log.info("Writing to [%s]" % filename)
    encoded_report = report.encode('utf-8')
    out_file.write(encoded_report)

  # Shut the browser down before the display it was started with.
  driverFinalize(driver)
  displayFinalize(display)

  log.info("...END PROCESSING")
def runTestSuite():
  '''
  Build the report with generate_report() and save it to `filename` as UTF-8 bytes.
  '''
  logger = loggerFetch("info")
  logger.info("BEGIN PROCESSING...")

  display, driver = displayInitialize(0), driverInitialize()

  report = generate_report(logger, driver)
  logger.info('Finally: \n%s' % report)

  with open(filename, 'wb') as sink:
    logger.info("Writing to [%s]" % filename)
    sink.write(report.encode('utf-8'))

  for finalize, resource in ((driverFinalize, driver), (displayFinalize, display)):
    finalize(resource)

  logger.info("...END PROCESSING")
  return None
示例#17
0
def main():
  '''
  Download the HTML of one jobcard and log it.

  The jobcard number comes from args['jobcard']; a built-in sample
  number is used when none is given.  Exits with status 0.
  '''
  args = argsFetch()
  log = loggerFetch(args.get('log_level'))
  log.info('args: %s', str(args))
  log.info("BEGIN PROCESSING...")

  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])

  # Fall back to the sample jobcard when none was passed in.
  jobcard = args['jobcard'] if args['jobcard'] else '141975701001010679'
  log.info("Fetching Jobcard[%s]..." % jobcard)

  jobcard_html = downloadJobcardHTML(log, driver, jobcard)
  log.info(jobcard_html)

  driverFinalize(driver)
  displayFinalize(display)
  log.info("...END PROCESSING")
  exit(0)
示例#18
0
def main():
  '''
  Download NREGA wagelists and/or FTOs and record the outcome in the database.

  Switches read from argsFetch():
    limit             -- maximum number of rows per query (default 50000)
    district, finyear -- optional extra SQL filters
    downloadWagelists -- fetch pending wagelist pages with the browser,
                         save them and register the FTO numbers they list
    downloadFTOs      -- fetch pending FTO pages via getFTO() and save the
                         HTML returned by alterFTO()
    visible, browser  -- passed to displayInitialize / driverInitialize

  NOTE(review): the SQL statements are built by string interpolation and
  the browser is not finalized if a wagelist step raises; both are left
  as they were to keep this change small.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))
  logger.info("BEGIN PROCESSING...")
  db = dbInitialize(db=nregaDB, charset="utf8")  # The rest is updated automatically in the function
  cur=db.cursor()
  db.autocommit(True)
  #Query to set up Database to read Hindi Characters
  query="SET NAMES utf8"
  cur.execute(query)
  if args['limit']:
    limit = int(args['limit'])
  else:
    limit =50000
  limitString=" limit %s " % str(limit)
  additionalFilters=''
  if args['district']:
    additionalFilters+= " and b.districtName='%s' " % args['district']
  if args['finyear']:
    additionalFilters+= " and finyear='%s' " % args['finyear']
  if args["downloadWagelists"]:
    display = displayInitialize(args['visible'])
    driver = driverInitialize(args['browser'])

    url="http://164.100.129.6/netnrega/nregasearch1.aspx"
    driver.get(url)
    time.sleep(22)
    htmlsource = driver.page_source
    #writeFile("/tmp/a.html",htmlsource)
    stateName="CHHATTISGARH"
    districtName="SURGUJA"
    wagelistNo="3305002062WL005160"
    try:
      Select(driver.find_element_by_id("ddl_search")).select_by_visible_text("WageList")
      driver.find_element_by_css_selector("option[value=\"WageList\"]").click()
      Select(driver.find_element_by_id("ddl_state")).select_by_visible_text(stateName.upper())
      #myvalue='value="%s"' % stateCode
      driver.find_element_by_css_selector("option[value=\"33\"]").click()
      # driver.find_element_by_css_selector("option[%s]" % myvalue).click()
      Select(driver.find_element_by_id("ddl_district")).select_by_visible_text(districtName.upper())
      #myvalue='value="%s"' % (stateCode+districtCode)
      driver.find_element_by_css_selector("option[value=\"3305\"]").click()
      #driver.find_element_by_css_selector("option[%s]" % myvalue).click()
      driver.find_element_by_id("txt_keyword2").clear()
      driver.find_element_by_id("txt_keyword2").send_keys(wagelistNo)
      driver.find_element_by_id("btn_go").click()
      time.sleep(30)
      #logger.info("Currently the number of active tabs are %s" % str(len(driver.window_handles)))
    except Exception as e:
      # The pages below are fetched by direct URL regardless, so a failure
      # in the search form is logged and processing continues.
      logger.warning("Wagelist search form failed: %s", e)
    wurl="http://164.100.129.6/netnrega/srch_wg_dtl.aspx?state_code=&district_code=3305&state_name=CHHATTISGARH&district_name=SURGUJA&block_code=3305002&wg_no=3305002062WL005160&short_name=CH&fin_year=2016-2017&mode=wg"
    driver.get(wurl)
    htmlsource = driver.page_source
    filename="%s/b.html" % tempDir
    writeFile(filename,htmlsource)

    query="select w.id,w.wagelistNo,b.rawBlockName,b.fullBlockCode,b.blockCode,b.districtCode,b.stateCode,b.stateShortCode,w.finyear,b.stateName,b.districtName from wagelists w,blocks b where w.fullBlockCode=b.fullBlockCode and ( (w.isDownloaded=0) or (w.isComplete=0 and TIMESTAMPDIFF(HOUR, w.downloadAttemptDate, now()) > 48 ))  %s order by w.isDownloaded %s " % (additionalFilters,limitString)
    cur.execute(query)
    results=cur.fetchall()
    for row in results:
      [rowid,wagelistNo,blockName,fullBlockCode,blockCode,districtCode,stateCode,stateShortCode,finyear,stateName,districtName] = row
      fullfinyear=getFullFinYear(finyear)
      logger.info(" RowID : %s, wagelistNo: %s " % (str(rowid),wagelistNo))
      jobcardPrefix="%s-%s-" % (stateShortCode,districtCode)
      #logger.info("Jobcard Prefix : %s " % jobcardPrefix)
      fullDistrictCode=stateCode+districtCode
      if wagelistNo != '':
        #logger.info(wagelistNo)
        wurl="http://%s/netnrega/srch_wg_dtl.aspx?state_code=&district_code=%s&state_name=%s&district_name=%s&block_code=%s&wg_no=%s&short_name=%s&fin_year=%s&mode=wg" % (searchIP,fullDistrictCode,stateName.upper(),districtName.upper(),fullBlockCode,wagelistNo,stateShortCode,fullfinyear)
        logger.info("URL: %s " % wurl)
        driver.get(wurl)
        htmlsource = driver.page_source
        htmlsource=htmlsource.replace('<head>','<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
        success=0
        isComplete=0
        # A page counts as a valid wagelist only if it carries both markers.
        if ("WageList Agency Code" in htmlsource) and (jobcardPrefix in htmlsource):
          filepath=nregaRawDataDir.replace("stateName",stateName.upper()).replace("districtName",districtName.upper())
          filename=filepath+blockName.upper()+"/WAGELISTS/"+fullfinyear+"/"+wagelistNo+".html"
          # filename=filepath+blockName.upper()+"/WAGELIST/"+fullfinyear+"/"+wagelistNo+".html"
          # logger.info(filename)
          writeFile(tempDir+wagelistNo+".html",htmlsource)
          writeFile(filename,htmlsource)
          success=1
          isComplete=1
          htmlsoup=BeautifulSoup(htmlsource)
          tables=htmlsoup.findAll('table')
          for table in tables:
            #logger.info("Found the Table")
            # `tr` instead of `row`, so the outer loop variable is not shadowed.
            for tr in table.findAll("tr"):
              cols=tr.findAll("td")
              # The FTO number is read from the 13th cell; shorter rows
              # have none and used to raise IndexError.
              if len(cols) <= 12:
                continue
              ftoNo=cols[12].text
              if ftoNo != "FTO No.":
                #logger.info("FTO No : %s " % ftoNo)
                if stateShortCode not in ftoNo:
                  # An FTO number without the state short code marks the
                  # wagelist as incomplete, so it is retried later.
                  isComplete=0
                else:
                  query="select * from ftos where finyear='%s' and fullBlockCode='%s' and ftoNo='%s'" % (finyear,fullBlockCode,ftoNo)
                  #logger.info(query)
                  cur.execute(query)
                  if cur.rowcount == 0:
                    query="insert into ftos (finyear,ftoNo,fullBlockCode,stateCode,districtCode,blockCode) values ('%s','%s','%s','%s','%s','%s') " % (finyear,ftoNo,fullBlockCode,stateCode,districtCode,blockCode)
                    #logger.info(query)
                    cur.execute(query)
        query="update wagelists set isDownloaded=%s,isComplete=%s,downloadAttemptDate=NOW()  where id=%s" %(str(success),str(isComplete),str(rowid))
        #logger.info(query)
        cur.execute(query)


    # driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
    # driver.switch_to_window(maintab)
    driverFinalize(driver)
    displayFinalize(display)

  if args["downloadFTOs"]:
    #additionalFilters=''
    #limitString= "limit 1 "
    query="select f.id,f.ftoNo,f.blockCode,f.finyear,f.fullBlockCode,b.rawBlockName,b.districtName,b.stateName,b.stateCode from ftos f,blocks b where f.fullBlockCode=b.fullBlockCode and f.isComplete=0 and   ( (f.isDownloaded=0) or (TIMESTAMPDIFF(HOUR, f.downloadAttemptDate, now()) > 48 ) or f.downloadAttemptDate is NULL )  %s order by f.downloadAttemptDate,finyear  %s " % (additionalFilters,limitString)
    logger.info(query)
    cur.execute(query)
    results=cur.fetchall()
    for row in results:
      [rowid,ftoNo,blockCode,finyear,fullBlockCode,blockName,districtName,stateName,stateCode]=row
      logger.info("districtName: %s, blockName: %s finyear: %s ftoNo: %s " % (districtName,blockName,finyear,ftoNo))
      fullfinyear=getFullFinYear(finyear)
      if ftoNo != '':
        # filepath=nregaRawDataDir.replace("stateName",stateName.upper()).replace("districtName",districtName.upper())
        filepath=nregaWebDir.replace("stateName",stateName.upper()).replace("districtName",districtName.upper())
        filename=filepath+blockName.upper()+"/FTOs/"+fullfinyear+"/"+ftoNo+".html"
        logger.info("Downloading FTO: %s " % ftoNo)
        htmlresponse,htmlsource = getFTO(fullfinyear,stateCode,ftoNo)
        logger.info("Response = %s " % htmlresponse)
        success=0
        isPopulatedString=''
        if htmlresponse['status'] == '200':
          logger.info("Status is 200")
          isPopulatedString="isPopulated=0,"
          success,outhtml=alterFTO(cur,logger,htmlsource,stateName,districtName,blockName,ftoNo,rowid)
          #htmlsource=htmlsource.replace('<head>','<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
          if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
          # `with` closes (and flushes) the file; the handle used to be left open.
          with open(filename, "wb") as myfile:
            myfile.write(outhtml.encode("UTF-8"))
          logger.info(filename)
          #writeFile(filename,htmlsource)
          #writeFile3("/home/libtech/webroot/nreganic.libtech.info/temp/"+ftoNo+".html",htmlsource)
        query="update ftos set isDownloaded=%s,%sdownloadAttemptDate=NOW()  where id=%s" %(str(success),str(isPopulatedString),str(rowid))
        logger.info(query)
        cur.execute(query)





  dbFinalize(db) # Make sure you put this if there are other exit paths or errors
  logger.info("...END PROCESSING")
  exit(0)
示例#19
0
def main():
  '''
  Crawl the Bihar SFC site and upsert every fair-price shop into pdsShops.

  For the year given in args['year'] the district, block and shop
  drop-downs of the shop summary page are walked in nested loops; every
  (district, block, shop) combination that is not a "Select" placeholder
  is inserted into pdsShops if missing and then updated with its names.
  The browser, display and database are finalized even when the crawl
  raises; on success the process exits with status 0.

  NOTE(review): the SQL is built by interpolating text scraped from the
  site.  It is left as is because cleanFPSName may already escape the
  shop name - confirm before switching to parameterized queries.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  db = dbInitialize(db="biharPDS", charset="utf8")  # The rest is updated automatically in the function
  # (finalizer, resource) pairs; released in reverse order of creation so
  # the original driver -> display -> db shutdown order is kept.
  cleanup = [(dbFinalize, db)]
  try:
    cur=db.cursor()
    db.autocommit(True)
    #Query to set up Database to read Hindi Characters
    query="SET NAMES utf8"
    cur.execute(query)
    inyear=args['year']

    logger.info(inyear)
    display = displayInitialize(args['visible'])
    cleanup.append((displayFinalize, display))
    driver = driverInitialize(args['browser'])
    cleanup.append((driverFinalize, driver))

    #Start Program here

    base_url = "http://sfc.bihar.gov.in/"
    driver.get("http://sfc.bihar.gov.in/login.htm")
    driver.get(base_url + "/fpshopsSummaryDetails.htm")
    Select(driver.find_element_by_id("year")).select_by_visible_text(inyear)
    time.sleep(10)
    # Snapshot the <option> elements of each drop-down before selecting
    # from it; every selection is followed by a fixed 10 s wait.
    select_box = driver.find_element_by_id("district_id")
    options = list(select_box.find_elements_by_tag_name("option"))
    for element in options:
      distCode=element.get_attribute("value")
      distName=element.get_attribute("text")
      logger.info("District Code: %s   District Name: %s " %(distCode,distName))
      Select(driver.find_element_by_id("district_id")).select_by_value(distCode)
      time.sleep(10)
      block_box = driver.find_element_by_id("block_id")
      blockOptions = list(block_box.find_elements_by_tag_name("option"))
      for blockElement in blockOptions:
        blockCode=blockElement.get_attribute("value")
        blockName=blockElement.get_attribute("text")
        logger.info("distCode:%s  distName:%s  blockCode:%s  blockName:%s " % (distCode,distName,blockCode,blockName))
        Select(driver.find_element_by_id("block_id")).select_by_value(blockCode)
        time.sleep(10)
        fps_box = driver.find_element_by_id("fpshop_id")
        fpsOptions = list(fps_box.find_elements_by_tag_name("option"))
        for fpsElement in fpsOptions:
          fpsCode=fpsElement.get_attribute("value")
          fpsName=fpsElement.get_attribute("text")

          myString=distCode+','+distName+','+blockCode+','+blockName+','+fpsCode+','+fpsName
          logger.info(myString)
          # Combinations containing "Select" are skipped rather than stored.
          if "Select" in myString:
            logger.info("This will not be entered into Database")
          else:
            fpsName1=cleanFPSName(fpsName)
            whereClause="where fpsCode='%s' and blockCode='%s' and distCode='%s' " % (fpsCode,blockCode,distCode)
            query="select * from pdsShops %s " % (whereClause)
            cur.execute(query)
            if cur.rowcount == 0:
              query="insert into pdsShops (fpsCode,blockCode,distCode) values ('%s','%s','%s') " % (fpsCode,blockCode,distCode)
              cur.execute(query)
            query="update pdsShops set distName='%s',blockName='%s',fpsName='%s' %s " % (distName,blockName,fpsName1,whereClause)
            logger.info(query)
            cur.execute(query)

    # End program here
  finally:
    for finalize, resource in reversed(cleanup):
      finalize(resource)

  logger.info("...END PROCESSING")
  exit(0)
示例#20
0
def main():
  '''
  Crawl the jobcard register of every required panchayat into jobcardRegister.

  Navigates nrega.nic.in from the state page down to the district given
  in args['district'], then for every required block and panchayat opens
  "Job card/Employment Register", records the crawl status and time on
  the panchayat row and inserts each jobcard number that contains
  "<stateShortCode>-<districtCode>".

  NOTE(review): no driverFinalize / displayFinalize / dbFinalize call is
  visible at the end of this function - confirm whether that is handled
  elsewhere.  The SQL is built by concatenating scraped text and is left
  unchanged here.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  districtName=args['district']
  logger.info("DistrictName "+districtName)
  db = dbInitialize(db=districtName.lower(), charset="utf8")  # The rest is updated automatically in the function
  cur=db.cursor()
  db.autocommit(True)
  #Query to set up Database to read Hindi Characters
  query="SET NAMES utf8"
  cur.execute(query)
  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  jobcardPrefix="%s-%s" % (stateShortCode,districtCode)
  logger.info("crawlIP "+crawlIP)
  logger.info("State Name "+stateName)
  #Start Program here
  url="http://nrega.nic.in/netnrega/sthome.aspx"
  driver.get(url)
  elem = driver.find_element_by_link_text(stateName)
  elem.send_keys(Keys.RETURN)
  time.sleep(1)
  elem = driver.find_element_by_link_text(districtName.upper())
  elem.send_keys(Keys.RETURN)
  time.sleep(1)
  #Query to get all the blocks
  query="select stateCode,districtCode,blockCode,name from blocks where isRequired=1"
  cur.execute(query)
  results = cur.fetchall()
  for row in results:
    stateCode=row[0]
    blockCode=row[2]
    blockName=row[3]
    logger.info("Block Name" + blockName)
    elem = driver.find_element_by_link_text(blockName)
    elem.send_keys(Keys.RETURN)
    time.sleep(1)

    query="select name,panchayatCode,id from panchayats where isRequired=1  and stateCode='"+stateCode+"' and districtCode='"+districtCode+"' and blockCode='"+blockCode+"' order by jobcardCrawlDate"
    cur.execute(query)
    panchresults = cur.fetchall()
    for panchrow in panchresults:
      panchayatName=panchrow[0]
      panchayatCode=panchrow[1]
      panchID=panchrow[2]
      logger.info(stateCode+districtCode+blockCode+blockName+panchayatCode+panchayatName)
      elem = driver.find_element_by_link_text(panchayatName)
      elem.send_keys(Keys.RETURN)
      time.sleep(1)
      elem = driver.find_element_by_link_text("Job card/Employment Register")
      elem.send_keys(Keys.RETURN)
      time.sleep(5)
      curtime = time.strftime('%Y-%m-%d %H:%M:%S')
      html_source = driver.page_source
      htmlsoup=BeautifulSoup(html_source)
      #logger.info(html_source)
      #f=open("/tmp/ab.html","w")
      #f.write(html_source)
      # Status 1 means the centred register table was found, 0 that it was not.
      table=htmlsoup.find('table',align="center")
      if table is not None:
        rows = table.findAll('tr')
        status=1
      else:
        rows = []
        status=0
      query="update panchayats set jobcardCrawlStatus="+str(status)+", jobcardCrawlDate='"+curtime+"' where id="+str(panchID)
      logger.info(query)
      cur.execute(query)
      logger.info("Status is " + str(status))
      for tr in rows:
        cols = tr.findAll('td')
        # The jobcard number is read from the second cell.  Rows with two
        # cells or fewer are skipped; they used to reuse the previous
        # row's number, or raise NameError on the very first row.
        if len(cols) <= 2:
          continue
        jcno="".join(cols[1].text.split())
        if jobcardPrefix not in jcno:
          continue
        logger.info(jcno)
        query="insert into jobcardRegister (jobcard,stateCode,districtCode,blockCode,panchayatCode) values ('"+jcno+"','"+stateCode+"','"+districtCode+"','"+blockCode+"','"+panchayatCode+"')"
        try:
          cur.execute(query)
        except MySQLdb.IntegrityError as e:
          # Presumably the jobcard is already registered; keep crawling.
          logger.debug("Jobcard %s not inserted: %s", jcno, e)
      # Two steps back: register page -> panchayat page -> block page.
      driver.back()
      driver.back()
      time.sleep(5)

    driver.back()
    time.sleep(5)
示例#21
0
def main():
  '''
  Download and store the FTO status page for each pending FTO of a district.

  Arguments come from argsFetch(): 'log_level', 'limit', 'district' and
  'finyear'.  Every ftoDetails row of the given financial year that is not
  yet marked as downloaded, and whose statusDownloadDate is more than 48
  hours old, is submitted to the tracking form at `url`.  The returned page
  is written to <ftofilepath>/<block>/FTO/<full finyear>/<ftoNo>_status.html
  and the row is updated with isStatusDownloaded=1.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  display = displayInitialize()
  driver = driverInitialize()
  db = dbInitialize(db="libtech", charset="utf8")  # The rest is updated automatically in the function
  cur=db.cursor()
  db.autocommit(True)
  #Query to set up Database to read Hindi Characters
  query="SET NAMES utf8"
  cur.execute(query)
  limitString=''
  if args['limit']:
    limitString=' limit '+args['limit']
  # NOTE(review): 'district' and 'finyear' are effectively mandatory - when
  # either is missing the name below is never bound and the next log line
  # raises NameError.
  if args['district']:
    districtName=args['district']

  logger.info("DistrictName "+districtName)
  if args['finyear']:
    finyear=args['finyear']
  logger.info("finyear "+finyear)

  # Look up the per-district crawl settings in the shared libtech database
  query="use libtech"
  cur.execute(query)
  query="select crawlIP from crawlDistricts where name='%s'" % districtName.lower()
  crawlIP=singleRowQuery(cur,query)
  query="select state from crawlDistricts where name='%s'" % districtName.lower()
  stateName=singleRowQuery(cur,query)
  logger.info("crawlIP "+crawlIP)
  logger.info("State Name "+stateName)
  # The remaining queries run against the district's own database
  query="use %s" % districtName.lower()
  cur.execute(query)

  ftofilepath=nregaDataDir.replace("stateName",stateName.title())+"/"+districtName.upper()+"/"
  url="http://services.ptcmysore.gov.in/emo/Trackfto.aspx"
  query="select b.name,f.ftoNo,f.stateCode,f.districtCode,f.blockCode,f.finyear,f.id from ftoDetails f,blocks b where TIMESTAMPDIFF(HOUR, f.statusDownloadDate, now()) > 48  and f.isStatusDownloaded=0 and f.finyear='%s' and f.blockCode=b.blockCode and f.stateCode=b.stateCode and f.districtCode=b.districtCode %s;" % (finyear,limitString)
  cur.execute(query)
  if cur.rowcount:
    logger.info("Number of records tobe processed:" +str(cur.rowcount))
    results = cur.fetchall()
    for row in results:
      blockName=row[0]
      ftono=row[1]
      stateCode=row[2]
      districtCode=row[3]
      blockCode=row[4]
      finyear=row[5]
      ftoid=row[6]
      fullfinyear=getFullFinYear(finyear)
      logger.info(stateCode+districtCode+blockCode+blockName)

      ftofilename=ftofilepath+blockName+"/FTO/"+fullfinyear+"/"+ftono+"_status.html"
      if not os.path.exists(os.path.dirname(ftofilename)):
        os.makedirs(os.path.dirname(ftofilename))

      logger.info(ftofilename)
      driver.get(url)
      driver.find_element_by_id("ctl00_ContentPlaceHolder1_txtFTO").clear()
      driver.find_element_by_id("ctl00_ContentPlaceHolder1_txtFTO").send_keys(ftono)
      driver.find_element_by_id("ctl00_ContentPlaceHolder1_Button1").click()
      html_source = driver.page_source
      # Declare the charset inside the saved page so it renders correctly offline
      html_source=html_source.replace('<head>','<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
      # Encoded bytes are written, so open in binary mode; the with-block
      # closes the handle before the row is marked as downloaded.
      with open(ftofilename, 'wb') as f:
        f.write(html_source.encode("UTF-8"))
      query="update ftoDetails set isStatusDownloaded=1,statusDownloadDate=now() where id="+str(ftoid)
      logger.info(query)
      cur.execute(query)

  # Release the browser and the display like the other crawlers in this file do
  driverFinalize(driver)
  displayFinalize(display)
  dbFinalize(db) # Make sure you put this if there are other exit paths or errors
  logger.info("...END PROCESSING")
  exit(0)
示例#22
0
def main():
  '''
  Walk the classes listed in ./z.csv and log the video URL of each session.

  Each CSV row is a (title, url) pair.  For every class a directory
  SkillsShare/<sanitised title> is created, each "session-item" element of
  the class page is clicked, and the source URL of the video element found
  on the page is logged together with the curl command that would download
  it.  The download itself is disabled (the os.system call is commented
  out).
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))
  logger.info("BEGIN PROCESSING...")

  display = displayInitialize(args['visible'])
  driver = driverInitialize(browser=args['browser'] , path='/home/mayank/.mozilla/firefox/4s3bttuq.default/')
  base_url="https://www.skillshare.com/login"
  driver.get(base_url)
  logger.info('Fetching URL[%s]' % base_url)
  try:
    driver.find_element_by_name("LoginForm[email]").clear()
    driver.find_element_by_name("LoginForm[email]").send_keys("*****@*****.**")
    driver.find_element_by_name("LoginForm[password]").clear()
    time.sleep(100) # If you want to manually log in
  except Exception as e:
    # The login form fields were not found or could not be filled in
    logger.info('Already signed in [%s]', e)
  time.sleep(10)

  filename = "./z.csv"
  # csv.reader is lazy, so the file has to stay open for the whole loop;
  # the with-block closes it afterwards.
  with open(filename, 'r') as csv_handle:
    content = csv.reader(csv_handle, delimiter=',', quotechar='"')
    for (title, url) in content:
      logger.info('Fetching URL[%s]' % url)
      driver.get(url)
      time.sleep(10)

      # Keep only letters, digits and spaces, then turn spaces into underscores
      escaped_title = re.sub(r"[^A-Za-z 0-9]+", '', title).replace(' ', '_')
      dirname = 'SkillsShare/' + escaped_title
      cmd = 'mkdir -p ' + dirname
      logger.info(cmd)
      # Same effect as the logged `mkdir -p`, without spawning a shell
      if not os.path.exists(dirname):
        os.makedirs(dirname)

      els = driver.find_elements_by_class_name("session-item")

      for i, el in enumerate(els):
        logger.debug(str(el))
        bs = BeautifulSoup(el.get_attribute('innerHTML'), "html.parser")
        p = bs.find('p')
        name = p.text
        # <two digit index>_<sanitised session name>.mp4
        name = "%02d" % (i+1) + '_' + re.sub(r"[^A-Za-z 0-9]+", '', name).replace(' ', '_') + '.mp4'

        logger.info(str(p) + name)
        el.click()
        time.sleep(10)
        html_source = driver.page_source

        bs = BeautifulSoup(html_source, "html.parser")
        html = bs.findAll('video', attrs={'class':['vjs-tech']})
        str_html = str(html)
        logger.info(str_html)
        # Cut the value of the first src="..." attribute, up to its ?pubId query
        video_src = str_html[str_html.find("src=")+5:]
        fetch_url = video_src[:video_src.find("?pubId")]
        logger.info(fetch_url)

        if os.path.exists(dirname + '/' + name):
          continue
        cmd = 'cd %s && curl -s %s -o %s' % (dirname, fetch_url, name)
        logger.info(cmd)
        # os.system(cmd)

  driverFinalize(driver)
  displayFinalize(display)
  logger.info("...END PROCESSING")
  exit(0)
示例#23
0
def main():
  '''
  Maintain the FPS shop tables, driven by command line flags.

  With args['enumerate'] set: for every FPSShop, make sure a FPSStatus row
  exists for each month from January 2016 up to the current month, and a
  VillageFPSStatus row for each FPSVillage of that shop and status.

  With args['crawl'] set: for every Block with fpsRequired=True, select its
  district and block on the shop summary page and create a FPSShop row for
  each option of the shop drop-down whose code is not stored yet.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  if args['enumerate']:
    startYear=2016
    for eachFPSShop in FPSShop.objects.all():
      now = datetime.now()
      for eachYear in range(startYear,now.year+1):
        # The current year only runs up to the current month
        maxMonth = now.month if eachYear == now.year else 12

        for eachMonth in range(1,maxMonth+1):
          logger.info("%d-%d" % (eachMonth,eachYear))
          myShop=FPSStatus.objects.filter(fpsShop=eachFPSShop,fpsMonth=eachMonth,fpsYear=eachYear).first()
          if myShop is None:
            # create() returns the saved row, so no second lookup is needed
            myShop=FPSStatus.objects.create(fpsShop=eachFPSShop,fpsMonth=eachMonth,fpsYear=eachYear)
            logger.info("Created object")

          myVillages=FPSVillage.objects.filter(fpsShop=eachFPSShop)
          for eachVillage in myVillages:
            myVillageFPSStatus=VillageFPSStatus.objects.filter(fpsVillage=eachVillage,fpsStatus=myShop).first()
            if myVillageFPSStatus is None:
              VillageFPSStatus.objects.create(fpsVillage=eachVillage,fpsStatus=myShop)

  if args['crawl']:
    display = displayInitialize(args['visible'])
    driver = driverInitialize(args['browser'])

    base_url = "http://sfc.bihar.gov.in/"
    # The login page is loaded first, then the summary page that holds the
    # district / block / shop drop-downs.
    driver.get("http://sfc.bihar.gov.in/login.htm")
    driver.get(base_url + "/fpshopsSummaryDetails.htm")

    myBlocks=Block.objects.filter(fpsRequired=True)
    for eachBlock in myBlocks:
      logger.info("District Name: %s Block Name: %s " % (eachBlock.district.name,eachBlock.name))
      districtCode=eachBlock.district.fpsCode
      blockCode=eachBlock.fpsCode
      Select(driver.find_element_by_id("district_id")).select_by_value(districtCode)
      time.sleep(10)

      Select(driver.find_element_by_id("block_id")).select_by_value(blockCode)
      time.sleep(10)
      fps_box = driver.find_element_by_id("fpshop_id")
      # Every <option> of the shop drop-down is one shop of the selected block
      for fpsElement in fps_box.find_elements_by_tag_name("option"):
        fpsCode=fpsElement.get_attribute("value")
        fpsName=fpsElement.get_attribute("text")
        logger.info("fpsCode: %s, fpsName: %s " % (fpsCode,fpsName))
        myFPSShop=FPSShop.objects.filter(fpsCode=fpsCode).first()
        if myFPSShop is None:
          FPSShop.objects.create(fpsCode=fpsCode,name=fpsName,block=eachBlock)

    driverFinalize(driver)
    displayFinalize(display)

  logger.info("...END PROCESSING")
  exit(0)
示例#24
0
def runTestSuite():
  '''
  Save the 7/12 page of every survey number found for each gat in gat_list.

  For each gat the search form at the module-level `url` is filled in with
  the module-level district / taluka / village names (dn, tn, vn) and the
  gat number.  Each survey number offered in the "selectedSno" drop-down is
  then requested; the page that opens in a second browser window is checked
  (title and survey number) and written to
  /home/mayank/wd/SaatBaara/7-12-mahabhulekh/<sno>.html.  Survey numbers
  whose file already exists are skipped.
  '''
  logger = loggerFetch("info")
  logger.info("BEGIN PROCESSING...")

  display = displayInitialize(0)
  driver = driverInitialize()

  for gat in gat_list:
    logger.info('Fetching gat[%s]...' % gat)

    driver.get(url)
    try:
      driver.find_element_by_xpath("//form[@id='aspnetForm']/div[3]/div/div/div[3]/a[3]/p").click()
    except Exception:
      logger.error('Cant find element for [%s]' % gat)
      continue

    Select(driver.find_element_by_id("distSelect")).select_by_visible_text(dn)
    Select(driver.find_element_by_id("talSelect")).select_by_visible_text(tn)
    Select(driver.find_element_by_id("vilSelect")).select_by_visible_text(vn)
    driver.find_element_by_id("rbsryno").click()
    driver.find_element_by_xpath("//input[@type='number']").clear()
    driver.find_element_by_xpath("//input[@type='number']").send_keys(gat)
    driver.find_element_by_css_selector("input[type=\"button\"]").click()

    # An extra window at this point is treated as a dialog: accept it and
    # move on to the next gat.
    if len(driver.window_handles) > 1:
      logger.info("Dialog Box Window [" + str(driver.window_handles) + "]")
      driver.switch_to_alert().accept()
      continue

    html_source = driver.page_source.replace('<head>',
                                             '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')

    logger.debug("HTML Fetched [%s]" % html_source)
    soup = BeautifulSoup(driver.page_source, "html.parser")
    sno_select = soup.find("select", {"ng-model" : "selectedSno"})
    if sno_select is None:
      # Without the drop-down there is nothing to iterate over for this gat
      logger.error('Cant find survey number list for [%s]' % gat)
      continue
    snos = [ sno_option.text for sno_option in sno_select.findAll("option") ]
    logger.debug("Found [%s]" % str(snos))
    snos = snos[1:]  # the first option is not a survey number
    logger.info("SNO List [%s]" % str(snos))

    for sno in snos:
      logger.info('Processing [%s]' % sno)
      filename = '/home/mayank/wd/SaatBaara/7-12-mahabhulekh/%s.html' % sno.replace('/','_')
      if os.path.exists(filename):
        continue
      Select(driver.find_element_by_xpath("//form[@id='aspnetForm']/div[3]/div/div/div[3]/div/div[3]/table/tbody/tr[3]/td/select")).select_by_visible_text(sno)
      driver.find_element_by_css_selector("td.last-rows > input[type=\"button\"]").click()
      time.sleep(5)

      parent_handle = driver.current_window_handle
      logger.info("Handles : %s Number : %s" % (driver.window_handles, len(driver.window_handles)))

      if len(driver.window_handles) != 2:
        logger.error("Handlers gone wrong [" + str(driver.window_handles) + "]")
        driver.save_screenshot('z.png')
        # The driver is still on the parent window here; carrying on would
        # inspect and then close the parent instead of a result window.
        continue
      driver.switch_to_window(driver.window_handles[-1])

      html_source = driver.page_source.replace('<head>',
                                               '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>').encode('utf-8')

      logger.debug("HTML Fetched [%s]" % html_source)
      if(driver.title != '७/१२'):
        logger.error(driver.title)
        driver.close()
        driver.switch_to_window(parent_handle)
        continue

      # The survey number sits in the third <tbody> of the page, in the third
      # <td> nested inside that tbody's first <td>.  Any missing level is
      # reported as an empty body instead of raising.
      bs = BeautifulSoup(html_source, "html.parser")
      body = bs.find('tbody')
      for _ in range(2):
        if body is not None:
          body = body.findNext('tbody')
      logger.debug(body)
      cells = []
      if body is not None:
        td = body.find('td')
        if td is not None:
          cells = td.findAll('td')
      if len(cells) < 3:
        logger.error('Empty body for [%s]' % sno)
        driver.close()
        driver.switch_to_window(parent_handle)
        continue

      logger.info("Checking [%s]" % cells[2].text)
      if(sno != cells[2].text):
        logger.error('sno[%s] != td.text[%s]' % (sno, cells[2].text))
        driver.close()
        driver.switch_to_window(parent_handle)
        continue

      with open(filename, 'wb') as html_file:
        logger.info('Writing [%s]' % filename)
        html_file.write(html_source)

      driver.close()
      driver.switch_to_window(parent_handle)
      time.sleep(1)
    time.sleep(1)

  driverFinalize(driver)
  displayFinalize(display)


  logger.info("...END PROCESSING")
def main():
  '''
  Save the worker list report of each village as HTML and as CSV.

  Arguments come from argsFetch(): 'log_level', 'visible', 'browser' and
  'limit' (number of Village rows to process, 1 when not given).  For every
  village the report page is fetched, validated with validateWorkerList()
  and, when a table is found, stored through saveVillageReport() under the
  report types "telJobcardRegisterHTML" and "telJobcardRegisterCSV".
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))
  logger.info("BEGIN PROCESSING...")
  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  limit = int(args['limit']) if args['limit'] else 1

  for eachVillage in Village.objects.all()[:limit]:
    logger.info(eachVillage.name)
    stateName=eachVillage.panchayat.block.district.state.name
    districtName=eachVillage.panchayat.block.district.name
    blockName=eachVillage.panchayat.block.name
    panchayatName=eachVillage.panchayat.name
    villageName=eachVillage.name
    url="http://www.nrega.telangana.gov.in/Nregs/FrontServlet?requestType=SmartCardreport_engRH&actionVal=MobnumberStatus&id=%s&Retype=null&type=null&file=%s" % (eachVillage.tcode,eachVillage.name)
    logger.info(url)
    try:
      # NOTE(review): the page is requested twice on purpose in the original
      # code - presumably the site needs a second load; confirm before removing.
      driver.get(url)
      driver.get(url)
      myhtml = driver.page_source
    except Exception as e:
      # A failed fetch only skips this village; report it instead of hiding it
      logger.error("Failed to fetch [%s]: %s" % (url, e))
      continue

    logger.info("No Error")
    error1,myTable=validateWorkerList(myhtml)
    if error1 is not None:
      continue

    logger.info("Worker List found")
    outhtml=stripTableAttributes(myTable,"myTable")
    outcsv=table2csv(myTable)
    title="WorkerList  state:%s District:%s block:%s panchayat: %s vilage:%s " % (stateName,districtName,blockName,panchayatName,villageName)
    outhtml=htmlWrapperLocal(title=title, head='<h1 align="center">'+title+'</h1>', body=outhtml)
    # Encode to UTF-8 where possible; content that cannot be encoded is
    # passed on unchanged.
    try:
      outhtml=outhtml.encode("UTF-8")
    except (UnicodeError, AttributeError):
      pass

    try:
      outcsv=outcsv.encode("UTF-8")
    except (UnicodeError, AttributeError):
      pass

    filename=eachVillage.slug+"_tjr.html"
    filenamecsv=eachVillage.slug+"_tjr.csv"
    finyear=getCurrentFinYear()
    reportType="telJobcardRegisterHTML"
    saveVillageReport(logger,eachVillage,finyear,reportType,filename,outhtml)
    reportType="telJobcardRegisterCSV"
    saveVillageReport(logger,eachVillage,finyear,reportType,filenamecsv,outcsv)

  driverFinalize(driver)
  displayFinalize(display)
  logger.info("...END PROCESSING")
  exit(0)
示例#26
0
def fetchMusterDetails(logger, db, cmd=None, directory=None, url=None, is_parse_info=None, is_push_info=None, is_visible=None):
  '''
  Fetch the Muster Details page and save it in the specified directory.

  logger        -- logger used for all progress and error messages
  db            -- database handle, only passed on to pushInfoIntoDB()
  cmd           -- label used in the BEGIN/END log lines
                   (default "FETCH MUSTER DETAILS")
  directory     -- where test.html is written (default "./Downloads")
  url           -- page to fetch (defaults to a fixed Musternew.aspx URL)
  is_parse_info -- when true, follow each panchayat link found in the
                   fetched page and save it as /tmp/<panchayat>.html
  is_push_info  -- when true, call pushInfoIntoDB() after the browser is closed
  is_visible    -- passed to displayInitialize(); set to 1 for debugging selenium

  Returns None.  When the expected element does not appear, the page is
  still saved and the function returns early after closing the browser.
  '''
  if not cmd:
    cmd="FETCH MUSTER DETAILS"
  logger.info("BEGIN %s..." % cmd)

  if not directory:
    directory = "./Downloads"

  if not url:
    url = 'http://164.100.112.66/netnrega/Citizen_html/Musternew.aspx?id=2&lflag=eng&ExeL=GP&fin_year=2015-2016&state_code=33&district_code=3305&block_code=3305007&panchayat_code=3305007038&State_name=CHHATTISGARH&District_name=SURGUJA&Block_name=BATAULI&panchayat_name=Govindpur'

  if not is_visible:
    is_visible = 0        # Set to 1 for debugging selenium

  if not is_parse_info:
    is_parse_info = False

  if not is_push_info:
    is_push_info = False

  # The part below could be moved to a function downloadMusterDetails() to make it reusable
  filename = directory + '/' + 'test.html' # Use your naming logic + blockName + '_' + panchayat + '_' + shopCode + '.html'
  logger.info('filename[%s]' % filename)

  filepath = os.path.dirname(filename)
  if not os.path.exists(filepath):
    logger.info('Creating direcotry [%s] as it does not exist' % filepath)
    os.makedirs(filepath)

  display = displayInitialize(is_visible)
  driver = driverInitialize()

  logger.error("Current URL [%s] Title [%s]" % (driver.current_url, driver.title))
  # cookieDump(driver)
  # driver.delete_all_cookies()
  logger.error("Current URL [%s] Title [%s]" % (driver.current_url, driver.title))
  logger.info("Fetching...[%s]" % url)
  driver.get(url)

  logger.error("Current URL [%s] Title [%s]" % (driver.current_url, driver.title))

  # Use double refresh if need be like in AP sites
  if False:
    logger.info("Refreshing...[%s]" % url)
    driver.get(url)    # A double refresh required for the page to load
  logger.error("Current URL [%s] Title [%s]" % (driver.current_url, driver.title))

  el = waitUntilID(logger, driver, 'ctl00_ContentPlaceHolder1_ddlwork', 10)
  if el:
    logger.info("Found El[%s]" % str(el))
    html_source = driver.page_source.replace('<head>',
                                           '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
    logger.debug("HTML Fetched [%s]" % html_source)
  else:
    logger.error("Failed to fetch the page [%s]" % driver.current_url)
    logger.error("Current URL [%s] Title [%s]" % (driver.current_url, driver.title))
    html_source = driver.page_source
    logger.info("HTML Fetched [%s]" % html_source)

  # The page is saved in both cases, so a failed fetch can be inspected later
  with open(filename, "wb") as html_file:
    logger.info("Writing [%s]" % filename)
    html_file.write(html_source.encode('UTF-8'))

  if not el:
    driverFinalize(driver)
    displayFinalize(display)
    return # Error condition to be dealt with

  # If you have information to parse using Beautiful Soup
  if is_parse_info:
    bs = BeautifulSoup(html_source, "html.parser")
    tr_list = bs.findAll('tr', attrs={'class':['normalRow', 'alternateRow']})
    logger.debug(str(tr_list))
    for tr in tr_list:
      # The panchayat name is read from the cell following the row's first cell
      td = tr.find('td')
      td = td.findNext('td')
      panchayat = td.text.strip()
      logger.info("Panchayat[%s]", panchayat)
      elem = driver.find_element_by_link_text(panchayat)
      elem.click()
      filename="/tmp/%s.html" % panchayat
      # Written as UTF-8 bytes, like the other page dumps in this function
      with open(filename, 'wb') as html_file:
        logger.info("Writing [%s]" % filename)
        html_file.write(driver.page_source.encode('UTF-8'))
      driver.back()

  driverFinalize(driver)
  displayFinalize(display)


  # If you want to push the information to the Database
  if is_push_info:
    # NOTE(review): `jobcard` is not defined in this function; this branch
    # relies on a module-level name - confirm it exists before enabling
    # is_push_info.
    query = 'select j.jobcard, p.name, p.panchayatCode from jobcardRegister j, panchayats p, blocks b where j.blockCode=p.blockCode and j.panchayatCode=p.panchayatCode  and j.blockCode=b.blockCode and j.jobcard="%s"' % jobcard
    logger.info("Command[%s] Directory[%s] URL[%s] jobcard[%s]" % (cmd, directory, url, jobcard))
    pushInfoIntoDB(logger, db, "POPULATE_DATABASE", directory, url, is_visible, is_push_info, query) # So that function can be shared

  logger.info("...END %s" % cmd)
示例#27
0
def runTestSuite():
    '''
    Save the 7/12 page of every survey number found for each gat in gat_list.

    For each gat the search form at the module-level `url` is filled in with
    the module-level district / taluka / village names (dn, tn, vn) and the
    gat number.  Each survey number offered in the "selectedSno" drop-down
    is then requested; the page that opens in a second browser window is
    checked (title and survey number) and written to
    /home/mayank/wd/SaatBaara/remaining/<sno>.html.  Survey numbers whose
    file already exists are skipped.
    '''
    logger = loggerFetch("info")
    logger.info("BEGIN PROCESSING...")

    display = displayInitialize(1)
    driver = driverInitialize()

    for gat in gat_list:
        logger.info('Fetching gat[%s]...' % gat)

        driver.get(url)
        try:
            driver.find_element_by_xpath(
                "//form[@id='aspnetForm']/div[3]/div/div/div[3]/a[3]/p").click(
                )
        except Exception:
            logger.error('Cant find element for [%s]' % gat)
            continue

        Select(
            driver.find_element_by_id("distSelect")).select_by_visible_text(dn)
        Select(
            driver.find_element_by_id("talSelect")).select_by_visible_text(tn)
        Select(
            driver.find_element_by_id("vilSelect")).select_by_visible_text(vn)
        driver.find_element_by_id("rbsryno").click()
        driver.find_element_by_xpath("//input[@type='number']").clear()
        driver.find_element_by_xpath("//input[@type='number']").send_keys(gat)
        driver.find_element_by_css_selector("input[type=\"button\"]").click()

        # An extra window at this point is treated as a dialog: accept it and
        # move on to the next gat.
        if len(driver.window_handles) > 1:
            logger.info("Dialog Box Window [" + str(driver.window_handles) +
                        "]")
            driver.switch_to_alert().accept()
            continue

        html_source = driver.page_source.replace(
            '<head>',
            '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        )

        logger.debug("HTML Fetched [%s]" % html_source)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        sno_select = soup.find("select", {"ng-model": "selectedSno"})
        if sno_select is None:
            # Without the drop-down there is nothing to iterate over
            logger.error('Cant find survey number list for [%s]' % gat)
            continue
        snos = [sno_option.text for sno_option in sno_select.findAll("option")]
        logger.debug("Found [%s]" % str(snos))
        snos = snos[1:]  # the first option is not a survey number
        logger.info("SNO List [%s]" % str(snos))

        for sno in snos:
            logger.info('Processing [%s]' % sno)
            filename = '/home/mayank/wd/SaatBaara/remaining/%s.html' % sno.replace(
                '/', '_')
            if os.path.exists(filename):
                continue
            Select(
                driver.find_element_by_xpath(
                    "//form[@id='aspnetForm']/div[3]/div/div/div[3]/div/div[3]/table/tbody/tr[3]/td/select"
                )).select_by_visible_text(sno)
            driver.find_element_by_css_selector(
                "td.last-rows > input[type=\"button\"]").click()
            time.sleep(5)

            parent_handle = driver.current_window_handle
            logger.info("Handles : %s Number : %s" %
                        (driver.window_handles, len(driver.window_handles)))

            if len(driver.window_handles) != 2:
                logger.error("Handlers gone wrong [" +
                             str(driver.window_handles) + "]")
                driver.save_screenshot('z.png')
                # The driver is still on the parent window here; carrying on
                # would inspect and then close the parent instead of a
                # result window.
                continue
            driver.switch_to_window(driver.window_handles[-1])

            html_source = driver.page_source.replace(
                '<head>',
                '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
            ).encode('utf-8')

            logger.debug("HTML Fetched [%s]" % html_source)
            if (driver.title != '७/१२'):
                logger.error(driver.title)
                driver.close()
                driver.switch_to_window(parent_handle)
                continue

            # The survey number sits in the third <tbody> of the page, in the
            # third <td> nested inside that tbody's first <td>.  Any missing
            # level is reported as an empty body instead of raising.
            bs = BeautifulSoup(html_source, "html.parser")
            body = bs.find('tbody')
            for _ in range(2):
                if body is not None:
                    body = body.findNext('tbody')
            logger.debug(body)
            cells = []
            if body is not None:
                td = body.find('td')
                if td is not None:
                    cells = td.findAll('td')
            if len(cells) < 3:
                logger.error('Empty body for [%s]' % sno)
                driver.close()
                driver.switch_to_window(parent_handle)
                continue

            logger.info("Checking [%s]" % cells[2].text)
            if (sno != cells[2].text):
                logger.error('sno[%s] != td.text[%s]' % (sno, cells[2].text))
                driver.close()
                driver.switch_to_window(parent_handle)
                continue

            with open(filename, 'wb') as html_file:
                logger.info('Writing [%s]' % filename)
                html_file.write(html_source)

            driver.close()
            driver.switch_to_window(parent_handle)
            time.sleep(1)
        time.sleep(1)

    driverFinalize(driver)
    displayFinalize(display)

    logger.info("...END PROCESSING")
示例#28
0
def main():
  '''
  Download the "List of Worker with Aadhar No." page of each required panchayat.

  Arguments come from argsFetch(): 'log_level', 'district', 'limit',
  'blockCode', 'visible' and 'browser'.  Starting from the state home page,
  the state and district links are followed once; then, for every row of
  the panchayat query, the block, panchayat and report links are followed,
  jobcardDownloadDate is updated, and the page source is written to
  <raw dir>/<BLOCK>/<PANCHAYAT>/jobcardRegister/workerList.html.
  '''
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  districtName = args['district']
  logger.info("DistrictName "+districtName)
  limitString = " limit %s " % args['limit'] if args['limit'] else ''
  db = dbInitialize(db=districtName.lower(), charset="utf8")  # The rest is updated automatically in the function
  cur = db.cursor()
  db.autocommit(True)
  additionalFilters = " and b.blockCode='%s' " % args['blockCode'] if args['blockCode'] else ''
  # Make the connection read Hindi characters correctly
  cur.execute("SET NAMES utf8")
  crawlIP,stateName,stateCode,stateShortCode,districtCode = getDistrictParams(cur,districtName)
  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  jobcardPrefix = "%s-%s" % (stateShortCode,districtCode)
  logger.info("crawlIP "+crawlIP)
  logger.info("State Name "+stateName)
  jcReportFilePath = nregaDir.replace("districtName",districtName.lower())+districtName.upper()+"/"
  jcReportRawFilePath = nregaRawDir.replace("districtName",districtName.lower())+districtName.upper()+"/"

  def followLink(linkText):
    # Locate the link by its visible text and activate it with the RETURN key
    link = driver.find_element_by_link_text(linkText)
    link.send_keys(Keys.RETURN)

  # Navigate from the state home page down to the district page
  driver.get("http://nrega.nic.in/netnrega/sthome.aspx")
  followLink(stateName)
  time.sleep(1)
  followLink(districtName.upper())
  time.sleep(1)

  # All required panchayats together with the block they belong to
  query = "select b.blockCode,b.name,p.panchayatCode,p.name from blocks b,panchayats p where b.blockCode=p.blockCode and p.isRequired=1 and jobcardCrawlDate is not NULL order by jobcardDownloadDate %s %s" % (additionalFilters,limitString)
  cur.execute(query)
  for record in cur.fetchall():
    blockCode = record[0]
    blockName = record[1]
    panchayatCode = record[2]
    panchayatName = record[3]
    panchayatNameOnlyLetters = re.sub(r"[^A-Za-z]+", '', panchayatName)

    followLink(blockName)
    followLink(panchayatName)
    followLink("List of Worker with Aadhar No.(UID No.)")
    time.sleep(15)

    query = "update panchayats set jobcardDownloadDate=now() where blockCode='%s' and panchayatCode='%s' " % (blockCode,panchayatCode)
    cur.execute(query)
    jcsource = driver.page_source

    # Three steps back, with a pause after each of the first two
    driver.back()
    time.sleep(5)
    driver.back()
    time.sleep(5)
    driver.back()

    rawhtml = jcsource.replace('<head>','<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
    jcfilename = jcReportRawFilePath+blockName.upper()+"/"+panchayatNameOnlyLetters.upper()+"/jobcardRegister/workerList.html"
    logger.info(jcfilename)
    writeFile(jcfilename,rawhtml)

  driverFinalize(driver)
  displayFinalize(display)
  dbFinalize(db) # Make sure you put this if there are other exit paths or errors
  logger.info("...END PROCESSING")
  exit(0)
示例#29
0
def _writeHtmlFile(logger, filename, html_source):
    '''Write html_source to filename as UTF-8 encoded bytes.'''
    with open(filename, "wb") as html_file:
        logger.info("Writing [%s]" % filename)
        html_file.write(html_source.encode('UTF-8'))


def _downloadMusterDetails(logger, driver, url, filename, is_parse_info):
    '''
    Load url in driver and save the resulting page to filename.

    Returns True when the element 'ctl00_ContentPlaceHolder1_ddlwork' showed
    up (waitUntilID returned a truthy value), False otherwise.  The page
    source is written to filename in both cases, so a failed load can be
    inspected afterwards.  When is_parse_info is true, every link named in the
    second cell of the 'normalRow'/'alternateRow' table rows is additionally
    opened and saved as /tmp/<link text>.html.
    '''
    # NOTE(review): these URL/title traces are emitted at ERROR level in the
    # original code; the level is kept so existing log output is unchanged.
    logger.error("Current URL [%s] Title [%s]" %
                 (driver.current_url, driver.title))
    logger.error("Current URL [%s] Title [%s]" %
                 (driver.current_url, driver.title))
    logger.info("Fetching...[%s]" % url)
    driver.get(url)

    logger.error("Current URL [%s] Title [%s]" %
                 (driver.current_url, driver.title))
    # Original note: some sites (AP) need a second driver.get(url) here before
    # the page loads; that double refresh is not enabled for this URL.
    logger.error("Current URL [%s] Title [%s]" %
                 (driver.current_url, driver.title))

    el = waitUntilID(logger, driver, 'ctl00_ContentPlaceHolder1_ddlwork', 10)
    if not el:
        logger.error("Failed to fetch the page [%s]" % driver.current_url)
        logger.error("Current URL [%s] Title [%s]" %
                     (driver.current_url, driver.title))
        # Keep whatever the browser ended up with, untouched, for debugging.
        html_source = driver.page_source
        logger.info("HTML Fetched [%s]" % html_source)
        _writeHtmlFile(logger, filename, html_source)
        return False

    logger.info("Found El[%s]" % str(el))
    # Declare the charset inside the saved copy so it renders correctly when
    # opened from disk.
    html_source = driver.page_source.replace(
        '<head>',
        '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
    )
    logger.debug("HTML Fetched [%s]" % html_source)
    _writeHtmlFile(logger, filename, html_source)

    # If you have information to parse using Beautiful Soup
    if is_parse_info:
        bs = BeautifulSoup(html_source, "html.parser")
        tr_list = bs.findAll('tr',
                             attrs={'class': ['normalRow', 'alternateRow']})
        logger.debug(str(tr_list))
        for tr in tr_list:
            # The link text is taken from the second <td> of the row.
            td = tr.find('td')
            td = td.findNext('td')
            panchayat = td.text.strip()
            logger.info("Panchayat[%s]", panchayat)
            elem = driver.find_element_by_link_text(panchayat)
            elem.click()
            # Written as UTF-8 bytes, like the main page, instead of relying
            # on the platform default text encoding.
            _writeHtmlFile(logger, "/tmp/%s.html" % panchayat,
                           driver.page_source)
            driver.back()

    return True


def fetchMusterDetails(logger,
                       db,
                       cmd=None,
                       directory=None,
                       url=None,
                       is_parse_info=None,
                       is_push_info=None,
                       is_visible=None):
    '''
    Fetch the Muster Details page and store it in the specified directory.

    Parameters:
      logger        -- logger used for all progress and error messages
      db            -- database handle, only handed on to pushInfoIntoDB
      cmd           -- label used in the BEGIN/END log lines
                       (default "FETCH MUSTER DETAILS")
      directory     -- target directory for test.html (default "./Downloads");
                       created when it does not exist
      url           -- page to fetch (defaults to a hard-coded Musternew.aspx URL)
      is_parse_info -- when true, also open and save each linked page found in
                       the result table
      is_push_info  -- when true, call pushInfoIntoDB after the download
      is_visible    -- passed to displayInitialize; 1 is meant for debugging
                       selenium, anything falsy becomes 0

    Returns None.  When the page fails to load the function returns early,
    after saving the page source and releasing the driver and display.
    '''
    if not cmd:
        cmd = "FETCH MUSTER DETAILS"
    logger.info("BEGIN %s..." % cmd)

    if not directory:
        directory = "./Downloads"

    if not url:
        url = 'http://164.100.112.66/netnrega/Citizen_html/Musternew.aspx?id=2&lflag=eng&ExeL=GP&fin_year=2015-2016&state_code=33&district_code=3305&block_code=3305007&panchayat_code=3305007038&State_name=CHHATTISGARH&District_name=SURGUJA&Block_name=BATAULI&panchayat_name=Govindpur'

    if not is_visible:
        is_visible = 0  # Set to 1 for debugging selenium

    if not is_parse_info:
        is_parse_info = False

    if not is_push_info:
        is_push_info = False

    filename = directory + '/' + 'test.html'  # Use your naming logic + blockName + '_' + panchayat + '_' + shopCode + '.html'
    logger.info('filename[%s]' % filename)

    filepath = os.path.dirname(filename)
    if not os.path.exists(filepath):
        logger.info('Creating direcotry [%s] as it does not exist' % filepath)
        os.makedirs(filepath)

    # try/finally guarantees the browser and the display are released even
    # when a Selenium call raises half way through the download.
    display = displayInitialize(is_visible)
    try:
        driver = driverInitialize()
        try:
            is_fetched = _downloadMusterDetails(logger, driver, url, filename,
                                                is_parse_info)
        finally:
            driverFinalize(driver)
    finally:
        displayFinalize(display)

    if not is_fetched:
        return  # Error condition to be dealt with

    # If you want to push the information to the Database
    if is_push_info:
        # NOTE(review): `jobcard` is not defined anywhere in this function; it
        # must exist at module level for this branch to work -- confirm.
        query = 'select j.jobcard, p.name, p.panchayatCode from jobcardRegister j, panchayats p, blocks b where j.blockCode=p.blockCode and j.panchayatCode=p.panchayatCode  and j.blockCode=b.blockCode and j.jobcard="%s"' % jobcard
        # `directory` is passed here; the previous code passed the builtin
        # function `dir` by mistake.
        logger.info("Command[%s] Directory[%s] URL[%s] jobcard[%s]" %
                    (cmd, directory, url, jobcard))
        pushInfoIntoDB(logger, db, "POPULATE_DATABASE", directory, url,
                       is_visible, is_push_info,
                       query)  # So that function can be shared

    logger.info("...END %s" % cmd)
示例#30
0
def main():
  """Download job card pages for the required panchayats of one district.

  Reads 'log_level', 'district', 'limit', 'visible' and 'browser' from
  argsFetch().  Starting at the NREGA state home page it opens the state and
  district, then for every required panchayat returned by the database it
  opens the "Job card/Employment Register" page, visits up to 50 job cards
  that are not yet marked as downloaded, stores the raw page and a rewritten
  summary page through writeFile, and sets isDownloaded=1 for each job card.

  NOTE(review): stateName, stateCode, districtCode, crawlIP, nregaDownloadsDir
  and nregaRawDownloadsDir are not assigned in this function and are assumed
  to be module-level settings -- confirm.

  The browser, display and database handles are released even if the crawl
  raises.  On success the process ends through exit(0).
  """
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  districtName=args['district']
  logger.info("DistrictName "+districtName)
  limitString=''
  if args['limit']:
    limitString=" limit %s " % args['limit']
  db = dbInitialize(db=districtName.lower(), charset="utf8")  # The rest is updated automatically in the function
  cur=db.cursor()
  db.autocommit(True)
  #Query to set up Database to read Hindi Characters
  query="SET NAMES utf8"
  cur.execute(query)
  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  try:
    logger.info("crawlIP "+crawlIP)
    logger.info("State Name "+stateName)
    jcReportFilePath=nregaDownloadsDir.replace("districtName",districtName.lower())+districtName.upper()+"/"
    jcReportRawFilePath=nregaRawDownloadsDir.replace("districtName",districtName.lower())+districtName.upper()+"/"
    #Start Program here
    url="http://nrega.nic.in/netnrega/sthome.aspx"
    driver.get(url)
    elem = driver.find_element_by_link_text(stateName)
    elem.send_keys(Keys.RETURN)
    time.sleep(1)
    elem = driver.find_element_by_link_text(districtName.upper())
    elem.send_keys(Keys.RETURN)
    time.sleep(1)
    #Query to get all the blocks
    query="select b.blockCode,b.name,p.panchayatCode,p.name from blocks b,panchayats p where b.blockCode=p.blockCode and p.isRequired=1 order by jobcardCrawlDate %s" % (limitString)
    cur.execute(query)
    results = cur.fetchall()
    for row in results:
      blockCode=row[0]
      blockName=row[1]
      panchayatCode=row[2]
      panchayatName=row[3]
      # District page -> block -> panchayat -> job card register (3 pages deep)
      elem = driver.find_element_by_link_text(blockName)
      elem.send_keys(Keys.RETURN)
      elem = driver.find_element_by_link_text(panchayatName)
      elem.send_keys(Keys.RETURN)
      elem = driver.find_element_by_link_text("Job card/Employment Register")
      elem.send_keys(Keys.RETURN)
      time.sleep(5)
      query="update panchayats set jobcardCrawlDate=now() where blockCode='%s' and panchayatCode='%s' " % (blockCode,panchayatCode)
      cur.execute(query)
      query="select jobcard from jobcardRegister where isDownloaded=0 and stateCode='"+stateCode+"' and districtCode='"+districtCode+"' and blockCode='"+blockCode+"' and panchayatCode='"+panchayatCode+"' limit 50"
      cur.execute(query)
      jcresults = cur.fetchall()
      for jcrow in jcresults:
        jobcard=jcrow[0]
        logger.info("blockName %s   panchayatName: %s jobcard: %s" % (blockName,panchayatName,jobcard) )
        elem = driver.find_element_by_link_text(jobcard)
        elem.send_keys(Keys.RETURN)
        jcsource = driver.page_source
        driver.back()
        # Declare the charset in the saved copy so it renders from disk.
        rawhtml=jcsource.replace('<head>','<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
        # Same relative location under both the raw and the processed tree.
        jcRelativePath=blockName.upper()+"/"+panchayatName.upper()+"/jobcardRegister/"+jobcard.replace("/","_")+".html"
        jcfilename=jcReportRawFilePath+jcRelativePath
        logger.info(jcfilename)
        writeFile(jcfilename,rawhtml)

        category,isBPL=getCategoryBPL(rawhtml)
        logger.info("Category : %s " % category)
        logger.info("isBPL : %s " % isBPL)
        htmlSoup=BeautifulSoup(rawhtml,"lxml")

        myhtml=''
        myhtml+=getSpans(htmlSoup,rawhtml)
        myhtml+=rewriteTable(htmlSoup,"Family Details","GridView4")
        myhtml+=rewriteTable(htmlSoup,"Requested Period of Employment","GridView1")
        myhtml+=rewriteTable(htmlSoup,"Period and Work on which Employment Offered","GridView2")
        myhtml+=rewriteTable(htmlSoup,"Period and Work on which Employment Given","GridView3")

        # 'align' was misspelled as 'aling', so the heading attribute had no effect.
        myhtml=htmlWrapperLocal(title="Jobcard Details", head='<h1 align="center">'+jobcard+'</h1>', body=myhtml)
        jcfilename=jcReportFilePath+jcRelativePath
        logger.info(jcfilename)
        writeFile(jcfilename,myhtml)
        query="update jobcardRegister set isDownloaded=1 where jobcard='"+jobcard+"'"
        cur.execute(query)

      # Step back to the district page so the next block link can be found;
      # without this the second panchayat lookup runs on the register page.
      for _ in range(3):
        driver.back()
  finally:
    driverFinalize(driver)
    displayFinalize(display)
    dbFinalize(db) # Make sure you put this if there are other exit paths or errors
  logger.info("...END PROCESSING")     
  exit(0)
示例#31
0
 def setUp(self):
     """Create the logger, display and driver used by the test.

     The display is initialised with flag 1 and the driver with the browser
     path '/opt/firefox/' and a timeout of 3.
     """
     log = loggerFetch('info')
     self.logger = log
     log.info('BEGIN PROCESSING...')

     shown_display = displayInitialize(1)
     self.display = shown_display

     browser = driverInitialize(path='/opt/firefox/', timeout=3)
     self.driver = browser
示例#32
0
def main():
  """Crawl panchayat pages and register the job cards they list.

  Reads 'log_level', 'limit', 'district', 'visible' and 'browser' from
  argsFetch().  For every required panchayat whose jobcardCrawlDate is NULL or
  older than 7 days it walks state -> district -> block -> panchayat on the
  NREGA site, saves the "Registration Application Register" page and the three
  account-detail pages through writeFile, then parses the
  "Job card/Employment Register" table and inserts every job card that
  contains the state prefix and is not yet present in the jobcards table.

  The browser, display and database handles are released even if the crawl
  raises.  On success the process ends through exit(0).
  """
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))
  logger.info("BEGIN PROCESSING...")
  limitString=''
  if args['limit']:
    limitString=" limit %s " % args['limit']
  additionalFilters=''
  if args['district']:
    additionalFilters+= " and p.districtName='%s' " % args['district']
  db = dbInitialize(db=nregaDB, charset="utf8")  # The rest is updated automatically in the function
  cur=db.cursor()
  db.autocommit(True)
  #Query to set up Database to read Hindi Characters
  query="SET NAMES utf8"
  cur.execute(query)

  # (link text, file name suffix) of the account pages saved per panchayat
  accountReports=(
    ("Download Panchayatwise MGNREGA Bank A/C Detail","_bankAccounts.html"),
    ("Download Panchayatwise MGNREGA Post Office Account Detail","_postAccounts.html"),
    ("Download Panchayat Wise MGNREGA Co-operative Bank A/C Detail","_cobankAccounts.html"),
  )

  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  try:
    url="http://nrega.nic.in/netnrega/sthome.aspx"
    driver.get(url)

    query="select p.stateCode,p.districtCode,p.blockCode,p.panchayatCode,p.stateName,p.districtName,p.rawBlockName,p.panchayatName,p.fullPanchayatCode,p.stateShortCode,p.crawlIP from panchayats p,panchayatStatus ps where p.fullPanchayatCode=ps.fullPanchayatCode and p.isRequired=1 and ( (TIMESTAMPDIFF(DAY, ps.jobcardCrawlDate, now()) > 7) or ps.jobcardCrawlDate is NULL)  %s order by ps.jobcardCrawlDate,fullPanchayatCode %s" % (additionalFilters,limitString)
    cur.execute(query)
    results=cur.fetchall()
    for row in results:
      [stateCode,districtCode,blockCode,panchayatCode,stateName,districtName,blockName,panchayatName,fullPanchayatCode,stateShortCode,crawlIP]=row
      filepath=nregaWebDir.replace("stateName",stateName.upper()).replace("districtName",districtName.upper())
      # Common "<block>/<PANCHAYAT>/<PANCHAYAT>" prefix of every file saved below
      filePrefix=filepath+blockName.upper()+"/%s/%s" % (panchayatName.upper(),panchayatName.upper())
      filename=filePrefix+"_jobcardRegister.html"
      logger.info(filename)
      jobcardPrefix="%s-" % (stateShortCode)
      logger.info("Processing %s-%s-%s-%s " % (stateName,districtName,blockName,panchayatName))
      elem = driver.find_element_by_link_text(stateName)
      elem.send_keys(Keys.RETURN)
      time.sleep(1)
      elem = driver.find_element_by_link_text(districtName.upper())
      elem.send_keys(Keys.RETURN)
      time.sleep(1)
      elem = driver.find_element_by_link_text(blockName)
      elem.send_keys(Keys.RETURN)
      time.sleep(1)

      # The panchayat is located by its code in the link target rather than by
      # its visible name.
      compareText="Panchayat_Code=%s" % fullPanchayatCode
      panchayatElem=None
      for elem in driver.find_elements_by_xpath("//a[@href]"):
        hrefLink=str(elem.get_attribute("href"))
        if compareText in hrefLink:
          logger.info("Found the Code")
          panchayatElem=elem
          break

      if panchayatElem is None:
        # Only the state, district and block pages were opened, so go back
        # three pages; going back five would leave the start page.
        for _ in range(3):
          driver.back()
        continue

      panchayatElem.send_keys(Keys.RETURN)
      time.sleep(1)
      #Before this lets download the application Register:
      elem = driver.find_element_by_link_text("Registration Application Register")
      elem.send_keys(Keys.RETURN)
      time.sleep(5)
      html_source = driver.page_source
      writeFile(filename,html_source) 
      driver.back()

      for linkText,suffix in accountReports:
        elem = driver.find_element_by_link_text(linkText)
        elem.send_keys(Keys.RETURN)
        time.sleep(15)
        jcsource = driver.page_source
        rawhtml=jcsource.replace('<head>','<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')
        writeFile(filePrefix+suffix,rawhtml) 
        driver.back()

      elem = driver.find_element_by_link_text("Job card/Employment Register")
      elem.send_keys(Keys.RETURN)
      time.sleep(5)
      html_source = driver.page_source
      htmlsoup=BeautifulSoup(html_source,"html.parser")
      # find() returns None when the page has no centred table; treat that as
      # "nothing to parse" instead of catching every possible exception.
      table=htmlsoup.find('table',align="center")
      if table is not None:
        rows = table.findAll('tr')
        status=1
      else:
        rows = []
        status=0
      query="update panchayatStatus set jobcardCrawlDate=NOW() where fullPanchayatCode='%s'"%fullPanchayatCode 
      logger.info(query)
      cur.execute(query)
      logger.info("Status is " + str(status))
      for tr in rows:
        cols = tr.findAll('td')
        if len(cols) <= 2:
          # Rows without a job card and name cell are skipped; previously they
          # hit an unbound jcno or re-used the values of the preceding row.
          continue
        jcno="".join(cols[1].text.split())
        headOfFamily=cols[2].text.replace("'","").lstrip().rstrip()
        logger.info("%s-%s" % (jcno,jobcardPrefix))
        if jobcardPrefix in jcno:
          logger.info(jcno)
          # NOTE(review): the result is not used below; the call is kept in
          # case getjcNumber validates the number -- confirm.
          jcNumber=getjcNumber(jcno)
          query="select * from jobcards where jobcard='%s' " % jcno
          cur.execute(query)
          if cur.rowcount == 0:
            # NOTE(review): this statement is assembled from scraped page text
            # by string concatenation; consider a parameterised query.
            query="insert into jobcards (name,jobcard,stateCode,districtCode,blockCode,panchayatCode,fullPanchayatCode) values ('"+headOfFamily+"','"+jcno+"','"+stateCode+"','"+districtCode+"','"+blockCode+"','"+panchayatCode+"',"+fullPanchayatCode+")"
            logger.info(query)
            cur.execute(query)

      # state, district, block, panchayat and register pages: five steps back
      # to the start page.
      for _ in range(5):
        driver.back()
  finally:
    driverFinalize(driver)
    displayFinalize(display)
    dbFinalize(db) # Make sure you put this if there are other exit paths or errors
  logger.info("...END PROCESSING")     
  exit(0)
def _loadPageSource(logger, driver, url):
  """Load url in driver and return its page source, or None on failure.

  The page is requested twice, as the original code did (presumably the first
  request only sets up the session -- TODO confirm).  Failures are logged with
  their traceback instead of being silently discarded.
  """
  try:
    driver.get(url)
    driver.get(url)
    return driver.page_source
  except Exception:
    logger.exception("Failed to load [%s]" % url)
    return None


def _crawlPanchayatVillages(logger, driver, myBlock, href):
  """Open one panchayat link of the block page and store the villages it lists.

  href is the link target relative to the site root.  The panchayat name is
  the text after "file=" in the link; links whose panchayat is not known for
  myBlock are ignored.  Every village link on the panchayat page is saved as a
  Village row keyed by the 'id' query parameter.
  """
  logger.info(href)
  panchayatLink="http://www.nrega.telangana.gov.in"+href
  panchayatName=panchayatLink.split("file=")[1]
  logger.info(panchayatName+panchayatLink)
  myPanchayat=Panchayat.objects.filter(block=myBlock,name=panchayatName).first()
  if myPanchayat is None:
    return

  phtml=_loadPageSource(logger,driver,panchayatLink)
  if phtml is None:
    return
  logger.info("No Error") 
  ptable=BeautifulSoup(phtml,"html.parser").find('table',id="sortable")
  if ptable is None:
    return
  print("Found")

  for villageAnchor in ptable.find_all('a'):
    logger.info(villageAnchor['href'])
    villageLink="http://www.nrega.telangana.gov.in"+villageAnchor['href']
    par = parse_qs(urlparse(villageLink).query)
    villageID=str(par['id'][0]).lstrip().rstrip()
    villageName=str(par['file'][0])
    logger.info(villageName+villageID)
    logger.info(par)
    logger.info(len(villageID))
    myVillage=Village.objects.filter(tcode=villageID).first()
    if myVillage is None:
      # create() returns the new row, so no second lookup is needed.
      myVillage=Village.objects.create(tcode=villageID)
    myVillage.name=villageName
    myVillage.code=villageID
    myVillage.panchayat=myPanchayat
    myVillage.save()


def main():
  """Crawl the villages of one Telangana block and save them as Village rows.

  Reads 'log_level', 'visible', 'browser' and 'nicBlockCode' from argsFetch().
  The block's report page is loaded, every panchayat link in its 'sortable'
  table is followed, and the villages found there are stored.

  The browser and display are released even if the crawl raises.  On success
  the process ends through exit(0).
  """
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))
  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  try:
    nicBlockCode=args["nicBlockCode"]
    myBlock=Block.objects.filter(code=nicBlockCode).first()
    blockCode=myBlock.tcode
    blockName=myBlock.name
    url="http://www.nrega.telangana.gov.in/Nregs/FrontServlet?requestType=SmartCardreport_engRH&actionVal=MobnumberStatus&id=%s&Retype=null&type=null&file=%s"  % (blockCode,blockName)
    logger.info(url)

    myhtml=_loadPageSource(logger,driver,url)
    if myhtml is not None:
      logger.info("No Error")
      htmlsoup=BeautifulSoup(myhtml,"html.parser")
      table=htmlsoup.find('table',id="sortable")
      if table is not None:
        print("Found")
        for link in table.find_all('a'):
          _crawlPanchayatVillages(logger,driver,myBlock,link['href'])
  finally:
    driverFinalize(driver)
    displayFinalize(display)
  logger.info("...END PROCESSING") 
  exit(0)
示例#34
0
def main():
  """Download pending wagelist pages for one district and financial year.

  Reads 'log_level', 'district', 'finyear', 'limit', 'blockCode', 'visible'
  and 'browser' from argsFetch().  A single wagelist (id=1) is first looked up
  through the site's search form; when that opens a second tab containing
  links, every pending wagelist is fetched directly by URL in that tab, saved
  through writeFile when it looks valid, and its download status is recorded
  in the wagelists table.  The process ends through exit(0).
  """
  args = argsFetch()
  logger = loggerFetch(args.get('log_level'))
  logger.info('args: %s', str(args))

  logger.info("BEGIN PROCESSING...")
  districtName = args['district']
  finyear = args['finyear']
  logger.info("DistrictName " + districtName)
  limitClause = (" limit %s " % args['limit']) if args['limit'] else ''

  db = dbInitialize(db=districtName.lower(), charset="utf8")  # The rest is updated automatically in the function
  cursor = db.cursor()
  db.autocommit(True)
  blockFilter = (" and b.blockCode='%s' " % args['blockCode']) if args['blockCode'] else ''

  # Set up the connection to read Hindi characters
  cursor.execute("SET NAMES utf8")
  crawlIP, stateName, stateCode, stateShortCode, districtCode = getDistrictParams(cursor, districtName)
  rawDataDir = nregaRawDataDir.replace("districtName", districtName.lower())
  fullfinyear = getFullFinYear(finyear)
  fullDistrictCode = stateCode + districtCode

  display = displayInitialize(args['visible'])
  driver = driverInitialize(args['browser'])
  driver.get("http://164.100.129.6/netnrega/nregasearch1.aspx")
  time.sleep(22)
  writeFile("/home/libtech/webroot/nreganic.libtech.info/temp/a.html", driver.page_source)

  # Only the wagelist with id=1 is searched through the form; the pending
  # wagelists are queried further down once the result tab is open.
  seedQuery = "select w.id,w.wagelistNo,b.name from wagelists w,blocks b where w.blockCode=b.blockCode and w.id=1 and finyear='%s' %s order by w.isDownloaded limit 1 " % (finyear, blockFilter)
  logger.info(seedQuery)
  cursor.execute(seedQuery)
  for seedRow in cursor.fetchall():
    seedWagelistNo = seedRow[1]
    logger.info("Same WagelistNo %s " % seedWagelistNo)
    if seedWagelistNo == '':
      continue

    # Fill in the search form: search type, state, district, keyword.
    maintab = driver.current_window_handle
    Select(driver.find_element_by_id("ddl_search")).select_by_visible_text("WageList")
    driver.find_element_by_css_selector('option[value="WageList"]').click()
    Select(driver.find_element_by_id("ddl_state")).select_by_visible_text(stateName.upper())
    driver.find_element_by_css_selector('option[value="%s"]' % stateCode).click()
    Select(driver.find_element_by_id("ddl_district")).select_by_visible_text(districtName.upper())
    driver.find_element_by_css_selector('option[value="%s"]' % (stateCode + districtCode)).click()
    driver.find_element_by_id("txt_keyword2").clear()
    driver.find_element_by_id("txt_keyword2").send_keys(seedWagelistNo)
    driver.find_element_by_id("btn_go").click()
    time.sleep(30)

    logger.info("Currently the number of active tabs are %s" % str(len(driver.window_handles))) 
    if len(driver.window_handles) <= 1:
      continue
    logger.info("There are multiple tabs")
    driver.switch_to_window(driver.window_handles[1])

    if len(driver.find_elements_by_xpath("//a[@href]")) > 0:
      pendingQuery = "select w.id,w.wagelistNo,b.name,b.blockCode from wagelists w,blocks b where w.blockCode=b.blockCode and ( (w.isDownloaded=0) or (w.isComplete=0 and TIMESTAMPDIFF(HOUR, w.downloadAttemptDate, now()) > 48 )) and finyear='%s' %s order by w.isDownloaded %s " % (finyear, blockFilter, limitClause)
      cursor.execute(pendingQuery)
      for pendingRow in cursor.fetchall():
        rowid = str(pendingRow[0])
        wagelistNo = pendingRow[1]
        blockName = pendingRow[2]
        blockCode = pendingRow[3]
        jobcardPrefix = "%s-%s-%s-" % (stateShortCode, districtCode, blockCode)
        logger.info("Jobcard Prefix : %s " % jobcardPrefix)
        fullBlockCode = stateCode + districtCode + blockCode
        if wagelistNo == '':
          continue

        logger.info(wagelistNo)
        wurl = "http://%s/netnrega/srch_wg_dtl.aspx?state_code=&district_code=%s&state_name=%s&district_name=%s&block_code=%s&wg_no=%s&short_name=%s&fin_year=%s&mode=wg" % (searchIP, fullDistrictCode, stateName.upper(), districtName.upper(), fullBlockCode, wagelistNo, stateShortCode, fullfinyear)
        logger.info("URL: %s " % wurl)
        driver.get(wurl)
        pageHtml = driver.page_source.replace('<head>', '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>')

        # A page counts as downloaded only when it carries the wagelist header
        # and at least one job card of this block.
        if ("WageList Agency Code" in pageHtml) and (jobcardPrefix in pageHtml):
          targetFile = rawDataDir + blockName.upper() + "/WAGELIST/" + fullfinyear + "/" + wagelistNo + ".html"
          logger.info(targetFile)
          writeFile("/home/libtech/webroot/nreganic.libtech.info/temp/" + wagelistNo + ".html", pageHtml)
          writeFile(targetFile, pageHtml)
          success = 1
          isPopulatedString = "isProcessed=0,"
        else:
          success = 0
          isPopulatedString = ''
        statusQuery = "update wagelists set isDownloaded=%s,%sdownloadAttemptDate=NOW()  where id=%s" % (str(success), isPopulatedString, str(rowid))
        logger.info(statusQuery)
        cursor.execute(statusQuery)

    # Close the result tab and return to the search form.
    driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
    driver.switch_to_window(maintab) 

  driverFinalize(driver)
  displayFinalize(display)
  dbFinalize(db) # Make sure you put this if there are other exit paths or errors
  logger.info("...END PROCESSING")     
  exit(0)
示例#35
0
 def setUp(self):
     """Create the logger, display and driver used by the test.

     The display is initialised with flag 0 and the driver with a timeout of 3
     and the '--headless' option.
     """
     self.logger = loggerFetch('info')
     self.logger.info('BEGIN PROCESSING...')
     self.display = displayInitialize(0)
     # Earlier variants, kept for reference: without options, and with an
     # explicit browser path.
     # self.driver = driverInitialize(timeout=3)
     self.driver = driverInitialize(timeout=3, options='--headless') # driverInitialize(path='/opt/firefox/', timeout=3)