示例#1
0
    shutil.rmtree('/home/sahil/fundamentals6')
# Recreate the output directory at the same absolute path that is removed just
# above and entered by os.chdir() below. The original used a relative path
# here, which only pointed at the same directory when the script happened to
# be started from /home/sahil.
os.mkdir('/home/sahil/fundamentals6')

# Headless Firefox session driven through a locally installed geckodriver.
options = Options()
options.headless = True
driver = webdriver.Firefox(
    options=options,
    executable_path='/home/sahil/Downloads/geckodriver')

# Load the latest-results listing, then make the output directory the working
# directory for the rest of the script.
driver.get('https://www.moneycontrol.com/markets/earnings/latest-results/latest/yoy/standalone/')
os.chdir('/home/sahil/fundamentals6')
def ext_symb(pg):
    """Extract the BSE and NSE symbol tokens from a parsed company page.

    The text of the first ``<ctag class="mob-hide">`` element is searched for
    the markers ``BSE`` and ``NSE``.

    Args:
        pg: Parsed page object exposing ``find(name, class_=...)`` whose
            result has a ``.text`` attribute (presumably a BeautifulSoup
            document -- confirm against the caller).

    Returns:
        A ``(bse, nse)`` tuple of lists of strings: every run of ASCII digits
        found after ``BSE`` and every run of ASCII word characters found
        after ``NSE``. A list is empty when its marker is absent.

    Raises:
        AttributeError: if ``pg.find`` returns ``None`` (no matching element).
    """
    # Work on the text as-is. The original encoded it to ASCII bytes first,
    # which cannot be searched with str patterns on Python 3.
    sym = pg.find('ctag', class_='mob-hide').text

    def tokens_after(marker, pattern):
        # str.find() returns -1 when the marker is missing; slicing from
        # ``-1 + 3`` would silently scan unrelated text, so bail out instead.
        pos = sym.find(marker)
        if pos < 0:
            return []
        return re.findall(pattern, sym[pos + len(marker):])

    # Explicit ASCII classes keep the matching identical on Python 2 and 3.
    bse = tokens_after('BSE', r'[0-9]+')
    nse = tokens_after('NSE', r'[A-Za-z0-9_]+')
    return bse, nse
# Link targets collected from the results listing page.
urls = []


def map1(lst):
    """Append the ``href`` of the given anchor element to the global ``urls``."""
    urls.append(lst['href'])


pg = bsoap(driver.page_source, 'html.parser')
# Use an explicit loop: on Python 3 ``map()`` is lazy, so the original
# ``map(map1, ...)`` call never ran map1 and ``urls`` stayed empty.
for anchor in pg.find_all('a', class_='op_gld13'):
    map1(anchor)
# Accumulator list; not filled anywhere in the visible part of the loop.
k=[]
# Visit the collected company URLs by index, open each page's "Ratios" element
# and read the company title and exchange symbols from the resulting page.
# NOTE(review): 2732 is a hard-coded upper bound; for indices beyond
# len(urls) the IndexError from urls[i] is swallowed by the bare except below.
for i in range(2732):
    try:
	    driver.get(urls[i])
    except:
            # Any failure to load the page (or a missing index) skips this entry.
            continue
    try:
     driver.find_element_by_class_name('Ratios').click()
    except:
     # No clickable element with class "Ratios": report it and skip this entry.
     print('exception caught')
     continue
    # Parse the page as it stands after the click.
    pg1 = bsoap(driver.page_source,'html.parser')
    title = pg1.find('h1',class_='pcstname').text
    bse,nse = ext_symb(pg1)
示例#2
0
    def scrapedata(self):
        """Scrape the paginated historical-price tables for ``self.name``.

        Starts a headless Firefox, searches moneycontrol.com for
        ``self.name``, follows the "Historical Prices" link, submits one of
        two date-range forms and then walks the result pages.

        Reads from ``self``:
            name: text typed into the ``#company`` search box.
            ch:   ``'d'`` submits the day/month/year form (01 Mar 2008 to
                  01 Mar 2019, exchange "NSE"); any other value submits the
                  ``mth_*`` form (Mar 2000 to Mar 2019).
            ch1:  ``0`` selects the "eager" page load strategy, anything else
                  "normal".

        Returns:
            A list with one entry per result page; each entry is the list of
            DataFrames returned by ``pd.read_html`` for that page's
            ``tblchart`` table.

        Side effects:
            Changes the working directory and prints progress to stdout. The
            browser is always closed, including when scraping fails.
        """
        os.chdir('/home/sahil/projdir/fundamentals8')
        # NOTE(review): ``caps`` is never passed to webdriver.Firefox below; it
        # can only take effect if DesiredCapabilities().FIREFOX is a shared
        # dict that the driver reads later -- confirm against the installed
        # Selenium version.
        caps = DesiredCapabilities().FIREFOX
        caps["pageLoadStrategy"] = "eager" if self.ch1 == 0 else "normal"
        options = Options()
        options.headless = True

        def click_with_retry(locate, target):
            # Up to three attempts, sleeping 5 s after every failure. The
            # caller carries on even if all attempts fail (best effort).
            for _ in range(3):
                try:
                    locate(target).click()
                    return True
                except Exception as e:
                    print(e)
                    print('sleeping')
                    time.sleep(5)
            return False

        def choose(fields):
            # Pick the given visible text in each named <select> element.
            for field_name, text in fields:
                Select(driver.find_element_by_name(field_name)
                       ).select_by_visible_text(text)

        driver = webdriver.Firefox(
            options=options,
            executable_path='/home/sahil/Downloads/geckodriver')
        try:
            driver.get('https://www.moneycontrol.com/india/stockpricequote/')
            print(driver.current_url)
            driver.find_element_by_xpath(
                '//*[@id="company"]').send_keys(self.name)
            click_with_retry(driver.find_element_by_css_selector,
                             "div.MT2:nth-child(1) > input:nth-child(2)")
            print(driver.current_url)
            click_with_retry(driver.find_element_by_link_text,
                             "Historical Prices")

            pg = bsoap(driver.page_source, 'html.parser')
            driver.get(pg.find('a', title='Click Here')['href'])
            nse1 = Select(driver.find_element_by_css_selector('#ex'))
            if self.ch == 'd':
                nse1.select_by_visible_text('NSE')
                choose((
                    ('frm_dy', '01'),
                    ('frm_mth', 'Mar'),
                    ('frm_yr', '2008'),
                    ('to_dy', '01'),
                    ('to_mth', 'Mar'),
                    ('to_yr', '2019'),
                ))
                submit = ('td.PL20:nth-child(1) > form:nth-child(1) > '
                          'div:nth-child(4) > input:nth-child(4)')
            else:
                choose((
                    ('mth_frm_mth', 'Mar'),
                    ('mth_frm_yr', '2000'),
                    ('mth_to_mth', 'Mar'),
                    ('mth_to_yr', '2019'),
                ))
                submit = ('td.PT15:nth-child(3) > form:nth-child(1) > '
                          'div:nth-child(4) > input:nth-child(3)')
            driver.find_element_by_css_selector(submit).click()

            k = []
            while True:
                pg = bsoap(driver.page_source, 'html.parser')
                tab = pg.find('table', class_='tblchart')
                k.append(pd.read_html(str(tab)))
                elem = pg.find_all('a', class_='nextprev')
                if not elem:
                    break
                # Build the next URL from text, not bytes: str(bytes) yields
                # "b'...'" on Python 3. partition() also leaves a URL without
                # a query string intact, where find('?') == -1 used to drop
                # its last character.
                # NOTE(review): assumes the first 'nextprev' link is always
                # the "next" page -- confirm against the site markup.
                url = driver.current_url.partition('?')[0]
                driver.get(url + elem[0]['href'])
                print('next')
            print(k)
            return k
        finally:
            # Close the browser even when a step above raises.
            driver.quit()
示例#3
0
def fin_data(i, driver):
    """Scrape the paginated "Ratios" tables for one company and store them.

    Types ``i`` into the ``#company`` search box of the page currently loaded
    in ``driver``, opens the company's "Ratios" link, reads the first table
    of every ratios page and advances with the page's second pager link until
    the URL stops changing.

    Args:
        i: Search text identifying the company.
        driver: Selenium WebDriver already showing a page that has the
            ``#company`` search box.

    Returns:
        * The column-wise concatenation of all page tables when every page
          was read. The frame stored via ``ext().store_file`` additionally
          has duplicate column names removed.
        * The list of tables collected so far when paging failed part-way.
        * ``None`` when any step outside the paging loop raised.
    """
    try:
        print('in here')
        print(driver.current_url)
        driver.find_element_by_xpath('//*[@id="company"]').send_keys(i)
        driver.find_element_by_css_selector(
            'div.MT2:nth-child(1) > input:nth-child(2)').click()
        print(driver.current_url)

        # Up to three attempts at driver_act, 10 s apart; carry on regardless.
        for _ in range(3):
            try:
                driver_act(driver)
                break
            except Exception as e:
                print(e)
                time.sleep(10)

        pg1 = bsoap(driver.page_source, 'html.parser')
        title = pg1.find('h1', class_='pcstname').text
        rat = pg1.find('a', title='Ratios')
        driver.get(rat['href'])
        bse, nse = ext_symb(pg1)
        # A company may lack either symbol. Resolve both once so that no
        # later access indexes an empty list (the original's unguarded
        # ``nse[0]`` aborted the whole scrape for such companies).
        bse_code = bse[0] if bse else ''
        nse_code = nse[0] if nse else ''

        complete = True
        k = []
        while True:
            try:
                print('on')
                url1 = driver.current_url
                print(url1)
                k1 = pd.read_html(url1, header=0)[0]
                k.append(k1)
                print(len(k1))
                k1['title'] = title
                k1['NSE'] = nse_code
                k1['BSE'] = bse_code
                print(nse_code)
                btx = driver.find_element_by_xpath(
                    '//*[@id="mc_content"]/div[2]/div/div[2]/ul/li[2]/a')
                # Click through JavaScript rather than WebElement.click().
                driver.execute_script("arguments[0].click();", btx)
                # An unchanged URL after the click means there is no further
                # page.
                if driver.current_url == url1:
                    print('reach')
                    break
            except Exception as e:
                print(e)
                complete = False
                break

        if complete:
            k = pd.concat(k, axis=1)
            df = k.loc[:, ~k.columns.duplicated()]
            ext().store_file('/usr/share/app', 'finalkfr1', df)
    except Exception as e:
        print(e)
        return
    print(i)
    print(nse_code)
    return k
示例#4
0
def dailydata(s, driver, date1, date2):
    """Scrape historical prices for ``s`` between two dates and pickle them.

    Relies on the module-level names ``ch`` (``'d'`` selects the
    day/month/year form driven by ``date1``/``date2``; anything else submits
    the ``mth_*`` form with a fixed Mar 2000 to Mar 2019 range) and ``month``
    (a sequence indexed by month number minus one -- presumably the month
    labels shown in the form's drop-downs; confirm against its definition).

    Args:
        s: Search text typed into the ``#company`` box; also written to the
            ``id`` column of the result.
        driver: Selenium WebDriver already showing a page with the
            ``#company`` search box. It is quit after a successful scrape.
        date1: Start date as ``'YYYY-MM-DD'``.
        date2: End date as ``'YYYY-MM-DD'``.

    Returns:
        One DataFrame holding the rows of every result page plus an ``id``
        column. An empty DataFrame is returned (without pickling or quitting
        the driver) when the "Historical Prices" link could not be clicked or
        no table could be read.

    Raises:
        ValueError: if ``date1`` or ``date2`` is not in ``'%Y-%m-%d'`` format.
    """
    def ext_date(datez, month):
        # Derive the parts from the parsed date, not from string slices, so
        # inputs such as '2019-3-1' still give a two-digit day and the right
        # month.
        dt = datetime.strptime(datez, '%Y-%m-%d').date()
        return '%02d' % dt.day, month[dt.month - 1], '%04d' % dt.year

    def click_with_retry(locate, target):
        # Up to three attempts, sleeping 5 s after every failure.
        for _ in range(3):
            try:
                locate(target).click()
                return True
            except Exception as e:
                print(e)
                print('sleeping')
                time.sleep(5)
        return False

    def choose(fields):
        # Pick the given visible text in each named <select> element.
        for field_name, text in fields:
            Select(driver.find_element_by_name(field_name)
                   ).select_by_visible_text(str(text))

    day_from, month_from, year_from = ext_date(date1, month)
    day_to, month_to, year_to = ext_date(date2, month)
    print(driver.current_url)
    driver.find_element_by_xpath('//*[@id="company"]').send_keys(s)
    # Best effort: a failed search click is not treated as fatal here.
    click_with_retry(driver.find_element_by_css_selector,
                     "div.MT2:nth-child(1) > input:nth-child(2)")
    print(driver.current_url)
    if not click_with_retry(driver.find_element_by_link_text,
                            "Historical Prices"):
        return pd.DataFrame()

    pg = bsoap(driver.page_source, 'html.parser')
    driver.get(pg.find('a', title='Click Here')['href'])
    nse1 = Select(driver.find_element_by_css_selector('#ex'))
    if ch == 'd':
        nse1.select_by_visible_text('NSE')
        choose((
            ('frm_dy', day_from),
            ('frm_mth', month_from),
            ('frm_yr', year_from),
            ('to_dy', day_to),
            ('to_mth', month_to),
            ('to_yr', year_to),
        ))
        submit = '#mc_mainWrapper > div.PA10 > div > div.PT15 > div.PT10 > div.brdb > table > tbody > tr > td:nth-child(1) > form > div:nth-child(4) > input[type="image"]:nth-child(4)'
    else:
        choose((
            ('mth_frm_mth', 'Mar'),
            ('mth_frm_yr', '2000'),
            ('mth_to_mth', 'Mar'),
            ('mth_to_yr', '2019'),
        ))
        submit = 'td.PT15:nth-child(3) > form:nth-child(1) > div:nth-child(4) > input:nth-child(3)'
    driver.find_element_by_css_selector(submit).click()

    k = []
    while True:
        try:
            pg = bsoap(driver.page_source, 'html.parser')
            tab = pg.find_all('table', class_='tblchart')[0]
            k.append(pd.read_html(str(tab)))
            elem = pg.find_all('a', class_='nextprev')
            if not elem:
                break
            # partition() leaves a URL without a query string intact, where
            # find('?') == -1 used to drop its last character.
            url = driver.current_url.partition('?')[0]
            driver.get(url + elem[0]['href'])
            print(k)
            print('next')
        except Exception as e:
            # Stop paging on failure. The original kept looping here, which
            # never terminated once a page could not be parsed.
            print(e)
            break

    print(k)
    # Each entry of k is the list of frames read from one page. Flatten all
    # of them; the original concatenated only the first page's frames.
    frames = [frame for page in k for frame in page]
    if not frames:
        return pd.DataFrame()
    k = pd.concat(frames)
    k['id'] = s
    with open('daily_data', 'a+b') as d:
        pck.dump(k, d)
    driver.quit()
    return k
示例#5
0
def dailydata(s):
    """Scrape the historical price tables for ``s`` and return them as one frame.

    Uses the module-level ``driver`` (Selenium WebDriver) and ``ch``:
    ``'d'`` submits the day/month/year form for 01 Mar 2000 to 01 Mar 2020 on
    exchange "NSE"; any other value submits the ``mth_*`` form for Mar 2000
    to Mar 2019.

    Args:
        s: Search text typed into the ``#company`` box; also written to the
            ``id`` column of the result.

    Returns:
        One DataFrame holding the rows of every result page plus an ``id``
        column, or an empty DataFrame when no table could be read.

    Side effects:
        Appends the raw per-page list of tables to the pickle file
        ``daily_data`` and prints progress to stdout.
    """
    def click_with_retry(locate, target):
        # Up to three attempts, sleeping 5 s after every failure. The caller
        # carries on even if all attempts fail (best effort).
        for _ in range(3):
            try:
                locate(target).click()
                return True
            except Exception as e:
                print(e)
                print('sleeping')
                time.sleep(5)
        return False

    def choose(fields):
        # Pick the given visible text in each named <select> element.
        for field_name, text in fields:
            Select(driver.find_element_by_name(field_name)
                   ).select_by_visible_text(text)

    driver.get('https://www.moneycontrol.com/india/stockpricequote/')
    print(driver.current_url)
    driver.find_element_by_xpath('//*[@id="company"]').send_keys(s)
    click_with_retry(driver.find_element_by_css_selector,
                     "div.MT2:nth-child(1) > input:nth-child(2)")
    print(driver.current_url)
    click_with_retry(driver.find_element_by_link_text, "Historical Prices")

    pg = bsoap(driver.page_source, 'html.parser')
    driver.get(pg.find('a', title='Click Here')['href'])
    nse1 = Select(driver.find_element_by_css_selector('#ex'))
    if ch == 'd':
        nse1.select_by_visible_text('NSE')
        choose((
            ('frm_dy', '01'),
            ('frm_mth', 'Mar'),
            ('frm_yr', '2000'),
            ('to_dy', '01'),
            ('to_mth', 'Mar'),
            ('to_yr', '2020'),
        ))
        submit = '#mc_mainWrapper > div.PA10 > div > div.PT15 > div.PT10 > div.brdb > table > tbody > tr > td:nth-child(1) > form > div:nth-child(4) > input[type="image"]:nth-child(4)'
    else:
        choose((
            ('mth_frm_mth', 'Mar'),
            ('mth_frm_yr', '2000'),
            ('mth_to_mth', 'Mar'),
            ('mth_to_yr', '2019'),
        ))
        submit = 'td.PT15:nth-child(3) > form:nth-child(1) > div:nth-child(4) > input:nth-child(3)'
    driver.find_element_by_css_selector(submit).click()

    k = []
    while True:
        try:
            pg = bsoap(driver.page_source, 'html.parser')
            tab = pg.find_all('table', class_='tblchart')[0]
            k.append(pd.read_html(str(tab)))
            elem = pg.find_all('a', class_='nextprev')
        except Exception as e:
            # Stop paging on failure. The original fell through and used an
            # undefined or stale ``elem``, raising NameError or re-requesting
            # a corrupted URL.
            print(e)
            break
        if not elem:
            break
        # partition() leaves a URL without a query string intact, where
        # find('?') == -1 used to drop its last character.
        url = driver.current_url.partition('?')[0]
        driver.get(url + elem[0]['href'])
        print(k)
        print('next')

    # The raw per-page tables are pickled before being merged, as before.
    with open('daily_data', 'a+b') as d:
        pck.dump(k, d)
    print(k)
    # Each entry of k is the list of frames read from one page. Flatten all
    # of them; the original concatenated only the first page's frames.
    frames = [frame for page in k for frame in page]
    if not frames:
        return pd.DataFrame()
    k = pd.concat(frames)
    k['id'] = s
    return k