Exemplo n.º 1
0
def get_conn_sqlite():
	"""Obtain the SQLite connection for ``dbname`` and report the outcome.

	The database file name is the global ``dbname`` with ``.db`` appended, and
	the connection is requested through ``sqliteDefaults.get_conn``. A status
	message is printed to stdout for both outcomes.

	Returns:
		Whatever ``sqliteDefaults.get_conn`` returned. A falsy result is
		reported as a failure but is still handed back unchanged, so callers
		must check it before use.
	"""
	conn = sqliteDefaults.get_conn(dbname + ".db")
	# Single-argument print(...) produces identical output on Python 2 and 3,
	# whereas the bare print statement is a SyntaxError on Python 3.
	if conn:
		print("\n\n\tSuccessfully Connected.\n\n")
	else:
		print("\n\n\tERROR: Unable to establish connection")
	return conn
Exemplo n.º 2
0
def get_conn_sqlite():
    """Obtain the SQLite connection for ``dbname`` and report the outcome.

    The database file name is the global ``dbname`` with ``.db`` appended, and
    the connection is requested through ``sqliteDefaults.get_conn``. A status
    message is printed to stdout for both outcomes.

    Returns:
        Whatever ``sqliteDefaults.get_conn`` returned. A falsy result is
        reported as a failure but is still handed back unchanged, so callers
        must check it before use.
    """
    conn = sqliteDefaults.get_conn(dbname + ".db")
    # Single-argument print(...) produces identical output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    if conn:
        print("\n\n\tSuccessfully Connected.\n\n")
    else:
        print("\n\n\tERROR: Unable to establish connection")
    return conn
Exemplo n.º 3
0
def init_db_sqlite():
    """Create the ``website_regex`` table if it does not already exist.

    Requests a connection for ``<dbname>.db`` through
    ``sqliteDefaults.get_conn``, runs the ``Create table if not exists``
    statement, commits, and closes the connection. The table has the columns
    ``base_url``, ``exec_code`` and ``date_of_addition`` and is keyed on
    ``(base_url, date_of_addition)``.

    Raises:
        Any exception raised by ``execute`` or ``commit`` propagates to the
        caller; the connection is closed first.
    """
    conn = sqliteDefaults.get_conn(dbname + ".db")
    try:
        conn.execute('''Create table if not exists website_regex(
		base_url TEXT,
		exec_code TEXT,
		date_of_addition DATE,
		primary key(base_url, date_of_addition)
		);
		''')
        conn.commit()
    finally:
        # Release the connection even when the DDL or the commit fails, so an
        # error here does not leave the database handle open.
        conn.close()
Exemplo n.º 4
0
def init_db_sqlite():
	"""Create the ``website_regex`` table if it does not already exist.

	Requests a connection for ``<dbname>.db`` through
	``sqliteDefaults.get_conn``, runs the ``Create table if not exists``
	statement, commits, and closes the connection. The table has the columns
	``base_url``, ``exec_code`` and ``date_of_addition`` and is keyed on
	``(base_url, date_of_addition)``.

	Raises:
		Any exception raised by ``execute`` or ``commit`` propagates to the
		caller; the connection is closed first.
	"""
	conn = sqliteDefaults.get_conn(dbname + ".db")
	try:
		conn.execute('''Create table if not exists website_regex(
		base_url TEXT,
		exec_code TEXT,
		date_of_addition DATE,
		primary key(base_url, date_of_addition)
		);
		''')
		conn.commit()
	finally:
		# Release the connection even when the DDL or the commit fails, so an
		# error here does not leave the database handle open.
		conn.close()
Exemplo n.º 5
0
	def connectToSQLiteDB(self, dbFilePath="GoogleSearchResults.db", dbTableName="SearchResultURLs", printing=True):
		"""
		Normalise the database path and store a connection to it on self.conn.

		Args:
			dbFilePath: the file path of the SQLite database file. If not a .db file, it is corrected.
				e.g. "xxx/xxx/xxx.db" stays the same, whereas "xxx/xxx/xxx" becomes the former, and "xxx/xxx/" (i.e. a directory) becomes "xxx/xxx/GoogleSearchResults.db"
			dbTableName: the SQLite table name to be referred to henceforth.
				NOTE(review): not used anywhere in the code visible in this block.
			printing: if we should print to terminal or not. Forwarded to sqliteDefaults.get_conn and print_error.

		Returns: False when obtaining the connection raises (self.conn is then set to None).
			NOTE(review): the contract is meant to be True/False depending on whether a usable
			table was set up, but no success return or table creation is visible in this block;
			as written a successful connection falls through and returns None. Confirm.
		"""

		## Correct common errors:
		## a trailing path separator means a directory was given, so append the default file name.
		if dbFilePath.endswith(("/", "\\")):
			dbFilePath += "GoogleSearchResults.db"
		if not dbFilePath.endswith(".db"):
			dbFilePath += ".db"

		try:
			self.conn = sqliteDefaults.get_conn(dbFilePath, printing)
		except Exception as e:		## "as" form is valid on Python 2.6+ and Python 3
			## Report the failure with the class and method name, then leave the object without a connection.
			print_error(printing, self.__class__.__name__, sys._getframe().f_code.co_name, "could not connect to SQLite database.", e)
			self.conn = None
			return False







##-------CODE FOR GETTING RESULTS FOR DATE-STAGGERED QUERIES AS AN SQLITE DATABASE-------#
## Top-level setup: install the Ctrl+C handler, make sure the results table exists,
## and compute the initial start date.



## Route SIGINT to ctrl_c_signal_handler (defined outside this section).
signal.signal(signal.SIGINT, ctrl_c_signal_handler)		## assign ctrl_c_signal_handler to Ctrl+C, i.e. SIGINT


## Request a connection for db_file_path through the sqliteDefaults helper.
conn=sqliteDefaults.get_conn(db_file_path)
## Create the results table unless it already exists. The table name is spliced in with
## % formatting from db_table_name (defined outside this section). Rows are keyed on
## (Topic, URL).
conn.execute('''Create table if not exists %s(
		Topic 				TEXT,
		StartDate 			INTEGER,
		EndDate 			INTEGER,
		ResultPageNumber 	INTEGER,
		URL 				TEXT,
		ResultNumber		INTEGER,
		PRIMARY KEY(Topic, URL)
	);
	'''%db_table_name)
conn.commit()



## Today's date passed through to_julian_date_datetime
## (presumably a Julian-date conversion -- confirm against that helper).
initial_start_date = to_julian_date_datetime(datetime.now().date())
Exemplo n.º 7
0
        10 * (int(results_per_page * (540 / float(80)) / 10)))

elif results_per_page == 100 and wait_between_pages / float(
        results_per_page) < 600 / float(100):
    print "\n\t\tWARNING: the wait time between pages may not be large enough to prevent IP blocking."
    print "\t\tRecommend wait time between pages: %s seconds or more." % (
        10 * (int(results_per_page * (600 / float(100)) / 10)))

## Emit blank lines to the terminal before the setup below.
print "\n\n\n\n"

##-------CODE FOR GETTING RESULTS FOR DATE-STAGGERED QUERIES AS AN SQLITE DATABASE-------#
## Top-level setup: install the Ctrl+C handler, make sure the results table exists,
## and initialise the date variables.

## Route SIGINT to ctrl_c_signal_handler (defined outside this section).
signal.signal(signal.SIGINT, ctrl_c_signal_handler
              )  ## assign ctrl_c_signal_handler to Ctrl+C, i.e. SIGINT

## Request a connection for db_file_path through the sqliteDefaults helper.
conn = sqliteDefaults.get_conn(db_file_path)
## Create the results table unless it already exists. The table name is spliced in with
## % formatting from db_table_name (defined outside this section). Rows are keyed on
## (Topic, URL).
conn.execute('''Create table if not exists %s(
		Topic 				TEXT,
		StartDate 			INTEGER,
		EndDate 			INTEGER,
		ResultPageNumber 	INTEGER,
		URL 				TEXT,
		ResultNumber		INTEGER,
		PRIMARY KEY(Topic, URL)
	);
	''' % db_table_name)
conn.commit()

## Today's date passed through to_julian_date_datetime
## (presumably a Julian-date conversion -- confirm against that helper).
initial_start_date = to_julian_date_datetime(datetime.now().date())
## Both date variables start at 0.
start_date = 0
end_date = 0
Exemplo n.º 8
0
import sqliteDefaults
import os
import extraction_text_manip

# Run the shell command `reset` before doing anything else
# (on Unix-like systems this reinitialises the terminal).
os.system("reset")

# Request a connection for the article database and make sure the articles_clean
# table exists; its rows are keyed on (company_or_sector, article_url).
conn = sqliteDefaults.get_conn("article_extract_db.db")
conn.execute('''CREATE TABLE IF NOT EXISTS `articles_clean` (
	`company_or_sector`		TEXT,
	`article_url`			TEXT,
	PRIMARY KEY(company_or_sector, article_url)
	);
	''')
conn.commit()

# Company whose articles are selected below.
company_name = 'Infosys'

# Select the distinct rows of `articles` for company_name whose article_url is not
# yet listed in articles_clean, ordered by article_url ascending.
# NOTE(review): the company name is interpolated into the SQL text with % formatting;
# that is only safe while company_name is a trusted constant. Consider a parameterised
# query if verified_select_sqlite supports one.
articles = sqliteDefaults.verified_select_sqlite(conn,
													"SELECT DISTINCT article_url, company_name, article_headline, article_text, article_date \
														FROM articles \
														WHERE company_name='%s' \
															and article_url not in (select article_url from articles_clean)\
														ORDER BY article_url ASC\
															"%(company_name)
												)


# Second connection, for the database of extracted search URLs.
conn2 = sqliteDefaults.get_conn("extracted_search_urls.db")
# Starts empty; the code that fills it is not visible in this section.
company_dict = {}
# Distinct ArticleTopic values from articleUrls, in ascending order.
temp_table = sqliteDefaults.verified_select_sqlite(conn2,"SELECT DISTINCT ArticleTopic from articleUrls order by ArticleTopic asc")
for i in range(0,len(temp_table)):