def getText():
    """Run removeCities() over every post stored in ``clean_posts``.

    Opens a MySQL connection with the settings from ``configPy``, calls
    ``createHashmap(con)`` once, then reads every ``(pid, post)`` row of the
    ``clean_posts`` table and hands each one to
    ``removeCities(con, pid, post)``.

    Returns:
        None. Any output or database changes come from ``createHashmap``
        and ``removeCities``.
    """
    con = mdb.connect(configPy.HOST, configPy.USERNAME, configPy.PASSWORD,
                      configPy.DATABASE_NAME)

    # NOTE: regional abbreviations in the posts (e.g. "la" -> "los angeles",
    # "slo" -> "san luis obispo") are not expanded here; each post is passed
    # to removeCities() exactly as it is stored.
    with con:
        # presumably prepares the lookup that removeCities() relies on --
        # TODO confirm against createHashmap's definition
        createHashmap(con)

        cur = con.cursor()
        try:
            cur.execute("SELECT pid,post FROM clean_posts")
            # Pull the whole result set before processing so the cursor can
            # be released before removeCities() uses the same connection.
            rows = cur.fetchall()
        finally:
            cur.close()

        for pid, post in rows:
            removeCities(con, pid, post)
def getText():
    """Run removeCities() over every post stored in ``clean_posts``.

    Opens a MySQL connection with the settings from ``configPy``, calls
    ``createHashmap(con)`` once, then reads every ``(pid, post)`` row of the
    ``clean_posts`` table and hands each one to
    ``removeCities(con, pid, post)``.

    Returns:
        None. Any output or database changes come from ``createHashmap``
        and ``removeCities``.
    """
    con = mdb.connect(configPy.HOST, configPy.USERNAME, configPy.PASSWORD,
                      configPy.DATABASE_NAME)

    # NOTE: regional abbreviations in the posts (e.g. "la" -> "los angeles",
    # "slo" -> "san luis obispo") are not expanded here; each post is passed
    # to removeCities() exactly as it is stored.
    with con:
        # presumably prepares the lookup that removeCities() relies on --
        # TODO confirm against createHashmap's definition
        createHashmap(con)

        cur = con.cursor()
        try:
            cur.execute("SELECT pid,post FROM clean_posts")
            # Pull the whole result set before processing so the cursor can
            # be released before removeCities() uses the same connection.
            rows = cur.fetchall()
        finally:
            cur.close()

        for pid, post in rows:
            removeCities(con, pid, post)
def testMdict():
    """Run removeCities() on a fixed set of sample posts.

    Opens a MySQL connection with the settings from ``configPy``, calls
    ``createHashmap(con)`` once, then for each hard-coded sample string
    prints it prefixed with ``"Original: "`` and calls
    ``removeCities(con, index, text)``, where ``index`` is the sample's
    position in the list.

    Returns:
        None. Results are whatever ``removeCities`` prints or writes.
    """
    con = mdb.connect(configPy.HOST, configPy.USERNAME, configPy.PASSWORD,
                      configPy.DATABASE_NAME)
    samples = [
        "seeking fremont to irvine saturday 08/22 on when ever time please let me know users i am very exited here to share ride with you",
        "offering slo to woodland hills leaving friday august 7 at 315pm i will be taking the the whole way and can drop you off anywhere along the way 15 gas money per person shoot me a message if you are interested please do not message me asking to leave at a different time or drop you off anywhere else other than the locations listed above the answer is no",
        "oferringgggg slo to the bay san mateo area tomorrow friday july 24 leaving around 6pm can stop along the way just lemme know returning monday july 27 around 11 am",
        "offering slo to palo alto friday 7/24 at 1pm and palo alto to slo sunday 7/26 undecided time i can drop off anywhere along the way gas money is appreciated",
        "seeking ride for 2 to salinas santa cruz aptos watsonville tomorrow the 17 anytime after 6pm will provide gas money",
        "seeking south ventura county thousand oaks area to slo on sunday will provide gas and company",
        "offering slo to nevada city north on 5 towards sac can drop of anywhere on the way sac stockton roseville etc leaving friday 4/24 after 2pm and returning sunday evening or monday morning early could do either",
    ]

    with con:
        createHashmap(con)
        for num, text in enumerate(samples):
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement and the Python 3 print function.
            print("Original: " + text + "\n")
            removeCities(con, num, text)
def testMdict():
    """Run removeCities() on a fixed set of sample posts.

    Opens a MySQL connection with the settings from ``configPy``, calls
    ``createHashmap(con)`` once, then for each hard-coded sample string
    prints it prefixed with ``"Original: "`` and calls
    ``removeCities(con, index, text)``, where ``index`` is the sample's
    position in the list.

    Returns:
        None. Results are whatever ``removeCities`` prints or writes.
    """
    con = mdb.connect(configPy.HOST, configPy.USERNAME, configPy.PASSWORD,
                      configPy.DATABASE_NAME)
    samples = [
        "seeking fremont to irvine saturday 08/22 on when ever time please let me know users i am very exited here to share ride with you",
        "offering slo to woodland hills leaving friday august 7 at 315pm i will be taking the the whole way and can drop you off anywhere along the way 15 gas money per person shoot me a message if you are interested please do not message me asking to leave at a different time or drop you off anywhere else other than the locations listed above the answer is no",
        "oferringgggg slo to the bay san mateo area tomorrow friday july 24 leaving around 6pm can stop along the way just lemme know returning monday july 27 around 11 am",
        "offering slo to palo alto friday 7/24 at 1pm and palo alto to slo sunday 7/26 undecided time i can drop off anywhere along the way gas money is appreciated",
        "seeking ride for 2 to salinas santa cruz aptos watsonville tomorrow the 17 anytime after 6pm will provide gas money",
        "seeking south ventura county thousand oaks area to slo on sunday will provide gas and company",
        "offering slo to nevada city north on 5 towards sac can drop of anywhere on the way sac stockton roseville etc leaving friday 4/24 after 2pm and returning sunday evening or monday morning early could do either",
    ]

    with con:
        createHashmap(con)
        for num, text in enumerate(samples):
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement and the Python 3 print function.
            print("Original: " + text + "\n")
            removeCities(con, num, text)