def data_from_CSV():
    """Classify English tweets from a CSV file and store the resulting requests.

    Reads the CSV file named by ``sys.argv[1]``; the ``status_id``, ``text``
    and ``screen_name`` columns are used. For each row whose text is not the
    literal ``'nan'`` and whose top ``detect_langs`` guess starts with
    ``'en'``, the text is cleaned with ``data_cleaning``, scanned for a phone
    number and classified with ``processML``. When ``processML`` returns a
    non-empty request type, the row is written with ``updateDB_SQL`` using the
    raw (uncleaned) tweet text.

    ``disconnect_DB_SQL`` is always called, even if a row fails to process.
    """
    # Optional "+91"/"91" prefix (with optional space/dash separators), an
    # optional leading 0, then a 10-digit number whose first digit is 1-9.
    phone_pattern = re.compile(
        r'^((\+){0,1}91(\s){0,1}(\-){0,1}(\s){0,1}){0,1}0{0,1}[1-9]{1}[0-9]{9}$')

    conn = establish_DB_connection_SQL()
    try:
        df = pd.read_csv(sys.argv[1], keep_default_na=False)
        tweets = df.set_index('status_id', drop=False)

        for _, row in tweets.iterrows():
            text = str(row['text'])
            # Cheap literal check first so language detection is skipped for 'nan'.
            if text == 'nan' or str(detect_langs(text)[0])[0:2] != 'en':
                continue

            text = data_cleaning(text)

            # Scan words in text order; if several match, the last one wins.
            phone_no = ''
            for w in text.split():
                if phone_pattern.match(w):
                    phone_no = w[-10:]  # keep only the 10-digit subscriber part

            request_type = processML(text)
            if request_type == '':
                continue

            if phone_no == '':
                # Placeholder number stored when the tweet contains none.
                phone_no = 9900990099

            # The same fixed coordinates are stored for every row.
            latitude = 9.9252
            longitude = 78.1198

            # NOTE(review): the raw screen_name and raw tweet text are stored
            # as-is (empty values are not mapped to None) — confirm intended.
            updateDB_SQL(row['screen_name'], str(row['text']), latitude,
                         longitude, phone_no, request_type, conn)
    finally:
        disconnect_DB_SQL()
def data_from_CSV():
    """Classify English and Hindi tweets from a CSV file and store the requests.

    Reads the CSV file named by ``sys.argv[1]``; the ``status_id``, ``text``
    and ``screen_name`` columns are used. A row whose top ``detect_langs``
    guess starts with ``'hi'`` is first translated to English with the Watson
    Language Translator (model ``hi-en``). Text that is then detected as
    ``'en'`` is cleaned with ``data_cleaning``, scanned for a phone number and
    classified with ``processML``. When ``processML`` returns a non-empty
    request type, the row is written with ``updateDB_SQL`` using the cleaned
    (and possibly translated) text.

    ``disconnect_DB_SQL`` is always called, even if a row fails to process.
    """
    # Optional "+91"/"91" prefix (with optional space/dash separators), an
    # optional leading 0, then a 10-digit number whose first digit is 1-9.
    phone_pattern = re.compile(
        r'^((\+){0,1}91(\s){0,1}(\-){0,1}(\s){0,1}){0,1}0{0,1}[1-9]{1}[0-9]{9}$')

    # Built on the first Hindi tweet and reused for the rest of the file.
    language_translator = None

    conn = establish_DB_connection_SQL()
    try:
        df = pd.read_csv(sys.argv[1], keep_default_na=False)
        tweets = df.set_index('status_id', drop=False)

        for _, row in tweets.iterrows():
            text = str(row['text'])

            if text != 'nan' and str(detect_langs(text)[0])[0:2] == 'hi':
                if language_translator is None:
                    # SECURITY(review): API key is hard-coded in source; it
                    # should be loaded from configuration/environment instead.
                    authenticator = IAMAuthenticator(
                        'worUzb_Eb5emCaIs0oL7sR86Fb2LeTJGOk1EN1Q-4Cni')
                    language_translator = LanguageTranslatorV3(
                        version='2018-05-01', authenticator=authenticator)
                    language_translator.set_service_url(
                        'https://api.eu-gb.language-translator.watson.cloud.ibm.com/instances/cdafbc7e-b59a-40f8-818f-1914f02063cc'
                    )
                translation = language_translator.translate(
                    text=text, model_id='hi-en').get_result()
                text = translation['translations'][0]['translation']

            # Re-check the language: the text may have just been translated.
            if text == 'nan' or str(detect_langs(text)[0])[0:2] != 'en':
                continue

            text = data_cleaning(text)

            # Scan words in text order; if several match, the last one wins.
            phone_no = ''
            for w in text.split():
                if phone_pattern.match(w):
                    phone_no = w[-10:]  # keep only the 10-digit subscriber part

            request_type = processML(text)
            if request_type == '':
                continue

            if text == '':
                text = None
            if phone_no == '':
                # Placeholder number stored when the tweet contains none.
                phone_no = 9900990099

            # The same fixed coordinates are stored for every row.
            latitude = 9.9252
            longitude = 78.1198

            # NOTE(review): the raw screen_name is stored as-is (an empty name
            # is not mapped to None) — confirm intended.
            updateDB_SQL(row['screen_name'], text, latitude, longitude,
                         phone_no, request_type, conn)
    finally:
        disconnect_DB_SQL()
'https://api.eu-gb.speech-to-text.watson.cloud.ibm.com/instances/1fd012ca-4f8f-4fa1-a40d-4a335380ae6a' ) def speechToText(audioLocation, speech_to_text): with open(audioLocation, 'rb') as audio_file: speech_recognition_results = speech_to_text.recognize( audio=audio_file, content_type='audio/mpeg', smart_formatting=True, end_of_phrase_silence_time=2, ).get_result() return speech_recognition_results transcriptResult = speechToText(sys.argv[1], speech_to_text) output = json.loads(json.dumps(transcriptResult, indent=2)) finalTranscript = output['results'][0]['alternatives'][0]['transcript'] text = data_cleaning(finalTranscript) conn = establish_DB_connection() request_type = processML(text) latitude = 22.22 longitude = 22.22 phone_no = None if request_type != '': updateDB('speech_to_text', finalTranscript, latitude, longitude, phone_no, request_type, conn) disconnect_DB(conn)