示例#1
0
def data_from_CSV():
    """Classify the English tweets of a CSV file and store the recognised ones.

    Reads the CSV named by ``sys.argv[1]`` (columns used: ``status_id``,
    ``text``, ``screen_name``).  Every row whose text is detected as English
    is cleaned with ``data_cleaning`` and classified with ``processML``; rows
    that get a non-empty request type are written with ``updateDB_SQL``
    together with the original (uncleaned) tweet text, a phone number found
    in the cleaned text (or a placeholder) and fixed placeholder coordinates.

    The database connection is released with ``disconnect_DB_SQL`` even when
    a row fails part-way through.
    """
    # Optional "+91"/"91" country code followed by optional space / hyphen
    # separators, an optional leading 0, then ten digits whose first is 1-9.
    # Raw string: the original literal relied on invalid escape sequences
    # ("\+", "\s", "\-"), which newer Python versions warn about.
    phone_pattern = re.compile(
        r'^((\+){0,1}91(\s){0,1}(\-){0,1}(\s){0,1}){0,1}0{0,1}[1-9]{1}[0-9]{9}$')
    # Placeholders stored when the tweet carries no phone number / location.
    # NOTE(review): the placeholder number is an int while extracted numbers
    # are strings; kept as in the original, confirm what updateDB_SQL expects.
    default_phone_no = 9900990099
    latitude = 9.9252
    longitude = 78.1198

    conn = establish_DB_connection_SQL()
    try:
        df = pd.read_csv(sys.argv[1], keep_default_na=False)
        tweets = df.set_index('status_id', drop=False)
        for _, row in tweets.iterrows():
            raw_text = str(row['text'])
            # keep_default_na=False leaves empty cells as '', so blank text
            # is skipped before it reaches the language detector.
            # NOTE(review): detect_langs is presumably langdetect's, which
            # raises on text without detectable features - confirm.
            if not raw_text.strip() or raw_text == 'nan':
                continue
            # Only the first two characters of the top candidate are compared.
            if str(detect_langs(raw_text)[0])[0:2] != 'en':
                continue

            text = data_cleaning(raw_text)
            request_type = processML(text)
            if request_type == '':
                continue

            # First matching word in text order.  The original iterated over
            # a set, so with several matches the chosen one was arbitrary.
            phone_no = next(
                (word[-10:] for word in text.split()
                 if phone_pattern.match(word)),
                default_phone_no)
            updateDB_SQL(row['screen_name'], raw_text, latitude, longitude,
                         phone_no, request_type, conn)
    finally:
        # NOTE(review): called without the connection handle, exactly as the
        # original did; confirm disconnect_DB_SQL really takes no argument.
        disconnect_DB_SQL()
示例#2
0
def data_from_CSV():
    """Classify English and Hindi tweets of a CSV file and store the results.

    Reads the CSV named by ``sys.argv[1]`` (columns used: ``status_id``,
    ``text``, ``screen_name``).  Text detected as Hindi is first translated
    to English through the Watson Language Translator ``hi-en`` model.  Text
    that is (or has become) English is cleaned with ``data_cleaning`` and
    classified with ``processML``; rows that get a non-empty request type are
    written with ``updateDB_SQL`` together with the cleaned text, a phone
    number found in that text (or a placeholder) and fixed placeholder
    coordinates.

    The database connection is released with ``disconnect_DB_SQL`` even when
    a row fails part-way through.
    """
    # Optional "+91"/"91" country code followed by optional space / hyphen
    # separators, an optional leading 0, then ten digits whose first is 1-9.
    # Raw string: the original literal relied on invalid escape sequences
    # ("\+", "\s", "\-"), which newer Python versions warn about.
    phone_pattern = re.compile(
        r'^((\+){0,1}91(\s){0,1}(\-){0,1}(\s){0,1}){0,1}0{0,1}[1-9]{1}[0-9]{9}$')
    # Placeholders stored when the tweet carries no phone number / location.
    # NOTE(review): the placeholder number is an int while extracted numbers
    # are strings; kept as in the original, confirm what updateDB_SQL expects.
    default_phone_no = 9900990099
    latitude = 9.9252
    longitude = 78.1198
    # Built on the first Hindi tweet and reused afterwards instead of being
    # re-created for every row.
    language_translator = None

    conn = establish_DB_connection_SQL()
    try:
        df = pd.read_csv(sys.argv[1], keep_default_na=False)
        tweets = df.set_index('status_id', drop=False)
        for _, row in tweets.iterrows():
            text = str(row['text'])
            # keep_default_na=False leaves empty cells as '', so blank text
            # is skipped before it reaches the language detector.
            # NOTE(review): detect_langs is presumably langdetect's, which
            # raises on text without detectable features - confirm.
            if not text.strip() or text == 'nan':
                continue

            # Detect once and reuse the answer; only the first two characters
            # of the top candidate are compared.
            language = str(detect_langs(text)[0])[0:2]
            if language == 'hi':
                if language_translator is None:
                    # SECURITY NOTE(review): this API key is committed in
                    # source.  It should be rotated and loaded from
                    # configuration; left unchanged here to keep behaviour.
                    authenticator = IAMAuthenticator(
                        'worUzb_Eb5emCaIs0oL7sR86Fb2LeTJGOk1EN1Q-4Cni')
                    language_translator = LanguageTranslatorV3(
                        version='2018-05-01', authenticator=authenticator)
                    language_translator.set_service_url(
                        'https://api.eu-gb.language-translator.watson.cloud.ibm.com/instances/cdafbc7e-b59a-40f8-818f-1914f02063cc'
                    )
                translation = language_translator.translate(
                    text=text, model_id='hi-en').get_result()
                text = translation['translations'][0]['translation']
                if not text.strip() or text == 'nan':
                    continue
                # The translated text has to be re-checked before it is
                # treated as English.
                language = str(detect_langs(text)[0])[0:2]

            if language != 'en':
                continue

            text = data_cleaning(text)
            request_type = processML(text)
            if request_type == '':
                continue

            # First matching word in text order.  The original iterated over
            # a set, so with several matches the chosen one was arbitrary.
            phone_no = next(
                (word[-10:] for word in text.split()
                 if phone_pattern.match(word)),
                default_phone_no)
            # An empty cleaned text is stored as None, as before.
            stored_text = None if text == '' else text
            updateDB_SQL(row['screen_name'], stored_text, latitude, longitude,
                         phone_no, request_type, conn)
    finally:
        # NOTE(review): called without the connection handle, exactly as the
        # original did; confirm disconnect_DB_SQL really takes no argument.
        disconnect_DB_SQL()
    'https://api.eu-gb.speech-to-text.watson.cloud.ibm.com/instances/1fd012ca-4f8f-4fa1-a40d-4a335380ae6a'
)


def speechToText(audioLocation, speech_to_text, content_type='audio/mpeg'):
    """Send an audio file to a speech-to-text client and return its result.

    Args:
        audioLocation: Path of the audio file; it is opened in binary mode.
        speech_to_text: Client object exposing ``recognize(...)`` whose
            return value provides ``get_result()``.
        content_type: MIME type announced for the audio.  Defaults to
            ``'audio/mpeg'``, the value that used to be hard-coded, so
            existing two-argument calls behave exactly as before.

    Returns:
        Whatever ``get_result()`` of the recognition response returns.
    """
    with open(audioLocation, 'rb') as audio_file:
        # The call stays inside the ``with`` block so the stream is still
        # open while the client consumes it.
        response = speech_to_text.recognize(
            audio=audio_file,
            content_type=content_type,
            smart_formatting=True,
            end_of_phrase_silence_time=2,
        )
        return response.get_result()


# Transcribe the audio file named on the command line, classify the
# transcript and, when a request type is recognised, store it.
transcriptResult = speechToText(sys.argv[1], speech_to_text)
# The former json.dumps/json.loads round trip produced an equal structure,
# so the result is used directly.
output = transcriptResult

# Stop with a readable message instead of an IndexError/KeyError traceback
# when the service returned no transcript.
results = output.get('results') or []
if not results or not results[0].get('alternatives'):
    raise SystemExit(
        'No speech was recognised in {}'.format(sys.argv[1]))
# NOTE(review): only the first result is used, as before; if the service
# returns several results, the later ones are ignored.
finalTranscript = results[0]['alternatives'][0]['transcript']

text = data_cleaning(finalTranscript)

conn = establish_DB_connection()
try:
    request_type = processML(text)
    # Placeholder location and phone number: none is available for audio.
    latitude = 22.22
    longitude = 22.22
    phone_no = None
    if request_type != '':
        # The uncleaned transcript is what gets stored.
        updateDB('speech_to_text', finalTranscript, latitude, longitude,
                 phone_no, request_type, conn)
finally:
    # Release the connection even if classification or the insert fails.
    disconnect_DB(conn)