#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Contact : liuyuqi.gov@msn.cn
@Time : 2023/05/12 18:19:33
@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
@Desc : whisper voice to text
'''
import torch
from transformers import pipeline
import gradio as gr
# import pytube as pt

# NOTE(review): reads like a typo for MODEL_NAME, but kept as-is so any
# external reference to this module-level constant keeps working.
MODE_NAME = "openai/whisper-large-v2"

# transformers pipelines take a GPU index (int) or "cpu" as the device.
device = 0 if torch.cuda.is_available() else "cpu"

# chunk_length_s=30 enables chunked long-form transcription on long audio.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODE_NAME,
    device=device,
    chunk_length_s=30,
)

# Whisper task tokens (transcribe/translate) sit at fixed offsets from the
# end of the tokenizer's special-token list for this model version.
all_special_ids = pipe.tokenizer.all_special_ids
transcribe_token_id = all_special_ids[-5]
translate_token_id = all_special_ids[-6]
def transcribe(microphone, state, task="transcribe"):
    """Transcribe (or translate) recorded audio and append it to the log.

    Parameters
    ----------
    microphone : str or None
        Filepath of the recorded audio. The gradio Audio input is declared
        ``optional=True``, so ``None`` arrives when nothing was recorded.
    state : str
        Transcript accumulated so far (threaded through gr.State).
    task : str
        ``"transcribe"`` keeps the spoken language; any other value selects
        the translate task token.

    Returns
    -------
    tuple[str, str]
        ``(textbox value, new state)`` — both the updated transcript.
    """
    # Guard: pipe(None) would raise, so leave the transcript unchanged when
    # no audio was provided.
    if microphone is None:
        return state, state

    # Force the task token at decoder prompt position 2 so the model either
    # transcribes or translates as requested.
    pipe.model.config.forced_decoder_ids = [
        [2, transcribe_token_id if task == "transcribe" else translate_token_id]
    ]
    text = pipe(microphone)["text"]

    updated = state + "\n" + text
    return updated, updated
# Build the gradio UI: microphone in, cumulative transcript out. gr.State
# threads the running transcript between calls; live=True re-runs on change.
# NOTE(review): source=/optional=/layout=/theme=/enable_queue= are gradio 3.x
# keywords — pin gradio<4 or migrate before upgrading.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", optional=True),
        gr.State(value=""),
    ],
    outputs=[
        gr.Textbox(lines=15),
        gr.State(),
    ],
    title="Speech to Text",
    layout="horizontal",
    theme="huggingface",
    live=True,
    description="Transcribe speech from your microphone or from a youtube video",
    allow_flagging="never",
)

iface.launch(enable_queue=True)