Skip to content

Commit 5cd5909

Browse files
committed
add youtube video transcription example
1 parent e17c57b commit 5cd5909

File tree

10 files changed

+52
-0
lines changed

10 files changed

+52
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
examples/*/data/*
12
__pycache__
23
*.egg-info/
34

examples/deploy_model/hugging_face_transformer/deploy_hugging_face_transformer.py renamed to examples/deploy_custom_models/hugging_face_transformer/deploy_hugging_face_transformer.py

File renamed without changes.

examples/deploy_model/hugging_face_transformer/requirements.txt renamed to examples/deploy_custom_models/hugging_face_transformer/requirements.txt

File renamed without changes.

examples/deploy_model/pytorch/deploy_pytorch_models.py renamed to examples/deploy_custom_models/pytorch/deploy_pytorch_models.py

File renamed without changes.
File renamed without changes.

examples/deploy_model/scikit-learn/deploy_scikit_learn_models.py renamed to examples/deploy_custom_models/scikit-learn/deploy_scikit_learn_models.py

File renamed without changes.

examples/deploy_model/scikit-learn/requirements.txt renamed to examples/deploy_custom_models/scikit-learn/requirements.txt

File renamed without changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
youtube-dl@git+https://github.com/ytdl-org/youtube-dl.git
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
2+
# import the model
3+
from slashml import SpeechToText
4+
from utils import download_youtube_file
5+
6+
# Replace `API_KEY` with your SlashML API token. This example still runs without
7+
# the API token but usage will be limited
8+
API_KEY = None
9+
10+
def _extract_youtube_id(youtube_url: str) -> str:
    """Return the video id embedded in a YouTube URL.

    Handles short links (``https://youtu.be/<id>``), short links with extra
    query parameters (``https://youtu.be/<id>?t=10``), trailing slashes and
    long links (``https://www.youtube.com/watch?v=<id>``).

    Raises:
        ValueError: if no video id can be found in ``youtube_url``.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(youtube_url)

    # Long-form links carry the id in the `v` query parameter.
    video_ids = parse_qs(parsed.query).get('v')
    if video_ids:
        return video_ids[0]

    # Otherwise take the last non-empty path segment; the query string and
    # any trailing slash have already been separated out by urlparse/filter.
    path_segments = [segment for segment in parsed.path.split('/') if segment]
    if not path_segments:
        raise ValueError(f'could not find a video id in {youtube_url!r}')
    return path_segments[-1]


def _find_downloaded_file(*, folder: str, youtube_id: str) -> str:
    """Return the path of the file downloaded for ``youtube_id`` in ``folder``.

    ``{folder}/{youtube_id}.mp4`` is preferred when it exists; otherwise any
    ``{folder}/{youtube_id}.<ext>`` is accepted, because the downloader's
    output template decides the extension.

    Raises:
        FileNotFoundError: if no matching file exists.
    """
    import glob
    import os

    preferred_path = f'{folder}/{youtube_id}.mp4'
    if os.path.exists(preferred_path):
        return preferred_path

    pattern = f'{glob.escape(folder)}/{glob.escape(youtube_id)}.*'
    # Skip leftovers of interrupted downloads.
    candidates = sorted(
        path for path in glob.glob(pattern) if not path.endswith('.part')
    )
    if not candidates:
        raise FileNotFoundError(
            f'no downloaded file found for video {youtube_id!r} in {folder!r}'
        )
    return candidates[0]


def transcribe_youtube_file(*, youtube_url:str):
    """Download a YouTube video and return its transcription.

    Args:
        youtube_url: link to the video, e.g. ``https://youtu.be/<id>``.

    Returns:
        The ``transcription_data.transcription`` value of the response
        returned by ``SpeechToText.execute``.

    Raises:
        ValueError: if ``youtube_url`` does not contain a video id.
        FileNotFoundError: if the downloaded file cannot be located.
    """
    output_folder = 'data'

    # parse the id from the youtube url (validated before any network work)
    youtube_id = _extract_youtube_id(youtube_url)

    # this downloads the file at data/{youtube_id}.{ext}
    download_youtube_file(youtube_url=youtube_url, output_folder=output_folder)

    downloaded_file_path = _find_downloaded_file(
        folder=output_folder, youtube_id=youtube_id
    )

    # We recommend using AWS for video transcription
    service_provider = SpeechToText.ServiceProvider.AWS

    model = SpeechToText(api_key=API_KEY)

    # Upload audio
    uploaded_file = model.upload_audio(downloaded_file_path)
    print(f"file uploaded: {uploaded_file}")

    response = model.execute(
        upload_url=uploaded_file["upload_url"], service_provider=service_provider
    )

    return response.transcription_data.transcription
34+
35+
def transcribe_multiple_youtube_videos(*, youtube_urls:list):
    """Transcribe several YouTube videos, one after another.

    Args:
        youtube_urls: links to the videos to transcribe.

    Returns:
        A list with one transcription per URL, in the same order as
        ``youtube_urls``. An empty input yields an empty list.
    """
    # `transcribe_youtube_file` only accepts `youtube_url` as a keyword
    # argument, so each URL must be passed by name rather than via `map`.
    return [transcribe_youtube_file(youtube_url=url) for url in youtube_urls]
37+
38+
if __name__ == '__main__':
    # Demo run: transcribe one sample video and print the resulting text.
    sample_transcript = transcribe_youtube_file(
        youtube_url='https://youtu.be/5-TgqZ8nado'
    )
    print(sample_transcript)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import youtube_dl
2+
3+
def download_youtube_file(*, youtube_url:str, output_folder:str):
    """Download the video at ``youtube_url`` into ``output_folder``.

    The output template passed to youtube-dl is
    ``<output_folder>/%(id)s.%(ext)s``.

    Args:
        youtube_url: link to the video to download.
        output_folder: directory prefix used in the output template.
    """
    output_template = output_folder + '/%(id)s.%(ext)s'

    with youtube_dl.YoutubeDL({'outtmpl': output_template}) as downloader:
        downloader.download([youtube_url])
8+
9+
if __name__ == '__main__':
    # Demo run: fetch one sample video into the local `data` folder.
    download_youtube_file(
        output_folder='data',
        youtube_url='https://youtu.be/5-TgqZ8nado',
    )

0 commit comments

Comments
 (0)