@@ -105,7 +105,7 @@ def get_transcript(video_id, language, video_info, verbose=True):
105105 transcript = ''
106106 current_chapter_index = 0
107107 chapters = video_info ["chapters" ]
108- logging .info (f"""Transcript_List Length: {
108+ logging .info (f"""Transcript List Length: {
109109 len (transcript_list )} , Chapter Length: { len (chapters )} """ )
110110
111111 for i , line in enumerate (transcript_list ):
@@ -125,8 +125,6 @@ def get_transcript(video_id, language, video_info, verbose=True):
125125 buffer_time = 2
126126
127127 if start_time >= chapter_start_seconds - buffer_time :
128- logging .info (
129- f'\n \n ## { chapters [current_chapter_index ]["title" ]} \n ' )
130128 current_chapter_index += 1
131129 except Exception as e :
132130 logging .error (
@@ -148,21 +146,19 @@ def get_transcript(video_id, language, video_info, verbose=True):
148146
149147def process_and_save_transcript (video_id , video_info , language , generate_punctuated , output_dir , filename , verbose , punctuation_model ):
150148 try :
149+ logging .info ('Getting transcript...' )
151150 raw_transcript = get_transcript (
152151 video_id , language , video_info , verbose )
153- logging .info ("Raw Transcript Length: %d" , len (raw_transcript ))
154152
155153 if generate_punctuated :
154+ logging .info ('Generating punctuated transcript...' )
156155 with_punctuation = add_punctuation (
157156 raw_transcript , punctuation_model )
158157 with_punctuation = remove_period_after_hashes (with_punctuation )
159- logging .info ("Punctuation Char Length: %d" , len ( with_punctuation ) )
158+ logging .info ('Capitalizing sentences...' )
160159 sentences = nltk .sent_tokenize (with_punctuation )
161- logging .info ("Sentences to process, (punctuated): %d" ,
162- len (sentences ))
163160 else :
164161 sentences = nltk .sent_tokenize (raw_transcript )
165- logging .info ("Sentences to process, (raw): %d" , len (sentences ))
166162
167163 # Capitalize sentences without batching
168164 capitalized_sentences = capitalize_sentences (sentences )
@@ -171,13 +167,16 @@ def process_and_save_transcript(video_id, video_info, language, generate_punctua
171167 capitalized_transcript = double_linesep .join (capitalized_sentences )
172168 output_path = os .path .join (output_dir , f'{ filename } .md' )
173169
170+ logging .info (f'Saving transcript to { output_path } ...' )
174171 with open (output_path , 'w' , encoding = 'utf-8' ) as f :
175172 f .write (capitalized_transcript )
176173
174+ # Force the root logger to INFO so the saved-path message below is always printed, even if the level was lowered to ERROR for a non-verbose run
175+ logging .getLogger ().setLevel (logging .INFO )
177176 if generate_punctuated :
178- logging .info (f'Punctuated transcript saved to { output_path } ' )
177+ logging .info (f'Punctuated transcript saved to \' { output_path } \' ' )
179178 else :
180- logging .info (f'Raw transcript saved to { output_path } ' )
179+ logging .info (f'Raw transcript saved to \' { output_path } \' ' )
181180
182181 except Exception as e :
183182 logging .error (f'Error: { e } ' )
@@ -191,6 +190,7 @@ def getVideoInfo(video_id):
191190 raise Exception (
192191 "No API key found, please set the YOUTUBE_API_KEY environment variable. \n Example: export YOUTUBE_API_KEY=your_api_key"
193192 )
193+ logging .info ('Getting video info...' )
194194 youtube = googleapiclient .discovery .build (
195195 "youtube" , "v3" , developerKey = api_key )
196196 request = youtube .videos ().list (part = "id,snippet" ,
@@ -242,6 +242,10 @@ def main():
242242
243243 args = parser .parse_args ()
244244
245+ # if verbose is false, set logging level to error
246+ if not args .verbose :
247+ logging .getLogger ().setLevel (logging .ERROR )
248+
245249 video_id = parse_youtube_url (args .url )
246250 video_info = getVideoInfo (video_id )
247251 filename = args .filename or clean_for_filename (
0 commit comments