NielsMayer committed on
Commit
20ca90d
·
unverified ·
1 Parent(s): 86b56ca

main : add -ocsv, aka --output-csv to output a CSV file

Browse files

Adds the -ocsv (aka --output-csv) option to examples/main. It writes a CSV file with one line per transcript segment, formatted as follows: <startTime-in-integer-milliseconds>, <endTime-in-integer-milliseconds>, "<transcript-line-including-commas>".

Files changed (1) hide show
  1. examples/main/main.cpp +36 -0
examples/main/main.cpp CHANGED
@@ -69,6 +69,7 @@ struct whisper_params {
69
  bool output_vtt = false;
70
  bool output_srt = false;
71
  bool output_wts = false;
 
72
  bool print_special = false;
73
  bool print_colors = false;
74
  bool print_progress = false;
@@ -111,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
111
  else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
112
  else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
113
  else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
 
114
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
115
  else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
116
  else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
@@ -150,6 +152,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
150
  fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
151
  fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
152
  fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
 
153
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
154
  fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
155
  fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
@@ -325,6 +328,32 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
325
  return true;
326
  }
327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  // karaoke video generation
329
  // outputs a bash script that uses ffmpeg to generate a video with the subtitles
330
  // TODO: font parameter adjustments
@@ -674,6 +703,13 @@ int main(int argc, char ** argv) {
674
  const auto fname_wts = fname_inp + ".wts";
675
  output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
676
  }
 
 
 
 
 
 
 
677
  }
678
  }
679
 
 
69
  bool output_vtt = false;
70
  bool output_srt = false;
71
  bool output_wts = false;
72
+ bool output_csv = false;
73
  bool print_special = false;
74
  bool print_colors = false;
75
  bool print_progress = false;
 
112
  else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
113
  else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
114
  else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
115
+ else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
116
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
117
  else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
118
  else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
 
152
  fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
153
  fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
154
  fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
155
+ fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
156
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
157
  fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
158
  fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
 
328
  return true;
329
  }
330
 
331
+ bool output_csv(struct whisper_context * ctx, const char * fname) {
332
+ std::ofstream fout(fname);
333
+ if (!fout.is_open()) {
334
+ fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
335
+ return false;
336
+ }
337
+
338
+ fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
339
+
340
+ const int n_segments = whisper_full_n_segments(ctx);
341
+ for (int i = 0; i < n_segments; ++i) {
342
+ const char * text = whisper_full_get_segment_text(ctx, i);
343
+ if (text[0] == ' ')
344
+ text = text + sizeof(char); //whisper_full_get_segment_text() returns a string with leading space, point to the next character.
345
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
346
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
347
+ //need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
348
+ fout << 10 * t0 << ", "
349
+ << 10 * t1 << ", \""
350
+ << text << "\"\n";
351
+ }
352
+
353
+ return true;
354
+ }
355
+
356
+
357
  // karaoke video generation
358
  // outputs a bash script that uses ffmpeg to generate a video with the subtitles
359
  // TODO: font parameter adjustments
 
703
  const auto fname_wts = fname_inp + ".wts";
704
  output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
705
  }
706
+
707
+ // output to CSV file
708
+ if (params.output_csv) {
709
+ const auto fname_csv = fname_inp + ".csv";
710
+ output_csv(ctx, fname_csv.c_str());
711
+ }
712
+
713
  }
714
  }
715