Spaces:
Sleeping
Sleeping
readme : update help (#1560)
Browse files
- README.md +5 -3
- examples/main/README.md +12 -3
README.md
CHANGED
|
@@ -110,8 +110,8 @@ options:
|
|
| 110 |
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
| 111 |
-ml N, --max-len N [0 ] maximum segment length in characters
|
| 112 |
-sow, --split-on-word [false ] split on word rather than on token
|
| 113 |
-
-bo N, --best-of N [2 ] number of best candidates to keep
|
| 114 |
-
-bs N, --beam-size N [-1 ] beam size for beam search
|
| 115 |
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
| 116 |
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
| 117 |
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
|
@@ -128,6 +128,7 @@ options:
|
|
| 128 |
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
|
| 129 |
-ocsv, --output-csv [false ] output result in a CSV file
|
| 130 |
-oj, --output-json [false ] output result in a JSON file
|
|
|
|
| 131 |
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
| 132 |
-ps, --print-special [false ] print special tokens
|
| 133 |
-pc, --print-colors [false ] print colors
|
|
@@ -139,7 +140,8 @@ options:
|
|
| 139 |
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
| 140 |
-f FNAME, --file FNAME [ ] input WAV file path
|
| 141 |
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
| 142 |
-
-ls, --log-score [false ] log best decoder scores of token
|
|
|
|
| 143 |
|
| 144 |
|
| 145 |
bash ./models/download-ggml-model.sh base.en
|
|
|
|
| 110 |
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
| 111 |
-ml N, --max-len N [0 ] maximum segment length in characters
|
| 112 |
-sow, --split-on-word [false ] split on word rather than on token
|
| 113 |
+
-bo N, --best-of N [5 ] number of best candidates to keep
|
| 114 |
+
-bs N, --beam-size N [5 ] beam size for beam search
|
| 115 |
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
| 116 |
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
| 117 |
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
|
|
|
| 128 |
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
|
| 129 |
-ocsv, --output-csv [false ] output result in a CSV file
|
| 130 |
-oj, --output-json [false ] output result in a JSON file
|
| 131 |
+
-ojf, --output-json-full [false ] include more information in the JSON file
|
| 132 |
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
| 133 |
-ps, --print-special [false ] print special tokens
|
| 134 |
-pc, --print-colors [false ] print colors
|
|
|
|
| 140 |
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
| 141 |
-f FNAME, --file FNAME [ ] input WAV file path
|
| 142 |
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
| 143 |
+
-ls, --log-score [false ] log best decoder scores of tokens
|
| 144 |
+
-ng, --no-gpu [false ] disable GPU
|
| 145 |
|
| 146 |
|
| 147 |
bash ./models/download-ggml-model.sh base.en
|
examples/main/README.md
CHANGED
|
@@ -17,28 +17,37 @@ options:
|
|
| 17 |
-d N, --duration N [0 ] duration of audio to process in milliseconds
|
| 18 |
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
| 19 |
-ml N, --max-len N [0 ] maximum segment length in characters
|
|
|
|
| 20 |
-bo N, --best-of N [5 ] number of best candidates to keep
|
| 21 |
-
-bs N, --beam-size N [-1 ] beam size for beam search
|
| 22 |
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
| 23 |
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
| 24 |
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
| 25 |
-
-
|
| 26 |
-tr, --translate [false ] translate from source language to english
|
| 27 |
-di, --diarize [false ] stereo audio diarization
|
|
|
|
| 28 |
-nf, --no-fallback [false ] do not use temperature fallback while decoding
|
| 29 |
-otxt, --output-txt [false ] output result in a text file
|
| 30 |
-ovtt, --output-vtt [false ] output result in a vtt file
|
| 31 |
-osrt, --output-srt [false ] output result in a srt file
|
|
|
|
| 32 |
-owts, --output-words [false ] output script for generating karaoke video
|
|
|
|
| 33 |
-ocsv, --output-csv [false ] output result in a CSV file
|
| 34 |
-oj, --output-json [false ] output result in a JSON file
|
|
|
|
| 35 |
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
| 36 |
-ps, --print-special [false ] print special tokens
|
| 37 |
-pc, --print-colors [false ] print colors
|
| 38 |
-pp, --print-progress [false ] print progress
|
| 39 |
-
-nt, --no-timestamps [true ] do not print timestamps
|
| 40 |
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
|
|
|
|
| 41 |
--prompt PROMPT [ ] initial prompt
|
| 42 |
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
| 43 |
-f FNAME, --file FNAME [ ] input WAV file path
|
|
|
|
|
|
|
|
|
|
| 44 |
```
|
|
|
|
| 17 |
-d N, --duration N [0 ] duration of audio to process in milliseconds
|
| 18 |
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
| 19 |
-ml N, --max-len N [0 ] maximum segment length in characters
|
| 20 |
+
-sow, --split-on-word [false ] split on word rather than on token
|
| 21 |
-bo N, --best-of N [5 ] number of best candidates to keep
|
| 22 |
+
-bs N, --beam-size N [5 ] beam size for beam search
|
| 23 |
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
| 24 |
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
| 25 |
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
| 26 |
+
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
|
| 27 |
-tr, --translate [false ] translate from source language to english
|
| 28 |
-di, --diarize [false ] stereo audio diarization
|
| 29 |
+
-tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
|
| 30 |
-nf, --no-fallback [false ] do not use temperature fallback while decoding
|
| 31 |
-otxt, --output-txt [false ] output result in a text file
|
| 32 |
-ovtt, --output-vtt [false ] output result in a vtt file
|
| 33 |
-osrt, --output-srt [false ] output result in a srt file
|
| 34 |
+
-olrc, --output-lrc [false ] output result in a lrc file
|
| 35 |
-owts, --output-words [false ] output script for generating karaoke video
|
| 36 |
+
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
|
| 37 |
-ocsv, --output-csv [false ] output result in a CSV file
|
| 38 |
-oj, --output-json [false ] output result in a JSON file
|
| 39 |
+
-ojf, --output-json-full [false ] include more information in the JSON file
|
| 40 |
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
| 41 |
-ps, --print-special [false ] print special tokens
|
| 42 |
-pc, --print-colors [false ] print colors
|
| 43 |
-pp, --print-progress [false ] print progress
|
| 44 |
+
-nt, --no-timestamps [false ] do not print timestamps
|
| 45 |
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
|
| 46 |
+
-dl, --detect-language [false ] exit after automatically detecting language
|
| 47 |
--prompt PROMPT [ ] initial prompt
|
| 48 |
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
| 49 |
-f FNAME, --file FNAME [ ] input WAV file path
|
| 50 |
+
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
| 51 |
+
-ls, --log-score [false ] log best decoder scores of tokens
|
| 52 |
+
-ng, --no-gpu [false ] disable GPU
|
| 53 |
```
|