Spaces:
Running
Running
File size: 4,541 Bytes
f795870 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
const path = require("path");
const { whisper } = require(path.join(
__dirname,
"../../build/Release/addon.node"
));
const { promisify } = require("util");
const whisperAsync = promisify(whisper);
// Example with VAD enabled
const vadParams = {
language: "en",
model: path.join(__dirname, "../../models/ggml-base.en.bin"),
fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
use_gpu: true,
flash_attn: false,
no_prints: false,
comma_in_time: true,
translate: false,
no_timestamps: false,
detect_language: false,
audio_ctx: 0,
max_len: 0,
// VAD parameters
vad: true,
vad_model: path.join(__dirname, "../../models/ggml-silero-v5.1.2.bin"), // You need to download this model
vad_threshold: 0.5,
vad_min_speech_duration_ms: 250,
vad_min_silence_duration_ms: 100,
vad_max_speech_duration_s: 30.0,
vad_speech_pad_ms: 30,
vad_samples_overlap: 0.1,
progress_callback: (progress) => {
console.log(`VAD Transcription progress: ${progress}%`);
}
};
// Example without VAD (traditional approach)
const traditionalParams = {
language: "en",
model: path.join(__dirname, "../../models/ggml-base.en.bin"),
fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
use_gpu: true,
flash_attn: false,
no_prints: false,
comma_in_time: true,
translate: false,
no_timestamps: false,
detect_language: false,
audio_ctx: 0,
max_len: 0,
vad: false, // Explicitly disable VAD
progress_callback: (progress) => {
console.log(`Traditional transcription progress: ${progress}%`);
}
};
async function runVADExample() {
try {
console.log("=== Whisper.cpp Node.js VAD Example ===\n");
// Check if VAD model exists
const fs = require('fs');
if (!fs.existsSync(vadParams.vad_model)) {
console.log("β οΈ VAD model not found. Please download the VAD model first:");
console.log(" ./models/download-vad-model.sh silero-v5.1.2");
console.log(" Or run: python models/convert-silero-vad-to-ggml.py");
console.log("\n Falling back to traditional transcription without VAD...\n");
// Run without VAD
console.log("π΅ Running traditional transcription...");
const traditionalResult = await whisperAsync(traditionalParams);
console.log("\nπ Traditional transcription result:");
console.log(traditionalResult);
return;
}
console.log("π΅ Running transcription with VAD enabled...");
console.log("VAD Parameters:");
console.log(` - Threshold: ${vadParams.vad_threshold}`);
console.log(` - Min speech duration: ${vadParams.vad_min_speech_duration_ms}ms`);
console.log(` - Min silence duration: ${vadParams.vad_min_silence_duration_ms}ms`);
console.log(` - Max speech duration: ${vadParams.vad_max_speech_duration_s}s`);
console.log(` - Speech padding: ${vadParams.vad_speech_pad_ms}ms`);
console.log(` - Samples overlap: ${vadParams.vad_samples_overlap}\n`);
const startTime = Date.now();
const vadResult = await whisperAsync(vadParams);
const vadDuration = Date.now() - startTime;
console.log("\nβ
VAD transcription completed!");
console.log(`β±οΈ Processing time: ${vadDuration}ms`);
console.log("\nπ VAD transcription result:");
console.log(vadResult);
// Compare with traditional approach
console.log("\nπ Running traditional transcription for comparison...");
const traditionalStartTime = Date.now();
const traditionalResult = await whisperAsync(traditionalParams);
const traditionalDuration = Date.now() - traditionalStartTime;
console.log("\nβ
Traditional transcription completed!");
console.log(`β±οΈ Processing time: ${traditionalDuration}ms`);
console.log("\nπ Traditional transcription result:");
console.log(traditionalResult);
// Performance comparison
console.log("\nπ Performance Comparison:");
console.log(`VAD: ${vadDuration}ms`);
console.log(`Traditional: ${traditionalDuration}ms`);
const speedup = traditionalDuration / vadDuration;
if (speedup > 1) {
console.log(`π VAD is ${speedup.toFixed(2)}x faster!`);
} else {
console.log(`βΉοΈ Traditional approach was ${(1/speedup).toFixed(2)}x faster in this case.`);
}
} catch (error) {
console.error("β Error during transcription:", error);
}
}
// Run the example
if (require.main === module) {
runVADExample();
}
module.exports = {
runVADExample,
vadParams,
traditionalParams
}; |