Spaces:
Sleeping
Sleeping
ruby : extend API (#2551)
Browse files
* Handle objs in Ruby code
* Add task to make Makefile
* Share common constants in test suites
* Add model-related APIs
* Add Whisper::Model class
* Add tests for Whisper::Model
* Add missing LDFLAG -lstdc++
* Add tests for Whisper.log_set
* Add Whisper.set_log
* Define log level
* Add document on logging
* Add license section to README
* Add document on Whisper::Model
* Fix examples in README
* Add test for Model with GC
* Make dependency on Makefile more accurate
* Fix bug about Whisper::Model and GC
- bindings/ruby/README.md +58 -0
- bindings/ruby/Rakefile +19 -10
- bindings/ruby/ext/extconf.rb +21 -58
- bindings/ruby/ext/ruby_whisper.cpp +367 -0
- bindings/ruby/tests/helper.rb +7 -0
- bindings/ruby/tests/test_model.rb +44 -0
- bindings/ruby/tests/test_package.rb +2 -2
- bindings/ruby/tests/test_params.rb +2 -3
- bindings/ruby/tests/test_segment.rb +6 -10
- bindings/ruby/tests/test_whisper.rb +38 -10
bindings/ruby/README.md
CHANGED
|
@@ -107,5 +107,63 @@ whisper.transcribe("path/to/audio.wav", params)
|
|
| 107 |
|
| 108 |
```
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
|
| 111 |
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
|
|
|
|
| 107 |
|
| 108 |
```
|
| 109 |
|
| 110 |
+
You can see model information:
|
| 111 |
+
|
| 112 |
+
```ruby
|
| 113 |
+
whisper = Whisper::Context.new("path/to/model.bin")
|
| 114 |
+
model = whisper.model
|
| 115 |
+
|
| 116 |
+
model.n_vocab # => 51864
|
| 117 |
+
model.n_audio_ctx # => 1500
|
| 118 |
+
model.n_audio_state # => 512
|
| 119 |
+
model.n_audio_head # => 8
|
| 120 |
+
model.n_audio_layer # => 6
|
| 121 |
+
model.n_text_ctx # => 448
|
| 122 |
+
model.n_text_state # => 512
|
| 123 |
+
model.n_text_head # => 8
|
| 124 |
+
model.n_text_layer # => 6
|
| 125 |
+
model.n_mels # => 80
|
| 126 |
+
model.ftype # => 1
|
| 127 |
+
model.type # => "base"
|
| 128 |
+
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
You can set a log callback:
|
| 132 |
+
|
| 133 |
+
```ruby
|
| 134 |
+
prefix = "[MyApp] "
|
| 135 |
+
log_callback = ->(level, buffer, user_data) {
|
| 136 |
+
case level
|
| 137 |
+
when Whisper::LOG_LEVEL_NONE
|
| 138 |
+
puts "#{user_data}none: #{buffer}"
|
| 139 |
+
when Whisper::LOG_LEVEL_INFO
|
| 140 |
+
puts "#{user_data}info: #{buffer}"
|
| 141 |
+
when Whisper::LOG_LEVEL_WARN
|
| 142 |
+
puts "#{user_data}warn: #{buffer}"
|
| 143 |
+
when Whisper::LOG_LEVEL_ERROR
|
| 144 |
+
puts "#{user_data}error: #{buffer}"
|
| 145 |
+
when Whisper::LOG_LEVEL_DEBUG
|
| 146 |
+
puts "#{user_data}debug: #{buffer}"
|
| 147 |
+
when Whisper::LOG_LEVEL_CONT
|
| 148 |
+
puts "#{user_data}same to previous: #{buffer}"
|
| 149 |
+
end
|
| 150 |
+
}
|
| 151 |
+
Whisper.log_set log_callback, prefix
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
Using this feature, you can also suppress log output:
|
| 155 |
+
|
| 156 |
+
```ruby
|
| 157 |
+
Whisper.log_set ->(level, buffer, user_data) {
|
| 158 |
+
# do nothing
|
| 159 |
+
}, nil
|
| 160 |
+
Whisper::Context.new(MODEL)
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
License
|
| 164 |
+
-------
|
| 165 |
+
|
| 166 |
+
The same as [whisper.cpp][].
|
| 167 |
+
|
| 168 |
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
|
| 169 |
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
|
bindings/ruby/Rakefile
CHANGED
|
@@ -23,30 +23,39 @@ CLEAN.include FileList[
|
|
| 23 |
"ext/depend"
|
| 24 |
]
|
| 25 |
|
| 26 |
-
task build:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
directory "pkg"
|
| 34 |
CLOBBER.include "pkg"
|
| 35 |
|
| 36 |
TEST_MODEL = "../../models/ggml-base.en.bin"
|
| 37 |
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
|
|
|
| 38 |
LIB_FILE = File.join("lib", LIB_NAME)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
Dir.chdir "ext" do
|
| 43 |
-
sh "ruby extconf.rb"
|
| 44 |
sh "make"
|
| 45 |
end
|
| 46 |
-
mv "ext/#{LIB_NAME}", t.name
|
| 47 |
end
|
| 48 |
CLEAN.include LIB_FILE
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
Rake::TestTask.new do |t|
|
| 51 |
t.test_files = FileList["tests/test_*.rb"]
|
| 52 |
end
|
|
|
|
| 23 |
"ext/depend"
|
| 24 |
]
|
| 25 |
|
| 26 |
+
task build: FileList[
|
| 27 |
+
"ext/Makefile",
|
| 28 |
+
"ext/ruby_whisper.h",
|
| 29 |
+
"ext/ruby_whisper.cpp",
|
| 30 |
+
"whispercpp.gemspec",
|
| 31 |
+
]
|
| 32 |
|
| 33 |
directory "pkg"
|
| 34 |
CLOBBER.include "pkg"
|
| 35 |
|
| 36 |
TEST_MODEL = "../../models/ggml-base.en.bin"
|
| 37 |
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
| 38 |
+
SO_FILE = File.join("ext", LIB_NAME)
|
| 39 |
LIB_FILE = File.join("lib", LIB_NAME)
|
| 40 |
|
| 41 |
+
file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
|
| 42 |
+
Dir.chdir "ext" do
|
| 43 |
+
ruby "extconf.rb"
|
| 44 |
+
end
|
| 45 |
+
end
|
| 46 |
+
|
| 47 |
+
file SO_FILE => "ext/Makefile" do |t|
|
| 48 |
Dir.chdir "ext" do
|
|
|
|
| 49 |
sh "make"
|
| 50 |
end
|
|
|
|
| 51 |
end
|
| 52 |
CLEAN.include LIB_FILE
|
| 53 |
|
| 54 |
+
directory "lib"
|
| 55 |
+
file LIB_FILE => [SO_FILE, "lib"] do |t|
|
| 56 |
+
copy t.source, t.name
|
| 57 |
+
end
|
| 58 |
+
|
| 59 |
Rake::TestTask.new do |t|
|
| 60 |
t.test_files = FileList["tests/test_*.rb"]
|
| 61 |
end
|
bindings/ruby/ext/extconf.rb
CHANGED
|
@@ -2,6 +2,9 @@ require 'mkmf'
|
|
| 2 |
|
| 3 |
# need to use c++ compiler flags
|
| 4 |
$CXXFLAGS << ' -std=c++11'
|
|
|
|
|
|
|
|
|
|
| 5 |
# Set to true when building binary gems
|
| 6 |
if enable_config('static-stdlib', false)
|
| 7 |
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
|
@@ -12,34 +15,6 @@ if enable_config('march-tune-native', false)
|
|
| 12 |
$CXXFLAGS << ' -march=native -mtune=native'
|
| 13 |
end
|
| 14 |
|
| 15 |
-
def with_disabling_unsupported_files
|
| 16 |
-
disabled_files = []
|
| 17 |
-
|
| 18 |
-
unless $GGML_METAL
|
| 19 |
-
disabled_files << 'ggml-metal.h' << 'ggml-metal.m'
|
| 20 |
-
end
|
| 21 |
-
|
| 22 |
-
unless $GGML_METAL_EMBED_LIBRARY
|
| 23 |
-
disabled_files << 'ggml-metal.metal'
|
| 24 |
-
end
|
| 25 |
-
|
| 26 |
-
unless $OBJ_ALL&.include? 'ggml-blas.o'
|
| 27 |
-
disabled_files << 'ggml-blas.h' << 'ggml-blas.cpp'
|
| 28 |
-
end
|
| 29 |
-
|
| 30 |
-
disabled_files.filter! {|file| File.exist? file}
|
| 31 |
-
|
| 32 |
-
disabled_files.each do |file|
|
| 33 |
-
File.rename file, "#{file}.disabled"
|
| 34 |
-
end
|
| 35 |
-
|
| 36 |
-
yield
|
| 37 |
-
|
| 38 |
-
disabled_files.each do |file|
|
| 39 |
-
File.rename "#{file}.disabled", file
|
| 40 |
-
end
|
| 41 |
-
end
|
| 42 |
-
|
| 43 |
if ENV['WHISPER_METAL']
|
| 44 |
$GGML_METAL ||= true
|
| 45 |
$DEPRECATE_WARNING ||= true
|
|
@@ -66,10 +41,10 @@ $MK_CXXFLAGS = '-std=c++11 -fPIC'
|
|
| 66 |
$MK_NVCCFLAGS = '-std=c++11'
|
| 67 |
$MK_LDFLAGS = ''
|
| 68 |
|
| 69 |
-
$OBJ_GGML =
|
| 70 |
-
$OBJ_WHISPER =
|
| 71 |
-
$OBJ_COMMON =
|
| 72 |
-
$OBJ_SDL =
|
| 73 |
|
| 74 |
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
|
| 75 |
|
|
@@ -152,7 +127,7 @@ unless ENV['GGML_NO_ACCELERATE']
|
|
| 152 |
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
|
| 153 |
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
|
| 154 |
$MK_LDFLAGS << ' -framework Accelerate'
|
| 155 |
-
$OBJ_GGML << '
|
| 156 |
end
|
| 157 |
end
|
| 158 |
|
|
@@ -160,20 +135,20 @@ if ENV['GGML_OPENBLAS']
|
|
| 160 |
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
|
| 161 |
# Append the non-include compiler flags pkg-config reports for OpenBLAS (no stray ")" in the shell command).
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas`.chomp}"
|
| 162 |
# chomp: backticks keep pkg-config's trailing newline, which must not end up inside the linker flags.
$MK_LDFLAGS << " #{`pkg-config --libs openblas`.chomp}"
|
| 163 |
-
$OBJ_GGML << '
|
| 164 |
end
|
| 165 |
|
| 166 |
if ENV['GGML_OPENBLAS64']
|
| 167 |
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
|
| 168 |
# Append the non-include compiler flags pkg-config reports for OpenBLAS64 (no stray ")" in the shell command).
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64`.chomp}"
|
| 169 |
# chomp: backticks keep pkg-config's trailing newline, which must not end up inside the linker flags.
$MK_LDFLAGS << " #{`pkg-config --libs openblas64`.chomp}"
|
| 170 |
-
$OBJ_GGML << '
|
| 171 |
end
|
| 172 |
|
| 173 |
if $GGML_METAL
|
| 174 |
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
|
| 175 |
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
| 176 |
-
$OBJ_GGML << '
|
| 177 |
|
| 178 |
if ENV['GGML_METAL_NDEBUG']
|
| 179 |
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
|
|
@@ -181,21 +156,22 @@ if $GGML_METAL
|
|
| 181 |
|
| 182 |
if $GGML_METAL_EMBED_LIBRARY
|
| 183 |
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
|
| 184 |
-
$OBJ_GGML << '
|
| 185 |
end
|
| 186 |
end
|
| 187 |
|
| 188 |
$OBJ_GGML <<
|
| 189 |
-
'
|
| 190 |
-
'
|
| 191 |
-
'
|
| 192 |
-
'
|
| 193 |
-
'
|
| 194 |
|
| 195 |
$OBJ_WHISPER <<
|
| 196 |
-
'
|
| 197 |
|
| 198 |
-
$
|
|
|
|
| 199 |
|
| 200 |
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
| 201 |
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
|
@@ -204,26 +180,13 @@ $CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
|
|
| 204 |
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
|
| 205 |
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
|
| 206 |
|
| 207 |
-
|
| 208 |
-
File.write 'depend', "$(OBJS): $(OBJS) ggml-metal-embed.o\n"
|
| 209 |
-
end
|
| 210 |
-
|
| 211 |
-
with_disabling_unsupported_files do
|
| 212 |
-
|
| 213 |
-
create_makefile('whisper')
|
| 214 |
-
|
| 215 |
-
end
|
| 216 |
|
| 217 |
File.open 'Makefile', 'a' do |file|
|
| 218 |
file.puts 'include get-flags.mk'
|
| 219 |
|
| 220 |
if $GGML_METAL
|
| 221 |
if $GGML_METAL_EMBED_LIBRARY
|
| 222 |
-
# mkmf determines object files to compile dependent on existing *.{c,cpp,m} files
|
| 223 |
-
# but ggml-metal-embed.c doesn't exist on creating Makefile.
|
| 224 |
-
file.puts "objs := $(OBJS)"
|
| 225 |
-
file.puts "OBJS = $(objs) 'ggml-metal-embed.o'"
|
| 226 |
-
|
| 227 |
file.puts 'include metal-embed.mk'
|
| 228 |
end
|
| 229 |
end
|
|
|
|
| 2 |
|
| 3 |
# need to use c++ compiler flags
|
| 4 |
$CXXFLAGS << ' -std=c++11'
|
| 5 |
+
|
| 6 |
+
$LDFLAGS << ' -lstdc++'
|
| 7 |
+
|
| 8 |
# Set to true when building binary gems
|
| 9 |
if enable_config('static-stdlib', false)
|
| 10 |
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
|
|
|
| 15 |
$CXXFLAGS << ' -march=native -mtune=native'
|
| 16 |
end
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
if ENV['WHISPER_METAL']
|
| 19 |
$GGML_METAL ||= true
|
| 20 |
$DEPRECATE_WARNING ||= true
|
|
|
|
| 41 |
$MK_NVCCFLAGS = '-std=c++11'
|
| 42 |
$MK_LDFLAGS = ''
|
| 43 |
|
| 44 |
+
$OBJ_GGML = []
|
| 45 |
+
$OBJ_WHISPER = []
|
| 46 |
+
$OBJ_COMMON = []
|
| 47 |
+
$OBJ_SDL = []
|
| 48 |
|
| 49 |
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
|
| 50 |
|
|
|
|
| 127 |
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
|
| 128 |
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
|
| 129 |
$MK_LDFLAGS << ' -framework Accelerate'
|
| 130 |
+
$OBJ_GGML << 'ggml-blas.o'
|
| 131 |
end
|
| 132 |
end
|
| 133 |
|
|
|
|
| 135 |
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
|
| 136 |
# Append the non-include compiler flags pkg-config reports for OpenBLAS (no stray ")" in the shell command).
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas`.chomp}"
|
| 137 |
# chomp: backticks keep pkg-config's trailing newline, which must not end up inside the linker flags.
$MK_LDFLAGS << " #{`pkg-config --libs openblas`.chomp}"
|
| 138 |
+
$OBJ_GGML << 'ggml-blas.o'
|
| 139 |
end
|
| 140 |
|
| 141 |
if ENV['GGML_OPENBLAS64']
|
| 142 |
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
|
| 143 |
# Append the non-include compiler flags pkg-config reports for OpenBLAS64 (no stray ")" in the shell command).
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64`.chomp}"
|
| 144 |
# chomp: backticks keep pkg-config's trailing newline, which must not end up inside the linker flags.
$MK_LDFLAGS << " #{`pkg-config --libs openblas64`.chomp}"
|
| 145 |
+
$OBJ_GGML << 'ggml-blas.o'
|
| 146 |
end
|
| 147 |
|
| 148 |
if $GGML_METAL
|
| 149 |
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
|
| 150 |
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
| 151 |
+
$OBJ_GGML << 'ggml-metal.o'
|
| 152 |
|
| 153 |
if ENV['GGML_METAL_NDEBUG']
|
| 154 |
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
|
|
|
|
| 156 |
|
| 157 |
if $GGML_METAL_EMBED_LIBRARY
|
| 158 |
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
|
| 159 |
+
$OBJ_GGML << 'ggml-metal-embed.o'
|
| 160 |
end
|
| 161 |
end
|
| 162 |
|
| 163 |
$OBJ_GGML <<
|
| 164 |
+
'ggml.o' <<
|
| 165 |
+
'ggml-alloc.o' <<
|
| 166 |
+
'ggml-backend.o' <<
|
| 167 |
+
'ggml-quants.o' <<
|
| 168 |
+
'ggml-aarch64.o'
|
| 169 |
|
| 170 |
$OBJ_WHISPER <<
|
| 171 |
+
'whisper.o'
|
| 172 |
|
| 173 |
+
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
| 174 |
+
$objs << "ruby_whisper.o"
|
| 175 |
|
| 176 |
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
| 177 |
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
|
|
|
| 180 |
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
|
| 181 |
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
|
| 182 |
|
| 183 |
+
create_makefile('whisper')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
File.open 'Makefile', 'a' do |file|
|
| 186 |
file.puts 'include get-flags.mk'
|
| 187 |
|
| 188 |
if $GGML_METAL
|
| 189 |
if $GGML_METAL_EMBED_LIBRARY
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
file.puts 'include metal-embed.mk'
|
| 191 |
end
|
| 192 |
end
|
bindings/ruby/ext/ruby_whisper.cpp
CHANGED
|
@@ -41,6 +41,8 @@ static ID id_call;
|
|
| 41 |
static ID id___method__;
|
| 42 |
static ID id_to_enum;
|
| 43 |
|
|
|
|
|
|
|
| 44 |
/*
|
| 45 |
* call-seq:
|
| 46 |
* lang_max_id -> Integer
|
|
@@ -88,6 +90,39 @@ static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
|
| 88 |
return rb_str_new2(str_full);
|
| 89 |
}
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
static void ruby_whisper_free(ruby_whisper *rw) {
|
| 92 |
if (rw->context) {
|
| 93 |
whisper_free(rw->context);
|
|
@@ -389,6 +424,126 @@ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
|
| 389 |
return self;
|
| 390 |
}
|
| 391 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
/*
|
| 393 |
* Number of segments.
|
| 394 |
*
|
|
@@ -1015,7 +1170,12 @@ typedef struct {
|
|
| 1015 |
int index;
|
| 1016 |
} ruby_whisper_segment;
|
| 1017 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1018 |
VALUE cSegment;
|
|
|
|
| 1019 |
|
| 1020 |
static void rb_whisper_segment_mark(ruby_whisper_segment *rws) {
|
| 1021 |
rb_gc_mark(rws->context);
|
|
@@ -1188,6 +1348,176 @@ static VALUE ruby_whisper_segment_get_text(VALUE self) {
|
|
| 1188 |
return rb_str_new2(text);
|
| 1189 |
}
|
| 1190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1191 |
void Init_whisper() {
|
| 1192 |
id_to_s = rb_intern("to_s");
|
| 1193 |
id_call = rb_intern("call");
|
|
@@ -1198,15 +1528,36 @@ void Init_whisper() {
|
|
| 1198 |
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
|
| 1199 |
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
|
| 1200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1201 |
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
| 1202 |
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
| 1203 |
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
| 1204 |
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
|
|
|
|
|
|
| 1205 |
|
| 1206 |
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
| 1207 |
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
| 1208 |
|
| 1209 |
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1210 |
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
| 1211 |
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
| 1212 |
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
|
@@ -1284,6 +1635,22 @@ void Init_whisper() {
|
|
| 1284 |
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
| 1285 |
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
| 1286 |
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1287 |
}
|
| 1288 |
#ifdef __cplusplus
|
| 1289 |
}
|
|
|
|
| 41 |
static ID id___method__;
|
| 42 |
static ID id_to_enum;
|
| 43 |
|
| 44 |
+
static bool is_log_callback_finalized = false;
|
| 45 |
+
|
| 46 |
/*
|
| 47 |
* call-seq:
|
| 48 |
* lang_max_id -> Integer
|
|
|
|
| 90 |
return rb_str_new2(str_full);
|
| 91 |
}
|
| 92 |
|
| 93 |
+
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
| 94 |
+
is_log_callback_finalized = true;
|
| 95 |
+
return Qnil;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
/*
|
| 99 |
+
* call-seq:
|
| 100 |
+
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
| 101 |
+
*/
|
| 102 |
+
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
| 103 |
+
VALUE old_callback = rb_iv_get(self, "@log_callback");
|
| 104 |
+
if (!NIL_P(old_callback)) {
|
| 105 |
+
rb_undefine_finalizer(old_callback);
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
rb_iv_set(self, "@log_callback", log_callback);
|
| 109 |
+
rb_iv_set(self, "@user_data", user_data);
|
| 110 |
+
|
| 111 |
+
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
| 112 |
+
rb_define_finalizer(log_callback, finalize_log_callback);
|
| 113 |
+
|
| 114 |
+
whisper_log_set([](ggml_log_level level, const char * buffer, void * user_data) {
|
| 115 |
+
if (is_log_callback_finalized) {
|
| 116 |
+
return;
|
| 117 |
+
}
|
| 118 |
+
VALUE log_callback = rb_iv_get(mWhisper, "@log_callback");
|
| 119 |
+
VALUE udata = rb_iv_get(mWhisper, "@user_data");
|
| 120 |
+
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
| 121 |
+
}, nullptr);
|
| 122 |
+
|
| 123 |
+
return Qnil;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
static void ruby_whisper_free(ruby_whisper *rw) {
|
| 127 |
if (rw->context) {
|
| 128 |
whisper_free(rw->context);
|
|
|
|
| 424 |
return self;
|
| 425 |
}
|
| 426 |
|
| 427 |
+
/*
|
| 428 |
+
* call-seq:
|
| 429 |
+
* model_n_vocab -> Integer
|
| 430 |
+
*/
|
| 431 |
+
VALUE ruby_whisper_model_n_vocab(VALUE self) {
|
| 432 |
+
ruby_whisper *rw;
|
| 433 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 434 |
+
return INT2NUM(whisper_model_n_vocab(rw->context));
|
| 435 |
+
}
|
| 436 |
+
|
| 437 |
+
/*
|
| 438 |
+
* call-seq:
|
| 439 |
+
* model_n_audio_ctx -> Integer
|
| 440 |
+
*/
|
| 441 |
+
VALUE ruby_whisper_model_n_audio_ctx(VALUE self) {
|
| 442 |
+
ruby_whisper *rw;
|
| 443 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 444 |
+
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
/*
|
| 448 |
+
* call-seq:
|
| 449 |
+
* model_n_audio_state -> Integer
|
| 450 |
+
*/
|
| 451 |
+
VALUE ruby_whisper_model_n_audio_state(VALUE self) {
|
| 452 |
+
ruby_whisper *rw;
|
| 453 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 454 |
+
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
/*
|
| 458 |
+
* call-seq:
|
| 459 |
+
* model_n_audio_head -> Integer
|
| 460 |
+
*/
|
| 461 |
+
VALUE ruby_whisper_model_n_audio_head(VALUE self) {
|
| 462 |
+
ruby_whisper *rw;
|
| 463 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 464 |
+
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
/*
|
| 468 |
+
* call-seq:
|
| 469 |
+
* model_n_audio_layer -> Integer
|
| 470 |
+
*/
|
| 471 |
+
VALUE ruby_whisper_model_n_audio_layer(VALUE self) {
|
| 472 |
+
ruby_whisper *rw;
|
| 473 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 474 |
+
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
+
/*
|
| 478 |
+
* call-seq:
|
| 479 |
+
* model_n_text_ctx -> Integer
|
| 480 |
+
*/
|
| 481 |
+
VALUE ruby_whisper_model_n_text_ctx(VALUE self) {
|
| 482 |
+
ruby_whisper *rw;
|
| 483 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 484 |
+
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
/*
|
| 488 |
+
* call-seq:
|
| 489 |
+
* model_n_text_state -> Integer
|
| 490 |
+
*/
|
| 491 |
+
VALUE ruby_whisper_model_n_text_state(VALUE self) {
|
| 492 |
+
ruby_whisper *rw;
|
| 493 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 494 |
+
return INT2NUM(whisper_model_n_text_state(rw->context));
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
/*
|
| 498 |
+
* call-seq:
|
| 499 |
+
* model_n_text_head -> Integer
|
| 500 |
+
*/
|
| 501 |
+
VALUE ruby_whisper_model_n_text_head(VALUE self) {
|
| 502 |
+
ruby_whisper *rw;
|
| 503 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 504 |
+
return INT2NUM(whisper_model_n_text_head(rw->context));
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
/*
|
| 508 |
+
* call-seq:
|
| 509 |
+
* model_n_text_layer -> Integer
|
| 510 |
+
*/
|
| 511 |
+
VALUE ruby_whisper_model_n_text_layer(VALUE self) {
|
| 512 |
+
ruby_whisper *rw;
|
| 513 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 514 |
+
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
/*
|
| 518 |
+
* call-seq:
|
| 519 |
+
* model_n_mels -> Integer
|
| 520 |
+
*/
|
| 521 |
+
VALUE ruby_whisper_model_n_mels(VALUE self) {
|
| 522 |
+
ruby_whisper *rw;
|
| 523 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 524 |
+
return INT2NUM(whisper_model_n_mels(rw->context));
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
/*
|
| 528 |
+
* call-seq:
|
| 529 |
+
* model_ftype -> Integer
|
| 530 |
+
*/
|
| 531 |
+
VALUE ruby_whisper_model_ftype(VALUE self) {
|
| 532 |
+
ruby_whisper *rw;
|
| 533 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 534 |
+
return INT2NUM(whisper_model_ftype(rw->context));
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
/*
|
| 538 |
+
* call-seq:
|
| 539 |
+
* model_type -> String
|
| 540 |
+
*/
|
| 541 |
+
VALUE ruby_whisper_model_type(VALUE self) {
|
| 542 |
+
ruby_whisper *rw;
|
| 543 |
+
Data_Get_Struct(self, ruby_whisper, rw);
|
| 544 |
+
return rb_str_new2(whisper_model_type_readable(rw->context));
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
/*
|
| 548 |
* Number of segments.
|
| 549 |
*
|
|
|
|
| 1170 |
int index;
|
| 1171 |
} ruby_whisper_segment;
|
| 1172 |
|
| 1173 |
+
typedef struct {
|
| 1174 |
+
VALUE context;
|
| 1175 |
+
} ruby_whisper_model;
|
| 1176 |
+
|
| 1177 |
VALUE cSegment;
|
| 1178 |
+
VALUE cModel;
|
| 1179 |
|
| 1180 |
static void rb_whisper_segment_mark(ruby_whisper_segment *rws) {
|
| 1181 |
rb_gc_mark(rws->context);
|
|
|
|
| 1348 |
return rb_str_new2(text);
|
| 1349 |
}
|
| 1350 |
|
| 1351 |
+
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
|
| 1352 |
+
rb_gc_mark(rwm->context);
|
| 1353 |
+
}
|
| 1354 |
+
|
| 1355 |
+
static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
| 1356 |
+
ruby_whisper_model *rwm;
|
| 1357 |
+
// Zero-fill the struct so the GC mark function never reads an uninitialized VALUE before `context` is assigned.
rwm = ZALLOC(ruby_whisper_model);
|
| 1358 |
+
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
|
| 1359 |
+
}
|
| 1360 |
+
|
| 1361 |
+
static VALUE rb_whisper_model_initialize(VALUE context) {
|
| 1362 |
+
ruby_whisper_model *rwm;
|
| 1363 |
+
const VALUE model = ruby_whisper_model_allocate(cModel);
|
| 1364 |
+
Data_Get_Struct(model, ruby_whisper_model, rwm);
|
| 1365 |
+
rwm->context = context;
|
| 1366 |
+
return model;
|
| 1367 |
+
};
|
| 1368 |
+
|
| 1369 |
+
/*
|
| 1370 |
+
* call-seq:
|
| 1371 |
+
* model -> Whisper::Model
|
| 1372 |
+
*/
|
| 1373 |
+
static VALUE ruby_whisper_get_model(VALUE self) {
|
| 1374 |
+
return rb_whisper_model_initialize(self);
|
| 1375 |
+
}
|
| 1376 |
+
|
| 1377 |
+
/*
|
| 1378 |
+
* call-seq:
|
| 1379 |
+
* n_vocab -> Integer
|
| 1380 |
+
*/
|
| 1381 |
+
static VALUE ruby_whisper_c_model_n_vocab(VALUE self) {
|
| 1382 |
+
ruby_whisper_model *rwm;
|
| 1383 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1384 |
+
ruby_whisper *rw;
|
| 1385 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1386 |
+
return INT2NUM(whisper_model_n_vocab(rw->context));
|
| 1387 |
+
}
|
| 1388 |
+
|
| 1389 |
+
/*
|
| 1390 |
+
* call-seq:
|
| 1391 |
+
* n_audio_ctx -> Integer
|
| 1392 |
+
*/
|
| 1393 |
+
static VALUE ruby_whisper_c_model_n_audio_ctx(VALUE self) {
|
| 1394 |
+
ruby_whisper_model *rwm;
|
| 1395 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1396 |
+
ruby_whisper *rw;
|
| 1397 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1398 |
+
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
| 1399 |
+
}
|
| 1400 |
+
|
| 1401 |
+
/*
|
| 1402 |
+
* call-seq:
|
| 1403 |
+
* n_audio_state -> Integer
|
| 1404 |
+
*/
|
| 1405 |
+
static VALUE ruby_whisper_c_model_n_audio_state(VALUE self) {
|
| 1406 |
+
ruby_whisper_model *rwm;
|
| 1407 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1408 |
+
ruby_whisper *rw;
|
| 1409 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1410 |
+
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
| 1411 |
+
}
|
| 1412 |
+
|
| 1413 |
+
/*
|
| 1414 |
+
* call-seq:
|
| 1415 |
+
* n_audio_head -> Integer
|
| 1416 |
+
*/
|
| 1417 |
+
static VALUE ruby_whisper_c_model_n_audio_head(VALUE self) {
|
| 1418 |
+
ruby_whisper_model *rwm;
|
| 1419 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1420 |
+
ruby_whisper *rw;
|
| 1421 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1422 |
+
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
| 1423 |
+
}
|
| 1424 |
+
|
| 1425 |
+
/*
|
| 1426 |
+
* call-seq:
|
| 1427 |
+
* n_audio_layer -> Integer
|
| 1428 |
+
*/
|
| 1429 |
+
static VALUE ruby_whisper_c_model_n_audio_layer(VALUE self) {
|
| 1430 |
+
ruby_whisper_model *rwm;
|
| 1431 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1432 |
+
ruby_whisper *rw;
|
| 1433 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1434 |
+
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
| 1435 |
+
}
|
| 1436 |
+
|
| 1437 |
+
/*
|
| 1438 |
+
* call-seq:
|
| 1439 |
+
* n_text_ctx -> Integer
|
| 1440 |
+
*/
|
| 1441 |
+
static VALUE ruby_whisper_c_model_n_text_ctx(VALUE self) {
|
| 1442 |
+
ruby_whisper_model *rwm;
|
| 1443 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1444 |
+
ruby_whisper *rw;
|
| 1445 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1446 |
+
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
| 1447 |
+
}
|
| 1448 |
+
|
| 1449 |
+
/*
|
| 1450 |
+
* call-seq:
|
| 1451 |
+
* n_text_state -> Integer
|
| 1452 |
+
*/
|
| 1453 |
+
static VALUE ruby_whisper_c_model_n_text_state(VALUE self) {
|
| 1454 |
+
ruby_whisper_model *rwm;
|
| 1455 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1456 |
+
ruby_whisper *rw;
|
| 1457 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1458 |
+
return INT2NUM(whisper_model_n_text_state(rw->context));
|
| 1459 |
+
}
|
| 1460 |
+
|
| 1461 |
+
/*
|
| 1462 |
+
* call-seq:
|
| 1463 |
+
* n_text_head -> Integer
|
| 1464 |
+
*/
|
| 1465 |
+
static VALUE ruby_whisper_c_model_n_text_head(VALUE self) {
|
| 1466 |
+
ruby_whisper_model *rwm;
|
| 1467 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1468 |
+
ruby_whisper *rw;
|
| 1469 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1470 |
+
return INT2NUM(whisper_model_n_text_head(rw->context));
|
| 1471 |
+
}
|
| 1472 |
+
|
| 1473 |
+
/*
|
| 1474 |
+
* call-seq:
|
| 1475 |
+
* n_text_layer -> Integer
|
| 1476 |
+
*/
|
| 1477 |
+
static VALUE ruby_whisper_c_model_n_text_layer(VALUE self) {
|
| 1478 |
+
ruby_whisper_model *rwm;
|
| 1479 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1480 |
+
ruby_whisper *rw;
|
| 1481 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1482 |
+
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
| 1483 |
+
}
|
| 1484 |
+
|
| 1485 |
+
/*
|
| 1486 |
+
* call-seq:
|
| 1487 |
+
* n_mels -> Integer
|
| 1488 |
+
*/
|
| 1489 |
+
static VALUE ruby_whisper_c_model_n_mels(VALUE self) {
|
| 1490 |
+
ruby_whisper_model *rwm;
|
| 1491 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1492 |
+
ruby_whisper *rw;
|
| 1493 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1494 |
+
return INT2NUM(whisper_model_n_mels(rw->context));
|
| 1495 |
+
}
|
| 1496 |
+
|
| 1497 |
+
/*
|
| 1498 |
+
* call-seq:
|
| 1499 |
+
* ftype -> Integer
|
| 1500 |
+
*/
|
| 1501 |
+
static VALUE ruby_whisper_c_model_ftype(VALUE self) {
|
| 1502 |
+
ruby_whisper_model *rwm;
|
| 1503 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1504 |
+
ruby_whisper *rw;
|
| 1505 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1506 |
+
return INT2NUM(whisper_model_ftype(rw->context));
|
| 1507 |
+
}
|
| 1508 |
+
|
| 1509 |
+
/*
|
| 1510 |
+
* call-seq:
|
| 1511 |
+
* type -> String
|
| 1512 |
+
*/
|
| 1513 |
+
static VALUE ruby_whisper_c_model_type(VALUE self) {
|
| 1514 |
+
ruby_whisper_model *rwm;
|
| 1515 |
+
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
| 1516 |
+
ruby_whisper *rw;
|
| 1517 |
+
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
| 1518 |
+
return rb_str_new2(whisper_model_type_readable(rw->context));
|
| 1519 |
+
}
|
| 1520 |
+
|
| 1521 |
void Init_whisper() {
|
| 1522 |
id_to_s = rb_intern("to_s");
|
| 1523 |
id_call = rb_intern("call");
|
|
|
|
| 1528 |
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
|
| 1529 |
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
|
| 1530 |
|
| 1531 |
+
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
| 1532 |
+
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
| 1533 |
+
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
| 1534 |
+
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
| 1535 |
+
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
| 1536 |
+
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
| 1537 |
+
|
| 1538 |
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
| 1539 |
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
| 1540 |
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
| 1541 |
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
| 1542 |
+
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
| 1543 |
+
rb_define_singleton_method(mWhisper, "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
| 1544 |
|
| 1545 |
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
| 1546 |
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
| 1547 |
|
| 1548 |
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
| 1549 |
+
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
| 1550 |
+
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
| 1551 |
+
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
| 1552 |
+
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
| 1553 |
+
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
| 1554 |
+
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
| 1555 |
+
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
| 1556 |
+
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
| 1557 |
+
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
| 1558 |
+
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
| 1559 |
+
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
| 1560 |
+
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
| 1561 |
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
| 1562 |
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
| 1563 |
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
|
|
|
| 1635 |
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
| 1636 |
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
| 1637 |
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
| 1638 |
+
|
| 1639 |
+
cModel = rb_define_class_under(mWhisper, "Model", rb_cObject);
|
| 1640 |
+
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
| 1641 |
+
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
| 1642 |
+
rb_define_method(cModel, "n_vocab", ruby_whisper_c_model_n_vocab, 0);
|
| 1643 |
+
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_c_model_n_audio_ctx, 0);
|
| 1644 |
+
rb_define_method(cModel, "n_audio_state", ruby_whisper_c_model_n_audio_state, 0);
|
| 1645 |
+
rb_define_method(cModel, "n_audio_head", ruby_whisper_c_model_n_audio_head, 0);
|
| 1646 |
+
rb_define_method(cModel, "n_audio_layer", ruby_whisper_c_model_n_audio_layer, 0);
|
| 1647 |
+
rb_define_method(cModel, "n_text_ctx", ruby_whisper_c_model_n_text_ctx, 0);
|
| 1648 |
+
rb_define_method(cModel, "n_text_state", ruby_whisper_c_model_n_text_state, 0);
|
| 1649 |
+
rb_define_method(cModel, "n_text_head", ruby_whisper_c_model_n_text_head, 0);
|
| 1650 |
+
rb_define_method(cModel, "n_text_layer", ruby_whisper_c_model_n_text_layer, 0);
|
| 1651 |
+
rb_define_method(cModel, "n_mels", ruby_whisper_c_model_n_mels, 0);
|
| 1652 |
+
rb_define_method(cModel, "ftype", ruby_whisper_c_model_ftype, 0);
|
| 1653 |
+
rb_define_method(cModel, "type", ruby_whisper_c_model_type, 0);
|
| 1654 |
}
|
| 1655 |
#ifdef __cplusplus
|
| 1656 |
}
|
bindings/ruby/tests/helper.rb
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
require "test/unit"
|
| 2 |
+
require "whisper"
|
| 3 |
+
|
| 4 |
+
# Common Test::Unit base class for the test files; it only defines the
# fixture paths below so that subclasses can refer to MODEL and AUDIO.
class TestBase < Test::Unit::TestCase
|
| 5 |
+
# Model file path: three directories above this file, under models/.
MODEL = File.join(__dir__, "..", "..", "..", "models", "ggml-base.en.bin")
|
| 6 |
+
# Sample audio path: three directories above this file, under samples/.
AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")
|
| 7 |
+
end
|
bindings/ruby/tests/test_model.rb
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
require_relative "helper"
|
| 2 |
+
|
| 3 |
+
# Tests for the object returned by Whisper::Context#model.
class TestModel < TestBase
|
| 4 |
+
# Context#model must return an instance of exactly Whisper::Model.
def test_model
|
| 5 |
+
whisper = Whisper::Context.new(MODEL)
|
| 6 |
+
assert_instance_of Whisper::Model, whisper.model
|
| 7 |
+
end
|
| 8 |
+
|
| 9 |
+
# Checks every reader on the model against the values expected for the
# model file that MODEL points to.
def test_attributes
|
| 10 |
+
whisper = Whisper::Context.new(MODEL)
|
| 11 |
+
model = whisper.model
|
| 12 |
+
|
| 13 |
+
assert_equal 51864, model.n_vocab
|
| 14 |
+
assert_equal 1500, model.n_audio_ctx
|
| 15 |
+
assert_equal 512, model.n_audio_state
|
| 16 |
+
assert_equal 8, model.n_audio_head
|
| 17 |
+
assert_equal 6, model.n_audio_layer
|
| 18 |
+
assert_equal 448, model.n_text_ctx
|
| 19 |
+
assert_equal 512, model.n_text_state
|
| 20 |
+
assert_equal 8, model.n_text_head
|
| 21 |
+
assert_equal 6, model.n_text_layer
|
| 22 |
+
assert_equal 80, model.n_mels
|
| 23 |
+
assert_equal 1, model.ftype
|
| 24 |
+
assert_equal "base", model.type
|
| 25 |
+
end
|
| 26 |
+
|
| 27 |
+
# Same assertions as test_attributes, but the Context is never assigned to
# a variable and GC.start runs before the readers are called, so the model
# is the only object this test still holds.
def test_gc
|
| 28 |
+
model = Whisper::Context.new(MODEL).model
|
| 29 |
+
GC.start
|
| 30 |
+
|
| 31 |
+
assert_equal 51864, model.n_vocab
|
| 32 |
+
assert_equal 1500, model.n_audio_ctx
|
| 33 |
+
assert_equal 512, model.n_audio_state
|
| 34 |
+
assert_equal 8, model.n_audio_head
|
| 35 |
+
assert_equal 6, model.n_audio_layer
|
| 36 |
+
assert_equal 448, model.n_text_ctx
|
| 37 |
+
assert_equal 512, model.n_text_state
|
| 38 |
+
assert_equal 8, model.n_text_head
|
| 39 |
+
assert_equal 6, model.n_text_layer
|
| 40 |
+
assert_equal 80, model.n_mels
|
| 41 |
+
assert_equal 1, model.ftype
|
| 42 |
+
assert_equal "base", model.type
|
| 43 |
+
end
|
| 44 |
+
end
|
bindings/ruby/tests/test_package.rb
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
|
| 2 |
require 'tempfile'
|
| 3 |
require 'tmpdir'
|
| 4 |
require 'shellwords'
|
| 5 |
|
| 6 |
-
class TestPackage <
|
| 7 |
def test_build
|
| 8 |
Tempfile.create do |file|
|
| 9 |
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
|
|
|
|
| 1 |
+
require_relative "helper"
|
| 2 |
require 'tempfile'
|
| 3 |
require 'tmpdir'
|
| 4 |
require 'shellwords'
|
| 5 |
|
| 6 |
+
class TestPackage < TestBase
|
| 7 |
def test_build
|
| 8 |
Tempfile.create do |file|
|
| 9 |
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
|
bindings/ruby/tests/test_params.rb
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
-
|
| 2 |
-
require 'whisper'
|
| 3 |
|
| 4 |
-
class TestParams <
|
| 5 |
def setup
|
| 6 |
@params = Whisper::Params.new
|
| 7 |
end
|
|
|
|
| 1 |
+
require_relative "helper"
|
|
|
|
| 2 |
|
| 3 |
+
class TestParams < TestBase
|
| 4 |
def setup
|
| 5 |
@params = Whisper::Params.new
|
| 6 |
end
|
bindings/ruby/tests/test_segment.rb
CHANGED
|
@@ -1,18 +1,14 @@
|
|
| 1 |
-
|
| 2 |
-
require "whisper"
|
| 3 |
-
|
| 4 |
-
class TestSegment < Test::Unit::TestCase
|
| 5 |
-
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
| 6 |
|
|
|
|
| 7 |
class << self
|
| 8 |
attr_reader :whisper
|
| 9 |
|
| 10 |
def startup
|
| 11 |
-
@whisper = Whisper::Context.new(
|
| 12 |
params = Whisper::Params.new
|
| 13 |
params.print_timestamps = false
|
| 14 |
-
|
| 15 |
-
@whisper.transcribe(jfk, params)
|
| 16 |
end
|
| 17 |
end
|
| 18 |
|
|
@@ -60,7 +56,7 @@ class TestSegment < Test::Unit::TestCase
|
|
| 60 |
end
|
| 61 |
index += 1
|
| 62 |
end
|
| 63 |
-
whisper.transcribe(
|
| 64 |
assert_equal 0, seg.start_time
|
| 65 |
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
|
| 66 |
end
|
|
@@ -76,7 +72,7 @@ class TestSegment < Test::Unit::TestCase
|
|
| 76 |
assert_same seg, segment
|
| 77 |
return
|
| 78 |
end
|
| 79 |
-
whisper.transcribe(
|
| 80 |
end
|
| 81 |
|
| 82 |
private
|
|
|
|
| 1 |
+
require_relative "helper"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
class TestSegment < TestBase
|
| 4 |
class << self
|
| 5 |
attr_reader :whisper
|
| 6 |
|
| 7 |
def startup
|
| 8 |
+
@whisper = Whisper::Context.new(TestBase::MODEL)
|
| 9 |
params = Whisper::Params.new
|
| 10 |
params.print_timestamps = false
|
| 11 |
+
@whisper.transcribe(TestBase::AUDIO, params)
|
|
|
|
| 12 |
end
|
| 13 |
end
|
| 14 |
|
|
|
|
| 56 |
end
|
| 57 |
index += 1
|
| 58 |
end
|
| 59 |
+
whisper.transcribe(AUDIO, params)
|
| 60 |
assert_equal 0, seg.start_time
|
| 61 |
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
|
| 62 |
end
|
|
|
|
| 72 |
assert_same seg, segment
|
| 73 |
return
|
| 74 |
end
|
| 75 |
+
whisper.transcribe(AUDIO, params)
|
| 76 |
end
|
| 77 |
|
| 78 |
private
|
bindings/ruby/tests/test_whisper.rb
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
require
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
|
|
|
|
| 7 |
def setup
|
| 8 |
@params = Whisper::Params.new
|
| 9 |
end
|
| 10 |
|
| 11 |
def test_whisper
|
| 12 |
-
@whisper = Whisper::Context.new(
|
| 13 |
params = Whisper::Params.new
|
| 14 |
params.print_timestamps = false
|
| 15 |
|
| 16 |
-
|
| 17 |
-
@whisper.transcribe(jfk, params) {|text|
|
| 18 |
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
| 19 |
}
|
| 20 |
end
|
|
@@ -24,11 +24,10 @@ class TestWhisper < Test::Unit::TestCase
|
|
| 24 |
attr_reader :whisper
|
| 25 |
|
| 26 |
def startup
|
| 27 |
-
@whisper = Whisper::Context.new(
|
| 28 |
params = Whisper::Params.new
|
| 29 |
params.print_timestamps = false
|
| 30 |
-
|
| 31 |
-
@whisper.transcribe(jfk, params)
|
| 32 |
end
|
| 33 |
end
|
| 34 |
|
|
@@ -96,4 +95,33 @@ class TestWhisper < Test::Unit::TestCase
|
|
| 96 |
Whisper.lang_str_full(Whisper.lang_max_id + 1)
|
| 97 |
end
|
| 98 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
end
|
|
|
|
| 1 |
+
require_relative "helper"
|
| 2 |
+
require "stringio"
|
| 3 |
|
| 4 |
+
# Exists to detect memory-related bugs
|
| 5 |
+
Whisper.log_set ->(level, buffer, user_data) {}, nil
|
| 6 |
|
| 7 |
+
class TestWhisper < TestBase
|
| 8 |
def setup
|
| 9 |
@params = Whisper::Params.new
|
| 10 |
end
|
| 11 |
|
| 12 |
def test_whisper
|
| 13 |
+
@whisper = Whisper::Context.new(MODEL)
|
| 14 |
params = Whisper::Params.new
|
| 15 |
params.print_timestamps = false
|
| 16 |
|
| 17 |
+
@whisper.transcribe(AUDIO, params) {|text|
|
|
|
|
| 18 |
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
| 19 |
}
|
| 20 |
end
|
|
|
|
| 24 |
attr_reader :whisper
|
| 25 |
|
| 26 |
def startup
|
| 27 |
+
@whisper = Whisper::Context.new(TestBase::MODEL)
|
| 28 |
params = Whisper::Params.new
|
| 29 |
params.print_timestamps = false
|
| 30 |
+
@whisper.transcribe(TestBase::AUDIO, params)
|
|
|
|
| 31 |
end
|
| 32 |
end
|
| 33 |
|
|
|
|
| 95 |
Whisper.lang_str_full(Whisper.lang_max_id + 1)
|
| 96 |
end
|
| 97 |
end
|
| 98 |
+
|
| 99 |
+
# Registers a log callback through Whisper.log_set, creates a Context, and
# checks what the callback received.
# Note: the callback installed here is not replaced again inside this method.
def test_log_set
|
| 100 |
+
user_data = Object.new
|
| 101 |
+
logs = []
|
| 102 |
+
# Records every invocation as a [level, buffer, udata] triple.
log_callback = ->(level, buffer, udata) {
|
| 103 |
+
logs << [level, buffer, udata]
|
| 104 |
+
}
|
| 105 |
+
Whisper.log_set log_callback, user_data
|
| 106 |
+
Whisper::Context.new(MODEL)
|
| 107 |
+
|
| 108 |
+
# More than 30 callback invocations are expected after creating the Context.
assert logs.length > 30
|
| 109 |
+
logs.each do |log|
|
| 110 |
+
assert_equal Whisper::LOG_LEVEL_INFO, log[0]
|
| 111 |
+
# assert_same checks object identity: the very object given to log_set must come back.
assert_same user_data, log[2]
|
| 112 |
+
end
|
| 113 |
+
end
|
| 114 |
+
|
| 115 |
+
# Installs a no-op log callback, swaps $stderr for a StringIO while a Context
# is created, and asserts that nothing was written to that StringIO.
# NOTE(review): reassigning $stderr only captures writes made through Ruby's
# $stderr object; output written straight to the process's stderr descriptor
# by native code would not show up in dev. Confirm this is the intended check.
def test_log_suppress
|
| 116 |
+
# Keep the current $stderr so the ensure clause can put it back.
stderr = $stderr
|
| 117 |
+
Whisper.log_set ->(level, buffer, user_data) {
|
| 118 |
+
# do nothing
|
| 119 |
+
}, nil
|
| 120 |
+
dev = StringIO.new("")
|
| 121 |
+
$stderr = dev
|
| 122 |
+
Whisper::Context.new(MODEL)
|
| 123 |
+
assert_empty dev.string
|
| 124 |
+
ensure
|
| 125 |
+
# Restores $stderr whether or not the assertion above passed.
$stderr = stderr
|
| 126 |
+
end
|
| 127 |
end
|