komyu1227 committed on
Commit
d84e695
·
1 Parent(s): e0ef507
Files changed (10) hide show
  1. .dockerignore +0 -3
  2. .gitattributes +0 -35
  3. .gitignore +0 -3
  4. .python-version +0 -1
  5. Dockerfile +0 -23
  6. README.md +0 -10
  7. docker-compose.yaml +0 -9
  8. pyproject.toml +0 -21
  9. serve.py +0 -60
  10. uv.lock +0 -0
.dockerignore DELETED
@@ -1,3 +0,0 @@
1
- .venv
2
- __pycache__
3
- ReazonSpeech/
 
 
 
 
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore DELETED
@@ -1,3 +0,0 @@
1
- .venv
2
- __pycache__
3
- ReazonSpeech/
 
 
 
 
.python-version DELETED
@@ -1 +0,0 @@
1
- 3.11
 
 
Dockerfile DELETED
@@ -1,23 +0,0 @@
1
- FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu20.04
2
-
3
- RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata \
4
- curl \
5
- git \
6
- cmake \
7
- build-essential \
8
- ffmpeg \
9
- && apt-get -y clean all
10
-
11
- RUN curl -LsSf https://astral.sh/uv/install.sh | sh
12
- ENV PATH="/root/.local/bin/:$PATH"
13
-
14
- RUN uv python install 3.11
15
-
16
- WORKDIR /app
17
- RUN git clone https://github.com/reazon-research/ReazonSpeech
18
-
19
- COPY . .
20
-
21
- RUN uv sync
22
-
23
- CMD ["uv", "run", "python", "serve.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,10 +0,0 @@
1
- ---
2
- title: Stt Gpu Test
3
- emoji: πŸ‘
4
- colorFrom: purple
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
docker-compose.yaml DELETED
@@ -1,9 +0,0 @@
1
- version: '3.8'
2
-
3
- services:
4
- stt:
5
- build:
6
- context: .
7
- dockerfile: Dockerfile
8
- ports:
9
- - "8000:8000"
 
 
 
 
 
 
 
 
 
 
pyproject.toml DELETED
@@ -1,21 +0,0 @@
1
- [project]
2
- name = "stt"
3
- version = "0.1.0"
4
- description = "Add your description here"
5
- readme = "README.md"
6
- requires-python = ">=3.11"
7
- dependencies = [
8
- "fastapi[all]>=0.115.12",
9
- "numpy<2",
10
- "pydub>=0.25.1",
11
- "torch",
12
- "reazonspeech-espnet-asr",
13
- ]
14
-
15
- [[tool.uv.index]]
16
- name = "torch-cuda"
17
- url = "https://download.pytorch.org/whl/cu126"
18
- explicit = true
19
-
20
- [tool.uv.sources]
21
- reazonspeech-espnet-asr = { path = "ReazonSpeech/pkg/espnet-asr" }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
serve.py DELETED
@@ -1,60 +0,0 @@
1
- from reazonspeech.espnet.asr import load_model, transcribe, audio_from_numpy
2
- from espnet2.bin.asr_inference import Speech2Text
3
- import torch
4
- from fastapi import FastAPI, HTTPException, UploadFile, File
5
- import uvicorn
6
- import numpy as np
7
- import io
8
- from pydub import AudioSegment
9
- import time
10
- device = "cuda" if torch.cuda.is_available() else "cpu"
11
-
12
- model = load_model(device)
13
-
14
- print(f"Model loaded on {device}")
15
-
16
- def transcribe_audio(audio_data_bytes):
17
- try:
18
- start_time = time.time()
19
- audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_data_bytes))
20
-
21
- # Get audio data as numpy array
22
- audio_data_int16 = np.array(audio_segment.get_array_of_samples())
23
- # Convert to float32 normalized to [-1, 1]
24
- audio_data_float32 = audio_data_int16.astype(np.float32) / 32768.0
25
-
26
- # Process with reazonspeech
27
- audio = audio_from_numpy(audio_data_float32, samplerate=audio_segment.frame_rate)
28
- result = transcribe(model, audio)
29
- end_time = time.time()
30
- print(f"Time taken: {end_time - start_time} seconds")
31
- return result
32
- except Exception as e:
33
- raise HTTPException(status_code=500, detail=str(e))
34
-
35
- app = FastAPI()
36
-
37
- @app.post("/transcribe")
38
- async def transcribe_endpoint(file: UploadFile = File(...)):
39
- audio_data = await file.read()
40
- try:
41
- result = transcribe_audio(audio_data)
42
- return {
43
- "result": [
44
- {
45
- "text": result.text
46
- }
47
- ]
48
- }
49
- except HTTPException as e:
50
- return {
51
- "result": [
52
- {
53
- "text": "エラーが発生しました, もう一度試してください",
54
- }
55
- ]
56
- }
57
-
58
-
59
- if __name__ == "__main__":
60
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uv.lock DELETED
The diff for this file is too large to render. See raw diff