Text-to-Speech
English
Chinese
zhu-han committed on
Commit
fdc91b8
·
verified ·
1 Parent(s): 64e06a0

Upload 6 files

Browse files
speaker_similarity/pyannote/pyannote_diarization_config.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 3.1.0
2
+
3
+ pipeline:
4
+ name: pyannote.audio.pipelines.SpeakerDiarization
5
+ params:
6
+ clustering: AgglomerativeClustering
7
+ # embedding: pyannote/wespeaker-voxceleb-resnet34-LM # if you want to use the HF model
8
+ embedding: /star-home/zhuhan/model/huggingface/pyannote/pyannote_model_wespeaker-voxceleb-resnet34-LM.bin # if you want to use the local model
9
+ embedding_batch_size: 32
10
+ embedding_exclude_overlap: true
11
+ # segmentation: pyannote/segmentation-3.0 # if you want to use the HF model
12
+ segmentation: /star-home/zhuhan/model/huggingface/pyannote/pyannote_model_segmentation-3.0.bin # if you want to use the local model
13
+ segmentation_batch_size: 32
14
+
15
+ params:
16
+ clustering:
17
+ method: centroid
18
+ min_cluster_size: 12
19
+ threshold: 0.7045654963945799
20
+ segmentation:
21
+ min_duration_off: 0.0
speaker_similarity/pyannote/pyannote_model_segmentation-3.0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da85c29829d4002daedd676e012936488234d9255e65e86dfab9bec6b1729298
3
+ size 5905440
speaker_similarity/pyannote/pyannote_model_wespeaker-voxceleb-resnet34-LM.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:366edf44f4c80889a3eb7a9d7bdf02c4aede3127f7dd15e274dcdb826b143c56
3
+ size 26645418
speaker_similarity/wavlm_large/hubconf.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ # -*- coding: utf-8 -*- #
5
+ """*********************************************************************************************"""
6
+ # FileName [ upstream/wavlm/hubconf.py ]
7
+ # Synopsis [ the WavLM torch hubconf ]
8
+ # Author [ Microsoft ]
9
+ """*********************************************************************************************"""
10
+
11
+
12
+ import os
13
+
14
+ from s3prl.util.download import _urls_to_filepaths
15
+
16
+ from s3prl.upstream.wavlm.expert import UpstreamExpert as _UpstreamExpert
17
+
18
+
19
def wavlm_local(ckpt, *args, **kwargs):
    """
    Build the WavLM upstream from a checkpoint file on the local filesystem.

    ckpt (str): PATH to an existing checkpoint file
    *args, **kwargs: forwarded unchanged to the upstream expert constructor

    Raises:
        FileNotFoundError: if ``ckpt`` is not an existing regular file
    """
    # Validate explicitly instead of using ``assert``: assertions are removed
    # when Python runs with -O, and a bare AssertionError would not tell the
    # user which path was missing.
    if not os.path.isfile(ckpt):
        raise FileNotFoundError(f"WavLM checkpoint not found: {ckpt!r}")
    return _UpstreamExpert(ckpt, *args, **kwargs)
26
+
27
+
28
def wavlm_url(ckpt, refresh=False, *args, **kwargs):
    """
    Build the WavLM upstream from a checkpoint referenced by URL.

    ckpt (str): URL of the checkpoint
    refresh (bool): whether to download ckpt/config again if existed
    *args, **kwargs: forwarded unchanged to wavlm_local
    """
    # The download helper's result is what wavlm_local receives as its
    # checkpoint path.
    local_ckpt = _urls_to_filepaths(ckpt, refresh=refresh)
    return wavlm_local(local_ckpt, *args, **kwargs)
35
+
36
+
37
def wavlm(refresh=False, *args, **kwargs):
    """
    Default entry point: delegates to the Base-Plus model.

    refresh (bool): whether to download ckpt/config again if existed
    *args, **kwargs: forwarded unchanged to wavlm_base_plus

    Note: ``refresh`` is forwarded by keyword, so any extra positional
    argument would also bind to ``refresh`` in the callee and raise TypeError.
    """
    default_variant = wavlm_base_plus
    return default_variant(*args, refresh=refresh, **kwargs)
43
+
44
+
45
def wavlm_base(refresh=False, *args, **kwargs):
    """
    Entry point for the Base model.

    refresh (bool): whether to download ckpt/config again if existed
    *args, **kwargs: forwarded to wavlm_url
    """
    # Pin the checkpoint to the Base weights; a caller-supplied ``ckpt``
    # keyword is replaced here.
    kwargs.update(
        ckpt="https://huggingface.co/s3prl/converted_ckpts/resolve/main/wavlm_base.pt"
    )
    return wavlm_url(*args, refresh=refresh, **kwargs)
55
+
56
+
57
def wavlm_base_plus(refresh=False, *args, **kwargs):
    """
    Entry point for the Base-Plus model.

    refresh (bool): whether to download ckpt/config again if existed
    *args, **kwargs: forwarded to wavlm_url
    """
    # Pin the checkpoint to the Base-Plus weights; a caller-supplied ``ckpt``
    # keyword is replaced here.
    kwargs.update(
        ckpt="https://huggingface.co/s3prl/converted_ckpts/resolve/main/wavlm_base_plus.pt"
    )
    return wavlm_url(*args, refresh=refresh, **kwargs)
67
+
68
+
69
def wavlm_large(refresh=False, *args, **kwargs):
    """
    Entry point for the Large model.

    refresh (bool): whether to download ckpt/config again if existed
    *args, **kwargs: forwarded to wavlm_url
    """
    # Pin the checkpoint to the Large weights; a caller-supplied ``ckpt``
    # keyword is replaced here.
    kwargs.update(
        ckpt="https://huggingface.co/s3prl/converted_ckpts/resolve/main/wavlm_large.pt"
    )
    return wavlm_url(*args, refresh=refresh, **kwargs)
speaker_similarity/wavlm_large/wavlm_large.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb4b3c3e6aa567f0a997b30855859cb81528ee8078802af439f7b2da0bf100f
3
+ size 1261965425
speaker_similarity/wavlm_large_finetune.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51f07e3b94d9e0262a6a675ef5a087be3dd09e8c62e9d886827f44f82fe7f94b
3
+ size 1301926579