class TextTransform:
    """Two-way lookup between characters and integer labels.

    ``char_map`` maps each symbol to its integer label; the blank character is
    stored there under the token ``'<space>'``. ``index_map`` maps each label
    back to a character, with label 1 yielding a real blank ``' '``.
    """

    def __init__(self):
        # Symbols listed in label order:
        #   0 -> apostrophe, 1 -> the space token, 2..27 -> 'a'..'z'.
        symbols = ["'", "<space>"] + list("abcdefghijklmnopqrstuvwxyz")

        self.char_map = {symbol: label for label, symbol in enumerate(symbols)}
        self.index_map = dict(enumerate(symbols))

        # Decoding should produce an actual blank, not the '<space>' token.
        self.index_map[1] = ' '
建立編碼與解碼的函式。
# 文字轉數字
def text_to_int(self, text):
    """Encode ``text`` as a list of integer labels via ``self.char_map``.

    A blank character is looked up under the ``'<space>'`` key; every other
    character is looked up as-is, so a character that is not a key of
    ``self.char_map`` raises ``KeyError``.
    """
    return [
        self.char_map['<space>' if symbol == ' ' else symbol]
        for symbol in text
    ]
# 數字轉文字
def int_to_text(self, labels):
    """Decode a sequence of integer labels into a string via ``self.index_map``.

    Each label is looked up in ``self.index_map`` and the resulting characters
    are concatenated in order; a label missing from the map raises ``KeyError``.
    """
    return ''.join(self.index_map[label] for label in labels)
在訓練之前要先刪除過長的音檔,本次訓練刪除長度超過 30 秒的音檔,如下所示。
# Delete every audio file directly under `path` whose duration exceeds
# 30 seconds, so over-long clips are removed before training.
filelist = os.listdir(path)
for filename in filelist:
    file_path = os.path.join(path, filename)
    # Skip sub-directories and other non-file entries; only files can be decoded.
    if not os.path.isfile(file_path):
        continue
    # sr=None keeps the file's native sampling rate, so the audio is not
    # resampled just to measure how long it is.
    y, sr = librosa.load(file_path, sr=None)
    # Pass y/sr by keyword: recent librosa releases accept them as
    # keyword-only arguments, and older releases accept keywords as well.
    duration = librosa.get_duration(y=y, sr=sr)
    # Remove clips longer than 30 seconds.
    if duration > 30:
        os.remove(file_path)
[1] Deep Speech 2: End-to-End Speech Recognition in English and Mandarin @[Amodei, D., Ananthanarayanan, S., Anubhai, R., Bai, J., Battenberg, E., et al.](https://arxiv.org/abs/1512.02595)

[2] pytorch載入語音類自定義資料集 @[email protected]