hlrc / tts_bridge / mary / mary_tts_bridge / MaryTTSBridge.py @ 10e164da
History | View | Annotate | Download (4.683 KB)
1 |
#!/usr/bin/python
|
---|---|
2 |
"""
|
3 |
This file is part of hlrc
|
4 |
|
5 |
Copyright(c) sschulz <AT> techfak.uni-bielefeld.de
|
6 |
http://opensource.cit-ec.de/projects/hlrc
|
7 |
|
8 |
This file may be licensed under the terms of the
|
9 |
GNU General Public License Version 3 (the ``GPL''),
|
10 |
or (at your option) any later version.
|
11 |
|
12 |
Software distributed under the License is distributed
|
13 |
on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
|
14 |
express or implied. See the GPL for the specific language
|
15 |
governing rights and limitations.
|
16 |
|
17 |
You should have received a copy of the GPL along with this
|
18 |
program. If not, go to http://www.gnu.org/licenses/gpl.html
|
19 |
or write to the Free Software Foundation, Inc.,
|
20 |
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
21 |
|
22 |
The development of this software was supported by the
|
23 |
Excellence Cluster EXC 277 Cognitive Interaction Technology.
|
24 |
The Excellence Cluster EXC 277 is a grant of the Deutsche
|
25 |
Forschungsgemeinschaft (DFG) in the context of the German
|
26 |
Excellence Initiative.
|
27 |
"""
|
28 |
|
29 |
import logging |
30 |
import rospy |
31 |
from hlrc_server.msg import * |
32 |
import time |
33 |
import sys |
34 |
import actionlib |
35 |
from io import BytesIO |
36 |
import wave |
37 |
from MaryTTSClient import * |
38 |
from cStringIO import StringIO |
39 |
|
40 |
class MaryTTSBridge(object):
    """ROS action server that answers tts goals with data from a MaryTTS client.

    For every incoming goal the text is passed to the MaryTTSClient to obtain
    audio data and a phoneme listing; both are packed into an utterance
    message that is returned as the action result.
    """

    def __init__(self, topic, voice="cmu-slt-hsmm", locale="en_GB", tts_host="127.0.0.1", tts_port=59125, loglevel=logging.WARNING):
        """initialise
        :param topic: name under which the action server is registered
        :param voice: voice name handed to the MaryTTSClient
        :param locale: locale handed to the MaryTTSClient
        :param tts_host: tts server host handed to the MaryTTSClient
        :param tts_port: tts server port handed to the MaryTTSClient
        :param loglevel: optional log level
        """
        self.loglevel = loglevel
        self.logger = logging.getLogger(__name__)
        # create nice and actually usable formatter and add it to the handler
        self.config_logger(loglevel)
        self.logger.info("starting MaryTTSBridge on topic '%s'", topic)

        self.tts_client = MaryTTSClient(voice, locale, tts_host, tts_port, loglevel)

        rospy.init_node('MaryTTSBridge')

        self._action_name = topic
        # auto_start is disabled and the server is started explicitly below
        self._as = actionlib.SimpleActionServer(self._action_name, ttsAction, execute_cb=self.execute_cb, auto_start=False)
        self._as.start()

    def __del__(self):
        """destructor
        """
        self.logger.debug("destructor of MaryTTSBridge called")

    def config_logger(self, level):
        """initialise a nice logger formatting
        :param level: log level
        """
        self.logger.setLevel(level)
        # logging.getLogger() returns the same logger object for the same
        # name, so only attach a handler when none is installed yet;
        # otherwise every further instance would duplicate each log line.
        if not self.logger.handlers:
            formatter = logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s')
            ch = logging.StreamHandler()
            ch.setFormatter(formatter)
            self.logger.addHandler(ch)

    def create_soundchunk(self, audio_data):
        """build a soundchunk message from wave file data
        :param audio_data: complete wave file content (header and samples)
        :return: filled soundchunk, or an empty soundchunk if the sample
                 width of the wave data is not 1, 2 or 3 bytes
        """
        # extract wave parameters from the data
        wav = wave.open(BytesIO(audio_data))
        try:
            channels = wav.getnchannels()
            rate = wav.getframerate()
            samplecount = wav.getnframes()
            sample_width = wav.getsampwidth()
        finally:
            # always release the wave reader again
            wav.close()

        s = soundchunk()

        # sample format:
        # NOTE(review): wave files normally store 16/24 bit samples as signed
        # values while the constants used here are named SAMPLE_U* -- confirm
        # against the soundchunk message definition.
        sample_types = {1: s.SAMPLE_U8, 2: s.SAMPLE_U16, 3: s.SAMPLE_U24}
        sample_type = sample_types.get(sample_width)
        if sample_type is None:
            self.logger.error("ERROR: invalid sample width %s detected", sample_width)
            # hand back an untouched (empty) soundchunk, as before
            return s

        s.channels = channels
        # the complete wave data (including its header) is forwarded
        s.data = audio_data
        s.endianess = s.ENDIAN_LITTLE  # guessed?!
        s.rate = rate
        s.samplecount = samplecount
        s.sample_type = sample_type

        self.logger.info("created soundchunk with %s samples", s.samplecount)

        return s

    @staticmethod
    def _parse_phoneme_lines(phoneme_str):
        """parse a phoneme listing into (symbol, duration) tuples
        Every data line is split on whitespace; the first field is read as a
        float time stamp and the third field as the phoneme symbol. Lines
        starting with '#' and blank lines are skipped. The duration of an
        entry is the difference to the previous time stamp multiplied by 1000
        (presumably seconds to milliseconds -- confirm against the tts output).
        :param phoneme_str: multi line phoneme listing
        :return: list of (symbol, duration) tuples, duration as int
        :raises IndexError: if a data line has fewer than three fields
        :raises ValueError: if the time stamp of a data line is not a number
        """
        last = 0.0
        entries = []
        for raw_line in phoneme_str.splitlines():
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            fields = line.split()
            symbol = fields[2]
            now = float(fields[0])
            # round instead of truncating: (0.3 - 0.2) * 1000 evaluates to
            # 99.99999999999997 and would otherwise end up as 99
            duration = int(round((now - last) * 1000))
            last = now
            entries.append((symbol, duration))
        return entries

    def create_phonemes(self, phoneme_str):
        """convert a phoneme listing into a list of phoneme messages
        :param phoneme_str: multi line phoneme listing as returned by the tts client
        :return: list of phoneme messages
        """
        plist = [phoneme(symbol, duration) for symbol, duration in self._parse_phoneme_lines(phoneme_str)]

        self.logger.info("created phonemelist with %s elements", len(plist))

        return plist

    def create_utterance(self, text, audio_data, phoneme_list):
        """assemble an utterance message
        :param text: the text that was synthesised
        :param audio_data: wave file data, see create_soundchunk
        :param phoneme_list: phoneme listing string, see create_phonemes
        :return: utterance message with text, audio and phonemes set
        """
        u = utterance()
        u.text = text
        u.audio = self.create_soundchunk(audio_data)
        u.phonemes = self.create_phonemes(phoneme_list)

        self.logger.info("created utterance for '%s'", u.text)
        return u

    def execute_cb(self, goal):
        """action server callback: synthesise goal.text and publish the result
        The goal is set to succeeded with the created utterance, or set to
        aborted with an empty result if requesting or converting the tts data
        fails.
        :param goal: incoming goal, its text attribute is synthesised
        """
        self.logger.info("incoming utterance '%s'", goal.text)

        result = ttsResult()

        try:
            # incoming msg, ask mary tts for data:
            audio = self.tts_client.generate_audio(goal.text)
            phonelist = self.tts_client.generate_phonemes(goal.text)
            # the conversion is part of the guarded section as well, otherwise
            # a parse error would leave the goal without any final state
            result.utterance = self.create_utterance(goal.text, audio, phonelist)
        except Exception:
            # KeyboardInterrupt / SystemExit are deliberately not caught here
            self.logger.error("failed to create utterance error = '%s'", sys.exc_info()[1])
            self._as.set_aborted(result)
            return

        self._as.set_succeeded(result)

    def run(self):
        """run the main loop, blocks in rospy.spin()
        """
        rospy.spin()
165 |
|
166 |
#test code
|
167 |
def main():
    """Start a MaryTTSBridge on the topic given as the only command line argument.

    Prints a usage message and exits with status 1 if the number of
    arguments is wrong.
    """
    # Strip ROS remapping arguments (e.g. the __name:=... / __log:=...
    # arguments appended by roslaunch) before counting, otherwise the node
    # would refuse to start when launched that way.
    argv = rospy.myargv(argv=sys.argv)
    if len(argv) != 2:
        print("> usage: " + argv[0] + " <topic>\n\n")
        sys.exit(1)

    bridge = MaryTTSBridge(topic=argv[1], loglevel=logging.INFO)
    bridge.run()


if __name__ == "__main__":
    main()