Revision f27f1860
tts_bridge/mary/mary_tts_bridge/MaryTTSBridge.py | ||
---|---|---|
42 | 42 |
except ImportError: |
43 | 43 |
from cStringIO import StringIO |
44 | 44 |
|
45 |
class MaryTTSBridge(object): |
|
46 |
#_feedback = ttsActionFeedback() |
|
47 |
#_result = ttsActionResult() |
|
48 | 45 |
|
46 |
class MaryTTSBridge(object): |
|
47 |
# _feedback = ttsActionFeedback() |
|
48 |
# _result = ttsActionResult() |
|
49 | 49 |
|
50 | 50 |
def __init__(self, topic, voice="cmu-slt-hsmm", locale="en_GB", tts_host="127.0.0.1", tts_port=59125, loglevel=logging.WARNING): |
51 |
"""initialise |
|
52 |
:param loglevel: optional log level |
|
53 |
""" |
|
54 |
self.loglevel = loglevel |
|
55 |
self.logger = logging.getLogger(__name__) |
|
56 |
# create nice and actually usable formatter and add it to the handler |
|
57 |
self.config_logger(loglevel) |
|
58 |
self.logger.info("starting MaryTTSBridge on topic '"+topic+"'") |
|
59 |
|
|
60 |
self.tts_client = MaryTTSClient(voice, locale, tts_host, tts_port, loglevel) |
|
51 |
"""initialise |
|
52 |
:param loglevel: optional log level |
|
53 |
""" |
|
54 |
self.loglevel = loglevel |
|
55 |
self.logger = logging.getLogger(__name__) |
|
56 |
# create nice and actually usable formatter and add it to the handler |
|
57 |
self.config_logger(loglevel) |
|
58 |
self.logger.info("starting MaryTTSBridge on topic '"+topic+"'") |
|
61 | 59 |
|
62 |
rospy.init_node('MaryTTSBridge', anonymous=True) |
|
63 |
|
|
64 |
self._action_name = topic |
|
65 |
self._as = actionlib.SimpleActionServer(self._action_name, ttsAction, execute_cb = self.execute_cb, auto_start = False) |
|
66 |
self._as.start() |
|
60 |
self.tts_client = MaryTTSClient(voice, locale, tts_host, tts_port, loglevel) |
|
67 | 61 |
|
62 |
rospy.init_node('MaryTTSBridge', anonymous=True) |
|
68 | 63 |
|
64 |
self._action_name = topic |
|
65 |
self._as = actionlib.SimpleActionServer(self._action_name, ttsAction, execute_cb = self.execute_cb, auto_start = False) |
|
66 |
self._as.start() |
|
69 | 67 |
|
70 | 68 |
def __del__(self): |
71 |
"""destructor
|
|
72 |
"""
|
|
73 |
self.logger.debug("destructor of MaryTTSBridge called")
|
|
69 |
"""destructor
|
|
70 |
"""
|
|
71 |
self.logger.debug("destructor of MaryTTSBridge called")
|
|
74 | 72 |
|
75 | 73 |
def config_logger(self, level): |
76 |
"""initialise a nice logger formatting
|
|
77 |
:param level: log level
|
|
78 |
"""
|
|
79 |
formatter = logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s')
|
|
80 |
ch = logging.StreamHandler()
|
|
81 |
#ch.setLevel(level)
|
|
82 |
ch.setFormatter(formatter)
|
|
83 |
self.logger.setLevel(level)
|
|
84 |
self.logger.addHandler(ch)
|
|
74 |
"""initialise a nice logger formatting
|
|
75 |
:param level: log level
|
|
76 |
"""
|
|
77 |
formatter = logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s')
|
|
78 |
ch = logging.StreamHandler()
|
|
79 |
#ch.setLevel(level)
|
|
80 |
ch.setFormatter(formatter)
|
|
81 |
self.logger.setLevel(level)
|
|
82 |
self.logger.addHandler(ch)
|
|
85 | 83 |
|
86 | 84 |
def create_soundchunk(self, audio_data): |
87 |
#extract wave from data |
|
88 |
fio = BytesIO(audio_data) |
|
89 |
wav = wave.open(fio) |
|
90 |
|
|
91 |
s = soundchunk() |
|
92 |
|
|
93 |
s.channels = wav.getnchannels() |
|
94 |
s.data = audio_data |
|
95 |
s.endianess = s.ENDIAN_LITTLE #guessed?! |
|
96 |
s.rate = wav.getframerate() |
|
97 |
s.samplecount = wav.getnframes() |
|
98 |
|
|
99 |
|
|
100 |
#sample format: |
|
101 |
sample_width = wav.getsampwidth() |
|
102 |
if (sample_width == 1): |
|
103 |
s.sample_type = s.SAMPLE_U8 |
|
104 |
elif (sample_width == 2): |
|
105 |
s.sample_type = s.SAMPLE_U16 |
|
106 |
elif (sample_width == 3): |
|
107 |
s.sample_type = s.SAMPLE_U24 |
|
108 |
else: |
|
109 |
self.logger.error("ERROR: invalid sample width "+str(sample_width) + " detected") |
|
110 |
s = soundchunk() |
|
111 |
|
|
112 |
self.logger.info("created soundchunk with "+str(s.samplecount)+" samples") |
|
113 |
|
|
114 |
return s |
|
85 |
#extract wave from data |
|
86 |
fio = BytesIO(audio_data) |
|
87 |
wav = wave.open(fio) |
|
88 |
|
|
89 |
s = soundchunk() |
|
90 |
|
|
91 |
s.channels = wav.getnchannels() |
|
92 |
s.data = audio_data |
|
93 |
s.endianess = s.ENDIAN_LITTLE # guessed?! |
|
94 |
s.rate = wav.getframerate() |
|
95 |
s.samplecount = wav.getnframes() |
|
96 |
|
|
97 |
# sample format: |
|
98 |
sample_width = wav.getsampwidth() |
|
99 |
if (sample_width == 1): |
|
100 |
s.sample_type = s.SAMPLE_U8 |
|
101 |
elif (sample_width == 2): |
|
102 |
s.sample_type = s.SAMPLE_U16 |
|
103 |
elif (sample_width == 3): |
|
104 |
s.sample_type = s.SAMPLE_U24 |
|
105 |
else: |
|
106 |
self.logger.error("ERROR: invalid sample width "+str(sample_width) + " detected") |
|
107 |
s = soundchunk() |
|
108 |
|
|
109 |
self.logger.info("created soundchunk with "+str(s.samplecount)+" samples") |
|
110 |
|
|
111 |
return s |
|
115 | 112 |
|
116 | 113 |
def create_phonemes(self, phoneme_bytes): |
117 |
last = 0.0
|
|
118 |
plist = []
|
|
114 |
last = 0.0
|
|
115 |
plist = []
|
|
119 | 116 |
|
120 |
sio = StringIO(phoneme_bytes.decode('ascii'))
|
|
121 |
for line in sio:
|
|
122 |
if (line[0] != '#'):
|
|
123 |
phoneme_list = line.split(" ")
|
|
117 |
sio = StringIO(phoneme_bytes.decode('ascii'))
|
|
118 |
for line in sio:
|
|
119 |
if (line[0] != '#'):
|
|
120 |
phoneme_list = line.split(" ")
|
|
124 | 121 |
if (line == '\n'): |
125 |
#ignore empty lines |
|
122 |
# ignore empty lines
|
|
126 | 123 |
continue |
127 | 124 |
elif (len(phoneme_list) != 3): |
128 | 125 |
print("> could not split line '%s' during phoneme seperation\n" % (line)) |
129 | 126 |
else: |
130 |
symbol = phoneme_list[2]
|
|
131 |
symbol = symbol.rstrip()
|
|
127 |
symbol = phoneme_list[2]
|
|
128 |
symbol = symbol.rstrip()
|
|
132 | 129 |
|
133 |
now = float(phoneme_list[0])
|
|
134 |
duration = (now - last)*1000
|
|
135 |
last = now
|
|
136 |
plist.append(phoneme(symbol, int(duration)))
|
|
130 |
now = float(phoneme_list[0])
|
|
131 |
duration = (now - last)*1000
|
|
132 |
last = now
|
|
133 |
plist.append(phoneme(symbol, int(duration)))
|
|
137 | 134 |
|
138 |
self.logger.info("created phonemelist with " + str(len(plist)) + " elements")
|
|
135 |
self.logger.info("created phonemelist with " + str(len(plist)) + " elements")
|
|
139 | 136 |
|
140 |
return plist
|
|
137 |
return plist
|
|
141 | 138 |
|
142 | 139 |
def create_utterance(self, text, audio_data, phoneme_list): |
143 |
u = utterance()
|
|
144 |
u.text = text
|
|
145 |
u.audio = self.create_soundchunk(audio_data)
|
|
146 |
u.phonemes = self.create_phonemes(phoneme_list)
|
|
140 |
u = utterance()
|
|
141 |
u.text = text
|
|
142 |
u.audio = self.create_soundchunk(audio_data)
|
|
143 |
u.phonemes = self.create_phonemes(phoneme_list)
|
|
147 | 144 |
|
148 |
self.logger.info("created utterance for 'phonemelist with '" + u.text + "'")
|
|
149 |
return u
|
|
145 |
self.logger.info("created utterance for 'phonemelist with '" + u.text + "'")
|
|
146 |
return u
|
|
150 | 147 |
|
151 | 148 |
def get_error_message(self): |
152 | 149 |
data_wav = pkgutil.get_data('mary_tts_bridge', 'data/connection_failed.wav') |
... | ... | |
154 | 151 |
return (data_wav, data_phonemes) |
155 | 152 |
|
156 | 153 |
def execute_cb(self, goal): |
157 |
self.logger.info("incoming utterance '" + goal.text + "'")
|
|
154 |
self.logger.info("incoming utterance '" + goal.text + "'")
|
|
158 | 155 |
|
159 |
success = True
|
|
160 |
result = ttsResult()
|
|
156 |
success = True
|
|
157 |
result = ttsResult()
|
|
161 | 158 |
|
162 |
#incoming msg, ask mary tts for data:
|
|
163 |
try:
|
|
164 |
audio = self.tts_client.generate_audio(goal.text)
|
|
165 |
phonelist = self.tts_client.generate_phonemes(goal.text)
|
|
159 |
# incoming msg, ask mary tts for data:
|
|
160 |
try:
|
|
161 |
audio = self.tts_client.generate_audio(goal.text)
|
|
162 |
phonelist = self.tts_client.generate_phonemes(goal.text)
|
|
166 | 163 |
|
167 |
except:
|
|
168 |
self.logger.error("failed to create utterance error = '" + str(sys.exc_info()[1]) + "'")
|
|
169 |
#try to open error message from file: |
|
164 |
except:
|
|
165 |
self.logger.error("failed to create utterance error = '" + str(sys.exc_info()[1]) + "'")
|
|
166 |
# try to open error message from file:
|
|
170 | 167 |
success = True |
171 | 168 |
(audio, phonelist) = self.get_error_message() |
172 | 169 |
|
173 |
if success:
|
|
174 |
#build soundchunk
|
|
175 |
result.utterance = self.create_utterance(goal.text, audio, phonelist)
|
|
176 |
self._as.set_succeeded(result)
|
|
177 |
else:
|
|
178 |
self._as.set_aborted(result)
|
|
170 |
if success:
|
|
171 |
# build soundchunk
|
|
172 |
result.utterance = self.create_utterance(goal.text, audio, phonelist)
|
|
173 |
self._as.set_succeeded(result)
|
|
174 |
else:
|
|
175 |
self._as.set_aborted(result)
|
|
179 | 176 |
|
180 | 177 |
def run(self): |
181 |
#run the main loop
|
|
182 |
rospy.spin()
|
|
178 |
# run the main loop
|
|
179 |
rospy.spin()
|
|
183 | 180 |
|
184 |
#test code |
|
181 |
# test code
|
|
185 | 182 |
def main(): |
186 | 183 |
if (len(sys.argv) != 2): |
187 |
print("> usage: "+sys.argv[0]+" <topic>\n\n")
|
|
188 |
sys.exit(1)
|
|
184 |
print("> usage: "+sys.argv[0]+" <topic>\n\n")
|
|
185 |
sys.exit(1)
|
|
189 | 186 |
|
190 | 187 |
bridge = MaryTTSBridge(topic=sys.argv[1], loglevel=logging.INFO) |
191 | 188 |
bridge.run() |
tts_bridge/mary/mary_tts_bridge/MaryTTSClient.py | ||
---|---|---|
26 | 26 |
""" |
27 | 27 |
|
28 | 28 |
import logging |
29 |
#try: |
|
30 |
# import rsb |
|
31 |
#except ImportError: |
|
32 |
# RSB_SUPPORT = False |
|
33 |
#else: |
|
34 |
# from MiddlewareRSB import * |
|
35 |
# RSB_SUPPORT = True |
|
36 |
|
|
37 |
#from MiddlewareROS import * |
|
38 | 29 |
import sys |
39 | 30 |
try: |
40 |
from http.client import HTTPConnection
|
|
41 |
from urllib.parse import urlencode
|
|
31 |
from http.client import HTTPConnection
|
|
32 |
from urllib.parse import urlencode
|
|
42 | 33 |
except ImportError: # Python 2 |
43 |
from httplib import HTTPConnection
|
|
44 |
from urllib import urlencode
|
|
34 |
from httplib import HTTPConnection
|
|
35 |
from urllib import urlencode
|
|
45 | 36 |
import wave |
46 | 37 |
import ctypes |
47 | 38 |
import wave |
48 | 39 |
import sys |
49 | 40 |
|
41 |
|
|
50 | 42 |
class MaryTTSClient: |
51 | 43 |
def __init__(self, voice="cmu-slt-hsmm", locale="en_US", tts_host="127.0.0.1", tts_port=59125, loglevel=logging.WARNING): |
52 |
"""initialise
|
|
53 |
:param loglevel: optional log level
|
|
54 |
"""
|
|
55 |
self.loglevel = loglevel
|
|
56 |
self.logger = logging.getLogger(__name__)
|
|
57 |
# create nice and actually usable formatter and add it to the handler
|
|
58 |
self.config_logger(loglevel)
|
|
44 |
"""initialise
|
|
45 |
:param loglevel: optional log level
|
|
46 |
"""
|
|
47 |
self.loglevel = loglevel
|
|
48 |
self.logger = logging.getLogger(__name__)
|
|
49 |
# create nice and actually usable formatter and add it to the handler
|
|
50 |
self.config_logger(loglevel)
|
|
59 | 51 |
|
60 |
self.logger.info("starting MaryTTSClient (voice="+voice+", locale="+locale+", host="+tts_host+", port="+str(tts_port))
|
|
52 |
self.logger.info("starting MaryTTSClient (voice="+voice+", locale="+locale+", host="+tts_host+", port="+str(tts_port))
|
|
61 | 53 |
|
62 |
self.tts_host = tts_host
|
|
63 |
self.tts_port = tts_port
|
|
64 |
self.locale = locale
|
|
65 |
self.voice = voice
|
|
54 |
self.tts_host = tts_host
|
|
55 |
self.tts_port = tts_port
|
|
56 |
self.locale = locale
|
|
57 |
self.voice = voice
|
|
66 | 58 |
|
67 | 59 |
def __del__(self): |
68 |
"""destructor
|
|
69 |
"""
|
|
70 |
self.logger.debug("destructor of MaryTTSClient called")
|
|
60 |
"""destructor
|
|
61 |
"""
|
|
62 |
self.logger.debug("destructor of MaryTTSClient called")
|
|
71 | 63 |
|
72 | 64 |
def config_logger(self, level): |
73 |
"""initialise a nice logger formatting
|
|
74 |
:param level: log level
|
|
75 |
"""
|
|
76 |
formatter = logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s')
|
|
77 |
ch = logging.StreamHandler()
|
|
78 |
#ch.setLevel(level)
|
|
79 |
ch.setFormatter(formatter)
|
|
80 |
self.logger.setLevel(level)
|
|
81 |
self.logger.addHandler(ch)
|
|
65 |
"""initialise a nice logger formatting
|
|
66 |
:param level: log level
|
|
67 |
"""
|
|
68 |
formatter = logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s')
|
|
69 |
ch = logging.StreamHandler()
|
|
70 |
# ch.setLevel(level)
|
|
71 |
ch.setFormatter(formatter)
|
|
72 |
self.logger.setLevel(level)
|
|
73 |
self.logger.addHandler(ch)
|
|
82 | 74 |
|
83 | 75 |
def generate_audio(self, message): |
84 |
"""generate audio from text
|
|
85 |
:param message: text to synthesize
|
|
86 |
"""
|
|
87 |
return self.generate(message, "AUDIO")
|
|
76 |
"""generate audio from text
|
|
77 |
:param message: text to synthesize
|
|
78 |
"""
|
|
79 |
return self.generate(message, "AUDIO")
|
|
88 | 80 |
|
89 | 81 |
def generate_phonemes(self, message): |
90 |
"""generate phoneme list from text
|
|
91 |
:param message: text to synthesize
|
|
92 |
"""
|
|
93 |
return self.generate(message, "REALISED_DURATIONS")
|
|
82 |
"""generate phoneme list from text
|
|
83 |
:param message: text to synthesize
|
|
84 |
"""
|
|
85 |
return self.generate(message, "REALISED_DURATIONS")
|
|
94 | 86 |
|
95 | 87 |
def generate(self, message, output_type): |
96 |
"""generate requested data object from text
|
|
97 |
:param message: text to synthesize
|
|
98 |
"""
|
|
99 |
|
|
100 |
raw_params = {
|
|
101 |
"INPUT_TEXT": message,
|
|
102 |
"INPUT_TYPE": "RAWMARYXML",
|
|
103 |
"OUTPUT_TYPE": output_type,
|
|
104 |
"LOCALE": self.locale,
|
|
105 |
"AUDIO": "WAVE_FILE",
|
|
106 |
"VOICE": self.voice,
|
|
107 |
}
|
|
108 |
|
|
109 |
params = urlencode(raw_params)
|
|
110 |
headers = {}
|
|
111 |
|
|
112 |
#conn.set_debuglevel(5)
|
|
113 |
#open connection to mary server
|
|
88 |
"""generate requested data object from text
|
|
89 |
:param message: text to synthesize
|
|
90 |
"""
|
|
91 |
|
|
92 |
raw_params = {
|
|
93 |
"INPUT_TEXT": message,
|
|
94 |
"INPUT_TYPE": "RAWMARYXML",
|
|
95 |
"OUTPUT_TYPE": output_type,
|
|
96 |
"LOCALE": self.locale,
|
|
97 |
"AUDIO": "WAVE_FILE",
|
|
98 |
"VOICE": self.voice,
|
|
99 |
}
|
|
100 |
|
|
101 |
params = urlencode(raw_params)
|
|
102 |
headers = {}
|
|
103 |
|
|
104 |
# conn.set_debuglevel(5)
|
|
105 |
# open connection to mary server
|
|
114 | 106 |
conn = HTTPConnection(self.tts_host, self.tts_port) |
115 | 107 |
|
116 |
conn.request("POST", "/process", params, headers)
|
|
117 |
response = conn.getresponse()
|
|
108 |
conn.request("POST", "/process", params, headers)
|
|
109 |
response = conn.getresponse()
|
|
118 | 110 |
|
119 |
if response.status != 200:
|
|
120 |
print(response.getheaders())
|
|
111 |
if response.status != 200:
|
|
112 |
print(response.getheaders())
|
|
121 | 113 |
conn.close() |
122 |
raise RuntimeError("{0}: {1}".format(response.status,response.reason)) |
|
123 |
return response.read() |
|
114 |
raise RuntimeError("{0}: {1}".format(response.status,response.reason)) |
|
115 |
return response.read() |
|
116 |
|
|
124 | 117 |
|
125 |
#test code |
|
118 |
# test code
|
|
126 | 119 |
if __name__ == "__main__": |
127 | 120 |
client = MaryTTSClient() |
128 | 121 |
audio = client.generate_phonemes("test 1 2 3 4 5 6 7 8 9 10") |
Also available in: Unified diff