hlrc / tts_bridge / mary / mary_tts_bridge / MaryTTSBridge.py @ 836f314c
History | View | Annotate | Download (5.45 KB)
| 1 | 0c15613f | Simon Schulz | #!/usr/bin/python
|
|---|---|---|---|
| 2 | """
|
||
| 3 | This file is part of hlrc
|
||
| 4 |
|
||
| 5 | Copyright(c) sschulz <AT> techfak.uni-bielefeld.de
|
||
| 6 | http://opensource.cit-ec.de/projects/hlrc
|
||
| 7 |
|
||
| 8 | This file may be licensed under the terms of the
|
||
| 9 | GNU General Public License Version 3 (the ``GPL''),
|
||
| 10 | or (at your option) any later version.
|
||
| 11 |
|
||
| 12 | Software distributed under the License is distributed
|
||
| 13 | on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
|
||
| 14 | express or implied. See the GPL for the specific language
|
||
| 15 | governing rights and limitations.
|
||
| 16 |
|
||
| 17 | You should have received a copy of the GPL along with this
|
||
| 18 | program. If not, go to http://www.gnu.org/licenses/gpl.html
|
||
| 19 | or write to the Free Software Foundation, Inc.,
|
||
| 20 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||
| 21 |
|
||
| 22 | The development of this software was supported by the
|
||
| 23 | Excellence Cluster EXC 277 Cognitive Interaction Technology.
|
||
| 24 | The Excellence Cluster EXC 277 is a grant of the Deutsche
|
||
| 25 | Forschungsgemeinschaft (DFG) in the context of the German
|
||
| 26 | Excellence Initiative.
|
||
| 27 | """
|
||
| 28 | |||
| 29 | import logging |
||
| 30 | import rospy |
||
| 31 | from hlrc_server.msg import * |
||
| 32 | import time |
||
| 33 | import sys |
||
| 34 | import actionlib |
||
| 35 | from io import BytesIO |
||
| 36 | import wave |
||
| 37 | e21d7f2c | Simon Schulz | import os |
| 38 | import pkgutil |
||
| 39 | 836f314c | Robert Haschke | from .MaryTTSClient import * |
| 40 | try:
|
||
| 41 | from io import StringIO |
||
| 42 | except ImportError: |
||
| 43 | from cStringIO import StringIO |
||
| 44 | 0c15613f | Simon Schulz | |
class MaryTTSBridge(object):
    """ROS action server that answers tts goals with MaryTTS output.

    For every incoming goal the text is sent to a MaryTTSClient; the
    returned audio and phoneme data are packed into an ``utterance``
    message and handed back as the action result.
    """

    def __init__(self, topic, voice="cmu-slt-hsmm", locale="en_GB", tts_host="127.0.0.1", tts_port=59125, loglevel=logging.WARNING):
        """Set up logging, the MaryTTS client, the ROS node and the action server.

        :param topic: name under which the action server is started
        :param voice: voice passed on to MaryTTSClient
        :param locale: locale passed on to MaryTTSClient
        :param tts_host: host passed on to MaryTTSClient
        :param tts_port: port passed on to MaryTTSClient
        :param loglevel: optional log level
        """
        self.loglevel = loglevel
        self.logger = logging.getLogger(__name__)
        # create nice and actually usable formatter and add it to the handler
        self.config_logger(loglevel)
        self.logger.info("starting MaryTTSBridge on topic '" + topic + "'")

        self.tts_client = MaryTTSClient(voice, locale, tts_host, tts_port, loglevel)

        rospy.init_node('MaryTTSBridge', anonymous=True)

        self._action_name = topic
        # auto_start is disabled and start() is called explicitly right after
        self._as = actionlib.SimpleActionServer(self._action_name, ttsAction, execute_cb=self.execute_cb, auto_start=False)
        self._as.start()

    def __del__(self):
        """Log that the bridge is being destroyed."""
        # the logger attribute is missing if __init__ never ran
        logger = getattr(self, "logger", None)
        if logger is not None:
            logger.debug("destructor of MaryTTSBridge called")

    def config_logger(self, level):
        """Attach a formatted stream handler to the logger and set its level.

        :param level: log level
        """
        formatter = logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s')
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        self.logger.setLevel(level)
        self.logger.addHandler(ch)

    def create_soundchunk(self, audio_data):
        """Build a soundchunk message from WAV encoded bytes.

        :param audio_data: bytes that ``wave.open`` can parse
        :return: a filled soundchunk, or an empty one if the sample
                 width is not 1, 2 or 3 bytes
        """
        # extract the wave parameters, always releasing the reader again
        wav = wave.open(BytesIO(audio_data))
        try:
            channels = wav.getnchannels()
            rate = wav.getframerate()
            samplecount = wav.getnframes()
            sample_width = wav.getsampwidth()
        finally:
            wav.close()

        s = soundchunk()
        sample_types = {
            1: s.SAMPLE_U8,
            2: s.SAMPLE_U16,
            3: s.SAMPLE_U24,
        }

        if sample_width in sample_types:
            s.channels = channels
            # the complete input buffer is forwarded unchanged
            s.data = audio_data
            s.endianess = s.ENDIAN_LITTLE  # guessed?!
            s.rate = rate
            s.samplecount = samplecount
            s.sample_type = sample_types[sample_width]
        else:
            # s stays a blank soundchunk in this case
            self.logger.error("ERROR: invalid sample width " + str(sample_width) + " detected")

        self.logger.info("created soundchunk with " + str(s.samplecount) + " samples")

        return s

    def create_phonemes(self, phoneme_bytes):
        """Parse the phoneme listing into a list of phoneme messages.

        Each data line is expected to hold three whitespace separated
        fields; the first is read as a time stamp in seconds and the
        third as the phoneme symbol. The duration of a phoneme is the
        difference to the previous time stamp, in milliseconds.
        Lines starting with '#' and blank lines are skipped; lines that
        cannot be parsed are logged and skipped.

        :param phoneme_bytes: ascii encoded bytes of the phoneme listing
        :return: list of phoneme messages
        """
        last = 0.0
        plist = []

        for line in StringIO(phoneme_bytes.decode('ascii')):
            if line.startswith('#') or not line.strip():
                # ignore comments and empty lines
                continue

            fields = line.split()
            if len(fields) != 3:
                self.logger.warning("could not split line '%s' during phoneme seperation", line.rstrip())
                continue

            try:
                now = float(fields[0])
            except ValueError:
                self.logger.warning("could not split line '%s' during phoneme seperation", line.rstrip())
                continue

            duration = (now - last) * 1000
            last = now
            plist.append(phoneme(fields[2], int(duration)))

        self.logger.info("created phonemelist with " + str(len(plist)) + " elements")

        return plist

    def create_utterance(self, text, audio_data, phoneme_list):
        """Assemble an utterance message from text, audio and phoneme data.

        :param text: the spoken text
        :param audio_data: WAV bytes, see create_soundchunk
        :param phoneme_list: phoneme listing bytes, see create_phonemes
        :return: the filled utterance message
        """
        u = utterance()
        u.text = text
        u.audio = self.create_soundchunk(audio_data)
        u.phonemes = self.create_phonemes(phoneme_list)

        self.logger.info("created utterance for '" + u.text + "'")
        return u

    def get_error_message(self):
        """Load the packaged 'connection failed' audio and phoneme data.

        :return: tuple (wav bytes, phoneme bytes) as returned by
                 ``pkgutil.get_data``; an entry may be None
        """
        data_wav = pkgutil.get_data('mary_tts_bridge', 'data/connection_failed.wav')
        data_phonemes = pkgutil.get_data('mary_tts_bridge', 'data/connection_failed.phonemes')
        return (data_wav, data_phonemes)

    def execute_cb(self, goal):
        """Handle one tts goal.

        Asks the MaryTTS client for audio and phonemes of ``goal.text``.
        If that fails, the packaged error message is used instead. The
        goal is set to succeeded with the resulting utterance, or to
        aborted if no utterance could be built at all.

        :param goal: incoming goal; only ``goal.text`` is read
        """
        self.logger.info("incoming utterance '" + goal.text + "'")

        result = ttsResult()

        # incoming msg, ask mary tts for data:
        try:
            audio = self.tts_client.generate_audio(goal.text)
            phonelist = self.tts_client.generate_phonemes(goal.text)
        except Exception:
            self.logger.error("failed to create utterance error = '" + str(sys.exc_info()[1]) + "'")
            # try to open error message from file:
            try:
                (audio, phonelist) = self.get_error_message()
            except Exception:
                self.logger.error("failed to load error message = '" + str(sys.exc_info()[1]) + "'")
                audio = phonelist = None

            if audio is None or phonelist is None:
                # nothing usable to send back
                self._as.set_aborted(result)
                return

        # build soundchunk; abort explicitly instead of letting a parse
        # error escape the callback
        try:
            result.utterance = self.create_utterance(goal.text, audio, phonelist)
        except Exception:
            self.logger.error("failed to build utterance error = '" + str(sys.exc_info()[1]) + "'")
            self._as.set_aborted(result)
            return

        self._as.set_succeeded(result)

    def run(self):
        """Run the main loop; returns when ``rospy.spin()`` returns."""
        rospy.spin()
||
| 183 | |||
| 184 | #test code
|
||
def main():
    """Start a MaryTTSBridge on the topic given on the command line.

    Expects exactly one argument, the topic name, and exits with
    status 1 after printing a usage line otherwise.
    """
    # drop ROS remapping arguments (name:=value) before counting, so the
    # check also passes when extra remappings are appended to the command
    argv = rospy.myargv(argv=sys.argv)
    if len(argv) != 2:
        print("> usage: " + argv[0] + " <topic>\n\n")
        sys.exit(1)

    bridge = MaryTTSBridge(topic=argv[1], loglevel=logging.INFO)
    bridge.run()


if __name__ == "__main__":
    main()