/tts_bridge/mary/mary_tts_bridge/MaryTTSBridge.py - hlrc - Research for Cognitive Interaction

hlrc / tts_bridge / mary / mary_tts_bridge / MaryTTSBridge.py @ master

History | View | Annotate | Download (6.089 KB)

       #!/usr/bin/python
       """
       This file is part of hlrc
       Copyright(c) sschulz <AT> techfak.uni-bielefeld.de
       http://opensource.cit-ec.de/projects/hlrc
       This file may be licensed under the terms of the
       GNU General Public License Version 3 (the ``GPL''),
       or (at your option) any later version.
       Software distributed under the License is distributed
       on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
       express or implied. See the GPL for the specific language
       governing rights and limitations.
       You should have received a copy of the GPL along with this
       program. If not, go to http://www.gnu.org/licenses/gpl.html
       or write to the Free Software Foundation, Inc.,
       51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
       The development of this software was supported by the
       Excellence Cluster EXC 277 Cognitive Interaction Technology.
       The Excellence Cluster EXC 277 is a grant of the Deutsche
       Forschungsgemeinschaft (DFG) in the context of the German
       Excellence Initiative.
       """
       import logging
       import rospy
       from hlrc_server.msg import *
       import time
       import sys
       import actionlib
       from io import BytesIO
       import wave
       import os
       import pkgutil
       from .MaryTTSClient import *
       try:
           from io import StringIO
       except ImportError:
           from cStringIO import StringIO
       class MaryTTSBridge(object):
           """Action server that answers tts goals with data from a MaryTTS client.

           For every incoming goal the bridge asks the MaryTTS client for the
           audio and the phoneme timing of the goal text, packs both into an
           utterance message and returns it as the action result.
           """

           def __init__(self, topic, voice="cmu-slt-hsmm", locale="en_GB", tts_host="127.0.0.1", tts_port=59125, loglevel=logging.WARNING):
               """Set up logging, the MaryTTS client and the action server.

               :param  topic: name the action server is started on
               :param  voice: voice handed to the MaryTTS client
               :param  locale: locale handed to the MaryTTS client
               :param  tts_host: host handed to the MaryTTS client
               :param  tts_port: port handed to the MaryTTS client
               :param  loglevel: optional log level
               """
               self.loglevel = loglevel
               self.logger = logging.getLogger(__name__)
               self.config_logger(loglevel)
               self.logger.info("starting MaryTTSBridge on topic '%s'", topic)

               self.tts_client = MaryTTSClient(voice, locale, tts_host, tts_port, loglevel)

               rospy.init_node('MaryTTSBridge', anonymous=True)

               self._action_name = topic
               # auto_start is disabled so the server is started explicitly once it is fully set up
               self._as = actionlib.SimpleActionServer(self._action_name, ttsAction, execute_cb=self.execute_cb, auto_start=False)
               self._as.start()

           def __del__(self):
               """Log that the bridge is being destroyed."""
               # __init__ may have failed before the logger attribute was assigned
               logger = getattr(self, "logger", None)
               if logger is not None:
                   logger.debug("destructor of MaryTTSBridge called")

           def config_logger(self, level):
               """Set the log level and attach a formatted stream handler.

               The handler is only attached when the logger has none yet, so that
               configuring the same logger again does not duplicate every log line.

               :param  level: log level
               """
               self.logger.setLevel(level)
               if self.logger.handlers:
                   return
               handler = logging.StreamHandler()
               handler.setFormatter(logging.Formatter('%(asctime)s %(name)-30s %(levelname)-8s > %(message)s'))
               self.logger.addHandler(handler)

           def create_soundchunk(self, audio_data):
               """Build a soundchunk message from WAV data.

               :param  audio_data: bytes of a complete WAV file
               :return: soundchunk described by the WAV header and carrying the
                        unmodified audio_data; an empty soundchunk if the sample
                        width is not 1, 2 or 3 bytes
               """
               # only the header information is needed, so release the reader right away
               wav = wave.open(BytesIO(audio_data))
               try:
                   channels = wav.getnchannels()
                   rate = wav.getframerate()
                   samplecount = wav.getnframes()
                   sample_width = wav.getsampwidth()
               finally:
                   wav.close()

               chunk = soundchunk()
               # NOTE(review): WAV stores 16 and 24 bit samples signed - confirm that the
               # SAMPLE_U* constants are what the consumer of the message expects
               sample_types = {1: chunk.SAMPLE_U8, 2: chunk.SAMPLE_U16, 3: chunk.SAMPLE_U24}
               if sample_width in sample_types:
                   chunk.channels = channels
                   chunk.data = audio_data
                   # RIFF/WAVE files store their sample data little endian
                   chunk.endianess = chunk.ENDIAN_LITTLE
                   chunk.rate = rate
                   chunk.samplecount = samplecount
                   chunk.sample_type = sample_types[sample_width]
               else:
                   # the chunk is left empty on purpose
                   self.logger.error("ERROR: invalid sample width %s detected", sample_width)

               self.logger.info("created soundchunk with %s samples", chunk.samplecount)
               return chunk

           def _parse_phoneme_timings(self, phoneme_bytes):
               """Parse phoneme data into a list of (symbol, duration in ms) tuples.

               Every data line is expected to hold three whitespace separated fields;
               the first is read as a time stamp in seconds and the third as the
               phoneme symbol. The duration is the distance to the previous time stamp.
               Lines starting with '#' and blank lines are ignored, malformed lines are
               logged and skipped.

               :param  phoneme_bytes: ascii encoded phoneme data
               :return: list of (symbol, duration) tuples
               """
               timings = []
               last = 0.0
               for line in phoneme_bytes.decode('ascii').splitlines():
                   if line.startswith('#') or not line.strip():
                       continue
                   fields = line.split()
                   if len(fields) != 3:
                       self.logger.warning("could not split line '%s' during phoneme separation", line)
                       continue
                   try:
                       now = float(fields[0])
                   except ValueError:
                       self.logger.warning("invalid time stamp in line '%s' during phoneme separation", line)
                       continue
                   timings.append((fields[2], int((now - last) * 1000)))
                   last = now
               return timings

           def create_phonemes(self, phoneme_bytes):
               """Convert phoneme data into a list of phoneme messages.

               :param  phoneme_bytes: ascii encoded phoneme data
               :return: list of phoneme messages (symbol, duration in ms)
               """
               plist = [phoneme(symbol, duration) for (symbol, duration) in self._parse_phoneme_timings(phoneme_bytes)]
               self.logger.info("created phonemelist with %d elements", len(plist))
               return plist

           def create_utterance(self, text, audio_data, phoneme_list):
               """Assemble an utterance message.

               :param  text: text of the utterance
               :param  audio_data: bytes of a complete WAV file
               :param  phoneme_list: ascii encoded phoneme data
               :return: utterance with text, audio and phonemes filled in
               """
               u = utterance()
               u.text     = text
               u.audio    = self.create_soundchunk(audio_data)
               u.phonemes = self.create_phonemes(phoneme_list)
               self.logger.info("created utterance for '%s'", u.text)
               return u

           def get_error_message(self):
               """Load the canned 'connection failed' utterance shipped with the package.

               :return: tuple (wav data, phoneme data) as returned by pkgutil.get_data
               """
               data_wav = pkgutil.get_data('mary_tts_bridge', 'data/connection_failed.wav')
               data_phonemes = pkgutil.get_data('mary_tts_bridge', 'data/connection_failed.phonemes')
               return (data_wav, data_phonemes)

           def execute_cb(self, goal):
               """Handle one tts goal.

               The MaryTTS client is asked for audio and phonemes of goal.text. If
               that fails, the canned error message is used instead and the goal
               still succeeds. The goal is aborted only if no utterance can be
               built at all.

               :param  goal: goal whose text attribute is the text to speak
               """
               self.logger.info("incoming utterance '%s'", goal.text)
               success = True
               result  = ttsResult()

               # incoming msg, ask mary tts for data:
               try:
                   audio     = self.tts_client.generate_audio(goal.text)
                   phonelist = self.tts_client.generate_phonemes(goal.text)
               except Exception as error:
                   self.logger.error("failed to create utterance error = '%s'", error)
                   # fall back to the error message shipped with the package
                   try:
                       (audio, phonelist) = self.get_error_message()
                   except Exception as fallback_error:
                       self.logger.error("failed to load the error message, error = '%s'", fallback_error)
                       success = False
                   else:
                       # pkgutil.get_data returns None when the resource cannot be provided
                       success = (audio is not None) and (phonelist is not None)

               if success:
                   try:
                       result.utterance = self.create_utterance(goal.text, audio, phonelist)
                   except Exception as error:
                       self.logger.error("failed to build utterance message, error = '%s'", error)
                       success = False

               if success:
                   self._as.set_succeeded(result)
               else:
                   self._as.set_aborted(result)

           def run(self):
               """Block in the ROS main loop until the node is shut down."""
               rospy.spin()
       # test code
       def main():
           """Start a MaryTTSBridge on the topic given on the command line.

           Exits with status 1 and prints a usage hint unless exactly one
           argument (the topic) is given.
           """
           # drop ROS remapping arguments (e.g. __name:=..., __log:=... as added by
           # roslaunch), they would otherwise break the argument count check
           argv = rospy.myargv(argv=sys.argv)
           if len(argv) != 2:
               print("> usage: "+argv[0]+" <topic>\n\n")
               sys.exit(1)
           bridge = MaryTTSBridge(topic=argv[1], loglevel=logging.INFO)
           bridge.run()


       if __name__ == "__main__":
           main()