/client/python/hlrc_client/hlrc_speak_utterance.py - hlrc - Research for Cognitive Interaction

hlrc / client / python / hlrc_client / hlrc_speak_utterance.py @ 0c286af0

History | View | Annotate | Download (4.192 KB)

       #!/usr/bin/python
       #PYTHONPATH="/opt/ros/groovy/lib/python2.7/dist-packages:/vol/csra/releases/nightly/lib/python2.7/:/vol/csra/releases/nightly/lib/python2.7/site-packages/
       import sys
       import wave
       from textgrid_hlc import *
       import logging
       import errno
       import os.path
       try:
           import rsb
           import rsb.converter
           import rst
           import rstsandbox
           from rst.audition.Utterance_pb2 import Utterance
           from rst.audition.SoundChunk_pb2 import SoundChunk
       except ImportError as exception:
           sys.stderr.write("ImportError: {}\n> HINT: try to export PYTHONPATH=$PYTHONPATH:$YOUR_PREFIX/lib/python2.7/site-packages/\n\n".format(exception))
           sys.exit(errno.ENOPKG)
       class hlrc_utterance():
           """Send a pre-recorded utterance (wave audio + praat phoneme timing) over RSB.

           On construction a protocol buffer converter for ``Utterance`` is
           registered globally with RSB and a remote server is created on
           ``<base_scope>/set``. ``trigger_utterance`` then builds an ``Utterance``
           message from a praat TextGrid file and a wave file and invokes the
           ``utterance`` RPC on that scope.
           """

           def __init__(self, _base_scope):
               """Register the Utterance converter and bind to ``_base_scope``.

               _base_scope -- RSB base scope of the target (e.g. '/flobi1').
               """
               # Make sure the attribute exists even if set_scope() fails early.
               self.server = None
               #print "> registering rst converter"
               converter = rsb.converter.ProtocolBufferConverter(messageClass = Utterance)
               rsb.converter.registerGlobalConverter(converter)
               self.set_scope(_base_scope)

           def set_scope(self, scope):
               """(Re)bind this client to ``scope``.

               A previously created remote server is deactivated before the new
               one is created. If the scope is rejected with ValueError,
               ``self.server`` is left as None so that trigger_utterance() can
               detect the unusable state.
               """
               self.base_scope = str(scope) #NOTE: str() is important here, scope is a qstring (?) and gets deleted during call
               print("> setting scope to '%s'" % self.base_scope)

               # Release the old server (if any) first; getattr() keeps this safe
               # even when the attribute has never been assigned.
               previous = getattr(self, 'server', None)
               self.server = None
               if previous is not None:
                   previous.deactivate()

               try:
                   self.server = rsb.createRemoteServer(self.base_scope + '/set')
               except ValueError:
                   print("> invalid scope given. server deactivated")

           def trigger_utterance(self, filename_praat, filename_wav, blocking):
               """Build an Utterance from the given files and send it via RPC.

               filename_praat -- path to the praat TextGrid file with the phoneme tiers
               filename_wav   -- path to the wave file holding the audio
               blocking       -- if true, wait for the server reply; otherwise the
                                 RPC is fired asynchronously and not waited for

               Returns 1 on success and 0 on any failure (no valid server, missing
               file, unsupported sample width).
               """
               if (getattr(self, 'server', None) is None):
                   print("> invalid server")
                   return 0

               if (not os.path.isfile(filename_praat)):
                   print("can not open file '%s'" % (filename_praat))
                   return 0

               if (not os.path.isfile(filename_wav)):
                   print("can not open file '%s'" % (filename_wav))
                   return 0

               print("> reading wave file '%s'" % (filename_wav))
               wav = wave.open(filename_wav, "r")
               try:
                   print("> parsing praat file '%s'" % (filename_praat))
                   tgrid = TextGrid.load(filename_praat)

                   #create utterance & fill it with values:
                   ut = Utterance()
                   #textual description of audio file
                   ut.text = filename_praat
                   ut.audio.data = wav.readframes(-1)
                   ut.audio.sample_count = wav.getnframes()
                   ut.audio.channels = wav.getnchannels()
                   ut.audio.rate = wav.getframerate()

                   sample_width = wav.getsampwidth()
                   if (sample_width == 1):
                       ut.audio.sample_type = SoundChunk.SAMPLE_U8
                   elif (sample_width == 2):
                       ut.audio.sample_type = SoundChunk.SAMPLE_S16
                   else:
                       # Report the failure to the caller instead of terminating
                       # the whole process from inside a library method.
                       print("> invalid sample type. py doc says wave files are either u8 or s16")
                       return 0
               finally:
                   # wave readers are not context managers on python 2.7
                   wav.close()

               #wave spec says always little endian
               ut.audio.endianness = SoundChunk.ENDIAN_LITTLE

               print("> filling phones with data from praat")
               for tier in tgrid.tiers:
                   for (xmin, xmax, utt) in tier.simple_transcript:
                       phoneme = ut.phonemes.add()
                       phoneme.symbol = utt
                       # presumably xmin/xmax are seconds, giving a duration in
                       # milliseconds -- TODO confirm against textgrid_hlc
                       phoneme.duration = int(1000.0*(float(xmax)-float(xmin)))

               with rsb.createRemoteServer(self.base_scope + '/set') as server:
                   if (blocking):
                       #blocking:
                       print("> calling the utterance rpc (blocking until we finished talking)...")
                       print('> server reply: "%s"' % server.utterance(ut))
                   else:
                       print("> calling the utterance rpc (NON-BLOCKING)...")
                       # 'async' is a reserved word on python >= 3.7, so the
                       # attribute is looked up by name; same call on python 2.
                       getattr(server.utterance, 'async')(ut)
                       #the returned future could be used to block for an incoming
                       #result with a timeout in seconds: future.get(timeout = 10)

               print("> done")
               return 1
       def main():
           """Command line entry point: send one utterance and exit.

           Expects exactly three arguments: <base_scope> file.praat file.wav.
           Exits with errno.EINVAL after printing the usage text when the
           argument count is wrong, and with status 1 when the utterance could
           not be sent; returns normally on success.
           """
           # Pacify logger.
           #logging.basicConfig()

           if (len(sys.argv) != 4):
               print("> usage: %s <base_scope> file.praat file.wav\n>     example: %s /flobi1 hello.praat hello.wav" % (sys.argv[0] , sys.argv[0]))
               # a usage error must not look like success to the calling shell
               sys.exit(errno.EINVAL)

           base = sys.argv[1]
           filename_praat = sys.argv[2]
           filename_wav = sys.argv[3]

           hlc = hlrc_utterance(base)
           # trigger_utterance() returns a false value on failure; propagate it
           if (not hlc.trigger_utterance(filename_praat, filename_wav, True)):
               sys.exit(1)
       # Run main() only when this file is executed as a script, not when it is imported.
       if __name__ == '__main__':
           main()