Opened 9 years ago

Closed 9 years ago

#2165 closed defect (fixed)

gst-plugins-espeak: audio output skips about every second word in track=1 (word) mode

Reported by: tshalif Owned by: alsroot
Priority: Unspecified by Maintainer Milestone: Unspecified
Component: gst-plugins-espeak Version: Git as of bugdate
Severity: Unspecified Keywords:
Cc: Distribution/OS: Ubuntu
Bug Status: Unconfirmed

Description

Attached is a patch for espeak.c. My solution is to take the audio samples of every event into account, not just those of word or mark events. In addition, in SSML mode (track=2), bus messages are now output for word, mark and sentence events.

Attachments (2)

gst-plugins-espeak.diff (5.9 KB) - added by tshalif 9 years ago.
gst-plugins-espeak-after-indent.diff (5.8 KB) - added by alsroot 9 years ago.

Download all attachments as: .zip

Change History (7)

comment:1 Changed 9 years ago by tshalif

patch:
(I couldn't find a way to add the patch as an attachment)

diff --git a/src/espeak.c b/src/espeak.c
index 138d595..3d9de13 100644
--- a/src/espeak.c
+++ b/src/espeak.c
@@ -90,13 +90,8 @@ spinning(Espin *base, Espin **i)
         *i = base;
 }
 
-static void
-emit_word(Econtext *self, guint offset, guint len)
+static void post_message(Econtext *self, GstStructure *data)
 {
-    GstStructure *data = gst_structure_new("espeak-word",
-            "offset", G_TYPE_UINT, offset,
-            "len", G_TYPE_UINT, len,
-            NULL);
     if (!self->bus)
         self->bus = gst_element_get_bus(self->emitter);
     GstMessage *msg = gst_message_new_element(GST_OBJECT(self->emitter), data);
@@ -104,16 +99,32 @@ emit_word(Econtext *self, guint offset, guint len)
 }
 
 static void
+emit_word(Econtext *self, guint offset, guint len, guint id)
+{
+  post_message(self, gst_structure_new("espeak-word",
+				       "offset", G_TYPE_UINT, offset,
+				       "len", G_TYPE_UINT, len,
+				       "id", G_TYPE_UINT, id,
+				       NULL));
+}
+
+static void
+emit_sentence(Econtext *self, guint offset, guint len, guint id)
+{
+  post_message(self, gst_structure_new("espeak-sentence",
+				       "offset", G_TYPE_UINT, offset,
+				       "len", G_TYPE_UINT, len,
+				       "id", G_TYPE_UINT, id,
+				       NULL));
+}
+
+static void
 emit_mark(Econtext *self, guint offset, const gchar *mark)
 {
-    GstStructure *data = gst_structure_new("espeak-mark",
-            "offset", G_TYPE_UINT, offset,
-            "mark", G_TYPE_STRING, mark,
-            NULL);
-    if (!self->bus)
-        self->bus = gst_element_get_bus(self->emitter);
-    GstMessage *msg = gst_message_new_element(GST_OBJECT(self->emitter), data);
-    gst_bus_post(self->bus, msg);
+  post_message(self, gst_structure_new("espeak-mark",
+				       "offset", G_TYPE_UINT, offset,
+				       "mark", G_TYPE_STRING, mark,
+				       NULL));
 }
 
 static inline gsize
@@ -235,84 +246,37 @@ play(Econtext *self, Espin *spin, gsize size_to_play)
         }
     }
 
-    inline gsize word(Econtext *self, Espin *spin, gsize size_to_play)
-    {
-        gsize spin_size = spin->sound->len;
-        gsize event;
-        gsize sample_offset = 0;
-
-        for (event = spin->events_pos; TRUE; ++event)
-        {
-            espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event);
-
-            GST_DEBUG("event=%zd i->type=%d i->text_position=%d",
-                      event, i->type, i->text_position);
-
-            if (i->type == espeakEVENT_LIST_TERMINATED)
-            {
-                sample_offset = spin_size;
-                break;
-            }
-            else if (i->type == espeakEVENT_WORD)
-            {
-                if (i->text_position != spin->last_word)
-                {
-                    emit_word(self, i->text_position, i->length);
-                    spin->last_word = i->text_position;
-                }
-                sample_offset = i[1].sample*2;
-                break;
-            }
-        }
-
-        return sample_offset - spin->sound_offset;
-    }
 
-    inline gsize mark(Econtext *self, Espin *spin, gsize size_to_play)
+    inline gsize events(Econtext *self, Espin *spin, gsize size_to_play)
     {
-        if (spin->mark_name)
-        {
-            emit_mark(self, spin->mark_offset, spin->mark_name);
-            spin->mark_offset = 0;
-            spin->mark_name = NULL;
-        }
-
         gsize spin_size = spin->sound->len;
         gsize event;
         gsize sample_offset = 0;
-        guint mark_offset = 0;
-        const gchar *mark_name = NULL;
-
-        for (event = spin->events_pos; TRUE; ++event)
-        {
-            espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event);
-
-            GST_DEBUG("event=%zd i->type=%d i->text_position=%d",
-                      event, i->type, i->text_position);
-
-            if (i->type == espeakEVENT_LIST_TERMINATED)
-            {
-                sample_offset = spin_size;
-                break;
-            }
-            else if (i->type == espeakEVENT_MARK)
-            {
-                if (i->sample == 0)
-                {
-                    if (spin->sound_offset == 0)
-                        emit_mark(self, i->text_position, i->id.name);
-                    continue;
-                }
-
-                mark_offset = i->text_position;
-                mark_name = i->id.name;
-                sample_offset = i->sample*2;
-                break;
-            }
-        }
-
-        spin->mark_offset = mark_offset;
-        spin->mark_name = mark_name;
+	espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, spin->events_pos);
+
+	GST_DEBUG("event=%zd i->type=%d i->text_position=%d",
+		  event, i->type, i->text_position);
+
+	    
+	if (i->type == espeakEVENT_LIST_TERMINATED) {
+	  sample_offset = spin_size;
+	} else {
+	  switch (i->type) {
+	  case espeakEVENT_MARK:
+	    emit_mark(self, i->text_position, i->id.name);
+	    break;
+	  case espeakEVENT_WORD:
+	    emit_word(self, i->text_position, i->length, i->id.number);
+	    break;
+	  case espeakEVENT_SENTENCE:
+	    emit_sentence(self, i->text_position, i->length, i->id.number);
+	    break;
+	  }
+	}
+
+	if (!sample_offset) {
+	  sample_offset = i->sample*2;
+	}
 
         return sample_offset - spin->sound_offset;
     }
@@ -320,15 +284,14 @@ play(Econtext *self, Espin *spin, gsize size_to_play)
     g_atomic_int_set(&spin->state, PLAY);
 
     switch (g_atomic_int_get(&self->track))
-    {
-        case ESPEAK_TRACK_WORD:
-            size_to_play = word(self, spin, size_to_play);
-            break;
-        case ESPEAK_TRACK_MARK:
-            size_to_play = mark(self, spin, size_to_play);
-            break;
-        default:
-            size_to_play = whole(spin, size_to_play);
+      {
+      case ESPEAK_TRACK_WORD:
+      case ESPEAK_TRACK_MARK:
+	size_to_play = events(self, spin, size_to_play);
+	break;
+      default:
+	size_to_play = whole(spin, size_to_play);
+	break;	
     }
 
     espeak_EVENT *event = &g_array_index(spin->events, espeak_EVENT,

comment:2 Changed 9 years ago by sascha_silbe

tshalif, I've confirmed your user account. You should now be able to attach files.

Changed 9 years ago by tshalif

comment:3 Changed 9 years ago by alsroot

Thanks for your patch tshalif.
I switched the gst-espeak sources to use the indent command and created the
http://git.sugarlabs.org/projects/gst-plugins-espeak/repos/mainline/blobs/master/HACKING file to guide contributors. Could you follow it and reattach the patch, or send me a new one?

Changed 9 years ago by alsroot

comment:4 Changed 9 years ago by alsroot

I attached your patch after passing it through the indent command.

comment:5 Changed 9 years ago by alsroot

  • Resolution set to fixed
  • Status changed from new to closed
Note: See TracTickets for help on using tickets.