1
0
Fork 0
flightgear/3rdparty/flite_hts_engine/flite/lang/usenglish/us_ffeatures.c

520 lines
19 KiB
C
Raw Normal View History

/*************************************************************************/
/* */
/* Language Technologies Institute */
/* Carnegie Mellon University */
/* Copyright (c) 2001 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author: Alan W Black (awb@cs.cmu.edu) */
/* Date: January 2001 */
/*************************************************************************/
/* Feature functions used by various cart trees etc */
/* These have been created as needed, and as some of the trees are */
/* from University of Edinburgh's Festival system their names and */
/* semantics follow them */
/*************************************************************************/
/* ----------------------------------------------------------------- */
/* The English TTS System "Flite+hts_engine" */
/* developed by HTS Working Group */
/* http://hts-engine.sourceforge.net/ */
/* ----------------------------------------------------------------- */
/* */
/* Copyright (c) 2005-2013 Nagoya Institute of Technology */
/* Department of Computer Science */
/* */
/* 2005-2008 Tokyo Institute of Technology */
/* Interdisciplinary Graduate School of */
/* Science and Engineering */
/* */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials provided */
/* with the distribution. */
/* - Neither the name of the HTS working group nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* ----------------------------------------------------------------- */
#include "cst_hrg.h"
#include "cst_phoneset.h"
#include "cst_regex.h"
#include "cst_ffeatures.h"
#include "us_ffeatures.h"
/* Forward declaration: gpos() is defined just below and is also called */
/* directly by lisp_distance_to_p_content()/lisp_distance_to_n_content(). */
static const cst_val *gpos(const cst_item *word);
/* Constant string values handed back by token_pos_guess() */
DEF_STATIC_CONST_VAL_STRING(val_string_numeric,"numeric");
DEF_STATIC_CONST_VAL_STRING(val_string_number,"number");
DEF_STATIC_CONST_VAL_STRING(val_string_month,"month");
DEF_STATIC_CONST_VAL_STRING(val_string_day,"day");
DEF_STATIC_CONST_VAL_STRING(val_string_other,"_other_");
DEF_STATIC_CONST_VAL_STRING(val_string_a,"a");
DEF_STATIC_CONST_VAL_STRING(val_string_flight,"flight");
DEF_STATIC_CONST_VAL_STRING(val_string_to,"to");
/* Value gpos() returns when the word is not found in the us_gpos table */
DEF_STATIC_CONST_VAL_STRING(val_string_content,"content");
static const cst_val *gpos(const cst_item *word)
{
    /* Guess at part of speech (function/content).                      */
    /* us_gpos is walked as a NULL-terminated list of NULL-terminated   */
    /* lists: element 0 of each inner list is the value returned when   */
    /* the word's "name" equals any of the remaining elements.          */
    const char *name = item_feat_string(word, "name");
    int cls = 0;

    while (us_gpos[cls])
    {
        int member = 1;

        while (us_gpos[cls][member])
        {
            if (cst_streq(name, val_string(us_gpos[cls][member])))
                return us_gpos[cls][0];
            member++;
        }
        cls++;
    }

    /* Not listed anywhere in the table: fall back to "content" */
    return (cst_val *)&val_string_content;
}
static const cst_val *num_digits(const cst_item *token)
{
    /* Length of the token's "name" string, wrapped as an int value */
    return val_int_n(cst_strlen(item_feat_string(token, "name")));
}
static const cst_val *month_range(const cst_item *token)
{
    /* "1" when the token's name, read as an int, lies in 1..31; else "0" */
    const int v = item_feat_int(token, "name");

    if ((v <= 0) || (v >= 32))
        return VAL_STRING_0;
    return VAL_STRING_1;
}
/* Lower-case month names and abbreviations recognised by token_pos_guess() */
static const char * const us_tpg_month_names[] = {
    "jan", "january", "feb", "february", "mar", "march",
    "apr", "april", "may", "jun", "june", "jul", "july",
    "aug", "august", "sep", "sept", "september",
    "oct", "october", "nov", "november", "dec", "december",
    NULL
};

/* Lower-case weekday names and abbreviations recognised by token_pos_guess() */
static const char * const us_tpg_day_names[] = {
    "sun", "sunday", "mon", "monday", "tue", "tues", "tuesday",
    "wed", "wednesday", "thu", "thurs", "thursday",
    "fri", "friday", "sat", "saturday",
    NULL
};

/* Returns 1 if name equals one of the strings in the NULL-terminated list, */
/* otherwise 0. */
static int us_tpg_name_in_list(const char *name, const char * const *list)
{
    for (; *list; list++)
        if (cst_streq(name, *list))
            return 1;
    return 0;
}

/* Coarse class of a token, decided from its down-cased "name" feature.   */
/* Returns one of the static string values "numeric", "number", "month",  */
/* "day", "a", "flight", "to" or "_other_"; the first matching test wins. */
static const cst_val* token_pos_guess(const cst_item* token)
{
    const char *name = item_feat_string(token, "name");
    char *dc = cst_downcase(name);  /* released with cst_free() below */
    const cst_val *r;

    if (cst_regex_match(cst_rx_digits, dc))
        r = (cst_val *)&val_string_numeric;
    /* This branch used to test cst_rx_double twice; the identical second */
    /* test could never change the outcome and has been dropped.          */
    /* NOTE(review): the second test may have been meant to use another   */
    /* pattern - confirm against the trees that consume this feature.     */
    else if (cst_regex_match(cst_rx_double, dc))
        r = (cst_val *)&val_string_number;
    else if (us_tpg_name_in_list(dc, us_tpg_month_names))
        r = (cst_val *)&val_string_month;
    else if (us_tpg_name_in_list(dc, us_tpg_day_names))
        r = (cst_val *)&val_string_day;
    /* ignoring the "token_most_common" condition, does get used */
    else if (cst_streq(dc, "a"))
        r = (cst_val *)&val_string_a;
    else if (cst_streq(dc, "flight"))
        r = (cst_val *)&val_string_flight;
    else if (cst_streq(dc, "to"))
        r = (cst_val *)&val_string_to;
    else
        r = (cst_val *)&val_string_other;

    cst_free(dc);
    return r;
}
#ifdef FLITE_PLUS_HTS_ENGINE
/* External feature function; lisp_distance_to_p_accent() and */
/* lisp_distance_to_n_accent() treat a non-zero val_int() of its result */
/* as "this syllable is accented". */
const cst_val *accented(const cst_item *p); /* defined in cst_ffeatures.c */
/* Number of items in the chain that starts at n and follows the n links */
/* (n itself included); 0 when n is NULL. */
int item_after_length(const cst_item *n){
    int count = 0;

    while (n != NULL) {
        count++;
        n = n->n;
    }
    return count;
}
/* Follows the p links back from n until an item with no predecessor is */
/* reached and returns that item; a NULL argument is returned as NULL. */
const cst_item *item_first(const cst_item *n){
    if (n != NULL) {
        while (n->p != NULL)
            n = n->p;
    }
    return n;
}
/* 11 12 13 53 54 */
/* Counts the items chained from the syllable's first daughter in the */
/* SylStructure relation and returns the count through val_string_n(). */
static const cst_val *syl_numphones(const cst_item *syl){
    const cst_item *in_sylstructure = item_as(syl, "SylStructure");
    const cst_item *first_daughter = item_daughter(in_sylstructure);

    return val_string_n(item_after_length(first_daughter));
}
/* 14 */
/* Zero-based index of the syllable among the daughters of its parent in */
/* the SylStructure relation. If the syllable is not met while walking   */
/* the daughters, the number of daughters visited is returned instead.   */
static const cst_val *pos_in_word(const cst_item *syl){
    const cst_item *target = item_as(syl, "SylStructure");
    const cst_item *sibling = item_daughter(item_parent(target));
    int index = 0;

    while (sibling && sibling != target) {
        sibling = item_next(sibling);
        index++;
    }
    return val_string_n(index);
}
/* 21 by Toda-san */
/* Distance, in Syllable-relation steps going backwards, to the nearest   */
/* earlier syllable whose "stress" feature is "1". The walk is bounded by */
/* the item found through the path below (presumably the first syllable   */
/* of the enclosing phrase), which is itself tested last. Returns 0 when  */
/* the syllable is that boundary item or no stressed syllable is found.   */
static const cst_val *lisp_distance_to_p_stress(const cst_item *syl){
    const cst_item *cur = item_as(syl, "Syllable");
    const cst_item *boundary =
        path_to_item(syl, "R:SylStructure.parent.R:Phrase.parent.daughter.R:SylStructure.daughter");
    int dist = 1;

    if (item_equal(cur, boundary))
        return val_string_n(0);

    cur = item_prev(cur);
    while (cur && !item_equal(cur, boundary) && (dist < CST_CONST_INT_MAX)) {
        if (strcmp("1", ffeature_string(cur, "stress")) == 0)
            return val_string_n(dist);
        cur = item_prev(cur);
        dist++;
    }

    /* The loop stopped on the boundary item (or ran out): test it too */
    if (strcmp("1", ffeature_string(cur, "stress")) == 0)
        return val_string_n(dist);
    return val_string_n(0);
}
/* 22 by Toda-san */
/* Distance, in Syllable-relation steps going forwards, to the nearest    */
/* later syllable whose "stress" feature is "1". The walk is bounded by   */
/* the item found through the path below (presumably the last syllable   */
/* of the enclosing phrase), which is itself tested last. Returns 0 when  */
/* the syllable is that boundary item or no stressed syllable is found.   */
static const cst_val *lisp_distance_to_n_stress(const cst_item *syl){
    const cst_item *cur = item_as(syl, "Syllable");
    const cst_item *boundary =
        path_to_item(syl, "R:SylStructure.parent.R:Phrase.parent.daughtern.R:SylStructure.daughtern");
    int dist = 1;

    if (item_equal(cur, boundary))
        return val_string_n(0);

    cur = item_next(cur);
    while (cur && !item_equal(cur, boundary) && (dist < CST_CONST_INT_MAX)) {
        if (strcmp("1", ffeature_string(cur, "stress")) == 0)
            return val_string_n(dist);
        cur = item_next(cur);
        dist++;
    }

    /* The loop stopped on the boundary item (or ran out): test it too */
    if (strcmp("1", ffeature_string(cur, "stress")) == 0)
        return val_string_n(dist);
    return val_string_n(0);
}
/* 23 by Toda-san */
/* Distance, in Syllable-relation steps going backwards, to the nearest   */
/* earlier syllable for which accented() yields a non-zero int. The walk  */
/* is bounded by the item found through the path below (presumably the    */
/* first syllable of the enclosing phrase), which is itself tested last.  */
/* Returns 0 when the syllable is that boundary item or nothing is found. */
static const cst_val *lisp_distance_to_p_accent(const cst_item *syl){
    const cst_item *cur = item_as(syl, "Syllable");
    const cst_item *boundary =
        path_to_item(syl, "R:SylStructure.parent.R:Phrase.parent.daughter.R:SylStructure.daughter");
    int dist = 1;

    if (item_equal(cur, boundary))
        return val_string_n(0);

    cur = item_prev(cur);
    while (cur && !item_equal(cur, boundary) && (dist < CST_CONST_INT_MAX)) {
        if (val_int(accented(cur)))
            return val_string_n(dist);
        cur = item_prev(cur);
        dist++;
    }

    /* The loop stopped on the boundary item (or ran out): test it too */
    if (val_int(accented(cur)))
        return val_string_n(dist);
    return val_string_n(0);
}
/* 24 by Toda-san */
/* Distance, in Syllable-relation steps going forwards, to the nearest    */
/* later syllable for which accented() yields a non-zero int. The walk    */
/* is bounded by the item found through the path below (presumably the    */
/* last syllable of the enclosing phrase), which is itself tested last.   */
/* Returns 0 when the syllable is that boundary item or nothing is found. */
static const cst_val *lisp_distance_to_n_accent(const cst_item *syl){
    const cst_item *cur = item_as(syl, "Syllable");
    const cst_item *boundary =
        path_to_item(syl, "R:SylStructure.parent.R:Phrase.parent.daughtern.R:SylStructure.daughtern");
    int dist = 1;

    if (item_equal(cur, boundary))
        return val_string_n(0);

    cur = item_next(cur);
    while (cur && !item_equal(cur, boundary) && (dist < CST_CONST_INT_MAX)) {
        if (val_int(accented(cur)))
            return val_string_n(dist);
        cur = item_next(cur);
        dist++;
    }

    /* The loop stopped on the boundary item (or ran out): test it too */
    if (val_int(accented(cur)))
        return val_string_n(dist);
    return val_string_n(0);
}
/* 25 */
/* Storage behind the value returned by syl_vowel(). Both objects are    */
/* overwritten on every call, so a result is only valid until the next   */
/* call to syl_vowel().                                                  */
static char syl_vowel_str[16];
static cst_val syl_vowel_val;
/* Returns, as a string value, the name of the first daughter of the     */
/* syllable (SylStructure relation) whose name starts with one of        */
/* 'a','e','i','o','u', or "novowel" when there is none. Names longer    */
/* than the static buffer are truncated rather than overrunning it.      */
static const cst_val *syl_vowel(const cst_item *syl){
    const cst_item *seg;
    const char *found = "novowel";

    for (seg = item_daughter(item_as(syl, "SylStructure")); seg; seg = item_next(seg)) {
        const char *name = item_name(seg);

        /* strchr() also matches the string's terminating NUL, so an    */
        /* empty name has to be rejected explicitly or it would count   */
        /* as a vowel.                                                  */
        if (name[0] != '\0' && strchr("aeiou", name[0]) != NULL) {
            found = name;
            break;
        }
    }

    /* Bounded copy; strncpy() does not terminate on truncation, so the */
    /* last byte is set by hand.                                        */
    strncpy(syl_vowel_str, found, sizeof(syl_vowel_str) - 1);
    syl_vowel_str[sizeof(syl_vowel_str) - 1] = '\0';

    CST_VAL_TYPE(&syl_vowel_val) = CST_VAL_TYPE_STRING;
    CST_VAL_STRING_LVAL(&syl_vowel_val) = syl_vowel_str;
    return &syl_vowel_val;
}
/* 32 */
/* Zero-based index of the item, viewed in the Phrase relation, counted   */
/* from the start of its chain as located by item_first(). If the item    */
/* is not met during the walk, the number of items visited is returned.   */
static const cst_val *pos_in_phrase(const cst_item *syl){
    const cst_item *target = item_as(syl, "Phrase");
    const cst_item *cur = item_first(target);
    int index = 0;

    while (cur && cur != target) {
        cur = item_next(cur);
        index++;
    }
    return val_string_n(index);
}
/* 33 */
/* Number of items from this one (inclusive) to the end of its chain in */
/* the Phrase relation. */
static const cst_val *words_out(const cst_item *syl){
    const cst_item *cur = item_as(syl, "Phrase");
    int remaining = 0;

    while (cur) {
        remaining++;
        cur = item_next(cur);
    }
    return val_string_n(remaining);
}
/* 34 by Toda-san */
/* Counts items whose "gpos" feature is "content", starting at word and   */
/* stepping backwards with item_prev() until the item reached through     */
/* "R:Phrase.parent.daughter" is met; that boundary item is not counted.  */
static const cst_val *content_words_in(const cst_item *word){
    const cst_item *boundary = path_to_item(word, "R:Phrase.parent.daughter");
    const cst_item *cur = word;
    int count = 0;

    while (cur && !item_equal(cur, boundary) && (count < CST_CONST_INT_MAX)) {
        if (cst_streq("content", ffeature_string(cur, "gpos")))
            count++;
        cur = item_prev(cur);
    }
    return val_string_n(count); /* its used randomly as int and float */
}
/* 35 by Toda-san */
/* Counts items whose "gpos" feature is "content", starting at word and   */
/* stepping forwards with item_next() until the item reached through      */
/* "R:Phrase.parent.daughtern" is met; that boundary item is not counted. */
static const cst_val *content_words_out(const cst_item *word){
    const cst_item *boundary = path_to_item(word, "R:Phrase.parent.daughtern");
    const cst_item *cur = word;
    int count = 0;

    while (cur && !item_equal(cur, boundary) && (count < CST_CONST_INT_MAX)) {
        if (cst_streq("content", ffeature_string(cur, "gpos")))
            count++;
        cur = item_next(cur);
    }
    return val_string_n(count); /* its used randomly as int and float */
}
/* 36 */
/* Number of backward steps in the Phrase relation to the nearest earlier */
/* item that gpos() classifies as content. When there is none, the number */
/* of preceding items is returned (0 if there are no preceding items).    */
static const cst_val *lisp_distance_to_p_content(const cst_item *syl){
    const cst_item *cur = item_prev(item_as(syl, "Phrase"));
    int dist = 0;

    while (cur) {
        dist++;
        /* gpos() hands back this very static object for content words, */
        /* so comparing addresses is enough                             */
        if (gpos(cur) == (cst_val *)&val_string_content)
            return val_string_n(dist);
        cur = item_prev(cur);
    }
    return val_string_n(dist);
}
/* 37 */
/* Number of forward steps in the Phrase relation to the nearest later    */
/* item that gpos() classifies as content. When there is none, the number */
/* of following items is returned (0 if there are no following items).    */
static const cst_val *lisp_distance_to_n_content(const cst_item *syl){
    const cst_item *cur = item_next(item_as(syl, "Phrase"));
    int dist = 0;

    while (cur) {
        dist++;
        /* gpos() hands back this very static object for content words, */
        /* so comparing addresses is enough                             */
        if (gpos(cur) == (cst_val *)&val_string_content)
            return val_string_n(dist);
        cur = item_next(cur);
    }
    return val_string_n(dist);
}
/* 38 39 40 59 60 by Toda-san */
/* Sum of the "word_numsyls" feature over the phrase's daughters, from */
/* "daughter" through "daughtern" inclusive. */
static const cst_val *lisp_num_syls_in_phrase(const cst_item *phrase){
    const cst_item *cur = path_to_item(phrase, "daughter");
    const cst_item *last = path_to_item(phrase, "daughtern");
    int total = 0;

    while (cur && !item_equal(cur, last) && (total < CST_CONST_INT_MAX)) {
        total += ffeature_int(cur, "word_numsyls");
        cur = item_next(cur);
    }
    /* The loop stops before the final daughter, so add it separately */
    total += ffeature_int(cur, "word_numsyls");
    return val_string_n(total);
}
/* 41 42 43 61 62 by Toda-san */
/* Number of daughters of the phrase, counted from "daughter" through   */
/* "daughtern" inclusive. The count starts at 1 to cover the final      */
/* daughter, on which the walk stops without incrementing.              */
static const cst_val *lisp_num_words_in_phrase(const cst_item *phrase){
    const cst_item *cur = path_to_item(phrase, "daughter");
    const cst_item *last = path_to_item(phrase, "daughtern");
    int count = 1;

    while (cur && !item_equal(cur, last) && (count < CST_CONST_INT_MAX)) {
        count++;
        cur = item_next(cur);
    }
    return val_string_n(count);
}
/* 46 by Toda-san */
/* Sum of the "lisp_num_syls_in_phrase" feature over every item in the */
/* chain containing phrase, from its first item to its last. */
static const cst_val *lisp_total_syls(const cst_item *phrase){
    const cst_item *cur = phrase;
    const cst_item *last = phrase;
    const cst_item *step;
    int total = 0;

    /* Rewind to the head of the chain and run forward to its tail */
    for (step = item_prev(cur); step != NULL; step = item_prev(cur))
        cur = step;
    for (step = item_next(last); step != NULL; step = item_next(last))
        last = step;

    while (cur && !item_equal(cur, last) && (total < CST_CONST_INT_MAX)) {
        total += ffeature_int(cur, "lisp_num_syls_in_phrase");
        cur = item_next(cur);
    }
    /* The loop stops before the final item, so add it separately */
    total += ffeature_int(cur, "lisp_num_syls_in_phrase");
    return val_string_n(total);
}
/* 47 by Toda-san */
/* Sum of the "lisp_num_words_in_phrase" feature over every item in the */
/* chain containing phrase, from its first item to its last. */
static const cst_val *lisp_total_words(const cst_item *phrase){
    const cst_item *cur = phrase;
    const cst_item *last = phrase;
    const cst_item *step;
    int total = 0;

    /* Rewind to the head of the chain and run forward to its tail */
    for (step = item_prev(cur); step != NULL; step = item_prev(cur))
        cur = step;
    for (step = item_next(last); step != NULL; step = item_next(last))
        last = step;

    while (cur && !item_equal(cur, last) && (total < CST_CONST_INT_MAX)) {
        total += ffeature_int(cur, "lisp_num_words_in_phrase");
        cur = item_next(cur);
    }
    /* The loop stops before the final item, so add it separately */
    total += ffeature_int(cur, "lisp_num_words_in_phrase");
    return val_string_n(total);
}
/* 48 by Toda-san */
/* Number of items in the chain containing phrase, from its first item  */
/* to its last. The count starts at 1 to cover the final item, on which */
/* the walk stops without incrementing.                                 */
static const cst_val *lisp_total_phrases(const cst_item *phrase){
    const cst_item *cur = phrase;
    const cst_item *last = phrase;
    const cst_item *step;
    int count = 1;

    /* Rewind to the head of the chain and run forward to its tail */
    for (step = item_prev(cur); step != NULL; step = item_prev(cur))
        cur = step;
    for (step = item_next(last); step != NULL; step = item_next(last))
        last = step;

    while (cur && !item_equal(cur, last) && (count < CST_CONST_INT_MAX)) {
        count++;
        cur = item_next(cur);
    }
    return val_string_n(count);
}
#endif /* FLITE_PLUS_HTS_ENGINE */
/* Registers the language independent feature functions followed by the */
/* US English ones defined in this file into ffunctions. Each function  */
/* is registered under a name identical to its C identifier.            */
void us_ff_register(cst_features *ffunctions)
{
/* Registers FN under its own identifier spelled out as a string */
#define US_FF_REGISTER(FN) ff_register(ffunctions, #FN, FN)

    /* The language independent ones */
    basic_ff_register(ffunctions);

    US_FF_REGISTER(gpos);
    US_FF_REGISTER(num_digits);
    US_FF_REGISTER(month_range);
    US_FF_REGISTER(token_pos_guess);

#ifdef FLITE_PLUS_HTS_ENGINE
    US_FF_REGISTER(syl_numphones);              /* 11 12 13 53 54 */
    US_FF_REGISTER(pos_in_word);                /* 14 */
    US_FF_REGISTER(lisp_distance_to_p_stress);  /* 21 */
    US_FF_REGISTER(lisp_distance_to_n_stress);  /* 22 */
    US_FF_REGISTER(lisp_distance_to_p_accent);  /* 23 */
    US_FF_REGISTER(lisp_distance_to_n_accent);  /* 24 */
    US_FF_REGISTER(syl_vowel);                  /* 25 */
    US_FF_REGISTER(pos_in_phrase);              /* 32 */
    US_FF_REGISTER(words_out);                  /* 33 */
    US_FF_REGISTER(content_words_in);           /* 34 */
    US_FF_REGISTER(content_words_out);          /* 35 */
    US_FF_REGISTER(lisp_distance_to_p_content); /* 36 */
    US_FF_REGISTER(lisp_distance_to_n_content); /* 37 */
    US_FF_REGISTER(lisp_num_syls_in_phrase);    /* 38 39 40 59 60 */
    US_FF_REGISTER(lisp_num_words_in_phrase);   /* 41 42 43 61 62 */
    US_FF_REGISTER(lisp_total_syls);            /* 46 */
    US_FF_REGISTER(lisp_total_words);           /* 47 */
    US_FF_REGISTER(lisp_total_phrases);         /* 48 */
#endif /* FLITE_PLUS_HTS_ENGINE */

#undef US_FF_REGISTER
}