2014-03-10 09:32:03 +00:00
/* ----------------------------------------------------------------- */
/* The HMM-Based Speech Synthesis Engine "hts_engine API" */
/* developed by HTS Working Group */
/* http://hts-engine.sourceforge.net/ */
/* ----------------------------------------------------------------- */
/* */
2015-12-30 17:05:25 -06:00
/* Copyright (c) 2001-2015 Nagoya Institute of Technology */
2014-03-10 09:32:03 +00:00
/* Department of Computer Science */
/* */
/* 2001-2008 Tokyo Institute of Technology */
/* Interdisciplinary Graduate School of */
/* Science and Engineering */
/* */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials provided */
/* with the distribution. */
/* - Neither the name of the HTS working group nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* ----------------------------------------------------------------- */
# ifndef HTS_ENGINE_C
# define HTS_ENGINE_C
# ifdef __cplusplus
# define HTS_ENGINE_C_START extern "C" {
# define HTS_ENGINE_C_END }
# else
# define HTS_ENGINE_C_START
# define HTS_ENGINE_C_END
# endif /* __CPLUSPLUS */
HTS_ENGINE_C_START ;
# include <stdlib.h> /* for atof() */
# include <string.h> /* for strcpy() */
# include <math.h> /* for pow() */
/* hts_engine libraries */
# include "HTS_hidden.h"
/*
 * HTS_Engine_initialize: reset every synthesis setting to its initial value
 * and initialize the audio, model set, label, and stream-set members.
 * No memory is released here; all array pointers are simply set to NULL.
 */
void HTS_Engine_initialize(HTS_Engine * engine)
{
   HTS_Condition *cond = &engine->condition;

   /* global settings */
   cond->sampling_frequency = 0;
   cond->fperiod = 0;
   cond->audio_buff_size = 0;
   cond->stop = FALSE;
   cond->volume = 1.0;
   cond->msd_threshold = NULL;
   cond->gv_weight = NULL;

   /* duration settings */
   cond->speed = 1.0;
   cond->phoneme_alignment_flag = FALSE;

   /* spectrum settings */
   cond->stage = 0;
   cond->use_log_gain = FALSE;
   cond->alpha = 0.0;
   cond->beta = 0.0;

   /* log F0 settings */
   cond->additional_half_tone = 0.0;

   /* interpolation weights are allocated later by HTS_Engine_load */
   cond->duration_iw = NULL;
   cond->parameter_iw = NULL;
   cond->gv_iw = NULL;

   /* sub-modules, in the same order as the original implementation */
   HTS_Audio_initialize(&engine->audio);
   HTS_ModelSet_initialize(&engine->ms);
   HTS_Label_initialize(&engine->label);
   HTS_SStreamSet_initialize(&engine->sss);
   HTS_PStreamSet_initialize(&engine->pss);
   HTS_GStreamSet_initialize(&engine->gss);
}
/*
 * HTS_Engine_alloc_weight_matrix: allocate a num_voices x nstream matrix of
 * doubles through HTS_calloc and set every element to weight.
 */
static double **HTS_Engine_alloc_weight_matrix(size_t num_voices, size_t nstream, double weight)
{
   size_t i, j;
   double **matrix = (double **) HTS_calloc(num_voices, sizeof(double *));

   for (i = 0; i < num_voices; i++) {
      matrix[i] = (double *) HTS_calloc(nstream, sizeof(double));
      for (j = 0; j < nstream; j++)
         matrix[i][j] = weight;
   }
   return matrix;
}

/*
 * HTS_Engine_load: load HTS voices
 *
 * Clears the engine, loads the given voices into the model set and fills the
 * synthesis condition with defaults: MSD threshold 0.5 and GV weight 1.0 per
 * stream, and equal interpolation weights (1 / num_voices) for duration,
 * parameters and GV. Spectrum settings (stage, log-gain flag, alpha) are
 * parsed from the option string of stream 0 when the matching keys are found.
 *
 * Returns TRUE on success. Returns FALSE (with the engine cleared) when
 * num_voices is 0 or when HTS_ModelSet_load does not return TRUE.
 */
HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices)
{
   size_t i;
   size_t nstream;
   int gamma;
   double average_weight;
   const char *option, *find;

   /* reset engine */
   HTS_Engine_clear(engine);

   /* without voices the average weight 1.0 / num_voices is undefined */
   if (num_voices == 0)
      return FALSE;

   /* load voices */
   if (HTS_ModelSet_load(&engine->ms, voices, num_voices) != TRUE) {
      HTS_Engine_clear(engine);
      return FALSE;
   }
   nstream = HTS_ModelSet_get_nstream(&engine->ms);
   average_weight = 1.0 / num_voices;

   /* global */
   engine->condition.sampling_frequency = HTS_ModelSet_get_sampling_frequency(&engine->ms);
   engine->condition.fperiod = HTS_ModelSet_get_fperiod(&engine->ms);
   engine->condition.msd_threshold = (double *) HTS_calloc(nstream, sizeof(double));
   for (i = 0; i < nstream; i++)
      engine->condition.msd_threshold[i] = 0.5;
   engine->condition.gv_weight = (double *) HTS_calloc(nstream, sizeof(double));
   for (i = 0; i < nstream; i++)
      engine->condition.gv_weight[i] = 1.0;

   /* spectrum: parsed from the option string of stream 0 */
   option = HTS_ModelSet_get_option(&engine->ms, 0);
   if (option != NULL) {        /* strstr() on a null pointer is undefined */
      find = strstr(option, " GAMMA= ");
      if (find != NULL) {
         gamma = atoi(&find[strlen(" GAMMA= ")]);
         /* a negative value would wrap to a huge size_t; keep the default instead */
         if (gamma >= 0)
            engine->condition.stage = (size_t) gamma;
      }
      find = strstr(option, " LN_GAIN= ");
      if (find != NULL)
         engine->condition.use_log_gain = atoi(&find[strlen(" LN_GAIN= ")]) == 1 ? TRUE : FALSE;
      find = strstr(option, " ALPHA= ");
      if (find != NULL)
         engine->condition.alpha = atof(&find[strlen(" ALPHA= ")]);
   }

   /* interpolation weights: every voice contributes equally by default */
   engine->condition.duration_iw = (double *) HTS_calloc(num_voices, sizeof(double));
   for (i = 0; i < num_voices; i++)
      engine->condition.duration_iw[i] = average_weight;
   engine->condition.parameter_iw = HTS_Engine_alloc_weight_matrix(num_voices, nstream, average_weight);
   engine->condition.gv_iw = HTS_Engine_alloc_weight_matrix(num_voices, nstream, average_weight);

   return TRUE;
}
/*
 * HTS_Engine_set_sampling_frequency: store the sampling frequency (values
 * below 1 are raised to 1) and pass it, together with the current audio
 * buffer size, to HTS_Audio_set_parameter.
 */
void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i)
{
   HTS_Condition *cond = &engine->condition;

   cond->sampling_frequency = (i < 1) ? 1 : i;
   HTS_Audio_set_parameter(&engine->audio, cond->sampling_frequency, cond->audio_buff_size);
}
/* HTS_Engine_get_sampling_frequency: return the sampling frequency stored in the engine condition */
size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return cond->sampling_frequency;
}
/* HTS_Engine_set_fperiod: store the frame period; values below 1 are raised to 1 */
void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i)
{
   HTS_Condition *cond = &engine->condition;

   cond->fperiod = (i < 1) ? 1 : i;
}
/* HTS_Engine_get_fperiod: return the frame period stored in the engine condition */
size_t HTS_Engine_get_fperiod(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return cond->fperiod;
}
/*
 * HTS_Engine_set_audio_buff_size: store the audio buffer size and pass it,
 * together with the current sampling frequency, to HTS_Audio_set_parameter.
 */
void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i)
{
   HTS_Condition *cond = &engine->condition;

   cond->audio_buff_size = i;
   HTS_Audio_set_parameter(&engine->audio, cond->sampling_frequency, cond->audio_buff_size);
}
/* HTS_Engine_get_audio_buff_size: return the audio buffer size stored in the engine condition */
size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return cond->audio_buff_size;
}
/* HTS_Engine_set_stop_flag: store b as the stop flag of the engine condition */
void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b)
{
   HTS_Condition *cond = &engine->condition;

   cond->stop = b;
}
/* HTS_Engine_get_stop_flag: return the stop flag of the engine condition */
HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return cond->stop;
}
/*
 * HTS_Engine_set_volume: set volume in db.
 * The value kept in the condition is exp(f * DB), i.e. the inverse of the
 * conversion done by HTS_Engine_get_volume.
 */
void HTS_Engine_set_volume(HTS_Engine * engine, double f)
{
   HTS_Condition *cond = &engine->condition;

   cond->volume = exp(f * DB);
}
/*
 * HTS_Engine_get_volume: get volume in db.
 * Converts the stored volume back with log(volume) / DB.
 */
double HTS_Engine_get_volume(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return log(cond->volume) / DB;
}
/*
 * HTS_Engine_set_msd_threshold: set MSD threshold of one stream.
 * f is clamped to the range [0.0, 1.0] before it is stored.
 * stream_index is not range-checked here.
 */
void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f)
{
   double *thresholds = engine->condition.msd_threshold;

   if (f < 0.0)
      f = 0.0;
   else if (f > 1.0)
      f = 1.0;
   thresholds[stream_index] = f;
}
/* HTS_Engine_get_msd_threshold: return the MSD threshold stored for stream_index (index is not range-checked) */
double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index)
{
   const double *thresholds = engine->condition.msd_threshold;

   return thresholds[stream_index];
}
/*
 * HTS_Engine_set_gv_weight: set GV weight of one stream.
 * Negative values are replaced by 0.0. stream_index is not range-checked here.
 */
void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f)
{
   double *weights = engine->condition.gv_weight;

   weights[stream_index] = (f < 0.0) ? 0.0 : f;
}
/* HTS_Engine_get_gv_weight: return the GV weight stored for stream_index (index is not range-checked) */
double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index)
{
   const double *weights = engine->condition.gv_weight;

   return weights[stream_index];
}
/* HTS_Engine_set_speed: set speech speed; values below 1.0E-06 are raised to 1.0E-06 */
void HTS_Engine_set_speed(HTS_Engine * engine, double f)
{
   HTS_Condition *cond = &engine->condition;

   cond->speed = (f < 1.0E-06) ? 1.0E-06 : f;
}
/*
 * HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label.
 * The flag is later handed to HTS_SStreamSet_create when the state sequence is generated.
 */
void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b)
{
   HTS_Condition *cond = &engine->condition;

   cond->phoneme_alignment_flag = b;
}
/* HTS_Engine_set_alpha: set alpha; f is clamped to [0.0, 1.0] before it is stored */
void HTS_Engine_set_alpha(HTS_Engine * engine, double f)
{
   HTS_Condition *cond = &engine->condition;

   if (f < 0.0)
      f = 0.0;
   else if (f > 1.0)
      f = 1.0;
   cond->alpha = f;
}
/* HTS_Engine_get_alpha: return the alpha value stored in the engine condition */
double HTS_Engine_get_alpha(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return cond->alpha;
}
/* HTS_Engine_set_beta: set beta; f is clamped to [0.0, 1.0] before it is stored */
void HTS_Engine_set_beta(HTS_Engine * engine, double f)
{
   HTS_Condition *cond = &engine->condition;

   if (f < 0.0)
      f = 0.0;
   else if (f > 1.0)
      f = 1.0;
   cond->beta = f;
}
/* HTS_Engine_get_beta: return the beta value stored in the engine condition */
double HTS_Engine_get_beta(HTS_Engine * engine)
{
   const HTS_Condition *cond = &engine->condition;

   return cond->beta;
}
/*
 * HTS_Engine_add_half_tone: add half tone.
 * Stores f as the additional half-tone amount (the previous value is
 * overwritten, not accumulated).
 */
void HTS_Engine_add_half_tone(HTS_Engine * engine, double f)
{
   HTS_Condition *cond = &engine->condition;

   cond->additional_half_tone = f;
}
/* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration of one voice (index is not range-checked) */
void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f)
{
   double *weights = engine->condition.duration_iw;

   weights[voice_index] = f;
}
/* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration of one voice (index is not range-checked) */
double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index)
{
   const double *weights = engine->condition.duration_iw;

   return weights[voice_index];
}
/*
 * HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter.
 * The weight table is indexed [voice_index][stream_index]; neither index is range-checked.
 */
void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
{
   double *voice_weights = engine->condition.parameter_iw[voice_index];

   voice_weights[stream_index] = f;
}
/*
 * HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter.
 * The weight table is indexed [voice_index][stream_index]; neither index is range-checked.
 */
double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
{
   const double *voice_weights = engine->condition.parameter_iw[voice_index];

   return voice_weights[stream_index];
}
/*
 * HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV.
 * The weight table is indexed [voice_index][stream_index]; neither index is range-checked.
 */
void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
{
   double *voice_weights = engine->condition.gv_iw[voice_index];

   voice_weights[stream_index] = f;
}
/*
 * HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV.
 * The weight table is indexed [voice_index][stream_index]; neither index is range-checked.
 */
double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
{
   const double *voice_weights = engine->condition.gv_iw[voice_index];

   return voice_weights[stream_index];
}
/* HTS_Engine_get_total_state: get total number of states, as reported by the state stream set */
size_t HTS_Engine_get_total_state(HTS_Engine * engine)
{
   HTS_SStreamSet *sss = &engine->sss;

   return HTS_SStreamSet_get_total_state(sss);
}
/* HTS_Engine_set_state_mean: set mean value of state; forwards all arguments to HTS_SStreamSet_set_mean */
void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f)
{
   HTS_SStreamSet *sss = &engine->sss;

   HTS_SStreamSet_set_mean(sss, stream_index, state_index, vector_index, f);
}
/* HTS_Engine_get_state_mean: get mean value of state; forwards all arguments to HTS_SStreamSet_get_mean */
double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index)
{
   HTS_SStreamSet *sss = &engine->sss;

   return HTS_SStreamSet_get_mean(sss, stream_index, state_index, vector_index);
}
/* HTS_Engine_get_state_duration: get state duration, as reported by HTS_SStreamSet_get_duration */
size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index)
{
   HTS_SStreamSet *sss = &engine->sss;

   return HTS_SStreamSet_get_duration(sss, state_index);
}
/* HTS_Engine_get_nvoices: get number of voices held by the model set */
size_t HTS_Engine_get_nvoices(HTS_Engine * engine)
{
   HTS_ModelSet *ms = &engine->ms;

   return HTS_ModelSet_get_nvoices(ms);
}
/* HTS_Engine_get_nstream: get number of streams held by the model set */
size_t HTS_Engine_get_nstream(HTS_Engine * engine)
{
   HTS_ModelSet *ms = &engine->ms;

   return HTS_ModelSet_get_nstream(ms);
}
/* HTS_Engine_get_nstate: get number of states reported by the model set */
size_t HTS_Engine_get_nstate(HTS_Engine * engine)
{
   HTS_ModelSet *ms = &engine->ms;

   return HTS_ModelSet_get_nstate(ms);
}
2015-12-30 17:05:25 -06:00
/* HTS_Engine_get_fullcontext_label_format: get full context label format string from the model set */
const char *HTS_Engine_get_fullcontext_label_format(HTS_Engine * engine)
{
   HTS_ModelSet *ms = &engine->ms;

   return HTS_ModelSet_get_fullcontext_label_format(ms);
}
/* HTS_Engine_get_fullcontext_label_version: get full context label version string from the model set */
const char *HTS_Engine_get_fullcontext_label_version(HTS_Engine * engine)
{
   HTS_ModelSet *ms = &engine->ms;

   return HTS_ModelSet_get_fullcontext_label_version(ms);
}
2014-03-10 09:32:03 +00:00
/* HTS_Engine_get_total_frame: get total number of frames, as reported by the generated stream set */
size_t HTS_Engine_get_total_frame(HTS_Engine * engine)
{
   HTS_GStreamSet *gss = &engine->gss;

   return HTS_GStreamSet_get_total_frame(gss);
}
/* HTS_Engine_get_nsamples: get number of samples, as reported by the generated stream set */
size_t HTS_Engine_get_nsamples(HTS_Engine * engine)
{
   HTS_GStreamSet *gss = &engine->gss;

   return HTS_GStreamSet_get_total_nsamples(gss);
}
/* HTS_Engine_get_generated_parameter: output generated parameter; forwards all indices to HTS_GStreamSet_get_parameter */
double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index)
{
   HTS_GStreamSet *gss = &engine->gss;

   return HTS_GStreamSet_get_parameter(gss, stream_index, frame_index, vector_index);
}
/* HTS_Engine_get_generated_speech: output generated speech sample at index, via HTS_GStreamSet_get_speech */
double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index)
{
   HTS_GStreamSet *gss = &engine->gss;

   return HTS_GStreamSet_get_speech(gss, index);
}
/*
 * HTS_Engine_generate_state_sequence: generate state sequence (1st synthesis step)
 *
 * Builds the state stream set from the model set and the already loaded
 * label, using the alignment flag, speed and interpolation weights of the
 * engine condition. On failure the engine is refreshed and FALSE is returned.
 *
 * When a non-zero additional half tone is configured, element 0 of the mean
 * of stream 1 is shifted by additional_half_tone * HALF_TONE for every state
 * and clamped to [MIN_LF0, MAX_LF0].
 * NOTE(review): stream 1 is presumably the log-F0 stream - confirm.
 */
static HTS_Boolean HTS_Engine_generate_state_sequence(HTS_Engine * engine)
{
   size_t i;
   double f;

   if (HTS_SStreamSet_create(&engine->sss, &engine->ms, &engine->label, engine->condition.phoneme_alignment_flag, engine->condition.speed, engine->condition.duration_iw, engine->condition.parameter_iw, engine->condition.gv_iw) != TRUE) {
      HTS_Engine_refresh(engine);
      return FALSE;
   }
   if (engine->condition.additional_half_tone != 0.0) {
      for (i = 0; i < HTS_Engine_get_total_state(engine); i++) {
         f = HTS_Engine_get_state_mean(engine, 1, i, 0);
         f += engine->condition.additional_half_tone * HALF_TONE;
         if (f < MIN_LF0)
            f = MIN_LF0;
         else if (f > MAX_LF0)
            f = MAX_LF0;
         HTS_Engine_set_state_mean(engine, 1, i, 0, f);
      }
   }
   return TRUE;
}
/*
 * HTS_Engine_generate_state_sequence_from_fn: generate state sequence from file name (1st synthesis step)
 * Refreshes the engine, loads the label from fn, then generates the state
 * sequence and returns that step's result.
 */
HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn)
{
   HTS_Condition *cond = &engine->condition;

   HTS_Engine_refresh(engine);
   HTS_Label_load_from_fn(&engine->label, cond->sampling_frequency, cond->fperiod, fn);

   return HTS_Engine_generate_state_sequence(engine);
}
/*
 * HTS_Engine_generate_state_sequence_from_strings: generate state sequence from strings (1st synthesis step)
 * Refreshes the engine, loads the label from the given lines, then generates
 * the state sequence and returns that step's result.
 */
HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
{
   HTS_Condition *cond = &engine->condition;

   HTS_Engine_refresh(engine);
   HTS_Label_load_from_strings(&engine->label, cond->sampling_frequency, cond->fperiod, lines, num_lines);

   return HTS_Engine_generate_state_sequence(engine);
}
/*
 * HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step)
 * Creates the parameter stream set from the state stream set using the
 * configured MSD thresholds and GV weights; returns the result of that call.
 */
HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine)
{
   HTS_Condition *cond = &engine->condition;

   return HTS_PStreamSet_create(&engine->pss, &engine->sss, cond->msd_threshold, cond->gv_weight);
}
/*
 * HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step)
 * Creates the generated stream set from the parameter stream set. The audio
 * member is passed only when the audio buffer size is greater than 0;
 * otherwise NULL is passed in its place.
 */
HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine)
{
   HTS_Condition *cond = &engine->condition;

   return HTS_GStreamSet_create(&engine->gss, &engine->pss,
                                cond->stage, cond->use_log_gain,
                                cond->sampling_frequency, cond->fperiod,
                                cond->alpha, cond->beta,
                                &cond->stop, cond->volume,
                                cond->audio_buff_size > 0 ? &engine->audio : NULL);
}
/*
 * HTS_Engine_synthesize: synthesize speech
 * Runs the three synthesis steps in order (state sequence, parameter
 * sequence, sample sequence). The first step that does not return TRUE stops
 * the chain: the engine is refreshed and FALSE is returned.
 */
static HTS_Boolean HTS_Engine_synthesize(HTS_Engine * engine)
{
   /* || short-circuits, so later steps run only when earlier ones succeeded */
   if (HTS_Engine_generate_state_sequence(engine) != TRUE
       || HTS_Engine_generate_parameter_sequence(engine) != TRUE
       || HTS_Engine_generate_sample_sequence(engine) != TRUE) {
      HTS_Engine_refresh(engine);
      return FALSE;
   }
   return TRUE;
}
/*
 * HTS_Engine_synthesize_from_fn: synthesize speech from file name
 * Refreshes the engine, loads the label from fn, then runs the full
 * synthesis and returns its result.
 */
HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn)
{
   HTS_Condition *cond = &engine->condition;

   HTS_Engine_refresh(engine);
   HTS_Label_load_from_fn(&engine->label, cond->sampling_frequency, cond->fperiod, fn);

   return HTS_Engine_synthesize(engine);
}
/*
 * HTS_Engine_synthesize_from_strings: synthesize speech from strings
 * Refreshes the engine, loads the label from the given lines, then runs the
 * full synthesis and returns its result.
 */
HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
{
   HTS_Condition *cond = &engine->condition;

   HTS_Engine_refresh(engine);
   HTS_Label_load_from_strings(&engine->label, cond->sampling_frequency, cond->fperiod, lines, num_lines);

   return HTS_Engine_synthesize(engine);
}
/*
 * HTS_Engine_save_information: save trace information
 *
 * Writes a text report to fp with four sections: global parameters, duration
 * parameters, per-stream parameters and the generated sequence (one entry per
 * label, with per-state durations and tree/PDF indices for each voice).
 *
 * Side effect: the duration, parameter and GV interpolation weights stored in
 * the engine condition are rescaled in place (each non-zero weight is divided
 * by the sum over voices) before they are printed.
 *
 * Every value printed with a %lu conversion is cast to unsigned long as a
 * whole expression, so the argument type always matches the conversion even
 * where size_t is wider than unsigned long.
 */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   size_t i, j, k, l, m, n;
   double temp;
   HTS_Condition *condition = &engine->condition;
   HTS_ModelSet *ms = &engine->ms;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_PStreamSet *pss = &engine->pss;

   /* global parameter */
   fprintf(fp, " [Global parameter] \n ");
   fprintf(fp, " Sampring frequency -> %8lu(Hz) \n ", (unsigned long) condition->sampling_frequency);
   fprintf(fp, " Frame period -> %8lu(point) \n ", (unsigned long) condition->fperiod);
   fprintf(fp, " %8.5f(msec) \n ", 1e+3 * condition->fperiod / condition->sampling_frequency);
   fprintf(fp, " All-pass constant -> %8.5f \n ", (float) condition->alpha);
   fprintf(fp, " Gamma -> %8.5f \n ", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
   if (condition->stage != 0) {
      if (condition->use_log_gain == TRUE)
         fprintf(fp, " Log gain flag -> TRUE \n ");
      else
         fprintf(fp, " Log gain flag -> FALSE \n ");
   }
   fprintf(fp, " Postfiltering coefficient -> %8.5f \n ", (float) condition->beta);
   fprintf(fp, " Audio buffer size -> %8lu(sample) \n ", (unsigned long) condition->audio_buff_size);
   fprintf(fp, " \n ");

   /* duration parameter */
   fprintf(fp, " [Duration parameter] \n ");
   fprintf(fp, " Number of states -> %8lu \n ", (unsigned long) HTS_ModelSet_get_nstate(ms));
   fprintf(fp, " Interpolation size -> %8lu \n ", (unsigned long) HTS_ModelSet_get_nvoices(ms));
   /* check interpolation: rescale non-zero weights by their sum (in place) */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
      temp += condition->duration_iw[i];
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      if (condition->duration_iw[i] != 0.0)
         condition->duration_iw[i] /= temp;
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%) \n ", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
   fprintf(fp, " \n ");

   fprintf(fp, " [Stream parameter] \n ");
   for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
      /* stream parameter */
      fprintf(fp, " Stream[%2lu] vector length -> %8lu \n ", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
      fprintf(fp, " Dynamic window size -> %8lu \n ", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
      /* interpolation: rescale non-zero weights of stream i by their sum (in place) */
      fprintf(fp, " Interpolation size -> %8lu \n ", (unsigned long) HTS_ModelSet_get_nvoices(ms));
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
         temp += condition->parameter_iw[j][i];
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         if (condition->parameter_iw[j][i] != 0.0)
            condition->parameter_iw[j][i] /= temp;
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%) \n ", (unsigned long) j, (float) (100 * condition->parameter_iw[j][i]));
      /* MSD */
      if (HTS_ModelSet_is_msd(ms, i)) { /* for MSD */
         fprintf(fp, " MSD flag -> TRUE \n ");
         fprintf(fp, " MSD threshold -> %8.5f \n ", condition->msd_threshold[i]);
      } else {                  /* for non MSD */
         fprintf(fp, " MSD flag -> FALSE \n ");
      }
      /* GV */
      if (HTS_ModelSet_use_gv(ms, i)) {
         fprintf(fp, " GV flag -> TRUE \n ");
         fprintf(fp, " GV weight -> %8.0f(%%) \n ", (float) (100 * condition->gv_weight[i]));
         fprintf(fp, " GV interpolation size -> %8lu \n ", (unsigned long) HTS_ModelSet_get_nvoices(ms));
         /* interpolation: rescale non-zero GV weights of stream i by their sum (in place) */
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
            temp += condition->gv_iw[j][i];
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            if (condition->gv_iw[j][i] != 0.0)
               condition->gv_iw[j][i] /= temp;
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            fprintf(fp, " GV interpolation weight[%2lu] -> %8.0f(%%) \n ", (unsigned long) j, (float) (100 * condition->gv_iw[j][i]));
      } else {
         fprintf(fp, " GV flag -> FALSE \n ");
      }
   }
   fprintf(fp, " \n ");

   /* generated sequence */
   fprintf(fp, " [Generated sequence] \n ");
   fprintf(fp, " Number of HMMs -> %8lu \n ", (unsigned long) HTS_Label_get_size(label));
   /* cast the whole product: the result of size_t * size_t must match %lu */
   fprintf(fp, " Number of stats -> %8lu \n ", (unsigned long) (HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms)));
   fprintf(fp, " Length of this speech -> %8.3f(sec) \n ", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
   fprintf(fp, " -> %8lu(frames) \n ", (unsigned long) (HTS_PStreamSet_get_total_frame(pss) * condition->fperiod));

   for (i = 0; i < HTS_Label_get_size(label); i++) {
      fprintf(fp, " HMM[%2lu] \n ", (unsigned long) i);
      fprintf(fp, " Name -> %s \n ", HTS_Label_get_string(label, i));
      fprintf(fp, " Duration \n ");
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
         fprintf(fp, " Interpolation[%2lu] \n ", (unsigned long) j);
         HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
         fprintf(fp, " Tree index -> %8lu \n ", (unsigned long) k);
         fprintf(fp, " PDF index -> %8lu \n ", (unsigned long) l);
      }
      for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
         /* states are reported (and looked up below) with an offset of 2 */
         fprintf(fp, " State[%2lu] \n ", (unsigned long) (j + 2));
         fprintf(fp, " Length -> %8lu(frames) \n ", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
         for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
            fprintf(fp, " Stream[%2lu] \n ", (unsigned long) k);
            if (HTS_ModelSet_is_msd(ms, k)) {
               if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
                  fprintf(fp, " MSD flag -> TRUE \n ");
               else
                  fprintf(fp, " MSD flag -> FALSE \n ");
            }
            for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
               fprintf(fp, " Interpolation[%2lu] \n ", (unsigned long) l);
               HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
               fprintf(fp, " Tree index -> %8lu \n ", (unsigned long) m);
               fprintf(fp, " PDF index -> %8lu \n ", (unsigned long) n);
            }
         }
      }
   }
}
/*
 * HTS_Engine_save_label: save label with time
 * For each label entry, sums the durations of its nstate consecutive states
 * and writes one line with start time, end time and the label string. Times
 * are frame counts multiplied by fperiod * 1.0e+07 / sampling_frequency.
 */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   size_t entry, k;
   size_t start_frame = 0, next_state = 0, length;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   size_t nstate = HTS_ModelSet_get_nstate(&engine->ms);
   double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;

   for (entry = 0; entry < HTS_Label_get_size(label); entry++) {
      /* accumulate the duration of every state belonging to this entry */
      length = 0;
      for (k = 0; k < nstate; k++) {
         length += HTS_SStreamSet_get_duration(sss, next_state);
         next_state++;
      }
      fprintf(fp, " %lu %lu %s \n ", (unsigned long) (start_frame * rate), (unsigned long) ((start_frame + length) * rate), HTS_Label_get_string(label, entry));
      start_frame += length;
   }
}
/*
 * HTS_Engine_save_generated_parameter: save generated parameter
 * Writes every generated value of the given stream to fp, frame by frame, as
 * raw float values in host byte order (plain fwrite).
 */
void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp)
{
   size_t frame, dim;
   float value;
   HTS_GStreamSet *gss = &engine->gss;

   for (frame = 0; frame < HTS_GStreamSet_get_total_frame(gss); frame++) {
      for (dim = 0; dim < HTS_GStreamSet_get_vector_length(gss, stream_index); dim++) {
         value = (float) HTS_GStreamSet_get_parameter(gss, stream_index, frame, dim);
         fwrite(&value, sizeof(float), 1, fp);
      }
   }
}
/*
 * HTS_Engine_save_generated_speech: save generated speech
 * Writes every generated sample to fp as a raw short in host byte order.
 * Samples above 32767.0 or below -32768.0 are clipped to those limits; all
 * other samples are converted with a plain cast.
 */
void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp)
{
   size_t pos;
   double sample;
   short pcm;
   HTS_GStreamSet *gss = &engine->gss;

   for (pos = 0; pos < HTS_GStreamSet_get_total_nsamples(gss); pos++) {
      sample = HTS_GStreamSet_get_speech(gss, pos);
      pcm = (sample > 32767.0) ? 32767 : ((sample < -32768.0) ? -32768 : (short) sample);
      fwrite(&pcm, sizeof(short), 1, fp);
   }
}
/*
 * HTS_Engine_save_riff: save RIFF format file
 *
 * Writes a 44-byte RIFF/WAVE header followed by the generated speech as
 * samples of type short, all through HTS_fwrite_little_endian. The header
 * variables are named after the (1-based) byte positions they occupy.
 * Samples above 32767.0 or below -32768.0 are clipped to those limits.
 *
 * The chunk tags are single-character constants ('R', 'I', 'F', 'F', ...):
 * a padded multi-character constant such as ' R ' does not fit in a char and
 * would not produce the tag bytes the format requires.
 *
 * NOTE(review): the header fields rely on int being 4 bytes and short being
 * 2 bytes, and the sizes are narrowed from size_t to int - confirm for the
 * target platforms.
 */
void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp)
{
   size_t i;
   double x;
   short temp;

   HTS_GStreamSet *gss = &engine->gss;
   char data_01_04[] = { 'R', 'I', 'F', 'F' };
   int data_05_08 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short) + 36;        /* bytes following this field */
   char data_09_12[] = { 'W', 'A', 'V', 'E' };
   char data_13_16[] = { 'f', 'm', 't', ' ' };
   int data_17_20 = 16;         /* size of the format fields that follow */
   short data_21_22 = 1;        /* PCM */
   short data_23_24 = 1;        /* monoral */
   int data_25_28 = engine->condition.sampling_frequency;
   int data_29_32 = engine->condition.sampling_frequency * sizeof(short);       /* bytes per second */
   short data_33_34 = sizeof(short);    /* bytes per sample */
   short data_35_36 = (short) (sizeof(short) * 8);      /* bits per sample */
   char data_37_40[] = { 'd', 'a', 't', 'a' };
   int data_41_44 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short);     /* bytes of sample data */

   /* write header */
   HTS_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);

   /* write data */
   for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
      x = HTS_GStreamSet_get_speech(gss, i);
      if (x > 32767.0)
         temp = 32767;
      else if (x < -32768.0)
         temp = -32768;
      else
         temp = (short) x;
      HTS_fwrite_little_endian(&temp, sizeof(short), 1, fp);
   }
}
/*
 * HTS_Engine_refresh: free model per one time synthesis
 * Clears the generated stream set, the parameter stream set, the state stream
 * set and the label (in that order) and resets the stop flag to FALSE.
 * The loaded model set and the other condition settings are left untouched.
 */
void HTS_Engine_refresh(HTS_Engine * engine)
{
   HTS_GStreamSet *gss = &engine->gss;
   HTS_PStreamSet *pss = &engine->pss;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_Label *label = &engine->label;

   /* release per-utterance data, last generated first */
   HTS_GStreamSet_clear(gss);
   HTS_PStreamSet_clear(pss);
   HTS_SStreamSet_clear(sss);
   HTS_Label_clear(label);

   /* a new synthesis starts with the stop request withdrawn */
   engine->condition.stop = FALSE;
}
/*
 * HTS_Engine_clear: free engine
 * Releases every array owned by the engine condition, clears the model set
 * and the audio member, then re-initializes the whole engine.
 * The per-voice rows of the weight tables are freed while the model set
 * still reports the number of voices, i.e. before the model set is cleared.
 */
void HTS_Engine_clear(HTS_Engine * engine)
{
   size_t voice;
   HTS_Condition *cond = &engine->condition;
   HTS_ModelSet *ms = &engine->ms;

   if (cond->msd_threshold != NULL)
      HTS_free(cond->msd_threshold);
   if (cond->duration_iw != NULL)
      HTS_free(cond->duration_iw);
   if (cond->gv_weight != NULL)
      HTS_free(cond->gv_weight);

   if (cond->parameter_iw != NULL) {
      voice = 0;
      while (voice < HTS_ModelSet_get_nvoices(ms)) {
         HTS_free(cond->parameter_iw[voice]);
         voice++;
      }
      HTS_free(cond->parameter_iw);
   }

   if (cond->gv_iw != NULL) {
      voice = 0;
      while (voice < HTS_ModelSet_get_nvoices(ms)) {
         HTS_free(cond->gv_iw[voice]);
         voice++;
      }
      HTS_free(cond->gv_iw);
   }

   HTS_ModelSet_clear(ms);
   HTS_Audio_clear(&engine->audio);
   HTS_Engine_initialize(engine);
}
HTS_ENGINE_C_END ;
# endif /* !HTS_ENGINE_C */