204 lines
8.4 KiB
C
204 lines
8.4 KiB
C
|
/* ----------------------------------------------------------------- */
|
||
|
/* The HMM-Based Speech Synthesis Engine "hts_engine API" */
|
||
|
/* developed by HTS Working Group */
|
||
|
/* http://hts-engine.sourceforge.net/ */
|
||
|
/* ----------------------------------------------------------------- */
|
||
|
/* */
|
||
|
/* Copyright (c) 2001-2013 Nagoya Institute of Technology */
|
||
|
/* Department of Computer Science */
|
||
|
/* */
|
||
|
/* 2001-2008 Tokyo Institute of Technology */
|
||
|
/* Interdisciplinary Graduate School of */
|
||
|
/* Science and Engineering */
|
||
|
/* */
|
||
|
/* All rights reserved. */
|
||
|
/* */
|
||
|
/* Redistribution and use in source and binary forms, with or */
|
||
|
/* without modification, are permitted provided that the following */
|
||
|
/* conditions are met: */
|
||
|
/* */
|
||
|
/* - Redistributions of source code must retain the above copyright */
|
||
|
/* notice, this list of conditions and the following disclaimer. */
|
||
|
/* - Redistributions in binary form must reproduce the above */
|
||
|
/* copyright notice, this list of conditions and the following */
|
||
|
/* disclaimer in the documentation and/or other materials provided */
|
||
|
/* with the distribution. */
|
||
|
/* - Neither the name of the HTS working group nor the names of its */
|
||
|
/* contributors may be used to endorse or promote products derived */
|
||
|
/* from this software without specific prior written permission. */
|
||
|
/* */
|
||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
|
||
|
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||
|
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
|
||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
|
||
|
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
|
||
|
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
|
||
|
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
|
||
|
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
|
||
|
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
|
||
|
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||
|
/* ----------------------------------------------------------------- */
|
||
|
|
||
|
#ifndef HTS_GSTREAM_C
|
||
|
#define HTS_GSTREAM_C
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
#define HTS_GSTREAM_C_START extern "C" {
|
||
|
#define HTS_GSTREAM_C_END }
|
||
|
#else
|
||
|
#define HTS_GSTREAM_C_START
|
||
|
#define HTS_GSTREAM_C_END
|
||
|
#endif /* __CPLUSPLUS */
|
||
|
|
||
|
HTS_GSTREAM_C_START;
|
||
|
|
||
|
/* hts_engine libraries */
|
||
|
#include "HTS_hidden.h"
|
||
|
|
||
|
/* HTS_GStreamSet_initialize: initialize generated parameter stream set */
|
||
|
void HTS_GStreamSet_initialize(HTS_GStreamSet * gss)
|
||
|
{
|
||
|
gss->nstream = 0;
|
||
|
gss->total_frame = 0;
|
||
|
gss->total_nsample = 0;
|
||
|
gss->gstream = NULL;
|
||
|
gss->gspeech = NULL;
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_create: generate speech */
|
||
|
HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio)
|
||
|
{
|
||
|
size_t i, j, k;
|
||
|
size_t msd_frame;
|
||
|
HTS_Vocoder v;
|
||
|
size_t nlpf = 0;
|
||
|
double *lpf = NULL;
|
||
|
|
||
|
/* check */
|
||
|
if (gss->gstream || gss->gspeech) {
|
||
|
HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
/* initialize */
|
||
|
gss->nstream = HTS_PStreamSet_get_nstream(pss);
|
||
|
gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
|
||
|
gss->total_nsample = fperiod * gss->total_frame;
|
||
|
gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
|
||
|
for (i = 0; i < gss->nstream; i++) {
|
||
|
gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i);
|
||
|
gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
|
||
|
for (j = 0; j < gss->total_frame; j++)
|
||
|
gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double));
|
||
|
}
|
||
|
gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double));
|
||
|
|
||
|
/* copy generated parameter */
|
||
|
for (i = 0; i < gss->nstream; i++) {
|
||
|
if (HTS_PStreamSet_is_msd(pss, i)) { /* for MSD */
|
||
|
for (j = 0, msd_frame = 0; j < gss->total_frame; j++)
|
||
|
if (HTS_PStreamSet_get_msd_flag(pss, i, j)) {
|
||
|
for (k = 0; k < gss->gstream[i].vector_length; k++)
|
||
|
gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k);
|
||
|
msd_frame++;
|
||
|
} else
|
||
|
for (k = 0; k < gss->gstream[i].vector_length; k++)
|
||
|
gss->gstream[i].par[j][k] = HTS_NODATA;
|
||
|
} else { /* for non MSD */
|
||
|
for (j = 0; j < gss->total_frame; j++)
|
||
|
for (k = 0; k < gss->gstream[i].vector_length; k++)
|
||
|
gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* check */
|
||
|
if (gss->nstream != 2 && gss->nstream != 3) {
|
||
|
HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n");
|
||
|
HTS_GStreamSet_clear(gss);
|
||
|
return FALSE;
|
||
|
}
|
||
|
if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) {
|
||
|
HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");
|
||
|
HTS_GStreamSet_clear(gss);
|
||
|
return FALSE;
|
||
|
}
|
||
|
if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) {
|
||
|
HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers.");
|
||
|
HTS_GStreamSet_clear(gss);
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
/* synthesize speech waveform */
|
||
|
HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod);
|
||
|
if (gss->nstream >= 3)
|
||
|
nlpf = gss->gstream[2].vector_length;
|
||
|
for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) {
|
||
|
j = i * fperiod;
|
||
|
if (gss->nstream >= 3)
|
||
|
lpf = &gss->gstream[2].par[i][0];
|
||
|
HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio);
|
||
|
}
|
||
|
HTS_Vocoder_clear(&v);
|
||
|
if (audio)
|
||
|
HTS_Audio_flush(audio);
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_get_total_nsamples: get total number of sample */
|
||
|
size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss)
|
||
|
{
|
||
|
return gss->total_nsample;
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_get_total_frame: get total number of frame */
|
||
|
size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss)
|
||
|
{
|
||
|
return gss->total_frame;
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_get_vector_length: get features length */
|
||
|
size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index)
|
||
|
{
|
||
|
return gss->gstream[stream_index].vector_length;
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_get_speech: get synthesized speech parameter */
|
||
|
double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index)
|
||
|
{
|
||
|
return gss->gspeech[sample_index];
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_get_parameter: get generated parameter */
|
||
|
double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index)
|
||
|
{
|
||
|
return gss->gstream[stream_index].par[frame_index][vector_index];
|
||
|
}
|
||
|
|
||
|
/* HTS_GStreamSet_clear: free generated parameter stream set */
|
||
|
void HTS_GStreamSet_clear(HTS_GStreamSet * gss)
|
||
|
{
|
||
|
size_t i, j;
|
||
|
|
||
|
if (gss->gstream) {
|
||
|
for (i = 0; i < gss->nstream; i++) {
|
||
|
if (gss->gstream[i].par != NULL) {
|
||
|
for (j = 0; j < gss->total_frame; j++)
|
||
|
HTS_free(gss->gstream[i].par[j]);
|
||
|
HTS_free(gss->gstream[i].par);
|
||
|
}
|
||
|
}
|
||
|
HTS_free(gss->gstream);
|
||
|
}
|
||
|
if (gss->gspeech)
|
||
|
HTS_free(gss->gspeech);
|
||
|
HTS_GStreamSet_initialize(gss);
|
||
|
}
|
||
|
|
||
|
HTS_GSTREAM_C_END;
|
||
|
|
||
|
#endif /* !HTS_GSTREAM_C */
|