/* ----------------------------------------------------------------- */ /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ /* developed by HTS Working Group */ /* http://hts-engine.sourceforge.net/ */ /* ----------------------------------------------------------------- */ /* */ /* Copyright (c) 2001-2015 Nagoya Institute of Technology */ /* Department of Computer Science */ /* */ /* 2001-2008 Tokyo Institute of Technology */ /* Interdisciplinary Graduate School of */ /* Science and Engineering */ /* */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* - Redistributions of source code must retain the above copyright */ /* notice, this list of conditions and the following disclaimer. */ /* - Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials provided */ /* with the distribution. */ /* - Neither the name of the HTS working group nor the names of its */ /* contributors may be used to endorse or promote products derived */ /* from this software without specific prior written permission. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* ----------------------------------------------------------------- */ #ifndef HTS_VOCODER_C #define HTS_VOCODER_C #ifdef __cplusplus #define HTS_VOCODER_C_START extern "C" { #define HTS_VOCODER_C_END } #else #define HTS_VOCODER_C_START #define HTS_VOCODER_C_END #endif /* __CPLUSPLUS */ HTS_VOCODER_C_START; #include /* for sqrt(),log(),exp(),pow(),cos() */ /* hts_engine libraries */ #include "HTS_hidden.h" static const double HTS_pade[21] = { 1.00000000000, 1.00000000000, 0.00000000000, 1.00000000000, 0.00000000000, 0.00000000000, 1.00000000000, 0.00000000000, 0.00000000000, 0.00000000000, 1.00000000000, 0.49992730000, 0.10670050000, 0.01170221000, 0.00056562790, 1.00000000000, 0.49993910000, 0.11070980000, 0.01369984000, 0.00095648530, 0.00003041721 }; /* HTS_movem: move memory */ static void HTS_movem(double *a, double *b, const int nitem) { long i = (long) nitem; if (a > b) while (i--) *b++ = *a++; else { a += i; b += i; while (i--) *--b = *--a; } } /* HTS_mlsafir: sub functions for MLSA filter */ static double HTS_mlsafir(const double x, const double *b, const int m, const double a, const double aa, double *d) { double y = 0.0; int i; d[0] = x; d[1] = aa * d[0] + a * d[1]; for (i = 2; i <= m; i++) d[i] += a * (d[i + 1] - d[i - 1]); for (i = 2; i <= m; i++) y += d[i] * b[i]; for (i = m + 1; i > 1; i--) d[i] = d[i - 1]; return (y); } /* HTS_mlsadf1: sub functions for MLSA filter */ static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade) { double v, out = 0.0, *pt; int i; pt = &d[pd + 1]; for (i = pd; i >= 1; i--) { d[i] = aa * pt[i - 1] + a * d[i]; pt[i] = d[i] * b[1]; v = pt[i] * ppade[i]; x += (1 & i) ? v : -v; out += v; } pt[0] = x; out += x; return (out); } /* HTS_mlsadf2: sub functions for MLSA filter */ static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade) { double v, out = 0.0, *pt; int i; pt = &d[pd * (m + 2)]; for (i = pd; i >= 1; i--) { pt[i] = HTS_mlsafir(pt[i - 1], b, m, a, aa, &d[(i - 1) * (m + 2)]); v = pt[i] * ppade[i]; x += (1 & i) ? v : -v; out += v; } pt[0] = x; out += x; return (out); } /* HTS_mlsadf: functions for MLSA filter */ static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d) { const double aa = 1 - a * a; const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]); x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade); x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade); return (x); } /* HTS_rnd: functions for random noise generation */ static double HTS_rnd(unsigned long *next) { double r; *next = *next * 1103515245L + 12345; r = (*next / 65536L) % 32768L; return (r / RANDMAX); } /* HTS_nrandom: functions for gaussian random noise generation */ static double HTS_nrandom(HTS_Vocoder * v) { if (v->sw == 0) { v->sw = 1; do { v->r1 = 2 * HTS_rnd(&v->next) - 1; v->r2 = 2 * HTS_rnd(&v->next) - 1; v->s = v->r1 * v->r1 + v->r2 * v->r2; } while (v->s > 1 || v->s == 0); v->s = sqrt(-2 * log(v->s) / v->s); return (v->r1 * v->s); } else { v->sw = 0; return (v->r2 * v->s); } } /* HTS_mceq: function for M-sequence random noise generation */ static int HTS_mseq(HTS_Vocoder * v) { int x0, x28; v->x >>= 1; if (v->x & B0) x0 = 1; else x0 = -1; if (v->x & B28) x28 = 1; else x28 = -1; if (x0 + x28) v->x &= B31_; else v->x |= B31; return (x0); } /* HTS_mc2b: transform mel-cepstrum to MLSA digital fillter coefficients */ static void HTS_mc2b(double *mc, double *b, int m, const double a) { if (mc != b) { if (a != 0.0) { b[m] = mc[m]; for (m--; m >= 0; m--) b[m] = mc[m] - a * b[m + 1]; } else HTS_movem(mc, b, m + 1); } else if (a != 0.0) for (m--; m >= 0; m--) b[m] -= a * b[m + 1]; } /* HTS_b2bc: transform MLSA digital filter coefficients to mel-cepstrum */ static void HTS_b2mc(const double *b, double *mc, int m, const double a) { double d, o; d = mc[m] = b[m]; for (m--; m >= 0; m--) { o = b[m] + a * d; d = b[m]; mc[m] = o; } } /* HTS_freqt: frequency transformation */ static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a) { int i, j; const double b = 1 - a * a; double *g; if (m2 > v->freqt_size) { if (v->freqt_buff != NULL) HTS_free(v->freqt_buff); v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double)); v->freqt_size = m2; } g = v->freqt_buff + v->freqt_size + 1; for (i = 0; i < m2 + 1; i++) g[i] = 0.0; for (i = -m1; i <= 0; i++) { if (0 <= m2) g[0] = c1[-i] + a * (v->freqt_buff[0] = g[0]); if (1 <= m2) g[1] = b * v->freqt_buff[0] + a * (v->freqt_buff[1] = g[1]); for (j = 2; j <= m2; j++) g[j] = v->freqt_buff[j - 1] + a * ((v->freqt_buff[j] = g[j]) - g[j - 1]); } HTS_movem(g, c2, m2 + 1); } /* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */ static void HTS_c2ir(const double *c, const int nc, double *h, const int leng) { int n, k, upl; double d; h[0] = exp(c[0]); for (n = 1; n < leng; n++) { d = 0; upl = (n >= nc) ? nc - 1 : n; for (k = 1; k <= upl; k++) d += k * c[k] * h[n - k]; h[n] = d / n; } } /* HTS_b2en: calculate frame energy */ static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a) { int i; double en = 0.0; double *cep; double *ir; if (v->spectrum2en_size < m) { if (v->spectrum2en_buff != NULL) HTS_free(v->spectrum2en_buff); v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double)); v->spectrum2en_size = m; } cep = v->spectrum2en_buff + m + 1; ir = cep + IRLENG; HTS_b2mc(b, v->spectrum2en_buff, m, a); HTS_freqt(v, v->spectrum2en_buff, m, cep, IRLENG - 1, -a); HTS_c2ir(cep, IRLENG, ir, IRLENG); for (i = 0; i < IRLENG; i++) en += ir[i] * ir[i]; return (en); } /* HTS_ignorm: inverse gain normalization */ static void HTS_ignorm(double *c1, double *c2, int m, const double g) { double k; if (g != 0.0) { k = pow(c1[0], g); for (; m >= 1; m--) c2[m] = k * c1[m]; c2[0] = (k - 1.0) / g; } else { HTS_movem(&c1[1], &c2[1], m); c2[0] = log(c1[0]); } } /* HTS_gnorm: gain normalization */ static void HTS_gnorm(double *c1, double *c2, int m, const double g) { double k; if (g != 0.0) { k = 1.0 + g * c1[0]; for (; m >= 1; m--) c2[m] = c1[m] / k; c2[0] = pow(k, 1.0 / g); } else { HTS_movem(&c1[1], &c2[1], m); c2[0] = exp(c1[0]); } } /* HTS_lsp2lpc: transform LSP to LPC */ static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m) { int i, k, mh1, mh2, flag_odd; double xx, xf, xff; double *p, *q; double *a0, *a1, *a2, *b0, *b1, *b2; flag_odd = 0; if (m % 2 == 0) mh1 = mh2 = m / 2; else { mh1 = (m + 1) / 2; mh2 = (m - 1) / 2; flag_odd = 1; } if (m > v->lsp2lpc_size) { if (v->lsp2lpc_buff != NULL) HTS_free(v->lsp2lpc_buff); v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double)); v->lsp2lpc_size = m; } p = v->lsp2lpc_buff + m; q = p + mh1; a0 = q + mh2; a1 = a0 + (mh1 + 1); a2 = a1 + (mh1 + 1); b0 = a2 + (mh1 + 1); b1 = b0 + (mh2 + 1); b2 = b1 + (mh2 + 1); HTS_movem(lsp, v->lsp2lpc_buff, m); for (i = 0; i < mh1 + 1; i++) a0[i] = 0.0; for (i = 0; i < mh1 + 1; i++) a1[i] = 0.0; for (i = 0; i < mh1 + 1; i++) a2[i] = 0.0; for (i = 0; i < mh2 + 1; i++) b0[i] = 0.0; for (i = 0; i < mh2 + 1; i++) b1[i] = 0.0; for (i = 0; i < mh2 + 1; i++) b2[i] = 0.0; /* lsp filter parameters */ for (i = k = 0; i < mh1; i++, k += 2) p[i] = -2.0 * cos(v->lsp2lpc_buff[k]); for (i = k = 0; i < mh2; i++, k += 2) q[i] = -2.0 * cos(v->lsp2lpc_buff[k + 1]); /* impulse response of analysis filter */ xx = 1.0; xf = xff = 0.0; for (k = 0; k <= m; k++) { if (flag_odd) { a0[0] = xx; b0[0] = xx - xff; xff = xf; xf = xx; } else { a0[0] = xx + xf; b0[0] = xx - xf; xf = xx; } for (i = 0; i < mh1; i++) { a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i]; a2[i] = a1[i]; a1[i] = a0[i]; } for (i = 0; i < mh2; i++) { b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i]; b2[i] = b1[i]; b1[i] = b0[i]; } if (k != 0) a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]); xx = 0.0; } for (i = m - 1; i >= 0; i--) a[i + 1] = -a[i]; a[0] = 1.0; } /* HTS_gc2gc: generalized cepstral transformation */ static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2) { int i, min, k, mk; double ss1, ss2, cc; if (m1 > v->gc2gc_size) { if (v->gc2gc_buff != NULL) HTS_free(v->gc2gc_buff); v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double)); v->gc2gc_size = m1; } HTS_movem(c1, v->gc2gc_buff, m1 + 1); c2[0] = v->gc2gc_buff[0]; for (i = 1; i <= m2; i++) { ss1 = ss2 = 0.0; min = m1 < i ? m1 : i - 1; for (k = 1; k <= min; k++) { mk = i - k; cc = v->gc2gc_buff[k] * c2[mk]; ss2 += k * cc; ss1 += mk * cc; } if (i <= m1) c2[i] = v->gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i; else c2[i] = (g2 * ss2 - g1 * ss1) / i; } } /* HTS_mgc2mgc: frequency and generalized cepstral transformation */ static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2) { double a; if (a1 == a2) { HTS_gnorm(c1, c1, m1, g1); HTS_gc2gc(v, c1, m1, g1, c2, m2, g2); HTS_ignorm(c2, c2, m2, g2); } else { a = (a2 - a1) / (1 - a1 * a2); HTS_freqt(v, c1, m1, c2, m2, a); HTS_gnorm(c2, c2, m2, g1); HTS_gc2gc(v, c2, m2, g1, c2, m2, g2); HTS_ignorm(c2, c2, m2, g2); } } /* HTS_lsp2mgc: transform LSP to MGC */ static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha) { int i; /* lsp2lpc */ HTS_lsp2lpc(v, lsp + 1, mgc, m); if (v->use_log_gain) mgc[0] = exp(lsp[0]); else mgc[0] = lsp[0]; /* mgc2mgc */ if (NORMFLG1) HTS_ignorm(mgc, mgc, m, v->gamma); else if (MULGFLG1) mgc[0] = (1.0 - mgc[0]) * ((double) v->stage); if (MULGFLG1) for (i = m; i >= 1; i--) mgc[i] *= -((double) v->stage); HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma); if (NORMFLG2) HTS_gnorm(mgc, mgc, m, v->gamma); else if (MULGFLG2) mgc[0] = mgc[0] * v->gamma + 1.0; if (MULGFLG2) for (i = m; i >= 1; i--) mgc[i] *= v->gamma; } /* HTS_mglsadff: sub functions for MGLSA filter */ static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d) { int i; double y; y = d[0] * b[1]; for (i = 1; i < m; i++) { d[i] += a * (d[i + 1] - d[i - 1]); y += d[i] * b[i + 1]; } x -= y; for (i = m; i > 0; i--) d[i] = d[i - 1]; d[0] = a * d[0] + (1 - a * a) * x; return x; } /* HTS_mglsadf: sub functions for MGLSA filter */ static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d) { int i; for (i = 0; i < n; i++) x = HTS_mglsadff(x, b, m, a, &d[i * (m + 1)]); return x; } /* THS_check_lsp_stability: check LSP stability */ static void HTS_check_lsp_stability(double *lsp, size_t m) { size_t i, j; double tmp; double min = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1); HTS_Boolean find; for (i = 0; i < CHECK_LSP_STABILITY_NUM; i++) { find = FALSE; for (j = 1; j < m; j++) { tmp = lsp[j + 1] - lsp[j]; if (tmp < min) { lsp[j] -= 0.5 * (min - tmp); lsp[j + 1] += 0.5 * (min - tmp); find = TRUE; } } if (lsp[1] < min) { lsp[1] = min; find = TRUE; } if (lsp[m] > PI - min) { lsp[m] = PI - min; find = TRUE; } if (find == FALSE) break; } } /* HTS_lsp2en: calculate frame energy */ static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double alpha) { size_t i; double en = 0.0; double *buff; if (v->spectrum2en_size < m) { if (v->spectrum2en_buff != NULL) HTS_free(v->spectrum2en_buff); v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double)); v->spectrum2en_size = m; } buff = v->spectrum2en_buff + m + 1; /* lsp2lpc */ HTS_lsp2lpc(v, lsp + 1, v->spectrum2en_buff, m); if (v->use_log_gain) v->spectrum2en_buff[0] = exp(lsp[0]); else v->spectrum2en_buff[0] = lsp[0]; /* mgc2mgc */ if (NORMFLG1) HTS_ignorm(v->spectrum2en_buff, v->spectrum2en_buff, m, v->gamma); else if (MULGFLG1) v->spectrum2en_buff[0] = (1.0 - v->spectrum2en_buff[0]) * ((double) v->stage); if (MULGFLG1) for (i = 1; i <= m; i++) v->spectrum2en_buff[i] *= -((double) v->stage); HTS_mgc2mgc(v, v->spectrum2en_buff, m, alpha, v->gamma, buff, IRLENG - 1, 0.0, 1); for (i = 0; i < IRLENG; i++) en += buff[i] * buff[i]; return en; } /* HTS_white_noise: return white noise */ static double HTS_white_noise(HTS_Vocoder * v) { if (v->gauss) return (double) HTS_nrandom(v); else return (double) HTS_mseq(v); } /* HTS_Vocoder_initialize_excitation: initialize excitation */ static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf) { size_t i; v->pitch_of_curr_point = pitch; v->pitch_counter = pitch; v->pitch_inc_per_point = 0.0; if (nlpf > 0) { v->excite_buff_size = nlpf; v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double)); for (i = 0; i < v->excite_buff_size; i++) v->excite_ring_buff[i] = 0.0; v->excite_buff_index = 0; } else { v->excite_buff_size = 0; v->excite_ring_buff = NULL; v->excite_buff_index = 0; } } /* HTS_Vocoder_start_excitation: start excitation of each frame */ static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch) { if (v->pitch_of_curr_point != 0.0 && pitch != 0.0) { v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd; } else { v->pitch_inc_per_point = 0.0; v->pitch_of_curr_point = pitch; v->pitch_counter = pitch; } } /* HTS_Vocoder_excite_unvoiced_frame: ping noise to ring buffer */ static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise) { size_t center = (v->excite_buff_size - 1) / 2; v->excite_ring_buff[(v->excite_buff_index + center) % v->excite_buff_size] += noise; } /* HTS_Vocoder_excite_vooiced_frame: ping noise and pulse to ring buffer */ static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf) { size_t i; size_t center = (v->excite_buff_size - 1) / 2; if (noise != 0.0) { for (i = 0; i < v->excite_buff_size; i++) { if (i == center) v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (1.0 - lpf[i]); else v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (0.0 - lpf[i]); } } if (pulse != 0.0) { for (i = 0; i < v->excite_buff_size; i++) v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += pulse * lpf[i]; } } /* HTS_Vocoder_get_excitation: get excitation of each sample */ static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf) { double x; double noise, pulse = 0.0; if (v->excite_buff_size > 0) { noise = HTS_white_noise(v); pulse = 0.0; if (v->pitch_of_curr_point == 0.0) { HTS_Vocoder_excite_unvoiced_frame(v, noise); } else { v->pitch_counter += 1.0; if (v->pitch_counter >= v->pitch_of_curr_point) { pulse = sqrt(v->pitch_of_curr_point); v->pitch_counter -= v->pitch_of_curr_point; } HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf); v->pitch_of_curr_point += v->pitch_inc_per_point; } x = v->excite_ring_buff[v->excite_buff_index]; v->excite_ring_buff[v->excite_buff_index] = 0.0; v->excite_buff_index++; if (v->excite_buff_index >= v->excite_buff_size) v->excite_buff_index = 0; } else { if (v->pitch_of_curr_point == 0.0) { x = HTS_white_noise(v); } else { v->pitch_counter += 1.0; if (v->pitch_counter >= v->pitch_of_curr_point) { x = sqrt(v->pitch_of_curr_point); v->pitch_counter -= v->pitch_of_curr_point; } else { x = 0.0; } v->pitch_of_curr_point += v->pitch_inc_per_point; } } return x; } /* HTS_Vocoder_end_excitation: end excitation of each frame */ static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch) { v->pitch_of_curr_point = pitch; } /* HTS_Vocoder_postfilter_mcp: postfilter for MCP */ static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta) { double e1, e2; int k; if (beta > 0.0 && m > 1) { if (v->postfilter_size < m) { if (v->postfilter_buff != NULL) HTS_free(v->postfilter_buff); v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double)); v->postfilter_size = m; } HTS_mc2b(mcp, v->postfilter_buff, m, alpha); e1 = HTS_b2en(v, v->postfilter_buff, m, alpha); v->postfilter_buff[1] -= beta * alpha * v->postfilter_buff[2]; for (k = 2; k <= m; k++) v->postfilter_buff[k] *= (1.0 + beta); e2 = HTS_b2en(v, v->postfilter_buff, m, alpha); v->postfilter_buff[0] += log(e1 / e2) / 2; HTS_b2mc(v->postfilter_buff, mcp, m, alpha); } } /* HTS_Vocoder_postfilter_lsp: postfilter for LSP */ static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta) { double e1, e2; size_t i; double d1, d2; if (beta > 0.0 && m > 1) { if (v->postfilter_size < m) { if (v->postfilter_buff != NULL) HTS_free(v->postfilter_buff); v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double)); v->postfilter_size = m; } e1 = HTS_lsp2en(v, lsp, m, alpha); /* postfiltering */ for (i = 0; i <= m; i++) { if (i > 1 && i < m) { d1 = beta * (lsp[i + 1] - lsp[i]); d2 = beta * (lsp[i] - lsp[i - 1]); v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / ((d2 * d2) + (d1 * d1)); } else { v->postfilter_buff[i] = lsp[i]; } } HTS_movem(v->postfilter_buff, lsp, m + 1); e2 = HTS_lsp2en(v, lsp, m, alpha); if (e1 != e2) { if (v->use_log_gain) lsp[0] += 0.5 * log(e1 / e2); else lsp[0] *= sqrt(e1 / e2); } } } /* HTS_Vocoder_initialize: initialize vocoder */ void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod) { /* set parameter */ v->is_first = TRUE; v->stage = stage; if (stage != 0) v->gamma = -1.0 / v->stage; else v->gamma = 0.0; v->use_log_gain = use_log_gain; v->fprd = fperiod; v->next = SEED; v->gauss = GAUSS; v->rate = rate; v->pitch_of_curr_point = 0.0; v->pitch_counter = 0.0; v->pitch_inc_per_point = 0.0; v->excite_ring_buff = NULL; v->excite_buff_size = 0; v->excite_buff_index = 0; v->sw = 0; v->x = 0x55555555; /* init buffer */ v->freqt_buff = NULL; v->freqt_size = 0; v->gc2gc_buff = NULL; v->gc2gc_size = 0; v->lsp2lpc_buff = NULL; v->lsp2lpc_size = 0; v->postfilter_buff = NULL; v->postfilter_size = 0; v->spectrum2en_buff = NULL; v->spectrum2en_size = 0; if (v->stage == 0) { /* for MCP */ v->c = (double *) HTS_calloc(m * (3 + PADEORDER) + 5 * PADEORDER + 6, sizeof(double)); v->cc = v->c + m + 1; v->cinc = v->cc + m + 1; v->d1 = v->cinc + m + 1; } else { /* for LSP */ v->c = (double *) HTS_calloc((m + 1) * (v->stage + 3), sizeof(double)); v->cc = v->c + m + 1; v->cinc = v->cc + m + 1; v->d1 = v->cinc + m + 1; } } /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filster based waveform synthesis */ void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio) { double x; int i, j; short xs; int rawidx = 0; double p; /* lf0 -> pitch */ if (lf0 == LZERO) p = 0.0; else if (lf0 <= MIN_LF0) p = v->rate / MIN_F0; else if (lf0 >= MAX_LF0) p = v->rate / MAX_F0; else p = v->rate / exp(lf0); /* first time */ if (v->is_first == TRUE) { HTS_Vocoder_initialize_excitation(v, p, nlpf); if (v->stage == 0) { /* for MCP */ HTS_mc2b(spectrum, v->c, m, alpha); } else { /* for LSP */ HTS_movem(spectrum, v->c, m + 1); HTS_lsp2mgc(v, v->c, v->c, m, alpha); HTS_mc2b(v->c, v->c, m, alpha); HTS_gnorm(v->c, v->c, m, v->gamma); for (i = 1; i <= m; i++) v->c[i] *= v->gamma; } v->is_first = FALSE; } HTS_Vocoder_start_excitation(v, p); if (v->stage == 0) { /* for MCP */ HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta); HTS_mc2b(spectrum, v->cc, m, alpha); for (i = 0; i <= m; i++) v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd; } else { /* for LSP */ HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta); HTS_check_lsp_stability(spectrum, m); HTS_lsp2mgc(v, spectrum, v->cc, m, alpha); HTS_mc2b(v->cc, v->cc, m, alpha); HTS_gnorm(v->cc, v->cc, m, v->gamma); for (i = 1; i <= m; i++) v->cc[i] *= v->gamma; for (i = 0; i <= m; i++) v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd; } for (j = 0; j < v->fprd; j++) { x = HTS_Vocoder_get_excitation(v, lpf); if (v->stage == 0) { /* for MCP */ if (x != 0.0) x *= exp(v->c[0]); x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1); } else { /* for LSP */ if (!NGAIN) x *= v->c[0]; x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1); } x *= volume; /* output */ if (rawdata) rawdata[rawidx++] = x; if (audio) { if (x > 32767.0) xs = 32767; else if (x < -32768.0) xs = -32768; else xs = (short) x; HTS_Audio_write(audio, xs); } for (i = 0; i <= m; i++) v->c[i] += v->cinc[i]; } HTS_Vocoder_end_excitation(v, p); HTS_movem(v->cc, v->c, m + 1); } /* HTS_Vocoder_clear: clear vocoder */ void HTS_Vocoder_clear(HTS_Vocoder * v) { if (v != NULL) { /* free buffer */ if (v->freqt_buff != NULL) { HTS_free(v->freqt_buff); v->freqt_buff = NULL; } v->freqt_size = 0; if (v->gc2gc_buff != NULL) { HTS_free(v->gc2gc_buff); v->gc2gc_buff = NULL; } v->gc2gc_size = 0; if (v->lsp2lpc_buff != NULL) { HTS_free(v->lsp2lpc_buff); v->lsp2lpc_buff = NULL; } v->lsp2lpc_size = 0; if (v->postfilter_buff != NULL) { HTS_free(v->postfilter_buff); v->postfilter_buff = NULL; } v->postfilter_size = 0; if (v->spectrum2en_buff != NULL) { HTS_free(v->spectrum2en_buff); v->spectrum2en_buff = NULL; } v->spectrum2en_size = 0; if (v->c != NULL) { HTS_free(v->c); v->c = NULL; } v->excite_buff_size = 0; v->excite_buff_index = 0; if (v->excite_ring_buff != NULL) { HTS_free(v->excite_ring_buff); v->excite_ring_buff = NULL; } } } HTS_VOCODER_C_END; #endif /* !HTS_VOCODER_C */