RTS API Documentation  1.10.11
switch_vad.c
Go to the documentation of this file.
1 /*
2  * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
3  * Copyright (C) 2018-2020, Anthony Minessale II <anthm@freeswitch.org>
4  *
5  * Version: MPL 1.1
6  *
7  * The contents of this file are subject to the Mozilla Public License Version
8  * 1.1 (the "License"); you may not use this file except in compliance with
9  * the License. You may obtain a copy of the License at
10  * http://www.mozilla.org/MPL/
11  *
12  * Software distributed under the License is distributed on an "AS IS" basis,
13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14  * for the specific language governing rights and limitations under the
15  * License.
16  *
17  * The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
18  *
19  * The Initial Developer of the Original Code is
20  * Anthony Minessale II <anthm@freeswitch.org>
21  * Portions created by the Initial Developer are Copyright (C)
22  * the Initial Developer. All Rights Reserved.
23  *
24  * Contributor(s):
25  *
26  * Seven Du <dujinfang@gmail.com>
27  * Chris Rienzo <chris@signalwire.com>
28  *
29  *
30  * switch_vad.c VAD code with optional libfvad
31  *
32  */
33 
34 #include <switch.h>
35 
36 #ifdef SWITCH_HAVE_FVAD
37 #include <fvad.h>
38 #endif
39 
40 struct switch_vad_s {
41  // configs
42  int channels;
44  int debug;
45  int divisor;
46  int thresh;
49 
50  // VAD state
54 #ifdef SWITCH_HAVE_FVAD
55  Fvad *fvad;
56 #endif
57 };
58 
60 {
61  switch(state) {
63  return "none";
65  return "start_talking";
67  return "talking";
69  return "stop_talking";
70  default:
71  return "error";
72  }
73 }
74 
76 {
77  switch_vad_t *vad = malloc(sizeof(switch_vad_t));
78 
79  if (!vad) return NULL;
80 
81  memset(vad, 0, sizeof(*vad));
82  vad->sample_rate = sample_rate ? sample_rate : 8000;
83  vad->channels = channels;
84  vad->silence_samples_thresh = 500 * (vad->sample_rate / 1000);
85  vad->voice_samples_thresh = 200 * (vad->sample_rate / 1000);
86  vad->thresh = 100;
87  vad->divisor = vad->sample_rate / 8000;
88  if (vad->divisor <= 0) {
89  vad->divisor = 1;
90  }
91  switch_vad_reset(vad);
92 
93  return vad;
94 }
95 
97 {
98 #ifdef SWITCH_HAVE_FVAD
99  int ret = 0;
100 
101  if (mode < 0) {
102  if (vad->fvad) fvad_free(vad->fvad);
103 
104  vad->fvad = NULL;
105  return ret;
106  } else if (mode > 3) {
107  mode = 3;
108  }
109 
110  if (!vad->fvad) {
111  vad->fvad = fvad_new();
112 
113  if (!vad->fvad) {
114  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "libfvad init error\n");
115  }
116  }
117 
118  if (vad->fvad) {
119  ret = fvad_set_mode(vad->fvad, mode);
120  fvad_set_sample_rate(vad->fvad, vad->sample_rate);
121  }
122 
123  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "libfvad started, mode = %d\n", mode);
124  return ret;
125 #else
126  if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "set vad mode = %d\n", mode);
127 
128  return 0;
129 #endif
130 }
131 
132 SWITCH_DECLARE(void) switch_vad_set_param(switch_vad_t *vad, const char *key, int val)
133 {
134  if (!key) return;
135 
136  if (!strcmp(key, "hangover_len")) {
137  /* convert old-style hits to samples assuming 20ms ptime */
138  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "hangover_len is deprecated, setting silence_ms to %d\n", 20 * val);
139  switch_vad_set_param(vad, "silence_ms", val * 20);
140  } else if (!strcmp(key, "silence_ms")) {
141  if (val > 0) {
142  vad->silence_samples_thresh = val * (vad->sample_rate / 1000);
143  } else {
144  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid silence_ms of %d\n", val);
145  }
146  } else if (!strcmp(key, "thresh")) {
147  vad->thresh = val;
148  } else if (!strcmp(key, "debug")) {
149  vad->debug = val;
150  } else if (!strcmp(key, "voice_ms")) {
151  if (val > 0) {
152  vad->voice_samples_thresh = val * (vad->sample_rate / 1000);
153  } else {
154  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid voice_ms of %d\n", val);
155  }
156  } else if (!strcmp(key, "listen_hits")) {
157  /* convert old-style hits to samples assuming 20ms ptime */
158  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "listen_hits is deprecated, setting voice_ms to %d\n", 20 * val);
159  switch_vad_set_param(vad, "voice_ms", 20 * val);
160  }
161 
162  if (vad->debug) {
163  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "set %s to %d\n", key, val);
164  }
165 }
166 
168 {
169 #ifdef SWITCH_HAVE_FVAD
170  if (vad->fvad) {
171  fvad_reset(vad->fvad);
172  }
173 #endif
175  vad->voice_samples = 0;
176  vad->silence_samples = 0;
177 
178  if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "reset vad state\n");
179 }
180 
181 SWITCH_DECLARE(switch_vad_state_t) switch_vad_process(switch_vad_t *vad, int16_t *data, unsigned int samples)
182 {
183  int score = 0;
184 
185  // Each frame has 2 possible outcomes- voice or not voice.
186  // The VAD has 2 real states- talking / not talking with
187  // begin talking and stop talking as events to mark transitions
188 
189 
190  // determine if this is a voice or non-voice frame
191 #ifdef SWITCH_HAVE_FVAD
192  if (vad->fvad) {
193  // fvad returns -1, 0, or 1
194  // -1: error
195  // 0: non-voice frame
196  // 1: voice frame
197  int ret = fvad_process(vad->fvad, data, samples);
198 
199  // if voice frame set score > threshold
200  score = ret > 0 ? vad->thresh + 100 : 0;
201  } else {
202 #endif
203  int energy = 0, j = 0, count = 0;
204  for (energy = 0, j = 0, count = 0; count < samples; count++) {
205  energy += abs(data[j]);
206  j += vad->channels;
207  }
208 
209  if (samples && vad->divisor && samples >= vad->divisor) {
210  score = (uint32_t)(energy / (samples / vad->divisor));
211  }
212 #ifdef SWITCH_HAVE_FVAD
213  }
214 #endif
215 
216  if (vad->debug > 9) {
217  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "score: %d\n", score);
218  }
219 
220  // clear the STOP/START TALKING events
223  } else if (vad->vad_state == SWITCH_VAD_STATE_START_TALKING) {
225  }
226 
227  // adjust voice/silence run length counters
228  if (score > vad->thresh) {
229  vad->silence_samples = 0;
230  vad->voice_samples += samples;
231  } else {
232  vad->silence_samples += samples;
233  vad->voice_samples = 0;
234  }
235 
236  // check for state transitions
239  if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "vad state STOP_TALKING\n");
240  } else if (vad->vad_state == SWITCH_VAD_STATE_NONE && vad->voice_samples > vad->voice_samples_thresh) {
242  if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "vad state START_TALKING\n");
243  }
244 
246 
247  return vad->vad_state;
248 }
249 
251 {
252 
253  return vad->vad_state;
254 }
255 
257 {
258  if (*vad) {
259 
260 #ifdef SWITCH_HAVE_FVAD
261  if ((*vad)->fvad) fvad_free ((*vad)->fvad);
262 #endif
263 
264  free(*vad);
265  *vad = NULL;
266  }
267 }
#define SWITCH_CHANNEL_LOG
switch_vad_state_t switch_vad_get_state(switch_vad_t *vad)
Definition: switch_vad.c:250
void switch_vad_reset(switch_vad_t *vad)
Definition: switch_vad.c:167
int voice_samples_thresh
Definition: switch_vad.c:47
void switch_vad_destroy(switch_vad_t **vad)
Definition: switch_vad.c:256
switch_vad_state_t vad_state
Definition: switch_vad.c:53
int silence_samples_thresh
Definition: switch_vad.c:48
int voice_samples
Definition: switch_vad.c:51
switch_vad_state_t
Definition: switch_types.h:674
int sample_rate
Definition: switch_vad.c:43
switch_byte_t switch_byte_t uint32_t switch_bitpack_mode_t mode
switch_vad_t * switch_vad_init(int sample_rate, int channels)
Definition: switch_vad.c:75
int silence_samples
Definition: switch_vad.c:52
switch_vad_state_t switch_vad_process(switch_vad_t *vad, int16_t *data, unsigned int samples)
Definition: switch_vad.c:181
void switch_vad_set_param(switch_vad_t *vad, const char *key, int val)
Definition: switch_vad.c:132
Main Library Header.
#define SWITCH_DECLARE(type)
const char * switch_vad_state2str(switch_vad_state_t state)
Definition: switch_vad.c:59
char * key
Definition: switch_msrp.c:64
int switch_vad_set_mode(switch_vad_t *vad, int mode)
Definition: switch_vad.c:96
void switch_log_printf(_In_ switch_text_channel_t channel, _In_z_ const char *file, _In_z_ const char *func, _In_ int line, _In_opt_z_ const char *userdata, _In_ switch_log_level_t level, _In_z_ _Printf_format_string_ const char *fmt,...) PRINTF_FUNCTION(7
Write log data to the logging engine.
int count
Definition: switch_cJSON.h:204
memset(buf, 0, buflen)