SDL  2.0
SDL_audiotypecvt.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #include "../SDL_internal.h"
23 #include "SDL_audio.h"
24 #include "SDL_audio_c.h"
25 #include "SDL_cpuinfo.h"
26 #include "SDL_assert.h"
27 
28 /* !!! FIXME: disabled until we fix https://bugzilla.libsdl.org/show_bug.cgi?id=4186 */
/* NEON code paths are currently compiled out: the condition below is hard-wired to 0. */
#if 0 /*def __ARM_NEON__*/
#define HAVE_NEON_INTRINSICS 1
#endif

/* SSE2 code paths are built whenever the compiler targets SSE2. */
#if defined(__SSE2__)
#define HAVE_SSE2_INTRINSICS 1
#endif

/* Targets whose baseline guarantees one of the SIMD paths above can drop
   the scalar converters entirely. */
#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* x86_64 guarantees SSE2. */
#elif __MACOSX__ && HAVE_SSE2_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* Mac OS X/Intel guarantees SSE2. */
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* ARMv8+ promise NEON. */
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* All Apple ARMv7 chips promise NEON support. */
#endif

/* Everything else keeps the scalar converters; this stays zero only where a
   SIMD codepath is guaranteed. */
#if !defined(NEED_SCALAR_CONVERTER_FALLBACKS)
#define NEED_SCALAR_CONVERTER_FALLBACKS 1
#endif
51 
52 /* Function pointers set to a CPU-specific implementation. */
63 
64 
/* Reciprocals used to scale integer samples to floats with a multiply
   instead of a divide. */
#define DIVBY128 0.0078125f                      /* 1 / 128 */
#define DIVBY32768 0.000030517578125f            /* 1 / 32768 */
#define DIVBY8388607 0.00000011920930376163766f  /* 1 / 8388607 */
68 
69 
70 #if NEED_SCALAR_CONVERTER_FALLBACKS
71 static void SDLCALL
73 {
74  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
75  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
76  int i;
77 
78  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32");
79 
80  for (i = cvt->len_cvt; i; --i, --src, --dst) {
81  *dst = ((float) *src) * DIVBY128;
82  }
83 
84  cvt->len_cvt *= 4;
85  if (cvt->filters[++cvt->filter_index]) {
86  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
87  }
88 }
89 
90 static void SDLCALL
92 {
93  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
94  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
95  int i;
96 
97  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32");
98 
99  for (i = cvt->len_cvt; i; --i, --src, --dst) {
100  *dst = (((float) *src) * DIVBY128) - 1.0f;
101  }
102 
103  cvt->len_cvt *= 4;
104  if (cvt->filters[++cvt->filter_index]) {
105  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
106  }
107 }
108 
109 static void SDLCALL
111 {
112  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
113  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
114  int i;
115 
116  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32");
117 
118  for (i = cvt->len_cvt / sizeof (Sint16); i; --i, --src, --dst) {
119  *dst = ((float) *src) * DIVBY32768;
120  }
121 
122  cvt->len_cvt *= 2;
123  if (cvt->filters[++cvt->filter_index]) {
124  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
125  }
126 }
127 
128 static void SDLCALL
130 {
131  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
132  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
133  int i;
134 
135  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32");
136 
137  for (i = cvt->len_cvt / sizeof (Uint16); i; --i, --src, --dst) {
138  *dst = (((float) *src) * DIVBY32768) - 1.0f;
139  }
140 
141  cvt->len_cvt *= 2;
142  if (cvt->filters[++cvt->filter_index]) {
143  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
144  }
145 }
146 
147 static void SDLCALL
149 {
150  const Sint32 *src = (const Sint32 *) cvt->buf;
151  float *dst = (float *) cvt->buf;
152  int i;
153 
154  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32");
155 
156  for (i = cvt->len_cvt / sizeof (Sint32); i; --i, ++src, ++dst) {
157  *dst = ((float) (*src>>8)) * DIVBY8388607;
158  }
159 
160  if (cvt->filters[++cvt->filter_index]) {
161  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
162  }
163 }
164 
165 static void SDLCALL
167 {
168  const float *src = (const float *) cvt->buf;
169  Sint8 *dst = (Sint8 *) cvt->buf;
170  int i;
171 
172  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8");
173 
174  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
175  const float sample = *src;
176  if (sample >= 1.0f) {
177  *dst = 127;
178  } else if (sample <= -1.0f) {
179  *dst = -128;
180  } else {
181  *dst = (Sint8)(sample * 127.0f);
182  }
183  }
184 
185  cvt->len_cvt /= 4;
186  if (cvt->filters[++cvt->filter_index]) {
187  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
188  }
189 }
190 
191 static void SDLCALL
193 {
194  const float *src = (const float *) cvt->buf;
195  Uint8 *dst = (Uint8 *) cvt->buf;
196  int i;
197 
198  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8");
199 
200  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
201  const float sample = *src;
202  if (sample >= 1.0f) {
203  *dst = 255;
204  } else if (sample <= -1.0f) {
205  *dst = 0;
206  } else {
207  *dst = (Uint8)((sample + 1.0f) * 127.0f);
208  }
209  }
210 
211  cvt->len_cvt /= 4;
212  if (cvt->filters[++cvt->filter_index]) {
213  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
214  }
215 }
216 
217 static void SDLCALL
219 {
220  const float *src = (const float *) cvt->buf;
221  Sint16 *dst = (Sint16 *) cvt->buf;
222  int i;
223 
224  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16");
225 
226  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
227  const float sample = *src;
228  if (sample >= 1.0f) {
229  *dst = 32767;
230  } else if (sample <= -1.0f) {
231  *dst = -32768;
232  } else {
233  *dst = (Sint16)(sample * 32767.0f);
234  }
235  }
236 
237  cvt->len_cvt /= 2;
238  if (cvt->filters[++cvt->filter_index]) {
239  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
240  }
241 }
242 
243 static void SDLCALL
245 {
246  const float *src = (const float *) cvt->buf;
247  Uint16 *dst = (Uint16 *) cvt->buf;
248  int i;
249 
250  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16");
251 
252  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
253  const float sample = *src;
254  if (sample >= 1.0f) {
255  *dst = 65535;
256  } else if (sample <= -1.0f) {
257  *dst = 0;
258  } else {
259  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
260  }
261  }
262 
263  cvt->len_cvt /= 2;
264  if (cvt->filters[++cvt->filter_index]) {
265  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
266  }
267 }
268 
269 static void SDLCALL
271 {
272  const float *src = (const float *) cvt->buf;
273  Sint32 *dst = (Sint32 *) cvt->buf;
274  int i;
275 
276  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32");
277 
278  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
279  const float sample = *src;
280  if (sample >= 1.0f) {
281  *dst = 2147483647;
282  } else if (sample <= -1.0f) {
283  *dst = (Sint32) -2147483648LL;
284  } else {
285  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
286  }
287  }
288 
289  if (cvt->filters[++cvt->filter_index]) {
290  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
291  }
292 }
293 #endif
294 
295 
296 #if HAVE_SSE2_INTRINSICS
297 static void SDLCALL
298 SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
299 {
300  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
301  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
302  int i;
303 
304  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
305 
306  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
307  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
308  *dst = ((float) *src) * DIVBY128;
309  }
310 
311  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
312  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
313 
314  /* Make sure src is aligned too. */
315  if ((((size_t) src) & 15) == 0) {
316  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
317  const __m128i *mmsrc = (const __m128i *) src;
318  const __m128i zero = _mm_setzero_si128();
319  const __m128 divby128 = _mm_set1_ps(DIVBY128);
320  while (i >= 16) { /* 16 * 8-bit */
321  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 sint8 into an XMM register. */
322  /* treat as int16, shift left to clear every other sint16, then back right with sign-extend. Now sint16. */
323  const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
324  /* right-shift-sign-extend gets us sint16 with the other set of values. */
325  const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
326  /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
327  const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
328  const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
329  const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
330  const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
331  /* Interleave back into correct order, store. */
332  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
333  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
334  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
335  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
336  i -= 16; mmsrc--; dst -= 16;
337  }
338 
339  src = (const Sint8 *) mmsrc;
340  }
341 
342  src += 15; dst += 15; /* adjust for any scalar finishing. */
343 
344  /* Finish off any leftovers with scalar operations. */
345  while (i) {
346  *dst = ((float) *src) * DIVBY128;
347  i--; src--; dst--;
348  }
349 
350  cvt->len_cvt *= 4;
351  if (cvt->filters[++cvt->filter_index]) {
352  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
353  }
354 }
355 
356 static void SDLCALL
357 SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
358 {
359  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
360  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
361  int i;
362 
363  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
364 
365  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
366  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
367  *dst = (((float) *src) * DIVBY128) - 1.0f;
368  }
369 
370  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
371  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
372 
373  /* Make sure src is aligned too. */
374  if ((((size_t) src) & 15) == 0) {
375  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
376  const __m128i *mmsrc = (const __m128i *) src;
377  const __m128i zero = _mm_setzero_si128();
378  const __m128 divby128 = _mm_set1_ps(DIVBY128);
379  const __m128 minus1 = _mm_set1_ps(-1.0f);
380  while (i >= 16) { /* 16 * 8-bit */
381  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 uint8 into an XMM register. */
382  /* treat as int16, shift left to clear every other sint16, then back right with zero-extend. Now uint16. */
383  const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
384  /* right-shift-zero-extend gets us uint16 with the other set of values. */
385  const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
386  /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
387  /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
388  const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
389  const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
390  const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
391  const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
392  /* Interleave back into correct order, store. */
393  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
394  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
395  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
396  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
397  i -= 16; mmsrc--; dst -= 16;
398  }
399 
400  src = (const Uint8 *) mmsrc;
401  }
402 
403  src += 15; dst += 15; /* adjust for any scalar finishing. */
404 
405  /* Finish off any leftovers with scalar operations. */
406  while (i) {
407  *dst = (((float) *src) * DIVBY128) - 1.0f;
408  i--; src--; dst--;
409  }
410 
411  cvt->len_cvt *= 4;
412  if (cvt->filters[++cvt->filter_index]) {
413  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
414  }
415 }
416 
417 static void SDLCALL
418 SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
419 {
420  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
421  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
422  int i;
423 
424  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
425 
426  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
427  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
428  *dst = ((float) *src) * DIVBY32768;
429  }
430 
431  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
432  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
433 
434  /* Make sure src is aligned too. */
435  if ((((size_t) src) & 15) == 0) {
436  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
437  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
438  while (i >= 8) { /* 8 * 16-bit */
439  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
440  /* treat as int32, shift left to clear every other sint16, then back right with sign-extend. Now sint32. */
441  const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
442  /* right-shift-sign-extend gets us sint32 with the other set of values. */
443  const __m128i b = _mm_srai_epi32(ints, 16);
444  /* Interleave these back into the right order, convert to float, multiply, store. */
445  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
446  _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
447  i -= 8; src -= 8; dst -= 8;
448  }
449  }
450 
451  src += 7; dst += 7; /* adjust for any scalar finishing. */
452 
453  /* Finish off any leftovers with scalar operations. */
454  while (i) {
455  *dst = ((float) *src) * DIVBY32768;
456  i--; src--; dst--;
457  }
458 
459  cvt->len_cvt *= 2;
460  if (cvt->filters[++cvt->filter_index]) {
461  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
462  }
463 }
464 
465 static void SDLCALL
466 SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
467 {
468  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
469  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
470  int i;
471 
472  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
473 
474  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
475  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
476  *dst = (((float) *src) * DIVBY32768) - 1.0f;
477  }
478 
479  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
480  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
481 
482  /* Make sure src is aligned too. */
483  if ((((size_t) src) & 15) == 0) {
484  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
485  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
486  const __m128 minus1 = _mm_set1_ps(1.0f);
487  while (i >= 8) { /* 8 * 16-bit */
488  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
489  /* treat as int32, shift left to clear every other sint16, then back right with zero-extend. Now sint32. */
490  const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
491  /* right-shift-sign-extend gets us sint32 with the other set of values. */
492  const __m128i b = _mm_srli_epi32(ints, 16);
493  /* Interleave these back into the right order, convert to float, multiply, store. */
494  _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
495  _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
496  i -= 8; src -= 8; dst -= 8;
497  }
498  }
499 
500  src += 7; dst += 7; /* adjust for any scalar finishing. */
501 
502  /* Finish off any leftovers with scalar operations. */
503  while (i) {
504  *dst = (((float) *src) * DIVBY32768) - 1.0f;
505  i--; src--; dst--;
506  }
507 
508  cvt->len_cvt *= 2;
509  if (cvt->filters[++cvt->filter_index]) {
510  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
511  }
512 }
513 
514 static void SDLCALL
515 SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
516 {
517  const Sint32 *src = (const Sint32 *) cvt->buf;
518  float *dst = (float *) cvt->buf;
519  int i;
520 
521  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
522 
523  /* Get dst aligned to 16 bytes */
524  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
525  *dst = ((float) (*src>>8)) * DIVBY8388607;
526  }
527 
528  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
529  SDL_assert(!i || ((((size_t) src) & 15) == 0));
530 
531  {
532  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
533  const __m128 divby8388607 = _mm_set1_ps(DIVBY8388607);
534  const __m128i *mmsrc = (const __m128i *) src;
535  while (i >= 4) { /* 4 * sint32 */
536  /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
537  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
538  i -= 4; mmsrc++; dst += 4;
539  }
540  src = (const Sint32 *) mmsrc;
541  }
542 
543  /* Finish off any leftovers with scalar operations. */
544  while (i) {
545  *dst = ((float) (*src>>8)) * DIVBY8388607;
546  i--; src++; dst++;
547  }
548 
549  if (cvt->filters[++cvt->filter_index]) {
550  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
551  }
552 }
553 
554 static void SDLCALL
555 SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
556 {
557  const float *src = (const float *) cvt->buf;
558  Sint8 *dst = (Sint8 *) cvt->buf;
559  int i;
560 
561  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
562 
563  /* Get dst aligned to 16 bytes */
564  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
565  const float sample = *src;
566  if (sample >= 1.0f) {
567  *dst = 127;
568  } else if (sample <= -1.0f) {
569  *dst = -128;
570  } else {
571  *dst = (Sint8)(sample * 127.0f);
572  }
573  }
574 
575  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
576 
577  /* Make sure src is aligned too. */
578  if ((((size_t) src) & 15) == 0) {
579  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
580  const __m128 one = _mm_set1_ps(1.0f);
581  const __m128 negone = _mm_set1_ps(-1.0f);
582  const __m128 mulby127 = _mm_set1_ps(127.0f);
583  __m128i *mmdst = (__m128i *) dst;
584  while (i >= 16) { /* 16 * float32 */
585  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
586  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
587  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
588  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
589  _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
590  i -= 16; src += 16; mmdst++;
591  }
592  dst = (Sint8 *) mmdst;
593  }
594 
595  /* Finish off any leftovers with scalar operations. */
596  while (i) {
597  const float sample = *src;
598  if (sample >= 1.0f) {
599  *dst = 127;
600  } else if (sample <= -1.0f) {
601  *dst = -128;
602  } else {
603  *dst = (Sint8)(sample * 127.0f);
604  }
605  i--; src++; dst++;
606  }
607 
608  cvt->len_cvt /= 4;
609  if (cvt->filters[++cvt->filter_index]) {
610  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
611  }
612 }
613 
614 static void SDLCALL
615 SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
616 {
617  const float *src = (const float *) cvt->buf;
618  Uint8 *dst = (Uint8 *) cvt->buf;
619  int i;
620 
621  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
622 
623  /* Get dst aligned to 16 bytes */
624  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
625  const float sample = *src;
626  if (sample >= 1.0f) {
627  *dst = 255;
628  } else if (sample <= -1.0f) {
629  *dst = 0;
630  } else {
631  *dst = (Uint8)((sample + 1.0f) * 127.0f);
632  }
633  }
634 
635  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
636 
637  /* Make sure src is aligned too. */
638  if ((((size_t) src) & 15) == 0) {
639  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
640  const __m128 one = _mm_set1_ps(1.0f);
641  const __m128 negone = _mm_set1_ps(-1.0f);
642  const __m128 mulby127 = _mm_set1_ps(127.0f);
643  __m128i *mmdst = (__m128i *) dst;
644  while (i >= 16) { /* 16 * float32 */
645  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
646  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
647  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
648  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
649  _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
650  i -= 16; src += 16; mmdst++;
651  }
652  dst = (Uint8 *) mmdst;
653  }
654 
655  /* Finish off any leftovers with scalar operations. */
656  while (i) {
657  const float sample = *src;
658  if (sample >= 1.0f) {
659  *dst = 255;
660  } else if (sample <= -1.0f) {
661  *dst = 0;
662  } else {
663  *dst = (Uint8)((sample + 1.0f) * 127.0f);
664  }
665  i--; src++; dst++;
666  }
667 
668  cvt->len_cvt /= 4;
669  if (cvt->filters[++cvt->filter_index]) {
670  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
671  }
672 }
673 
674 static void SDLCALL
675 SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
676 {
677  const float *src = (const float *) cvt->buf;
678  Sint16 *dst = (Sint16 *) cvt->buf;
679  int i;
680 
681  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
682 
683  /* Get dst aligned to 16 bytes */
684  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
685  const float sample = *src;
686  if (sample >= 1.0f) {
687  *dst = 32767;
688  } else if (sample <= -1.0f) {
689  *dst = -32768;
690  } else {
691  *dst = (Sint16)(sample * 32767.0f);
692  }
693  }
694 
695  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
696 
697  /* Make sure src is aligned too. */
698  if ((((size_t) src) & 15) == 0) {
699  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
700  const __m128 one = _mm_set1_ps(1.0f);
701  const __m128 negone = _mm_set1_ps(-1.0f);
702  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
703  __m128i *mmdst = (__m128i *) dst;
704  while (i >= 8) { /* 8 * float32 */
705  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
706  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
707  _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2)); /* pack to sint16, store out. */
708  i -= 8; src += 8; mmdst++;
709  }
710  dst = (Sint16 *) mmdst;
711  }
712 
713  /* Finish off any leftovers with scalar operations. */
714  while (i) {
715  const float sample = *src;
716  if (sample >= 1.0f) {
717  *dst = 32767;
718  } else if (sample <= -1.0f) {
719  *dst = -32768;
720  } else {
721  *dst = (Sint16)(sample * 32767.0f);
722  }
723  i--; src++; dst++;
724  }
725 
726  cvt->len_cvt /= 2;
727  if (cvt->filters[++cvt->filter_index]) {
728  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
729  }
730 }
731 
732 static void SDLCALL
733 SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
734 {
735  const float *src = (const float *) cvt->buf;
736  Uint16 *dst = (Uint16 *) cvt->buf;
737  int i;
738 
739  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
740 
741  /* Get dst aligned to 16 bytes */
742  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
743  const float sample = *src;
744  if (sample >= 1.0f) {
745  *dst = 65535;
746  } else if (sample <= -1.0f) {
747  *dst = 0;
748  } else {
749  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
750  }
751  }
752 
753  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
754 
755  /* Make sure src is aligned too. */
756  if ((((size_t) src) & 15) == 0) {
757  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
758  /* This calculates differently than the scalar path because SSE2 can't
759  pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
760  saturation, so that would corrupt our data. _mm_packus_epi32 exists,
761  but not before SSE 4.1. So we convert from float to sint16, packing
762  that down with legit signed saturation, and then xor the top bit
763  against 1. This results in the correct unsigned 16-bit value, even
764  though it looks like dark magic. */
765  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
766  const __m128i topbit = _mm_set1_epi16(-32768);
767  const __m128 one = _mm_set1_ps(1.0f);
768  const __m128 negone = _mm_set1_ps(-1.0f);
769  __m128i *mmdst = (__m128i *) dst;
770  while (i >= 8) { /* 8 * float32 */
771  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
772  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
773  _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit)); /* pack to sint16, xor top bit, store out. */
774  i -= 8; src += 8; mmdst++;
775  }
776  dst = (Uint16 *) mmdst;
777  }
778 
779  /* Finish off any leftovers with scalar operations. */
780  while (i) {
781  const float sample = *src;
782  if (sample >= 1.0f) {
783  *dst = 65535;
784  } else if (sample <= -1.0f) {
785  *dst = 0;
786  } else {
787  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
788  }
789  i--; src++; dst++;
790  }
791 
792  cvt->len_cvt /= 2;
793  if (cvt->filters[++cvt->filter_index]) {
794  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
795  }
796 }
797 
798 static void SDLCALL
799 SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
800 {
801  const float *src = (const float *) cvt->buf;
802  Sint32 *dst = (Sint32 *) cvt->buf;
803  int i;
804 
805  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
806 
807  /* Get dst aligned to 16 bytes */
808  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
809  const float sample = *src;
810  if (sample >= 1.0f) {
811  *dst = 2147483647;
812  } else if (sample <= -1.0f) {
813  *dst = (Sint32) -2147483648LL;
814  } else {
815  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
816  }
817  }
818 
819  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
820  SDL_assert(!i || ((((size_t) src) & 15) == 0));
821 
822  {
823  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
824  const __m128 one = _mm_set1_ps(1.0f);
825  const __m128 negone = _mm_set1_ps(-1.0f);
826  const __m128 mulby8388607 = _mm_set1_ps(8388607.0f);
827  __m128i *mmdst = (__m128i *) dst;
828  while (i >= 4) { /* 4 * float32 */
829  _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby8388607)), 8)); /* load 4 floats, clamp, convert to sint32 */
830  i -= 4; src += 4; mmdst++;
831  }
832  dst = (Sint32 *) mmdst;
833  }
834 
835  /* Finish off any leftovers with scalar operations. */
836  while (i) {
837  const float sample = *src;
838  if (sample >= 1.0f) {
839  *dst = 2147483647;
840  } else if (sample <= -1.0f) {
841  *dst = (Sint32) -2147483648LL;
842  } else {
843  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
844  }
845  i--; src++; dst++;
846  }
847 
848  if (cvt->filters[++cvt->filter_index]) {
849  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
850  }
851 }
852 #endif
853 
854 
855 #if HAVE_NEON_INTRINSICS
856 static void SDLCALL
857 SDL_Convert_S8_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
858 {
859  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
860  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
861  int i;
862 
863  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using NEON)");
864 
865  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
866  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
867  *dst = ((float) *src) * DIVBY128;
868  }
869 
870  src -= 15; dst -= 15; /* adjust to read NEON blocks from the start. */
871  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
872 
873  /* Make sure src is aligned too. */
874  if ((((size_t) src) & 15) == 0) {
875  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
876  const int8_t *mmsrc = (const int8_t *) src;
877  const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
878  while (i >= 16) { /* 16 * 8-bit */
879  const int8x16_t bytes = vld1q_s8(mmsrc); /* get 16 sint8 into a NEON register. */
880  const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes)); /* convert top 8 bytes to 8 int16 */
881  const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes)); /* convert bottom 8 bytes to 8 int16 */
882  /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
883  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
884  vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
885  vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
886  vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
887  i -= 16; mmsrc -= 16; dst -= 16;
888  }
889 
890  src = (const Sint8 *) mmsrc;
891  }
892 
893  src += 15; dst += 15; /* adjust for any scalar finishing. */
894 
895  /* Finish off any leftovers with scalar operations. */
896  while (i) {
897  *dst = ((float) *src) * DIVBY128;
898  i--; src--; dst--;
899  }
900 
901  cvt->len_cvt *= 4;
902  if (cvt->filters[++cvt->filter_index]) {
903  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
904  }
905 }
906 
907 static void SDLCALL
908 SDL_Convert_U8_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
909 {
910  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
911  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
912  int i;
913 
914  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using NEON)");
915 
916  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
917  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
918  *dst = (((float) *src) * DIVBY128) - 1.0f;
919  }
920 
921  src -= 15; dst -= 15; /* adjust to read NEON blocks from the start. */
922  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
923 
924  /* Make sure src is aligned too. */
925  if ((((size_t) src) & 15) == 0) {
926  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
927  const uint8_t *mmsrc = (const uint8_t *) src;
928  const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
929  const float32x4_t one = vdupq_n_f32(1.0f);
930  while (i >= 16) { /* 16 * 8-bit */
931  const uint8x16_t bytes = vld1q_u8(mmsrc); /* get 16 uint8 into a NEON register. */
932  const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes)); /* convert top 8 bytes to 8 uint16 */
933  const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes)); /* convert bottom 8 bytes to 8 uint16 */
934  /* split uint16 to two uint32, then convert to float, then multiply to normalize, subtract to adjust for sign, store. */
935  vst1q_f32(dst, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128, one));
936  vst1q_f32(dst+4, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128, one));
937  vst1q_f32(dst+8, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128, one));
938  vst1q_f32(dst+12, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128, one));
939  i -= 16; mmsrc -= 16; dst -= 16;
940  }
941 
942  src = (const Uint8 *) mmsrc;
943  }
944 
945  src += 15; dst += 15; /* adjust for any scalar finishing. */
946 
947  /* Finish off any leftovers with scalar operations. */
948  while (i) {
949  *dst = (((float) *src) * DIVBY128) - 1.0f;
950  i--; src--; dst--;
951  }
952 
953  cvt->len_cvt *= 4;
954  if (cvt->filters[++cvt->filter_index]) {
955  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
956  }
957 }
958 
959 static void SDLCALL
960 SDL_Convert_S16_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
961 {
962  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
963  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
964  int i;
965 
966  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using NEON)");
967 
968  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
969  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
970  *dst = ((float) *src) * DIVBY32768;
971  }
972 
973  src -= 7; dst -= 7; /* adjust to read NEON blocks from the start. */
974  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
975 
976  /* Make sure src is aligned too. */
977  if ((((size_t) src) & 15) == 0) {
978  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
979  const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
980  while (i >= 8) { /* 8 * 16-bit */
981  const int16x8_t ints = vld1q_s16((int16_t const *) src); /* get 8 sint16 into a NEON register. */
982  /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
983  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
984  vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
985  i -= 8; src -= 8; dst -= 8;
986  }
987  }
988 
989  src += 7; dst += 7; /* adjust for any scalar finishing. */
990 
991  /* Finish off any leftovers with scalar operations. */
992  while (i) {
993  *dst = ((float) *src) * DIVBY32768;
994  i--; src--; dst--;
995  }
996 
997  cvt->len_cvt *= 2;
998  if (cvt->filters[++cvt->filter_index]) {
999  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1000  }
1001 }
1002 
1003 static void SDLCALL
1004 SDL_Convert_U16_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1005 {
1006  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
1007  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
1008  int i;
1009 
1010  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using NEON)");
1011 
1012  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
1013  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
1014  *dst = (((float) *src) * DIVBY32768) - 1.0f;
1015  }
1016 
1017  src -= 7; dst -= 7; /* adjust to read NEON blocks from the start. */
1018  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1019 
1020  /* Make sure src is aligned too. */
1021  if ((((size_t) src) & 15) == 0) {
1022  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1023  const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
1024  const float32x4_t one = vdupq_n_f32(1.0f);
1025  while (i >= 8) { /* 8 * 16-bit */
1026  const uint16x8_t uints = vld1q_u16((uint16_t const *) src); /* get 8 uint16 into a NEON register. */
1027  /* split uint16 to two int32, then convert to float, then multiply to normalize, subtract for sign, store. */
1028  vst1q_f32(dst, vmlsq_f32(one, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1029  vst1q_f32(dst+4, vmlsq_f32(one, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1030  i -= 8; src -= 8; dst -= 8;
1031  }
1032  }
1033 
1034  src += 7; dst += 7; /* adjust for any scalar finishing. */
1035 
1036  /* Finish off any leftovers with scalar operations. */
1037  while (i) {
1038  *dst = (((float) *src) * DIVBY32768) - 1.0f;
1039  i--; src--; dst--;
1040  }
1041 
1042  cvt->len_cvt *= 2;
1043  if (cvt->filters[++cvt->filter_index]) {
1044  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1045  }
1046 }
1047 
1048 static void SDLCALL
1049 SDL_Convert_S32_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1050 {
1051  const Sint32 *src = (const Sint32 *) cvt->buf;
1052  float *dst = (float *) cvt->buf;
1053  int i;
1054 
1055  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using NEON)");
1056 
1057  /* Get dst aligned to 16 bytes */
1058  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1059  *dst = ((float) (*src>>8)) * DIVBY8388607;
1060  }
1061 
1062  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1063  SDL_assert(!i || ((((size_t) src) & 15) == 0));
1064 
1065  {
1066  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1067  const float32x4_t divby8388607 = vdupq_n_f32(DIVBY8388607);
1068  const int32_t *mmsrc = (const int32_t *) src;
1069  while (i >= 4) { /* 4 * sint32 */
1070  /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
1071  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1072  i -= 4; mmsrc += 4; dst += 4;
1073  }
1074  src = (const Sint32 *) mmsrc;
1075  }
1076 
1077  /* Finish off any leftovers with scalar operations. */
1078  while (i) {
1079  *dst = ((float) (*src>>8)) * DIVBY8388607;
1080  i--; src++; dst++;
1081  }
1082 
1083  if (cvt->filters[++cvt->filter_index]) {
1084  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1085  }
1086 }
1087 
1088 static void SDLCALL
1089 SDL_Convert_F32_to_S8_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1090 {
1091  const float *src = (const float *) cvt->buf;
1092  Sint8 *dst = (Sint8 *) cvt->buf;
1093  int i;
1094 
1095  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using NEON)");
1096 
1097  /* Get dst aligned to 16 bytes */
1098  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1099  const float sample = *src;
1100  if (sample >= 1.0f) {
1101  *dst = 127;
1102  } else if (sample <= -1.0f) {
1103  *dst = -128;
1104  } else {
1105  *dst = (Sint8)(sample * 127.0f);
1106  }
1107  }
1108 
1109  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1110 
1111  /* Make sure src is aligned too. */
1112  if ((((size_t) src) & 15) == 0) {
1113  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1114  const float32x4_t one = vdupq_n_f32(1.0f);
1115  const float32x4_t negone = vdupq_n_f32(-1.0f);
1116  const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1117  int8_t *mmdst = (int8_t *) dst;
1118  while (i >= 16) { /* 16 * float32 */
1119  const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1120  const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1121  const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1122  const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1123  const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, narrow to sint8 */
1124  const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4))); /* narrow to sint16, combine, narrow to sint8 */
1125  vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi)); /* combine to int8x16_t, store out */
1126  i -= 16; src += 16; mmdst += 16;
1127  }
1128  dst = (Sint8 *) mmdst;
1129  }
1130 
1131  /* Finish off any leftovers with scalar operations. */
1132  while (i) {
1133  const float sample = *src;
1134  if (sample >= 1.0f) {
1135  *dst = 127;
1136  } else if (sample <= -1.0f) {
1137  *dst = -128;
1138  } else {
1139  *dst = (Sint8)(sample * 127.0f);
1140  }
1141  i--; src++; dst++;
1142  }
1143 
1144  cvt->len_cvt /= 4;
1145  if (cvt->filters[++cvt->filter_index]) {
1146  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
1147  }
1148 }
1149 
1150 static void SDLCALL
1151 SDL_Convert_F32_to_U8_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1152 {
1153  const float *src = (const float *) cvt->buf;
1154  Uint8 *dst = (Uint8 *) cvt->buf;
1155  int i;
1156 
1157  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using NEON)");
1158 
1159  /* Get dst aligned to 16 bytes */
1160  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1161  const float sample = *src;
1162  if (sample >= 1.0f) {
1163  *dst = 255;
1164  } else if (sample <= -1.0f) {
1165  *dst = 0;
1166  } else {
1167  *dst = (Uint8)((sample + 1.0f) * 127.0f);
1168  }
1169  }
1170 
1171  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1172 
1173  /* Make sure src is aligned too. */
1174  if ((((size_t) src) & 15) == 0) {
1175  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1176  const float32x4_t one = vdupq_n_f32(1.0f);
1177  const float32x4_t negone = vdupq_n_f32(-1.0f);
1178  const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1179  uint8_t *mmdst = (uint8_t *) dst;
1180  while (i >= 16) { /* 16 * float32 */
1181  const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1182  const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1183  const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1184  const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1185  const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, narrow to uint8 */
1186  const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4))); /* narrow to uint16, combine, narrow to uint8 */
1187  vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi)); /* combine to uint8x16_t, store out */
1188  i -= 16; src += 16; mmdst += 16;
1189  }
1190 
1191  dst = (Uint8 *) mmdst;
1192  }
1193 
1194  /* Finish off any leftovers with scalar operations. */
1195  while (i) {
1196  const float sample = *src;
1197  if (sample >= 1.0f) {
1198  *dst = 255;
1199  } else if (sample <= -1.0f) {
1200  *dst = 0;
1201  } else {
1202  *dst = (Uint8)((sample + 1.0f) * 127.0f);
1203  }
1204  i--; src++; dst++;
1205  }
1206 
1207  cvt->len_cvt /= 4;
1208  if (cvt->filters[++cvt->filter_index]) {
1209  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
1210  }
1211 }
1212 
1213 static void SDLCALL
1214 SDL_Convert_F32_to_S16_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1215 {
1216  const float *src = (const float *) cvt->buf;
1217  Sint16 *dst = (Sint16 *) cvt->buf;
1218  int i;
1219 
1220  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using NEON)");
1221 
1222  /* Get dst aligned to 16 bytes */
1223  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1224  const float sample = *src;
1225  if (sample >= 1.0f) {
1226  *dst = 32767;
1227  } else if (sample <= -1.0f) {
1228  *dst = -32768;
1229  } else {
1230  *dst = (Sint16)(sample * 32767.0f);
1231  }
1232  }
1233 
1234  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1235 
1236  /* Make sure src is aligned too. */
1237  if ((((size_t) src) & 15) == 0) {
1238  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1239  const float32x4_t one = vdupq_n_f32(1.0f);
1240  const float32x4_t negone = vdupq_n_f32(-1.0f);
1241  const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1242  int16_t *mmdst = (int16_t *) dst;
1243  while (i >= 8) { /* 8 * float32 */
1244  const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
1245  const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
1246  vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, store out. */
1247  i -= 8; src += 8; mmdst += 8;
1248  }
1249  dst = (Sint16 *) mmdst;
1250  }
1251 
1252  /* Finish off any leftovers with scalar operations. */
1253  while (i) {
1254  const float sample = *src;
1255  if (sample >= 1.0f) {
1256  *dst = 32767;
1257  } else if (sample <= -1.0f) {
1258  *dst = -32768;
1259  } else {
1260  *dst = (Sint16)(sample * 32767.0f);
1261  }
1262  i--; src++; dst++;
1263  }
1264 
1265  cvt->len_cvt /= 2;
1266  if (cvt->filters[++cvt->filter_index]) {
1267  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
1268  }
1269 }
1270 
1271 static void SDLCALL
1272 SDL_Convert_F32_to_U16_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1273 {
1274  const float *src = (const float *) cvt->buf;
1275  Uint16 *dst = (Uint16 *) cvt->buf;
1276  int i;
1277 
1278  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using NEON)");
1279 
1280  /* Get dst aligned to 16 bytes */
1281  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1282  const float sample = *src;
1283  if (sample >= 1.0f) {
1284  *dst = 65535;
1285  } else if (sample <= -1.0f) {
1286  *dst = 0;
1287  } else {
1288  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
1289  }
1290  }
1291 
1292  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1293 
1294  /* Make sure src is aligned too. */
1295  if ((((size_t) src) & 15) == 0) {
1296  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1297  const float32x4_t one = vdupq_n_f32(1.0f);
1298  const float32x4_t negone = vdupq_n_f32(-1.0f);
1299  const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1300  uint16_t *mmdst = (uint16_t *) dst;
1301  while (i >= 8) { /* 8 * float32 */
1302  const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby32767)); /* load 4 floats, clamp, convert to uint32 */
1303  const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby32767)); /* load 4 floats, clamp, convert to uint32 */
1304  vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, store out. */
1305  i -= 8; src += 8; mmdst += 8;
1306  }
1307  dst = (Uint16 *) mmdst;
1308  }
1309 
1310  /* Finish off any leftovers with scalar operations. */
1311  while (i) {
1312  const float sample = *src;
1313  if (sample >= 1.0f) {
1314  *dst = 65535;
1315  } else if (sample <= -1.0f) {
1316  *dst = 0;
1317  } else {
1318  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
1319  }
1320  i--; src++; dst++;
1321  }
1322 
1323  cvt->len_cvt /= 2;
1324  if (cvt->filters[++cvt->filter_index]) {
1325  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
1326  }
1327 }
1328 
1329 static void SDLCALL
1330 SDL_Convert_F32_to_S32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1331 {
1332  const float *src = (const float *) cvt->buf;
1333  Sint32 *dst = (Sint32 *) cvt->buf;
1334  int i;
1335 
1336  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using NEON)");
1337 
1338  /* Get dst aligned to 16 bytes */
1339  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1340  const float sample = *src;
1341  if (sample >= 1.0f) {
1342  *dst = 2147483647;
1343  } else if (sample <= -1.0f) {
1344  *dst = -2147483648;
1345  } else {
1346  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
1347  }
1348  }
1349 
1350  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1351  SDL_assert(!i || ((((size_t) src) & 15) == 0));
1352 
1353  {
1354  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1355  const float32x4_t one = vdupq_n_f32(1.0f);
1356  const float32x4_t negone = vdupq_n_f32(-1.0f);
1357  const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0f);
1358  int32_t *mmdst = (int32_t *) dst;
1359  while (i >= 4) { /* 4 * float32 */
1360  vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby8388607)), 8));
1361  i -= 4; src += 4; mmdst += 4;
1362  }
1363  dst = (Sint32 *) mmdst;
1364  }
1365 
1366  /* Finish off any leftovers with scalar operations. */
1367  while (i) {
1368  const float sample = *src;
1369  if (sample >= 1.0f) {
1370  *dst = 2147483647;
1371  } else if (sample <= -1.0f) {
1372  *dst = -2147483648;
1373  } else {
1374  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
1375  }
1376  i--; src++; dst++;
1377  }
1378 
1379  if (cvt->filters[++cvt->filter_index]) {
1380  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
1381  }
1382 }
1383 #endif
1384 
1385 
1386 
1388 {
1389  static SDL_bool converters_chosen = SDL_FALSE;
1390 
1391  if (converters_chosen) {
1392  return;
1393  }
1394 
1395 #define SET_CONVERTER_FUNCS(fntype) \
1396  SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
1397  SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
1398  SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
1399  SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
1400  SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
1401  SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
1402  SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
1403  SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
1404  SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
1405  SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
1406  converters_chosen = SDL_TRUE
1407 
1408 #if HAVE_SSE2_INTRINSICS
1409  if (SDL_HasSSE2()) {
1410  SET_CONVERTER_FUNCS(SSE2);
1411  return;
1412  }
1413 #endif
1414 
1415 #if HAVE_NEON_INTRINSICS
1416  if (SDL_HasNEON()) {
1417  SET_CONVERTER_FUNCS(NEON);
1418  return;
1419  }
1420 #endif
1421 
1422 #if NEED_SCALAR_CONVERTER_FALLBACKS
1423  SET_CONVERTER_FUNCS(Scalar);
1424 #endif
1425 
1426 #undef SET_CONVERTER_FUNCS
1427 
1428  SDL_assert(converters_chosen == SDL_TRUE);
1429 }
1430 
1431 /* vi: set ts=4 sw=4 expandtab: */
#define LOG_DEBUG_CONVERT(from, to)
Definition: SDL_audio_c.h:34
GLenum GLenum dst
signed int int32_t
Uint8 * buf
Definition: SDL_audio.h:232
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
unsigned short uint16_t
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
int filter_index
Definition: SDL_audio.h:238
SDL_AudioFilter SDL_Convert_U8_to_F32
uint16_t Uint16
Definition: SDL_stdinc.h:191
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
GLfloat f
Uint16 SDL_AudioFormat
Audio format flags.
Definition: SDL_audio.h:64
static const double one
Definition: e_exp.c:79
signed short int16_t
GLenum src
#define AUDIO_S16SYS
Definition: SDL_audio.h:123
SDL_AudioFilter SDL_Convert_F32_to_U8
unsigned int size_t
A structure to hold a set of audio conversion filters and buffers.
Definition: SDL_audio.h:226
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: SDL_opengl.h:1572
static const double zero
Definition: e_atan2.c:44
#define AUDIO_U8
Definition: SDL_audio.h:89
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
Definition: SDL_audio.h:237
#define AUDIO_F32SYS
Definition: SDL_audio.h:125
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int8_t Sint8
Definition: SDL_stdinc.h:173
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
uint8_t Uint8
Definition: SDL_stdinc.h:179
#define SDL_HasNEON
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int32_t Sint32
Definition: SDL_stdinc.h:197
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
Definition: SDL_audio.h:193
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define SDL_assert(condition)
Definition: SDL_assert.h:169
#define NULL
Definition: begin_code.h:164
SDL_bool
Definition: SDL_stdinc.h:161
unsigned char uint8_t
signed char int8_t
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define SDL_HasSSE2
SDL_AudioFilter SDL_Convert_S8_to_F32
#define AUDIO_S32SYS
Definition: SDL_audio.h:124
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define DIVBY32768
#define DIVBY128
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
#define AUDIO_S8
Definition: SDL_audio.h:90
SDL_AudioFilter SDL_Convert_S32_to_F32
#define SDLCALL
Definition: SDL_internal.h:45
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)
#define AUDIO_U16SYS
Definition: SDL_audio.h:122
int16_t Sint16
Definition: SDL_stdinc.h:185
#define DIVBY8388607