Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from ADC input when the pushbutton is pressed, then plays the audio back with Billy Bass's mouth controlled so that it opens during vowel sounds.

The ADC input is driven from a microphone and preamplifier, via a capacitor and into a resistor divider connected to the +3.3V supply pin to provide mid-range biasing for the ADC signals.

The DAC output is connected to his audio amplifier input (to the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias using the DAC to get the single transistor amplifier biased into proper operation.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

Committer:
bikeNomad
Date:
Wed May 15 23:36:12 2013 +0000
Revision:
7:f0e5450449cb
Parent:
4:c989412b91ea
turn DAC off after playing

Who changed what in which revision?

User | Revision | Line number | New contents of line
bikeNomad 3:c04d8d0493f4 1 #ifndef __included_audio_analyzer_h
bikeNomad 3:c04d8d0493f4 2 #define __included_audio_analyzer_h
bikeNomad 3:c04d8d0493f4 3
bikeNomad 4:c989412b91ea 4 #include <math.h>
bikeNomad 4:c989412b91ea 5
namespace NK
{

// In-class initialisation of a non-integral static constant is only standard
// C++ when the member is constexpr (C++11 and later). Older compilers accept
// the plain `static const float` spelling as an extension, so keep that
// spelling for them.
#if defined(__cplusplus) && __cplusplus >= 201103L
#define NK_AUDIO_ANALYZER_FLOAT_CONSTANT static constexpr float
#else
#define NK_AUDIO_ANALYZER_FLOAT_CONSTANT static const float
#endif

/**
 * Lazily evaluated statistics over a buffer of signed 8-bit audio samples,
 * plus a voiced/vowel classification based on log power and zero-crossing
 * ratio.
 *
 * The sample pointer is stored, not copied, so the buffer must outlive the
 * analyzer. Every getter runs analyze() on first use; analyze() is defined
 * outside this header and is presumably what fills in zeroCrossings, power,
 * minValue and maxValue and sets `analyzed`.
 */
class AudioAnalyzer
{
protected:
    int8_t const *samples;      ///< borrowed sample buffer
    uint16_t nsamples;          ///< number of samples in the buffer
    uint16_t zeroCrossings;     ///< result field, see getZeroCrossings()
    uint32_t power;             ///< result field, see getPower()
    float logPower;             ///< cached result of the last getLogPower()
    float powerRef;             ///< reference level subtracted from log(power)
    int8_t minValue;            ///< result field, see getMinMaxValues()
    int8_t maxValue;            ///< result field, see getMinMaxValues()
    bool analyzed;              ///< false until analyze() has been run

    /// Computes the statistics; defined in the implementation file.
    void analyze();

public:
    /**
     * @param _samples   buffer of signed 8-bit samples (not copied)
     * @param _nsamples  number of samples in the buffer
     */
    AudioAnalyzer(int8_t const *_samples, uint16_t _nsamples)
        : samples(_samples),
          nsamples(_nsamples),
          zeroCrossings(0),
          power(0),
          logPower(0.0f),
          powerRef(0.0f),
          minValue(0),      // previously left uninitialised
          maxValue(0),      // previously left uninitialised
          analyzed(false) {
    }

    /// @return the zero-crossing count, analyzing the buffer first if needed.
    uint16_t getZeroCrossings() {
        if (!analyzed) analyze();
        return zeroCrossings;
    }

    /**
     * @return zero crossings as a percentage of the sample count, or 0 for an
     *         empty buffer (instead of the NaN that 0/0 would produce).
     */
    float getZeroCrossingRatioPercent() {
        const uint16_t crossings = getZeroCrossings();
        if (nsamples == 0)
            return 0.0f;
        return static_cast<float>(crossings * 100.0 / nsamples);
    }

    /// @return the power value, analyzing the buffer first if needed.
    uint32_t getPower() {
        if (!analyzed) analyze();
        return power;
    }

    /**
     * @return natural log of the power minus the reference set with
     *         setPowerRef(). The value is also cached in logPower.
     *         A power of zero yields -infinity on IEEE-754 targets.
     */
    float getLogPower() {
        if (!analyzed) analyze();
        logPower = ::log(static_cast<double>(power)) - powerRef;
        return logPower;
    }

    /**
     * Reports the minimum and maximum values found by the analysis.
     *
     * @param min  receives the minimum; may be null if not wanted
     * @param max  receives the maximum; may be null if not wanted
     */
    void getMinMaxValues(int8_t *min, int8_t *max) {
        if (!analyzed) analyze();
        if (min) *min = minValue;
        if (max) *max = maxValue;
    }

    /// @return true when the log power is a number and at least PowerThreshold.
    bool isVoiced() {
        const float level = getLogPower();
        if (isnan(level))
            return false;
        return !(level < PowerThreshold);
    }

    /// Sets the reference level that getLogPower() subtracts from log(power).
    void setPowerRef(float _powerRef) {
        powerRef = _powerRef;
    }

    /**
     * A block is a vowel when it is voiced (logPower at or above
     * PowerThreshold) and lies below the line
     *   zeroCrossingRatioPercent = VowelSlope * (logPower - VowelXIntercept)
     *
     * @return true when the block is classified as a vowel.
     */
    bool isVowel() {
        // isVoiced() refreshes logPower and applies the same threshold test
        // the classification needs, including the NaN case.
        if (!isVoiced())
            return false;
        return getZeroCrossingRatioPercent() < VowelSlope * (logPower - VowelXIntercept);
    }

    /// Minimum log power (relative to powerRef) for a block to count as voiced.
    NK_AUDIO_ANALYZER_FLOAT_CONSTANT PowerThreshold = -4.0f;

    // Anything above PowerThreshold and below the line
    //   zeroCrossingRatioPercent = VowelSlope * (logPower - VowelXIntercept)
    // is considered a vowel.
    /// Slope of the vowel boundary line (percent per unit of log power).
    NK_AUDIO_ANALYZER_FLOAT_CONSTANT VowelSlope = 14.7f;
    /// Log power at which the vowel boundary line crosses zero percent.
    NK_AUDIO_ANALYZER_FLOAT_CONSTANT VowelXIntercept = -0.7f;
};

#undef NK_AUDIO_ANALYZER_FLOAT_CONSTANT

} // namespace NK
bikeNomad 3:c04d8d0493f4 83 #endif