Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from ADC input when the pushbutton is pressed, then plays the audio back with Billy Bass's mouth controlled so that it opens during vowel sounds.

The ADC input is driven from a microphone and preamplifier, via a capacitor and into a resistor divider connected to the +3.3V supply pin to provide mid-range biasing for the ADC signals.

The DAC output is connected to his audio amplifier input (to the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias using the DAC to get the single transistor amplifier biased into proper operation.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

Committer:
bikeNomad
Date:
Wed May 15 17:53:33 2013 +0000
Revision:
4:c989412b91ea
Parent:
3:c04d8d0493f4
Child:
5:9f4ffb2b0e6b
tuned analyzer; added play with billy

Who changed what in which revision?

User | Revision | Line number | New contents of line
bikeNomad 0:1ddd40d843cb 1 #include "mbed.h"
bikeNomad 1:2fa375aacece 2 #include "FastAnalogIn.h"
bikeNomad 3:c04d8d0493f4 3 #include "AudioAnalyzer.h"
bikeNomad 3:c04d8d0493f4 4 extern "C" {
bikeNomad 3:c04d8d0493f4 5 #include <math.h>
bikeNomad 3:c04d8d0493f4 6 }
bikeNomad 3:c04d8d0493f4 7
bikeNomad 1:2fa375aacece 8 using namespace NK;
bikeNomad 1:2fa375aacece 9
bikeNomad 1:2fa375aacece 10 // Power (board header pins used to supply the FRDM-KL25Z):
bikeNomad 1:2fa375aacece 11 // Power GND J9/14
bikeNomad 1:2fa375aacece 12 // Vin (6V) J9/16
bikeNomad 0:1ddd40d843cb 13
bikeNomad 1:2fa375aacece 14 // Digital (per the project notes: motor driver transistors and the pushbutton switch):
bikeNomad 1:2fa375aacece 15 DigitalOut tail(PTA13); // J3/2
bikeNomad 1:2fa375aacece 16 DigitalOut mouth(PTC12); // J3/1
bikeNomad 1:2fa375aacece 17 DigitalOut head(PTC13); // J3/3
bikeNomad 1:2fa375aacece 18 DigitalIn pushbutton(PTD5); // J3/4
bikeNomad 1:2fa375aacece 19
bikeNomad 1:2fa375aacece 20 PwmOut redLED(LED_RED); // main() writes 1.0 to all three LED channels while idle
bikeNomad 1:2fa375aacece 21 PwmOut greenLED(LED_GREEN); // set per chunk from vowels[] in playWithBilly()
bikeNomad 1:2fa375aacece 22 PwmOut blueLED(LED_BLUE); // shows recording progress in recordAudio()
bikeNomad 0:1ddd40d843cb 23
bikeNomad 1:2fa375aacece 24 // Analog:
bikeNomad 1:2fa375aacece 25 // GND J3/14
bikeNomad 1:2fa375aacece 26 // VrefH J3/16
bikeNomad 1:2fa375aacece 27 FastAnalogIn microphone(PTB0); // J10/2; read in ADC0_IRQHandler()
bikeNomad 1:2fa375aacece 28 AnalogOut speaker(PTE30); // J10/11; written in playAudioSample()
bikeNomad 1:2fa375aacece 29
bikeNomad 1:2fa375aacece 30 // Communications:
bikeNomad 1:2fa375aacece 31 // Serial uart1(PTC4, PTC3);
bikeNomad 1:2fa375aacece 32 Serial pc(USBTX, USBRX); // console used for prompts and analysis output; baud rate is set in main()
bikeNomad 1:2fa375aacece 33
bikeNomad 2:5bcd2f55a294 34 const unsigned SAMPLE_RATE_HZ = 7889; // nominal rate; NOTE(review): presumably the measured ADC conversion rate, since recording is paced by ADC interrupts -- confirm
bikeNomad 4:c989412b91ea 35 const unsigned SAMPLE_PERIOD_US = (1000000U / SAMPLE_RATE_HZ); // integer division, truncates to 126 us
bikeNomad 2:5bcd2f55a294 36 const unsigned SAMPLE_BUFFER_SIZE = 9000; // samples held in RAM (about 1.14 s at SAMPLE_RATE_HZ)
bikeNomad 4:c989412b91ea 37 const unsigned CHUNK_DURATION_MS = 80; // length of one analysis/playback chunk
bikeNomad 4:c989412b91ea 38 const unsigned CHUNK_SIZE = SAMPLE_RATE_HZ * CHUNK_DURATION_MS / 1000; // samples per chunk (631 after integer truncation)
bikeNomad 4:c989412b91ea 39 const unsigned NUM_CHUNKS = SAMPLE_BUFFER_SIZE / CHUNK_SIZE; // whole chunks in the buffer (14); the trailing 166 samples are not covered
bikeNomad 1:2fa375aacece 40
bikeNomad 1:2fa375aacece 41 Ticker sampleTicker; // calls playAudioSample() periodically during playback
bikeNomad 2:5bcd2f55a294 42 Timer timer; // times recording and playback; stopped by the sample callbacks when they run out of samples
bikeNomad 1:2fa375aacece 43
bikeNomad 4:c989412b91ea 44 // audio samples
bikeNomad 3:c04d8d0493f4 45 int8_t sampleBuffer[SAMPLE_BUFFER_SIZE]; // SAMPLE_BUFFER_SIZE signed 8-bit samples (about 1.14 s at SAMPLE_RATE_HZ)
bikeNomad 3:c04d8d0493f4 46 int8_t * volatile nextSample; // cursor into the buffer; advanced by ADC0_IRQHandler() and playAudioSample()
bikeNomad 3:c04d8d0493f4 47 uint16_t volatile samplesRemaining; // samples still to record/play; polled by the foreground wait loops
bikeNomad 1:2fa375aacece 48
bikeNomad 4:c989412b91ea 49 // vowel decisions
bikeNomad 4:c989412b91ea 50 bool vowels[ NUM_CHUNKS ]; // one flag per chunk; written by analyze(), read by playWithBilly()
bikeNomad 4:c989412b91ea 51
bikeNomad 1:2fa375aacece 52 extern "C"
bikeNomad 1:2fa375aacece 53 void ADC0_IRQHandler(void)
bikeNomad 0:1ddd40d843cb 54 {
bikeNomad 1:2fa375aacece 55 if (samplesRemaining) {
bikeNomad 3:c04d8d0493f4 56 *nextSample++ = microphone.read_s8_nowait();
bikeNomad 2:5bcd2f55a294 57 microphone.start_read();
bikeNomad 1:2fa375aacece 58 samplesRemaining--;
bikeNomad 1:2fa375aacece 59 } else {
bikeNomad 1:2fa375aacece 60 microphone.disable_interrupt();
bikeNomad 2:5bcd2f55a294 61 microphone.abort_read();
bikeNomad 2:5bcd2f55a294 62 timer.stop();
bikeNomad 0:1ddd40d843cb 63 }
bikeNomad 0:1ddd40d843cb 64 }
bikeNomad 0:1ddd40d843cb 65
bikeNomad 1:2fa375aacece 66 void playAudioSample()
bikeNomad 1:2fa375aacece 67 {
bikeNomad 1:2fa375aacece 68 if (samplesRemaining) {
bikeNomad 3:c04d8d0493f4 69 int8_t val = *nextSample++;
bikeNomad 3:c04d8d0493f4 70 speaker.write_u16((val + 128) << 8);
bikeNomad 1:2fa375aacece 71 samplesRemaining--;
bikeNomad 1:2fa375aacece 72 } else {
bikeNomad 1:2fa375aacece 73 sampleTicker.detach();
bikeNomad 2:5bcd2f55a294 74 timer.stop();
bikeNomad 1:2fa375aacece 75 }
bikeNomad 1:2fa375aacece 76 }
bikeNomad 1:2fa375aacece 77
bikeNomad 3:c04d8d0493f4 78 void resetSampleBuffer(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 1:2fa375aacece 79 {
bikeNomad 3:c04d8d0493f4 80 nextSample = start;
bikeNomad 3:c04d8d0493f4 81 samplesRemaining = nsamples;
bikeNomad 1:2fa375aacece 82 }
bikeNomad 1:2fa375aacece 83
bikeNomad 1:2fa375aacece 84 void recordAudio()
bikeNomad 1:2fa375aacece 85 {
bikeNomad 2:5bcd2f55a294 86 pc.printf("Recording %d samples... ", SAMPLE_BUFFER_SIZE);
bikeNomad 1:2fa375aacece 87 blueLED = 0.0;
bikeNomad 1:2fa375aacece 88
bikeNomad 1:2fa375aacece 89 resetSampleBuffer();
bikeNomad 3:c04d8d0493f4 90 timer.reset();
bikeNomad 2:5bcd2f55a294 91 timer.start();
bikeNomad 1:2fa375aacece 92 microphone.enable_interrupt();
bikeNomad 2:5bcd2f55a294 93 microphone.start_read();
bikeNomad 1:2fa375aacece 94
bikeNomad 1:2fa375aacece 95 while (samplesRemaining) {
bikeNomad 2:5bcd2f55a294 96 wait_us(50000);
bikeNomad 2:5bcd2f55a294 97 blueLED.write(1.0 - (1.0 * samplesRemaining / SAMPLE_BUFFER_SIZE));
bikeNomad 1:2fa375aacece 98 }
bikeNomad 1:2fa375aacece 99
bikeNomad 3:c04d8d0493f4 100 microphone.abort_read();
bikeNomad 3:c04d8d0493f4 101
bikeNomad 2:5bcd2f55a294 102 float elapsed = timer.read();
bikeNomad 2:5bcd2f55a294 103 pc.printf("Done. %u samples in %f usec = %f samples/sec\r\n", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
bikeNomad 1:2fa375aacece 104 }
bikeNomad 1:2fa375aacece 105
bikeNomad 4:c989412b91ea 106 void playAudio(unsigned duration_ms, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 1:2fa375aacece 107 {
bikeNomad 3:c04d8d0493f4 108 resetSampleBuffer(start, nsamples);
bikeNomad 2:5bcd2f55a294 109 timer.reset();
bikeNomad 2:5bcd2f55a294 110 timer.start();
bikeNomad 4:c989412b91ea 111 sampleTicker.attach_us(&playAudioSample, duration_ms*1000/nsamples);
bikeNomad 1:2fa375aacece 112 while (samplesRemaining) {
bikeNomad 4:c989412b91ea 113 wait_us(CHUNK_DURATION_MS * 1000);
bikeNomad 3:c04d8d0493f4 114 }
bikeNomad 3:c04d8d0493f4 115 }
bikeNomad 3:c04d8d0493f4 116
bikeNomad 3:c04d8d0493f4 117 void audioTest()
bikeNomad 3:c04d8d0493f4 118 {
bikeNomad 3:c04d8d0493f4 119 double phase = 0.0;
bikeNomad 3:c04d8d0493f4 120 resetSampleBuffer();
bikeNomad 3:c04d8d0493f4 121 for (int8_t *p = sampleBuffer; p < sampleBuffer + SAMPLE_BUFFER_SIZE; p++) {
bikeNomad 3:c04d8d0493f4 122 double s = sin(phase) * 125.0;
bikeNomad 3:c04d8d0493f4 123 phase += 2000 * 3.1416 / SAMPLE_BUFFER_SIZE;
bikeNomad 3:c04d8d0493f4 124 *p = static_cast<int8_t>(s);
bikeNomad 3:c04d8d0493f4 125 }
bikeNomad 3:c04d8d0493f4 126 }
bikeNomad 3:c04d8d0493f4 127
bikeNomad 3:c04d8d0493f4 128 // returns true if chunk was louder than minimum
bikeNomad 4:c989412b91ea 129 bool analyzeChunk(int8_t *chunkStart, uint16_t CHUNK_SIZE, float powerRef, bool *pisvowel = 0)
bikeNomad 3:c04d8d0493f4 130 {
bikeNomad 4:c989412b91ea 131 AudioAnalyzer analyzer(chunkStart, CHUNK_SIZE);
bikeNomad 3:c04d8d0493f4 132 uint32_t power = analyzer.getPower();
bikeNomad 3:c04d8d0493f4 133 uint16_t zcs = analyzer.getZeroCrossings();
bikeNomad 3:c04d8d0493f4 134 int8_t min, max;
bikeNomad 3:c04d8d0493f4 135 analyzer.getMinMaxValues(&min, &max);
bikeNomad 4:c989412b91ea 136 analyzer.setPowerRef(powerRef);
bikeNomad 4:c989412b91ea 137 float logPower = analyzer.getLogPower();
bikeNomad 4:c989412b91ea 138 float zcRatio = analyzer.getZeroCrossingRatioPercent();
bikeNomad 4:c989412b91ea 139 pc.printf("%.2f\t%.2f\t%.2f\t%d\t%d\t%d\t", zcRatio, logPower, zcRatio / (logPower - AudioAnalyzer::VowelXIntercept), min, max, analyzer.isVowel());
bikeNomad 4:c989412b91ea 140 if (pisvowel)
bikeNomad 4:c989412b91ea 141 *pisvowel = analyzer.isVowel();
bikeNomad 4:c989412b91ea 142 return analyzer.isVoiced();
bikeNomad 3:c04d8d0493f4 143 }
bikeNomad 3:c04d8d0493f4 144
bikeNomad 3:c04d8d0493f4 145 void analyze(bool playToo = false)
bikeNomad 3:c04d8d0493f4 146 {
bikeNomad 3:c04d8d0493f4 147 int8_t *chunkStart = sampleBuffer;
bikeNomad 3:c04d8d0493f4 148 AudioAnalyzer analyzer(sampleBuffer, SAMPLE_BUFFER_SIZE);
bikeNomad 3:c04d8d0493f4 149 uint32_t power = analyzer.getPower();
bikeNomad 3:c04d8d0493f4 150 float powerRef = ::log((double)power);
bikeNomad 3:c04d8d0493f4 151 pc.printf("Reference power = %.2f\r\n", powerRef);
bikeNomad 4:c989412b91ea 152 pc.printf("Analyzing %d chunks of %d samples (%.2f seconds):\r\n", NUM_CHUNKS, CHUNK_SIZE, CHUNK_DURATION_MS);
bikeNomad 4:c989412b91ea 153 pc.printf("chunk\tstartms\tzcratio\tlogp\tmaxs\tmin\tmax\tisVowel\tvowel\r\n");
bikeNomad 4:c989412b91ea 154 for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
bikeNomad 4:c989412b91ea 155 pc.printf("%u\t%u\t", chunk, chunk * CHUNK_DURATION_MS);
bikeNomad 4:c989412b91ea 156 bool loudEnough = analyzeChunk(chunkStart, CHUNK_SIZE, powerRef, &vowels[chunk]);
bikeNomad 3:c04d8d0493f4 157 if (loudEnough) {
bikeNomad 3:c04d8d0493f4 158 if (playToo) {
bikeNomad 3:c04d8d0493f4 159 while (! pc.readable())
bikeNomad 4:c989412b91ea 160 playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
bikeNomad 3:c04d8d0493f4 161 int c = pc.getc();
bikeNomad 3:c04d8d0493f4 162 pc.putc(c);
bikeNomad 3:c04d8d0493f4 163 } else
bikeNomad 3:c04d8d0493f4 164 pc.puts("-");
bikeNomad 3:c04d8d0493f4 165 }
bikeNomad 3:c04d8d0493f4 166 pc.puts("\r\n");
bikeNomad 4:c989412b91ea 167 chunkStart += CHUNK_SIZE;
bikeNomad 4:c989412b91ea 168 }
bikeNomad 4:c989412b91ea 169 }
bikeNomad 4:c989412b91ea 170
bikeNomad 4:c989412b91ea 171 // assumes that vowels[] has been set by analyze
bikeNomad 4:c989412b91ea 172 void playWithBilly()
bikeNomad 4:c989412b91ea 173 {
bikeNomad 4:c989412b91ea 174 int8_t *chunkStart = sampleBuffer;
bikeNomad 4:c989412b91ea 175 for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
bikeNomad 4:c989412b91ea 176 greenLED = vowels[chunk] ? 0.0 : 1.0;
bikeNomad 4:c989412b91ea 177 playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
bikeNomad 4:c989412b91ea 178 chunkStart += CHUNK_SIZE;
bikeNomad 4:c989412b91ea 179
bikeNomad 3:c04d8d0493f4 180 }
bikeNomad 3:c04d8d0493f4 181 }
bikeNomad 3:c04d8d0493f4 182
bikeNomad 3:c04d8d0493f4 183 void dumpAudio(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 3:c04d8d0493f4 184 {
bikeNomad 3:c04d8d0493f4 185 for (int8_t *p = start; p < sampleBuffer + nsamples; p++) {
bikeNomad 3:c04d8d0493f4 186 pc.printf("%d\r\n", *p);
bikeNomad 3:c04d8d0493f4 187 }
bikeNomad 2:5bcd2f55a294 188 }
bikeNomad 1:2fa375aacece 189
bikeNomad 0:1ddd40d843cb 190 int main()
bikeNomad 0:1ddd40d843cb 191 {
bikeNomad 1:2fa375aacece 192 pc.baud(115200);
bikeNomad 2:5bcd2f55a294 193 pc.printf("\r\n\r\nSample buffer = %u samples; rate = %u Hz; period = %u usec\r\n", SAMPLE_BUFFER_SIZE, SAMPLE_RATE_HZ, SAMPLE_PERIOD_US);
bikeNomad 3:c04d8d0493f4 194
bikeNomad 3:c04d8d0493f4 195 for (;;) {
bikeNomad 3:c04d8d0493f4 196 redLED = 1.0;
bikeNomad 3:c04d8d0493f4 197 greenLED = 1.0;
bikeNomad 3:c04d8d0493f4 198 blueLED = 1.0;
bikeNomad 3:c04d8d0493f4 199
bikeNomad 3:c04d8d0493f4 200 pc.puts("ENTER when ready:");
bikeNomad 3:c04d8d0493f4 201 pc.getc();
bikeNomad 3:c04d8d0493f4 202 pc.puts("\r\n");
bikeNomad 1:2fa375aacece 203
bikeNomad 3:c04d8d0493f4 204 #if 0
bikeNomad 3:c04d8d0493f4 205 audioTest();
bikeNomad 4:c989412b91ea 206 playAudio(1000);
bikeNomad 3:c04d8d0493f4 207 analyze();
bikeNomad 3:c04d8d0493f4 208 #endif
bikeNomad 1:2fa375aacece 209
bikeNomad 3:c04d8d0493f4 210 recordAudio();
bikeNomad 3:c04d8d0493f4 211 float duration = timer.read();
bikeNomad 4:c989412b91ea 212 // playAudio(duration * 1000);
bikeNomad 3:c04d8d0493f4 213 float elapsed = timer.read();
bikeNomad 3:c04d8d0493f4 214 pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
bikeNomad 3:c04d8d0493f4 215 pc.printf(" (Rate %#+0.2f%%)\r\n", (duration-elapsed)*100/duration);
bikeNomad 4:c989412b91ea 216 analyze(false);
bikeNomad 3:c04d8d0493f4 217 // dumpAudio();
bikeNomad 4:c989412b91ea 218
bikeNomad 4:c989412b91ea 219 playWithBilly();
bikeNomad 3:c04d8d0493f4 220 }
bikeNomad 0:1ddd40d843cb 221 }