function [wts,binfrqs] = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp) % wts = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp) % Generate a matrix of weights to combine FFT bins into Mel % bins. nfft defines the source FFT size at sampling rate sr. % Optional nfilts specifies the number of output bands required % (else one per bark), and width is the constant width of each % band relative to standard Mel (default 1). % While wts has nfft columns, the second half are all zero. % Hence, Mel spectrum is fft2melmx(nfft,sr)*abs(fft(xincols,nfft)); % minfrq is the frequency (in Hz) of the lowest band edge; % default is 0, but 133.33 is a common standard (to skip LF). % maxfrq is frequency in Hz of upper edge; default sr/2. % You can exactly duplicate the mel matrix in Slaney's mfcc.m % as fft2melmx(512, 8000, 40, 1, 133.33, 6855.5, 0); % htkmel=1 means use HTK's version of the mel curve, not Slaney's. % constamp=1 means make integration windows peak at 1, not sum to 1. % 2004-09-05 dpwe@ee.columbia.edu based on fft2barkmx if nargin < 2; sr = 8000; end if nargin < 3; nfilts = 40; end if nargin < 4; width = 1.0; end if nargin < 5; minfrq = 0; end % default bottom edge at 0 if nargin < 6; maxfrq = sr/2; end % default top edge at nyquist if nargin < 7; htkmel = 0; end if nargin < 8; constamp = 0; end wts = zeros(nfilts, nfft); % Center freqs of each FFT bin fftfrqs = [0:nfft-1]/nfft*sr; % 'Center freqs' of mel bands - uniformly spaced between limits minmel = hz2mel(minfrq, htkmel); maxmel = hz2mel(maxfrq, htkmel); binfrqs = mel2hz(minmel+[0:(nfilts+1)]/(nfilts+1)*(maxmel-minmel), htkmel); binbin = round(binfrqs/sr*(nfft-1)); for i = 1:nfilts % fs = mel2hz(i + [-1 0 1], htkmel); fs = binfrqs(i+[0 1 2]); % scale by width fs = fs(2)+width*(fs - fs(2)); % lower and upper slopes for all bins loslope = (fftfrqs - fs(1))/(fs(2) - fs(1)); hislope = (fs(3) - fftfrqs)/(fs(3) - fs(2)); % .. then intersect them with each other and zero % wts(i,:) = 2/(fs(3)-fs(1))*max(0,min(loslope, hislope)); wts(i,:) = max(0,min(loslope, hislope)); % actual algo and weighting in feacalc (more or less) % wts(i,:) = 0; % ww = binbin(i+2)-binbin(i); % usl = binbin(i+1)-binbin(i); % wts(i,1+binbin(i)+[1:usl]) = 2/ww * [1:usl]/usl; % dsl = binbin(i+2)-binbin(i+1); % wts(i,1+binbin(i+1)+[1:(dsl-1)]) = 2/ww * [(dsl-1):-1:1]/dsl; % need to disable weighting below if you use this one end if (constamp == 0) % Slaney-style mel is scaled to be approx constant E per channel wts = diag(2./(binfrqs(2+[1:nfilts])-binfrqs(1:nfilts)))*wts; end % Make sure 2nd half of FFT is zero wts(:,(nfft/2+1):nfft) = 0; % seems like a good idea to avoid aliasing %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function f = mel2hz(z, htk) % f = mel2hz(z, htk) % Convert 'mel scale' frequencies into Hz % Optional htk = 1 means use the HTK formula % else use the formula from Slaney's mfcc.m % 2005-04-19 dpwe@ee.columbia.edu if nargin < 2 htk = 0; end if htk == 1 f = 700*(10.^(z/2595)-1); else f_0 = 0; % 133.33333; f_sp = 200/3; % 66.66667; brkfrq = 1000; brkpt = (brkfrq - f_0)/f_sp; % starting mel value for log region logstep = exp(log(6.4)/27); % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the preceding linear filter center at 933.33333 Hz (actually 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447) linpts = (z < brkpt); f = 0*z; % fill in parts separately f(linpts) = f_0 + f_sp*z(linpts); f(~linpts) = brkfrq*exp(log(logstep)*(z(~linpts)-brkpt)); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function z = hz2mel(f,htk) % z = hz2mel(f,htk) % Convert frequencies f (in Hz) to mel 'scale'. % Optional htk = 1 uses the mel axis defined in the HTKBook % otherwise use Slaney's formula % 2005-04-19 dpwe@ee.columbia.edu if nargin < 2 htk = 0; end if htk == 1 z = 2595 * log10(1+f/700); else % Mel fn to match Slaney's Auditory Toolbox mfcc.m f_0 = 0; % 133.33333; f_sp = 200/3; % 66.66667; brkfrq = 1000; brkpt = (brkfrq - f_0)/f_sp; % starting mel value for log region logstep = exp(log(6.4)/27); % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the preceding linear filter center at 933.33333 Hz (actually 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447) linpts = (f < brkfrq); z = 0*f; % fill in parts separately z(linpts) = (f(linpts) - f_0)/f_sp; z(~linpts) = brkpt+(log(f(~linpts)/brkfrq))./log(logstep); end