forked from alexanderlerch/ACA-Code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ExampleMusicSpeechClassification.m
83 lines (67 loc) · 2.66 KB
/
ExampleMusicSpeechClassification.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
function ExampleMusicSpeechClassification(cDatasetPath)
%ExampleMusicSpeechClassification  Music/speech classification example.
%   ExampleMusicSpeechClassification(cDatasetPath) extracts two features
%   per file from the *.au files in the 'music' and 'speech' sub-folders of
%   cDatasetPath, z-score normalizes them across all files, and prints the
%   confusion matrix, micro and macro accuracy, and the accuracy obtained
%   with each feature on its own, as returned by ToolLooCrossVal.
%
%   cDatasetPath  root folder of the dataset (written for the GTZAN
%                 music/speech dataset); a trailing file separator is
%                 optional. Defaults to 'd:\dataset\music_speech\'.
%
%   Raises an error if the ACA scripts are not on the path, if the
%   music/speech folders are missing, or if either folder has no *.au file.

    if (nargin < 1)
        % this script is written for the GTZAN music/speech dataset
        % modify this path or use the function parameter to specify your
        % dataset path
        cDatasetPath = 'd:\dataset\music_speech\';
    end

    if (exist('ComputeFeature', 'file') ~= 2)
        error('Please add the ACA scripts (https://github.com/alexanderlerch/ACA-Code) to your path!');
    end

    % fullfile inserts the separator only when needed, so the dataset path
    % works with or without a trailing slash
    cMusicDir = fullfile(cDatasetPath, 'music');
    cSpeechDir = fullfile(cDatasetPath, 'speech');
    if ((exist(cMusicDir, 'dir') ~= 7) || (exist(cSpeechDir, 'dir') ~= 7))
        error('Dataset path wrong or does not contain music/speech folders!')
    end

    iNumFeatures = 2;

    % read directory contents
    music_files = dir(fullfile(cMusicDir, '*.au'));
    speech_files = dir(fullfile(cSpeechDir, '*.au'));
    iNumMusic = numel(music_files);
    iNumSpeech = numel(speech_files);

    % without files for both classes the evaluation below is meaningless
    if ((iNumMusic == 0) || (iNumSpeech == 0))
        error('No *.au files found in the music and/or speech folder!')
    end

    v_music = zeros(iNumFeatures, iNumMusic);
    v_speech = zeros(iNumFeatures, iNumSpeech);

    % extract features, this may take a while...
    for i = 1:iNumMusic
        v_music(:, i) = ExtractFeaturesFromFile_I(...
            fullfile(cMusicDir, music_files(i).name));
    end
    for i = 1:iNumSpeech
        v_speech(:, i) = ExtractFeaturesFromFile_I(...
            fullfile(cSpeechDir, speech_files(i).name));
    end

    % assign class labels for training and eval (0: music, 1: speech)
    C = [zeros(1, iNumMusic) ones(1, iNumSpeech)];

    % normalize features (z-score per feature over all files)
    v = [v_music, v_speech];
    iNumObs = size(v, 2);
    m = mean(v, 2);
    s = std(v, 0, 2);
    % a constant feature has zero deviation; dividing by 1 keeps it at 0
    % instead of producing NaN
    s(s == 0) = 1;
    v = (v - repmat(m, 1, iNumObs)) ./ repmat(s, 1, iNumObs);

    % compute the overall accuracy with cross validation
    [~, mat] = ToolLooCrossVal(v, C);

    disp('confusion matrix:'),
    disp(mat);

    disp('micro accuracy:'),
    disp(sum(diag(mat)) / sum(sum(mat)))

    % per-row accuracy: diagonal entry divided by the row sum
    tmp = diag(mat) ./ sum(mat, 2);
    disp('macro accuracy:'),
    disp(mean(tmp))

    % compute the individual feature performance
    % (fprintf prints the text itself rather than relying on 'ans' echo)
    [acc1, ~] = ToolLooCrossVal(v(1, :), C);
    fprintf('centroid accuracy: %f\n', acc1);
    [acc2, ~] = ToolLooCrossVal(v(2, :), C);
    fprintf('rms accuracy: %f\n', acc2);
end
function [v] = ExtractFeaturesFromFile_I(cFilePath)
%ExtractFeaturesFromFile_I  Compute the 2x1 feature vector of one file.
%   v = ExtractFeaturesFromFile_I(cFilePath) reads the audio file at
%   cFilePath, downmixes it to one channel, peak-normalizes it, and returns
%     v(1)  mean of the 'SpectralCentroid' feature
%     v(2)  standard deviation of the first row of the 'TimeRms' feature
%   Both features are computed by ComputeFeature (ACA scripts).

    cFeatureNames = {'SpectralCentroid', 'TimeRms'};

    % read audio
    [x, fs] = audioread(cFilePath);

    % average the channels first: for multi-channel audio max(abs(x)) is a
    % row vector and x / rowvector would be a matrix right division
    % (mono input is left unchanged by the mean)
    x = mean(x, 2);

    % peak-normalize, skipping all-zero input to avoid 0/0 = NaN
    fPeak = max(abs(x));
    if (fPeak > 0)
        x = x / fPeak;
    end

    v = zeros(2, 1);

    % compute first feature
    feature = ComputeFeature(cFeatureNames{1}, x, fs);
    v(1, 1) = mean(feature);

    % compute second feature
    feature = ComputeFeature(cFeatureNames{2}, x, fs);
    v(2, 1) = std(feature(1, :));
end