-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataloader.py
executable file
·101 lines (86 loc) · 3.79 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Wei-Cheng (Winston) Lin
"""
import numpy as np
from torch.utils.data import Dataset
from scipy.io import loadmat
from utils import getPaths_attri, getPaths_unlabel
# Every utterance is pre-segmented into this fixed number of feature chunks,
# so each path/label pair is replicated this many times in the datasets below.
NUM_CHUNKS_PER_SENT = 11
class MspPodcastEmoDataset(Dataset):
    """MSP-Podcast Dataset (labeled data).

    Loads per-utterance acoustic feature matrices (``.mat`` files) and the
    z-normalized label for one emotional attribute.

    Args:
        root_dir: directory containing the per-utterance feature ``.mat`` files.
        label_dir: directory with the label files consumed by ``getPaths_attri``.
        split_set: partition name forwarded to ``getPaths_attri``
            (presumably train/validation/test — confirm against utils).
        emo_attr: emotional attribute; one of ``'Act'``, ``'Dom'``, ``'Val'``.

    Raises:
        ValueError: if ``emo_attr`` is not one of the supported attributes.
    """

    # Maps the public attribute name to the NormTerm file-name prefix.
    _NORM_PREFIX = {'Act': 'act', 'Dom': 'dom', 'Val': 'val'}

    def __init__(self, root_dir, label_dir, split_set, emo_attr):
        # Init parameters
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.split_set = split_set
        self.emo_attr = emo_attr
        # Label and data paths
        self._paths, self._labels = getPaths_attri(label_dir, split_set, emo_attr)
        # Feature normalization statistics (shared across attributes).
        self.Feat_mean = loadmat('./NormTerm/feat_norm_means.mat')['normal_para']
        self.Feat_std = loadmat('./NormTerm/feat_norm_stds.mat')['normal_para']
        # Label normalization statistics (attribute-specific). Fail fast on an
        # unknown attribute instead of surfacing a confusing AttributeError
        # later when Label_mean/Label_std would be missing.
        try:
            prefix = self._NORM_PREFIX[emo_attr]
        except KeyError:
            raise ValueError(
                "emo_attr must be one of %s, got %r"
                % (sorted(self._NORM_PREFIX), emo_attr)) from None
        self.Label_mean = loadmat('./NormTerm/' + prefix + '_norm_means.mat')['normal_para'][0][0]
        self.Label_std = loadmat('./NormTerm/' + prefix + '_norm_stds.mat')['normal_para'][0][0]
        # Each utterance is split into a fixed number of chunks, so the same
        # (full path, normalized label) pair is repeated once per chunk.
        norm_labels = ((self._labels - self.Label_mean) / self.Label_std).tolist()
        self.imgs = [(root_dir + path, label)
                     for path, label in zip(self._paths.tolist(), norm_labels)
                     for _ in range(NUM_CHUNKS_PER_SENT)]

    def __len__(self):
        # One item per utterance (not per chunk).
        return len(self._paths)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for utterance ``idx``."""
        # Loading acoustic features
        data = loadmat(self.root_dir + self._paths[idx].replace('.wav','.mat'))['Audio_data']
        # Z-normalization
        data = (data - self.Feat_mean) / self.Feat_std
        # Replace NaNs with 0, then bound the normalized features to [-3, 3].
        data[np.isnan(data)] = 0
        data[data > 3] = 3
        data[data < -3] = -3
        # Loading Label & Normalization
        label = self._labels[idx]
        label = (label - self.Label_mean) / self.Label_std
        return data, label
class UnlabeledDataset(Dataset):
    """Unlabeled dataset: chunk-level acoustic features without labels.

    Args:
        root_dir: directory containing the per-utterance feature ``.mat`` files.
        size: optional sample count forwarded to ``getPaths_unlabel``.
    """

    def __init__(self, root_dir, size=None):
        # Init parameters
        self.root_dir = root_dir
        self.size = size
        # Collect candidate paths and shuffle them in place.
        self._paths = getPaths_unlabel(self.root_dir, sample_num=self.size)
        np.random.shuffle(self._paths)
        # Feature z-normalization statistics.
        self.Feat_mean = loadmat('./NormTerm/feat_norm_means.mat')['normal_para']
        self.Feat_std = loadmat('./NormTerm/feat_norm_stds.mat')['normal_para']
        # Every utterance is split into a fixed number of chunks, so each
        # full path is replicated once per chunk.
        self.imgs = [root_dir + path
                     for path in self._paths.tolist()
                     for _ in range(NUM_CHUNKS_PER_SENT)]

    def __len__(self):
        # One item per utterance (not per chunk).
        return len(self._paths)

    def __getitem__(self, idx):
        """Return the normalized feature matrix for utterance ``idx``."""
        mat_path = self.root_dir + self._paths[idx].replace('.wav','.mat')
        feats = loadmat(mat_path)['Audio_data']
        # Z-normalize with the precomputed statistics.
        feats = (feats - self.Feat_mean) / self.Feat_std
        # NaN -> 0, then bound the normalized range to [-3, 3].
        feats[np.isnan(feats)] = 0
        feats[feats > 3] = 3
        feats[feats < -3] = -3
        return feats