Unsupervised Learning by Auto-Encoder¶
In this tutorial we demonstrate how to train an auto-encoder (AE) for spectrogram denoising. An AE is an unsupervised learning model that can be trained on unlabelled data.
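Recall the principle: an AE consists of an encoder E and a decoder D trained jointly to minimize the reconstruction error between an input x and its reconstruction D(E(x)). As a minimal illustration (a standalone sketch, not part of dpmhm), the training objective is simply the mean squared reconstruction error:

import numpy as np

def reconstruction_mse(x, x_hat):
    """Mean squared reconstruction error, the AE training objective."""
    return np.mean((x - x_hat) ** 2)

x = np.random.rand(256, 80)            # a dummy spectrogram patch
print(reconstruction_mse(x, x))        # perfect reconstruction -> 0.0
print(reconstruction_mse(x, x + 0.1))  # constant offset -> 0.01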
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # disable GPU devices
os.environ["TFDS_DATA_DIR"] = "/home/han/Database/tensorflow_datasets" # default location of tfds database
import tensorflow as tf
import tensorflow_datasets as tfds
import librosa
import librosa.display
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path
from IPython.display import Audio
# Turn off logging for TF
import logging
tf.get_logger().setLevel(logging.ERROR)
import dpmhm
from dpmhm.datasets import transformer, preprocessing, feature, utils
takesamples = lambda ds,n=1: list(ds.take(n).as_numpy_iterator())
Load a built dataset¶
Suppose the dataset CWRU has been correctly installed. First we load the built dataset into memory. Note that the original dataset doesn't specify any split, so all data are contained in the field train of the built dataset.
dataset_name = 'CWRU'
ds_all, ds_info = tfds.load(
dataset_name,
# data_dir='/home/han/Database/tensorflow_datasets/',
# split=['train[:75%]', 'train[75%:]'],
# shuffle_files=True,
with_info=True,
download=False,
# batch_size=-1, # load the whole dataset into a single batch
)
ds0 = ds_all['train'] #.take(30)
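Optionally, one can inspect the metadata returned in ds_info (a quick sanity check; the exact fields depend on the dpmhm builder):

# Optional: inspect the metadata of the built dataset.
print(ds_info.features)                      # structure of one record
print(ds_info.splits['train'].num_examples)  # number of records in 'train'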
# File-level parameters
channels = ['DE', 'FE', 'BA'] # active channels
keys = ['FaultLocation', 'FaultComponent', 'FaultSize'] # keys for label
# Compactor: data extraction & label ramification
compactor = transformer.DatasetCompactor(ds0, channels=channels, keys=keys, resampling_rate=12000)
# Feature extractor
_func = lambda x, sr: feature.spectral_features(x, sr, 'spectrogram',
# n_mfcc=256,
time_window=0.025, hop_step=0.0125, n_fft=512,
normalize=False, to_db=True)[0]
extractor = transformer.FeatureExtractor(compactor.dataset, _func)
# A window of width w corresponds to w*0.0125 seconds
# window = transformer.WindowSlider(extractor.dataset, window_size=(64,64), hop_size=(32,32))
window = transformer.WindowSlider(extractor.dataset, window_size=(256, 80), hop_size=40) # 1s, full bandwidth
# specaug = transformer.SpecAugment(extractor.dataset, output_shape=(64, 64))
ds1 = window.dataset
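A quick sanity check of the window dimensions: with hop_step=0.0125 s, the 80 time frames of one window cover about 1 second of signal, and the 256 rows are frequency bins (n_fft=512 presumably yields 257 bins, of which the window keeps 256):

# Sanity check: time coverage of one sliding window.
hop_step = 0.0125   # seconds between successive STFT frames
n_frames = 80       # time dimension of the window
print(f"One window covers ~{n_frames * hop_step:.2f} s")  # ~1.00 s, as noted above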
eles = takesamples(ds1, 10)
x = eles[0]['feature']
print(x.shape)
(3, 256, 80)
plt.imshow(x[0])
plt.xlabel('Time index')
plt.ylabel('Frequency index')
plt.title('Spectrogram patch: original')
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x7fab507943d0>
Model-level preprocessing¶
We use the sliding-window view as input data to the AE. First let's randomly split the whole dataset into training/validation/test sets using the method utils.split_dataset(). Note that this step may be time-consuming since it has to iterate over the entire dataset.
splits = {'train':0.7, 'val':0.2, 'test':0.1}
%time dw_split = utils.split_dataset(ds1, splits)
# for k, ds in dw_split.items():
# print(k, utils.get_dataset_size(ds)) # len(ds) or ds.cardinality() might not work here
CPU times: user 26.9 s, sys: 490 ms, total: 27.4 s Wall time: 18.8 s
Note that at this point the dataset is channel-first and its label is in string format.
Keras preprocessing model¶
We use Keras preprocessing layers to convert the labels from string to integer and to make the dataset channel-last. This is done via a Keras model returned by the method keras_model_supervised().
Gotcha: The processed labels typically reserve the first indices for the out-of-distribution class (e.g. index 0 if only one o.o.d. class is used). The number of classes passed to a Keras classifier must be modified accordingly (e.g. increased by 1). This is however not relevant to the auto-encoder trained below, which doesn't exploit the label information.
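To illustrate the index offset, assuming the preprocessing relies on a StringLookup-style mapping with one out-of-vocabulary token (our reading of the gotcha above; the labels here are hypothetical):

# Hypothetical illustration: with one OOV token, known labels start at index 1.
lookup = tf.keras.layers.StringLookup(vocabulary=['labelA', 'labelB'])
print(lookup(['labelA', 'labelB', 'unseen']).numpy())  # [1 2 0]: index 0 is OOV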
labels = list(compactor.full_label_dict.keys()) # need the whole list of labels
preprocessing_model = preprocessing.keras_model_supervised(dw_split['train'], labels, normalize=False)
Now apply the preprocessing model on the split data.
ds_split = {}
for k,v in dw_split.items():
ds_split[k] = v.map(preprocessing_model, num_parallel_calls=tf.data.AUTOTUNE)#.map(lambda x,y: (x, y-1))
# set([y.numpy() for x,y in ds_split['val']])
After preprocessing, the dataset follows the channel-last convention and has integer labels. Moreover, the data structure is changed from a dictionary to a tuple of the form (data, label).
ele = takesamples(ds_split['train'])[0]
ds_split['train'].element_spec
(TensorSpec(shape=(None, None, 3), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))
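The element taken above confirms this structure:

x, y = ele  # ele was taken from ds_split['train'] above
print(x.shape, y)  # e.g. (256, 80, 3) and an integer label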
Batch and shuffle¶
The last step before training an ML model is to batch and shuffle the dataset. This is easily done with the methods .shuffle() and .batch().
batch_size = 16
ds_size = 5000 # shuffle buffer size, use a large number
# For AE training the target is the input itself, hence the mapping (x, l) -> (x, x).
ds_train = ds_split['train'].map(lambda x,l:(x,x))
ds_val = ds_split['val'].map(lambda x,l:(x,x)).batch(batch_size)
ds_test = ds_split['test'].map(lambda x,l:x).batch(1) # input only, batch size set to 1
ds_train = ds_train.shuffle(ds_size, reshuffle_each_iteration=True).batch(batch_size).prefetch(tf.data.AUTOTUNE)
Auto-Encoder for spectrogram denoising¶
Now we can build a convolutional auto-encoder stack (CAES) model:
from dpmhm.models.ul import autoencoder
input_shape = ele[0].shape
# n_classes = len(labels)+1 # must add one for the outlier class, Keras uses zero-based class labels.
# print(n_classes, input_shape)
config = autoencoder.Config(input_shape=input_shape) # configuration for AE
model = autoencoder.CAES(config) # build an AE model
# model.summary()
model.encoder.summary()
model.decoder.summary()
Model: "encoder" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv1_enc (Conv2D) (None, 256, 80, 32) 896 pool1_enc (MaxPooling2D) (None, 128, 40, 32) 0 bn1_enc (BatchNormalization (None, 128, 40, 32) 128 ) conv2_enc (Conv2D) (None, 128, 40, 64) 18496 pool2_enc (MaxPooling2D) (None, 64, 20, 64) 0 bn2_enc (BatchNormalization (None, 64, 20, 64) 256 ) conv3_enc (Conv2D) (None, 64, 20, 128) 73856 pool3_enc (MaxPooling2D) (None, 32, 10, 128) 0 bn3_enc (BatchNormalization (None, 32, 10, 128) 512 ) flatten (Flatten) (None, 40960) 0 fc1_enc (Dense) (None, 128) 5243008 ================================================================= Total params: 5,337,152 Trainable params: 5,336,704 Non-trainable params: 448 _________________________________________________________________ Model: "decoder" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= fc1_dec (Dense) (None, 40960) 5283840 reshape (Reshape) (None, 32, 10, 128) 0 bn3_dec (BatchNormalization (None, 32, 10, 128) 512 ) ups3_dec (UpSampling2D) (None, 64, 20, 128) 0 tconv3_dec (Conv2DTranspose (None, 64, 20, 64) 73792 ) bn2_dec (BatchNormalization (None, 64, 20, 64) 256 ) ups2_dec (UpSampling2D) (None, 128, 40, 64) 0 tconv2_dec (Conv2DTranspose (None, 128, 40, 32) 18464 ) bn1_dec (BatchNormalization (None, 128, 40, 32) 128 ) ups1_dec (UpSampling2D) (None, 256, 80, 32) 0 tconv1_dec (Conv2DTranspose (None, 256, 80, 3) 867 ) ================================================================= Total params: 5,377,859 Trainable params: 5,377,411 Non-trainable params: 448 _________________________________________________________________
model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss=tf.keras.losses.MeanSquaredError(),
# metrics=['accuracy'],
)
Training¶
An AE is an unsupervised model, so the label information is dropped during training (the target of each sample is the input itself):
history = model.fit(
ds_train,
validation_data=ds_val,
epochs=10,
callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=3),
)
Epoch 1/10
81/81 [==============================] - 67s 594ms/step - loss: 146.5909 - val_loss: 162.9496
Epoch 2/10
81/81 [==============================] - 67s 583ms/step - loss: 81.2995 - val_loss: 57.7633
Epoch 3/10
81/81 [==============================] - 70s 610ms/step - loss: 43.6608 - val_loss: 46.0658
Epoch 4/10
81/81 [==============================] - 68s 571ms/step - loss: 34.3431 - val_loss: 33.6310
Epoch 5/10
81/81 [==============================] - 66s 565ms/step - loss: 33.9373 - val_loss: 32.7637
Epoch 6/10
81/81 [==============================] - 67s 579ms/step - loss: 33.1406 - val_loss: 33.0974
Epoch 7/10
81/81 [==============================] - 67s 581ms/step - loss: 32.6493 - val_loss: 31.2715
Epoch 8/10
81/81 [==============================] - 68s 588ms/step - loss: 32.6016 - val_loss: 31.3838
Epoch 9/10
81/81 [==============================] - 66s 568ms/step - loss: 32.2186 - val_loss: 30.9938
Epoch 10/10
81/81 [==============================] - 67s 579ms/step - loss: 32.2665 - val_loss: 31.2279
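The reconstruction loss decreases steadily on both splits. Optionally, the learning curves stored in history can be plotted:

# Optional: plot the training history.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='val')
plt.xlabel('Epoch'); plt.ylabel('MSE loss'); plt.legend()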
model.save('/home/han/tmp/ae_[256x80]')
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 6). These functions will not be directly callable after loading.
model = tf.keras.models.load_model('/home/han/tmp/ae_[256x80]')
Denoising effect of AE¶
We test the trained AE on the test data and visualize the denoising effect.
Let's first show some statistics of the reconstruction error.
X0 = list(ds_test.as_numpy_iterator())
X1 = [model(x, training=False) for x in X0]
err = [np.linalg.norm(y-x) for x,y in zip(X0,X1)]
# dy_val = model.predict(ds_val)
_ = plt.hist(err, bins=50)
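Beyond the histogram, a few summary statistics give a quick picture of the error level:

# Summary statistics of the reconstruction error on the test set.
print(f"mean={np.mean(err):.2f}, std={np.std(err):.2f}, min={np.min(err):.2f}, max={np.max(err):.2f}")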
n = 1; ch = 0
a = X0[n][0, :,:,ch]
b = X1[n][0, :,:,ch]
from mpl_toolkits.axes_grid1 import make_axes_locatable
fig, axes = plt.subplots(1,3, figsize=(5,5), sharey=True)
ax = axes[0]
ax.imshow(a)
ax.set_title('Spectrogram patch: original')
ax = axes[1]
ax.imshow(b)
ax.set_title('Spectrogram patch: reconstructed')
ax = axes[2]
im = ax.imshow(a-b); #plt.colorbar()
ax.set_title('Spectrogram patch: error')
divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)
# im = ax.imshow(data, cmap='bone')
fig.colorbar(im, cax=cax, orientation='vertical')
fig.tight_layout()
Transfer to another dataset¶
Now let's see the effect of the AE on another dataset.
dataset_name = 'DIRG'
ds_all, ds_info = tfds.load(
dataset_name,
# data_dir='/home/han/Database/tensorflow_datasets/',
# split=['train[:75%]', 'train[75%:]'],
# shuffle_files=True,
with_info=True,
download=False,
# batch_size=-1, # load the whole dataset into a single batch
)
print(ds_all.keys())
ds1 = ds_all['variation'].take(10)
dict_keys(['variation', 'endurance'])
# File-level parameters
channels = ['A1']
keys = ['FaultComponent', 'FaultSize'] # finest label
compactor = transformer.DatasetCompactor(ds1, channels=channels, keys=keys, resampling_rate=12000)
extractor = transformer.FeatureExtractor(compactor.dataset, _func)
window = transformer.WindowSlider(extractor.dataset, window_size=(256, 80), hop_size=40) # 1s, full bandwidth
dw = window.dataset
labels = list(compactor.full_label_dict.keys()) # need the whole list of labels
preprocessing_model = preprocessing.keras_model_supervised(dw, labels, normalize=False)
ds = dw.map(preprocessing_model).map(lambda x,l:x).batch(1)
X0 = list(ds.as_numpy_iterator())
X1 = [model(x, training=False) for x in X0]
err = [np.linalg.norm(y-x) for x,y in zip(X0,X1)]
# dy_val = model.predict(ds_val)
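As before, the distribution of the reconstruction error can be inspected; a markedly higher error level than on CWRU would reflect the domain shift between the two datasets:

# Histogram of the reconstruction error on DIRG (compare with the CWRU histogram above).
_ = plt.hist(err, bins=50)
plt.xlabel('Reconstruction error')
plt.title('DIRG: reconstruction error')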
n = 2; ch = 1
a = X0[n][0, :,:,ch]
b = X1[n][0, :,:,ch]
from mpl_toolkits.axes_grid1 import make_axes_locatable
fig, axes = plt.subplots(1,3, figsize=(5,5), sharey=True)
ax = axes[0]
ax.imshow(a)
ax.set_title('Spectrogram patch: original')
ax = axes[1]
ax.imshow(b)
ax.set_title('Spectrogram patch: reconstructed')
ax = axes[2]
im = ax.imshow(a-b); #plt.colorbar()
ax.set_title('Spectrogram patch: error')
divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)
# im = ax.imshow(data, cmap='bone')
fig.colorbar(im, cax=cax, orientation='vertical')
fig.tight_layout()