Source code for musisep.neuralsep.trainsep

#!python3

"""
All the training mechanisms for blind separation via neural networks.
"""

import itertools
import types
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from ..audio import spect
from ..audio import wav
from ..audio import performance
from ..dictsep.__main__ import correct_signal_length
from .adamax import AdamaxDict

@tf.function
def gauss(x, mean, stdev):
    """
    Evaluate the Gaussian function.

    Parameters
    ----------
    x : tensor of float
        Points of evaluation
    mean : tensor of float
        Mean value(s)
    stdev : tensor of float
        Standard deviation(s)

    Returns
    -------
    spect : tensor of float
        Values of the Gaussian
    """

    return tf.exp(- (x - mean)**2 / (2 * stdev**2))

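# Illustrative sketch (not part of the original module): evaluating the
# unnormalized Gaussian on a small grid in eager mode.  The concrete
# numbers are hypothetical.
def _demo_gauss():
    x = tf.range(5, dtype=tf.float32)       # points 0..4
    values = gauss(x, 2.0, 1.0)             # peak value 1.0 at x == 2
    print(values.numpy())
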
@tf.function
def inst_scale_raw(params, har_num, spectheight, sigmas_an):
    """
    Evaluate the linear-frequency spectra for the harmonics of tones,
    disregarding the amplitudes.

    Parameters
    ----------
    params : tensor of float
        Continuous parameters for the tones, stacked along axis 2
    har_num : int
        Number of harmonics to evaluate
    spectheight : int
        Size of the output spectrum
    sigmas_an : float
        Number of standard deviations at which the analysis window is cut

    Returns
    -------
    spect : tensor of float
        Spectra of the harmonics
    """

    x = tf.range(spectheight, dtype=tf.float32)
    x = tf.reshape(x, [1, 1, -1, 1])
    amps, scales, sigmas, spreads = \
        trans_params(*tf.unstack(params, axis=2), sigmas_an)
    har_range = tf.reshape(tf.range(1, har_num + 1, dtype=tf.float32),
                           [1, 1, 1, -1])
    scales = tf.expand_dims(tf.expand_dims(scales, axis=2), axis=3)
    sigmas = tf.expand_dims(tf.expand_dims(sigmas, axis=2), axis=3)
    spreads = tf.expand_dims(tf.expand_dims(spreads, axis=2), axis=3)
    means = scales * har_range * tf.sqrt(1 + spreads * har_range**2)

    return gauss(x, means, sigmas)

@tf.function
def inst_scale(params, insts, inst_dict, har_coeffs, spectheight,
               sigmas_an):
    """
    Evaluate the linear-frequency spectra for tones.

    Parameters
    ----------
    params : tensor of float
        Continuous parameters for the tones, stacked along axis 2
    insts : int
        Indices of the instruments playing the tones
    inst_dict : tensor of float
        Dictionary with the shape [instruments, harmonics]
    har_coeffs : tensor of float
        Relative complex amplitudes of the harmonics
    spectheight : int
        Size of the output spectrum
    sigmas_an : float
        Number of standard deviations at which the analysis window is cut

    Returns
    -------
    spect : tensor of float
        Spectra of the tones
    """

    inst_num, har_num = tf.unstack(tf.shape(inst_dict))
    amps, scales, sigmas, spreads = tf.unstack(params, axis=2)
    amps = tf.math.abs(amps)
    tone_factors = tf.gather(inst_dict, insts)
    tone_factors = tf.einsum('bt,bth->bth', amps, tone_factors)
    tone_factors = tf.einsum('bth,bthc->bthc', tone_factors, har_coeffs)
    har_spects = inst_scale_raw(params, har_num, spectheight, sigmas_an)
    tone_spects = tf.einsum('bthc,btsh->btcs', tone_factors, har_spects)
    inst_mask = tf.where(tf.expand_dims(insts, 2)
                         == tf.reshape(tf.range(inst_num), [1, 1, -1]),
                         1., 0.)
    inst_spects = tf.einsum('bti,btcs->bics', inst_mask, tone_spects)

    return inst_spects

@tf.function
def inst_scale_post(amps, insts, inst_dict, inst_num, har_coeffs,
                    har_spect):
    """
    Combine the spectra of individual harmonics into tone spectra.

    Parameters
    ----------
    amps : tensor of float
        Amplitudes of the tones
    insts : int
        Indices of the instruments playing the tones
    inst_dict : tensor of float
        Dictionary with the shape [instruments, harmonics]
    inst_num : int
        Number of instruments available
    har_coeffs : tensor of float
        Relative complex amplitudes of the harmonics
    har_spect : tensor of float
        Spectra of the individual harmonics (without amplitudes)

    Returns
    -------
    spect : tensor of float
        Spectra of the tones
    """

    if inst_dict is not None:
        har_coeffs = tf.einsum('bghc,bgh->bghc', har_coeffs,
                               tf.gather(inst_dict, insts))
    if amps is not None:
        har_coeffs = tf.einsum('bghc,bg->bghc', har_coeffs,
                               tf.math.abs(amps))
    tone_spects = tf.einsum('bghc,bgsh->bgcs', har_coeffs, har_spect)
    # for many tones, scatter_nd might be cheaper
    inst_mask = tf.where(tf.expand_dims(insts, 2)
                         == tf.reshape(tf.range(inst_num), [1, 1, -1]),
                         1., 0.)
    inst_spects = tf.einsum('bgi,bgcs->bgics', inst_mask, tone_spects)

    return inst_spects

@tf.function
def trans_params(amps, scales, sigmas, spreads, sigmas_an):
    """
    Apply transformations on instrument parameters to ensure their
    validity.

    Parameters
    ----------
    amps : tensor of float
        Amplitudes of the tones
    scales : tensor of float
        Natural fundamental frequencies of the tones
    sigmas : tensor of float
        Widths of the Gaussians
    spreads : tensor of float
        Inharmonicities of the tones
    sigmas_an : float
        Number of standard deviations at which the analysis window is cut

    Returns
    -------
    amps : tensor of float
        Amplitudes of the tones
    scales : tensor of float
        Natural fundamental frequencies of the tones
    sigmas : tensor of float
        Widths of the Gaussians
    spreads : tensor of float
        Inharmonicities of the tones
    """

    sigmas = (tf.math.softplus(tf.math.maximum(-3., sigmas))
              * sigmas_an / np.pi / np.log(2))
    spreads = tf.math.abs(spreads)

    return amps, scales, sigmas, spreads

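# Illustrative sketch (not part of the original module): the clipped
# softplus in `trans_params` keeps the Gaussian widths strictly positive
# even for strongly negative raw network outputs.  The value 6.0 for
# `sigmas_an` is hypothetical.
def _demo_trans_params():
    raw_sigmas = tf.constant([-10.0, 0.0, 3.0])
    _, _, sigmas, _ = trans_params(tf.zeros(3), tf.zeros(3), raw_sigmas,
                                   tf.zeros(3), 6.0)
    print(sigmas.numpy())                   # all entries > 0
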
class SpectLoss:
    """
    Container object for the spectra and losses for the individual tones
    of the instruments.

    Parameters
    ----------
    batch_size : int
        Batch size of all the data structures
    num_guesses_prod : int
        Total number of all the samples per spectrum
    inst_num : int
        Number of instruments in the sample
    spectheight : int
        Size of the input/output spectrum
    """

    def __init__(self, batch_size, num_guesses_prod, inst_num,
                 spectheight):
        self.spects = types.SimpleNamespace()
        self.losses = types.SimpleNamespace()
        self.spects.insts = tf.zeros(
            [batch_size, num_guesses_prod, inst_num, 2, spectheight])
        self.spects.insts_sparse = tf.zeros(
            [batch_size, num_guesses_prod, inst_num, 2, spectheight])
        self.spects.insts_dir = tf.zeros(
            [batch_size, num_guesses_prod, inst_num, 2, spectheight])
        self.spects.mix = None
        self.spects.mix_sparse = None
        self.spects.mix_dir = None

    def add_tone(self, params, har_coeffs, on_factors, insts, har_spect,
                 inst_dict, orig_spect):
        """
        Add the results of a new tone to the object.

        Parameters
        ----------
        params : tensor of float
            Instrument parameters for the tone
        har_coeffs : tensor of float
            Relative complex amplitudes of the harmonics
        on_factors : tensor of int
            Binary indicator if a tone contributes to the sparse
            prediction
        insts : tensor of int
            Indices of the instruments playing the tones
        har_spect : tensor of float
            Spectra of the individual harmonics (without amplitudes)
        inst_dict : tensor of float
            Dictionary with the shape [instruments, harmonics]
        orig_spect : tensor of float
            Original input sampled spectrum
        """

        loss_fun = lifted_l2_cmplx

        orig_spect = tf.expand_dims(orig_spect, 1)
        phase_coeffs = complex_arg(har_coeffs, axis=3)
        phase_coeffs_sparse = tf.einsum('bg,bghc->bghc', on_factors,
                                        phase_coeffs)
        amps, scales, sigmas, spreads = tf.unstack(params, axis=2)
        inst_num = tf.shape(inst_dict)[0]

        self.spects.insts += inst_scale_post(
            amps, insts, inst_dict, inst_num, phase_coeffs, har_spect)
        self.spects.insts_sparse += inst_scale_post(
            amps, insts, inst_dict, inst_num, phase_coeffs_sparse,
            har_spect)
        self.spects.insts_dir += inst_scale_post(
            None, insts, None, inst_num, har_coeffs, har_spect)

        self.spects.mix = mix_inst_spects(self.spects.insts, axis=2)
        self.spects.mix_sparse = mix_inst_spects(
            self.spects.insts_sparse, axis=2)
        self.spects.mix_dir = mix_inst_spects(
            self.spects.insts_dir, axis=2)

        self.losses.mix = tf.reduce_mean(
            lifted_l2_abs(orig_spect, self.spects.mix, axis=2), axis=2)
        self.losses.mix_sparse = tf.reduce_mean(
            lifted_l2_abs(orig_spect, self.spects.mix_sparse, axis=2),
            axis=2)
        self.losses.mix_dir = tf.reduce_mean(
            loss_fun(orig_spect, self.spects.mix_dir, axis=2), axis=2)
        self.losses.inst = tf.reduce_mean(
            loss_fun(self.spects.insts_dir, self.spects.insts, axis=3),
            axis=[2, 3])

def comp_total_loss(losses, on_factors, spl, loss_coeffs):
    """
    Compute the linear combination of losses.

    Parameters
    ----------
    losses : SimpleNamespace
        The individual loss values
    on_factors : tensor of int
        Binary indicator if a tone contributes to the sparse prediction
    spl : float
        Discount factor for the sparsity
    loss_coeffs : array_like
        Linear weights for the loss terms

    Returns
    -------
    total_loss : tensor of float
        Computed total loss
    discounts : tensor of float
        Sparsity discount factors
    """

    discounts = spl ** tf.reduce_sum(1 - on_factors, axis=2)

    return (losses.mix * loss_coeffs[0]
            + losses.mix_sparse * discounts * loss_coeffs[1]
            + losses.inst * loss_coeffs[2]
            + losses.mix_dir * loss_coeffs[3],
            discounts)

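# Illustrative sketch (not part of the original module): combining
# hypothetical loss terms.  `on_factors` has the shape
# [batch, guesses, tones]; every muted tone multiplies the sparse term
# by another factor of `spl`.
def _demo_comp_total_loss():
    losses = types.SimpleNamespace(
        mix=tf.ones([1, 2]), mix_sparse=tf.ones([1, 2]),
        inst=tf.ones([1, 2]), mix_dir=tf.ones([1, 2]))
    on_factors = tf.constant([[[1., 1.], [1., 0.]]])    # one muted tone
    total, discounts = comp_total_loss(losses, on_factors, spl=0.5,
                                       loss_coeffs=[1., 1., 1., 1.])
    print(total.numpy(), discounts.numpy())  # discounts: [[1., 0.5]]
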
@tf.function
def add_linspace(x):
    """
    Add a linear range layer to a CNN tensor.

    Parameters
    ----------
    x : tensor of float
        Layer to add the linear range to

    Returns
    -------
    model_in : tensor of float
        Input layer with the linear range added
    """

    shape = tf.shape(x)
    model_in = tf.concat(
        [x, tf.broadcast_to(
            tf.reshape(tf.linspace(1e-2, 0, shape[2]),
                       [1, 1, shape[2], 1]),
            [shape[0], 1, shape[2], 1])],
        axis=1)

    return model_in

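# Illustrative sketch (not part of the original module): `add_linspace`
# appends one extra channel holding a fixed linear ramp, which gives the
# convolutional network a notion of absolute frequency position.
def _demo_add_linspace():
    x = tf.zeros([2, 3, 8, 1])              # [batch, channels, bins, 1]
    y = add_linspace(x)
    print(y.shape)                          # (2, 4, 8, 1)
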
def unet(x, inst_num, spectheight):
    """
    Create the U-Net as a Keras model.

    Parameters
    ----------
    x : tensor of float
        Network input (spectrum channels)
    inst_num : int
        Number of instruments expected in the sample
    spectheight : int
        Size of the input/output spectrum

    Returns
    -------
    y : tensor of float
        Network output
    """

    y = x
    w = 80
    nonlin = tf.nn.relu

    # Keras, y u no Conv1Transpose?
    y = tf.keras.layers.Lambda(lambda w: tf.expand_dims(w, 3))(y)
    layers = [y]
    reductions = [4, 4, 4, 4, 4, 3, 2]
    channels = [80, 160, 240, 320, 400, 480, 560]

    for i in range(len(reductions)):
        if i == 0:
            y = tf.keras.layers.Lambda(
                add_linspace,
                lambda s: (s[0], s[1] + 1, s[2], s[3]))(y)
        y = tf.keras.layers.Conv2D(channels[i], (5, 1),
                                   data_format='channels_first',
                                   padding='same',
                                   strides=(reductions[i], 1),
                                   use_bias=True)(y)
        y = tf.keras.layers.Activation(nonlin)(y)
        layers.append(y)

    for i in reversed(range(len(reductions))):
        y = tf.keras.layers.Conv2DTranspose(channels[i], (5, 1),
                                            data_format='channels_first',
                                            padding='same',
                                            strides=(reductions[i], 1),
                                            use_bias=True)(y)
        y = tf.keras.layers.Activation(nonlin)(y)
        y = tf.keras.layers.concatenate([y, layers[i]], axis=1)

    y = tf.keras.layers.Conv2D(w, (3, 1), data_format='channels_first',
                               padding='same', use_bias=True)(y)
    y = tf.keras.layers.Activation(tf.nn.relu)(y)
    y = tf.keras.layers.Conv2D(w, (1, 1), data_format='channels_first',
                               padding='same', use_bias=True)(y)
    y = tf.keras.layers.Activation(tf.nn.relu)(y)

    out_dim = 9
    y = tf.keras.layers.Conv2D(inst_num * out_dim, (1, 1),
                               data_format='channels_first',
                               padding='same', use_bias=False)(y)
    y = tf.keras.layers.Reshape((inst_num, out_dim, spectheight))(y)

    return y

@tf.function
def lift_spect(x, shift=1e-7, qexp=0.5):
    """
    Lift a positive-valued spectrum via a concave power function.

    Parameters
    ----------
    x : tensor of float
        Spectrum
    shift : float
        Additive constant to keep the transform differentiable
    qexp : float
        Exponent of the power function

    Returns
    -------
    The lifted spectrum
    """

    return (x + shift)**qexp - shift**qexp

@tf.function
def lift_spect_sign(x, shift=1e-7, qexp=0.5):
    """
    Lift a real-valued spectrum via a concave power function.

    Parameters
    ----------
    x : tensor of float
        Spectrum
    shift : float
        Additive constant to keep the transform differentiable
    qexp : float
        Exponent of the power function

    Returns
    -------
    The lifted spectrum
    """

    return ((tf.math.abs(x) + shift)**qexp - shift**qexp) \
        * tf.math.sign(x)

@tf.function
def lift_cmplx(x, axis, shift=1e-7, qexp=0.5):
    """
    Lift a complex-valued spectrum via a concave power function.

    Parameters
    ----------
    x : tensor of float
        Spectrum
    axis : int
        Complex axis
    shift : float
        Additive constant to keep the transform differentiable
    qexp : float
        Exponent of the power function

    Returns
    -------
    The lifted spectrum
    """

    spect_abs = lift_spect(complex_abs(x, axis), shift, qexp)
    spect_arg = complex_arg(x, axis)

    return spect_abs * spect_arg

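# Illustrative sketch (not part of the original module): the concave
# lifting compresses the dynamic range, so quiet partials still
# contribute noticeably to the loss.
def _demo_lift_spect():
    x = tf.constant([1e-4, 1e-2, 1.0])
    print(lift_spect(x).numpy())            # roughly [0.01, 0.1, 1.0]
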
@tf.function
def lifted_l2_cmplx(x, y, axis):
    """
    Radially symmetric lifted l2 distance between two spectra.

    Parameters
    ----------
    x : tensor of float
        First spectrum
    y : tensor of float
        Second spectrum
    axis : int
        Complex axis

    Returns
    -------
    l2 loss
    """

    return tf.reduce_sum((lift_cmplx(x, axis) - lift_cmplx(y, axis))**2,
                         axis=axis)

@tf.function
def lifted_l2_abs(x, y, axis):
    """
    Radially symmetric lifted l2 distance between two spectra.

    Parameters
    ----------
    x : tensor of float
        First spectrum
    y : tensor of float
        Second spectrum
    axis : int
        Complex axis

    Returns
    -------
    l2 loss
    """

    return tf.reduce_sum((lift_spect(complex_abs(x, axis))
                          - lift_spect(complex_abs(y, axis)))**2,
                         axis=axis)

@tf.function
def mix_inst_spects(inst_spects, axis):
    """
    Combine the spectra for multiple instruments, dropping the summation
    axis.

    Parameters
    ----------
    inst_spects : tensor of float
        Spectra for the individual instruments
    axis : int
        Summation axis

    Returns
    -------
    Mixture spectrum
    """

    return tf.reduce_sum(inst_spects, axis=axis)

@tf.function
def complex_abs(spect, axis):
    """
    Compute the absolute value of a complex tensor.

    Parameters
    ----------
    spect : tensor of float
        Real-valued tensor with a complex axis
    axis : int
        Complex axis

    Returns
    -------
    Absolute value of the input tensor (with complex axis shrunken to
    size 1)
    """

    tf.debugging.assert_equal(tf.shape(spect)[axis], 2)
    real_abs = tf.math.abs(tf.gather(spect, [0], axis=axis))
    imag_abs = tf.math.abs(tf.gather(spect, [1], axis=axis))
    min_abs = tf.math.minimum(real_abs, imag_abs)
    max_abs = tf.math.maximum(real_abs, imag_abs)
    retval = max_abs * tf.sqrt(
        1 + (min_abs / tf.maximum(max_abs, 1e-20))**2)

    return retval

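# Illustrative sketch (not part of the original module): `complex_abs`
# computes the magnitude along the complex axis in an overflow-safe way
# (max * sqrt(1 + (min / max)**2), analogous to hypot).
def _demo_complex_abs():
    spect = tf.constant([[3.0], [4.0]])     # real part 3, imaginary 4
    print(complex_abs(spect, axis=0).numpy())   # [[5.]]
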
@tf.function
def complex_arg(spect, axis, bias=1e-20):
    """
    Compute the argument of a complex tensor.

    Parameters
    ----------
    spect : tensor of float
        Real-valued tensor with a complex axis
    axis : int
        Complex axis
    bias : float
        Offset to avoid division by 0

    Returns
    -------
    Tensor normalized to an absolute value of 1
    """

    real_part = tf.gather(spect, [0], axis=axis)
    imag_part = tf.gather(spect, [1], axis=axis)
    spect_sp = tf.concat([real_part, imag_part], axis=axis)
    spect_sp_abs = complex_abs(spect_sp, axis)

    return spect_sp / (spect_sp_abs + bias)

def plot_spectrum(filename, spectheight, *spects):
    """
    Plot real-valued spectra to a file.

    Parameters
    ----------
    filename : string
        Name of the file to save the figure to
    spectheight : int
        Size of the output spectra
    spects : sequence of array_like of float
        Spectra to plot
    """

    for sp in spects:
        plt.plot(np.arange(spectheight), sp)
    plt.savefig(filename)
    plt.clf()

@tf.function
def sample_multi(pdf):
    """
    Sample batch-wise from a categorical distribution.

    Parameters
    ----------
    pdf : tensor of float
        Log probabilities of [batch, insts, scales]

    Returns
    -------
    insts : tensor of int
        Indices of the sampled instruments
    scales : tensor of int
        Discrete sampled frequencies
    """

    pdf_shape = tf.shape(pdf)
    pdf = tf.reshape(pdf, [pdf_shape[0], pdf_shape[1] * pdf_shape[2]])
    samples = tf.random.categorical(pdf, 1, dtype=tf.int32)
    samples = tf.stop_gradient(tf.squeeze(samples, axis=1))
    insts = samples // pdf_shape[2]
    scales = samples % pdf_shape[2]

    return insts, scales

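# Illustrative sketch (not part of the original module): drawing one
# (instrument, scale) pair per batch entry from the flattened
# categorical distribution; the probabilities here are hypothetical.
def _demo_sample_multi():
    log_pdf = tf.math.log(tf.constant([[[0.1, 0.1], [0.1, 0.7]]]))
    insts, scales = sample_multi(log_pdf)   # most often (1, 1)
    print(insts.numpy(), scales.numpy())
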
@tf.function
def sample_multi_max(pdf):
    """
    Pick the mode batch-wise from a categorical distribution.

    Parameters
    ----------
    pdf : tensor of float
        Log probabilities of [batch, insts, scales]

    Returns
    -------
    insts : tensor of int
        Indices of the selected instruments
    scales : tensor of int
        Discrete selected frequencies
    """

    pdf_shape = tf.shape(pdf)
    pdf = tf.reshape(pdf, [pdf_shape[0], pdf_shape[1] * pdf_shape[2]])
    samples = tf.math.argmax(pdf, axis=1, output_type=tf.int32)
    insts = samples // pdf_shape[2]
    scales = samples % pdf_shape[2]

    return insts, scales

@tf.function
def norm_pdf(pdf):
    """
    Normalize a categorical distribution batch-wise via softmax.

    Parameters
    ----------
    pdf : tensor of float
        Log probabilities of [batch, insts, scales]

    Returns
    -------
    Normalized log probabilities
    """

    pdf_shape = tf.shape(pdf)
    pdf = tf.reshape(pdf, [pdf_shape[0], pdf_shape[1] * pdf_shape[2]])
    pdf = tf.nn.log_softmax(pdf, axis=1)
    pdf = tf.reshape(pdf, pdf_shape)

    return pdf

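# Illustrative sketch (not part of the original module): `norm_pdf`
# normalizes jointly over the instrument and scale axes, so the
# probabilities of all (instrument, scale) pairs sum to 1.
def _demo_norm_pdf():
    log_pdf = tf.zeros([1, 2, 3])           # uniform over 6 pairs
    normed = norm_pdf(log_pdf)
    print(tf.reduce_sum(tf.exp(normed)).numpy())    # ~1.0
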
@tf.function
def gamma_probs(spreads, spreads_a, spreads_b):
    """
    Evaluate the gamma distribution.

    Parameters
    ----------
    spreads : tensor of float
        Values where to evaluate
    spreads_a : tensor of float
        "alpha" parameter of the distribution
    spreads_b : tensor of float
        "beta" parameter of the distribution

    Returns
    -------
    Log probabilities
    """

    g = tfp.distributions.Gamma(spreads_a, spreads_b,
                                allow_nan_stats=False)

    return g.log_prob(spreads)

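# Illustrative sketch (not part of the original module): log-density of
# the gamma distribution that models the inharmonicity spreads; the
# parameter values are hypothetical.
def _demo_gamma_probs():
    print(gamma_probs(tf.constant(1e-3), tf.constant(2.0),
                      tf.constant(1000.0)).numpy())
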
@tf.function
def lsq_stock(har_spects, samp):
    """
    Solve a regularized least-squares system.

    Parameters
    ----------
    har_spects : tensor of float
        Spectra of the individual harmonics (without amplitudes)
    samp : tensor of float
        Direct prediction

    Returns
    -------
    Phase values for the harmonics
    """

    retval = tf.linalg.lstsq(har_spects, samp, 1e-3)

    return retval

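# Illustrative sketch (not part of the original module):
# Tikhonov-regularized least squares as used to fit the per-harmonic
# complex amplitudes to the direct prediction.
def _demo_lsq_stock():
    A = tf.constant([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    b = tf.constant([[1.0], [2.0], [3.0]])
    print(lsq_stock(A, b).numpy())          # close to [[1.], [2.]]
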
class ParamsDict:
    """
    Container object for the model parameters.

    Parameters
    ----------
    batch_size : int
        Batch size of all the data structures
    num_guesses_prod : int
        Total number of all the samples per spectrum
    """

    _keys = ('probs', 'probs_samp', 'g_probs',
             'on_probs', 'on_probs_samp', 'on_factors')
    _special_keys = ('inst_params', 'insts', 'reg', 'entropy')

    def __init__(self, batch_size, num_guesses_prod):
        for key in self._keys:
            setattr(self, key,
                    tf.zeros([batch_size, num_guesses_prod, 0]))
        self.inst_params = tf.zeros([batch_size, num_guesses_prod, 0, 4])
        self.insts = tf.zeros([batch_size, num_guesses_prod, 0],
                              dtype=tf.int32)
        self.reg = tf.zeros([0])
        self.entropy = tf.zeros([batch_size, 0])

    def _append(self, new):
        """
        Add parameters from a new tone to the object.

        Parameters
        ----------
        new : object
            Object with tone parameters
        """

        for key in self._keys + ('inst_params', 'insts'):
            setattr(self, key, tf.concat(
                [getattr(self, key),
                 tf.expand_dims(getattr(new, key), 2)], axis=2))
        self.entropy = tf.concat(
            [self.entropy, tf.expand_dims(new.entropy, 1)], axis=1)

    def _sum(self):
        """
        Sum up all the parameters from the collected tones.

        Returns
        -------
        params_sum : SimpleNamespace
            Sum of the parameters
        """

        params_sum = types.SimpleNamespace()
        for key in self._keys + ('inst_params', 'insts'):
            setattr(params_sum, key,
                    tf.reduce_sum(getattr(self, key), axis=2))
        for key in ('reg', 'entropy'):
            setattr(params_sum, key,
                    tf.reduce_sum(getattr(self, key), axis=0))

        return params_sum

class SpectvisDict:
    """
    Container object to visualize spectra.

    Parameters
    ----------
    spectheight : int
        Size of the spectrum
    """

    _keys = ('pdf', 'on_probs', 'amps', 'sigmas', 'offsets',
             'spreads_a', 'spreads_b')

    def __init__(self, spectheight):
        for key in self._keys:
            setattr(self, key, tf.zeros([0, spectheight]))

    def _append(self, new):
        """
        Add spectra from a new tone to the object.

        Parameters
        ----------
        new : object
            Object with tone parameters
        """

        for key in self._keys:
            setattr(self, key, tf.concat(
                [getattr(self, key), getattr(new, key)[0, :, :]],
                axis=0))

class Trainer:
    """
    Object containing all the data necessary for the training.

    Parameters
    ----------
    name : string
        Name of the training run.  Used for file names and logging.
    mixed_soundfile : string
        Name of the sound file containing the mixture
    orig_soundfiles : sequence of string
        Names of the sound files containing the individual instrument
        tracks
    loss_coeffs : sequence of float
        Weights of the dictionary prediction loss, the sparse loss,
        the regularization loss, and the direct prediction loss
    har_num : int
        Number of harmonics to identify
    num_guesses : sequence of int
        Number of samples per tone
    spl : float
        Discount factor for the sparsity
    batch_size : int
        Batch size for training
    batch_size_pred : int
        Batch size for prediction
    virt_batch_mul : int
        Virtual batch multiplier
    stepsize_net : float
        Learning rate for training the neural network
    stepsize_dict : float
        Learning rate for training the dictionary
    tau : float
        Exponent to control exploration
    sampdist : int
        Time interval of the spectrogram
    sub_factor : int
        Factor by which to subsample the spectrogram for resynthesis
    sigmas_an : float
        Number of standard deviations at which the analysis window is cut
    plot_color : string or NoneType
        Whether to make a color plot
    save_points : sequence of int
        Iterations at which to save the output
    init_dict : tensor of float
        Dictionary with the shape [instruments, harmonics]
    """

    def __init__(self, name, mixed_soundfile, orig_soundfiles,
                 loss_coeffs, har_num, num_guesses, spl, batch_size,
                 batch_size_pred, virt_batch_mul, stepsize_net,
                 stepsize_dict, tau, sampdist, sub_factor, sigmas_an,
                 plot_color, save_points, init_dict):
        self.loss_coeffs = loss_coeffs
        self.har_num = har_num
        self.num_guesses = num_guesses
        self.spl = spl
        self.batch_size = batch_size
        self.batch_size_pred = batch_size_pred
        self.virt_batch_mul = virt_batch_mul
        self.tau = tau
        self.plot_color = plot_color
        self.save_points = save_points
        self.inst_num = len(self.num_guesses)
        self.spectheight = 6144
        self.sampdist = sampdist
        self.sigmas_an = sigmas_an
        self.name = name
        self.log_dir = 'logs-nn/' + name

        self.param_scales = tf.Variable(
            np.broadcast_to(np.log([10000, 1, 3, 1, 30, 30, 1, 1, 1000]),
                            [self.inst_num, 9]),
            dtype=tf.float32, name="param_scales")

        self.inst_model = self.make_model(self.inst_num * 4 + 6)
        self.variables_cnn = self.inst_model.trainable_weights
        self.variables_out = None
        self.variables_trans = None
        self.gradients_model_cnn = None
        self.gradients_model_out = None
        self.gradient_dict = None
        self.gradient_scales = None

        if init_dict is None:
            self.inst_dict = tf.Variable(
                np.asarray(
                    np.maximum(
                        1e-3,
                        np.expand_dims(
                            0.5 / np.arange(1, self.inst_num + 1),
                            axis=1)
                        ** np.arange(0, self.har_num)),
                    dtype=np.float32),
                name="inst_dict")
        else:
            self.inst_dict = tf.Variable(
                np.asarray(init_dict, dtype=np.float32),
                name="inst_dict")
        print(self.inst_dict)

        self.save_module = tf.Module()
        self.save_module.inst_dict = self.inst_dict
        self.save_module.inst_model = self.inst_model
        self.save_module.param_scales = self.param_scales

        self.file_writer = tf.summary.create_file_writer(
            self.log_dir, flush_millis=30000)

        self.signal, self.samprate = wav.read(mixed_soundfile)
        print("samprate", self.samprate)
        self.sigmas_an *= (48000 / self.samprate)
        self.orig_signals = np.stack(
            [correct_signal_length(wav.read(os)[0], self.signal.size)
             for os in orig_soundfiles], axis=0)
        self.sign_mask = np.tile([1, -1], self.spectheight // 2)
        spectrogram = spect.stft(
            self.signal, self.spectheight, self.sigmas_an,
            self.sampdist)[:self.spectheight, :].T
        spectrogram *= self.sign_mask

        self.spectlen = spectrogram.shape[0]
        self.sub_factor = sub_factor
        self.spectlen_sub = (self.spectlen - 1) // self.sub_factor + 1
        self.spect_data = tf.data.Dataset.from_tensor_slices(
            tf.convert_to_tensor(
                np.stack([np.real(spectrogram), np.imag(spectrogram)],
                         axis=1),
                dtype=tf.float32))
        self.spect_data_sub = self.spect_data.shard(self.sub_factor, 0)
        print("spectlen", self.spectlen)
        self.random_slice = iter(
            self.spect_data.shuffle(self.spectlen)
            .repeat().batch(self.batch_size, drop_remainder=True))
        self.inline_slices = self.spect_data_sub.padded_batch(
            self.batch_size_pred)

        self.eval_writer = tf.summary.create_file_writer(
            self.log_dir + '-eval', flush_millis=30000)
        self.measures_table = []

        self.optimizer = tf.keras.optimizers.Adamax(
            stepsize_net, 0.99, 0.999)
        self.dict_optimizer = AdamaxDict(
            stepsize_dict, 0.99, 0.999, axis=1)

    def load(self, module):
        """
        Load dictionary, output scalings, and the model from a module.

        Parameters
        ----------
        module : tf.Module
            Module with the saved data
        """

        self.inst_dict = module.inst_dict
        self.param_scales = module.param_scales
        self.inst_model = module.inst_model
        self.variables_cnn = self.inst_model.trainable_variables

    def make_model(self, input_sizes):
        """
        Construct a Keras model for the parameter prediction.

        Parameters
        ----------
        input_sizes : int
            Total number of input channels for the network

        Returns
        -------
        The neural network as a Keras model
        """

        spect_in = tf.keras.Input(shape=(input_sizes, self.spectheight))
        pdf_out = unet(spect_in, self.inst_num, self.spectheight)
        model_cnn = tf.keras.Model(inputs=spect_in, outputs=pdf_out)

        return model_cnn

    def separate_inst(self, model, model_in, training, batch_factor,
                      fan_factor, num_guesses, inst_mask, tau,
                      batch_size):
        """
        Identify the parameters for one tone in a given spectrum.

        Parameters
        ----------
        model : tf.keras.Model
            Definition of the neural network
        model_in : tensor of float
            Input channels that the neural network receives
        training : bool
            Whether to perform training
        batch_factor : int
            Product of the number of samples for previous tones
        fan_factor : int
            Product of the number of samples for current and future tones
        num_guesses : int
            Number of samples for the current tone
        inst_mask : tensor of int
            1 for instruments that have already played a tone,
            0 otherwise
        tau : float
            Exponent to control exploration
        batch_size : int
            Batch size for training

        Returns
        -------
        params_tone : SimpleNamespace
            All parameters relating to an identified tone
        params_spect : SimpleNamespace
            Unsampled parameters in the dimensionality of the spectrum
        har_spects_raw : tensor of float
            Spectra of the harmonics
        """

        model_in_shape = tf.shape(model_in)
        model_in = tf.reshape(model_in,
                              [batch_size * batch_factor,
                               model_in_shape[2], self.spectheight])
        model_out = model(model_in, training)
        model_out = tf.einsum('bips,ip->bips', model_out,
                              tf.math.exp(self.param_scales))

        out_p = types.SimpleNamespace()
        (out_p.pdf, out_p.amps, out_p.sigmas, out_p.offsets,
         out_p.spreads_a, out_p.spreads_b, out_p.dir_r, out_p.dir_i,
         out_p.on_probs) = tf.unstack(model_out, axis=2)

        num_guesses_prod = np.prod(num_guesses, dtype=np.int32)
        shape = [batch_size, batch_factor * fan_factor]

        pdf_raw = tf.reshape(norm_pdf(out_p.pdf),
                             [batch_size, batch_factor, self.inst_num,
                              self.spectheight])
        entropy = tf.reduce_mean(pdf_raw, axis=[2, 3])
        entropy = - fan_factor * tf.reduce_sum(entropy, axis=1)

        inst_mask_shape = tf.shape(inst_mask)
        inst_mask = tf.reshape(inst_mask,
                               [batch_size * batch_factor,
                                inst_mask_shape[2], 1])
        pdf_masked = tf.where(inst_mask == 0, out_p.pdf, - np.inf)
        pdf_norm = norm_pdf(pdf_masked)

        if tau is not None:
            tau_range = tf.exp(tf.linspace(0., tf.math.log(tau),
                                           num_guesses[0]))
            tau_range = tf.keras.backend.repeat_elements(
                tau_range, rep=np.prod(num_guesses[1:], dtype=np.int32),
                axis=0)
            tau_range = tf.reshape(tau_range, [1, num_guesses_prod])
            tau_range = tf.tile(
                tau_range,
                [batch_size * batch_factor * fan_factor
                 // num_guesses_prod, 1])
            tau_range = tf.reshape(tau_range,
                                   [batch_size * batch_factor,
                                    fan_factor])
            pdf_samp = tf.einsum('bis,bg->bgis', pdf_norm, tau_range)
            pdf_samp = tf.reshape(
                pdf_samp,
                [batch_size * batch_factor * fan_factor,
                 self.inst_num, self.spectheight])
            pdf_samp = norm_pdf(pdf_samp)

        params_spect = types.SimpleNamespace()
        params_spect.pdf = pdf_norm
        params_spect.on_probs = out_p.on_probs + 500
        params_spect.amps = out_p.amps
        params_spect.sigmas = out_p.sigmas
        params_spect.offsets = tf.math.tanh(out_p.offsets) * 5
        params_spect.spreads_a = tf.math.exp(
            tf.clip_by_value(out_p.spreads_a, -30, 30))
        params_spect.spreads_b = tf.math.exp(
            tf.clip_by_value(out_p.spreads_b, -30, 30)) * 1000

        if tau is None:
            insts, scales = sample_multi_max(pdf_norm)
        else:
            insts, scales = sample_multi(pdf_samp)
        insts = tf.reshape(insts,
                           [batch_size * batch_factor, fan_factor])
        scales = tf.reshape(scales,
                            [batch_size * batch_factor, fan_factor])
        idcs = tf.stack([insts, scales], axis=2)

        probs = tf.reshape(tf.gather_nd(pdf_norm, idcs, batch_dims=1),
                           shape)
        if tau is None:
            probs_samp = tf.zeros(shape)
        else:
            pdf_samp = tf.reshape(
                pdf_samp,
                [batch_size * batch_factor, fan_factor,
                 self.inst_num, self.spectheight])
            probs_samp = tf.reshape(
                tf.gather_nd(
                    pdf_samp,
                    tf.stack([tf.broadcast_to(
                        tf.range(fan_factor),
                        [batch_size * batch_factor, fan_factor]),
                        insts, scales], axis=2),
                    batch_dims=1),
                shape)

        amps = tf.reshape(
            tf.gather_nd(params_spect.amps, idcs, batch_dims=1), shape)
        sigmas = tf.reshape(
            tf.gather_nd(params_spect.sigmas, idcs, batch_dims=1), shape)
        offsets = tf.reshape(
            tf.gather_nd(params_spect.offsets, idcs, batch_dims=1),
            shape)

        on_probs = tf.gather_nd(params_spect.on_probs, idcs,
                                batch_dims=1)
        if tau is not None:
            on_probs_samp = tf.einsum('bg,bg->bg', on_probs, tau_range)
            on_probs_samp = tf.reshape(on_probs_samp, shape)
        else:
            on_probs_samp = tf.zeros(shape)
        on_probs = tf.reshape(on_probs, shape)

        if tau is not None:
            on_factors = tf.stop_gradient(tf.math.sign(tf.nn.relu(
                tf.math.sigmoid(on_probs_samp)
                - tf.random.uniform(tf.shape(on_probs)))))
        else:
            on_factors = tf.stop_gradient(tf.math.sign(tf.nn.relu(
                tf.math.sigmoid(on_probs) - 0.5)))
        on_probs = (
            tf.math.log_sigmoid(on_probs) * on_factors
            + tf.math.log_sigmoid(- on_probs) * (1 - on_factors))
        on_probs_samp = (
            tf.math.log_sigmoid(on_probs_samp) * on_factors
            + tf.math.log_sigmoid(- on_probs_samp) * (1 - on_factors))

        spreads_a = tf.reshape(
            tf.gather_nd(params_spect.spreads_a, idcs, batch_dims=1),
            shape)
        spreads_b = tf.reshape(
            tf.gather_nd(params_spect.spreads_b, idcs, batch_dims=1),
            shape)
        if training:
            spreads = tf.stop_gradient(
                tf.random.gamma([], spreads_a, spreads_b))
        else:
            spreads = tf.math.maximum(spreads_a - 1, 0) / spreads_b
        g_probs = gamma_probs(spreads, spreads_a, spreads_b)

        scales = tf.reshape(tf.cast(scales, tf.float32), shape) + offsets
        inst_params = tf.stack([amps, scales, sigmas, spreads], axis=2)

        dir_r = tf.reshape(tf.gather(out_p.dir_r, insts, batch_dims=1),
                           [batch_size * batch_factor * fan_factor,
                            self.spectheight])
        dir_i = tf.reshape(tf.gather(out_p.dir_i, insts, batch_dims=1),
                           [batch_size * batch_factor * fan_factor,
                            self.spectheight])
        dir_c = tf.stack([dir_r, dir_i], axis=2)

        insts = tf.reshape(insts, shape)

        har_spects_raw = inst_scale_raw(
            inst_params, self.har_num, self.spectheight, self.sigmas_an)
        har_spects = tf.reshape(har_spects_raw,
                                [batch_size * batch_factor * fan_factor,
                                 self.spectheight, self.har_num])
        coeffs = tf.reshape(lsq_stock(har_spects, dir_c),
                            [batch_size, batch_factor * fan_factor,
                             self.har_num, 1, 2])
        har_coeffs, = tf.unstack(coeffs, axis=3)

        params_tone = types.SimpleNamespace(
            probs=probs, probs_samp=probs_samp, g_probs=g_probs,
            on_probs=on_probs, on_probs_samp=on_probs_samp,
            on_factors=on_factors, inst_params=inst_params, insts=insts,
            har_coeffs=har_coeffs, entropy=entropy)

        return params_tone, params_spect, har_spects_raw

    def separate(self, mix_spect_in, tau, batch_size, predict=False):
        """
        Identify all the parameters for the tones in the spectrum.

        Parameters
        ----------
        mix_spect_in : tensor of float
            Mixture spectrogram to perform the separation on
        tau : float
            Exponent to control exploration
        batch_size : int
            Batch size for training
        predict : bool
            Whether to go into prediction mode instead of training

        Returns
        -------
        spects : SimpleNamespace
            Spectra related to the mixture
        losses : SimpleNamespace
            Losses related to the mixture
        params_d : ParamsDict
            Parameters related to the mixture
        spectvis_d : SpectvisDict
            Visualization spectra related to the mixture
        """

        if predict:
            num_guesses = [1] * self.inst_num
        else:
            num_guesses = self.num_guesses
        num_guesses_prod = np.prod(num_guesses, dtype=np.int32)

        params_d = ParamsDict(batch_size, num_guesses_prod)
        spectvis_d = SpectvisDict(self.spectheight)

        constant_in = tf.reshape(mix_spect_in,
                                 [batch_size, 1, 2, self.spectheight])
        constant_in_abs = complex_abs(constant_in, axis=2)
        inst_spects_flat = np.zeros([batch_size, 1, self.inst_num * 2,
                                     self.spectheight])
        model_in = tf.concat([constant_in, constant_in_abs,
                              inst_spects_flat,
                              constant_in, constant_in_abs,
                              inst_spects_flat], axis=2)
        inst_mask = tf.zeros([batch_size, 1, self.inst_num],
                             dtype=tf.int32)

        spectloss = SpectLoss(batch_size, num_guesses_prod,
                              self.inst_num, self.spectheight)

        for i in range(self.inst_num):
            if i == 0:
                batch_factor = 1
                fan_factor = num_guesses_prod
            else:
                batch_factor = num_guesses_prod
                fan_factor = 1

            params_tone, spectvis_tone, har_spect = \
                self.separate_inst(self.inst_model, model_in,
                                   not predict, batch_factor, fan_factor,
                                   num_guesses[i:], inst_mask, tau,
                                   batch_size)
            spectvis_d._append(spectvis_tone)
            params_d._append(params_tone)

            spectloss.add_tone(params_tone.inst_params,
                               params_tone.har_coeffs,
                               params_tone.on_factors,
                               params_tone.insts,
                               har_spect, self.inst_dict, mix_spect_in)
            spects = spectloss.spects
            losses = spectloss.losses

            idcs = tf.stack(
                [tf.tile(tf.reshape(tf.range(batch_size),
                                    [batch_size, 1, 1]),
                         [1, batch_factor * fan_factor, i+1]),
                 tf.tile(tf.reshape(tf.range(batch_factor * fan_factor,
                                             dtype=tf.int32),
                                    [1, batch_factor * fan_factor, 1]),
                         [batch_size, 1, i+1]),
                 params_d.insts], axis=3)
            inst_mask = tf.scatter_nd(
                idcs,
                tf.broadcast_to(1, [batch_size,
                                    batch_factor * fan_factor, i+1]),
                [batch_size, batch_factor * fan_factor, self.inst_num])

            if i+1 < self.inst_num:
                constant_in = (
                    tf.reshape(mix_spect_in,
                               [batch_size, 1, 2, self.spectheight])
                    - tf.reshape(spects.mix,
                                 [batch_size,
                                  batch_factor * fan_factor,
                                  2, self.spectheight]))
                constant_in_abs = complex_abs(constant_in, axis=2)
                inst_spects_flat = tf.reshape(
                    spects.insts,
                    [batch_size, batch_factor * fan_factor,
                     self.inst_num * 2, self.spectheight])
                constant_in_dir = (
                    tf.reshape(mix_spect_in,
                               [batch_size, 1, 2, self.spectheight])
                    - tf.reshape(spects.mix_dir,
                                 [batch_size,
                                  batch_factor * fan_factor,
                                  2, self.spectheight]))
                constant_in_abs_dir = complex_abs(constant_in_dir,
                                                  axis=2)
                inst_spects_flat_dir = tf.reshape(
                    spects.insts_dir,
                    [batch_size, batch_factor * fan_factor,
                     self.inst_num * 2, self.spectheight])
                model_in = tf.concat(
                    [constant_in, constant_in_abs, inst_spects_flat,
                     constant_in_dir, constant_in_abs_dir,
                     inst_spects_flat_dir], axis=2)

        spectvis_d.constant_in = constant_in

        return spects, losses, params_d, spectvis_d

    def add_gradients_model_cnn(self, gradients_model_cnn):
        """
        Add gradients related to the CNN model.

        Parameters
        ----------
        gradients_model_cnn : sequence of tensor of float
            Gradients with respect to the model
        """

        if self.gradients_model_cnn is None:
            self.gradients_model_cnn = gradients_model_cnn
        else:
            self.gradients_model_cnn = [
                g1 + g2 for g1, g2 in zip(self.gradients_model_cnn,
                                          gradients_model_cnn)]

    def add_gradient_dict(self, gradient_dict):
        """
        Add gradients related to the dictionary.

        Parameters
        ----------
        gradient_dict : tensor of float
            Gradient with respect to the dictionary
        """

        if self.gradient_dict is None:
            self.gradient_dict = gradient_dict
        else:
            self.gradient_dict += gradient_dict

    def add_gradient_scales(self, gradient_scales):
        """
        Add gradients related to the output scalings.

        Parameters
        ----------
        gradient_scales : tensor of float
            Gradient with respect to the output scalings
        """

        if self.gradient_scales is None:
            self.gradient_scales = gradient_scales
        else:
            self.gradient_scales += gradient_scales

    def apply_gradients(self):
        """
        Apply all gradients to the optimization algorithm.
        """

        variables_list = [self.param_scales] + self.variables_cnn
        gradient_list = ([self.gradient_scales]
                         + self.gradients_model_cnn)
        self.optimizer.apply_gradients(
            zip(gradient_list, variables_list))
        self.gradients_model_cnn = None
        self.gradient_scales = None

        self.dict_optimizer.apply_gradients(
            [[self.gradient_dict, self.inst_dict]])
        self.gradient_dict = None

    def predict_mix_spect(self, mix_spect_in):
        """
        Predict the separation of a mixture spectrum and compute the
        losses.

        Parameters
        ----------
        mix_spect_in : tensor of float
            Mixture spectrogram to perform the separation on

        Returns
        -------
        spects : SimpleNamespace
            Spectra related to the mixture
        losses : SimpleNamespace
            Losses related to the mixture
        """

        # add a minimum amount of noise
        mix_spect_in += tf.random.normal(tf.shape(mix_spect_in),
                                         stddev=1e-10)
        spects, losses, params, spectvis = \
            self.separate(mix_spect_in, None, self.batch_size_pred,
                          predict=True)
        params_sum = params._sum()
        total_loss, _ = comp_total_loss(losses, params.on_factors,
                                        self.spl, self.loss_coeffs)

        return spects, losses

    def train_mix_spect(self, mix_spect_in, writer, k):
        """
        Train the separation of a mixture spectrum and compute the
        losses.

        Parameters
        ----------
        mix_spect_in : tensor of float
            Mixture spectrogram to perform the separation on
        writer : SummaryWriter
            Writer object to capture the summarized variables
        k : int
            Iteration number

        Returns
        -------
        spects : SimpleNamespace
            Spectra related to the mixture
        losses : SimpleNamespace
            Losses related to the mixture
        params : ParamsDict
            Parameters related to the mixture
        spectvis : SpectvisDict
            Visualization spectra related to the mixture
        """

        # add a minimum amount of noise
        mix_spect_in += tf.random.normal(tf.shape(mix_spect_in),
                                         stddev=1e-10)

        with tf.GradientTape(persistent=True) as self.tape:
            spects, losses, params, spectvis = \
                self.separate(mix_spect_in, self.tau, self.batch_size)
            params_sum = params._sum()

            probs_factor = tf.stop_gradient(tf.nn.softmax(
                params_sum.probs + params_sum.on_probs
                - params_sum.probs_samp - params_sum.on_probs_samp))
            probs_factor_avg = tf.stop_gradient(
                tf.nn.softmax(tf.zeros_like(params_sum.probs)))
            probs_corr = probs_factor

            mix_spect_loss = losses.mix
            mix_spect_loss_dir = losses.mix_dir
            mix_spect_loss_sparse = losses.mix_sparse
            loss_inst = losses.inst

            total_loss, _ = comp_total_loss(losses, params.on_factors,
                                            self.spl, self.loss_coeffs)
            baseline = tf.reduce_sum(probs_factor_avg * total_loss,
                                     axis=1, keepdims=True)
            pol_obj = (((params_sum.probs_samp
                         + params_sum.on_probs_samp)
                        * tf.stop_gradient(total_loss - baseline)
                        + (params_sum.g_probs
                           * tf.stop_gradient(total_loss - baseline)))
                       #) * probs_factor_avg) / 10
                       )
            total_loss_obj = probs_factor_avg * total_loss
            total_loss_obj_w = probs_factor * total_loss
            obj = total_loss_obj + pol_obj
            dict_obj = total_loss_obj_w

            mix_spect_loss_w = mix_spect_loss * probs_factor
            mix_spect_loss_dir_w = mix_spect_loss_dir * probs_factor
            mix_spect_loss_sparse_w = (mix_spect_loss_sparse
                                       * probs_factor)
            loss_inst_w = loss_inst * probs_factor

            neff = 1 / tf.reduce_sum(probs_factor**2, axis=1)
            variance = (tf.reduce_sum(probs_factor
                                      * (total_loss - baseline)**2,
                                      axis=1)
                        / (1 - 1 / neff))

        gradients_model_cnn, gradient_scales_loss = \
            self.tape.gradient(
                obj, [self.variables_cnn, self.param_scales],
                unconnected_gradients=tf.UnconnectedGradients.ZERO)
        self.add_gradients_model_cnn(gradients_model_cnn)
        self.add_gradient_scales(gradient_scales_loss)
        gradient_dict = self.tape.gradient(
            dict_obj, self.inst_dict,
            unconnected_gradients=tf.UnconnectedGradients.ZERO)

        grad_norm = np.sum([tf.reduce_sum(g**2)
                            for g in self.gradients_model_cnn])
        grad_scales_norm = tf.reduce_sum(gradient_scales_loss**2)
        grad_dict_norm = tf.reduce_sum(
            tf.convert_to_tensor(gradient_dict)**2)
        self.add_gradient_dict(gradient_dict)

        with writer.as_default():
            tf.summary.scalar('probs_corr_norm',
                              tf.reduce_sum(probs_corr**2), step=k)
            tf.summary.scalar('mix_spect_loss', tf.reduce_sum(
                tf.reduce_mean(mix_spect_loss_w, axis=0)), step=k)
            tf.summary.scalar('mix_spect_loss_dir', tf.reduce_sum(
                tf.reduce_mean(mix_spect_loss_dir_w, axis=0)), step=k)
            tf.summary.scalar('mix_spect_loss_sparse', tf.reduce_sum(
                tf.reduce_mean(mix_spect_loss_sparse_w, axis=0)),
                step=k)
            tf.summary.scalar('inst_loss', tf.reduce_sum(
                tf.reduce_mean(loss_inst_w, axis=0)), step=k)
            tf.summary.scalar('total_loss', tf.reduce_sum(
                tf.reduce_mean(total_loss_obj, axis=0)), step=k)
            tf.summary.scalar('on_factors', tf.reduce_sum(
                tf.reduce_mean(params_sum.on_factors, axis=0)), step=k)
            tf.summary.scalar('on_probs', - tf.reduce_sum(
                tf.reduce_mean(params_sum.on_probs, axis=0)), step=k)
            tf.summary.scalar('probs', - tf.reduce_sum(
                tf.reduce_mean(params_sum.probs, axis=0)), step=k)
            tf.summary.scalar('g_probs', - tf.reduce_sum(
                tf.reduce_mean(params_sum.g_probs, axis=0)), step=k)
            tf.summary.scalar('grad_norm', grad_norm, step=k)
            tf.summary.scalar('grad_dict_norm', grad_dict_norm, step=k)
            tf.summary.scalar('neff', tf.reduce_mean(neff), step=k)
            tf.summary.scalar('stdev',
                              tf.reduce_mean(tf.sqrt(variance)),
                              step=k)
            tf.summary.scalar('entropy',
                              tf.reduce_mean(params_sum.entropy),
                              step=k)

        return spects, losses, params, spectvis

    def train_dict_norm(self, writer, k):
        """
        Train the dictionary such that the largest entry for each
        instrument gets to 1.

        Parameters
        ----------
        writer : SummaryWriter
            Writer object to capture the summarized variables
        k : int
            Iteration number

        Returns
        -------
        inst_dict_norm : tensor of float
            Dictionary norm loss
        """

        with tf.GradientTape() as tape:
            inst_dict_norm = tf.reduce_mean(tf.math.log(tf.reduce_max(
                self.inst_dict, axis=1)) ** 2)
        gradient_dict = tape.gradient(inst_dict_norm, self.inst_dict)
        grad_dict_norm = tf.reduce_sum(gradient_dict**2)

        with writer.as_default():
            tf.summary.scalar('inst_dict_norm',
                              tf.reduce_sum(inst_dict_norm), step=k)
            tf.summary.scalar('grad_dict_scale_norm',
                              tf.reduce_sum(grad_dict_norm), step=k)

        self.add_gradient_dict(gradient_dict)

        return inst_dict_norm

    def predict_loop(self, k, write):
        """
        Predict and resynthesize the entire spectrogram.

        Parameters
        ----------
        k : int
            Iteration number
        write : bool
            Whether to save the output to files
        """

        inst_spects_full_dir = np.zeros(
            [self.spectlen_sub, self.inst_num, 2, self.spectheight])
        inst_spects_full = np.zeros(
            [self.spectlen_sub, self.inst_num, 2, self.spectheight])
        count = 0
        mix_spect_loss = 0
        mix_spect_loss_dir = 0
        mix_spect_loss_sparse = 0
        loss_inst = 0

        cnt = 0
        for mix_spect_in in iter(self.inline_slices):
            print("{} out of {}".format(
                cnt, self.spectlen_sub // self.batch_size_pred))
            cnt += 1
            retcount = tf.shape(mix_spect_in)[0]
            mix_spect_in = tf.concat(
                [mix_spect_in,
                 tf.zeros([self.batch_size_pred - retcount, 2,
                           self.spectheight])], axis=0)
            spects, losses = self.predict_mix_spect(mix_spect_in)
            inst_spects_dir = (self.sign_mask
                               * spects.insts_dir).numpy()
            inst_spects_dir = np.squeeze(inst_spects_dir, axis=1)
            inst_spects = (self.sign_mask * spects.insts).numpy()
            inst_spects = np.squeeze(inst_spects, axis=1)
            newcount = count + inst_spects_dir.shape[0]
            inst_spects_full_dir[count:newcount, :, :, :] = \
                inst_spects_dir[:retcount, :, :, :]
            inst_spects_full[count:newcount, :, :, :] = \
                inst_spects[:retcount, :, :, :]
            count = newcount
            mix_spect_loss += losses.mix
            mix_spect_loss_dir += losses.mix_dir
            mix_spect_loss_sparse += losses.mix_sparse
            loss_inst += losses.inst

        corr_factor = 1 / self.spectlen_sub
        with self.eval_writer.as_default():
            tf.summary.scalar('mix_spect_loss',
                              tf.reduce_sum(mix_spect_loss)
                              * corr_factor, step=k)
            tf.summary.scalar('mix_spect_loss_dir',
                              tf.reduce_sum(mix_spect_loss_dir)
                              * corr_factor, step=k)
            tf.summary.scalar('mix_spect_loss_sparse',
                              tf.reduce_sum(mix_spect_loss_sparse)
                              * corr_factor, step=k)
            tf.summary.scalar('inst_loss',
                              tf.reduce_sum(loss_inst) * corr_factor,
                              step=k)

        synth_signals = np.zeros((self.inst_num, self.signal.size))
        synth_signals_dir = np.zeros((self.inst_num, self.signal.size))
        for i in range(self.inst_num):
            inst_spect = (inst_spects_full[:, i, 0, :]
                          + 1j * inst_spects_full[:, i, 1, :])
            audio = spect.project_audio(
                inst_spect.T, self.signal.size, self.sigmas_an,
                self.sampdist * self.sub_factor)
            synth_signals[i, :] = audio
            if write and False:
                wav.write('out-nn/' + self.name
                          + '-{}-{}.wav'.format(i, k),
                          audio, self.samprate)
            inst_spect = None

            inst_spect_dir = (inst_spects_full_dir[:, i, 0, :]
                              + 1j * inst_spects_full_dir[:, i, 1, :])
            audio = spect.project_audio(
                inst_spect_dir.T, self.signal.size, self.sigmas_an,
                self.sampdist * self.sub_factor)
            synth_signals_dir[i, :] = audio
            if write:
                wav.write('out-nn/' + self.name + '-dir-{}-{}.wav'
                          .format(i, k), audio, self.samprate)
            if write and self.plot_color is not None:
                spect.spectwrite('out-nn/' + self.name
                                 + '-dir-{}-{}.png'.format(i, k),
                                 np.abs(inst_spect_dir.T),
                                 self.plot_color)
            inst_spect_dir = None

        perm, perf = performance.select_perm(*performance.measures(
            synth_signals, self.orig_signals))
        print("model")
        print(perf)
        with self.eval_writer.as_default():
            tf.summary.scalar('SDR', np.mean(perf[0, :]), step=k)
            tf.summary.scalar('SIR', np.mean(perf[1, :]), step=k)
            tf.summary.scalar('SAR', np.mean(perf[2, :]), step=k)

        perm_dir, perf_dir = performance.select_perm(
            *performance.measures(synth_signals_dir, self.orig_signals))
        print("dir")
        print(perf_dir)
        with self.eval_writer.as_default():
            tf.summary.scalar('SDR_dir', np.mean(perf_dir[0, :]),
                              step=k)
            tf.summary.scalar('SIR_dir', np.mean(perf_dir[1, :]),
                              step=k)
            tf.summary.scalar('SAR_dir', np.mean(perf_dir[2, :]),
                              step=k)

        self.measures_table.append(
            np.ravel(np.concatenate([perf, perf_dir], axis=1)))
        np.savetxt('out-nn/' + self.name + '-measures.dat',
                   np.vstack([np.arange(len(self.measures_table)),
                              np.asarray(self.measures_table).T]).T)

        mix_spect = np.sum(inst_spects_full[:, :, 0, :]
                           + 1j * inst_spects_full[:, :, 1, :], axis=1)
        audio = spect.project_audio(
            mix_spect.T, self.signal.size, self.sigmas_an,
            self.sampdist * self.sub_factor)
        if write and False:
            wav.write('out-nn/' + self.name + '-{}.wav'.format(k),
                      audio, self.samprate)
        mix_spect = None

        mix_spect_dir = np.sum(inst_spects_full_dir[:, :, 0, :]
                               + 1j * inst_spects_full_dir[:, :, 1, :],
                               axis=1)
        audio = spect.project_audio(
            mix_spect_dir.T, self.signal.size, self.sigmas_an,
            self.sampdist * self.sub_factor)
        if write:
            wav.write('out-nn/' + self.name + '-dir-{}.wav'.format(k),
                      audio, self.samprate)
        if write and self.plot_color is not None:
            spect.spectwrite('out-nn/' + self.name
                             + '-dir-{}.png'.format(k),
                             np.abs(mix_spect_dir.T), self.plot_color)
        mix_spect_dir = None

    def train_loop(self, max_iter, eval_interval, interval=50):
        """
        Train the neural network.  Predict and resynthesize the entire
        spectrogram.

        Parameters
        ----------
        max_iter : int
            Total number of training iterations
        eval_interval : int
            Interval at which to evaluate the entire spectrogram
        interval : int
            Interval at which to output debug information
        """

        for k in itertools.count():
            print("iteration: {}".format(k), end="\r")

            if k % eval_interval == 0:
                self.predict_loop(k, k in self.save_points)
            if k in self.save_points:
                tf.saved_model.save(self.save_module,
                                    'out-nn/{}-{}'.format(self.name, k))
            if k >= max_iter:
                break

            mix_spect_in = next(self.random_slice)
            mix_spect_in_abs = complex_abs(mix_spect_in, axis=1)
            spects, losses, params, spectvis = \
                self.train_mix_spect(mix_spect_in, self.file_writer, k)
            inst_dict_norm = self.train_dict_norm(self.file_writer, k)
            if k % self.virt_batch_mul == 0:
                self.apply_gradients()

            if k % interval == 0 and False:
                plot_spectrum('out-nn/' + self.name + '-pdf.pdf',
                              self.spectheight,
                              lift_spect_sign(
                                  mix_spect_in_abs[0, 0, :]),
                              *tf.unstack(spectvis.pdf, axis=0))
                plot_spectrum('out-nn/' + self.name + '-amps.pdf',
                              self.spectheight,
                              lift_spect_sign(
                                  mix_spect_in_abs[0, 0, :]),
                              *[lift_spect_sign(s) for s in
                                tf.unstack(spectvis.amps, axis=0)])
                plot_spectrum('out-nn/' + self.name + '-probs.pdf',
                              self.spectheight,
                              lift_spect_sign(
                                  mix_spect_in_abs[0, 0, :]),
                              *tf.unstack(spectvis.on_probs, axis=0))
                plot_spectrum('out-nn/' + self.name + '-sigmas.pdf',
                              self.spectheight,
                              lift_spect_sign(
                                  mix_spect_in_abs[0, 0, :]),
                              *tf.unstack(spectvis.sigmas, axis=0))
                plot_spectrum('out-nn/' + self.name + '-offsets.pdf',
                              self.spectheight,
                              lift_spect_sign(
                                  mix_spect_in_abs[0, 0, :]),
                              *tf.unstack(spectvis.offsets, axis=0))
                plot_spectrum('out-nn/' + self.name + '-spreads_a.pdf',
                              self.spectheight,
                              *tf.unstack(
                                  tf.math.log(spectvis.spreads_a)
                                  / np.log(10), axis=0))
                plot_spectrum('out-nn/' + self.name + '-spreads_b.pdf',
                              self.spectheight,
                              *tf.unstack(
                                  tf.math.log(spectvis.spreads_b)
                                  / np.log(10), axis=0))
                plot_spectrum('out-nn/' + self.name + '-spect.pdf',
                              self.spectheight,
                              *[lift_spect_sign(s) for s in
                                tf.reshape(mix_spect_in[0, :, :],
                                           [-1, self.spectheight])],
                              *[lift_spect_sign(s) for s in tf.unstack(
                                  tf.reshape(
                                      spects.insts[0, 0, :, :, :],
                                      [-1, self.spectheight]),
                                  axis=0)])
                plot_spectrum('out-nn/' + self.name + '-resin.pdf',
                              self.spectheight,
                              *[lift_spect_sign(s) for s in tf.unstack(
                                  tf.reshape(
                                      spectvis.constant_in[0, 0, :, :],
                                      [-1, self.spectheight]),
                                  axis=0)])
                plot_spectrum('out-nn/' + self.name + '-dir.pdf',
                              self.spectheight,
                              *[lift_spect_sign(s) for s in
                                tf.reshape(mix_spect_in[0, :, :],
                                           [-1, self.spectheight])],
                              *[lift_spect_sign(s) for s in tf.unstack(
                                  tf.reshape(
                                      spects.insts_dir[0, 0, :, :, :],
                                      [-1, self.spectheight]),
                                  axis=0)])
                plot_spectrum('out-nn/' + self.name + '-sp.pdf',
                              self.spectheight,
                              *[lift_spect_sign(s) for s in
                                tf.reshape(mix_spect_in[0, :, :],
                                           [-1, self.spectheight])],
                              *[lift_spect_sign(s) for s in tf.unstack(
                                  tf.reshape(
                                      spects.insts_sparse[0, 0, :, :, :],
                                      [-1, self.spectheight]),
                                  axis=0)])
                print("param_scales", tf.math.exp(self.param_scales))
                print("inst_dict", self.inst_dict)