#!python3
"""
Wrapper for the dictionary learning algorithm. When invoked, the audio
sources in the supplied audio file are separated.
"""
from __future__ import absolute_import, division, print_function
import numpy as np
import sys
import os.path
import pickle
import matplotlib
matplotlib.use('Agg')
#import matplotlib.pyplot as plt
import matplotlib.cm as cm
from ..audio import spect
from ..audio import wav
from ..audio import performance
from . import dictlearn
def correct_signal_length(signal, length):
"""
Right-pad or right-crop the signal such that it fits the desired length.
Arguments
---------
signal : ndarray
Signal to be adjusted
length : int
Desired length of the signal
Returns
-------
ndarray
Adjusted signal
"""
if signal.size > length:
return signal[:length]
elif signal.size < length:
return np.concatenate([signal, np.zeros(length - signal.size)])
else:
return signal
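# Minimal usage sketch (illustrative only): right-padding with zeros vs. right-cropping.
#   correct_signal_length(np.array([1., 2., 3.]), 5)  ->  array([1., 2., 3., 0., 0.])
#   correct_signal_length(np.array([1., 2., 3.]), 2)  ->  array([1., 2.])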
def main(mixed_soundfile, orig_soundfiles, out_name, out_name_run_suffix="",
inst_num=2, tone_num=1, pexp=1, qexp=0.5, har=25, sigmas=6,
sampdist=256, spectheight=6*1024, logspectheight=1024, minfreq=20,
maxfreq=20480, runs=10000, lifetime=500, num_dicts=10, mask=True,
color=False, plot_range=None, spect_method="pursuit",
supply_dicts=None, spect_plots=()):
"""
Wrapper function for the dictionary learning algorithm.
Parameters
----------
mixed_soundfile : string
Name of the mixed input file
orig_soundfiles : list of string or NoneType
Names of the files with the isolated instrument tracks or None
out_name : string
Prefix for the file names
out_name_run_suffix : string
Extra label for the output files
inst_num : int
Number of instruments
tone_num : int
Maximum number of simultaneous tones for each instrument
pexp : float
Exponent for the addition of sinusoids
qexp : float
Exponent to be applied on the spectrum
har : int
Number of harmonics
sigmas : float
Number of standard deviations after which to cut the window/kernel
sampdist : int
Time interval (in samples) at which the spectrogram is sampled
spectheight : int
Height of the linear-frequency spectrogram
logspectheight : int
Height of the log-frequency spectrogram
minfreq : float
Minimum frequency in Hz to be represented (included)
maxfreq : float
Maximum frequency in Hz to be represented (excluded)
runs : int
Number of training iterations to perform
lifetime : int
Number of steps after which to renew the dictionary
num_dicts : int
Number of different dictionaries to generate and train
mask : bool
Whether to apply spectral masking
color : bool or string
Whether color should be used, or specification of the color scheme
plot_range : slice or NoneType
Part of the spectrogram to plot
spect_method : string
If set to `"mel"`, a mel spectrogram is used for separation.
Otherwise, the log-frequency spectrogram is generated via
sparse pursuit.
supply_dicts : NoneType or list of array_like
If specified, use the given dictionaries rather than computing
new ones
spect_plots : sequence of int
Time frames for which to output the spectrum as a text file
Returns
-------
inst_dicts : list of ndarray
Dictionaries that were used for the separation
"""
signal, samprate = wav.read(mixed_soundfile)
plotlen = signal.size
freqrange = np.linspace(0, samprate/2000, spectheight, endpoint=False)
orig_spectrum = spect.spectrogram(
signal, spectheight, sigmas, sampdist)[:spectheight, :]
if plot_range is not None:
spect.spectwrite('output/{}-orig.png'.format(out_name),
orig_spectrum[:spectheight, plot_range],
color)
for sp in spect_plots:
np.savetxt('output/{}-orig-{}.dat'.format(out_name, sp),
np.stack([freqrange, orig_spectrum[:spectheight, sp]], axis=1))
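# If reference tracks are supplied, load them (padded/cropped to the mixture
# length) and compute their spectrograms.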
if orig_soundfiles is None:
orig_signals = None
else:
orig_signals = np.asarray(
[correct_signal_length(wav.read(f)[0], signal.size)
for f in orig_soundfiles])
orig_spectrums = [spect.spectrogram(
os, spectheight, sigmas, sampdist)[:spectheight, :]
for os in orig_signals]
fsigma = sigmas/np.pi
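# Reuse cached spectrogram data from a previous run if available; otherwise
# compute the log-frequency spectrogram with the selected method (mel filter
# bank or sparse pursuit) and cache the results.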
if (os.path.exists('output/{}-lin.npy'.format(out_name))
and os.path.exists('output/{}-log.npy'.format(out_name))
and os.path.exists('output/{}-stretch.npy'.format(out_name))):
linspect = np.load('output/{}-lin.npy'.format(out_name))
logspect = np.load('output/{}-log.npy'.format(out_name))
stretch = np.load('output/{}-stretch.npy'.format(out_name))
elif spect_method == "mel":
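# The stretch factor is the ratio of log-frequency to linear-frequency bin
# density at f = minfreq: the log axis has logspectheight / log(maxfreq/minfreq)
# bins per unit of log-frequency, the linear axis 2*spectheight/samprate bins per Hz.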
stretch = (logspectheight / np.log(maxfreq/minfreq)
/ (minfreq / samprate * 2 * spectheight))
print("stretch: {}".format(stretch))
logspect, linspect = spect.logspect_mel(signal, spectheight,
sigmas, sampdist,
minfreq/samprate,
minfreq/samprate,
maxfreq/samprate,
logspectheight)
logspect = np.sqrt(logspect)
linspect = np.sqrt(linspect)
np.save('output/{}-lin.npy'.format(out_name), linspect)
np.save('output/{}-log.npy'.format(out_name), logspect)
np.save('output/{}-stretch.npy'.format(out_name), stretch)
else:
logspect, linspect = spect.logspect_pursuit(signal, spectheight,
sigmas, sampdist, None,
minfreq/samprate,
maxfreq/samprate,
logspectheight, fsigma)
stretch = 1
np.save('output/{}-lin.npy'.format(out_name), linspect)
np.save('output/{}-log.npy'.format(out_name), logspect)
np.save('output/{}-stretch.npy'.format(out_name), stretch)
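# Optionally write the linear and log-frequency spectrograms as images and
# dump selected time frames as text files.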
if plot_range is not None:
spect.spectwrite('output/{}-lin.png'.format(out_name),
linspect[:, plot_range], color)
spect.spectwrite('output/{}-log.png'.format(out_name),
logspect[:, plot_range], color)
for sp in spect_plots:
np.savetxt('output/{}-lin-{}.dat'.format(out_name, sp),
np.stack([freqrange, linspect[:, sp]], axis=1))
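# Train (or load) num_dicts dictionaries, each with a different random seed,
# separate the mixture with each of them, and collect the performance measures.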
audio_measures = []
inst_dicts = []
for r in range(0, num_dicts):
print("seed: {}".format(r))
out_name_run = out_name + out_name_run_suffix + '-{}'.format(r)
np.random.seed(r)
if supply_dicts is not None:
inst_dict = np.asarray(supply_dicts[r])
elif os.path.exists('output/{}-dict.npy'.format(out_name_run)):
inst_dict = np.load('output/{}-dict.npy'.format(out_name_run))
else:
inst_dict = dictlearn.learn_spect_dict(
logspect, fsigma*stretch, tone_num, inst_num * 2, pexp, qexp,
har, minfreq, maxfreq, runs, lifetime)
np.save('output/{}-dict.npy'.format(out_name_run), inst_dict)
print(inst_dict)
inst_dicts.append(inst_dict)
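# Reuse cached separation results if present; otherwise compute the overall and
# per-instrument spectrograms from the learned dictionary.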
if os.path.exists('output/{}-spect.pkl'.format(out_name_run)):
[dict_spectrum, inst_spectrums,
dict_spectrum_lin, inst_spectrums_lin] = \
pickle.load(open('output/{}-spect.pkl'.format(out_name_run),
'rb'))
else:
(dict_spectrum, inst_spectrums,
dict_spectrum_lin, inst_spectrums_lin) = \
dictlearn.synth_spect(
logspect, tone_num, inst_dict, fsigma*stretch,
spectheight, pexp, qexp,
minfreq/samprate, maxfreq/samprate, stretch)
pickle.dump([dict_spectrum, inst_spectrums,
dict_spectrum_lin, inst_spectrums_lin],
open('output/{}-spect.pkl'.format(out_name_run), 'wb'))
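# Optionally mask the separated linear-frequency spectrograms with the
# spectrogram of the original mixture before resynthesis.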
if mask:
inst_spectrums_lin, mask_spect = dictlearn.mask_spectrums(
inst_spectrums_lin, orig_spectrum)
dict_spectrum_lin = dict_spectrum_lin * mask_spect
mask_str = "mask"
else:
mask_str = "nomask"
if plot_range is not None:
spect.spectwrite('output/{}-synth.png'
.format(out_name_run),
dict_spectrum[:, plot_range], color)
spect.spectwrite('output/{}-synth-lin-{}.png'
.format(out_name_run, mask_str),
dict_spectrum_lin[:, plot_range], color)
for sp in spect_plots:
np.savetxt('output/{}-synth-lin-{}-{}.dat'.format(out_name_run, mask_str, sp),
np.stack([freqrange, dict_spectrum_lin[:, sp]], axis=1))
for i in range(len(inst_spectrums)):
spect.spectwrite(
'output/{}-synth{}.png'
.format(out_name_run, i),
inst_spectrums[i][:, plot_range], color)
spect.spectwrite(
'output/{}-synth{}-lin-{}.png'
.format(out_name_run, i, mask_str),
inst_spectrums_lin[i][:, plot_range], color)
for sp in spect_plots:
np.savetxt('output/{}-synth{}-lin-{}-{}.dat'.format(out_name_run, i, mask_str, sp),
np.stack([freqrange, inst_spectrums_lin[i][:, sp]], axis=1))
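# Resynthesize time-domain audio from the overall and the per-instrument
# linear-frequency spectrograms and write the results as WAV files.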
siglen = signal.size
synth_signals = np.zeros((inst_num, siglen))
audio, _ = spect.synth_audio(dict_spectrum_lin, siglen,
sigmas, sampdist, 1, signal)
wav.write('output/{}-synth-{}.wav'.format(out_name_run, mask_str),
audio, samprate)
for i in range(len(inst_spectrums_lin)):
audio, _ = spect.synth_audio(inst_spectrums_lin[i],
siglen, sigmas, sampdist, 1,
signal)
synth_signals[i, :] = audio
wav.write('output/{}-synth{}-{}.wav'
.format(out_name_run, i, mask_str),
audio, samprate)
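# If reference tracks are available, match the separated tracks to the
# references over all permutations and record the resulting performance measures.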
if orig_signals is not None:
perm, perf = performance.select_perm(*performance.measures(
synth_signals, orig_signals))
audio_measures.append(perf)
print("Permutation:")
print(perm)
print("Performance:")
print(perf)
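# Summarize the performance over all dictionaries (mean, standard deviation,
# best-performing run) and write the raw measures to a text file.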
if orig_signals is not None:
audio_measures = np.asarray(audio_measures)
print("Global measures mean:")
print(np.mean(audio_measures, axis=0))
print("Global measures stdev:")
print(np.std(audio_measures, axis=0, ddof=1))
bestidx = np.argmax(np.sum(audio_measures, axis=2)[:, 0])
print("Global measures best index: {}".format(bestidx))
print("Global measures best:")
print(audio_measures[bestidx, :, :])
np.savetxt('output/{}{}-{}-measures.dat'
.format(out_name, out_name_run_suffix, mask_str),
np.reshape(audio_measures, [num_dicts, 3 * inst_num]))
return inst_dicts
def separate_mozart_recorder_violin():
"Separation of recorder and violin on the piece by Mozart"
main(mixed_soundfile='input/mozart/mix.wav',
orig_soundfiles=['input/mozart/recorder.wav',
'input/mozart/violin.wav'],
out_name='mozart/mozart',
runs=100000,
mask=True,
plot_range=slice(0, 1580))
main(mixed_soundfile='input/mozart/mix.wav',
orig_soundfiles=['input/mozart/recorder.wav',
'input/mozart/violin.wav'],
out_name='mozart/mozart',
runs=100000,
mask=False,
plot_range=slice(0, 1580))
def separate_mozart_recorder_violin_mel():
"Separation of recorder and violin on the piece by Mozart"
main(mixed_soundfile='input/mozart/mix.wav',
orig_soundfiles=['input/mozart/recorder.wav',
'input/mozart/violin.wav'],
out_name='mozart_mel/mozart',
minfreq=200,
runs=100000,
mask=True,
plot_range=slice(0, 1580),
spect_method="mel")
main(mixed_soundfile='input/mozart/mix.wav',
orig_soundfiles=['input/mozart/recorder.wav',
'input/mozart/violin.wav'],
out_name='mozart_mel/mozart',
minfreq=200,
runs=100000,
mask=False,
plot_range=slice(0, 1580),
spect_method="mel")
def separate_mozart_clarinet_piano():
"Separation of clarinet and piano on the piece by Mozart"
main(mixed_soundfile='input/mozart-cl/mix-cl-piano.wav',
orig_soundfiles=['input/mozart-cl/clarinet-high.wav',
'input/mozart-cl/piano-low.wav'],
out_name='mozart-cl/mozart',
runs=100000)
def separate_mozart_piano_mock():
"Mock separation of the piano track."
main(mixed_soundfile='input/mozart-cl/piano-low.wav',
orig_soundfiles=['input/mozart-cl/piano-low.wav'],
out_name='mozart-cl/mozart-mock',
runs=100000,
inst_num=1,
mask=False,
plot_range=slice(0, 1580),
spect_plots=[100]
)
def separate_urmp():
"Separation of selected samples from the URMP dataset."
main(mixed_soundfile='input/URMP/AuMix_03_Dance_fl_cl.wav',
orig_soundfiles=['input/URMP/AuSep_1_fl_03_Dance.wav',
'input/URMP/AuSep_2_cl_03_Dance.wav'],
out_name='URMP/03',
runs=100000)
main(mixed_soundfile='input/URMP/AuMix_09_Jesus_tpt_vn.wav',
orig_soundfiles=['input/URMP/AuSep_1_tpt_09_Jesus.wav',
'input/URMP/AuSep_2_vn_09_Jesus.wav'],
out_name='URMP/09',
runs=100000)
main(mixed_soundfile='input/URMP/AuMix_10_March_tpt_sax.wav',
orig_soundfiles=['input/URMP/AuSep_1_tpt_10_March.wav',
'input/URMP/AuSep_2_sax_10_March.wav'],
out_name='URMP/10',
runs=100000)
main(mixed_soundfile='input/URMP/AuMix_11_Maria_ob_vc.wav',
orig_soundfiles=['input/URMP/AuSep_1_ob_11_Maria.wav',
'input/URMP/AuSep_2_vc_11_Maria.wav'],
out_name='URMP/11',
runs=100000)
main(mixed_soundfile='input/URMP/AuMix_17_Nocturne_vn_fl_cl.wav',
orig_soundfiles=['input/URMP/AuSep_1_vn_17_Nocturne.wav',
'input/URMP/AuSep_2_fl_17_Nocturne.wav',
'input/URMP/AuSep_3_cl_17_Nocturne.wav'],
out_name='URMP/17',
runs=100000,
inst_num=3)
main(mixed_soundfile='input/URMP/AuMix_18_Nocturne_vn_fl_tpt.wav',
orig_soundfiles=['input/URMP/AuSep_1_vn_18_Nocturne.wav',
'input/URMP/AuSep_2_fl_18_Nocturne.wav',
'input/URMP/AuSep_3_tpt_18_Nocturne.wav'],
out_name='URMP/18',
runs=100000,
inst_num=3)
def separate_frere_jacques():
"""
Separation of Bb tin whistle and viola, with generalization of the learned
dictionaries to C tin whistle and violin, and vice versa.
"""
inst_dicts = main(mixed_soundfile='input/fj/bb.wav',
orig_soundfiles=['input/fj/bb-tw.wav',
'input/fj/bb-viola.wav'],
out_name='fj/bb',
runs=100000)
main(mixed_soundfile='input/fj/c.wav',
orig_soundfiles=['input/fj/c-tw.wav',
'input/fj/c-violin.wav'],
out_name='fj/c',
out_name_run_suffix='-gen',
runs=100000,
supply_dicts=inst_dicts)
inst_dicts = main(mixed_soundfile='input/fj/c.wav',
orig_soundfiles=['input/fj/c-tw.wav',
'input/fj/c-violin.wav'],
out_name='fj/c',
runs=100000)
main(mixed_soundfile='input/fj/bb.wav',
orig_soundfiles=['input/fj/bb-tw.wav',
'input/fj/bb-viola.wav'],
out_name='fj/bb',
out_name_run_suffix='-gen',
supply_dicts=inst_dicts,
runs=100000)
def separate_jaiswal(number):
"""
Separation of the data by Jaiswal et al.
Parameters
----------
number : int
Number of the sample to be considered.
"""
main(mixed_soundfile='input/jaiswal/test{}.wav'.format(number),
orig_soundfiles=['input/jaiswal/test{}-01.wav'.format(number),
'input/jaiswal/test{}-02.wav'.format(number)],
out_name='jaiswal/jaiswal{}'.format(number))
def separate_duan():
"""
Separation of the data by Duan et al.
"""
main(mixed_soundfile='input/duan/Euphonium_Oboe.wav',
orig_soundfiles=['input/duan/Oboe.wav',
'input/duan/Euphonium.wav'],
out_name='duan/eo')
main(mixed_soundfile='input/duan/dyrcj_wqyn.wav',
orig_soundfiles=['input/duan/dyrcj_piccolo.wav',
'input/duan/wqyn_organ.wav'],
out_name='duan/po')
main(mixed_soundfile='input/duan/dyrcj_wqyn_fywz.wav',
orig_soundfiles=['input/duan/dyrcj_piccolo.wav',
'input/duan/wqyn_organ.wav',
'input/duan/fywz_oboe.wav'],
out_name='duan/poo',
inst_num=3)
if __name__ == '__main__':
separate_mozart_recorder_violin()
separate_mozart_recorder_violin_mel()
separate_mozart_clarinet_piano()
separate_mozart_piano_mock()
separate_frere_jacques()
separate_urmp()
# The number of the sample is given via command line.
# Unfortunately, we cannot distribute the data.
#separate_jaiswal(int(sys.argv[1]))
# Get the data from:
# https://sites.google.com/site/mperesult/musicseparationresults
# Upsample to 44100 Hz.
#separate_duan()