diff --git a/docs/search.js b/docs/search.js
index fef9748..1876d85 100644
--- a/docs/search.js
+++ b/docs/search.js
@@ -1,6 +1,6 @@
window.pdocSearch = (function(){
/** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. 
This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 
0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();oThe module for Unbounded Interleaved-State Recurrent Neural Network.
\n\nAn introduction is available at [README.md].
\n"}, {"fullname": "uisrnn.parse_arguments", "modulename": "uisrnn", "qualname": "parse_arguments", "kind": "function", "doc": "Parse arguments.
\n\nReturns:\n A tuple of:
\n\n- `model_args`: model arguments\n- `training_args`: training arguments\n- `inference_args`: inference arguments\n
\n", "signature": "(): ", "funcdef": "def"}, {"fullname": "uisrnn.compute_sequence_match_accuracy", "modulename": "uisrnn", "qualname": "compute_sequence_match_accuracy", "kind": "function", "doc": "Compute the accuracy between two sequences by finding optimal matching.
\n\nArgs:\n sequence1: A list of integers or strings.\n sequence2: A list of integers or strings.
\n\nReturns:\n accuracy: sequence matching accuracy as a number in [0.0, 1.0]
\n\nRaises:\n TypeError: If sequence1 or sequence2 is not a list.\n ValueError: If sequence1 and sequence2 are not the same size.
\n", "signature": "(sequence1 , sequence2 ): ", "funcdef": "def"}, {"fullname": "uisrnn.output_result", "modulename": "uisrnn", "qualname": "output_result", "kind": "function", "doc": "Produce a string to summarize the experiment.
\n", "signature": "(model_args , training_args , test_record ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN", "modulename": "uisrnn", "qualname": "UISRNN", "kind": "class", "doc": "Unbounded Interleaved-State Recurrent Neural Networks.
\n"}, {"fullname": "uisrnn.UISRNN.__init__", "modulename": "uisrnn", "qualname": "UISRNN.__init__", "kind": "function", "doc": "Construct the UISRNN object.
\n\nArgs:\n args: Model configurations. See arguments.py
for details.
\n", "signature": "(args ) "}, {"fullname": "uisrnn.UISRNN.observation_dim", "modulename": "uisrnn", "qualname": "UISRNN.observation_dim", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.device", "modulename": "uisrnn", "qualname": "UISRNN.device", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.rnn_model", "modulename": "uisrnn", "qualname": "UISRNN.rnn_model", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.rnn_init_hidden", "modulename": "uisrnn", "qualname": "UISRNN.rnn_init_hidden", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.estimate_sigma2", "modulename": "uisrnn", "qualname": "UISRNN.estimate_sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.estimate_transition_bias", "modulename": "uisrnn", "qualname": "UISRNN.estimate_transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.sigma2", "modulename": "uisrnn", "qualname": "UISRNN.sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.transition_bias", "modulename": "uisrnn", "qualname": "UISRNN.transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.transition_bias_denominator", "modulename": "uisrnn", "qualname": "UISRNN.transition_bias_denominator", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.crp_alpha", "modulename": "uisrnn", "qualname": "UISRNN.crp_alpha", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.logger", "modulename": "uisrnn", "qualname": "UISRNN.logger", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.save", "modulename": "uisrnn", "qualname": "UISRNN.save", "kind": "function", "doc": "Save the model to a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.load", "modulename": "uisrnn", "qualname": "UISRNN.load", "kind": "function", "doc": "Load the model from a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.fit_concatenated", "modulename": "uisrnn", "qualname": "UISRNN.fit_concatenated", "kind": "function", "doc": "Fit UISRNN model to concatenated sequence and cluster_id.
\n\nArgs:\n train_sequence: the training observation sequence, which is a\n 2-dim numpy array of real numbers, of size N * D
.
\n\n- `N`: summation of lengths of all utterances.\n- `D`: observation dimension.\n\nFor example,\n
\n\n
train_sequence =\n[[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'\n [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'\n [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'\n [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'\n [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'\n\n
\n Here `N=5`, `D=4`.\nWe concatenate all training utterances into this single sequence.\n\n
\n train_cluster_id: the speaker id sequence, which is a 1-dim list or\n numpy array of strings, of size N
.\n For example,\n train_cluster_id =\n ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']\n
\n 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.\n\nNote that the order of entries within an utterance is preserved,\nand all utterances are simply concatenated together.\n
\n\nargs: Training configurations. See arguments.py
for details.
\n\nRaises:\n TypeError: If train_sequence or train_cluster_id is of wrong type.\n ValueError: If train_sequence or train_cluster_id has wrong dimension.
\n", "signature": "(self , train_sequence , train_cluster_id , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.fit", "modulename": "uisrnn", "qualname": "UISRNN.fit", "kind": "function", "doc": "Fit UISRNN model.
\n\nArgs:\n train_sequences: Either a list of training sequences, or a single\n concatenated training sequence:
\n\n1. train_sequences is a list, and each element is a 2-dim numpy array\n of real numbers, of size: `length * D`.\n The length varies among different sequences, but D is the same.\n In speaker diarization, each sequence is the sequence of speaker\n embeddings of one utterance.\n2. train_sequences is a single concatenated sequence, which is a\n 2-dim numpy array of real numbers. See `fit_concatenated()`\n for more details.\n
\n\ntrain_cluster_ids: Ground truth labels for train_sequences:
\n\n1. if train_sequences is a list, this must also be a list of the same\n size, each element being a 1-dim list or numpy array of strings.\n2. if train_sequences is a single concatenated sequence, this\n must also be the concatenated 1-dim list or numpy array of strings.\n
\n\nargs: Training configurations. See arguments.py
for details.
\n\nRaises:\n TypeError: If train_sequences or train_cluster_ids is of wrong type.
\n", "signature": "(self , train_sequences , train_cluster_ids , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.predict_single", "modulename": "uisrnn", "qualname": "UISRNN.predict_single", "kind": "function", "doc": "Predict labels for a single test sequence using UISRNN model.
\n\nArgs:\n test_sequence: the test observation sequence, which is a 2-dim numpy array\n of real numbers, of size N * D
.
\n\n- `N`: length of one test utterance.\n- `D`: observation dimension.\n\nFor example:\n
\n\n
test_sequence =\n[[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'\n [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'\n [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'\n [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'\n [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'\n
\n Here N=5
, D=4
.\n args: Inference configurations. See arguments.py
for details.\n\nReturns:\n predicted_cluster_id: predicted speaker id sequence, which is\n an array of integers, of size N
.\n For example, predicted_cluster_id = [0, 1, 0, 0, 1]
\n\nRaises:\n TypeError: If test_sequence is of wrong type.\n ValueError: If test_sequence has wrong dimension.
\n", "signature": "(self , test_sequence , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.predict", "modulename": "uisrnn", "qualname": "UISRNN.predict", "kind": "function", "doc": "Predict labels for a single or many test sequences using UISRNN model.
\n\nArgs:\n test_sequences: Either a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.
\n\nReturns:\n predicted_cluster_ids: Predicted labels for test_sequences.
\n\n1. if test_sequences is a list, predicted_cluster_ids will be a list\n of the same size, where each element is a 1-dim list of strings.\n2. if test_sequences is a single sequence, predicted_cluster_ids will\n be a 1-dim list of strings.\n
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(self , test_sequences , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.parallel_predict", "modulename": "uisrnn", "qualname": "parallel_predict", "kind": "function", "doc": "Run prediction in parallel using torch.multiprocessing.
\n\nThis is a beta feature. It makes prediction slower on CPU, but it is reported\nto make prediction faster on GPU.
\n\nArgs:\n model: instance of UISRNN model\n test_sequences: a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.\n num_processes: number of parallel processes.
\n\nReturns:\n a list of the same size as test_sequences, where each element\n is a 1-dim list of strings.
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(model , test_sequences , args , num_processes = 4 ): ", "funcdef": "def"}, {"fullname": "uisrnn.arguments", "modulename": "uisrnn.arguments", "kind": "module", "doc": "Arguments for UISRNN.
\n"}, {"fullname": "uisrnn.arguments.str2bool", "modulename": "uisrnn.arguments", "qualname": "str2bool", "kind": "function", "doc": "A function to convert string to bool value.
\n", "signature": "(value ): ", "funcdef": "def"}, {"fullname": "uisrnn.arguments.parse_arguments", "modulename": "uisrnn.arguments", "qualname": "parse_arguments", "kind": "function", "doc": "Parse arguments.
\n\nReturns:\n A tuple of:
\n\n- `model_args`: model arguments\n- `training_args`: training arguments\n- `inference_args`: inference arguments\n
\n", "signature": "(): ", "funcdef": "def"}, {"fullname": "uisrnn.contrib", "modulename": "uisrnn.contrib", "kind": "module", "doc": "The module for community contributed code.
\n\nAn introduction is available at [README.md].
\n"}, {"fullname": "uisrnn.contrib.contrib_template", "modulename": "uisrnn.contrib.contrib_template", "kind": "module", "doc": "This is a template for community contributions.
\n"}, {"fullname": "uisrnn.contrib.contrib_template.example_function", "modulename": "uisrnn.contrib.contrib_template", "qualname": "example_function", "kind": "function", "doc": "This is an example function.
\n", "signature": "(): ", "funcdef": "def"}, {"fullname": "uisrnn.contrib.range_search_crp_alpha", "modulename": "uisrnn.contrib.range_search_crp_alpha", "kind": "module", "doc": "This module implements method to search for best crp_alpha within a range for\na given data set.\n For example\n
train_cluster_id = np.array(\n ['0_0', '0_0', '0_1', '0_1', '0_1', '0_0', '0_0', '1_0', '1_0', '1_0',\n '1_1', '1_1', '1_1', '1_0', '1_0','1_0', '1_2', '1_2', '1_2'])\n print(estimate_crp_alpha(train_cluster_id))\n 0.5\n
\n Function for user:\n estimate_crp_alpha: see docstring for details.\n Internal functions:\n _get_cdf: see docstring for details.\n _get_cdf_single: see docstring for details.\n _get_k_t: see docstring for details.\n _get_n_kt: see docstring for details.\n _get_cluster_id_single: see docstring for details.\n _get_normalized_id: see docstring for details.\n"}, {"fullname": "uisrnn.contrib.range_search_crp_alpha.estimate_crp_alpha", "modulename": "uisrnn.contrib.range_search_crp_alpha", "qualname": "estimate_crp_alpha", "kind": "function", "doc": "Iterate through a range of alpha, return alpha with maximum cdf P{Y|Z}.
\n\nArgs:\n train_cluster_id: same as train_cluster_id in demo.py. See demo.py
for\n details.\n search_range: the range to search for crp_alpha.\n search_step: the step to search for crp_alpha.\nReturns:\n cur_alpha: a float variable.
\n", "signature": "(train_cluster_id , search_range = 1 , search_step = 0.01 ): ", "funcdef": "def"}, {"fullname": "uisrnn.evals", "modulename": "uisrnn.evals", "kind": "module", "doc": "Utils for model evaluation.
\n"}, {"fullname": "uisrnn.evals.get_list_inverse_index", "modulename": "uisrnn.evals", "qualname": "get_list_inverse_index", "kind": "function", "doc": "Get value to position index from a list of unique ids.
\n\nArgs:\n unique_ids: A list of unique integers or strings.
\n\nReturns:\n result: a dict from value to position
\n\nRaises:\n TypeError: If unique_ids is not a list.
\n", "signature": "(unique_ids ): ", "funcdef": "def"}, {"fullname": "uisrnn.evals.compute_sequence_match_accuracy", "modulename": "uisrnn.evals", "qualname": "compute_sequence_match_accuracy", "kind": "function", "doc": "Compute the accuracy between two sequences by finding optimal matching.
\n\nArgs:\n sequence1: A list of integers or strings.\n sequence2: A list of integers or strings.
\n\nReturns:\n accuracy: sequence matching accuracy as a number in [0.0, 1.0]
\n\nRaises:\n TypeError: If sequence1 or sequence2 is not a list.\n ValueError: If sequence1 and sequence2 are not the same size.
\n", "signature": "(sequence1 , sequence2 ): ", "funcdef": "def"}, {"fullname": "uisrnn.loss_func", "modulename": "uisrnn.loss_func", "kind": "module", "doc": "Loss functions for training.
\n"}, {"fullname": "uisrnn.loss_func.weighted_mse_loss", "modulename": "uisrnn.loss_func", "qualname": "weighted_mse_loss", "kind": "function", "doc": "Compute weighted MSE loss.
\n\nNote that we are doing a weighted loss that only sums over non-zero entries.
\n\nArgs:\n input_tensor: input tensor\n target_tensor: target tensor\n weight: weight tensor, in this case 1/sigma^2
\n\nReturns:\n the weighted MSE loss
\n", "signature": "(input_tensor , target_tensor , weight = 1 ): ", "funcdef": "def"}, {"fullname": "uisrnn.loss_func.sigma2_prior_loss", "modulename": "uisrnn.loss_func", "qualname": "sigma2_prior_loss", "kind": "function", "doc": "Compute sigma2 prior loss.
\n\nArgs:\n num_non_zero: since rnn_truth is a collection of different length sequences\n padded with zeros to fit them into a tensor, we count the sum of\n 'real lengths' of all sequences\n sigma_alpha: inverse gamma shape\n sigma_beta: inverse gamma scale\n sigma2: sigma squared
\n\nReturns:\n the sigma2 prior loss
\n", "signature": "(num_non_zero , sigma_alpha , sigma_beta , sigma2 ): ", "funcdef": "def"}, {"fullname": "uisrnn.loss_func.regularization_loss", "modulename": "uisrnn.loss_func", "qualname": "regularization_loss", "kind": "function", "doc": "Compute regularization loss.
\n\nArgs:\n params: iterable of all parameters\n weight: weight for the regularization term
\n\nReturns:\n the regularization loss
\n", "signature": "(params , weight ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn", "modulename": "uisrnn.uisrnn", "kind": "module", "doc": "The UIS-RNN model.
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN", "kind": "class", "doc": "The core Recurent Neural Network used by UIS-RNN.
\n", "bases": "torch.nn.modules.module.Module"}, {"fullname": "uisrnn.uisrnn.CoreRNN.__init__", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.__init__", "kind": "function", "doc": "Initializes internal Module state, shared by both nn.Module and ScriptModule.
\n", "signature": "(input_dim , hidden_size , depth , observation_dim , dropout = 0 ) "}, {"fullname": "uisrnn.uisrnn.CoreRNN.hidden_size", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.hidden_size", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN.linear_mean1", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.linear_mean1", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN.linear_mean2", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.linear_mean2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN.forward", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.forward", "kind": "function", "doc": "The forward function of the module.
\n", "signature": "(self , input_seq , hidden = None ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.BeamState", "modulename": "uisrnn.uisrnn", "qualname": "BeamState", "kind": "class", "doc": "Structure that contains necessary states for beam search.
\n"}, {"fullname": "uisrnn.uisrnn.BeamState.__init__", "modulename": "uisrnn.uisrnn", "qualname": "BeamState.__init__", "kind": "function", "doc": "
\n", "signature": "(source = None ) "}, {"fullname": "uisrnn.uisrnn.BeamState.append", "modulename": "uisrnn.uisrnn", "qualname": "BeamState.append", "kind": "function", "doc": "Append new item to the BeamState.
\n", "signature": "(self , mean , hidden , cluster ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN", "kind": "class", "doc": "Unbounded Interleaved-State Recurrent Neural Networks.
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.__init__", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.__init__", "kind": "function", "doc": "Construct the UISRNN object.
\n\nArgs:\n args: Model configurations. See arguments.py
for details.
\n", "signature": "(args ) "}, {"fullname": "uisrnn.uisrnn.UISRNN.observation_dim", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.observation_dim", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.device", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.device", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.rnn_model", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.rnn_model", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.rnn_init_hidden", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.rnn_init_hidden", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.estimate_sigma2", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.estimate_sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.estimate_transition_bias", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.estimate_transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.sigma2", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.transition_bias", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.transition_bias_denominator", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.transition_bias_denominator", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.crp_alpha", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.crp_alpha", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.logger", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.logger", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.save", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.save", "kind": "function", "doc": "Save the model to a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.load", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.load", "kind": "function", "doc": "Load the model from a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.fit_concatenated", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.fit_concatenated", "kind": "function", "doc": "Fit UISRNN model to concatenated sequence and cluster_id.
\n\nArgs:\n train_sequence: the training observation sequence, which is a\n 2-dim numpy array of real numbers, of size N * D
.
\n\n- `N`: summation of lengths of all utterances.\n- `D`: observation dimension.\n\nFor example,\n
\n\n
train_sequence =\n[[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'\n [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'\n [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'\n [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'\n [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'\n\n
\n Here `N=5`, `D=4`.\nWe concatenate all training utterances into this single sequence.\n\n
\n train_cluster_id: the speaker id sequence, which is a 1-dim list or\n numpy array of strings, of size N
.\n For example,\n train_cluster_id =\n ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']\n
\n 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.\n\nNote that the order of entries within an utterance is preserved,\nand all utterances are simply concatenated together.\n
\n\nargs: Training configurations. See arguments.py
for details.
\n\nRaises:\n TypeError: If train_sequence or train_cluster_id is of wrong type.\n ValueError: If train_sequence or train_cluster_id has wrong dimension.
\n", "signature": "(self , train_sequence , train_cluster_id , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.fit", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.fit", "kind": "function", "doc": "Fit UISRNN model.
\n\nArgs:\n train_sequences: Either a list of training sequences, or a single\n concatenated training sequence:
\n\n1. train_sequences is a list, and each element is a 2-dim numpy array\n of real numbers, of size: `length * D`.\n The length varies among different sequences, but D is the same.\n In speaker diarization, each sequence is the sequence of speaker\n embeddings of one utterance.\n2. train_sequences is a single concatenated sequence, which is a\n 2-dim numpy array of real numbers. See `fit_concatenated()`\n for more details.\n
\n\ntrain_cluster_ids: Ground truth labels for train_sequences:
\n\n1. if train_sequences is a list, this must also be a list of the same\n size, each element being a 1-dim list or numpy array of strings.\n2. if train_sequences is a single concatenated sequence, this\n must also be the concatenated 1-dim list or numpy array of strings.\n
\n\nargs: Training configurations. See arguments.py
for details.
\n\nRaises:\n TypeError: If train_sequences or train_cluster_ids is of wrong type.
\n", "signature": "(self , train_sequences , train_cluster_ids , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.predict_single", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.predict_single", "kind": "function", "doc": "Predict labels for a single test sequence using UISRNN model.
\n\nArgs:\n test_sequence: the test observation sequence, which is a 2-dim numpy array\n of real numbers, of size N * D
.
\n\n- `N`: length of one test utterance.\n- `D`: observation dimension.\n\nFor example:\n
\n\n
test_sequence =\n[[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'\n [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'\n [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'\n [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'\n [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'\n
\n Here N=5
, D=4
.\n args: Inference configurations. See arguments.py
for details.\n\nReturns:\n predicted_cluster_id: predicted speaker id sequence, which is\n an array of integers, of size N
.\n For example, predicted_cluster_id = [0, 1, 0, 0, 1]
\n\nRaises:\n TypeError: If test_sequence is of wrong type.\n ValueError: If test_sequence has wrong dimension.
\n", "signature": "(self , test_sequence , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.predict", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.predict", "kind": "function", "doc": "Predict labels for a single or many test sequences using UISRNN model.
\n\nArgs:\n test_sequences: Either a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.
\n\nReturns:\n predicted_cluster_ids: Predicted labels for test_sequences.
\n\n1. if test_sequences is a list, predicted_cluster_ids will be a list\n of the same size, where each element is a 1-dim list of strings.\n2. if test_sequences is a single sequence, predicted_cluster_ids will\n be a 1-dim list of strings.\n
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(self , test_sequences , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.parallel_predict", "modulename": "uisrnn.uisrnn", "qualname": "parallel_predict", "kind": "function", "doc": "Run prediction in parallel using torch.multiprocessing.
\n\nThis is a beta feature. It makes prediction slower on CPU, but it is reported\nto make prediction faster on GPU.
\n\nArgs:\n model: instance of UISRNN model\n test_sequences: a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.\n num_processes: number of parallel processes.
\n\nReturns:\n a list of the same size as test_sequences, where each element\n is a 1-dim list of strings.
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(model , test_sequences , args , num_processes = 4 ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils", "modulename": "uisrnn.utils", "kind": "module", "doc": "Utils for UIS-RNN.
\n"}, {"fullname": "uisrnn.utils.Logger", "modulename": "uisrnn.utils", "qualname": "Logger", "kind": "class", "doc": "A class for printing logging information to screen.
\n"}, {"fullname": "uisrnn.utils.Logger.__init__", "modulename": "uisrnn.utils", "qualname": "Logger.__init__", "kind": "function", "doc": "
\n", "signature": "(verbosity ) "}, {"fullname": "uisrnn.utils.Logger.print", "modulename": "uisrnn.utils", "qualname": "Logger.print", "kind": "function", "doc": "Print a message if level is not higher than verbosity.
\n\nArgs:\n level: the level of this message; a smaller value means more important\n message: the message to be printed
\n", "signature": "(self , level , message ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.generate_random_string", "modulename": "uisrnn.utils", "qualname": "generate_random_string", "kind": "function", "doc": "Generate a random string of upper case letters and digits.
\n\nArgs:\n length: length of the generated string
\n\nReturns:\n the generated string
\n", "signature": "(length = 6 ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.enforce_cluster_id_uniqueness", "modulename": "uisrnn.utils", "qualname": "enforce_cluster_id_uniqueness", "kind": "function", "doc": "Enforce uniqueness of cluster id across sequences.
\n\nArgs:\n cluster_ids: a list of 1-dim list/numpy.ndarray of strings
\n\nReturns:\n a new list with the same length as cluster_ids
\n\nRaises:\n TypeError: if cluster_ids or its element has wrong type
\n", "signature": "(cluster_ids ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.concatenate_training_data", "modulename": "uisrnn.utils", "qualname": "concatenate_training_data", "kind": "function", "doc": "Concatenate training data.
\n\nArgs:\n train_sequences: a list of 2-dim numpy arrays to be concatenated\n train_cluster_ids: a list of 1-dim list/numpy.ndarray of strings\n enforce_uniqueness: a boolean indicating whether we should enforce uniqueness\n to train_cluster_ids\n shuffle: whether to randomly shuffle input order
\n\nReturns:\n concatenated_train_sequence: a 2-dim numpy array\n concatenated_train_cluster_id: a list of strings
\n\nRaises:\n TypeError: if input has wrong type\n ValueError: if sizes/dimensions of input or their elements are incorrect
\n", "signature": "(\ttrain_sequences , \ttrain_cluster_ids , \tenforce_uniqueness = True , \tshuffle = True ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.sample_permuted_segments", "modulename": "uisrnn.utils", "qualname": "sample_permuted_segments", "kind": "function", "doc": "Sample sequences with permuted blocks.
\n\nArgs:\n index_sequence: (integer array, size: L)\n - subsequence index\n For example, index_sequence = [1,2,6,10,11,12].\n number_samples: (integer)\n - number of subsampled block-preserving permuted sequences.\n For example, number_samples = 5
\n\nReturns:\n sampled_index_sequences: (a list of numpy arrays) - a list of subsampled\n block-preserving permuted sequences. For example,\n
sampled_index_sequences =\n[[10,11,12,1,2,6],\n [6,1,2,10,11,12],\n [1,2,10,11,12,6],\n [6,1,2,10,11,12],\n [1,2,6,10,11,12]]\n
\n The length of \"sampled_index_sequences\" is \"number_samples\".\n", "signature": "(index_sequence , number_samples ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.resize_sequence", "modulename": "uisrnn.utils", "qualname": "resize_sequence", "kind": "function", "doc": "Resize sequences for packing and batching.
\n\nArgs:\n sequence: (real numpy matrix, size: seq_len*obs_size) - observed sequence\n cluster_id: (numpy vector, size: seq_len) - cluster indicator sequence\n num_permutations: int - Number of permutations per utterance sampled.
\n\nReturns:\n sub_sequences: A list of numpy arrays, with observation vectors from the same\n cluster in the same list.\n seq_lengths: The length of each cluster (+1).
\n", "signature": "(sequence , cluster_id , num_permutations = None ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.pack_sequence", "modulename": "uisrnn.utils", "qualname": "pack_sequence", "kind": "function", "doc": "Pack sequences for training.
\n\nArgs:\n sub_sequences: A list of numpy arrays, with observation vectors from the same\n cluster in the same list.\n seq_lengths: The length of each cluster (+1).\n batch_size: int or None - Run batch learning if batch_size is None. Else,\n run online learning with specified batch size.\n observation_dim: int - dimension for observation vectors\n device: str - Your device. E.g., cuda:0
or cpu
.
\n\nReturns:\n packed_rnn_input: (PackedSequence object) packed rnn input\n rnn_truth: ground truth
\n", "signature": "(sub_sequences , seq_lengths , batch_size , observation_dim , device ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.output_result", "modulename": "uisrnn.utils", "qualname": "output_result", "kind": "function", "doc": "Produce a string to summarize the experiment.
\n", "signature": "(model_args , training_args , test_record ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.estimate_transition_bias", "modulename": "uisrnn.utils", "qualname": "estimate_transition_bias", "kind": "function", "doc": "Estimate the transition bias.
\n\nArgs:\n cluster_ids: Either a list of cluster indicator sequences, or a single\n concatenated sequence. The former is strongly preferred, since the\n transition_bias estimated from the latter will be inaccurate.\n smooth: int or float - Smoothing coefficient, avoids -inf value in np.log\n in the case of a sequence with a single speaker and division by 0 in the\n case of empty sequences. Using a small value for smooth decreases the\n bias in the calculation of transition_bias but can also lead to underflow\n in some remote cases; larger values are safer but less accurate.
\n\nReturns:\n bias: Flipping coin head probability.\n bias_denominator: The denominator of the bias, used for multiple calls to\n fit().
\n", "signature": "(cluster_ids , smooth = 1 ): ", "funcdef": "def"}];
+ /** pdoc search index */const docs = [{"fullname": "uisrnn", "modulename": "uisrnn", "kind": "module", "doc": "The module for Unbounded Interleaved-State Recurrent Neural Network.
\n\nAn introduction is available at [README.md].
\n"}, {"fullname": "uisrnn.parse_arguments", "modulename": "uisrnn", "qualname": "parse_arguments", "kind": "function", "doc": "Parse arguments.
\n\nReturns:\n A tuple of:
\n\n- `model_args`: model arguments\n- `training_args`: training arguments\n- `inference_args`: inference arguments\n
\n", "signature": "(): ", "funcdef": "def"}, {"fullname": "uisrnn.compute_sequence_match_accuracy", "modulename": "uisrnn", "qualname": "compute_sequence_match_accuracy", "kind": "function", "doc": "Compute the accuracy between two sequences by finding optimal matching.
\n\nArgs:\n sequence1: A list of integers or strings.\n sequence2: A list of integers or strings.
\n\nReturns:\n accuracy: sequence matching accuracy as a number in [0.0, 1.0]
\n\nRaises:\n TypeError: If sequence1 or sequence2 is not a list.\n ValueError: If sequence1 and sequence2 are not the same size.
\n", "signature": "(sequence1 , sequence2 ): ", "funcdef": "def"}, {"fullname": "uisrnn.output_result", "modulename": "uisrnn", "qualname": "output_result", "kind": "function", "doc": "Produce a string to summarize the experiment.
\n", "signature": "(model_args , training_args , test_record ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN", "modulename": "uisrnn", "qualname": "UISRNN", "kind": "class", "doc": "Unbounded Interleaved-State Recurrent Neural Networks.
\n"}, {"fullname": "uisrnn.UISRNN.__init__", "modulename": "uisrnn", "qualname": "UISRNN.__init__", "kind": "function", "doc": "Construct the UISRNN object.
\n\nArgs:\n args: Model configurations. See arguments.py
for details.
\n", "signature": "(args ) "}, {"fullname": "uisrnn.UISRNN.observation_dim", "modulename": "uisrnn", "qualname": "UISRNN.observation_dim", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.device", "modulename": "uisrnn", "qualname": "UISRNN.device", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.rnn_model", "modulename": "uisrnn", "qualname": "UISRNN.rnn_model", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.rnn_init_hidden", "modulename": "uisrnn", "qualname": "UISRNN.rnn_init_hidden", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.estimate_sigma2", "modulename": "uisrnn", "qualname": "UISRNN.estimate_sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.estimate_transition_bias", "modulename": "uisrnn", "qualname": "UISRNN.estimate_transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.sigma2", "modulename": "uisrnn", "qualname": "UISRNN.sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.transition_bias", "modulename": "uisrnn", "qualname": "UISRNN.transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.transition_bias_denominator", "modulename": "uisrnn", "qualname": "UISRNN.transition_bias_denominator", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.crp_alpha", "modulename": "uisrnn", "qualname": "UISRNN.crp_alpha", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.logger", "modulename": "uisrnn", "qualname": "UISRNN.logger", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.UISRNN.save", "modulename": "uisrnn", "qualname": "UISRNN.save", "kind": "function", "doc": "Save the model to a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.load", "modulename": "uisrnn", "qualname": "UISRNN.load", "kind": "function", "doc": "Load the model from a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.fit_concatenated", "modulename": "uisrnn", "qualname": "UISRNN.fit_concatenated", "kind": "function", "doc": "Fit UISRNN model to concatenated sequence and cluster_id.
\n\nArgs:\n train_sequence: the training observation sequence, which is a\n 2-dim numpy array of real numbers, of size N * D
.
\n\n- `N`: summation of lengths of all utterances.\n- `D`: observation dimension.\n\nFor example,\n
\n\n
train_sequence =\n[[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'\n [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'\n [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'\n [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'\n [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'\n
\n\nHere `N=5`, `D=4`.\n\nWe concatenate all training utterances into this single sequence.\n
\n\ntrain_cluster_id: the speaker id sequence, which is a 1-dim list or\n numpy array of strings, of size N
.\n For example,
\n\n
train_cluster_id =\n ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']\n
\n\n'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.\n\nNote that the order of entries within an utterance is preserved,\nand all utterances are simply concatenated together.\n
\n\nargs: Training configurations. See arguments.py
for details.
\n\nRaises:\n TypeError: If train_sequence or train_cluster_id is of wrong type.\n ValueError: If train_sequence or train_cluster_id has wrong dimension.
\n", "signature": "(self , train_sequence , train_cluster_id , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.fit", "modulename": "uisrnn", "qualname": "UISRNN.fit", "kind": "function", "doc": "Fit UISRNN model.
\n\nArgs:\n train_sequences: Either a list of training sequences, or a single\n concatenated training sequence:
\n\n1. train_sequences is a list, and each element is a 2-dim numpy array\n of real numbers, of size: `length * D`.\n The length varies among different sequences, but D is the same.\n In speaker diarization, each sequence is the sequence of speaker\n embeddings of one utterance.\n2. train_sequences is a single concatenated sequence, which is a\n 2-dim numpy array of real numbers. See `fit_concatenated()`\n for more details.\n
\n\ntrain_cluster_ids: Ground truth labels for train_sequences:
\n\n1. if train_sequences is a list, this must also be a list of the same\n size, each element being a 1-dim list or numpy array of strings.\n2. if train_sequences is a single concatenated sequence, this\n must also be the concatenated 1-dim list or numpy array of strings.\n
\n\nargs: Training configurations. See arguments.py
for details.
\n\nRaises:\n TypeError: If train_sequences or train_cluster_ids is of wrong type.
\n", "signature": "(self , train_sequences , train_cluster_ids , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.predict_single", "modulename": "uisrnn", "qualname": "UISRNN.predict_single", "kind": "function", "doc": "Predict labels for a single test sequence using UISRNN model.
\n\nArgs:\n test_sequence: the test observation sequence, which is a 2-dim numpy array\n of real numbers, of size N * D
.
\n\n- `N`: length of one test utterance.\n- `D`: observation dimension.\n\nFor example:\n
\n\n
test_sequence =\n[[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'\n [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'\n [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'\n [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'\n [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'\n
\n\nHere `N=5`, `D=4`.\n
\n\nargs: Inference configurations. See arguments.py
for details.
\n\nReturns:\n predicted_cluster_id: predicted speaker id sequence, which is\n an array of integers, of size N
.\n For example, predicted_cluster_id = [0, 1, 0, 0, 1]
\n\nRaises:\n TypeError: If test_sequence is of wrong type.\n ValueError: If test_sequence has wrong dimension.
\n", "signature": "(self , test_sequence , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.UISRNN.predict", "modulename": "uisrnn", "qualname": "UISRNN.predict", "kind": "function", "doc": "Predict labels for a single or many test sequences using UISRNN model.
\n\nArgs:\n test_sequences: Either a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.
\n\nReturns:\n predicted_cluster_ids: Predicted labels for test_sequences.
\n\n1. if test_sequences is a list, predicted_cluster_ids will be a list\n of the same size, where each element is a 1-dim list of strings.\n2. if test_sequences is a single sequence, predicted_cluster_ids will\n be a 1-dim list of strings.\n
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(self , test_sequences , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.parallel_predict", "modulename": "uisrnn", "qualname": "parallel_predict", "kind": "function", "doc": "Run prediction in parallel using torch.multiprocessing.
\n\nThis is a beta feature. It makes prediction slower on CPU, but it is reported\nto make prediction faster on GPU.
\n\nArgs:\n model: instance of UISRNN model\n test_sequences: a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.\n num_processes: number of parallel processes.
\n\nReturns:\n a list of the same size as test_sequences, where each element\n is a 1-dim list of strings.
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(model , test_sequences , args , num_processes = 4 ): ", "funcdef": "def"}, {"fullname": "uisrnn.arguments", "modulename": "uisrnn.arguments", "kind": "module", "doc": "Arguments for UISRNN.
\n"}, {"fullname": "uisrnn.arguments.str2bool", "modulename": "uisrnn.arguments", "qualname": "str2bool", "kind": "function", "doc": "A function to convert string to bool value.
\n", "signature": "(value ): ", "funcdef": "def"}, {"fullname": "uisrnn.arguments.parse_arguments", "modulename": "uisrnn.arguments", "qualname": "parse_arguments", "kind": "function", "doc": "Parse arguments.
\n\nReturns:\n A tuple of:
\n\n- `model_args`: model arguments\n- `training_args`: training arguments\n- `inference_args`: inference arguments\n
\n", "signature": "(): ", "funcdef": "def"}, {"fullname": "uisrnn.contrib", "modulename": "uisrnn.contrib", "kind": "module", "doc": "The module for community contributed code.
\n\nAn introduction is available at [README.md].
\n"}, {"fullname": "uisrnn.contrib.contrib_template", "modulename": "uisrnn.contrib.contrib_template", "kind": "module", "doc": "This is a template for community contributions.
\n"}, {"fullname": "uisrnn.contrib.contrib_template.example_function", "modulename": "uisrnn.contrib.contrib_template", "qualname": "example_function", "kind": "function", "doc": "This is an example function.
\n", "signature": "(): ", "funcdef": "def"}, {"fullname": "uisrnn.contrib.range_search_crp_alpha", "modulename": "uisrnn.contrib.range_search_crp_alpha", "kind": "module", "doc": "This module implements method to search for best crp_alpha within a range for\na given data set.\n For example
\n\n
train_cluster_id = np.array(\n ['0_0', '0_0', '0_1', '0_1', '0_1', '0_0', '0_0', '1_0', '1_0', '1_0',\n '1_1', '1_1', '1_1', '1_0', '1_0','1_0', '1_2', '1_2', '1_2'])\n print(estimate_crp_alpha(train_cluster_id))\n 0.5\n
\n\nFunction for user:\n estimate_crp_alpha: see docstring for details.\n Internal functions:\n _get_cdf: see docstring for details.\n _get_cdf_single: see docstring for details.\n _get_k_t: see docstring for details.\n _get_n_kt: see docstring for details.\n _get_cluster_id_single: see docstring for details.\n _get_normalized_id: see docstring for details.
\n"}, {"fullname": "uisrnn.contrib.range_search_crp_alpha.estimate_crp_alpha", "modulename": "uisrnn.contrib.range_search_crp_alpha", "qualname": "estimate_crp_alpha", "kind": "function", "doc": "Iterate through a range of alpha, return alpha with maximum cdf P{Y|Z}.
\n\nArgs:\n train_cluster_id: same as train_cluster_id in demo.py. See demo.py
for\n details.\n search_range: the range to search for crp_alpha.\n search_step: the step to search for crp_alpha.\nReturns:\n cur_alpha: a float variable.
\n", "signature": "(train_cluster_id , search_range = 1 , search_step = 0.01 ): ", "funcdef": "def"}, {"fullname": "uisrnn.evals", "modulename": "uisrnn.evals", "kind": "module", "doc": "Utils for model evaluation.
\n"}, {"fullname": "uisrnn.evals.get_list_inverse_index", "modulename": "uisrnn.evals", "qualname": "get_list_inverse_index", "kind": "function", "doc": "Get value to position index from a list of unique ids.
\n\nArgs:\n unique_ids: A list of unique integers or strings.
\n\nReturns:\n result: a dict from value to position
\n\nRaises:\n TypeError: If unique_ids is not a list.
\n", "signature": "(unique_ids ): ", "funcdef": "def"}, {"fullname": "uisrnn.evals.compute_sequence_match_accuracy", "modulename": "uisrnn.evals", "qualname": "compute_sequence_match_accuracy", "kind": "function", "doc": "Compute the accuracy between two sequences by finding optimal matching.
\n\nArgs:\n sequence1: A list of integers or strings.\n sequence2: A list of integers or strings.
\n\nReturns:\n accuracy: sequence matching accuracy as a number in [0.0, 1.0]
\n\nRaises:\n TypeError: If sequence1 or sequence2 is not a list.\n ValueError: If sequence1 and sequence2 are not the same size.
\n", "signature": "(sequence1 , sequence2 ): ", "funcdef": "def"}, {"fullname": "uisrnn.loss_func", "modulename": "uisrnn.loss_func", "kind": "module", "doc": "Loss functions for training.
\n"}, {"fullname": "uisrnn.loss_func.weighted_mse_loss", "modulename": "uisrnn.loss_func", "qualname": "weighted_mse_loss", "kind": "function", "doc": "Compute weighted MSE loss.
\n\nNote that we are doing a weighted loss that only sums over non-zero entries.
\n\nArgs:\n input_tensor: input tensor\n target_tensor: target tensor\n weight: weight tensor, in this case 1/sigma^2
\n\nReturns:\n the weighted MSE loss
\n", "signature": "(input_tensor , target_tensor , weight = 1 ): ", "funcdef": "def"}, {"fullname": "uisrnn.loss_func.sigma2_prior_loss", "modulename": "uisrnn.loss_func", "qualname": "sigma2_prior_loss", "kind": "function", "doc": "Compute sigma2 prior loss.
\n\nArgs:\n num_non_zero: since rnn_truth is a collection of different length sequences\n padded with zeros to fit them into a tensor, we count the sum of\n 'real lengths' of all sequences\n sigma_alpha: inverse gamma shape\n sigma_beta: inverse gamma scale\n sigma2: sigma squared
\n\nReturns:\n the sigma2 prior loss
\n", "signature": "(num_non_zero , sigma_alpha , sigma_beta , sigma2 ): ", "funcdef": "def"}, {"fullname": "uisrnn.loss_func.regularization_loss", "modulename": "uisrnn.loss_func", "qualname": "regularization_loss", "kind": "function", "doc": "Compute regularization loss.
\n\nArgs:\n params: iterable of all parameters\n weight: weight for the regularization term
\n\nReturns:\n the regularization loss
\n", "signature": "(params , weight ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn", "modulename": "uisrnn.uisrnn", "kind": "module", "doc": "The UIS-RNN model.
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN", "kind": "class", "doc": "The core Recurent Neural Network used by UIS-RNN.
\n", "bases": "torch.nn.modules.module.Module"}, {"fullname": "uisrnn.uisrnn.CoreRNN.__init__", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.__init__", "kind": "function", "doc": "Initialize internal Module state, shared by both nn.Module and ScriptModule.
\n", "signature": "(input_dim , hidden_size , depth , observation_dim , dropout = 0 ) "}, {"fullname": "uisrnn.uisrnn.CoreRNN.hidden_size", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.hidden_size", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN.linear_mean1", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.linear_mean1", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN.linear_mean2", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.linear_mean2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.CoreRNN.forward", "modulename": "uisrnn.uisrnn", "qualname": "CoreRNN.forward", "kind": "function", "doc": "The forward function of the module.
\n", "signature": "(self , input_seq , hidden = None ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.BeamState", "modulename": "uisrnn.uisrnn", "qualname": "BeamState", "kind": "class", "doc": "Structure that contains necessary states for beam search.
\n"}, {"fullname": "uisrnn.uisrnn.BeamState.__init__", "modulename": "uisrnn.uisrnn", "qualname": "BeamState.__init__", "kind": "function", "doc": "
\n", "signature": "(source = None ) "}, {"fullname": "uisrnn.uisrnn.BeamState.append", "modulename": "uisrnn.uisrnn", "qualname": "BeamState.append", "kind": "function", "doc": "Append new item to the BeamState.
\n", "signature": "(self , mean , hidden , cluster ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN", "kind": "class", "doc": "Unbounded Interleaved-State Recurrent Neural Networks.
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.__init__", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.__init__", "kind": "function", "doc": "Construct the UISRNN object.
\n\nArgs:\n args: Model configurations. See arguments.py
for details.
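Typical construction, following the project's usual pattern; parse_arguments() returns the model, training, and inference argument namespaces:

```
import uisrnn

model_args, training_args, inference_args = uisrnn.parse_arguments()
model_args.observation_dim = 4  # must match the dimension of your embeddings
model = uisrnn.UISRNN(model_args)
```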
\n", "signature": "(args ) "}, {"fullname": "uisrnn.uisrnn.UISRNN.observation_dim", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.observation_dim", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.device", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.device", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.rnn_model", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.rnn_model", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.rnn_init_hidden", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.rnn_init_hidden", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.estimate_sigma2", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.estimate_sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.estimate_transition_bias", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.estimate_transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.sigma2", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.sigma2", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.transition_bias", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.transition_bias", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.transition_bias_denominator", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.transition_bias_denominator", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.crp_alpha", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.crp_alpha", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.logger", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.logger", "kind": "variable", "doc": "
\n"}, {"fullname": "uisrnn.uisrnn.UISRNN.save", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.save", "kind": "function", "doc": "Save the model to a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.load", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.load", "kind": "function", "doc": "Load the model from a file.
\n\nArgs:\n filepath: the path of the file.
\n", "signature": "(self , filepath ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.fit_concatenated", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.fit_concatenated", "kind": "function", "doc": "Fit UISRNN model to concatenated sequence and cluster_id.
\n\nArgs:\n train_sequence: the training observation sequence, which is a\n 2-dim numpy array of real numbers, of size N * D
.
\n\n- `N`: summation of lengths of all utterances.\n- `D`: observation dimension.\n\nFor example,\n
\n\n
train_sequence =\n[[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'\n [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'\n [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'\n [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'\n [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'\n
\n\nHere `N=5`, `D=4`.\n\nWe concatenate all training utterances into this single sequence.\n
\n\ntrain_cluster_id: the speaker id sequence, which is a 1-dim list or\n numpy array of strings, of size `N`.\n For example,
\n\n
train_cluster_id =\n ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']\n
\n\n'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.\n\nNote that the order of entries within an utterance is preserved,\nand all utterances are simply concatenated together.\n
\n\nargs: Training configurations. See `arguments.py` for details.
\n\nRaises:\n TypeError: If train_sequence or train_cluster_id is of wrong type.\n ValueError: If train_sequence or train_cluster_id has wrong dimension.
\n", "signature": "(self , train_sequence , train_cluster_id , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.fit", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.fit", "kind": "function", "doc": "Fit UISRNN model.
\n\nArgs:\n train_sequences: Either a list of training sequences, or a single\n concatenated training sequence:
\n\n1. train_sequences is a list, and each element is a 2-dim numpy array\n of real numbers, of size `length * D`.\n The length varies among different sequences, but `D` is the same.\n In speaker diarization, each sequence is the sequence of speaker\n embeddings of one utterance.\n2. train_sequences is a single concatenated sequence, which is a\n 2-dim numpy array of real numbers. See `fit_concatenated()`\n for more details.\n
\n\ntrain_cluster_ids: Ground truth labels for train_sequences:
\n\n1. if train_sequences is a list, this must also be a list of the same\n size, each element being a 1-dim list or numpy array of strings.\n2. if train_sequences is a single concatenated sequence, this\n must also be the concatenated 1-dim list or numpy array of strings.\n
\n\nargs: Training configurations. See `arguments.py` for details.
\n\nRaises:\n TypeError: If train_sequences or train_cluster_ids is of wrong type.
\n", "signature": "(self , train_sequences , train_cluster_ids , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.predict_single", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.predict_single", "kind": "function", "doc": "Predict labels for a single test sequence using UISRNN model.
\n\nArgs:\n test_sequence: the test observation sequence, which is 2-dim numpy array\n of real numbers, of size N * D
.
\n\n- `N`: length of one test utterance.\n- `D` : observation dimension.\n\nFor example:\n
\n\n
test_sequence =\n[[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'\n [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'\n [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'\n [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'\n [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'\n
\n\nHere `N=5`, `D=4`.\n
\n\nargs: Inference configurations. See `arguments.py` for details.
\n\nReturns:\n predicted_cluster_id: predicted speaker id sequence, which is\n an array of integers, of size N
.\n For example, predicted_cluster_id = [0, 1, 0, 0, 1]
\n\nRaises:\n TypeError: If test_sequence is of wrong type.\n ValueError: If test_sequence has wrong dimension.
\n", "signature": "(self , test_sequence , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.UISRNN.predict", "modulename": "uisrnn.uisrnn", "qualname": "UISRNN.predict", "kind": "function", "doc": "Predict labels for a single or many test sequences using UISRNN model.
\n\nArgs:\n test_sequences: Either a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.
\n\nReturns:\n predicted_cluster_ids: Predicted labels for test_sequences.
\n\n1. if test_sequences is a list, predicted_cluster_ids will be a list\n of the same size, where each element is a 1-dim list of strings.\n2. if test_sequences is a single sequence, predicted_cluster_ids will\n be a 1-dim list of strings.\n
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(self , test_sequences , args ): ", "funcdef": "def"}, {"fullname": "uisrnn.uisrnn.parallel_predict", "modulename": "uisrnn.uisrnn", "qualname": "parallel_predict", "kind": "function", "doc": "Run prediction in parallel using torch.multiprocessing.
\n\nThis is a beta feature. It makes prediction slower on CPU, but it is\nreported to make prediction faster on GPU.
\n\nArgs:\n model: instance of UISRNN model\n test_sequences: a list of test sequences, or a single test\n sequence. Each test sequence is a 2-dim numpy array\n of real numbers. See predict_single()
for details.\n args: Inference configurations. See arguments.py
for details.\n num_processes: number of parallel processes.
\n\nReturns:\n a list of the same size as test_sequences, where each element\n being a 1-dim list of strings.
\n\nRaises:\n TypeError: If test_sequences is of wrong type.
\n", "signature": "(model , test_sequences , args , num_processes = 4 ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils", "modulename": "uisrnn.utils", "kind": "module", "doc": "Utils for UIS-RNN.
\n"}, {"fullname": "uisrnn.utils.generate_random_string", "modulename": "uisrnn.utils", "qualname": "generate_random_string", "kind": "function", "doc": "Generate a random string of upper case letters and digits.
\n\nArgs:\n length: length of the generated string
\n\nReturns:\n the generated string
\n", "signature": "(length = 6 ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.enforce_cluster_id_uniqueness", "modulename": "uisrnn.utils", "qualname": "enforce_cluster_id_uniqueness", "kind": "function", "doc": "Enforce uniqueness of cluster id across sequences.
\n\nArgs:\n cluster_ids: a list of 1-dim list/numpy.ndarray of strings
\n\nReturns:\n a new list with same length of cluster_ids
\n\nRaises:\n TypeError: if cluster_ids or its element has wrong type
\n", "signature": "(cluster_ids ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.concatenate_training_data", "modulename": "uisrnn.utils", "qualname": "concatenate_training_data", "kind": "function", "doc": "Concatenate training data.
\n\nArgs:\n train_sequences: a list of 2-dim numpy arrays to be concatenated\n train_cluster_ids: a list of 1-dim list/numpy.ndarray of strings\n enforce_uniqueness: a boolean indicated whether we should enfore uniqueness\n to train_cluster_ids\n shuffle: whether to randomly shuffle input order
\n\nReturns:\n concatenated_train_sequence: a 2-dim numpy array\n concatenated_train_cluster_id: a list of strings
\n\nRaises:\n TypeError: if input has wrong type\n ValueError: if sizes/dimensions of input or their elements are incorrect
\n", "signature": "(\ttrain_sequences , \ttrain_cluster_ids , \tenforce_uniqueness = True , \tshuffle = True ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.sample_permuted_segments", "modulename": "uisrnn.utils", "qualname": "sample_permuted_segments", "kind": "function", "doc": "Sample sequences with permuted blocks.
\n\nArgs:\n index_sequence: (integer array, size: L)\n - subsequence index\n For example, index_sequence = [1,2,6,10,11,12].\n number_samples: (integer)\n - number of subsampled block-preserving permuted sequences.\n For example, number_samples = 5
\n\nReturns:\n sampled_index_sequences: (a list of numpy arrays) - a list of subsampled\n block-preserving permuted sequences. For example,
\n\n
sampled_index_sequences =\n[[10,11,12,1,2,6],\n [6,1,2,10,11,12],\n [1,2,10,11,12,6],\n [6,1,2,10,11,12],\n [1,2,6,10,11,12]]\n
\n\nThe length of \"sampled_index_sequences\" is \"number_samples\".\n
\n", "signature": "(index_sequence , number_samples ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.resize_sequence", "modulename": "uisrnn.utils", "qualname": "resize_sequence", "kind": "function", "doc": "Resize sequences for packing and batching.
\n\nArgs:\n sequence: (real numpy matrix, size: seq_len*obs_size) - observed sequence\n cluster_id: (numpy vector, size: seq_len) - cluster indicator sequence\n num_permutations: int - Number of permutations per utterance sampled.
\n\nReturns:\n sub_sequences: A list of numpy array, with obsevation vector from the same\n cluster in the same list.\n seq_lengths: The length of each cluster (+1).
\n", "signature": "(sequence , cluster_id , num_permutations = None ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.pack_sequence", "modulename": "uisrnn.utils", "qualname": "pack_sequence", "kind": "function", "doc": "Pack sequences for training.
\n\nArgs:\n sub_sequences: A list of numpy array, with obsevation vector from the same\n cluster in the same list.\n seq_lengths: The length of each cluster (+1).\n batch_size: int or None - Run batch learning if batch_size is None. Else,\n run online learning with specified batch size.\n observation_dim: int - dimension for observation vectors\n device: str - Your device. E.g., cuda:0
or cpu
.
\n\nReturns:\n packed_rnn_input: (PackedSequence object) packed rnn input\n rnn_truth: ground truth
\n", "signature": "(sub_sequences , seq_lengths , batch_size , observation_dim , device ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.output_result", "modulename": "uisrnn.utils", "qualname": "output_result", "kind": "function", "doc": "Produce a string to summarize the experiment.
\n", "signature": "(model_args , training_args , test_record ): ", "funcdef": "def"}, {"fullname": "uisrnn.utils.estimate_transition_bias", "modulename": "uisrnn.utils", "qualname": "estimate_transition_bias", "kind": "function", "doc": "Estimate the transition bias.
\n\nArgs:\n cluster_id: Either a list of cluster indicator sequences, or a single\n concatenated sequence. The former is strongly preferred, since the\n transition_bias estimated from the latter will be inaccurate.\n smooth: int or float - Smoothing coefficient, avoids -inf value in np.log\n in the case of a sequence with a single speaker and division by 0 in the\n case of empty sequences. Using a small value for smooth decreases the\n bias in the calculation of transition_bias but can also lead to underflow\n in some remote cases, larger values are safer but less accurate.
\n\nReturns:\n bias: Flipping coin head probability.\n bias_denominator: The denominator of the bias, used for multiple calls to\n fit().
\n", "signature": "(cluster_ids , smooth = 1 ): ", "funcdef": "def"}];
// mirrored in build-search-index.js (part 1)
// Also split on HTML tags. This is a cheap heuristic, but good enough.
diff --git a/docs/uisrnn.html b/docs/uisrnn.html
index ff7f5db..9b922a4 100644
--- a/docs/uisrnn.html
+++ b/docs/uisrnn.html
@@ -3,14 +3,14 @@
uisrnn API documentation
@@ -153,16 +153,14 @@
20
21 from . import arguments
22 from . import evals
-23 from . import loss_func
-24 from . import uisrnn
-25 from . import utils
-26
-27 #pylint: disable=C0103
-28 parse_arguments = arguments . parse_arguments
-29 compute_sequence_match_accuracy = evals . compute_sequence_match_accuracy
-30 output_result = utils . output_result
-31 UISRNN = uisrnn . UISRNN
-32 parallel_predict = uisrnn . parallel_predict
+23 from . import uisrnn
+24 from . import utils
+25
+26 parse_arguments = arguments . parse_arguments
+27 compute_sequence_match_accuracy = evals . compute_sequence_match_accuracy
+28 output_result = utils . output_result
+29 UISRNN = uisrnn . UISRNN
+30 parallel_predict = uisrnn . parallel_predict
@@ -238,12 +236,12 @@
88 'value is None, we will estimate it from training data.' )
89 model_parser . add_argument (
90 '--verbosity' ,
- 91 default = 2 ,
+ 91 default = 3 ,
92 type = int ,
93 help = 'How verbose will the logging information be. Higher value '
94 'represents more verbose information. A general guideline: '
- 95 '0 for errors; 1 for finishing important steps; '
- 96 '2 for finishing less important steps; 3 or above for debugging '
+ 95 '0 for fatals; 1 for errors; 2 for finishing important steps; '
+ 96 '3 for finishing less important steps; 4 or above for debugging '
97 'information.' )
98 model_parser . add_argument (
99 '--enable_cuda' ,
@@ -445,39 +443,39 @@
- 271 def output_result ( model_args , training_args , test_record ):
-272 """Produce a string to summarize the experiment."""
-273 accuracy_array , _ = zip ( * test_record )
-274 total_accuracy = np . mean ( accuracy_array )
-275 output_string = """
-276 Config:
-277 sigma_alpha: {}
-278 sigma_beta: {}
-279 crp_alpha: {}
-280 learning rate: {}
-281 regularization: {}
-282 batch size: {}
-283
-284 Performance:
-285 averaged accuracy: {:.6f}
-286 accuracy numbers for all testing sequences:
-287 """ . strip () . format (
-288 training_args . sigma_alpha ,
-289 training_args . sigma_beta ,
-290 model_args . crp_alpha ,
-291 training_args . learning_rate ,
-292 training_args . regularization_weight ,
-293 training_args . batch_size ,
-294 total_accuracy )
-295 for accuracy in accuracy_array :
-296 output_string += ' \n {:.6f} ' . format ( accuracy )
-297 output_string += ' \n ' + '=' * 80 + ' \n '
-298 filename = 'layer_ {} _ {} _ {:.1f} _result.txt' . format (
-299 model_args . rnn_hidden_size ,
-300 model_args . rnn_depth , model_args . rnn_dropout )
-301 with open ( filename , 'a' ) as file_object :
-302 file_object . write ( output_string )
-303 return output_string
+ 254 def output_result ( model_args , training_args , test_record ):
+255 """Produce a string to summarize the experiment."""
+256 accuracy_array , _ = zip ( * test_record )
+257 total_accuracy = np . mean ( accuracy_array )
+258 output_string = """
+259 Config:
+260 sigma_alpha: {}
+261 sigma_beta: {}
+262 crp_alpha: {}
+263 learning rate: {}
+264 regularization: {}
+265 batch size: {}
+266
+267 Performance:
+268 averaged accuracy: {:.6f}
+269 accuracy numbers for all testing sequences:
+270 """ . strip () . format (
+271 training_args . sigma_alpha ,
+272 training_args . sigma_beta ,
+273 model_args . crp_alpha ,
+274 training_args . learning_rate ,
+275 training_args . regularization_weight ,
+276 training_args . batch_size ,
+277 total_accuracy )
+278 for accuracy in accuracy_array :
+279 output_string += ' \n {:.6f} ' . format ( accuracy )
+280 output_string += ' \n ' + '=' * 80 + ' \n '
+281 filename = 'layer_ {} _ {} _ {:.1f} _result.txt' . format (
+282 model_args . rnn_hidden_size ,
+283 model_args . rnn_depth , model_args . rnn_dropout )
+284 with open ( filename , 'a' ) as file_object :
+285 file_object . write ( output_string )
+286 return output_string
@@ -497,517 +495,517 @@
- 80 class UISRNN :
- 81 """Unbounded Interleaved-State Recurrent Neural Networks."""
- 82
- 83 def __init__ ( self , args ):
- 84 """Construct the UISRNN object.
- 85
- 86 Args:
- 87 args: Model configurations. See `arguments.py` for details.
- 88 """
- 89 self . observation_dim = args . observation_dim
- 90 self . device = torch . device (
- 91 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
- 92 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
- 93 args . rnn_depth , self . observation_dim ,
- 94 args . rnn_dropout ) . to ( self . device )
- 95 self . rnn_init_hidden = nn . Parameter (
- 96 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
- 97 # booleans indicating which variables are trainable
- 98 self . estimate_sigma2 = ( args . sigma2 is None )
- 99 self . estimate_transition_bias = ( args . transition_bias is None )
-100 # initial values of variables
-101 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
-102 self . sigma2 = nn . Parameter (
-103 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
-104 self . transition_bias = args . transition_bias
-105 self . transition_bias_denominator = 0.0
-106 self . crp_alpha = args . crp_alpha
-107 self . logger = utils . Logger ( args . verbosity )
-108
-109 def _get_optimizer ( self , optimizer , learning_rate ):
-110 """Get optimizer for UISRNN.
-111
-112 Args:
-113 optimizer: string - name of the optimizer.
-114 learning_rate: - learning rate for the entire model.
-115 We do not customize learning rate for separate parts.
-116
-117 Returns:
-118 a pytorch "optim" object
-119 """
-120 params = [
-121 {
-122 'params' : self . rnn_model . parameters ()
-123 }, # rnn parameters
-124 {
-125 'params' : self . rnn_init_hidden
-126 } # rnn initial hidden state
-127 ]
-128 if self . estimate_sigma2 : # train sigma2
-129 params . append ({
-130 'params' : self . sigma2
-131 }) # variance parameters
-132 assert optimizer == 'adam' , 'Only adam optimizer is supported.'
-133 return optim . Adam ( params , lr = learning_rate )
-134
-135 def save ( self , filepath ):
-136 """Save the model to a file.
-137
-138 Args:
-139 filepath: the path of the file.
-140 """
-141 torch . save ({
-142 'rnn_state_dict' : self . rnn_model . state_dict (),
-143 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
-144 'transition_bias' : self . transition_bias ,
-145 'transition_bias_denominator' : self . transition_bias_denominator ,
-146 'crp_alpha' : self . crp_alpha ,
-147 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
-148
-149 def load ( self , filepath ):
-150 """Load the model from a file.
-151
-152 Args:
-153 filepath: the path of the file.
-154 """
-155 var_dict = torch . load ( filepath )
-156 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
-157 self . rnn_init_hidden = nn . Parameter (
-158 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
-159 self . transition_bias = float ( var_dict [ 'transition_bias' ])
-160 self . transition_bias_denominator = float (
-161 var_dict [ 'transition_bias_denominator' ])
-162 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
-163 self . sigma2 = nn . Parameter (
-164 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
-165
-166 self . logger . print (
-167 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
-168 'rnn_init_hidden= {} ' . format (
-169 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
-170 var_dict [ 'rnn_init_hidden' ]))
-171
-172 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
-173 """Fit UISRNN model to concatenated sequence and cluster_id.
-174
-175 Args:
-176 train_sequence: the training observation sequence, which is a
-177 2-dim numpy array of real numbers, of size `N * D`.
-178
-179 - `N`: summation of lengths of all utterances.
-180 - `D`: observation dimension.
-181
-182 For example,
-183 ```
-184 train_sequence =
-185 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
-186 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
-187 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
-188 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
-189 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
-190 ```
-191 Here `N=5`, `D=4`.
-192
-193 We concatenate all training utterances into this single sequence.
-194 train_cluster_id: the speaker id sequence, which is 1-dim list or
-195 numpy array of strings, of size `N`.
-196 For example,
-197 ```
-198 train_cluster_id =
-199 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
-200 ```
-201 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
-202
-203 Note that the order of entries within an utterance are preserved,
-204 and all utterances are simply concatenated together.
-205 args: Training configurations. See `arguments.py` for details.
-206
-207 Raises:
-208 TypeError: If train_sequence or train_cluster_id is of wrong type.
-209 ValueError: If train_sequence or train_cluster_id has wrong dimension.
-210 """
-211 # check type
-212 if ( not isinstance ( train_sequence , np . ndarray ) or
-213 train_sequence . dtype != float ):
-214 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
-215 if isinstance ( train_cluster_id , list ):
-216 train_cluster_id = np . array ( train_cluster_id )
-217 if ( not isinstance ( train_cluster_id , np . ndarray ) or
-218 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
-219 raise TypeError ( 'train_cluster_id type be a numpy array of strings.' )
-220 # check dimension
-221 if train_sequence . ndim != 2 :
-222 raise ValueError ( 'train_sequence must be 2-dim array.' )
-223 if train_cluster_id . ndim != 1 :
-224 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
-225 # check length and size
-226 train_total_length , observation_dim = train_sequence . shape
-227 if observation_dim != self . observation_dim :
-228 raise ValueError ( 'train_sequence does not match the dimension specified '
-229 'by args.observation_dim.' )
-230 if train_total_length != len ( train_cluster_id ):
-231 raise ValueError ( 'train_sequence length is not equal to '
-232 'train_cluster_id length.' )
-233
-234 self . rnn_model . train ()
-235 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
-236 learning_rate = args . learning_rate )
-237
-238 sub_sequences , seq_lengths = utils . resize_sequence (
-239 sequence = train_sequence ,
-240 cluster_id = train_cluster_id ,
-241 num_permutations = args . num_permutations )
-242
-243 # For batch learning, pack the entire dataset.
-244 if args . batch_size is None :
-245 packed_train_sequence , rnn_truth = utils . pack_sequence (
-246 sub_sequences ,
-247 seq_lengths ,
-248 args . batch_size ,
-249 self . observation_dim ,
-250 self . device )
-251 train_loss = []
-252 for num_iter in range ( args . train_iteration ):
-253 optimizer . zero_grad ()
-254 # For online learning, pack a subset in each iteration.
-255 if args . batch_size is not None :
-256 packed_train_sequence , rnn_truth = utils . pack_sequence (
-257 sub_sequences ,
-258 seq_lengths ,
-259 args . batch_size ,
-260 self . observation_dim ,
-261 self . device )
-262 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
-263 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
-264 # use mean to predict
-265 mean = torch . cumsum ( mean , dim = 0 )
-266 mean_size = mean . size ()
-267 mean = torch . mm (
-268 torch . diag (
-269 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
-270 mean . view ( mean_size [ 0 ], - 1 ))
-271 mean = mean . view ( mean_size )
-272
-273 # Likelihood part.
-274 loss1 = loss_func . weighted_mse_loss (
-275 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
-276 target_tensor = rnn_truth ,
-277 weight = 1 / ( 2 * self . sigma2 ))
-278
-279 # Sigma2 prior part.
-280 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
-281 ** 2 ) . view ( - 1 , observation_dim )
-282 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
-283 loss2 = loss_func . sigma2_prior_loss (
-284 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
-285
-286 # Regularization part.
-287 loss3 = loss_func . regularization_loss (
-288 self . rnn_model . parameters (), args . regularization_weight )
-289
-290 loss = loss1 + loss2 + loss3
-291 loss . backward ()
-292 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
-293 optimizer . step ()
-294 # avoid numerical issues
-295 self . sigma2 . data . clamp_ ( min = 1e-6 )
-296
-297 if ( np . remainder ( num_iter , 10 ) == 0 or
-298 num_iter == args . train_iteration - 1 ):
-299 self . logger . print (
-300 2 ,
-301 'Iter: {:d} \t '
-302 'Training Loss: {:.4f} \n '
-303 ' Negative Log Likelihood: {:.4f} \t '
-304 'Sigma2 Prior: {:.4f} \t '
-305 'Regularization: {:.4f} ' . format (
-306 num_iter ,
-307 float ( loss . data ),
-308 float ( loss1 . data ),
-309 float ( loss2 . data ),
-310 float ( loss3 . data )))
-311 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
-312 self . logger . print (
-313 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
-314
-315 def fit ( self , train_sequences , train_cluster_ids , args ):
-316 """Fit UISRNN model.
-317
-318 Args:
-319 train_sequences: Either a list of training sequences, or a single
-320 concatenated training sequence:
-321
-322 1. train_sequences is list, and each element is a 2-dim numpy array
-323 of real numbers, of size: `length * D`.
-324 The length varies among different sequences, but the D is the same.
-325 In speaker diarization, each sequence is the sequence of speaker
-326 embeddings of one utterance.
-327 2. train_sequences is a single concatenated sequence, which is a
-328 2-dim numpy array of real numbers. See `fit_concatenated()`
-329 for more details.
-330 train_cluster_ids: Ground truth labels for train_sequences:
-331
-332 1. if train_sequences is a list, this must also be a list of the same
-333 size, each element being a 1-dim list or numpy array of strings.
-334 2. if train_sequences is a single concatenated sequence, this
-335 must also be the concatenated 1-dim list or numpy array of strings
-336 args: Training configurations. See `arguments.py` for details.
-337
-338 Raises:
-339 TypeError: If train_sequences or train_cluster_ids is of wrong type.
-340 """
-341 if isinstance ( train_sequences , np . ndarray ):
-342 # train_sequences is already the concatenated sequence
-343 if self . estimate_transition_bias :
-344 # see issue #55: https://github.com/google/uis-rnn/issues/55
-345 self . logger . print (
-346 2 ,
-347 'Warning: transition_bias cannot be correctly estimated from a '
-348 'concatenated sequence; train_sequences will be treated as a '
-349 'single sequence. This can lead to inaccurate estimation of '
-350 'transition_bias. Please, consider estimating transition_bias '
-351 'before concatenating the sequences and passing it as argument.' )
-352 train_sequences = [ train_sequences ]
-353 train_cluster_ids = [ train_cluster_ids ]
-354 elif isinstance ( train_sequences , list ):
-355 # train_sequences is a list of un-concatenated sequences
-356 # we will concatenate it later, after estimating transition_bias
-357 pass
-358 else :
-359 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
-360
-361 # estimate transition_bias
-362 if self . estimate_transition_bias :
-363 ( transition_bias ,
-364 transition_bias_denominator ) = utils . estimate_transition_bias (
-365 train_cluster_ids )
-366 # set or update transition_bias
-367 if self . transition_bias is None :
-368 self . transition_bias = transition_bias
-369 self . transition_bias_denominator = transition_bias_denominator
-370 else :
-371 self . transition_bias = (
-372 self . transition_bias * self . transition_bias_denominator +
-373 transition_bias * transition_bias_denominator ) / (
-374 self . transition_bias_denominator + transition_bias_denominator )
-375 self . transition_bias_denominator += transition_bias_denominator
-376
-377 # concatenate train_sequences
-378 ( concatenated_train_sequence ,
-379 concatenated_train_cluster_id ) = utils . concatenate_training_data (
-380 train_sequences ,
-381 train_cluster_ids ,
-382 args . enforce_cluster_id_uniqueness ,
-383 True )
-384
-385 self . fit_concatenated (
-386 concatenated_train_sequence , concatenated_train_cluster_id , args )
-387
-388 def _update_beam_state ( self , beam_state , look_ahead_seq , cluster_seq ):
-389 """Update a beam state given a look ahead sequence and known cluster
-390 assignments.
-391
-392 Args:
-393 beam_state: A BeamState object.
-394 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
-395 look_ahead: number of step to look ahead in the beam search.
-396 D: observation dimension
-397 cluster_seq: Cluster assignment sequence for look_ahead_seq.
-398
-399 Returns:
-400 new_beam_state: An updated BeamState object.
-401 """
-402
-403 loss = 0
-404 new_beam_state = BeamState ( beam_state )
-405 for sub_idx , cluster in enumerate ( cluster_seq ):
-406 if cluster > len ( new_beam_state . mean_set ): # invalid trace
-407 new_beam_state . neg_likelihood = float ( 'inf' )
-408 break
-409 elif cluster < len ( new_beam_state . mean_set ): # existing cluster
-410 last_cluster = new_beam_state . trace [ - 1 ]
-411 loss = loss_func . weighted_mse_loss (
-412 input_tensor = torch . squeeze ( new_beam_state . mean_set [ cluster ]),
-413 target_tensor = look_ahead_seq [ sub_idx , :],
-414 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
-415 if cluster == last_cluster :
-416 loss -= np . log ( 1 - self . transition_bias )
-417 else :
-418 loss -= np . log ( self . transition_bias ) + np . log (
-419 new_beam_state . block_counts [ cluster ]) - np . log (
-420 sum ( new_beam_state . block_counts ) + self . crp_alpha )
-421 # update new mean and new hidden
-422 mean , hidden = self . rnn_model (
-423 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
-424 new_beam_state . hidden_set [ cluster ])
-425 new_beam_state . mean_set [ cluster ] = ( new_beam_state . mean_set [ cluster ] * (
-426 ( np . array ( new_beam_state . trace ) == cluster ) . sum () -
-427 1 ) . astype ( float ) + mean . clone ()) / (
-428 np . array ( new_beam_state . trace ) == cluster ) . sum () . astype (
-429 float ) # use mean to predict
-430 new_beam_state . hidden_set [ cluster ] = hidden . clone ()
-431 if cluster != last_cluster :
-432 new_beam_state . block_counts [ cluster ] += 1
-433 new_beam_state . trace . append ( cluster )
-434 else : # new cluster
-435 init_input = autograd . Variable (
-436 torch . zeros ( self . observation_dim )
-437 ) . unsqueeze ( 0 ) . unsqueeze ( 0 ) . to ( self . device )
-438 mean , hidden = self . rnn_model ( init_input ,
-439 self . rnn_init_hidden )
-440 loss = loss_func . weighted_mse_loss (
-441 input_tensor = torch . squeeze ( mean ),
-442 target_tensor = look_ahead_seq [ sub_idx , :],
-443 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
-444 loss -= np . log ( self . transition_bias ) + np . log (
-445 self . crp_alpha ) - np . log (
-446 sum ( new_beam_state . block_counts ) + self . crp_alpha )
-447 # update new min and new hidden
-448 mean , hidden = self . rnn_model (
-449 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
-450 hidden )
-451 new_beam_state . append ( mean , hidden , cluster )
-452 new_beam_state . neg_likelihood += loss
-453 return new_beam_state
-454
-455 def _calculate_score ( self , beam_state , look_ahead_seq ):
-456 """Calculate negative log likelihoods for all possible state allocations
-457 of a look ahead sequence, according to the current beam state.
-458
-459 Args:
-460 beam_state: A BeamState object.
-461 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
-462 look_ahead: number of step to look ahead in the beam search.
-463 D: observation dimension
-464
-465 Returns:
-466 beam_score_set: a set of scores for each possible state allocation.
-467 """
-468
-469 look_ahead , _ = look_ahead_seq . shape
-470 beam_num_clusters = len ( beam_state . mean_set )
-471 beam_score_set = float ( 'inf' ) * np . ones (
-472 beam_num_clusters + 1 + np . arange ( look_ahead ))
-473 for cluster_seq , _ in np . ndenumerate ( beam_score_set ):
-474 updated_beam_state = self . _update_beam_state ( beam_state ,
-475 look_ahead_seq , cluster_seq )
-476 beam_score_set [ cluster_seq ] = updated_beam_state . neg_likelihood
-477 return beam_score_set
-478
-479 def predict_single ( self , test_sequence , args ):
-480 """Predict labels for a single test sequence using UISRNN model.
-481
-482 Args:
-483 test_sequence: the test observation sequence, which is 2-dim numpy array
-484 of real numbers, of size `N * D`.
-485
-486 - `N`: length of one test utterance.
-487 - `D` : observation dimension.
-488
-489 For example:
-490 ```
-491 test_sequence =
-492 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
-493 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
-494 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
-495 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
-496 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
-497 ```
-498 Here `N=5`, `D=4`.
-499 args: Inference configurations. See `arguments.py` for details.
-500
-501 Returns:
-502 predicted_cluster_id: predicted speaker id sequence, which is
-503 an array of integers, of size `N`.
-504 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
-505
-506 Raises:
-507 TypeError: If test_sequence is of wrong type.
-508 ValueError: If test_sequence has wrong dimension.
-509 """
-510 # check type
-511 if ( not isinstance ( test_sequence , np . ndarray ) or
-512 test_sequence . dtype != float ):
-513 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
-514 # check dimension
-515 if test_sequence . ndim != 2 :
-516 raise ValueError ( 'test_sequence must be 2-dim array.' )
-517 # check size
-518 test_sequence_length , observation_dim = test_sequence . shape
-519 if observation_dim != self . observation_dim :
-520 raise ValueError ( 'test_sequence does not match the dimension specified '
-521 'by args.observation_dim.' )
-522
-523 self . rnn_model . eval ()
-524 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
-525 test_sequence = autograd . Variable (
-526 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
-527 # bookkeeping for beam search
-528 beam_set = [ BeamState ()]
-529 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
-530 args . look_ahead ):
-531 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
-532 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
-533 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
-534 score_set = float ( 'inf' ) * np . ones (
-535 np . append (
-536 args . beam_size , max_clusters + 1 + np . arange (
-537 look_ahead_seq_length )))
-538 for beam_rank , beam_state in enumerate ( beam_set ):
-539 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
-540 score_set [ beam_rank , :] = np . pad (
-541 beam_score_set ,
-542 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
-543 ( look_ahead_seq_length , 1 )), 'constant' ,
-544 constant_values = float ( 'inf' ))
-545 # find top scores
-546 score_ranked = np . sort ( score_set , axis = None )
-547 score_ranked [ score_ranked == float ( 'inf' )] = 0
-548 score_ranked = np . trim_zeros ( score_ranked )
-549 idx_ranked = np . argsort ( score_set , axis = None )
-550 updated_beam_set = []
-551 for new_beam_rank in range (
-552 np . min (( len ( score_ranked ), args . beam_size ))):
-553 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
-554 score_set . shape )
-555 prev_beam_rank = total_idx [ 0 ] . item ()
-556 cluster_seq = total_idx [ 1 :]
-557 updated_beam_state = self . _update_beam_state (
-558 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
-559 updated_beam_set . append ( updated_beam_state )
-560 beam_set = updated_beam_set
-561 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
-562 return predicted_cluster_id
-563
-564 def predict ( self , test_sequences , args ):
-565 """Predict labels for a single or many test sequences using UISRNN model.
-566
-567 Args:
-568 test_sequences: Either a list of test sequences, or a single test
-569 sequence. Each test sequence is a 2-dim numpy array
-570 of real numbers. See `predict_single()` for details.
-571 args: Inference configurations. See `arguments.py` for details.
-572
-573 Returns:
-574 predicted_cluster_ids: Predicted labels for test_sequences.
-575
-576 1. if test_sequences is a list, predicted_cluster_ids will be a list
-577 of the same size, where each element being a 1-dim list of strings.
-578 2. if test_sequences is a single sequence, predicted_cluster_ids will
-579 be a 1-dim list of strings
-580
-581 Raises:
-582 TypeError: If test_sequences is of wrong type.
-583 """
-584 # check type
-585 if isinstance ( test_sequences , np . ndarray ):
-586 return self . predict_single ( test_sequences , args )
-587 if isinstance ( test_sequences , list ):
-588 return [ self . predict_single ( test_sequence , args )
-589 for test_sequence in test_sequences ]
-590 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
+ 81 class UISRNN :
+ 82 """Unbounded Interleaved-State Recurrent Neural Networks."""
+ 83
+ 84 def __init__ ( self , args ):
+ 85 """Construct the UISRNN object.
+ 86
+ 87 Args:
+ 88 args: Model configurations. See `arguments.py` for details.
+ 89 """
+ 90 self . observation_dim = args . observation_dim
+ 91 self . device = torch . device (
+ 92 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
+ 93 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
+ 94 args . rnn_depth , self . observation_dim ,
+ 95 args . rnn_dropout ) . to ( self . device )
+ 96 self . rnn_init_hidden = nn . Parameter (
+ 97 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
+ 98 # booleans indicating which variables are trainable
+ 99 self . estimate_sigma2 = ( args . sigma2 is None )
+100 self . estimate_transition_bias = ( args . transition_bias is None )
+101 # initial values of variables
+102 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
+103 self . sigma2 = nn . Parameter (
+104 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
+105 self . transition_bias = args . transition_bias
+106 self . transition_bias_denominator = 0.0
+107 self . crp_alpha = args . crp_alpha
+108 self . logger = colortimelog . Logger ( args . verbosity )
+109
+110 def _get_optimizer ( self , optimizer , learning_rate ):
+111 """Get optimizer for UISRNN.
+112
+113 Args:
+114 optimizer: string - name of the optimizer.
+115 learning_rate: - learning rate for the entire model.
+116 We do not customize learning rate for separate parts.
+117
+118 Returns:
+119 a pytorch "optim" object
+120 """
+121 params = [
+122 {
+123 'params' : self . rnn_model . parameters ()
+124 }, # rnn parameters
+125 {
+126 'params' : self . rnn_init_hidden
+127 } # rnn initial hidden state
+128 ]
+129 if self . estimate_sigma2 : # train sigma2
+130 params . append ({
+131 'params' : self . sigma2
+132 }) # variance parameters
+133 assert optimizer == 'adam' , 'Only adam optimizer is supported.'
+134 return optim . Adam ( params , lr = learning_rate )
+135
+136 def save ( self , filepath ):
+137 """Save the model to a file.
+138
+139 Args:
+140 filepath: the path of the file.
+141 """
+142 torch . save ({
+143 'rnn_state_dict' : self . rnn_model . state_dict (),
+144 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
+145 'transition_bias' : self . transition_bias ,
+146 'transition_bias_denominator' : self . transition_bias_denominator ,
+147 'crp_alpha' : self . crp_alpha ,
+148 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
+149
+150 def load ( self , filepath ):
+151 """Load the model from a file.
+152
+153 Args:
+154 filepath: the path of the file.
+155 """
+156 var_dict = torch . load ( filepath )
+157 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
+158 self . rnn_init_hidden = nn . Parameter (
+159 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
+160 self . transition_bias = float ( var_dict [ 'transition_bias' ])
+161 self . transition_bias_denominator = float (
+162 var_dict [ 'transition_bias_denominator' ])
+163 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
+164 self . sigma2 = nn . Parameter (
+165 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
+166
+167 self . logger . print (
+168 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
+169 'rnn_init_hidden= {} ' . format (
+170 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
+171 var_dict [ 'rnn_init_hidden' ]))
+172
+173 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
+174 """Fit UISRNN model to concatenated sequence and cluster_id.
+175
+176 Args:
+177 train_sequence: the training observation sequence, which is a
+178 2-dim numpy array of real numbers, of size `N * D`.
+179
+180 - `N`: summation of lengths of all utterances.
+181 - `D`: observation dimension.
+182
+183 For example,
+184 ```
+185 train_sequence =
+186 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
+187 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
+188 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
+189 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
+190 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
+191 ```
+192 Here `N=5`, `D=4`.
+193
+194 We concatenate all training utterances into this single sequence.
+195 train_cluster_id: the speaker id sequence, which is 1-dim list or
+196 numpy array of strings, of size `N`.
+197 For example,
+198 ```
+199 train_cluster_id =
+200 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
+201 ```
+202 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
+203
+204 Note that the order of entries within an utterance are preserved,
+205 and all utterances are simply concatenated together.
+206 args: Training configurations. See `arguments.py` for details.
+207
+208 Raises:
+209 TypeError: If train_sequence or train_cluster_id is of wrong type.
+210 ValueError: If train_sequence or train_cluster_id has wrong dimension.
+211 """
+212 # check type
+213 if ( not isinstance ( train_sequence , np . ndarray ) or
+214 train_sequence . dtype != float ):
+215 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
+216 if isinstance ( train_cluster_id , list ):
+217 train_cluster_id = np . array ( train_cluster_id )
+218 if ( not isinstance ( train_cluster_id , np . ndarray ) or
+219 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
+220 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
+221 # check dimension
+222 if train_sequence . ndim != 2 :
+223 raise ValueError ( 'train_sequence must be 2-dim array.' )
+224 if train_cluster_id . ndim != 1 :
+225 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
+226 # check length and size
+227 train_total_length , observation_dim = train_sequence . shape
+228 if observation_dim != self . observation_dim :
+229 raise ValueError ( 'train_sequence does not match the dimension specified '
+230 'by args.observation_dim.' )
+231 if train_total_length != len ( train_cluster_id ):
+232 raise ValueError ( 'train_sequence length is not equal to '
+233 'train_cluster_id length.' )
+234
+235 self . rnn_model . train ()
+236 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
+237 learning_rate = args . learning_rate )
+238
+239 sub_sequences , seq_lengths = utils . resize_sequence (
+240 sequence = train_sequence ,
+241 cluster_id = train_cluster_id ,
+242 num_permutations = args . num_permutations )
+243
+244 # For batch learning, pack the entire dataset.
+245 if args . batch_size is None :
+246 packed_train_sequence , rnn_truth = utils . pack_sequence (
+247 sub_sequences ,
+248 seq_lengths ,
+249 args . batch_size ,
+250 self . observation_dim ,
+251 self . device )
+252 train_loss = []
+253 for num_iter in range ( args . train_iteration ):
+254 optimizer . zero_grad ()
+255 # For online learning, pack a subset in each iteration.
+256 if args . batch_size is not None :
+257 packed_train_sequence , rnn_truth = utils . pack_sequence (
+258 sub_sequences ,
+259 seq_lengths ,
+260 args . batch_size ,
+261 self . observation_dim ,
+262 self . device )
+263 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
+264 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
+265 # use mean to predict
+266 mean = torch . cumsum ( mean , dim = 0 )
+267 mean_size = mean . size ()
+268 mean = torch . mm (
+269 torch . diag (
+270 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
+271 mean . view ( mean_size [ 0 ], - 1 ))
+272 mean = mean . view ( mean_size )
+273
+274 # Likelihood part.
+275 loss1 = loss_func . weighted_mse_loss (
+276 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
+277 target_tensor = rnn_truth ,
+278 weight = 1 / ( 2 * self . sigma2 ))
+279
+280 # Sigma2 prior part.
+281 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
+282 ** 2 ) . view ( - 1 , observation_dim )
+283 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
+284 loss2 = loss_func . sigma2_prior_loss (
+285 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
+286
+287 # Regularization part.
+288 loss3 = loss_func . regularization_loss (
+289 self . rnn_model . parameters (), args . regularization_weight )
+290
+291 loss = loss1 + loss2 + loss3
+292 loss . backward ()
+293 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
+294 optimizer . step ()
+295 # avoid numerical issues
+296 self . sigma2 . data . clamp_ ( min = 1e-6 )
+297
+298 if ( np . remainder ( num_iter , 10 ) == 0 or
+299 num_iter == args . train_iteration - 1 ):
+300 self . logger . print (
+301 2 ,
+302 'Iter: {:d} \t '
+303 'Training Loss: {:.4f} \n '
+304 ' Negative Log Likelihood: {:.4f} \t '
+305 'Sigma2 Prior: {:.4f} \t '
+306 'Regularization: {:.4f} ' . format (
+307 num_iter ,
+308 float ( loss . data ),
+309 float ( loss1 . data ),
+310 float ( loss2 . data ),
+311 float ( loss3 . data )))
+312 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
+313 self . logger . print (
+314 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
+315
+316 def fit ( self , train_sequences , train_cluster_ids , args ):
+317 """Fit UISRNN model.
+318
+319 Args:
+320 train_sequences: Either a list of training sequences, or a single
+321 concatenated training sequence:
+322
+323 1. train_sequences is list, and each element is a 2-dim numpy array
+324 of real numbers, of size: `length * D`.
+325 The length varies among different sequences, but the D is the same.
+326 In speaker diarization, each sequence is the sequence of speaker
+327 embeddings of one utterance.
+328 2. train_sequences is a single concatenated sequence, which is a
+329 2-dim numpy array of real numbers. See `fit_concatenated()`
+330 for more details.
+331 train_cluster_ids: Ground truth labels for train_sequences:
+332
+333 1. if train_sequences is a list, this must also be a list of the same
+334 size, each element being a 1-dim list or numpy array of strings.
+335 2. if train_sequences is a single concatenated sequence, this
+336 must also be the concatenated 1-dim list or numpy array of strings
+337 args: Training configurations. See `arguments.py` for details.
+338
+339 Raises:
+340 TypeError: If train_sequences or train_cluster_ids is of wrong type.
+341 """
+342 if isinstance ( train_sequences , np . ndarray ):
+343 # train_sequences is already the concatenated sequence
+344 if self . estimate_transition_bias :
+345 # see issue #55: https://github.com/google/uis-rnn/issues/55
+346 self . logger . print (
+347 2 ,
+348 'Warning: transition_bias cannot be correctly estimated from a '
+349 'concatenated sequence; train_sequences will be treated as a '
+350 'single sequence. This can lead to inaccurate estimation of '
+351 'transition_bias. Please, consider estimating transition_bias '
+352 'before concatenating the sequences and passing it as argument.' )
+353 train_sequences = [ train_sequences ]
+354 train_cluster_ids = [ train_cluster_ids ]
+355 elif isinstance ( train_sequences , list ):
+356 # train_sequences is a list of un-concatenated sequences
+357 # we will concatenate it later, after estimating transition_bias
+358 pass
+359 else :
+360 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
+361
+362 # estimate transition_bias
+363 if self . estimate_transition_bias :
+364 ( transition_bias ,
+365 transition_bias_denominator ) = utils . estimate_transition_bias (
+366 train_cluster_ids )
+367 # set or update transition_bias
+368 if self . transition_bias is None :
+369 self . transition_bias = transition_bias
+370 self . transition_bias_denominator = transition_bias_denominator
+371 else :
+372 self . transition_bias = (
+373 self . transition_bias * self . transition_bias_denominator +
+374 transition_bias * transition_bias_denominator ) / (
+375 self . transition_bias_denominator + transition_bias_denominator )
+376 self . transition_bias_denominator += transition_bias_denominator
+377
+378 # concatenate train_sequences
+379 ( concatenated_train_sequence ,
+380 concatenated_train_cluster_id ) = utils . concatenate_training_data (
+381 train_sequences ,
+382 train_cluster_ids ,
+383 args . enforce_cluster_id_uniqueness ,
+384 True )
+385
+386 self . fit_concatenated (
+387 concatenated_train_sequence , concatenated_train_cluster_id , args )
+388
+389 def _update_beam_state ( self , beam_state , look_ahead_seq , cluster_seq ):
+390 """Update a beam state given a look ahead sequence and known cluster
+391 assignments.
+392
+393 Args:
+394 beam_state: A BeamState object.
+395 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
+396 look_ahead: number of steps to look ahead in the beam search.
+397 D: observation dimension
+398 cluster_seq: Cluster assignment sequence for look_ahead_seq.
+399
+400 Returns:
+401 new_beam_state: An updated BeamState object.
+402 """
+403
+404 loss = 0
+405 new_beam_state = BeamState ( beam_state )
+406 for sub_idx , cluster in enumerate ( cluster_seq ):
+407 if cluster > len ( new_beam_state . mean_set ): # invalid trace
+408 new_beam_state . neg_likelihood = float ( 'inf' )
+409 break
+410 elif cluster < len ( new_beam_state . mean_set ): # existing cluster
+411 last_cluster = new_beam_state . trace [ - 1 ]
+412 loss = loss_func . weighted_mse_loss (
+413 input_tensor = torch . squeeze ( new_beam_state . mean_set [ cluster ]),
+414 target_tensor = look_ahead_seq [ sub_idx , :],
+415 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
+416 if cluster == last_cluster :
+417 loss -= np . log ( 1 - self . transition_bias )
+418 else :
+419 loss -= np . log ( self . transition_bias ) + np . log (
+420 new_beam_state . block_counts [ cluster ]) - np . log (
+421 sum ( new_beam_state . block_counts ) + self . crp_alpha )
+422 # update new mean and new hidden
+423 mean , hidden = self . rnn_model (
+424 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
+425 new_beam_state . hidden_set [ cluster ])
+426 new_beam_state . mean_set [ cluster ] = ( new_beam_state . mean_set [ cluster ] * (
+427 ( np . array ( new_beam_state . trace ) == cluster ) . sum () -
+428 1 ) . astype ( float ) + mean . clone ()) / (
+429 np . array ( new_beam_state . trace ) == cluster ) . sum () . astype (
+430 float ) # use mean to predict
+431 new_beam_state . hidden_set [ cluster ] = hidden . clone ()
+432 if cluster != last_cluster :
+433 new_beam_state . block_counts [ cluster ] += 1
+434 new_beam_state . trace . append ( cluster )
+435 else : # new cluster
+436 init_input = autograd . Variable (
+437 torch . zeros ( self . observation_dim )
+438 ) . unsqueeze ( 0 ) . unsqueeze ( 0 ) . to ( self . device )
+439 mean , hidden = self . rnn_model ( init_input ,
+440 self . rnn_init_hidden )
+441 loss = loss_func . weighted_mse_loss (
+442 input_tensor = torch . squeeze ( mean ),
+443 target_tensor = look_ahead_seq [ sub_idx , :],
+444 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
+445 loss -= np . log ( self . transition_bias ) + np . log (
+446 self . crp_alpha ) - np . log (
+447 sum ( new_beam_state . block_counts ) + self . crp_alpha )
+448 # update new mean and new hidden
+449 mean , hidden = self . rnn_model (
+450 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
+451 hidden )
+452 new_beam_state . append ( mean , hidden , cluster )
+453 new_beam_state . neg_likelihood += loss
+454 return new_beam_state
+455
+456 def _calculate_score ( self , beam_state , look_ahead_seq ):
+457 """Calculate negative log likelihoods for all possible state allocations
+458 of a look ahead sequence, according to the current beam state.
+459
+460 Args:
+461 beam_state: A BeamState object.
+462 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
+463 look_ahead: number of steps to look ahead in the beam search.
+464 D: observation dimension
+465
+466 Returns:
+467 beam_score_set: a set of scores for each possible state allocation.
+468 """
+469
+470 look_ahead , _ = look_ahead_seq . shape
+471 beam_num_clusters = len ( beam_state . mean_set )
+472 beam_score_set = float ( 'inf' ) * np . ones (
+473 beam_num_clusters + 1 + np . arange ( look_ahead ))
+474 for cluster_seq , _ in np . ndenumerate ( beam_score_set ):
+475 updated_beam_state = self . _update_beam_state ( beam_state ,
+476 look_ahead_seq , cluster_seq )
+477 beam_score_set [ cluster_seq ] = updated_beam_state . neg_likelihood
+478 return beam_score_set
+479
+480 def predict_single ( self , test_sequence , args ):
+481 """Predict labels for a single test sequence using UISRNN model.
+482
+483 Args:
+484 test_sequence: the test observation sequence, which is 2-dim numpy array
+485 of real numbers, of size `N * D`.
+486
+487 - `N`: length of one test utterance.
+488 - `D` : observation dimension.
+489
+490 For example:
+491 ```
+492 test_sequence =
+493 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
+494 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
+495 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
+496 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
+497 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
+498 ```
+499 Here `N=5`, `D=4`.
+500 args: Inference configurations. See `arguments.py` for details.
+501
+502 Returns:
+503 predicted_cluster_id: predicted speaker id sequence, which is
+504 an array of integers, of size `N`.
+505 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
+506
+507 Raises:
+508 TypeError: If test_sequence is of wrong type.
+509 ValueError: If test_sequence has wrong dimension.
+510 """
+511 # check type
+512 if ( not isinstance ( test_sequence , np . ndarray ) or
+513 test_sequence . dtype != float ):
+514 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
+515 # check dimension
+516 if test_sequence . ndim != 2 :
+517 raise ValueError ( 'test_sequence must be 2-dim array.' )
+518 # check size
+519 test_sequence_length , observation_dim = test_sequence . shape
+520 if observation_dim != self . observation_dim :
+521 raise ValueError ( 'test_sequence does not match the dimension specified '
+522 'by args.observation_dim.' )
+523
+524 self . rnn_model . eval ()
+525 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
+526 test_sequence = autograd . Variable (
+527 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
+528 # bookkeeping for beam search
+529 beam_set = [ BeamState ()]
+530 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
+531 args . look_ahead ):
+532 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
+533 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
+534 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
+535 score_set = float ( 'inf' ) * np . ones (
+536 np . append (
+537 args . beam_size , max_clusters + 1 + np . arange (
+538 look_ahead_seq_length )))
+539 for beam_rank , beam_state in enumerate ( beam_set ):
+540 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
+541 score_set [ beam_rank , :] = np . pad (
+542 beam_score_set ,
+543 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
+544 ( look_ahead_seq_length , 1 )), 'constant' ,
+545 constant_values = float ( 'inf' ))
+546 # find top scores
+547 score_ranked = np . sort ( score_set , axis = None )
+548 score_ranked [ score_ranked == float ( 'inf' )] = 0
+549 score_ranked = np . trim_zeros ( score_ranked )
+550 idx_ranked = np . argsort ( score_set , axis = None )
+551 updated_beam_set = []
+552 for new_beam_rank in range (
+553 np . min (( len ( score_ranked ), args . beam_size ))):
+554 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
+555 score_set . shape )
+556 prev_beam_rank = total_idx [ 0 ] . item ()
+557 cluster_seq = total_idx [ 1 :]
+558 updated_beam_state = self . _update_beam_state (
+559 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
+560 updated_beam_set . append ( updated_beam_state )
+561 beam_set = updated_beam_set
+562 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
+563 return predicted_cluster_id
+564
+565 def predict ( self , test_sequences , args ):
+566 """Predict labels for a single or many test sequences using UISRNN model.
+567
+568 Args:
+569 test_sequences: Either a list of test sequences, or a single test
+570 sequence. Each test sequence is a 2-dim numpy array
+571 of real numbers. See `predict_single()` for details.
+572 args: Inference configurations. See `arguments.py` for details.
+573
+574 Returns:
+575 predicted_cluster_ids: Predicted labels for test_sequences.
+576
+577 1. if test_sequences is a list, predicted_cluster_ids will be a list
+578 of the same size, where each element is a 1-dim list of strings.
+579 2. if test_sequences is a single sequence, predicted_cluster_ids will
+580 be a 1-dim list of strings.
+581
+582 Raises:
+583 TypeError: If test_sequences is of wrong type.
+584 """
+585 # check type
+586 if isinstance ( test_sequences , np . ndarray ):
+587 return self . predict_single ( test_sequences , args )
+588 if isinstance ( test_sequences , list ):
+589 return [ self . predict_single ( test_sequence , args )
+590 for test_sequence in test_sequences ]
+591 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
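
The per-step loss assembled in `_update_beam_state` above pairs a Gaussian observation term (`weighted_mse_loss` with weight `1/(2*sigma2)`) with a sequential Chinese restaurant process prior: staying with the previous speaker costs `-log(1 - transition_bias)`, switching to an existing speaker `k` costs `-log(transition_bias) - log(block_counts[k]) + log(sum(block_counts) + crp_alpha)`, and opening a new speaker replaces `block_counts[k]` with `crp_alpha`. A minimal sketch of just this prior term (the helper name and numeric values are illustrative, not library API):

```
import numpy as np

def neg_log_crp_prior(cluster, last_cluster, block_counts,
                      transition_bias, crp_alpha):
  # Mirrors the prior terms accumulated in _update_beam_state.
  if cluster == last_cluster:          # previous speaker keeps talking
    return -np.log(1 - transition_bias)
  denom = sum(block_counts) + crp_alpha
  if cluster < len(block_counts):      # switch back to an existing speaker
    return -(np.log(transition_bias) +
             np.log(block_counts[cluster]) - np.log(denom))
  # open a brand-new speaker
  return -(np.log(transition_bias) + np.log(crp_alpha) - np.log(denom))

# Two existing speakers with 3 and 2 speaker blocks, p_change=0.3, alpha=1.0:
print(neg_log_crp_prior(0, 0, [3, 2], 0.3, 1.0))  # stay with speaker 0
print(neg_log_crp_prior(1, 0, [3, 2], 0.3, 1.0))  # switch to speaker 1
print(neg_log_crp_prior(2, 0, [3, 2], 0.3, 1.0))  # introduce speaker 2
```
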
@@ -1025,31 +1023,31 @@
- 83 def __init__ ( self , args ):
- 84 """Construct the UISRNN object.
- 85
- 86 Args:
- 87 args: Model configurations. See `arguments.py` for details.
- 88 """
- 89 self . observation_dim = args . observation_dim
- 90 self . device = torch . device (
- 91 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
- 92 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
- 93 args . rnn_depth , self . observation_dim ,
- 94 args . rnn_dropout ) . to ( self . device )
- 95 self . rnn_init_hidden = nn . Parameter (
- 96 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
- 97 # booleans indicating which variables are trainable
- 98 self . estimate_sigma2 = ( args . sigma2 is None )
- 99 self . estimate_transition_bias = ( args . transition_bias is None )
-100 # initial values of variables
-101 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
-102 self . sigma2 = nn . Parameter (
-103 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
-104 self . transition_bias = args . transition_bias
-105 self . transition_bias_denominator = 0.0
-106 self . crp_alpha = args . crp_alpha
-107 self . logger = utils . Logger ( args . verbosity )
+ 84 def __init__ ( self , args ):
+ 85 """Construct the UISRNN object.
+ 86
+ 87 Args:
+ 88 args: Model configurations. See `arguments.py` for details.
+ 89 """
+ 90 self . observation_dim = args . observation_dim
+ 91 self . device = torch . device (
+ 92 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
+ 93 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
+ 94 args . rnn_depth , self . observation_dim ,
+ 95 args . rnn_dropout ) . to ( self . device )
+ 96 self . rnn_init_hidden = nn . Parameter (
+ 97 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
+ 98 # booleans indicating which variables are trainable
+ 99 self . estimate_sigma2 = ( args . sigma2 is None )
+100 self . estimate_transition_bias = ( args . transition_bias is None )
+101 # initial values of variables
+102 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
+103 self . sigma2 = nn . Parameter (
+104 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
+105 self . transition_bias = args . transition_bias
+106 self . transition_bias_denominator = 0.0
+107 self . crp_alpha = args . crp_alpha
+108 self . logger = colortimelog . Logger ( args . verbosity )
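
Construction goes through the flags defined in `arguments.py`; a minimal sketch using the package-level `parse_arguments()` helper, which returns `(model_args, training_args, inference_args)` (the `observation_dim` value is a placeholder and must match your embeddings):

```
import uisrnn

model_args, training_args, inference_args = uisrnn.parse_arguments()
model_args.observation_dim = 256  # placeholder dimension
model = uisrnn.UISRNN(model_args)
```
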
@@ -1193,19 +1191,19 @@
- 135 def save ( self , filepath ):
-136 """Save the model to a file.
-137
-138 Args:
-139 filepath: the path of the file.
-140 """
-141 torch . save ({
-142 'rnn_state_dict' : self . rnn_model . state_dict (),
-143 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
-144 'transition_bias' : self . transition_bias ,
-145 'transition_bias_denominator' : self . transition_bias_denominator ,
-146 'crp_alpha' : self . crp_alpha ,
-147 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
+ 136 def save ( self , filepath ):
+137 """Save the model to a file.
+138
+139 Args:
+140 filepath: the path of the file.
+141 """
+142 torch . save ({
+143 'rnn_state_dict' : self . rnn_model . state_dict (),
+144 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
+145 'transition_bias' : self . transition_bias ,
+146 'transition_bias_denominator' : self . transition_bias_denominator ,
+147 'crp_alpha' : self . crp_alpha ,
+148 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
@@ -1228,28 +1226,28 @@
- 149 def load ( self , filepath ):
-150 """Load the model from a file.
-151
-152 Args:
-153 filepath: the path of the file.
-154 """
-155 var_dict = torch . load ( filepath )
-156 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
-157 self . rnn_init_hidden = nn . Parameter (
-158 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
-159 self . transition_bias = float ( var_dict [ 'transition_bias' ])
-160 self . transition_bias_denominator = float (
-161 var_dict [ 'transition_bias_denominator' ])
-162 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
-163 self . sigma2 = nn . Parameter (
-164 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
-165
-166 self . logger . print (
-167 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
-168 'rnn_init_hidden= {} ' . format (
-169 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
-170 var_dict [ 'rnn_init_hidden' ]))
+ 150 def load ( self , filepath ):
+151 """Load the model from a file.
+152
+153 Args:
+154 filepath: the path of the file.
+155 """
+156 var_dict = torch . load ( filepath )
+157 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
+158 self . rnn_init_hidden = nn . Parameter (
+159 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
+160 self . transition_bias = float ( var_dict [ 'transition_bias' ])
+161 self . transition_bias_denominator = float (
+162 var_dict [ 'transition_bias_denominator' ])
+163 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
+164 self . sigma2 = nn . Parameter (
+165 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
+166
+167 self . logger . print (
+168 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
+169 'rnn_init_hidden= {} ' . format (
+170 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
+171 var_dict [ 'rnn_init_hidden' ]))
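
Because `save()` writes NumPy copies of the parameters next to the RNN state dict, and `load()` rebuilds them as `nn.Parameter`s on `self.device`, a checkpoint saved on one device can be restored on another. A roundtrip sketch, reusing the hypothetical `model` and `model_args` from the construction example above (the filename is illustrative):

```
model.save('saved_uisrnn.pth')

restored = uisrnn.UISRNN(model_args)  # args must match the saved model
restored.load('saved_uisrnn.pth')
```
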
@@ -1272,148 +1270,148 @@
- 172 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
-173 """Fit UISRNN model to concatenated sequence and cluster_id.
-174
-175 Args:
-176 train_sequence: the training observation sequence, which is a
-177 2-dim numpy array of real numbers, of size `N * D`.
-178
-179 - `N`: summation of lengths of all utterances.
-180 - `D`: observation dimension.
-181
-182 For example,
-183 ```
-184 train_sequence =
-185 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
-186 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
-187 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
-188 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
-189 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
-190 ```
-191 Here `N=5`, `D=4`.
-192
-193 We concatenate all training utterances into this single sequence.
-194 train_cluster_id: the speaker id sequence, which is 1-dim list or
-195 numpy array of strings, of size `N`.
-196 For example,
-197 ```
-198 train_cluster_id =
-199 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
-200 ```
-201 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
-202
-204 Note that the order of entries within an utterance is preserved,
-204 and all utterances are simply concatenated together.
-205 args: Training configurations. See `arguments.py` for details.
-206
-207 Raises:
-208 TypeError: If train_sequence or train_cluster_id is of wrong type.
-209 ValueError: If train_sequence or train_cluster_id has wrong dimension.
-210 """
-211 # check type
-212 if ( not isinstance ( train_sequence , np . ndarray ) or
-213 train_sequence . dtype != float ):
-214 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
-215 if isinstance ( train_cluster_id , list ):
-216 train_cluster_id = np . array ( train_cluster_id )
-217 if ( not isinstance ( train_cluster_id , np . ndarray ) or
-218 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
-219 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
-220 # check dimension
-221 if train_sequence . ndim != 2 :
-222 raise ValueError ( 'train_sequence must be 2-dim array.' )
-223 if train_cluster_id . ndim != 1 :
-224 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
-225 # check length and size
-226 train_total_length , observation_dim = train_sequence . shape
-227 if observation_dim != self . observation_dim :
-228 raise ValueError ( 'train_sequence does not match the dimension specified '
-229 'by args.observation_dim.' )
-230 if train_total_length != len ( train_cluster_id ):
-231 raise ValueError ( 'train_sequence length is not equal to '
-232 'train_cluster_id length.' )
-233
-234 self . rnn_model . train ()
-235 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
-236 learning_rate = args . learning_rate )
-237
-238 sub_sequences , seq_lengths = utils . resize_sequence (
-239 sequence = train_sequence ,
-240 cluster_id = train_cluster_id ,
-241 num_permutations = args . num_permutations )
-242
-243 # For batch learning, pack the entire dataset.
-244 if args . batch_size is None :
-245 packed_train_sequence , rnn_truth = utils . pack_sequence (
-246 sub_sequences ,
-247 seq_lengths ,
-248 args . batch_size ,
-249 self . observation_dim ,
-250 self . device )
-251 train_loss = []
-252 for num_iter in range ( args . train_iteration ):
-253 optimizer . zero_grad ()
-254 # For online learning, pack a subset in each iteration.
-255 if args . batch_size is not None :
-256 packed_train_sequence , rnn_truth = utils . pack_sequence (
-257 sub_sequences ,
-258 seq_lengths ,
-259 args . batch_size ,
-260 self . observation_dim ,
-261 self . device )
-262 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
-263 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
-264 # use mean to predict
-265 mean = torch . cumsum ( mean , dim = 0 )
-266 mean_size = mean . size ()
-267 mean = torch . mm (
-268 torch . diag (
-269 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
-270 mean . view ( mean_size [ 0 ], - 1 ))
-271 mean = mean . view ( mean_size )
-272
-273 # Likelihood part.
-274 loss1 = loss_func . weighted_mse_loss (
-275 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
-276 target_tensor = rnn_truth ,
-277 weight = 1 / ( 2 * self . sigma2 ))
-278
-279 # Sigma2 prior part.
-280 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
-281 ** 2 ) . view ( - 1 , observation_dim )
-282 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
-283 loss2 = loss_func . sigma2_prior_loss (
-284 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
-285
-286 # Regularization part.
-287 loss3 = loss_func . regularization_loss (
-288 self . rnn_model . parameters (), args . regularization_weight )
-289
-290 loss = loss1 + loss2 + loss3
-291 loss . backward ()
-292 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
-293 optimizer . step ()
-294 # avoid numerical issues
-295 self . sigma2 . data . clamp_ ( min = 1e-6 )
-296
-297 if ( np . remainder ( num_iter , 10 ) == 0 or
-298 num_iter == args . train_iteration - 1 ):
-299 self . logger . print (
-300 2 ,
-301 'Iter: {:d} \t '
-302 'Training Loss: {:.4f} \n '
-303 ' Negative Log Likelihood: {:.4f} \t '
-304 'Sigma2 Prior: {:.4f} \t '
-305 'Regularization: {:.4f} ' . format (
-306 num_iter ,
-307 float ( loss . data ),
-308 float ( loss1 . data ),
-309 float ( loss2 . data ),
-310 float ( loss3 . data )))
-311 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
-312 self . logger . print (
-313 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
+ 173 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
+174 """Fit UISRNN model to concatenated sequence and cluster_id.
+175
+176 Args:
+177 train_sequence: the training observation sequence, which is a
+178 2-dim numpy array of real numbers, of size `N * D`.
+179
+180 - `N`: summation of lengths of all utterances.
+181 - `D`: observation dimension.
+182
+183 For example,
+184 ```
+185 train_sequence =
+186 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
+187 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
+188 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
+189 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
+190 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
+191 ```
+192 Here `N=5`, `D=4`.
+193
+194 We concatenate all training utterances into this single sequence.
+195 train_cluster_id: the speaker id sequence, which is 1-dim list or
+196 numpy array of strings, of size `N`.
+197 For example,
+198 ```
+199 train_cluster_id =
+200 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
+201 ```
+202 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
+203
+205 Note that the order of entries within an utterance is preserved,
+205 and all utterances are simply concatenated together.
+206 args: Training configurations. See `arguments.py` for details.
+207
+208 Raises:
+209 TypeError: If train_sequence or train_cluster_id is of wrong type.
+210 ValueError: If train_sequence or train_cluster_id has wrong dimension.
+211 """
+212 # check type
+213 if ( not isinstance ( train_sequence , np . ndarray ) or
+214 train_sequence . dtype != float ):
+215 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
+216 if isinstance ( train_cluster_id , list ):
+217 train_cluster_id = np . array ( train_cluster_id )
+218 if ( not isinstance ( train_cluster_id , np . ndarray ) or
+219 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
+220 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
+221 # check dimension
+222 if train_sequence . ndim != 2 :
+223 raise ValueError ( 'train_sequence must be 2-dim array.' )
+224 if train_cluster_id . ndim != 1 :
+225 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
+226 # check length and size
+227 train_total_length , observation_dim = train_sequence . shape
+228 if observation_dim != self . observation_dim :
+229 raise ValueError ( 'train_sequence does not match the dimension specified '
+230 'by args.observation_dim.' )
+231 if train_total_length != len ( train_cluster_id ):
+232 raise ValueError ( 'train_sequence length is not equal to '
+233 'train_cluster_id length.' )
+234
+235 self . rnn_model . train ()
+236 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
+237 learning_rate = args . learning_rate )
+238
+239 sub_sequences , seq_lengths = utils . resize_sequence (
+240 sequence = train_sequence ,
+241 cluster_id = train_cluster_id ,
+242 num_permutations = args . num_permutations )
+243
+244 # For batch learning, pack the entire dataset.
+245 if args . batch_size is None :
+246 packed_train_sequence , rnn_truth = utils . pack_sequence (
+247 sub_sequences ,
+248 seq_lengths ,
+249 args . batch_size ,
+250 self . observation_dim ,
+251 self . device )
+252 train_loss = []
+253 for num_iter in range ( args . train_iteration ):
+254 optimizer . zero_grad ()
+255 # For online learning, pack a subset in each iteration.
+256 if args . batch_size is not None :
+257 packed_train_sequence , rnn_truth = utils . pack_sequence (
+258 sub_sequences ,
+259 seq_lengths ,
+260 args . batch_size ,
+261 self . observation_dim ,
+262 self . device )
+263 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
+264 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
+265 # use mean to predict
+266 mean = torch . cumsum ( mean , dim = 0 )
+267 mean_size = mean . size ()
+268 mean = torch . mm (
+269 torch . diag (
+270 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
+271 mean . view ( mean_size [ 0 ], - 1 ))
+272 mean = mean . view ( mean_size )
+273
+274 # Likelihood part.
+275 loss1 = loss_func . weighted_mse_loss (
+276 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
+277 target_tensor = rnn_truth ,
+278 weight = 1 / ( 2 * self . sigma2 ))
+279
+280 # Sigma2 prior part.
+281 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
+282 ** 2 ) . view ( - 1 , observation_dim )
+283 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
+284 loss2 = loss_func . sigma2_prior_loss (
+285 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
+286
+287 # Regularization part.
+288 loss3 = loss_func . regularization_loss (
+289 self . rnn_model . parameters (), args . regularization_weight )
+290
+291 loss = loss1 + loss2 + loss3
+292 loss . backward ()
+293 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
+294 optimizer . step ()
+295 # avoid numerical issues
+296 self . sigma2 . data . clamp_ ( min = 1e-6 )
+297
+298 if ( np . remainder ( num_iter , 10 ) == 0 or
+299 num_iter == args . train_iteration - 1 ):
+300 self . logger . print (
+301 2 ,
+302 'Iter: {:d} \t '
+303 'Training Loss: {:.4f} \n '
+304 ' Negative Log Likelihood: {:.4f} \t '
+305 'Sigma2 Prior: {:.4f} \t '
+306 'Regularization: {:.4f} ' . format (
+307 num_iter ,
+308 float ( loss . data ),
+309 float ( loss1 . data ),
+310 float ( loss2 . data ),
+311 float ( loss3 . data )))
+312 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
+313 self . logger . print (
+314 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
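
The `torch.cumsum` plus `torch.diag(1.0 / torch.arange(...))` product in the loop above is a vectorized running average of the GRU outputs along the time axis. A small NumPy check of the same computation (shapes are illustrative):

```
import numpy as np

out = np.arange(12, dtype=float).reshape(4, 3)               # (time, features)
running = np.cumsum(out, axis=0) / np.arange(1, 5)[:, None]  # divide row t by t+1
assert np.allclose(running[2], out[:3].mean(axis=0))         # running mean at t=2
```
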
@@ -1434,22 +1432,25 @@
 (whitespace-only reflow of the rendered `fit_concatenated` docstring:)
 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'

 Here `N=5`, `D=4`.

 We concatenate all training utterances into this single sequence.

 train_cluster_id: the speaker id sequence, which is 1-dim list or
 numpy array of strings, of size `N`.
 For example,

 train_cluster_id =
 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']

 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.

 Note that the order of entries within an utterance is preserved,
 and all utterances are simply concatenated together.
@@ -1473,78 +1474,78 @@
- 315 def fit ( self , train_sequences , train_cluster_ids , args ):
-316 """Fit UISRNN model.
-317
-318 Args:
-319 train_sequences: Either a list of training sequences, or a single
-320 concatenated training sequence:
-321
-322 1. train_sequences is a list, and each element is a 2-dim numpy array
-323 of real numbers, of size: `length * D`.
-324 The length varies among different sequences, but the D is the same.
-325 In speaker diarization, each sequence is the sequence of speaker
-326 embeddings of one utterance.
-327 2. train_sequences is a single concatenated sequence, which is a
-328 2-dim numpy array of real numbers. See `fit_concatenated()`
-329 for more details.
-330 train_cluster_ids: Ground truth labels for train_sequences:
-331
-332 1. if train_sequences is a list, this must also be a list of the same
-333 size, each element being a 1-dim list or numpy array of strings.
-334 2. if train_sequences is a single concatenated sequence, this
-335 must also be the concatenated 1-dim list or numpy array of strings.
-336 args: Training configurations. See `arguments.py` for details.
-337
-338 Raises:
-339 TypeError: If train_sequences or train_cluster_ids is of wrong type.
-340 """
-341 if isinstance ( train_sequences , np . ndarray ):
-342 # train_sequences is already the concatenated sequence
-343 if self . estimate_transition_bias :
-344 # see issue #55: https://github.com/google/uis-rnn/issues/55
-345 self . logger . print (
-346 2 ,
-347 'Warning: transition_bias cannot be correctly estimated from a '
-348 'concatenated sequence; train_sequences will be treated as a '
-349 'single sequence. This can lead to inaccurate estimation of '
-350 'transition_bias. Please, consider estimating transition_bias '
-351 'before concatenating the sequences and passing it as argument.' )
-352 train_sequences = [ train_sequences ]
-353 train_cluster_ids = [ train_cluster_ids ]
-354 elif isinstance ( train_sequences , list ):
-355 # train_sequences is a list of un-concatenated sequences
-356 # we will concatenate it later, after estimating transition_bias
-357 pass
-358 else :
-359 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
-360
-361 # estimate transition_bias
-362 if self . estimate_transition_bias :
-363 ( transition_bias ,
-364 transition_bias_denominator ) = utils . estimate_transition_bias (
-365 train_cluster_ids )
-366 # set or update transition_bias
-367 if self . transition_bias is None :
-368 self . transition_bias = transition_bias
-369 self . transition_bias_denominator = transition_bias_denominator
-370 else :
-371 self . transition_bias = (
-372 self . transition_bias * self . transition_bias_denominator +
-373 transition_bias * transition_bias_denominator ) / (
-374 self . transition_bias_denominator + transition_bias_denominator )
-375 self . transition_bias_denominator += transition_bias_denominator
-376
-377 # concatenate train_sequences
-378 ( concatenated_train_sequence ,
-379 concatenated_train_cluster_id ) = utils . concatenate_training_data (
-380 train_sequences ,
-381 train_cluster_ids ,
-382 args . enforce_cluster_id_uniqueness ,
-383 True )
-384
-385 self . fit_concatenated (
-386 concatenated_train_sequence , concatenated_train_cluster_id , args )
+ 316 def fit ( self , train_sequences , train_cluster_ids , args ):
+317 """Fit UISRNN model.
+318
+319 Args:
+320 train_sequences: Either a list of training sequences, or a single
+321 concatenated training sequence:
+322
+323 1. train_sequences is a list, and each element is a 2-dim numpy array
+324 of real numbers, of size: `length * D`.
+325 The length varies among different sequences, but the D is the same.
+326 In speaker diarization, each sequence is the sequence of speaker
+327 embeddings of one utterance.
+328 2. train_sequences is a single concatenated sequence, which is a
+329 2-dim numpy array of real numbers. See `fit_concatenated()`
+330 for more details.
+331 train_cluster_ids: Ground truth labels for train_sequences:
+332
+333 1. if train_sequences is a list, this must also be a list of the same
+334 size, each element being a 1-dim list or numpy array of strings.
+335 2. if train_sequences is a single concatenated sequence, this
+336 must also be the concatenated 1-dim list or numpy array of strings.
+337 args: Training configurations. See `arguments.py` for details.
+338
+339 Raises:
+340 TypeError: If train_sequences or train_cluster_ids is of wrong type.
+341 """
+342 if isinstance ( train_sequences , np . ndarray ):
+343 # train_sequences is already the concatenated sequence
+344 if self . estimate_transition_bias :
+345 # see issue #55: https://github.com/google/uis-rnn/issues/55
+346 self . logger . print (
+347 2 ,
+348 'Warning: transition_bias cannot be correctly estimated from a '
+349 'concatenated sequence; train_sequences will be treated as a '
+350 'single sequence. This can lead to inaccurate estimation of '
+351 'transition_bias. Please, consider estimating transition_bias '
+352 'before concatenating the sequences and passing it as argument.' )
+353 train_sequences = [ train_sequences ]
+354 train_cluster_ids = [ train_cluster_ids ]
+355 elif isinstance ( train_sequences , list ):
+356 # train_sequences is a list of un-concatenated sequences
+357 # we will concatenate it later, after estimating transition_bias
+358 pass
+359 else :
+360 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
+361
+362 # estimate transition_bias
+363 if self . estimate_transition_bias :
+364 ( transition_bias ,
+365 transition_bias_denominator ) = utils . estimate_transition_bias (
+366 train_cluster_ids )
+367 # set or update transition_bias
+368 if self . transition_bias is None :
+369 self . transition_bias = transition_bias
+370 self . transition_bias_denominator = transition_bias_denominator
+371 else :
+372 self . transition_bias = (
+373 self . transition_bias * self . transition_bias_denominator +
+374 transition_bias * transition_bias_denominator ) / (
+375 self . transition_bias_denominator + transition_bias_denominator )
+376 self . transition_bias_denominator += transition_bias_denominator
+377
+378 # concatenate train_sequences
+379 ( concatenated_train_sequence ,
+380 concatenated_train_cluster_id ) = utils . concatenate_training_data (
+381 train_sequences ,
+382 train_cluster_ids ,
+383 args . enforce_cluster_id_uniqueness ,
+384 True )
+385
+386 self . fit_concatenated (
+387 concatenated_train_sequence , concatenated_train_cluster_id , args )
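
The `transition_bias` update above is a denominator-weighted average, so repeated `fit()` calls pool evidence across training batches instead of overwriting the previous estimate. A quick numeric check with made-up values:

```
old_bias, old_denom = 0.30, 100.0  # previous estimate over 100 transitions
new_bias, new_denom = 0.50, 50.0   # estimate from a newly fitted batch

merged = (old_bias * old_denom + new_bias * new_denom) / (old_denom + new_denom)
print(merged)  # 0.3666...: the pooled fraction of speaker-change transitions
```
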
@@ -1591,90 +1592,90 @@
- 479 def predict_single ( self , test_sequence , args ):
-480 """Predict labels for a single test sequence using UISRNN model.
-481
-482 Args:
-483 test_sequence: the test observation sequence, which is 2-dim numpy array
-484 of real numbers, of size `N * D`.
-485
-486 - `N`: length of one test utterance.
-487 - `D` : observation dimension.
-488
-489 For example:
-490 ```
-491 test_sequence =
-492 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
-493 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
-494 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
-495 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
-496 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
-497 ```
-498 Here `N=5`, `D=4`.
-499 args: Inference configurations. See `arguments.py` for details.
-500
-501 Returns:
-502 predicted_cluster_id: predicted speaker id sequence, which is
-503 an array of integers, of size `N`.
-504 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
-505
-506 Raises:
-507 TypeError: If test_sequence is of wrong type.
-508 ValueError: If test_sequence has wrong dimension.
-509 """
-510 # check type
-511 if ( not isinstance ( test_sequence , np . ndarray ) or
-512 test_sequence . dtype != float ):
-513 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
-514 # check dimension
-515 if test_sequence . ndim != 2 :
-516 raise ValueError ( 'test_sequence must be 2-dim array.' )
-517 # check size
-518 test_sequence_length , observation_dim = test_sequence . shape
-519 if observation_dim != self . observation_dim :
-520 raise ValueError ( 'test_sequence does not match the dimension specified '
-521 'by args.observation_dim.' )
-522
-523 self . rnn_model . eval ()
-524 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
-525 test_sequence = autograd . Variable (
-526 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
-527 # bookkeeping for beam search
-528 beam_set = [ BeamState ()]
-529 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
-530 args . look_ahead ):
-531 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
-532 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
-533 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
-534 score_set = float ( 'inf' ) * np . ones (
-535 np . append (
-536 args . beam_size , max_clusters + 1 + np . arange (
-537 look_ahead_seq_length )))
-538 for beam_rank , beam_state in enumerate ( beam_set ):
-539 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
-540 score_set [ beam_rank , :] = np . pad (
-541 beam_score_set ,
-542 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
-543 ( look_ahead_seq_length , 1 )), 'constant' ,
-544 constant_values = float ( 'inf' ))
-545 # find top scores
-546 score_ranked = np . sort ( score_set , axis = None )
-547 score_ranked [ score_ranked == float ( 'inf' )] = 0
-548 score_ranked = np . trim_zeros ( score_ranked )
-549 idx_ranked = np . argsort ( score_set , axis = None )
-550 updated_beam_set = []
-551 for new_beam_rank in range (
-552 np . min (( len ( score_ranked ), args . beam_size ))):
-553 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
-554 score_set . shape )
-555 prev_beam_rank = total_idx [ 0 ] . item ()
-556 cluster_seq = total_idx [ 1 :]
-557 updated_beam_state = self . _update_beam_state (
-558 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
-559 updated_beam_set . append ( updated_beam_state )
-560 beam_set = updated_beam_set
-561 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
-562 return predicted_cluster_id
+ 480 def predict_single ( self , test_sequence , args ):
+481 """Predict labels for a single test sequence using UISRNN model.
+482
+483 Args:
+484 test_sequence: the test observation sequence, which is 2-dim numpy array
+485 of real numbers, of size `N * D`.
+486
+487 - `N`: length of one test utterance.
+488 - `D` : observation dimension.
+489
+490 For example:
+491 ```
+492 test_sequence =
+493 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
+494 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
+495 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
+496 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
+497 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
+498 ```
+499 Here `N=5`, `D=4`.
+500 args: Inference configurations. See `arguments.py` for details.
+501
+502 Returns:
+503 predicted_cluster_id: predicted speaker id sequence, which is
+504 an array of integers, of size `N`.
+505 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
+506
+507 Raises:
+508 TypeError: If test_sequence is of wrong type.
+509 ValueError: If test_sequence has wrong dimension.
+510 """
+511 # check type
+512 if ( not isinstance ( test_sequence , np . ndarray ) or
+513 test_sequence . dtype != float ):
+514 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
+515 # check dimension
+516 if test_sequence . ndim != 2 :
+517 raise ValueError ( 'test_sequence must be 2-dim array.' )
+518 # check size
+519 test_sequence_length , observation_dim = test_sequence . shape
+520 if observation_dim != self . observation_dim :
+521 raise ValueError ( 'test_sequence does not match the dimension specified '
+522 'by args.observation_dim.' )
+523
+524 self . rnn_model . eval ()
+525 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
+526 test_sequence = autograd . Variable (
+527 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
+528 # bookkeeping for beam search
+529 beam_set = [ BeamState ()]
+530 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
+531 args . look_ahead ):
+532 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
+533 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
+534 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
+535 score_set = float ( 'inf' ) * np . ones (
+536 np . append (
+537 args . beam_size , max_clusters + 1 + np . arange (
+538 look_ahead_seq_length )))
+539 for beam_rank , beam_state in enumerate ( beam_set ):
+540 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
+541 score_set [ beam_rank , :] = np . pad (
+542 beam_score_set ,
+543 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
+544 ( look_ahead_seq_length , 1 )), 'constant' ,
+545 constant_values = float ( 'inf' ))
+546 # find top scores
+547 score_ranked = np . sort ( score_set , axis = None )
+548 score_ranked [ score_ranked == float ( 'inf' )] = 0
+549 score_ranked = np . trim_zeros ( score_ranked )
+550 idx_ranked = np . argsort ( score_set , axis = None )
+551 updated_beam_set = []
+552 for new_beam_rank in range (
+553 np . min (( len ( score_ranked ), args . beam_size ))):
+554 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
+555 score_set . shape )
+556 prev_beam_rank = total_idx [ 0 ] . item ()
+557 cluster_seq = total_idx [ 1 :]
+558 updated_beam_state = self . _update_beam_state (
+559 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
+560 updated_beam_set . append ( updated_beam_state )
+561 beam_set = updated_beam_set
+562 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
+563 return predicted_cluster_id
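
An inference sketch for the method above, using random placeholder embeddings that satisfy its type and dimension checks (this reuses the hypothetical `model`, `model_args`, and `inference_args` from the earlier sketches):

```
import numpy as np

test_sequence = np.random.randn(100, model_args.observation_dim)  # float64
predicted_ids = model.predict_single(test_sequence, inference_args)
print(predicted_ids)  # e.g. [0, 0, 1, ...], one integer label per observation
```
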
@@ -1696,9 +1697,12 @@
 (whitespace-only reflow of the rendered `predict_single` docstring:)
 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'

 Here `N=5`, `D=4`.

 args: Inference configurations. See `arguments.py` for details.

 Returns:
 predicted_cluster_id: predicted speaker id sequence, which is
@@ -1723,33 +1727,33 @@
- 564 def predict ( self , test_sequences , args ):
-565 """Predict labels for a single or many test sequences using UISRNN model.
-566
-567 Args:
-568 test_sequences: Either a list of test sequences, or a single test
-569 sequence. Each test sequence is a 2-dim numpy array
-570 of real numbers. See `predict_single()` for details.
-571 args: Inference configurations. See `arguments.py` for details.
-572
-573 Returns:
-574 predicted_cluster_ids: Predicted labels for test_sequences.
-575
-576 1. if test_sequences is a list, predicted_cluster_ids will be a list
-577 of the same size, where each element is a 1-dim list of strings.
-578 2. if test_sequences is a single sequence, predicted_cluster_ids will
-579 be a 1-dim list of strings.
-580
-581 Raises:
-582 TypeError: If test_sequences is of wrong type.
-583 """
-584 # check type
-585 if isinstance ( test_sequences , np . ndarray ):
-586 return self . predict_single ( test_sequences , args )
-587 if isinstance ( test_sequences , list ):
-588 return [ self . predict_single ( test_sequence , args )
-589 for test_sequence in test_sequences ]
-590 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
+ 565 def predict ( self , test_sequences , args ):
+566 """Predict labels for a single or many test sequences using UISRNN model.
+567
+568 Args:
+569 test_sequences: Either a list of test sequences, or a single test
+570 sequence. Each test sequence is a 2-dim numpy array
+571 of real numbers. See `predict_single()` for details.
+572 args: Inference configurations. See `arguments.py` for details.
+573
+574 Returns:
+575 predicted_cluster_ids: Predicted labels for test_sequences.
+576
+577 1. if test_sequences is a list, predicted_cluster_ids will be a list
+578 of the same size, where each element is a 1-dim list of strings.
+579 2. if test_sequences is a single sequence, predicted_cluster_ids will
+580 be a 1-dim list of strings.
+581
+582 Raises:
+583 TypeError: If test_sequences is of wrong type.
+584 """
+585 # check type
+586 if isinstance ( test_sequences , np . ndarray ):
+587 return self . predict_single ( test_sequences , args )
+588 if isinstance ( test_sequences , list ):
+589 return [ self . predict_single ( test_sequence , args )
+590 for test_sequence in test_sequences ]
+591 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
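
Since `predict()` only dispatches on the input type, the two call styles below yield the same labels for the same sequence (the beam search is deterministic in eval mode); this continues the hypothetical objects from the earlier sketches:

```
single_result = model.predict(test_sequence, inference_args)
batch_results = model.predict([test_sequence, test_sequence], inference_args)
assert batch_results[0] == single_result == batch_results[1]
```
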
@@ -1788,37 +1792,37 @@
- 593 def parallel_predict ( model , test_sequences , args , num_processes = 4 ):
-594 """Run prediction in parallel using torch.multiprocessing.
-595
-596 This is a beta feature. It makes prediction slower on CPU, but it has been
-597 reported to make prediction faster on GPU.
-598
-599 Args:
-600 model: instance of UISRNN model
-601 test_sequences: a list of test sequences, or a single test
-602 sequence. Each test sequence is a 2-dim numpy array
-603 of real numbers. See `predict_single()` for details.
-604 args: Inference configurations. See `arguments.py` for details.
-605 num_processes: number of parallel processes.
-606
-607 Returns:
-608 a list of the same size as test_sequences, where each element
-609 being a 1-dim list of strings.
-610
-611 Raises:
-612 TypeError: If test_sequences is of wrong type.
-613 """
-614 if not isinstance ( test_sequences , list ):
-615 raise TypeError ( 'test_sequences must be a list.' )
-616 ctx = multiprocessing . get_context ( 'forkserver' )
-617 model . rnn_model . share_memory ()
-618 pool = ctx . Pool ( num_processes )
-619 results = pool . map (
-620 functools . partial ( model . predict_single , args = args ),
-621 test_sequences )
-622 pool . close ()
-623 return results
+ 594 def parallel_predict ( model , test_sequences , args , num_processes = 4 ):
+595 """Run prediction in parallel using torch.multiprocessing.
+596
+597 This is a beta feature. It makes prediction slower on CPU, but it has been
+598 reported to make prediction faster on GPU.
+599
+600 Args:
+601 model: instance of UISRNN model
+602 test_sequences: a list of test sequences, or a single test
+603 sequence. Each test sequence is a 2-dim numpy array
+604 of real numbers. See `predict_single()` for details.
+605 args: Inference configurations. See `arguments.py` for details.
+606 num_processes: number of parallel processes.
+607
+608 Returns:
+609 a list of the same size as test_sequences, where each element
+610 being a 1-dim list of strings.
+611
+612 Raises:
+613 TypeError: If test_sequences is of wrong type.
+614 """
+615 if not isinstance ( test_sequences , list ):
+616 raise TypeError ( 'test_sequences must be a list.' )
+617 ctx = multiprocessing . get_context ( 'forkserver' )
+618 model . rnn_model . share_memory ()
+619 pool = ctx . Pool ( num_processes )
+620 results = pool . map (
+621 functools . partial ( model . predict_single , args = args ),
+622 test_sequences )
+623 pool . close ()
+624 return results
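
A usage sketch for `parallel_predict`, importable from the `uisrnn.uisrnn` module documented here (again with the hypothetical objects from the earlier sketches; per the docstring, expect a speedup mainly on GPU):

```
from uisrnn.uisrnn import parallel_predict

results = parallel_predict(model, [test_sequence] * 8,
                           inference_args, num_processes=4)
```
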
diff --git a/docs/uisrnn/arguments.html b/docs/uisrnn/arguments.html
index b258153..63673e9 100644
--- a/docs/uisrnn/arguments.html
+++ b/docs/uisrnn/arguments.html
@@ -3,14 +3,14 @@
 uisrnn.arguments API documentation
@@ -148,12 +148,12 @@
87 'value is None, we will estimate it from training data.' )
88 model_parser . add_argument (
89 '--verbosity' ,
- 90 default = 2 ,
+ 90 default = 3 ,
91 type = int ,
92 help = 'How verbose will the logging information be. Higher value '
93 'represents more verbose information. A general guideline: '
- 94 '0 for errors; 1 for finishing important steps; '
- 95 '2 for finishing less important steps; 3 or above for debugging '
+ 94 '0 for fatals; 1 for errors; 2 for finishing important steps; '
+ 95 '3 for finishing less important steps; 4 or above for debugging '
96 'information.' )
97 model_parser . add_argument (
98 '--enable_cuda' ,
@@ -365,12 +365,12 @@
88 'value is None, we will estimate it from training data.' )
89 model_parser . add_argument (
90 '--verbosity' ,
- 91 default = 2 ,
+ 91 default = 3 ,
92 type = int ,
93 help = 'How verbose will the logging information be. Higher value '
94 'represents more verbose information. A general guideline: '
- 95 '0 for errors; 1 for finishing important steps; '
- 96 '2 for finishing less important steps; 3 or above for debugging '
+ 95 '0 for fatals; 1 for errors; 2 for finishing important steps; '
+ 96 '3 for finishing less important steps; 4 or above for debugging '
97 'information.' )
98 model_parser . add_argument (
99 '--enable_cuda' ,
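
Note that this change shifts the whole verbosity scale up by one level (errors move from 0 to 1, and so on) and raises the default from 2 to 3. Scripts that relied on the old level 2 ('finishing less important steps') should now pass 3; continuing the hypothetical setup from the earlier sketches:

```
model_args, training_args, inference_args = uisrnn.parse_arguments()
model_args.verbosity = 3  # new scale: 3 = finishing less important steps
```
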
diff --git a/docs/uisrnn/contrib.html b/docs/uisrnn/contrib.html
index 259503e..c8aa990 100644
--- a/docs/uisrnn/contrib.html
+++ b/docs/uisrnn/contrib.html
@@ -3,14 +3,14 @@
 uisrnn.contrib API documentation
diff --git a/docs/uisrnn/contrib/contrib_template.html b/docs/uisrnn/contrib/contrib_template.html
index 6621ff1..873c1d5 100644
--- a/docs/uisrnn/contrib/contrib_template.html
+++ b/docs/uisrnn/contrib/contrib_template.html
@@ -3,14 +3,14 @@
 uisrnn.contrib.contrib_template API documentation
diff --git a/docs/uisrnn/contrib/range_search_crp_alpha.html b/docs/uisrnn/contrib/range_search_crp_alpha.html
index 44fbbde..37b93a2 100644
--- a/docs/uisrnn/contrib/range_search_crp_alpha.html
+++ b/docs/uisrnn/contrib/range_search_crp_alpha.html
@@ -3,14 +3,14 @@
 uisrnn.contrib.range_search_crp_alpha API documentation
@@ -51,14 +51,16 @@
 (whitespace-only reflow of the rendered module docstring:)
 This module implements a method to search for the best crp_alpha within a
 range for a given data set.

 For example

 train_cluster_id = np.array(
 ['0_0', '0_0', '0_1', '0_1', '0_1', '0_0', '0_0', '1_0', '1_0', '1_0',
 '1_1', '1_1', '1_1', '1_0', '1_0','1_0', '1_2', '1_2', '1_2'])
 print(estimate_crp_alpha(train_cluster_id))
 0.5

 Function for user:
 estimate_crp_alpha: see docstring for details.
 Internal functions:
 _get_cdf: see docstring for details.
diff --git a/docs/uisrnn/evals.html b/docs/uisrnn/evals.html
index a616862..9704650 100644
--- a/docs/uisrnn/evals.html
+++ b/docs/uisrnn/evals.html
@@ -3,14 +3,14 @@
 uisrnn.evals API documentation
diff --git a/docs/uisrnn/loss_func.html b/docs/uisrnn/loss_func.html
index 017f560..d7a6b89 100644
--- a/docs/uisrnn/loss_func.html
+++ b/docs/uisrnn/loss_func.html
@@ -3,14 +3,14 @@
 uisrnn.loss_func API documentation
diff --git a/docs/uisrnn/uisrnn.html b/docs/uisrnn/uisrnn.html
index 1b6d9bc..b828166 100644
--- a/docs/uisrnn/uisrnn.html
+++ b/docs/uisrnn/uisrnn.html
@@ -3,14 +3,14 @@
 uisrnn.uisrnn API documentation
@@ -164,613 +164,614 @@
13 # limitations under the License.
14 """The UIS-RNN model."""
15
- 16 import functools
- 17 import numpy as np
- 18 import torch
- 19 from torch import autograd
- 20 from torch import multiprocessing
- 21 from torch import nn
- 22 from torch import optim
- 23 import torch.nn.functional as F
- 24
- 25 from uisrnn import loss_func
- 26 from uisrnn import utils
- 27
- 28 _INITIAL_SIGMA2_VALUE = 0.1
- 29
+ 16 import colortimelog
+ 17 import functools
+ 18 import numpy as np
+ 19 import torch
+ 20 from torch import autograd
+ 21 from torch import multiprocessing
+ 22 from torch import nn
+ 23 from torch import optim
+ 24 import torch.nn.functional as F
+ 25
+ 26 from uisrnn import loss_func
+ 27 from uisrnn import utils
+ 28
+ 29 _INITIAL_SIGMA2_VALUE = 0.1
30
- 31 class CoreRNN ( nn . Module ):
- 32 """The core Recurent Neural Network used by UIS-RNN."""
- 33
- 34 def __init__ ( self , input_dim , hidden_size , depth , observation_dim , dropout = 0 ):
- 35 super () . __init__ ()
- 36 self . hidden_size = hidden_size
- 37 if depth >= 2 :
- 38 self . gru = nn . GRU ( input_dim , hidden_size , depth , dropout = dropout )
- 39 else :
- 40 self . gru = nn . GRU ( input_dim , hidden_size , depth )
- 41 self . linear_mean1 = nn . Linear ( hidden_size , hidden_size )
- 42 self . linear_mean2 = nn . Linear ( hidden_size , observation_dim )
- 43
- 44 def forward ( self , input_seq , hidden = None ):
- 45 """The forward function of the module."""
- 46 output_seq , hidden = self . gru ( input_seq , hidden )
- 47 if isinstance ( output_seq , torch . nn . utils . rnn . PackedSequence ):
- 48 output_seq , _ = torch . nn . utils . rnn . pad_packed_sequence (
- 49 output_seq , batch_first = False )
- 50 mean = self . linear_mean2 ( F . relu ( self . linear_mean1 ( output_seq )))
- 51 return mean , hidden
- 52
+ 31
+ 32 class CoreRNN ( nn . Module ):
+ 33 """The core Recurent Neural Network used by UIS-RNN."""
+ 34
+ 35 def __init__ ( self , input_dim , hidden_size , depth , observation_dim , dropout = 0 ):
+ 36 super () . __init__ ()
+ 37 self . hidden_size = hidden_size
+ 38 if depth >= 2 :
+ 39 self . gru = nn . GRU ( input_dim , hidden_size , depth , dropout = dropout )
+ 40 else :
+ 41 self . gru = nn . GRU ( input_dim , hidden_size , depth )
+ 42 self . linear_mean1 = nn . Linear ( hidden_size , hidden_size )
+ 43 self . linear_mean2 = nn . Linear ( hidden_size , observation_dim )
+ 44
+ 45 def forward ( self , input_seq , hidden = None ):
+ 46 """The forward function of the module."""
+ 47 output_seq , hidden = self . gru ( input_seq , hidden )
+ 48 if isinstance ( output_seq , torch . nn . utils . rnn . PackedSequence ):
+ 49 output_seq , _ = torch . nn . utils . rnn . pad_packed_sequence (
+ 50 output_seq , batch_first = False )
+ 51 mean = self . linear_mean2 ( F . relu ( self . linear_mean1 ( output_seq )))
+ 52 return mean , hidden
53
- 54 class BeamState :
- 55 """Structure that contains necessary states for beam search."""
- 56
- 57 def __init__ ( self , source = None ):
- 58 if not source :
- 59 self . mean_set = []
- 60 self . hidden_set = []
- 61 self . neg_likelihood = 0
- 62 self . trace = []
- 63 self . block_counts = []
- 64 else :
- 65 self . mean_set = source . mean_set . copy ()
- 66 self . hidden_set = source . hidden_set . copy ()
- 67 self . trace = source . trace . copy ()
- 68 self . block_counts = source . block_counts . copy ()
- 69 self . neg_likelihood = source . neg_likelihood
- 70
- 71 def append ( self , mean , hidden , cluster ):
- 72 """Append new item to the BeamState."""
- 73 self . mean_set . append ( mean . clone ())
- 74 self . hidden_set . append ( hidden . clone ())
- 75 self . block_counts . append ( 1 )
- 76 self . trace . append ( cluster )
- 77
+ 54
+ 55 class BeamState :
+ 56 """Structure that contains necessary states for beam search."""
+ 57
+ 58 def __init__ ( self , source = None ):
+ 59 if not source :
+ 60 self . mean_set = []
+ 61 self . hidden_set = []
+ 62 self . neg_likelihood = 0
+ 63 self . trace = []
+ 64 self . block_counts = []
+ 65 else :
+ 66 self . mean_set = source . mean_set . copy ()
+ 67 self . hidden_set = source . hidden_set . copy ()
+ 68 self . trace = source . trace . copy ()
+ 69 self . block_counts = source . block_counts . copy ()
+ 70 self . neg_likelihood = source . neg_likelihood
+ 71
+ 72 def append ( self , mean , hidden , cluster ):
+ 73 """Append new item to the BeamState."""
+ 74 self . mean_set . append ( mean . clone ())
+ 75 self . hidden_set . append ( hidden . clone ())
+ 76 self . block_counts . append ( 1 )
+ 77 self . trace . append ( cluster )
78
- 79 class UISRNN :
- 80 """Unbounded Interleaved-State Recurrent Neural Networks."""
- 81
- 82 def __init__ ( self , args ):
- 83 """Construct the UISRNN object.
- 84
- 85 Args:
- 86 args: Model configurations. See `arguments.py` for details.
- 87 """
- 88 self . observation_dim = args . observation_dim
- 89 self . device = torch . device (
- 90 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
- 91 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
- 92 args . rnn_depth , self . observation_dim ,
- 93 args . rnn_dropout ) . to ( self . device )
- 94 self . rnn_init_hidden = nn . Parameter (
- 95 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
- 96 # booleans indicating which variables are trainable
- 97 self . estimate_sigma2 = ( args . sigma2 is None )
- 98 self . estimate_transition_bias = ( args . transition_bias is None )
- 99 # initial values of variables
-100 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
-101 self . sigma2 = nn . Parameter (
-102 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
-103 self . transition_bias = args . transition_bias
-104 self . transition_bias_denominator = 0.0
-105 self . crp_alpha = args . crp_alpha
-106 self . logger = utils . Logger ( args . verbosity )
-107
-108 def _get_optimizer ( self , optimizer , learning_rate ):
-109 """Get optimizer for UISRNN.
-110
-111 Args:
-112 optimizer: string - name of the optimizer.
-113 learning_rate: learning rate for the entire model.
-114 We do not customize learning rate for separate parts.
-115
-116 Returns:
-117 a pytorch "optim" object
-118 """
-119 params = [
-120 {
-121 'params' : self . rnn_model . parameters ()
-122 }, # rnn parameters
-123 {
-124 'params' : self . rnn_init_hidden
-125 } # rnn initial hidden state
-126 ]
-127 if self . estimate_sigma2 : # train sigma2
-128 params . append ({
-129 'params' : self . sigma2
-130 }) # variance parameters
-131 assert optimizer == 'adam' , 'Only adam optimizer is supported.'
-132 return optim . Adam ( params , lr = learning_rate )
-133
-134 def save ( self , filepath ):
-135 """Save the model to a file.
-136
-137 Args:
-138 filepath: the path of the file.
-139 """
-140 torch . save ({
-141 'rnn_state_dict' : self . rnn_model . state_dict (),
-142 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
-143 'transition_bias' : self . transition_bias ,
-144 'transition_bias_denominator' : self . transition_bias_denominator ,
-145 'crp_alpha' : self . crp_alpha ,
-146 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
-147
-148 def load ( self , filepath ):
-149 """Load the model from a file.
-150
-151 Args:
-152 filepath: the path of the file.
-153 """
-154 var_dict = torch . load ( filepath )
-155 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
-156 self . rnn_init_hidden = nn . Parameter (
-157 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
-158 self . transition_bias = float ( var_dict [ 'transition_bias' ])
-159 self . transition_bias_denominator = float (
-160 var_dict [ 'transition_bias_denominator' ])
-161 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
-162 self . sigma2 = nn . Parameter (
-163 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
-164
-165 self . logger . print (
-166 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
-167 'rnn_init_hidden= {} ' . format (
-168 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
-169 var_dict [ 'rnn_init_hidden' ]))
-170
-171 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
-172 """Fit UISRNN model to concatenated sequence and cluster_id.
-173
-174 Args:
-175 train_sequence: the training observation sequence, which is a
-176 2-dim numpy array of real numbers, of size `N * D`.
-177
-178 - `N`: summation of lengths of all utterances.
-179 - `D`: observation dimension.
-180
-181 For example,
-182 ```
-183 train_sequence =
-184 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
-185 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
-186 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
-187 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
-188 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
-189 ```
-190 Here `N=5`, `D=4`.
-191
-192 We concatenate all training utterances into this single sequence.
-193 train_cluster_id: the speaker id sequence, which is 1-dim list or
-194 numpy array of strings, of size `N`.
-195 For example,
-196 ```
-197 train_cluster_id =
-198 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
-199 ```
-200 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
-201
-202 Note that the order of entries within an utterance is preserved,
-203 and all utterances are simply concatenated together.
-204 args: Training configurations. See `arguments.py` for details.
-205
-206 Raises:
-207 TypeError: If train_sequence or train_cluster_id is of wrong type.
-208 ValueError: If train_sequence or train_cluster_id has wrong dimension.
-209 """
-210 # check type
-211 if ( not isinstance ( train_sequence , np . ndarray ) or
-212 train_sequence . dtype != float ):
-213 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
-214 if isinstance ( train_cluster_id , list ):
-215 train_cluster_id = np . array ( train_cluster_id )
-216 if ( not isinstance ( train_cluster_id , np . ndarray ) or
-217 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
-219 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
-219 # check dimension
-220 if train_sequence . ndim != 2 :
-221 raise ValueError ( 'train_sequence must be 2-dim array.' )
-222 if train_cluster_id . ndim != 1 :
-223 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
-224 # check length and size
-225 train_total_length , observation_dim = train_sequence . shape
-226 if observation_dim != self . observation_dim :
-227 raise ValueError ( 'train_sequence does not match the dimension specified '
-228 'by args.observation_dim.' )
-229 if train_total_length != len ( train_cluster_id ):
-230 raise ValueError ( 'train_sequence length is not equal to '
-231 'train_cluster_id length.' )
-232
-233 self . rnn_model . train ()
-234 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
-235 learning_rate = args . learning_rate )
-236
-237 sub_sequences , seq_lengths = utils . resize_sequence (
-238 sequence = train_sequence ,
-239 cluster_id = train_cluster_id ,
-240 num_permutations = args . num_permutations )
-241
-242 # For batch learning, pack the entire dataset.
-243 if args . batch_size is None :
-244 packed_train_sequence , rnn_truth = utils . pack_sequence (
-245 sub_sequences ,
-246 seq_lengths ,
-247 args . batch_size ,
-248 self . observation_dim ,
-249 self . device )
-250 train_loss = []
-251 for num_iter in range ( args . train_iteration ):
-252 optimizer . zero_grad ()
-253 # For online learning, pack a subset in each iteration.
-254 if args . batch_size is not None :
-255 packed_train_sequence , rnn_truth = utils . pack_sequence (
-256 sub_sequences ,
-257 seq_lengths ,
-258 args . batch_size ,
-259 self . observation_dim ,
-260 self . device )
-261 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
-262 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
-263 # use mean to predict
-264 mean = torch . cumsum ( mean , dim = 0 )
-265 mean_size = mean . size ()
-266 mean = torch . mm (
-267 torch . diag (
-268 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
-269 mean . view ( mean_size [ 0 ], - 1 ))
-270 mean = mean . view ( mean_size )
-271
-272 # Likelihood part.
-273 loss1 = loss_func . weighted_mse_loss (
-274 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
-275 target_tensor = rnn_truth ,
-276 weight = 1 / ( 2 * self . sigma2 ))
-277
-278 # Sigma2 prior part.
-279 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
-280 ** 2 ) . view ( - 1 , observation_dim )
-281 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
-282 loss2 = loss_func . sigma2_prior_loss (
-283 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
-284
-285 # Regularization part.
-286 loss3 = loss_func . regularization_loss (
-287 self . rnn_model . parameters (), args . regularization_weight )
-288
-289 loss = loss1 + loss2 + loss3
-290 loss . backward ()
-291 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
-292 optimizer . step ()
-293 # avoid numerical issues
-294 self . sigma2 . data . clamp_ ( min = 1e-6 )
-295
-296 if ( np . remainder ( num_iter , 10 ) == 0 or
-297 num_iter == args . train_iteration - 1 ):
-298 self . logger . print (
-299 2 ,
-300 'Iter: {:d} \t '
-301 'Training Loss: {:.4f} \n '
-302 ' Negative Log Likelihood: {:.4f} \t '
-303 'Sigma2 Prior: {:.4f} \t '
-304 'Regularization: {:.4f} ' . format (
-305 num_iter ,
-306 float ( loss . data ),
-307 float ( loss1 . data ),
-308 float ( loss2 . data ),
-309 float ( loss3 . data )))
-310 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
-311 self . logger . print (
-312 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
-313
-314 def fit ( self , train_sequences , train_cluster_ids , args ):
-315 """Fit UISRNN model.
-316
-317 Args:
-318 train_sequences: Either a list of training sequences, or a single
-319 concatenated training sequence:
-320
-321 1. train_sequences is a list, and each element is a 2-dim numpy array
-322 of real numbers, of size: `length * D`.
-323 The length varies among different sequences, but `D` is the same.
-324 In speaker diarization, each sequence is the sequence of speaker
-325 embeddings of one utterance.
-326 2. train_sequences is a single concatenated sequence, which is a
-327 2-dim numpy array of real numbers. See `fit_concatenated()`
-328 for more details.
-329 train_cluster_ids: Ground truth labels for train_sequences:
-330
-331 1. if train_sequences is a list, this must also be a list of the same
-332 size, each element being a 1-dim list or numpy array of strings.
-333 2. if train_sequences is a single concatenated sequence, this
-334 must also be the concatenated 1-dim list or numpy array of strings.
-335 args: Training configurations. See `arguments.py` for details.
-336
-337 Raises:
-338 TypeError: If train_sequences or train_cluster_ids is of wrong type.
-339 """
-340 if isinstance ( train_sequences , np . ndarray ):
-341 # train_sequences is already the concatenated sequence
-342 if self . estimate_transition_bias :
-343 # see issue #55: https://github.com/google/uis-rnn/issues/55
-344 self . logger . print (
-345 2 ,
-346 'Warning: transition_bias cannot be correctly estimated from a '
-347 'concatenated sequence; train_sequences will be treated as a '
-348 'single sequence. This can lead to inaccurate estimation of '
-350 'transition_bias. Please consider estimating transition_bias '
-350 'before concatenating the sequences and passing it as argument.' )
-351 train_sequences = [ train_sequences ]
-352 train_cluster_ids = [ train_cluster_ids ]
-353 elif isinstance ( train_sequences , list ):
-354 # train_sequences is a list of un-concatenated sequences
-355 # we will concatenate it later, after estimating transition_bias
-356 pass
-357 else :
-358 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
-359
-360 # estimate transition_bias
-361 if self . estimate_transition_bias :
-362 ( transition_bias ,
-363 transition_bias_denominator ) = utils . estimate_transition_bias (
-364 train_cluster_ids )
-365 # set or update transition_bias
-366 if self . transition_bias is None :
-367 self . transition_bias = transition_bias
-368 self . transition_bias_denominator = transition_bias_denominator
-369 else :
-370 self . transition_bias = (
-371 self . transition_bias * self . transition_bias_denominator +
-372 transition_bias * transition_bias_denominator ) / (
-373 self . transition_bias_denominator + transition_bias_denominator )
-374 self . transition_bias_denominator += transition_bias_denominator
-375
-376 # concatenate train_sequences
-377 ( concatenated_train_sequence ,
-378 concatenated_train_cluster_id ) = utils . concatenate_training_data (
-379 train_sequences ,
-380 train_cluster_ids ,
-381 args . enforce_cluster_id_uniqueness ,
-382 True )
-383
-384 self . fit_concatenated (
-385 concatenated_train_sequence , concatenated_train_cluster_id , args )
-386
-387 def _update_beam_state ( self , beam_state , look_ahead_seq , cluster_seq ):
-388 """Update a beam state given a look ahead sequence and known cluster
-389 assignments.
-390
-391 Args:
-392 beam_state: A BeamState object.
-393 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
-394 look_ahead: number of steps to look ahead in the beam search.
-395 D: observation dimension.
-396 cluster_seq: Cluster assignment sequence for look_ahead_seq.
-397
-398 Returns:
-399 new_beam_state: An updated BeamState object.
-400 """
-401
-402 loss = 0
-403 new_beam_state = BeamState ( beam_state )
-404 for sub_idx , cluster in enumerate ( cluster_seq ):
-405 if cluster > len ( new_beam_state . mean_set ): # invalid trace
-406 new_beam_state . neg_likelihood = float ( 'inf' )
-407 break
-408 elif cluster < len ( new_beam_state . mean_set ): # existing cluster
-409 last_cluster = new_beam_state . trace [ - 1 ]
-410 loss = loss_func . weighted_mse_loss (
-411 input_tensor = torch . squeeze ( new_beam_state . mean_set [ cluster ]),
-412 target_tensor = look_ahead_seq [ sub_idx , :],
-413 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
-414 if cluster == last_cluster :
-415 loss -= np . log ( 1 - self . transition_bias )
-416 else :
-417 loss -= np . log ( self . transition_bias ) + np . log (
-418 new_beam_state . block_counts [ cluster ]) - np . log (
-419 sum ( new_beam_state . block_counts ) + self . crp_alpha )
-420 # update new mean and new hidden
-421 mean , hidden = self . rnn_model (
-422 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
-423 new_beam_state . hidden_set [ cluster ])
-424 new_beam_state . mean_set [ cluster ] = ( new_beam_state . mean_set [ cluster ] * (
-425 ( np . array ( new_beam_state . trace ) == cluster ) . sum () -
-426 1 ) . astype ( float ) + mean . clone ()) / (
-427 np . array ( new_beam_state . trace ) == cluster ) . sum () . astype (
-428 float ) # use mean to predict
-429 new_beam_state . hidden_set [ cluster ] = hidden . clone ()
-430 if cluster != last_cluster :
-431 new_beam_state . block_counts [ cluster ] += 1
-432 new_beam_state . trace . append ( cluster )
-433 else : # new cluster
-434 init_input = autograd . Variable (
-435 torch . zeros ( self . observation_dim )
-436 ) . unsqueeze ( 0 ) . unsqueeze ( 0 ) . to ( self . device )
-437 mean , hidden = self . rnn_model ( init_input ,
-438 self . rnn_init_hidden )
-439 loss = loss_func . weighted_mse_loss (
-440 input_tensor = torch . squeeze ( mean ),
-441 target_tensor = look_ahead_seq [ sub_idx , :],
-442 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
-443 loss -= np . log ( self . transition_bias ) + np . log (
-444 self . crp_alpha ) - np . log (
-445 sum ( new_beam_state . block_counts ) + self . crp_alpha )
-446 # update new mean and new hidden
-447 mean , hidden = self . rnn_model (
-448 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
-449 hidden )
-450 new_beam_state . append ( mean , hidden , cluster )
-451 new_beam_state . neg_likelihood += loss
-452 return new_beam_state
-453
-454 def _calculate_score ( self , beam_state , look_ahead_seq ):
-455 """Calculate negative log likelihoods for all possible state allocations
-456 of a look ahead sequence, according to the current beam state.
-457
-458 Args:
-459 beam_state: A BeamState object.
-460 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
-461 look_ahead: number of steps to look ahead in the beam search.
-462 D: observation dimension.
-463
-464 Returns:
-465 beam_score_set: a numpy array of scores for each possible state allocation.
-466 """
-467
-468 look_ahead , _ = look_ahead_seq . shape
-469 beam_num_clusters = len ( beam_state . mean_set )
-470 beam_score_set = float ( 'inf' ) * np . ones (
-471 beam_num_clusters + 1 + np . arange ( look_ahead ))
-472 for cluster_seq , _ in np . ndenumerate ( beam_score_set ):
-473 updated_beam_state = self . _update_beam_state ( beam_state ,
-474 look_ahead_seq , cluster_seq )
-475 beam_score_set [ cluster_seq ] = updated_beam_state . neg_likelihood
-476 return beam_score_set
-477
-478 def predict_single ( self , test_sequence , args ):
-479 """Predict labels for a single test sequence using UISRNN model.
-480
-481 Args:
-482 test_sequence: the test observation sequence, which is a 2-dim numpy array
-483 of real numbers, of size `N * D`.
-484
-485 - `N`: length of one test utterance.
-486 - `D`: observation dimension.
-487
-488 For example:
-489 ```
-490 test_sequence =
-491 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
-492 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
-493 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
-494 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
-495 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
-496 ```
-497 Here `N=5`, `D=4`.
-498 args: Inference configurations. See `arguments.py` for details.
-499
-500 Returns:
-501 predicted_cluster_id: predicted speaker id sequence, which is
-502 an array of integers, of size `N`.
-503 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
-504
-505 Raises:
-506 TypeError: If test_sequence is of wrong type.
-507 ValueError: If test_sequence has wrong dimension.
-508 """
-509 # check type
-510 if ( not isinstance ( test_sequence , np . ndarray ) or
-511 test_sequence . dtype != float ):
-512 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
-513 # check dimension
-514 if test_sequence . ndim != 2 :
-515 raise ValueError ( 'test_sequence must be 2-dim array.' )
-516 # check size
-517 test_sequence_length , observation_dim = test_sequence . shape
-518 if observation_dim != self . observation_dim :
-519 raise ValueError ( 'test_sequence does not match the dimension specified '
-520 'by args.observation_dim.' )
-521
-522 self . rnn_model . eval ()
-523 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
-524 test_sequence = autograd . Variable (
-525 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
-526 # bookkeeping for beam search
-527 beam_set = [ BeamState ()]
-528 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
-529 args . look_ahead ):
-530 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
-531 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
-532 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
-533 score_set = float ( 'inf' ) * np . ones (
-534 np . append (
-535 args . beam_size , max_clusters + 1 + np . arange (
-536 look_ahead_seq_length )))
-537 for beam_rank , beam_state in enumerate ( beam_set ):
-538 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
-539 score_set [ beam_rank , :] = np . pad (
-540 beam_score_set ,
-541 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
-542 ( look_ahead_seq_length , 1 )), 'constant' ,
-543 constant_values = float ( 'inf' ))
-544 # find top scores
-545 score_ranked = np . sort ( score_set , axis = None )
-546 score_ranked [ score_ranked == float ( 'inf' )] = 0
-547 score_ranked = np . trim_zeros ( score_ranked )
-548 idx_ranked = np . argsort ( score_set , axis = None )
-549 updated_beam_set = []
-550 for new_beam_rank in range (
-551 np . min (( len ( score_ranked ), args . beam_size ))):
-552 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
-553 score_set . shape )
-554 prev_beam_rank = total_idx [ 0 ] . item ()
-555 cluster_seq = total_idx [ 1 :]
-556 updated_beam_state = self . _update_beam_state (
-557 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
-558 updated_beam_set . append ( updated_beam_state )
-559 beam_set = updated_beam_set
-560 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
-561 return predicted_cluster_id
-562
-563 def predict ( self , test_sequences , args ):
-564 """Predict labels for a single or many test sequences using UISRNN model.
-565
-566 Args:
-567 test_sequences: Either a list of test sequences, or a single test
-568 sequence. Each test sequence is a 2-dim numpy array
-569 of real numbers. See `predict_single()` for details.
-570 args: Inference configurations. See `arguments.py` for details.
-571
-572 Returns:
-573 predicted_cluster_ids: Predicted labels for test_sequences.
-574
-575 1. if test_sequences is a list, predicted_cluster_ids will be a list
-576 of the same size, where each element is a 1-dim list of integers.
-577 2. if test_sequences is a single sequence, predicted_cluster_ids will
-578 be a 1-dim list of integers.
-579
-580 Raises:
-581 TypeError: If test_sequences is of wrong type.
-582 """
-583 # check type
-584 if isinstance ( test_sequences , np . ndarray ):
-585 return self . predict_single ( test_sequences , args )
-586 if isinstance ( test_sequences , list ):
-587 return [ self . predict_single ( test_sequence , args )
-588 for test_sequence in test_sequences ]
-589 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
-590
+ 79
+ 80 class UISRNN :
+ 81 """Unbounded Interleaved-State Recurrent Neural Networks."""
+ 82
+ 83 def __init__ ( self , args ):
+ 84 """Construct the UISRNN object.
+ 85
+ 86 Args:
+ 87 args: Model configurations. See `arguments.py` for details.
+ 88 """
+ 89 self . observation_dim = args . observation_dim
+ 90 self . device = torch . device (
+ 91 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
+ 92 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
+ 93 args . rnn_depth , self . observation_dim ,
+ 94 args . rnn_dropout ) . to ( self . device )
+ 95 self . rnn_init_hidden = nn . Parameter (
+ 96 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
+ 97 # booleans indicating which variables are trainable
+ 98 self . estimate_sigma2 = ( args . sigma2 is None )
+ 99 self . estimate_transition_bias = ( args . transition_bias is None )
+100 # initial values of variables
+101 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
+102 self . sigma2 = nn . Parameter (
+103 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
+104 self . transition_bias = args . transition_bias
+105 self . transition_bias_denominator = 0.0
+106 self . crp_alpha = args . crp_alpha
+107 self . logger = colortimelog . Logger ( args . verbosity )
+108
+109 def _get_optimizer ( self , optimizer , learning_rate ):
+110 """Get optimizer for UISRNN.
+111
+112 Args:
+113 optimizer: string - name of the optimizer.
+114 learning_rate: - learning rate for the entire model.
+115 We do not customize learning rate for separate parts.
+116
+117 Returns:
+118 a PyTorch "optim" object
+119 """
+120 params = [
+121 {
+122 'params' : self . rnn_model . parameters ()
+123 }, # rnn parameters
+124 {
+125 'params' : self . rnn_init_hidden
+126 } # rnn initial hidden state
+127 ]
+128 if self . estimate_sigma2 : # train sigma2
+129 params . append ({
+130 'params' : self . sigma2
+131 }) # variance parameters
+132 assert optimizer == 'adam' , 'Only adam optimizer is supported.'
+133 return optim . Adam ( params , lr = learning_rate )
+134
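The parameter groups assembled in `_get_optimizer` are plain PyTorch param-group dicts; each dict becomes a separate group inside the optimizer, which is what would allow per-part hyperparameters later even though a single learning rate is used here. A minimal sketch of the same pattern, with hypothetical stand-in tensors:

```
import torch
from torch import nn, optim

# Hypothetical stand-ins for rnn_model.parameters(), rnn_init_hidden, sigma2.
weight = nn.Parameter(torch.randn(4, 4))
init_hidden = nn.Parameter(torch.zeros(1, 1, 4))
sigma2 = nn.Parameter(torch.ones(4))

params = [
    {'params': [weight]},       # one group for the RNN weights
    {'params': init_hidden},    # one group for the initial hidden state
]
params.append({'params': sigma2})  # added only when sigma2 is trainable

opt = optim.Adam(params, lr=1e-3)  # the single lr applies to every group
```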
+135 def save ( self , filepath ):
+136 """Save the model to a file.
+137
+138 Args:
+139 filepath: the path of the file.
+140 """
+141 torch . save ({
+142 'rnn_state_dict' : self . rnn_model . state_dict (),
+143 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
+144 'transition_bias' : self . transition_bias ,
+145 'transition_bias_denominator' : self . transition_bias_denominator ,
+146 'crp_alpha' : self . crp_alpha ,
+147 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
+148
+149 def load ( self , filepath ):
+150 """Load the model from a file.
+151
+152 Args:
+153 filepath: the path of the file.
+154 """
+155 var_dict = torch . load ( filepath )
+156 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
+157 self . rnn_init_hidden = nn . Parameter (
+158 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
+159 self . transition_bias = float ( var_dict [ 'transition_bias' ])
+160 self . transition_bias_denominator = float (
+161 var_dict [ 'transition_bias_denominator' ])
+162 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
+163 self . sigma2 = nn . Parameter (
+164 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
+165
+166 self . logger . print (
+167 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
+168 'rnn_init_hidden= {} ' . format (
+169 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
+170 var_dict [ 'rnn_init_hidden' ]))
+171
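`save()` and `load()` round-trip everything inference needs: the RNN weights, the trained initial hidden state, `transition_bias` together with its denominator (so later `fit()` calls keep pooling correctly), `crp_alpha`, and `sigma2`. A hypothetical round trip, assuming the package's `parse_arguments()` helper and some prepared `train_sequences` / `train_cluster_ids`:

```
import uisrnn

model_args, training_args, inference_args = uisrnn.parse_arguments()
model = uisrnn.UISRNN(model_args)
model.fit(train_sequences, train_cluster_ids, training_args)
model.save('/tmp/uisrnn.model')

# Later, in a fresh process: construct with the same args, then restore.
restored = uisrnn.UISRNN(model_args)
restored.load('/tmp/uisrnn.model')
```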
+172 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
+173 """Fit UISRNN model to concatenated sequence and cluster_id.
+174
+175 Args:
+176 train_sequence: the training observation sequence, which is a
+177 2-dim numpy array of real numbers, of size `N * D`.
+178
+179 - `N`: summation of lengths of all utterances.
+180 - `D`: observation dimension.
+181
+182 For example,
+183 ```
+184 train_sequence =
+185 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
+186 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
+187 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
+188 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
+189 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
+190 ```
+191 Here `N=5`, `D=4`.
+192
+193 We concatenate all training utterances into this single sequence.
+194 train_cluster_id: the speaker id sequence, which is a 1-dim list or
+195 numpy array of strings, of size `N`.
+196 For example,
+197 ```
+198 train_cluster_id =
+199 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
+200 ```
+201 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
+202
+203 Note that the order of entries within an utterance is preserved,
+204 and all utterances are simply concatenated together.
+205 args: Training configurations. See `arguments.py` for details.
+206
+207 Raises:
+208 TypeError: If train_sequence or train_cluster_id is of wrong type.
+209 ValueError: If train_sequence or train_cluster_id has wrong dimension.
+210 """
+211 # check type
+212 if ( not isinstance ( train_sequence , np . ndarray ) or
+213 train_sequence . dtype != float ):
+214 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
+215 if isinstance ( train_cluster_id , list ):
+216 train_cluster_id = np . array ( train_cluster_id )
+217 if ( not isinstance ( train_cluster_id , np . ndarray ) or
+218 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
+219 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
+220 # check dimension
+221 if train_sequence . ndim != 2 :
+222 raise ValueError ( 'train_sequence must be 2-dim array.' )
+223 if train_cluster_id . ndim != 1 :
+224 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
+225 # check length and size
+226 train_total_length , observation_dim = train_sequence . shape
+227 if observation_dim != self . observation_dim :
+228 raise ValueError ( 'train_sequence does not match the dimension specified '
+229 'by args.observation_dim.' )
+230 if train_total_length != len ( train_cluster_id ):
+231 raise ValueError ( 'train_sequence length is not equal to '
+232 'train_cluster_id length.' )
+233
+234 self . rnn_model . train ()
+235 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
+236 learning_rate = args . learning_rate )
+237
+238 sub_sequences , seq_lengths = utils . resize_sequence (
+239 sequence = train_sequence ,
+240 cluster_id = train_cluster_id ,
+241 num_permutations = args . num_permutations )
+242
+243 # For batch learning, pack the entire dataset.
+244 if args . batch_size is None :
+245 packed_train_sequence , rnn_truth = utils . pack_sequence (
+246 sub_sequences ,
+247 seq_lengths ,
+248 args . batch_size ,
+249 self . observation_dim ,
+250 self . device )
+251 train_loss = []
+252 for num_iter in range ( args . train_iteration ):
+253 optimizer . zero_grad ()
+254 # For online learning, pack a subset in each iteration.
+255 if args . batch_size is not None :
+256 packed_train_sequence , rnn_truth = utils . pack_sequence (
+257 sub_sequences ,
+258 seq_lengths ,
+259 args . batch_size ,
+260 self . observation_dim ,
+261 self . device )
+262 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
+263 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
+264 # use mean to predict
+265 mean = torch . cumsum ( mean , dim = 0 )
+266 mean_size = mean . size ()
+267 mean = torch . mm (
+268 torch . diag (
+269 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
+270 mean . view ( mean_size [ 0 ], - 1 ))
+271 mean = mean . view ( mean_size )
+272
+273 # Likelihood part.
+274 loss1 = loss_func . weighted_mse_loss (
+275 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
+276 target_tensor = rnn_truth ,
+277 weight = 1 / ( 2 * self . sigma2 ))
+278
+279 # Sigma2 prior part.
+280 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
+281 ** 2 ) . view ( - 1 , observation_dim )
+282 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
+283 loss2 = loss_func . sigma2_prior_loss (
+284 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
+285
+286 # Regularization part.
+287 loss3 = loss_func . regularization_loss (
+288 self . rnn_model . parameters (), args . regularization_weight )
+289
+290 loss = loss1 + loss2 + loss3
+291 loss . backward ()
+292 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
+293 optimizer . step ()
+294 # avoid numerical issues
+295 self . sigma2 . data . clamp_ ( min = 1e-6 )
+296
+297 if ( np . remainder ( num_iter , 10 ) == 0 or
+298 num_iter == args . train_iteration - 1 ):
+299 self . logger . print (
+300 2 ,
+301 'Iter: {:d} \t '
+302 'Training Loss: {:.4f} \n '
+303 ' Negative Log Likelihood: {:.4f} \t '
+304 'Sigma2 Prior: {:.4f} \t '
+305 'Regularization: {:.4f} ' . format (
+306 num_iter ,
+307 float ( loss . data ),
+308 float ( loss1 . data ),
+309 float ( loss2 . data ),
+310 float ( loss3 . data )))
+311 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
+312 self . logger . print (
+313 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
+314
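The `torch.cumsum` followed by the diagonal scaling inside the training loop computes a running average of the RNN outputs: the cumulative sum at step `t` divided by `t`. A small sketch on a toy tensor, compared against a more direct broadcasting form (an illustration, not part of the library):

```
import torch

mean = torch.randn(5, 2, 3)   # (steps, batch, observation_dim), toy values
steps = mean.size(0)

# The loop's formulation: cumulative sum, then scale row t by 1/(t+1).
cum = torch.cumsum(mean, dim=0)
scaled = torch.mm(
    torch.diag(1.0 / torch.arange(1, steps + 1).float()),
    cum.view(steps, -1)).view(mean.size())

# Equivalent, simpler form: divide by the step index via broadcasting.
direct = cum / torch.arange(1, steps + 1).float().view(-1, 1, 1)
assert torch.allclose(scaled, direct)
```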
+315 def fit ( self , train_sequences , train_cluster_ids , args ):
+316 """Fit UISRNN model.
+317
+318 Args:
+319 train_sequences: Either a list of training sequences, or a single
+320 concatenated training sequence:
+321
+322 1. train_sequences is a list, and each element is a 2-dim numpy array
+323 of real numbers, of size: `length * D`.
+324 The length varies among different sequences, but `D` is the same.
+325 In speaker diarization, each sequence is the sequence of speaker
+326 embeddings of one utterance.
+327 2. train_sequences is a single concatenated sequence, which is a
+328 2-dim numpy array of real numbers. See `fit_concatenated()`
+329 for more details.
+330 train_cluster_ids: Ground truth labels for train_sequences:
+331
+332 1. if train_sequences is a list, this must also be a list of the same
+333 size, each element being a 1-dim list or numpy array of strings.
+334 2. if train_sequences is a single concatenated sequence, this
+335 must also be the concatenated 1-dim list or numpy array of strings.
+336 args: Training configurations. See `arguments.py` for details.
+337
+338 Raises:
+339 TypeError: If train_sequences or train_cluster_ids is of wrong type.
+340 """
+341 if isinstance ( train_sequences , np . ndarray ):
+342 # train_sequences is already the concatenated sequence
+343 if self . estimate_transition_bias :
+344 # see issue #55: https://github.com/google/uis-rnn/issues/55
+345 self . logger . print (
+346 2 ,
+347 'Warning: transition_bias cannot be correctly estimated from a '
+348 'concatenated sequence; train_sequences will be treated as a '
+349 'single sequence. This can lead to inaccurate estimation of '
+350 'transition_bias. Please consider estimating transition_bias '
+351 'before concatenating the sequences and passing it as argument.' )
+352 train_sequences = [ train_sequences ]
+353 train_cluster_ids = [ train_cluster_ids ]
+354 elif isinstance ( train_sequences , list ):
+355 # train_sequences is a list of un-concatenated sequences
+356 # we will concatenate it later, after estimating transition_bias
+357 pass
+358 else :
+359 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
+360
+361 # estimate transition_bias
+362 if self . estimate_transition_bias :
+363 ( transition_bias ,
+364 transition_bias_denominator ) = utils . estimate_transition_bias (
+365 train_cluster_ids )
+366 # set or update transition_bias
+367 if self . transition_bias is None :
+368 self . transition_bias = transition_bias
+369 self . transition_bias_denominator = transition_bias_denominator
+370 else :
+371 self . transition_bias = (
+372 self . transition_bias * self . transition_bias_denominator +
+373 transition_bias * transition_bias_denominator ) / (
+374 self . transition_bias_denominator + transition_bias_denominator )
+375 self . transition_bias_denominator += transition_bias_denominator
+376
+377 # concatenate train_sequences
+378 ( concatenated_train_sequence ,
+379 concatenated_train_cluster_id ) = utils . concatenate_training_data (
+380 train_sequences ,
+381 train_cluster_ids ,
+382 args . enforce_cluster_id_uniqueness ,
+383 True )
+384
+385 self . fit_concatenated (
+386 concatenated_train_sequence , concatenated_train_cluster_id , args )
+387
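The weighted update above pools `transition_bias` estimates across `fit()` calls: an existing bias of 0.8 backed by a denominator of 100, combined with a new estimate of 0.6 backed by 50, gives (0.8 * 100 + 0.6 * 50) / 150 ≈ 0.733. A hypothetical training sketch using the list form, which is the one that lets `fit()` estimate `transition_bias` correctly:

```
import numpy as np

# Two hypothetical utterances with D=4 embeddings per frame;
# model_args.observation_dim is assumed to be 4.
train_sequences = [np.random.rand(10, 4), np.random.rand(7, 4)]
train_cluster_ids = [
    np.array(['utt0_spk0'] * 6 + ['utt0_spk1'] * 4),
    np.array(['utt1_spk0'] * 7),
]

model = UISRNN(model_args)  # model_args as produced by arguments.py
model.fit(train_sequences, train_cluster_ids, training_args)
```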
+388 def _update_beam_state ( self , beam_state , look_ahead_seq , cluster_seq ):
+389 """Update a beam state given a look ahead sequence and known cluster
+390 assignments.
+391
+392 Args:
+393 beam_state: A BeamState object.
+394 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
+395 look_ahead: number of steps to look ahead in the beam search.
+396 D: observation dimension.
+397 cluster_seq: Cluster assignment sequence for look_ahead_seq.
+398
+399 Returns:
+400 new_beam_state: An updated BeamState object.
+401 """
+402
+403 loss = 0
+404 new_beam_state = BeamState ( beam_state )
+405 for sub_idx , cluster in enumerate ( cluster_seq ):
+406 if cluster > len ( new_beam_state . mean_set ): # invalid trace
+407 new_beam_state . neg_likelihood = float ( 'inf' )
+408 break
+409 elif cluster < len ( new_beam_state . mean_set ): # existing cluster
+410 last_cluster = new_beam_state . trace [ - 1 ]
+411 loss = loss_func . weighted_mse_loss (
+412 input_tensor = torch . squeeze ( new_beam_state . mean_set [ cluster ]),
+413 target_tensor = look_ahead_seq [ sub_idx , :],
+414 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
+415 if cluster == last_cluster :
+416 loss -= np . log ( 1 - self . transition_bias )
+417 else :
+418 loss -= np . log ( self . transition_bias ) + np . log (
+419 new_beam_state . block_counts [ cluster ]) - np . log (
+420 sum ( new_beam_state . block_counts ) + self . crp_alpha )
+421 # update new mean and new hidden
+422 mean , hidden = self . rnn_model (
+423 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
+424 new_beam_state . hidden_set [ cluster ])
+425 new_beam_state . mean_set [ cluster ] = ( new_beam_state . mean_set [ cluster ] * (
+426 ( np . array ( new_beam_state . trace ) == cluster ) . sum () -
+427 1 ) . astype ( float ) + mean . clone ()) / (
+428 np . array ( new_beam_state . trace ) == cluster ) . sum () . astype (
+429 float ) # use mean to predict
+430 new_beam_state . hidden_set [ cluster ] = hidden . clone ()
+431 if cluster != last_cluster :
+432 new_beam_state . block_counts [ cluster ] += 1
+433 new_beam_state . trace . append ( cluster )
+434 else : # new cluster
+435 init_input = autograd . Variable (
+436 torch . zeros ( self . observation_dim )
+437 ) . unsqueeze ( 0 ) . unsqueeze ( 0 ) . to ( self . device )
+438 mean , hidden = self . rnn_model ( init_input ,
+439 self . rnn_init_hidden )
+440 loss = loss_func . weighted_mse_loss (
+441 input_tensor = torch . squeeze ( mean ),
+442 target_tensor = look_ahead_seq [ sub_idx , :],
+443 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
+444 loss -= np . log ( self . transition_bias ) + np . log (
+445 self . crp_alpha ) - np . log (
+446 sum ( new_beam_state . block_counts ) + self . crp_alpha )
+447 # update new mean and new hidden
+448 mean , hidden = self . rnn_model (
+449 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
+450 hidden )
+451 new_beam_state . append ( mean , hidden , cluster )
+452 new_beam_state . neg_likelihood += loss
+453 return new_beam_state
+454
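The three `loss -=` branches in `_update_beam_state` add the negative log prior of the distance-dependent Chinese restaurant process: staying with the current speaker has probability `1 - p` (where `p` is `transition_bias`), switching to existing speaker `k` has probability `p * N_k / (sum(N) + alpha)`, and opening a new speaker has probability `p * alpha / (sum(N) + alpha)`. A minimal sketch of that prior in isolation (an illustrative helper, not part of the library):

```
import numpy as np

def crp_neg_log_prior(cluster, last_cluster, block_counts, p, alpha):
    """Negative log prior of the next label under the CRP used above.

    cluster == len(block_counts) denotes a new speaker; p is transition_bias.
    """
    if cluster == last_cluster:                       # same speaker continues
        return -np.log(1 - p)
    total = sum(block_counts) + alpha
    if cluster < len(block_counts):                   # switch to an existing speaker
        return -(np.log(p) + np.log(block_counts[cluster]) - np.log(total))
    return -(np.log(p) + np.log(alpha) - np.log(total))  # brand-new speaker

# Example: two existing speakers with block counts [3, 1], p=0.5, alpha=1.0.
print(crp_neg_log_prior(0, 0, [3, 1], 0.5, 1.0))  # stay:   -log(0.5)
print(crp_neg_log_prior(1, 0, [3, 1], 0.5, 1.0))  # switch: -log(0.5 * 1/5)
print(crp_neg_log_prior(2, 0, [3, 1], 0.5, 1.0))  # new:    -log(0.5 * 1/5)
```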
+455 def _calculate_score ( self , beam_state , look_ahead_seq ):
+456 """Calculate negative log likelihoods for all possible state allocations
+457 of a look ahead sequence, according to the current beam state.
+458
+459 Args:
+460 beam_state: A BeamState object.
+461 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
+462 look_ahead: number of steps to look ahead in the beam search.
+463 D: observation dimension.
+464
+465 Returns:
+466 beam_score_set: a numpy array of scores for each possible state allocation.
+467 """
+468
+469 look_ahead , _ = look_ahead_seq . shape
+470 beam_num_clusters = len ( beam_state . mean_set )
+471 beam_score_set = float ( 'inf' ) * np . ones (
+472 beam_num_clusters + 1 + np . arange ( look_ahead ))
+473 for cluster_seq , _ in np . ndenumerate ( beam_score_set ):
+474 updated_beam_state = self . _update_beam_state ( beam_state ,
+475 look_ahead_seq , cluster_seq )
+476 beam_score_set [ cluster_seq ] = updated_beam_state . neg_likelihood
+477 return beam_score_set
+478
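The shape arithmetic in `_calculate_score` is the subtle part: with `k` existing clusters and a look-ahead window of length `w`, position `i` of the window can pick any of the `k` existing clusters or one of up to `i + 1` newly opened ones, so the score tensor has shape `(k+1, k+2, ..., k+w)` and `np.ndenumerate` enumerates every candidate assignment. A quick check of that logic with hypothetical numbers:

```
import numpy as np

beam_num_clusters, look_ahead = 2, 3
shape = beam_num_clusters + 1 + np.arange(look_ahead)
beam_score_set = float('inf') * np.ones(shape)

print(beam_score_set.shape)   # (3, 4, 5)
print(beam_score_set.size)    # 60 candidate cluster sequences
for cluster_seq, _ in np.ndenumerate(beam_score_set):
    pass                      # each cluster_seq is a tuple like (0, 3, 1)
```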
+479 def predict_single ( self , test_sequence , args ):
+480 """Predict labels for a single test sequence using UISRNN model.
+481
+482 Args:
+483 test_sequence: the test observation sequence, which is a 2-dim numpy array
+484 of real numbers, of size `N * D`.
+485
+486 - `N`: length of one test utterance.
+487 - `D`: observation dimension.
+488
+489 For example:
+490 ```
+491 test_sequence =
+492 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
+493 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
+494 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
+495 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
+496 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
+497 ```
+498 Here `N=5`, `D=4`.
+499 args: Inference configurations. See `arguments.py` for details.
+500
+501 Returns:
+502 predicted_cluster_id: predicted speaker id sequence, which is
+503 an array of integers, of size `N`.
+504 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
+505
+506 Raises:
+507 TypeError: If test_sequence is of wrong type.
+508 ValueError: If test_sequence has wrong dimension.
+509 """
+510 # check type
+511 if ( not isinstance ( test_sequence , np . ndarray ) or
+512 test_sequence . dtype != float ):
+513 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
+514 # check dimension
+515 if test_sequence . ndim != 2 :
+516 raise ValueError ( 'test_sequence must be 2-dim array.' )
+517 # check size
+518 test_sequence_length , observation_dim = test_sequence . shape
+519 if observation_dim != self . observation_dim :
+520 raise ValueError ( 'test_sequence does not match the dimension specified '
+521 'by args.observation_dim.' )
+522
+523 self . rnn_model . eval ()
+524 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
+525 test_sequence = autograd . Variable (
+526 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
+527 # bookkeeping for beam search
+528 beam_set = [ BeamState ()]
+529 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
+530 args . look_ahead ):
+531 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
+532 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
+533 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
+534 score_set = float ( 'inf' ) * np . ones (
+535 np . append (
+536 args . beam_size , max_clusters + 1 + np . arange (
+537 look_ahead_seq_length )))
+538 for beam_rank , beam_state in enumerate ( beam_set ):
+539 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
+540 score_set [ beam_rank , :] = np . pad (
+541 beam_score_set ,
+542 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
+543 ( look_ahead_seq_length , 1 )), 'constant' ,
+544 constant_values = float ( 'inf' ))
+545 # find top scores
+546 score_ranked = np . sort ( score_set , axis = None )
+547 score_ranked [ score_ranked == float ( 'inf' )] = 0
+548 score_ranked = np . trim_zeros ( score_ranked )
+549 idx_ranked = np . argsort ( score_set , axis = None )
+550 updated_beam_set = []
+551 for new_beam_rank in range (
+552 np . min (( len ( score_ranked ), args . beam_size ))):
+553 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
+554 score_set . shape )
+555 prev_beam_rank = total_idx [ 0 ] . item ()
+556 cluster_seq = total_idx [ 1 :]
+557 updated_beam_state = self . _update_beam_state (
+558 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
+559 updated_beam_set . append ( updated_beam_state )
+560 beam_set = updated_beam_set
+561 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
+562 return predicted_cluster_id
+563
+564 def predict ( self , test_sequences , args ):
+565 """Predict labels for a single or many test sequences using UISRNN model.
+566
+567 Args:
+568 test_sequences: Either a list of test sequences, or a single test
+569 sequence. Each test sequence is a 2-dim numpy array
+570 of real numbers. See `predict_single()` for details.
+571 args: Inference configurations. See `arguments.py` for details.
+572
+573 Returns:
+574 predicted_cluster_ids: Predicted labels for test_sequences.
+575
+576 1. if test_sequences is a list, predicted_cluster_ids will be a list
+577 of the same size, where each element is a 1-dim list of integers.
+578 2. if test_sequences is a single sequence, predicted_cluster_ids will
+579 be a 1-dim list of integers.
+580
+581 Raises:
+582 TypeError: If test_sequences is of wrong type.
+583 """
+584 # check type
+585 if isinstance ( test_sequences , np . ndarray ):
+586 return self . predict_single ( test_sequences , args )
+587 if isinstance ( test_sequences , list ):
+588 return [ self . predict_single ( test_sequence , args )
+589 for test_sequence in test_sequences ]
+590 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
591
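End to end, prediction mirrors training. A hypothetical sketch, where `model` is a trained `UISRNN` and `inference_args` comes from `arguments.py`:

```
import numpy as np

test_sequence = np.random.rand(12, 4)         # one utterance, D=4
labels = model.predict(test_sequence, inference_args)
print(labels)                                 # e.g. [0, 0, 1, 0, ...]

batch = [np.random.rand(12, 4), np.random.rand(9, 4)]
all_labels = model.predict(batch, inference_args)  # list of label lists
```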
-592 def parallel_predict ( model , test_sequences , args , num_processes = 4 ):
-593 """Run prediction in parallel using torch.multiprocessing.
-594
-595 This is a beta feature. It makes prediction slower on CPU, but it has been
-596 reported to make prediction faster on GPU.
-597
-598 Args:
-599 model: instance of UISRNN model
-600 test_sequences: a list of test sequences, or a single test
-601 sequence. Each test sequence is a 2-dim numpy array
-602 of real numbers. See `predict_single()` for details.
-603 args: Inference configurations. See `arguments.py` for details.
-604 num_processes: number of parallel processes.
-605
-606 Returns:
-607 a list of the same size as test_sequences, where each element
-608 is a 1-dim list of integers.
-609
-610 Raises:
-611 TypeError: If test_sequences is of wrong type.
-612 """
-613 if not isinstance ( test_sequences , list ):
-614 raise TypeError ( 'test_sequences must be a list.' )
-615 ctx = multiprocessing . get_context ( 'forkserver' )
-616 model . rnn_model . share_memory ()
-617 pool = ctx . Pool ( num_processes )
-618 results = pool . map (
-619 functools . partial ( model . predict_single , args = args ),
-620 test_sequences )
-621 pool . close ()
-622 return results
+592
+593 def parallel_predict ( model , test_sequences , args , num_processes = 4 ):
+594 """Run prediction in parallel using torch.multiprocessing.
+595
+596 This is a beta feature. It makes prediction slower on CPU, but it has been
+597 reported to make prediction faster on GPU.
+598
+599 Args:
+600 model: instance of UISRNN model
+601 test_sequences: a list of test sequences, or a single test
+602 sequence. Each test sequence is a 2-dim numpy array
+603 of real numbers. See `predict_single()` for details.
+604 args: Inference configurations. See `arguments.py` for details.
+605 num_processes: number of parallel processes.
+606
+607 Returns:
+608 a list of the same size as test_sequences, where each element
+609 is a 1-dim list of integers.
+610
+611 Raises:
+612 TypeError: If test_sequences is of wrong type.
+613 """
+614 if not isinstance ( test_sequences , list ):
+615 raise TypeError ( 'test_sequences must be a list.' )
+616 ctx = multiprocessing . get_context ( 'forkserver' )
+617 model . rnn_model . share_memory ()
+618 pool = ctx . Pool ( num_processes )
+619 results = pool . map (
+620 functools . partial ( model . predict_single , args = args ),
+621 test_sequences )
+622 pool . close ()
+623 return results
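Since the pool uses the `forkserver` start method, the call must run under an import guard. A hypothetical invocation, reusing the `model` and `inference_args` names from the sketch above:

```
import numpy as np

if __name__ == '__main__':   # required by the forkserver start method
    batch = [np.random.rand(12, 4) for _ in range(8)]   # hypothetical inputs
    results = parallel_predict(model, batch, inference_args, num_processes=4)
```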
@@ -786,27 +787,27 @@
- 32 class CoreRNN ( nn . Module ):
-33 """The core Recurent Neural Network used by UIS-RNN."""
-34
-35 def __init__ ( self , input_dim , hidden_size , depth , observation_dim , dropout = 0 ):
-36 super () . __init__ ()
-37 self . hidden_size = hidden_size
-38 if depth >= 2 :
-39 self . gru = nn . GRU ( input_dim , hidden_size , depth , dropout = dropout )
-40 else :
-41 self . gru = nn . GRU ( input_dim , hidden_size , depth )
-42 self . linear_mean1 = nn . Linear ( hidden_size , hidden_size )
-43 self . linear_mean2 = nn . Linear ( hidden_size , observation_dim )
-44
-45 def forward ( self , input_seq , hidden = None ):
-46 """The forward function of the module."""
-47 output_seq , hidden = self . gru ( input_seq , hidden )
-48 if isinstance ( output_seq , torch . nn . utils . rnn . PackedSequence ):
-49 output_seq , _ = torch . nn . utils . rnn . pad_packed_sequence (
-50 output_seq , batch_first = False )
-51 mean = self . linear_mean2 ( F . relu ( self . linear_mean1 ( output_seq )))
-52 return mean , hidden
+ 33 class CoreRNN ( nn . Module ):
+34 """The core Recurent Neural Network used by UIS-RNN."""
+35
+36 def __init__ ( self , input_dim , hidden_size , depth , observation_dim , dropout = 0 ):
+37 super () . __init__ ()
+38 self . hidden_size = hidden_size
+39 if depth >= 2 :
+40 self . gru = nn . GRU ( input_dim , hidden_size , depth , dropout = dropout )
+41 else :
+42 self . gru = nn . GRU ( input_dim , hidden_size , depth )
+43 self . linear_mean1 = nn . Linear ( hidden_size , hidden_size )
+44 self . linear_mean2 = nn . Linear ( hidden_size , observation_dim )
+45
+46 def forward ( self , input_seq , hidden = None ):
+47 """The forward function of the module."""
+48 output_seq , hidden = self . gru ( input_seq , hidden )
+49 if isinstance ( output_seq , torch . nn . utils . rnn . PackedSequence ):
+50 output_seq , _ = torch . nn . utils . rnn . pad_packed_sequence (
+51 output_seq , batch_first = False )
+52 mean = self . linear_mean2 ( F . relu ( self . linear_mean1 ( output_seq )))
+53 return mean , hidden
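A minimal smoke test of the module (shapes hypothetical): the GRU consumes `(seq_len, batch, input_dim)` input, and the two linear layers map each hidden state to an `observation_dim`-sized mean:

```
import torch

rnn = CoreRNN(input_dim=4, hidden_size=8, depth=1, observation_dim=4)
x = torch.randn(10, 2, 4)        # (seq_len, batch, input_dim)
mean, hidden = rnn(x)            # hidden defaults to zeros
print(mean.shape, hidden.shape)  # torch.Size([10, 2, 4]) torch.Size([1, 2, 8])
```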
@@ -824,19 +825,19 @@
- 35 def __init__ ( self , input_dim , hidden_size , depth , observation_dim , dropout = 0 ):
-36 super () . __init__ ()
-37 self . hidden_size = hidden_size
-38 if depth >= 2 :
-39 self . gru = nn . GRU ( input_dim , hidden_size , depth , dropout = dropout )
-40 else :
-41 self . gru = nn . GRU ( input_dim , hidden_size , depth )
-42 self . linear_mean1 = nn . Linear ( hidden_size , hidden_size )
-43 self . linear_mean2 = nn . Linear ( hidden_size , observation_dim )
+ 36 def __init__ ( self , input_dim , hidden_size , depth , observation_dim , dropout = 0 ):
+37 super () . __init__ ()
+38 self . hidden_size = hidden_size
+39 if depth >= 2 :
+40 self . gru = nn . GRU ( input_dim , hidden_size , depth , dropout = dropout )
+41 else :
+42 self . gru = nn . GRU ( input_dim , hidden_size , depth )
+43 self . linear_mean1 = nn . Linear ( hidden_size , hidden_size )
+44 self . linear_mean2 = nn . Linear ( hidden_size , observation_dim )
- Initializes internal Module state, shared by both nn.Module and ScriptModule.
+
Initialize internal Module state, shared by both nn.Module and ScriptModule.
@@ -885,14 +886,14 @@
- 45 def forward ( self , input_seq , hidden = None ):
-46 """The forward function of the module."""
-47 output_seq , hidden = self . gru ( input_seq , hidden )
-48 if isinstance ( output_seq , torch . nn . utils . rnn . PackedSequence ):
-49 output_seq , _ = torch . nn . utils . rnn . pad_packed_sequence (
-50 output_seq , batch_first = False )
-51 mean = self . linear_mean2 ( F . relu ( self . linear_mean1 ( output_seq )))
-52 return mean , hidden
+ 46 def forward ( self , input_seq , hidden = None ):
+47 """The forward function of the module."""
+48 output_seq , hidden = self . gru ( input_seq , hidden )
+49 if isinstance ( output_seq , torch . nn . utils . rnn . PackedSequence ):
+50 output_seq , _ = torch . nn . utils . rnn . pad_packed_sequence (
+51 output_seq , batch_first = False )
+52 mean = self . linear_mean2 ( F . relu ( self . linear_mean1 ( output_seq )))
+53 return mean , hidden
@@ -952,6 +953,7 @@ Inherited Members
zero_grad
share_memory
+ compile
@@ -968,29 +970,29 @@ Inherited Members
- 55 class BeamState :
-56 """Structure that contains necessary states for beam search."""
-57
-58 def __init__ ( self , source = None ):
-59 if not source :
-60 self . mean_set = []
-61 self . hidden_set = []
-62 self . neg_likelihood = 0
-63 self . trace = []
-64 self . block_counts = []
-65 else :
-66 self . mean_set = source . mean_set . copy ()
-67 self . hidden_set = source . hidden_set . copy ()
-68 self . trace = source . trace . copy ()
-69 self . block_counts = source . block_counts . copy ()
-70 self . neg_likelihood = source . neg_likelihood
-71
-72 def append ( self , mean , hidden , cluster ):
-73 """Append new item to the BeamState."""
-74 self . mean_set . append ( mean . clone ())
-75 self . hidden_set . append ( hidden . clone ())
-76 self . block_counts . append ( 1 )
-77 self . trace . append ( cluster )
+ 56 class BeamState :
+57 """Structure that contains necessary states for beam search."""
+58
+59 def __init__ ( self , source = None ):
+60 if not source :
+61 self . mean_set = []
+62 self . hidden_set = []
+63 self . neg_likelihood = 0
+64 self . trace = []
+65 self . block_counts = []
+66 else :
+67 self . mean_set = source . mean_set . copy ()
+68 self . hidden_set = source . hidden_set . copy ()
+69 self . trace = source . trace . copy ()
+70 self . block_counts = source . block_counts . copy ()
+71 self . neg_likelihood = source . neg_likelihood
+72
+73 def append ( self , mean , hidden , cluster ):
+74 """Append new item to the BeamState."""
+75 self . mean_set . append ( mean . clone ())
+76 self . hidden_set . append ( hidden . clone ())
+77 self . block_counts . append ( 1 )
+78 self . trace . append ( cluster )
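Note the copy semantics: `BeamState(source)` shallow-copies the four lists, so the beam search can branch a hypothesis without mutating its parent; the tensors inside `mean_set` and `hidden_set` are only `clone()`d when they are actually updated. A tiny sketch:

```
parent = BeamState()
parent.trace = [0, 0, 1]
parent.block_counts = [2, 1]

child = BeamState(parent)   # independent list copies
child.trace.append(1)
print(parent.trace)         # [0, 0, 1]; the parent is unchanged
```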
@@ -1008,19 +1010,19 @@ Inherited Members
- 58 def __init__ ( self , source = None ):
-59 if not source :
-60 self . mean_set = []
-61 self . hidden_set = []
-62 self . neg_likelihood = 0
-63 self . trace = []
-64 self . block_counts = []
-65 else :
-66 self . mean_set = source . mean_set . copy ()
-67 self . hidden_set = source . hidden_set . copy ()
-68 self . trace = source . trace . copy ()
-69 self . block_counts = source . block_counts . copy ()
-70 self . neg_likelihood = source . neg_likelihood
+ 59 def __init__ ( self , source = None ):
+60 if not source :
+61 self . mean_set = []
+62 self . hidden_set = []
+63 self . neg_likelihood = 0
+64 self . trace = []
+65 self . block_counts = []
+66 else :
+67 self . mean_set = source . mean_set . copy ()
+68 self . hidden_set = source . hidden_set . copy ()
+69 self . trace = source . trace . copy ()
+70 self . block_counts = source . block_counts . copy ()
+71 self . neg_likelihood = source . neg_likelihood
@@ -1038,12 +1040,12 @@ Inherited Members
- 72 def append ( self , mean , hidden , cluster ):
-73 """Append new item to the BeamState."""
-74 self . mean_set . append ( mean . clone ())
-75 self . hidden_set . append ( hidden . clone ())
-76 self . block_counts . append ( 1 )
-77 self . trace . append ( cluster )
+ 73 def append ( self , mean , hidden , cluster ):
+74 """Append new item to the BeamState."""
+75 self . mean_set . append ( mean . clone ())
+76 self . hidden_set . append ( hidden . clone ())
+77 self . block_counts . append ( 1 )
+78 self . trace . append ( cluster )
@@ -1064,517 +1066,517 @@ Inherited Members
- 80 class UISRNN :
- 81 """Unbounded Interleaved-State Recurrent Neural Networks."""
- 82
- 83 def __init__ ( self , args ):
- 84 """Construct the UISRNN object.
- 85
- 86 Args:
- 87 args: Model configurations. See `arguments.py` for details.
- 88 """
- 89 self . observation_dim = args . observation_dim
- 90 self . device = torch . device (
- 91 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
- 92 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
- 93 args . rnn_depth , self . observation_dim ,
- 94 args . rnn_dropout ) . to ( self . device )
- 95 self . rnn_init_hidden = nn . Parameter (
- 96 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
- 97 # booleans indicating which variables are trainable
- 98 self . estimate_sigma2 = ( args . sigma2 is None )
- 99 self . estimate_transition_bias = ( args . transition_bias is None )
-100 # initial values of variables
-101 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
-102 self . sigma2 = nn . Parameter (
-103 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
-104 self . transition_bias = args . transition_bias
-105 self . transition_bias_denominator = 0.0
-106 self . crp_alpha = args . crp_alpha
-107 self . logger = utils . Logger ( args . verbosity )
-108
-109 def _get_optimizer ( self , optimizer , learning_rate ):
-110 """Get optimizer for UISRNN.
-111
-112 Args:
-113 optimizer: string - name of the optimizer.
-114 learning_rate: learning rate for the entire model.
-115 We do not customize learning rate for separate parts.
-116
-117 Returns:
-118 a PyTorch "optim" object
-119 """
-120 params = [
-121 {
-122 'params' : self . rnn_model . parameters ()
-123 }, # rnn parameters
-124 {
-125 'params' : self . rnn_init_hidden
-126 } # rnn initial hidden state
-127 ]
-128 if self . estimate_sigma2 : # train sigma2
-129 params . append ({
-130 'params' : self . sigma2
-131 }) # variance parameters
-132 assert optimizer == 'adam' , 'Only adam optimizer is supported.'
-133 return optim . Adam ( params , lr = learning_rate )
-134
-135 def save ( self , filepath ):
-136 """Save the model to a file.
-137
-138 Args:
-139 filepath: the path of the file.
-140 """
-141 torch . save ({
-142 'rnn_state_dict' : self . rnn_model . state_dict (),
-143 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
-144 'transition_bias' : self . transition_bias ,
-145 'transition_bias_denominator' : self . transition_bias_denominator ,
-146 'crp_alpha' : self . crp_alpha ,
-147 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
-148
-149 def load ( self , filepath ):
-150 """Load the model from a file.
-151
-152 Args:
-153 filepath: the path of the file.
-154 """
-155 var_dict = torch . load ( filepath )
-156 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
-157 self . rnn_init_hidden = nn . Parameter (
-158 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
-159 self . transition_bias = float ( var_dict [ 'transition_bias' ])
-160 self . transition_bias_denominator = float (
-161 var_dict [ 'transition_bias_denominator' ])
-162 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
-163 self . sigma2 = nn . Parameter (
-164 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
-165
-166 self . logger . print (
-167 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
-168 'rnn_init_hidden= {} ' . format (
-169 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
-170 var_dict [ 'rnn_init_hidden' ]))
-171
-172 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
-173 """Fit UISRNN model to concatenated sequence and cluster_id.
-174
-175 Args:
-176 train_sequence: the training observation sequence, which is a
-177 2-dim numpy array of real numbers, of size `N * D`.
-178
-179 - `N`: summation of lengths of all utterances.
-180 - `D`: observation dimension.
-181
-182 For example,
-183 ```
-184 train_sequence =
-185 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
-186 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
-187 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
-188 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
-189 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
-190 ```
-191 Here `N=5`, `D=4`.
-192
-193 We concatenate all training utterances into this single sequence.
-194 train_cluster_id: the speaker id sequence, which is a 1-dim list or
-195 numpy array of strings, of size `N`.
-196 For example,
-197 ```
-198 train_cluster_id =
-199 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
-200 ```
-201 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
-202
-203 Note that the order of entries within an utterance is preserved,
-204 and all utterances are simply concatenated together.
-205 args: Training configurations. See `arguments.py` for details.
-206
-207 Raises:
-208 TypeError: If train_sequence or train_cluster_id is of wrong type.
-209 ValueError: If train_sequence or train_cluster_id has wrong dimension.
-210 """
-211 # check type
-212 if ( not isinstance ( train_sequence , np . ndarray ) or
-213 train_sequence . dtype != float ):
-214 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
-215 if isinstance ( train_cluster_id , list ):
-216 train_cluster_id = np . array ( train_cluster_id )
-217 if ( not isinstance ( train_cluster_id , np . ndarray ) or
-218 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
-219 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
-220 # check dimension
-221 if train_sequence . ndim != 2 :
-222 raise ValueError ( 'train_sequence must be 2-dim array.' )
-223 if train_cluster_id . ndim != 1 :
-224 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
-225 # check length and size
-226 train_total_length , observation_dim = train_sequence . shape
-227 if observation_dim != self . observation_dim :
-228 raise ValueError ( 'train_sequence does not match the dimension specified '
-229 'by args.observation_dim.' )
-230 if train_total_length != len ( train_cluster_id ):
-231 raise ValueError ( 'train_sequence length is not equal to '
-232 'train_cluster_id length.' )
-233
-234 self . rnn_model . train ()
-235 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
-236 learning_rate = args . learning_rate )
-237
-238 sub_sequences , seq_lengths = utils . resize_sequence (
-239 sequence = train_sequence ,
-240 cluster_id = train_cluster_id ,
-241 num_permutations = args . num_permutations )
-242
-243 # For batch learning, pack the entire dataset.
-244 if args . batch_size is None :
-245 packed_train_sequence , rnn_truth = utils . pack_sequence (
-246 sub_sequences ,
-247 seq_lengths ,
-248 args . batch_size ,
-249 self . observation_dim ,
-250 self . device )
-251 train_loss = []
-252 for num_iter in range ( args . train_iteration ):
-253 optimizer . zero_grad ()
-254 # For online learning, pack a subset in each iteration.
-255 if args . batch_size is not None :
-256 packed_train_sequence , rnn_truth = utils . pack_sequence (
-257 sub_sequences ,
-258 seq_lengths ,
-259 args . batch_size ,
-260 self . observation_dim ,
-261 self . device )
-262 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
-263 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
-264 # use mean to predict
-265 mean = torch . cumsum ( mean , dim = 0 )
-266 mean_size = mean . size ()
-267 mean = torch . mm (
-268 torch . diag (
-269 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
-270 mean . view ( mean_size [ 0 ], - 1 ))
-271 mean = mean . view ( mean_size )
-272
-273 # Likelihood part.
-274 loss1 = loss_func . weighted_mse_loss (
-275 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
-276 target_tensor = rnn_truth ,
-277 weight = 1 / ( 2 * self . sigma2 ))
-278
-279 # Sigma2 prior part.
-280 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
-281 ** 2 ) . view ( - 1 , observation_dim )
-282 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
-283 loss2 = loss_func . sigma2_prior_loss (
-284 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
-285
-286 # Regularization part.
-287 loss3 = loss_func . regularization_loss (
-288 self . rnn_model . parameters (), args . regularization_weight )
-289
-290 loss = loss1 + loss2 + loss3
-291 loss . backward ()
-292 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
-293 optimizer . step ()
-294 # avoid numerical issues
-295 self . sigma2 . data . clamp_ ( min = 1e-6 )
-296
-297 if ( np . remainder ( num_iter , 10 ) == 0 or
-298 num_iter == args . train_iteration - 1 ):
-299 self . logger . print (
-300 2 ,
-301 'Iter: {:d} \t '
-302 'Training Loss: {:.4f} \n '
-303 ' Negative Log Likelihood: {:.4f} \t '
-304 'Sigma2 Prior: {:.4f} \t '
-305 'Regularization: {:.4f} ' . format (
-306 num_iter ,
-307 float ( loss . data ),
-308 float ( loss1 . data ),
-309 float ( loss2 . data ),
-310 float ( loss3 . data )))
-311 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
-312 self . logger . print (
-313 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
-314
-315 def fit ( self , train_sequences , train_cluster_ids , args ):
-316 """Fit UISRNN model.
-317
-318 Args:
-319 train_sequences: Either a list of training sequences, or a single
-320 concatenated training sequence:
-321
-322 1. train_sequences is a list, and each element is a 2-dim numpy array
-323 of real numbers, of size `length * D`.
-324 The length varies among different sequences, but D is the same.
-325 In speaker diarization, each sequence is the sequence of speaker
-326 embeddings of one utterance.
-327 2. train_sequences is a single concatenated sequence, which is a
-328 2-dim numpy array of real numbers. See `fit_concatenated()`
-329 for more details.
-330 train_cluster_ids: Ground truth labels for train_sequences:
-331
-332 1. if train_sequences is a list, this must also be a list of the same
-333 size, each element being a 1-dim list or numpy array of strings.
-334 2. if train_sequences is a single concatenated sequence, this
-335 must also be the concatenated 1-dim list or numpy array of strings.
-336 args: Training configurations. See `arguments.py` for details.
-337
-338 Raises:
-339 TypeError: If train_sequences or train_cluster_ids is of wrong type.
-340 """
-341 if isinstance ( train_sequences , np . ndarray ):
-342 # train_sequences is already the concatenated sequence
-343 if self . estimate_transition_bias :
-344 # see issue #55: https://github.com/google/uis-rnn/issues/55
-345 self . logger . print (
-346 2 ,
-347 'Warning: transition_bias cannot be correctly estimated from a '
-348 'concatenated sequence; train_sequences will be treated as a '
-349 'single sequence. This can lead to inaccurate estimation of '
-350 'transition_bias. Please consider estimating transition_bias '
-351 'before concatenating the sequences and passing it as an argument.' )
-352 train_sequences = [ train_sequences ]
-353 train_cluster_ids = [ train_cluster_ids ]
-354 elif isinstance ( train_sequences , list ):
-355 # train_sequences is a list of un-concatenated sequences
-356 # we will concatenate it later, after estimating transition_bias
-357 pass
-358 else :
-359 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
-360
-361 # estimate transition_bias
-362 if self . estimate_transition_bias :
-363 ( transition_bias ,
-364 transition_bias_denominator ) = utils . estimate_transition_bias (
-365 train_cluster_ids )
-366 # set or update transition_bias
-367 if self . transition_bias is None :
-368 self . transition_bias = transition_bias
-369 self . transition_bias_denominator = transition_bias_denominator
-370 else :
-371 self . transition_bias = (
-372 self . transition_bias * self . transition_bias_denominator +
-373 transition_bias * transition_bias_denominator ) / (
-374 self . transition_bias_denominator + transition_bias_denominator )
-375 self . transition_bias_denominator += transition_bias_denominator
-376
-377 # concatenate train_sequences
-378 ( concatenated_train_sequence ,
-379 concatenated_train_cluster_id ) = utils . concatenate_training_data (
-380 train_sequences ,
-381 train_cluster_ids ,
-382 args . enforce_cluster_id_uniqueness ,
-383 True )
-384
-385 self . fit_concatenated (
-386 concatenated_train_sequence , concatenated_train_cluster_id , args )
-387
-388 def _update_beam_state ( self , beam_state , look_ahead_seq , cluster_seq ):
-389 """Update a beam state given a look ahead sequence and known cluster
-390 assignments.
-391
-392 Args:
-393 beam_state: A BeamState object.
-394 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
-395 look_ahead: number of steps to look ahead in the beam search.
-396 D: observation dimension.
-397 cluster_seq: Cluster assignment sequence for look_ahead_seq.
-398
-399 Returns:
-400 new_beam_state: An updated BeamState object.
-401 """
-402
-403 loss = 0
-404 new_beam_state = BeamState ( beam_state )
-405 for sub_idx , cluster in enumerate ( cluster_seq ):
-406 if cluster > len ( new_beam_state . mean_set ): # invalid trace
-407 new_beam_state . neg_likelihood = float ( 'inf' )
-408 break
-409 elif cluster < len ( new_beam_state . mean_set ): # existing cluster
-410 last_cluster = new_beam_state . trace [ - 1 ]
-411 loss = loss_func . weighted_mse_loss (
-412 input_tensor = torch . squeeze ( new_beam_state . mean_set [ cluster ]),
-413 target_tensor = look_ahead_seq [ sub_idx , :],
-414 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
-415 if cluster == last_cluster :
-416 loss -= np . log ( 1 - self . transition_bias )
-417 else :
-418 loss -= np . log ( self . transition_bias ) + np . log (
-419 new_beam_state . block_counts [ cluster ]) - np . log (
-420 sum ( new_beam_state . block_counts ) + self . crp_alpha )
-421 # update new mean and new hidden
-422 mean , hidden = self . rnn_model (
-423 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
-424 new_beam_state . hidden_set [ cluster ])
-425 new_beam_state . mean_set [ cluster ] = ( new_beam_state . mean_set [ cluster ] * (
-426 ( np . array ( new_beam_state . trace ) == cluster ) . sum () -
-427 1 ) . astype ( float ) + mean . clone ()) / (
-428 np . array ( new_beam_state . trace ) == cluster ) . sum () . astype (
-429 float ) # use mean to predict
-430 new_beam_state . hidden_set [ cluster ] = hidden . clone ()
-431 if cluster != last_cluster :
-432 new_beam_state . block_counts [ cluster ] += 1
-433 new_beam_state . trace . append ( cluster )
-434 else : # new cluster
-435 init_input = autograd . Variable (
-436 torch . zeros ( self . observation_dim )
-437 ) . unsqueeze ( 0 ) . unsqueeze ( 0 ) . to ( self . device )
-438 mean , hidden = self . rnn_model ( init_input ,
-439 self . rnn_init_hidden )
-440 loss = loss_func . weighted_mse_loss (
-441 input_tensor = torch . squeeze ( mean ),
-442 target_tensor = look_ahead_seq [ sub_idx , :],
-443 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
-444 loss -= np . log ( self . transition_bias ) + np . log (
-445 self . crp_alpha ) - np . log (
-446 sum ( new_beam_state . block_counts ) + self . crp_alpha )
-447 # update new mean and new hidden
-448 mean , hidden = self . rnn_model (
-449 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
-450 hidden )
-451 new_beam_state . append ( mean , hidden , cluster )
-452 new_beam_state . neg_likelihood += loss
-453 return new_beam_state
-454
-455 def _calculate_score ( self , beam_state , look_ahead_seq ):
-456 """Calculate negative log likelihoods for all possible state allocations
-457 of a look ahead sequence, according to the current beam state.
-458
-459 Args:
-460 beam_state: A BeamState object.
-461 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
-462 look_ahead: number of steps to look ahead in the beam search.
-463 D: observation dimension.
-464
-465 Returns:
-466 beam_score_set: a set of scores for each possible state allocation.
-467 """
-468
-469 look_ahead , _ = look_ahead_seq . shape
-470 beam_num_clusters = len ( beam_state . mean_set )
-471 beam_score_set = float ( 'inf' ) * np . ones (
-472 beam_num_clusters + 1 + np . arange ( look_ahead ))
-473 for cluster_seq , _ in np . ndenumerate ( beam_score_set ):
-474 updated_beam_state = self . _update_beam_state ( beam_state ,
-475 look_ahead_seq , cluster_seq )
-476 beam_score_set [ cluster_seq ] = updated_beam_state . neg_likelihood
-477 return beam_score_set
-478
-479 def predict_single ( self , test_sequence , args ):
-480 """Predict labels for a single test sequence using UISRNN model.
-481
-482 Args:
-483 test_sequence: the test observation sequence, which is a 2-dim numpy array
-484 of real numbers, of size `N * D`.
-485
-486 - `N`: length of one test utterance.
-487 - `D` : observation dimension.
-488
-489 For example:
-490 ```
-491 test_sequence =
-492 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
-493 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
-494 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
-495 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
-496 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
-497 ```
-498 Here `N=5`, `D=4`.
-499 args: Inference configurations. See `arguments.py` for details.
-500
-501 Returns:
-502 predicted_cluster_id: predicted speaker id sequence, which is
-503 an array of integers, of size `N`.
-504 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
-505
-506 Raises:
-507 TypeError: If test_sequence is of wrong type.
-508 ValueError: If test_sequence has wrong dimension.
-509 """
-510 # check type
-511 if ( not isinstance ( test_sequence , np . ndarray ) or
-512 test_sequence . dtype != float ):
-513 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
-514 # check dimension
-515 if test_sequence . ndim != 2 :
-516 raise ValueError ( 'test_sequence must be 2-dim array.' )
-517 # check size
-518 test_sequence_length , observation_dim = test_sequence . shape
-519 if observation_dim != self . observation_dim :
-520 raise ValueError ( 'test_sequence does not match the dimension specified '
-521 'by args.observation_dim.' )
-522
-523 self . rnn_model . eval ()
-524 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
-525 test_sequence = autograd . Variable (
-526 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
-527 # bookkeeping for beam search
-528 beam_set = [ BeamState ()]
-529 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
-530 args . look_ahead ):
-531 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
-532 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
-533 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
-534 score_set = float ( 'inf' ) * np . ones (
-535 np . append (
-536 args . beam_size , max_clusters + 1 + np . arange (
-537 look_ahead_seq_length )))
-538 for beam_rank , beam_state in enumerate ( beam_set ):
-539 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
-540 score_set [ beam_rank , :] = np . pad (
-541 beam_score_set ,
-542 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
-543 ( look_ahead_seq_length , 1 )), 'constant' ,
-544 constant_values = float ( 'inf' ))
-545 # find top scores
-546 score_ranked = np . sort ( score_set , axis = None )
-547 score_ranked [ score_ranked == float ( 'inf' )] = 0
-548 score_ranked = np . trim_zeros ( score_ranked )
-549 idx_ranked = np . argsort ( score_set , axis = None )
-550 updated_beam_set = []
-551 for new_beam_rank in range (
-552 np . min (( len ( score_ranked ), args . beam_size ))):
-553 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
-554 score_set . shape )
-555 prev_beam_rank = total_idx [ 0 ] . item ()
-556 cluster_seq = total_idx [ 1 :]
-557 updated_beam_state = self . _update_beam_state (
-558 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
-559 updated_beam_set . append ( updated_beam_state )
-560 beam_set = updated_beam_set
-561 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
-562 return predicted_cluster_id
-563
-564 def predict ( self , test_sequences , args ):
-565 """Predict labels for a single or many test sequences using UISRNN model.
-566
-567 Args:
-568 test_sequences: Either a list of test sequences, or a single test
-569 sequence. Each test sequence is a 2-dim numpy array
-570 of real numbers. See `predict_single()` for details.
-571 args: Inference configurations. See `arguments.py` for details.
-572
-573 Returns:
-574 predicted_cluster_ids: Predicted labels for test_sequences.
-575
-576 1. if test_sequences is a list, predicted_cluster_ids will be a list
-577 of the same size, where each element is a 1-dim list of integers.
-578 2. if test_sequences is a single sequence, predicted_cluster_ids will
-579 be a 1-dim list of integers.
-580
-581 Raises:
-582 TypeError: If test_sequences is of wrong type.
-583 """
-584 # check type
-585 if isinstance ( test_sequences , np . ndarray ):
-586 return self . predict_single ( test_sequences , args )
-587 if isinstance ( test_sequences , list ):
-588 return [ self . predict_single ( test_sequence , args )
-589 for test_sequence in test_sequences ]
-590 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
+ 81 class UISRNN :
+ 82 """Unbounded Interleaved-State Recurrent Neural Networks."""
+ 83
+ 84 def __init__ ( self , args ):
+ 85 """Construct the UISRNN object.
+ 86
+ 87 Args:
+ 88 args: Model configurations. See `arguments.py` for details.
+ 89 """
+ 90 self . observation_dim = args . observation_dim
+ 91 self . device = torch . device (
+ 92 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
+ 93 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
+ 94 args . rnn_depth , self . observation_dim ,
+ 95 args . rnn_dropout ) . to ( self . device )
+ 96 self . rnn_init_hidden = nn . Parameter (
+ 97 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
+ 98 # booleans indicating which variables are trainable
+ 99 self . estimate_sigma2 = ( args . sigma2 is None )
+100 self . estimate_transition_bias = ( args . transition_bias is None )
+101 # initial values of variables
+102 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
+103 self . sigma2 = nn . Parameter (
+104 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
+105 self . transition_bias = args . transition_bias
+106 self . transition_bias_denominator = 0.0
+107 self . crp_alpha = args . crp_alpha
+108 self . logger = colortimelog . Logger ( args . verbosity )
+109
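For orientation, a minimal construction sketch. It assumes the package-level `uisrnn.parse_arguments()` helper from the project README, which returns the three configuration namespaces defined in `arguments.py`; the dimension value is illustrative.

```
import uisrnn

# parse_arguments() yields the (model, training, inference) configuration
# namespaces defined in arguments.py.
model_args, training_args, inference_args = uisrnn.parse_arguments()
model_args.observation_dim = 256  # must match your embedding dimension

model = uisrnn.UISRNN(model_args)
# With the defaults sigma2=None and transition_bias=None, both variables
# are estimated during training instead of being fixed.
```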
+110 def _get_optimizer ( self , optimizer , learning_rate ):
+111 """Get optimizer for UISRNN.
+112
+113 Args:
+114 optimizer: string - name of the optimizer.
+115 learning_rate: learning rate for the entire model.
+116 We do not customize the learning rate for separate parts.
+117
+118 Returns:
+119 a PyTorch "optim" object.
+120 """
+121 params = [
+122 {
+123 'params' : self . rnn_model . parameters ()
+124 }, # rnn parameters
+125 {
+126 'params' : self . rnn_init_hidden
+127 } # rnn initial hidden state
+128 ]
+129 if self . estimate_sigma2 : # train sigma2
+130 params . append ({
+131 'params' : self . sigma2
+132 }) # variance parameters
+133 assert optimizer == 'adam' , 'Only adam optimizer is supported.'
+134 return optim . Adam ( params , lr = learning_rate )
+135
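The parameter-group list built above is plain PyTorch: each dict in the list may hold a single tensor or an iterable of tensors, and Adam treats each dict as a separate group. A standalone sketch of the same pattern, with toy shapes standing in for CoreRNN:

```
import torch
from torch import nn, optim

rnn = nn.GRU(input_size=8, hidden_size=16)         # stand-in for CoreRNN
init_hidden = nn.Parameter(torch.zeros(1, 1, 16))  # rnn initial hidden state
sigma2 = nn.Parameter(0.1 * torch.ones(8))         # variance parameters

params = [
    {'params': rnn.parameters()},
    {'params': init_hidden},
]
params.append({'params': sigma2})  # only appended when sigma2 is trainable
optimizer = optim.Adam(params, lr=1e-3)
```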
+136 def save ( self , filepath ):
+137 """Save the model to a file.
+138
+139 Args:
+140 filepath: the path of the file.
+141 """
+142 torch . save ({
+143 'rnn_state_dict' : self . rnn_model . state_dict (),
+144 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
+145 'transition_bias' : self . transition_bias ,
+146 'transition_bias_denominator' : self . transition_bias_denominator ,
+147 'crp_alpha' : self . crp_alpha ,
+148 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
+149
+150 def load ( self , filepath ):
+151 """Load the model from a file.
+152
+153 Args:
+154 filepath: the path of the file.
+155 """
+156 var_dict = torch . load ( filepath )
+157 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
+158 self . rnn_init_hidden = nn . Parameter (
+159 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
+160 self . transition_bias = float ( var_dict [ 'transition_bias' ])
+161 self . transition_bias_denominator = float (
+162 var_dict [ 'transition_bias_denominator' ])
+163 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
+164 self . sigma2 = nn . Parameter (
+165 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
+166
+167 self . logger . print (
+168 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
+169 'rnn_init_hidden= {} ' . format (
+170 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
+171 var_dict [ 'rnn_init_hidden' ]))
+172
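A save/load round trip in sketch form; the checkpoint filename is arbitrary, and the restoring model must be constructed with compatible `model_args` (same `observation_dim`, `rnn_hidden_size`, `rnn_depth`):

```
import uisrnn

model_args, training_args, inference_args = uisrnn.parse_arguments()
model = uisrnn.UISRNN(model_args)
# ... train with model.fit(...) ...
model.save('saved_uisrnn.pt')  # torch.save() of a plain dict, see above

restored = uisrnn.UISRNN(model_args)
restored.load('saved_uisrnn.pt')  # restores weights, sigma2,
                                  # transition_bias and crp_alpha
```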
+173 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
+174 """Fit UISRNN model to concatenated sequence and cluster_id.
+175
+176 Args:
+177 train_sequence: the training observation sequence, which is a
+178 2-dim numpy array of real numbers, of size `N * D`.
+179
+180 - `N`: summation of lengths of all utterances.
+181 - `D`: observation dimension.
+182
+183 For example,
+184 ```
+185 train_sequence =
+186 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
+187 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
+188 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
+189 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
+190 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
+191 ```
+192 Here `N=5`, `D=4`.
+193
+194 We concatenate all training utterances into this single sequence.
+195 train_cluster_id: the speaker id sequence, which is 1-dim list or
+196 numpy array of strings, of size `N`.
+197 For example,
+198 ```
+199 train_cluster_id =
+200 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
+201 ```
+202 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
+203
+204 Note that the order of entries within an utterance is preserved,
+205 and all utterances are simply concatenated together.
+206 args: Training configurations. See `arguments.py` for details.
+207
+208 Raises:
+209 TypeError: If train_sequence or train_cluster_id is of wrong type.
+210 ValueError: If train_sequence or train_cluster_id has wrong dimension.
+211 """
+212 # check type
+213 if ( not isinstance ( train_sequence , np . ndarray ) or
+214 train_sequence . dtype != float ):
+215 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
+216 if isinstance ( train_cluster_id , list ):
+217 train_cluster_id = np . array ( train_cluster_id )
+218 if ( not isinstance ( train_cluster_id , np . ndarray ) or
+219 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
+220 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
+221 # check dimension
+222 if train_sequence . ndim != 2 :
+223 raise ValueError ( 'train_sequence must be 2-dim array.' )
+224 if train_cluster_id . ndim != 1 :
+225 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
+226 # check length and size
+227 train_total_length , observation_dim = train_sequence . shape
+228 if observation_dim != self . observation_dim :
+229 raise ValueError ( 'train_sequence does not match the dimension specified '
+230 'by args.observation_dim.' )
+231 if train_total_length != len ( train_cluster_id ):
+232 raise ValueError ( 'train_sequence length is not equal to '
+233 'train_cluster_id length.' )
+234
+235 self . rnn_model . train ()
+236 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
+237 learning_rate = args . learning_rate )
+238
+239 sub_sequences , seq_lengths = utils . resize_sequence (
+240 sequence = train_sequence ,
+241 cluster_id = train_cluster_id ,
+242 num_permutations = args . num_permutations )
+243
+244 # For batch learning, pack the entire dataset.
+245 if args . batch_size is None :
+246 packed_train_sequence , rnn_truth = utils . pack_sequence (
+247 sub_sequences ,
+248 seq_lengths ,
+249 args . batch_size ,
+250 self . observation_dim ,
+251 self . device )
+252 train_loss = []
+253 for num_iter in range ( args . train_iteration ):
+254 optimizer . zero_grad ()
+255 # For online learning, pack a subset in each iteration.
+256 if args . batch_size is not None :
+257 packed_train_sequence , rnn_truth = utils . pack_sequence (
+258 sub_sequences ,
+259 seq_lengths ,
+260 args . batch_size ,
+261 self . observation_dim ,
+262 self . device )
+263 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
+264 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
+265 # use mean to predict
+266 mean = torch . cumsum ( mean , dim = 0 )
+267 mean_size = mean . size ()
+268 mean = torch . mm (
+269 torch . diag (
+270 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
+271 mean . view ( mean_size [ 0 ], - 1 ))
+272 mean = mean . view ( mean_size )
+273
+274 # Likelihood part.
+275 loss1 = loss_func . weighted_mse_loss (
+276 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
+277 target_tensor = rnn_truth ,
+278 weight = 1 / ( 2 * self . sigma2 ))
+279
+280 # Sigma2 prior part.
+281 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
+282 ** 2 ) . view ( - 1 , observation_dim )
+283 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
+284 loss2 = loss_func . sigma2_prior_loss (
+285 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
+286
+287 # Regularization part.
+288 loss3 = loss_func . regularization_loss (
+289 self . rnn_model . parameters (), args . regularization_weight )
+290
+291 loss = loss1 + loss2 + loss3
+292 loss . backward ()
+293 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
+294 optimizer . step ()
+295 # avoid numerical issues
+296 self . sigma2 . data . clamp_ ( min = 1e-6 )
+297
+298 if ( np . remainder ( num_iter , 10 ) == 0 or
+299 num_iter == args . train_iteration - 1 ):
+300 self . logger . print (
+301 2 ,
+302 'Iter: {:d} \t '
+303 'Training Loss: {:.4f} \n '
+304 ' Negative Log Likelihood: {:.4f} \t '
+305 'Sigma2 Prior: {:.4f} \t '
+306 'Regularization: {:.4f} ' . format (
+307 num_iter ,
+308 float ( loss . data ),
+309 float ( loss1 . data ),
+310 float ( loss2 . data ),
+311 float ( loss3 . data )))
+312 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
+313 self . logger . print (
+314 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
+315
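Putting the docstring example into code. This is a sketch of the expected input format only; five vectors are far too little data for the training loop to learn anything:

```
import numpy as np
import uisrnn

model_args, training_args, _ = uisrnn.parse_arguments()
model_args.observation_dim = 4
model = uisrnn.UISRNN(model_args)

# N=5 entries of dimension D=4, from utterances 'iaaa' and 'ibbb'.
train_sequence = np.array(
    [[1.2, 3.0, -4.1, 6.0],    # 'iaaa', speaker #0
     [0.8, -1.1, 0.4, 0.5],    # 'iaaa', speaker #1
     [-0.2, 1.0, 3.8, 5.7],    # 'iaaa', speaker #0
     [3.8, -0.1, 1.5, 2.3],    # 'ibbb', speaker #0
     [1.2, 1.4, 3.6, -2.7]],   # 'ibbb', speaker #0
    dtype=float)               # must be a 2-dim float array
train_cluster_id = np.array(
    ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0'])

model.fit_concatenated(train_sequence, train_cluster_id, training_args)
```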
+316 def fit ( self , train_sequences , train_cluster_ids , args ):
+317 """Fit UISRNN model.
+318
+319 Args:
+320 train_sequences: Either a list of training sequences, or a single
+321 concatenated training sequence:
+322
+323 1. train_sequences is a list, and each element is a 2-dim numpy array
+324 of real numbers, of size `length * D`.
+325 The length varies among different sequences, but D is the same.
+326 In speaker diarization, each sequence is the sequence of speaker
+327 embeddings of one utterance.
+328 2. train_sequences is a single concatenated sequence, which is a
+329 2-dim numpy array of real numbers. See `fit_concatenated()`
+330 for more details.
+331 train_cluster_ids: Ground truth labels for train_sequences:
+332
+333 1. if train_sequences is a list, this must also be a list of the same
+334 size, each element being a 1-dim list or numpy array of strings.
+335 2. if train_sequences is a single concatenated sequence, this
+336 must also be the concatenated 1-dim list or numpy array of strings.
+337 args: Training configurations. See `arguments.py` for details.
+338
+339 Raises:
+340 TypeError: If train_sequences or train_cluster_ids is of wrong type.
+341 """
+342 if isinstance ( train_sequences , np . ndarray ):
+343 # train_sequences is already the concatenated sequence
+344 if self . estimate_transition_bias :
+345 # see issue #55: https://github.com/google/uis-rnn/issues/55
+346 self . logger . print (
+347 2 ,
+348 'Warning: transition_bias cannot be correctly estimated from a '
+349 'concatenated sequence; train_sequences will be treated as a '
+350 'single sequence. This can lead to inaccurate estimation of '
+351 'transition_bias. Please consider estimating transition_bias '
+352 'before concatenating the sequences and passing it as an argument.' )
+353 train_sequences = [ train_sequences ]
+354 train_cluster_ids = [ train_cluster_ids ]
+355 elif isinstance ( train_sequences , list ):
+356 # train_sequences is a list of un-concatenated sequences
+357 # we will concatenate it later, after estimating transition_bias
+358 pass
+359 else :
+360 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
+361
+362 # estimate transition_bias
+363 if self . estimate_transition_bias :
+364 ( transition_bias ,
+365 transition_bias_denominator ) = utils . estimate_transition_bias (
+366 train_cluster_ids )
+367 # set or update transition_bias
+368 if self . transition_bias is None :
+369 self . transition_bias = transition_bias
+370 self . transition_bias_denominator = transition_bias_denominator
+371 else :
+372 self . transition_bias = (
+373 self . transition_bias * self . transition_bias_denominator +
+374 transition_bias * transition_bias_denominator ) / (
+375 self . transition_bias_denominator + transition_bias_denominator )
+376 self . transition_bias_denominator += transition_bias_denominator
+377
+378 # concatenate train_sequences
+379 ( concatenated_train_sequence ,
+380 concatenated_train_cluster_id ) = utils . concatenate_training_data (
+381 train_sequences ,
+382 train_cluster_ids ,
+383 args . enforce_cluster_id_uniqueness ,
+384 True )
+385
+386 self . fit_concatenated (
+387 concatenated_train_sequence , concatenated_train_cluster_id , args )
+388
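The list form is the recommended entry point, since it lets transition_bias be estimated from the per-utterance labels before concatenation (the issue #55 warning above). A sketch with random stand-in embeddings:

```
import numpy as np
import uisrnn

model_args, training_args, _ = uisrnn.parse_arguments()
model_args.observation_dim = 4
model = uisrnn.UISRNN(model_args)

# One (length_i, D) float array per utterance, with matching label arrays.
train_sequences = [np.random.randn(100, 4), np.random.randn(80, 4)]
train_cluster_ids = [
    np.array(['utt0_spk0'] * 60 + ['utt0_spk1'] * 40),
    np.array(['utt1_spk0'] * 80),
]
model.fit(train_sequences, train_cluster_ids, training_args)
```

Note that repeated `fit()` calls keep a denominator-weighted running average of transition_bias, so a model can be trained incrementally over several corpora.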
+389 def _update_beam_state ( self , beam_state , look_ahead_seq , cluster_seq ):
+390 """Update a beam state given a look ahead sequence and known cluster
+391 assignments.
+392
+393 Args:
+394 beam_state: A BeamState object.
+395 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
+396 look_ahead: number of steps to look ahead in the beam search.
+397 D: observation dimension.
+398 cluster_seq: Cluster assignment sequence for look_ahead_seq.
+399
+400 Returns:
+401 new_beam_state: An updated BeamState object.
+402 """
+403
+404 loss = 0
+405 new_beam_state = BeamState ( beam_state )
+406 for sub_idx , cluster in enumerate ( cluster_seq ):
+407 if cluster > len ( new_beam_state . mean_set ): # invalid trace
+408 new_beam_state . neg_likelihood = float ( 'inf' )
+409 break
+410 elif cluster < len ( new_beam_state . mean_set ): # existing cluster
+411 last_cluster = new_beam_state . trace [ - 1 ]
+412 loss = loss_func . weighted_mse_loss (
+413 input_tensor = torch . squeeze ( new_beam_state . mean_set [ cluster ]),
+414 target_tensor = look_ahead_seq [ sub_idx , :],
+415 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
+416 if cluster == last_cluster :
+417 loss -= np . log ( 1 - self . transition_bias )
+418 else :
+419 loss -= np . log ( self . transition_bias ) + np . log (
+420 new_beam_state . block_counts [ cluster ]) - np . log (
+421 sum ( new_beam_state . block_counts ) + self . crp_alpha )
+422 # update new mean and new hidden
+423 mean , hidden = self . rnn_model (
+424 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
+425 new_beam_state . hidden_set [ cluster ])
+426 new_beam_state . mean_set [ cluster ] = ( new_beam_state . mean_set [ cluster ] * (
+427 ( np . array ( new_beam_state . trace ) == cluster ) . sum () -
+428 1 ) . astype ( float ) + mean . clone ()) / (
+429 np . array ( new_beam_state . trace ) == cluster ) . sum () . astype (
+430 float ) # use mean to predict
+431 new_beam_state . hidden_set [ cluster ] = hidden . clone ()
+432 if cluster != last_cluster :
+433 new_beam_state . block_counts [ cluster ] += 1
+434 new_beam_state . trace . append ( cluster )
+435 else : # new cluster
+436 init_input = autograd . Variable (
+437 torch . zeros ( self . observation_dim )
+438 ) . unsqueeze ( 0 ) . unsqueeze ( 0 ) . to ( self . device )
+439 mean , hidden = self . rnn_model ( init_input ,
+440 self . rnn_init_hidden )
+441 loss = loss_func . weighted_mse_loss (
+442 input_tensor = torch . squeeze ( mean ),
+443 target_tensor = look_ahead_seq [ sub_idx , :],
+444 weight = 1 / ( 2 * self . sigma2 )) . cpu () . detach () . numpy ()
+445 loss -= np . log ( self . transition_bias ) + np . log (
+446 self . crp_alpha ) - np . log (
+447 sum ( new_beam_state . block_counts ) + self . crp_alpha )
+448 # update new mean and new hidden
+449 mean , hidden = self . rnn_model (
+450 look_ahead_seq [ sub_idx , :] . unsqueeze ( 0 ) . unsqueeze ( 0 ),
+451 hidden )
+452 new_beam_state . append ( mean , hidden , cluster )
+453 new_beam_state . neg_likelihood += loss
+454 return new_beam_state
+455
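The three `loss -=` branches above subtract log priors: staying with the current speaker costs `-log(1 - transition_bias)`; switching to an existing cluster adds a Chinese-restaurant-process term weighted by that cluster's block count; opening a new cluster uses `crp_alpha` instead. A standalone numeric sketch, with arbitrary values:

```
import numpy as np

p = 0.2                # transition_bias
alpha = 1.0            # crp_alpha
block_counts = [3, 1]  # speaker blocks observed per existing cluster

nll_stay = -np.log(1 - p)
nll_switch_to_1 = -(np.log(p) + np.log(block_counts[1])
                    - np.log(sum(block_counts) + alpha))
nll_new_cluster = -(np.log(p) + np.log(alpha)
                    - np.log(sum(block_counts) + alpha))
print(nll_stay, nll_switch_to_1, nll_new_cluster)
# ~0.22, ~3.22, ~3.22 -- lower cost means a more likely assignment
```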
+456 def _calculate_score ( self , beam_state , look_ahead_seq ):
+457 """Calculate negative log likelihoods for all possible state allocations
+458 of a look ahead sequence, according to the current beam state.
+459
+460 Args:
+461 beam_state: A BeamState object.
+462 look_ahead_seq: Look ahead sequence, size: look_ahead*D.
+463 look_ahead: number of steps to look ahead in the beam search.
+464 D: observation dimension.
+465
+466 Returns:
+467 beam_score_set: a set of scores for each possible state allocation.
+468 """
+469
+470 look_ahead , _ = look_ahead_seq . shape
+471 beam_num_clusters = len ( beam_state . mean_set )
+472 beam_score_set = float ( 'inf' ) * np . ones (
+473 beam_num_clusters + 1 + np . arange ( look_ahead ))
+474 for cluster_seq , _ in np . ndenumerate ( beam_score_set ):
+475 updated_beam_state = self . _update_beam_state ( beam_state ,
+476 look_ahead_seq , cluster_seq )
+477 beam_score_set [ cluster_seq ] = updated_beam_state . neg_likelihood
+478 return beam_score_set
+479
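The score array's shape encodes the search space: at look-ahead step `t` there are `beam_num_clusters + 1 + t` choices, i.e. every existing cluster, plus any cluster newly opened earlier in the window, plus one more new cluster. A shape-only sketch:

```
import numpy as np

beam_num_clusters = 2
look_ahead = 3
beam_score_set = float('inf') * np.ones(
    beam_num_clusters + 1 + np.arange(look_ahead))
print(beam_score_set.shape)  # (3, 4, 5)

# np.ndenumerate walks every multi-index, i.e. every candidate cluster
# assignment for the window: (0, 0, 0), (0, 0, 1), ..., (2, 3, 4).
n_assignments = sum(1 for _ in np.ndenumerate(beam_score_set))
print(n_assignments)  # 60 == 3 * 4 * 5
```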
+480 def predict_single ( self , test_sequence , args ):
+481 """Predict labels for a single test sequence using UISRNN model.
+482
+483 Args:
+484 test_sequence: the test observation sequence, which is a 2-dim numpy array
+485 of real numbers, of size `N * D`.
+486
+487 - `N`: length of one test utterance.
+488 - `D` : observation dimension.
+489
+490 For example:
+491 ```
+492 test_sequence =
+493 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
+494 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
+495 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
+496 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
+497 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
+498 ```
+499 Here `N=5`, `D=4`.
+500 args: Inference configurations. See `arguments.py` for details.
+501
+502 Returns:
+503 predicted_cluster_id: predicted speaker id sequence, which is
+504 an array of integers, of size `N`.
+505 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
+506
+507 Raises:
+508 TypeError: If test_sequence is of wrong type.
+509 ValueError: If test_sequence has wrong dimension.
+510 """
+511 # check type
+512 if ( not isinstance ( test_sequence , np . ndarray ) or
+513 test_sequence . dtype != float ):
+514 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
+515 # check dimension
+516 if test_sequence . ndim != 2 :
+517 raise ValueError ( 'test_sequence must be 2-dim array.' )
+518 # check size
+519 test_sequence_length , observation_dim = test_sequence . shape
+520 if observation_dim != self . observation_dim :
+521 raise ValueError ( 'test_sequence does not match the dimension specified '
+522 'by args.observation_dim.' )
+523
+524 self . rnn_model . eval ()
+525 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
+526 test_sequence = autograd . Variable (
+527 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
+528 # bookkeeping for beam search
+529 beam_set = [ BeamState ()]
+530 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
+531 args . look_ahead ):
+532 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
+533 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
+534 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
+535 score_set = float ( 'inf' ) * np . ones (
+536 np . append (
+537 args . beam_size , max_clusters + 1 + np . arange (
+538 look_ahead_seq_length )))
+539 for beam_rank , beam_state in enumerate ( beam_set ):
+540 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
+541 score_set [ beam_rank , :] = np . pad (
+542 beam_score_set ,
+543 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
+544 ( look_ahead_seq_length , 1 )), 'constant' ,
+545 constant_values = float ( 'inf' ))
+546 # find top scores
+547 score_ranked = np . sort ( score_set , axis = None )
+548 score_ranked [ score_ranked == float ( 'inf' )] = 0
+549 score_ranked = np . trim_zeros ( score_ranked )
+550 idx_ranked = np . argsort ( score_set , axis = None )
+551 updated_beam_set = []
+552 for new_beam_rank in range (
+553 np . min (( len ( score_ranked ), args . beam_size ))):
+554 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
+555 score_set . shape )
+556 prev_beam_rank = total_idx [ 0 ] . item ()
+557 cluster_seq = total_idx [ 1 :]
+558 updated_beam_state = self . _update_beam_state (
+559 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
+560 updated_beam_set . append ( updated_beam_state )
+561 beam_set = updated_beam_set
+562 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
+563 return predicted_cluster_id
+564
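A usage sketch, assuming a previously saved checkpoint (the filename is hypothetical) and random stand-in embeddings:

```
import numpy as np
import uisrnn

model_args, _, inference_args = uisrnn.parse_arguments()
model_args.observation_dim = 4
model = uisrnn.UISRNN(model_args)
model.load('saved_uisrnn.pt')  # hypothetical checkpoint from save()

test_sequence = np.random.randn(5, 4)  # N=5, D=4, float64
predicted_cluster_id = model.predict_single(test_sequence, inference_args)
# e.g. [0, 1, 0, 0, 1] -- integer ids local to this utterance
```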
+565 def predict ( self , test_sequences , args ):
+566 """Predict labels for a single or many test sequences using UISRNN model.
+567
+568 Args:
+569 test_sequences: Either a list of test sequences, or a single test
+570 sequence. Each test sequence is a 2-dim numpy array
+571 of real numbers. See `predict_single()` for details.
+572 args: Inference configurations. See `arguments.py` for details.
+573
+574 Returns:
+575 predicted_cluster_ids: Predicted labels for test_sequences.
+576
+577 1. if test_sequences is a list, predicted_cluster_ids will be a list
+578 of the same size, where each element is a 1-dim list of integers.
+579 2. if test_sequences is a single sequence, predicted_cluster_ids will
+580 be a 1-dim list of integers.
+581
+582 Raises:
+583 TypeError: If test_sequences is of wrong type.
+584 """
+585 # check type
+586 if isinstance ( test_sequences , np . ndarray ):
+587 return self . predict_single ( test_sequences , args )
+588 if isinstance ( test_sequences , list ):
+589 return [ self . predict_single ( test_sequence , args )
+590 for test_sequence in test_sequences ]
+591 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
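The dispatch mirrors `fit()`: a bare numpy array is treated as one utterance, while a list is mapped element-wise through `predict_single()`. Continuing the sketch above (same `model` and `inference_args`):

```
import numpy as np

# Single utterance: returns one list of integer ids.
single_ids = model.predict(np.random.randn(5, 4), inference_args)

# Several utterances: returns a list of two such lists.
utterances = [np.random.randn(5, 4), np.random.randn(7, 4)]
many_ids = model.predict(utterances, inference_args)
```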
@@ -1592,31 +1594,31 @@ Inherited Members
- 83 def __init__ ( self , args ):
- 84 """Construct the UISRNN object.
- 85
- 86 Args:
- 87 args: Model configurations. See `arguments.py` for details.
- 88 """
- 89 self . observation_dim = args . observation_dim
- 90 self . device = torch . device (
- 91 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
- 92 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
- 93 args . rnn_depth , self . observation_dim ,
- 94 args . rnn_dropout ) . to ( self . device )
- 95 self . rnn_init_hidden = nn . Parameter (
- 96 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
- 97 # booleans indicating which variables are trainable
- 98 self . estimate_sigma2 = ( args . sigma2 is None )
- 99 self . estimate_transition_bias = ( args . transition_bias is None )
-100 # initial values of variables
-101 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
-102 self . sigma2 = nn . Parameter (
-103 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
-104 self . transition_bias = args . transition_bias
-105 self . transition_bias_denominator = 0.0
-106 self . crp_alpha = args . crp_alpha
-107 self . logger = utils . Logger ( args . verbosity )
+ 84 def __init__ ( self , args ):
+ 85 """Construct the UISRNN object.
+ 86
+ 87 Args:
+ 88 args: Model configurations. See `arguments.py` for details.
+ 89 """
+ 90 self . observation_dim = args . observation_dim
+ 91 self . device = torch . device (
+ 92 'cuda:0' if ( torch . cuda . is_available () and args . enable_cuda ) else 'cpu' )
+ 93 self . rnn_model = CoreRNN ( self . observation_dim , args . rnn_hidden_size ,
+ 94 args . rnn_depth , self . observation_dim ,
+ 95 args . rnn_dropout ) . to ( self . device )
+ 96 self . rnn_init_hidden = nn . Parameter (
+ 97 torch . zeros ( args . rnn_depth , 1 , args . rnn_hidden_size ) . to ( self . device ))
+ 98 # booleans indicating which variables are trainable
+ 99 self . estimate_sigma2 = ( args . sigma2 is None )
+100 self . estimate_transition_bias = ( args . transition_bias is None )
+101 # initial values of variables
+102 sigma2 = _INITIAL_SIGMA2_VALUE if self . estimate_sigma2 else args . sigma2
+103 self . sigma2 = nn . Parameter (
+104 sigma2 * torch . ones ( self . observation_dim ) . to ( self . device ))
+105 self . transition_bias = args . transition_bias
+106 self . transition_bias_denominator = 0.0
+107 self . crp_alpha = args . crp_alpha
+108 self . logger = colortimelog . Logger ( args . verbosity )
@@ -1760,19 +1762,19 @@ Inherited Members
- 135 def save ( self , filepath ):
-136 """Save the model to a file.
-137
-138 Args:
-139 filepath: the path of the file.
-140 """
-141 torch . save ({
-142 'rnn_state_dict' : self . rnn_model . state_dict (),
-143 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
-144 'transition_bias' : self . transition_bias ,
-145 'transition_bias_denominator' : self . transition_bias_denominator ,
-146 'crp_alpha' : self . crp_alpha ,
-147 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
+ 136 def save ( self , filepath ):
+137 """Save the model to a file.
+138
+139 Args:
+140 filepath: the path of the file.
+141 """
+142 torch . save ({
+143 'rnn_state_dict' : self . rnn_model . state_dict (),
+144 'rnn_init_hidden' : self . rnn_init_hidden . detach () . cpu () . numpy (),
+145 'transition_bias' : self . transition_bias ,
+146 'transition_bias_denominator' : self . transition_bias_denominator ,
+147 'crp_alpha' : self . crp_alpha ,
+148 'sigma2' : self . sigma2 . detach () . cpu () . numpy ()}, filepath )
@@ -1795,28 +1797,28 @@ Inherited Members
- 149 def load ( self , filepath ):
-150 """Load the model from a file.
-151
-152 Args:
-153 filepath: the path of the file.
-154 """
-155 var_dict = torch . load ( filepath )
-156 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
-157 self . rnn_init_hidden = nn . Parameter (
-158 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
-159 self . transition_bias = float ( var_dict [ 'transition_bias' ])
-160 self . transition_bias_denominator = float (
-161 var_dict [ 'transition_bias_denominator' ])
-162 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
-163 self . sigma2 = nn . Parameter (
-164 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
-165
-166 self . logger . print (
-167 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
-168 'rnn_init_hidden= {} ' . format (
-169 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
-170 var_dict [ 'rnn_init_hidden' ]))
+ 150 def load ( self , filepath ):
+151 """Load the model from a file.
+152
+153 Args:
+154 filepath: the path of the file.
+155 """
+156 var_dict = torch . load ( filepath )
+157 self . rnn_model . load_state_dict ( var_dict [ 'rnn_state_dict' ])
+158 self . rnn_init_hidden = nn . Parameter (
+159 torch . from_numpy ( var_dict [ 'rnn_init_hidden' ]) . to ( self . device ))
+160 self . transition_bias = float ( var_dict [ 'transition_bias' ])
+161 self . transition_bias_denominator = float (
+162 var_dict [ 'transition_bias_denominator' ])
+163 self . crp_alpha = float ( var_dict [ 'crp_alpha' ])
+164 self . sigma2 = nn . Parameter (
+165 torch . from_numpy ( var_dict [ 'sigma2' ]) . to ( self . device ))
+166
+167 self . logger . print (
+168 3 , 'Loaded model with transition_bias= {} , crp_alpha= {} , sigma2= {} , '
+169 'rnn_init_hidden= {} ' . format (
+170 self . transition_bias , self . crp_alpha , var_dict [ 'sigma2' ],
+171 var_dict [ 'rnn_init_hidden' ]))
@@ -1839,148 +1841,148 @@ Inherited Members
- 172 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
-173 """Fit UISRNN model to concatenated sequence and cluster_id.
-174
-175 Args:
-176 train_sequence: the training observation sequence, which is a
-177 2-dim numpy array of real numbers, of size `N * D`.
-178
-179 - `N`: summation of lengths of all utterances.
-180 - `D`: observation dimension.
-181
-182 For example,
-183 ```
-184 train_sequence =
-185 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
-186 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
-187 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
-188 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
-189 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
-190 ```
-191 Here `N=5`, `D=4`.
-192
-193 We concatenate all training utterances into this single sequence.
-194 train_cluster_id: the speaker id sequence, which is 1-dim list or
-195 numpy array of strings, of size `N`.
-196 For example,
-197 ```
-198 train_cluster_id =
-199 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
-200 ```
-201 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
-202
-203 Note that the order of entries within an utterance is preserved,
-204 and all utterances are simply concatenated together.
-205 args: Training configurations. See `arguments.py` for details.
-206
-207 Raises:
-208 TypeError: If train_sequence or train_cluster_id is of wrong type.
-209 ValueError: If train_sequence or train_cluster_id has wrong dimension.
-210 """
-211 # check type
-212 if ( not isinstance ( train_sequence , np . ndarray ) or
-213 train_sequence . dtype != float ):
-214 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
-215 if isinstance ( train_cluster_id , list ):
-216 train_cluster_id = np . array ( train_cluster_id )
-217 if ( not isinstance ( train_cluster_id , np . ndarray ) or
-218 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
-219 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
-220 # check dimension
-221 if train_sequence . ndim != 2 :
-222 raise ValueError ( 'train_sequence must be 2-dim array.' )
-223 if train_cluster_id . ndim != 1 :
-224 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
-225 # check length and size
-226 train_total_length , observation_dim = train_sequence . shape
-227 if observation_dim != self . observation_dim :
-228 raise ValueError ( 'train_sequence does not match the dimension specified '
-229 'by args.observation_dim.' )
-230 if train_total_length != len ( train_cluster_id ):
-231 raise ValueError ( 'train_sequence length is not equal to '
-232 'train_cluster_id length.' )
-233
-234 self . rnn_model . train ()
-235 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
-236 learning_rate = args . learning_rate )
-237
-238 sub_sequences , seq_lengths = utils . resize_sequence (
-239 sequence = train_sequence ,
-240 cluster_id = train_cluster_id ,
-241 num_permutations = args . num_permutations )
-242
-243 # For batch learning, pack the entire dataset.
-244 if args . batch_size is None :
-245 packed_train_sequence , rnn_truth = utils . pack_sequence (
-246 sub_sequences ,
-247 seq_lengths ,
-248 args . batch_size ,
-249 self . observation_dim ,
-250 self . device )
-251 train_loss = []
-252 for num_iter in range ( args . train_iteration ):
-253 optimizer . zero_grad ()
-254 # For online learning, pack a subset in each iteration.
-255 if args . batch_size is not None :
-256 packed_train_sequence , rnn_truth = utils . pack_sequence (
-257 sub_sequences ,
-258 seq_lengths ,
-259 args . batch_size ,
-260 self . observation_dim ,
-261 self . device )
-262 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
-263 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
-264 # use mean to predict
-265 mean = torch . cumsum ( mean , dim = 0 )
-266 mean_size = mean . size ()
-267 mean = torch . mm (
-268 torch . diag (
-269 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
-270 mean . view ( mean_size [ 0 ], - 1 ))
-271 mean = mean . view ( mean_size )
-272
-273 # Likelihood part.
-274 loss1 = loss_func . weighted_mse_loss (
-275 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
-276 target_tensor = rnn_truth ,
-277 weight = 1 / ( 2 * self . sigma2 ))
-278
-279 # Sigma2 prior part.
-280 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
-281 ** 2 ) . view ( - 1 , observation_dim )
-282 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
-283 loss2 = loss_func . sigma2_prior_loss (
-284 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
-285
-286 # Regularization part.
-287 loss3 = loss_func . regularization_loss (
-288 self . rnn_model . parameters (), args . regularization_weight )
-289
-290 loss = loss1 + loss2 + loss3
-291 loss . backward ()
-292 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
-293 optimizer . step ()
-294 # avoid numerical issues
-295 self . sigma2 . data . clamp_ ( min = 1e-6 )
-296
-297 if ( np . remainder ( num_iter , 10 ) == 0 or
-298 num_iter == args . train_iteration - 1 ):
-299 self . logger . print (
-300 2 ,
-301 'Iter: {:d} \t '
-302 'Training Loss: {:.4f} \n '
-303 ' Negative Log Likelihood: {:.4f} \t '
-304 'Sigma2 Prior: {:.4f} \t '
-305 'Regularization: {:.4f} ' . format (
-306 num_iter ,
-307 float ( loss . data ),
-308 float ( loss1 . data ),
-309 float ( loss2 . data ),
-310 float ( loss3 . data )))
-311 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
-312 self . logger . print (
-313 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
+ 173 def fit_concatenated ( self , train_sequence , train_cluster_id , args ):
+174 """Fit UISRNN model to concatenated sequence and cluster_id.
+175
+176 Args:
+177 train_sequence: the training observation sequence, which is a
+178 2-dim numpy array of real numbers, of size `N * D`.
+179
+180 - `N`: summation of lengths of all utterances.
+181 - `D`: observation dimension.
+182
+183 For example,
+184 ```
+185 train_sequence =
+186 [[1.2 3.0 -4.1 6.0] --> an entry of speaker #0 from utterance 'iaaa'
+187 [0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
+188 [-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
+189 [3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
+190 [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
+191 ```
+192 Here `N=5`, `D=4`.
+193
+194 We concatenate all training utterances into this single sequence.
+195 train_cluster_id: the speaker id sequence, which is 1-dim list or
+196 numpy array of strings, of size `N`.
+197 For example,
+198 ```
+199 train_cluster_id =
+200 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
+201 ```
+202 'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
+203
+204 Note that the order of entries within an utterance is preserved,
+205 and all utterances are simply concatenated together.
+206 args: Training configurations. See `arguments.py` for details.
+207
+208 Raises:
+209 TypeError: If train_sequence or train_cluster_id is of wrong type.
+210 ValueError: If train_sequence or train_cluster_id has wrong dimension.
+211 """
+212 # check type
+213 if ( not isinstance ( train_sequence , np . ndarray ) or
+214 train_sequence . dtype != float ):
+215 raise TypeError ( 'train_sequence should be a numpy array of float type.' )
+216 if isinstance ( train_cluster_id , list ):
+217 train_cluster_id = np . array ( train_cluster_id )
+218 if ( not isinstance ( train_cluster_id , np . ndarray ) or
+219 not train_cluster_id . dtype . name . startswith (( 'str' , 'unicode' ))):
+220 raise TypeError ( 'train_cluster_id should be a numpy array of strings.' )
+221 # check dimension
+222 if train_sequence . ndim != 2 :
+223 raise ValueError ( 'train_sequence must be 2-dim array.' )
+224 if train_cluster_id . ndim != 1 :
+225 raise ValueError ( 'train_cluster_id must be 1-dim array.' )
+226 # check length and size
+227 train_total_length , observation_dim = train_sequence . shape
+228 if observation_dim != self . observation_dim :
+229 raise ValueError ( 'train_sequence does not match the dimension specified '
+230 'by args.observation_dim.' )
+231 if train_total_length != len ( train_cluster_id ):
+232 raise ValueError ( 'train_sequence length is not equal to '
+233 'train_cluster_id length.' )
+234
+235 self . rnn_model . train ()
+236 optimizer = self . _get_optimizer ( optimizer = args . optimizer ,
+237 learning_rate = args . learning_rate )
+238
+239 sub_sequences , seq_lengths = utils . resize_sequence (
+240 sequence = train_sequence ,
+241 cluster_id = train_cluster_id ,
+242 num_permutations = args . num_permutations )
+243
+244 # For batch learning, pack the entire dataset.
+245 if args . batch_size is None :
+246 packed_train_sequence , rnn_truth = utils . pack_sequence (
+247 sub_sequences ,
+248 seq_lengths ,
+249 args . batch_size ,
+250 self . observation_dim ,
+251 self . device )
+252 train_loss = []
+253 for num_iter in range ( args . train_iteration ):
+254 optimizer . zero_grad ()
+255 # For online learning, pack a subset in each iteration.
+256 if args . batch_size is not None :
+257 packed_train_sequence , rnn_truth = utils . pack_sequence (
+258 sub_sequences ,
+259 seq_lengths ,
+260 args . batch_size ,
+261 self . observation_dim ,
+262 self . device )
+263 hidden = self . rnn_init_hidden . repeat ( 1 , args . batch_size , 1 )
+264 mean , _ = self . rnn_model ( packed_train_sequence , hidden )
+265 # use mean to predict
+266 mean = torch . cumsum ( mean , dim = 0 )
+267 mean_size = mean . size ()
+268 mean = torch . mm (
+269 torch . diag (
+270 1.0 / torch . arange ( 1 , mean_size [ 0 ] + 1 ) . float () . to ( self . device )),
+271 mean . view ( mean_size [ 0 ], - 1 ))
+272 mean = mean . view ( mean_size )
+273
+274 # Likelihood part.
+275 loss1 = loss_func . weighted_mse_loss (
+276 input_tensor = ( rnn_truth != 0 ) . float () * mean [: - 1 , :, :],
+277 target_tensor = rnn_truth ,
+278 weight = 1 / ( 2 * self . sigma2 ))
+279
+280 # Sigma2 prior part.
+281 weight = ((( rnn_truth != 0 ) . float () * mean [: - 1 , :, :] - rnn_truth )
+282 ** 2 ) . view ( - 1 , observation_dim )
+283 num_non_zero = torch . sum (( weight != 0 ) . float (), dim = 0 ) . squeeze ()
+284 loss2 = loss_func . sigma2_prior_loss (
+285 num_non_zero , args . sigma_alpha , args . sigma_beta , self . sigma2 )
+286
+287 # Regularization part.
+288 loss3 = loss_func . regularization_loss (
+289 self . rnn_model . parameters (), args . regularization_weight )
+290
+291 loss = loss1 + loss2 + loss3
+292 loss . backward ()
+293 nn . utils . clip_grad_norm_ ( self . rnn_model . parameters (), args . grad_max_norm )
+294 optimizer . step ()
+295 # avoid numerical issues
+296 self . sigma2 . data . clamp_ ( min = 1e-6 )
+297
+298 if ( np . remainder ( num_iter , 10 ) == 0 or
+299 num_iter == args . train_iteration - 1 ):
+300 self . logger . print (
+301 2 ,
+302 'Iter: {:d} \t '
+303 'Training Loss: {:.4f} \n '
+304 ' Negative Log Likelihood: {:.4f} \t '
+305 'Sigma2 Prior: {:.4f} \t '
+306 'Regularization: {:.4f} ' . format (
+307 num_iter ,
+308 float ( loss . data ),
+309 float ( loss1 . data ),
+310 float ( loss2 . data ),
+311 float ( loss3 . data )))
+312 train_loss . append ( float ( loss1 . data )) # only save the likelihood part
+313 self . logger . print (
+314 1 , 'Done training with {} iterations' . format ( args . train_iteration ))
@@ -2001,22 +2003,25 @@ Inherited Members
[0.8 -1.1 0.4 0.5] --> an entry of speaker #1 from utterance 'iaaa'
[-0.2 1.0 3.8 5.7] --> an entry of speaker #0 from utterance 'iaaa'
[3.8 -0.1 1.5 2.3] --> an entry of speaker #0 from utterance 'ibbb'
-    [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
-    Here `N=5`, `D=4`.
-
-    We concatenate all training utterances into this single sequence.
-
-    train_cluster_id: the speaker id sequence, which is 1-dim list or
+    [1.2 1.4 3.6 -2.7]] --> an entry of speaker #0 from utterance 'ibbb'
+
+Here `N=5`, `D=4`.
+
+We concatenate all training utterances into this single sequence.
+
+train_cluster_id: the speaker id sequence, which is 1-dim list or
 numpy array of strings, of size `N`.
-    For example,
-
-    train_cluster_id =
+For example,
+
+train_cluster_id =
 ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
-
-    'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
-Note that the order of entries within an utterance is preserved,
+
+'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.
+Note that the order of entries within an utterance is preserved,
 and all utterances are simply concatenated together.
@@ -2040,78 +2045,78 @@ Inherited Members
- 315 def fit ( self , train_sequences , train_cluster_ids , args ):
-316 """Fit UISRNN model.
-317
-318 Args:
-319 train_sequences: Either a list of training sequences, or a single
-320 concatenated training sequence:
-321
-322 1. train_sequences is a list, and each element is a 2-dim numpy array
-323 of real numbers, of size `length * D`.
-324 The length varies among different sequences, but D is the same.
-325 In speaker diarization, each sequence is the sequence of speaker
-326 embeddings of one utterance.
-327 2. train_sequences is a single concatenated sequence, which is a
-328 2-dim numpy array of real numbers. See `fit_concatenated()`
-329 for more details.
-330 train_cluster_ids: Ground truth labels for train_sequences:
-331
-332 1. if train_sequences is a list, this must also be a list of the same
-333 size, each element being a 1-dim list or numpy array of strings.
-334 2. if train_sequences is a single concatenated sequence, this
-335 must also be the concatenated 1-dim list or numpy array of strings.
-336 args: Training configurations. See `arguments.py` for details.
-337
-338 Raises:
-339 TypeError: If train_sequences or train_cluster_ids is of wrong type.
-340 """
-341 if isinstance ( train_sequences , np . ndarray ):
-342 # train_sequences is already the concatenated sequence
-343 if self . estimate_transition_bias :
-344 # see issue #55: https://github.com/google/uis-rnn/issues/55
-345 self . logger . print (
-346 2 ,
-347 'Warning: transition_bias cannot be correctly estimated from a '
-348 'concatenated sequence; train_sequences will be treated as a '
-349 'single sequence. This can lead to inaccurate estimation of '
-350 'transition_bias. Please consider estimating transition_bias '
-351 'before concatenating the sequences and passing it as an argument.' )
-352 train_sequences = [ train_sequences ]
-353 train_cluster_ids = [ train_cluster_ids ]
-354 elif isinstance ( train_sequences , list ):
-355 # train_sequences is a list of un-concatenated sequences
-356 # we will concatenate it later, after estimating transition_bias
-357 pass
-358 else :
-359 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
-360
-361 # estimate transition_bias
-362 if self . estimate_transition_bias :
-363 ( transition_bias ,
-364 transition_bias_denominator ) = utils . estimate_transition_bias (
-365 train_cluster_ids )
-366 # set or update transition_bias
-367 if self . transition_bias is None :
-368 self . transition_bias = transition_bias
-369 self . transition_bias_denominator = transition_bias_denominator
-370 else :
-371 self . transition_bias = (
-372 self . transition_bias * self . transition_bias_denominator +
-373 transition_bias * transition_bias_denominator ) / (
-374 self . transition_bias_denominator + transition_bias_denominator )
-375 self . transition_bias_denominator += transition_bias_denominator
-376
-377 # concatenate train_sequences
-378 ( concatenated_train_sequence ,
-379 concatenated_train_cluster_id ) = utils . concatenate_training_data (
-380 train_sequences ,
-381 train_cluster_ids ,
-382 args . enforce_cluster_id_uniqueness ,
-383 True )
-384
-385 self . fit_concatenated (
-386 concatenated_train_sequence , concatenated_train_cluster_id , args )
+ 316 def fit ( self , train_sequences , train_cluster_ids , args ):
+317 """Fit UISRNN model.
+318
+319 Args:
+320 train_sequences: Either a list of training sequences, or a single
+321 concatenated training sequence:
+322
+323 1. train_sequences is a list, and each element is a 2-dim numpy array
+324 of real numbers, of size: `length * D`.
+325 The length varies among different sequences, but the D is the same.
+326 In speaker diarization, each sequence is the sequence of speaker
+327 embeddings of one utterance.
+328 2. train_sequences is a single concatenated sequence, which is a
+329 2-dim numpy array of real numbers. See `fit_concatenated()`
+330 for more details.
+331 train_cluster_ids: Ground truth labels for train_sequences:
+332
+333 1. if train_sequences is a list, this must also be a list of the same
+334 size, each element being a 1-dim list or numpy array of strings.
+335 2. if train_sequences is a single concatenated sequence, this
+336 must also be the concatenated 1-dim list or numpy array of strings
+337 args: Training configurations. See `arguments.py` for details.
+338
+339 Raises:
+340 TypeError: If train_sequences or train_cluster_ids is of wrong type.
+341 """
+342 if isinstance ( train_sequences , np . ndarray ):
+343 # train_sequences is already the concatenated sequence
+344 if self . estimate_transition_bias :
+345 # see issue #55: https://github.com/google/uis-rnn/issues/55
+346 self . logger . print (
+347 2 ,
+348 'Warning: transition_bias cannot be correctly estimated from a '
+349 'concatenated sequence; train_sequences will be treated as a '
+350 'single sequence. This can lead to inaccurate estimation of '
+351 'transition_bias. Please, consider estimating transition_bias '
+352 'before concatenating the sequences and passing it as argument.' )
+353 train_sequences = [ train_sequences ]
+354 train_cluster_ids = [ train_cluster_ids ]
+355 elif isinstance ( train_sequences , list ):
+356 # train_sequences is a list of un-concatenated sequences
+357 # we will concatenate it later, after estimating transition_bias
+358 pass
+359 else :
+360 raise TypeError ( 'train_sequences must be a list or numpy.ndarray' )
+361
+362 # estimate transition_bias
+363 if self . estimate_transition_bias :
+364 ( transition_bias ,
+365 transition_bias_denominator ) = utils . estimate_transition_bias (
+366 train_cluster_ids )
+367 # set or update transition_bias
+368 if self . transition_bias is None :
+369 self . transition_bias = transition_bias
+370 self . transition_bias_denominator = transition_bias_denominator
+371 else :
+372 self . transition_bias = (
+373 self . transition_bias * self . transition_bias_denominator +
+374 transition_bias * transition_bias_denominator ) / (
+375 self . transition_bias_denominator + transition_bias_denominator )
+376 self . transition_bias_denominator += transition_bias_denominator
+377
+378 # concatenate train_sequences
+379 ( concatenated_train_sequence ,
+380 concatenated_train_cluster_id ) = utils . concatenate_training_data (
+381 train_sequences ,
+382 train_cluster_ids ,
+383 args . enforce_cluster_id_uniqueness ,
+384 True )
+385
+386 self . fit_concatenated (
+387 concatenated_train_sequence , concatenated_train_cluster_id , args )
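For readers of this hunk, a minimal sketch of the list-of-sequences calling convention that `fit()` documents above. It assumes the demo-style `uisrnn.parse_arguments()` entry point and the `train_iteration` attribute from `arguments.py`; the random embeddings only illustrate the expected shapes, not a meaningful training run.

```
import numpy as np
import uisrnn

# Parse the default model/training/inference argument groups
# (demo-style entry point; adjust to your own setup).
model_args, training_args, inference_args = uisrnn.parse_arguments()
model_args.observation_dim = 4       # must match D below
training_args.train_iteration = 10   # shrink the large default for a toy run

# Two toy utterances: 2-dim arrays of size length * D, with the same D.
train_sequences = [
    np.random.rand(5, 4),  # utterance 'iaaa': 5 embeddings
    np.random.rand(3, 4),  # utterance 'ibbb': 3 embeddings
]
# One string label per embedding, aligned with the sequences above.
train_cluster_ids = [
    ['iaaa_0', 'iaaa_1', 'iaaa_0', 'iaaa_0', 'iaaa_1'],
    ['ibbb_0', 'ibbb_0', 'ibbb_1'],
]

model = uisrnn.UISRNN(model_args)
# Passing the list form lets fit() estimate transition_bias *before*
# concatenating, avoiding the issue #55 warning shown in the code above.
model.fit(train_sequences, train_cluster_ids, training_args)
```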
@@ -2158,90 +2163,90 @@ Inherited Members
- 479 def predict_single ( self , test_sequence , args ):
-480 """Predict labels for a single test sequence using UISRNN model.
-481
-482 Args:
-483 test_sequence: the test observation sequence, which is 2-dim numpy array
-484 of real numbers, of size `N * D`.
-485
-486 - `N`: length of one test utterance.
-487 - `D` : observation dimension.
-488
-489 For example:
-490 ```
-491 test_sequence =
-492 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
-493 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
-494 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
-495 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
-496 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
-497 ```
-498 Here `N=5`, `D=4`.
-499 args: Inference configurations. See `arguments.py` for details.
-500
-501 Returns:
-502 predicted_cluster_id: predicted speaker id sequence, which is
-503 an array of integers, of size `N`.
-504 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
-505
-506 Raises:
-507 TypeError: If test_sequence is of wrong type.
-508 ValueError: If test_sequence has wrong dimension.
-509 """
-510 # check type
-511 if ( not isinstance ( test_sequence , np . ndarray ) or
-512 test_sequence . dtype != float ):
-513 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
-514 # check dimension
-515 if test_sequence . ndim != 2 :
-516 raise ValueError ( 'test_sequence must be 2-dim array.' )
-517 # check size
-518 test_sequence_length , observation_dim = test_sequence . shape
-519 if observation_dim != self . observation_dim :
-520 raise ValueError ( 'test_sequence does not match the dimension specified '
-521 'by args.observation_dim.' )
-522
-523 self . rnn_model . eval ()
-524 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
-525 test_sequence = autograd . Variable (
-526 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
-527 # bookkeeping for beam search
-528 beam_set = [ BeamState ()]
-529 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
-530 args . look_ahead ):
-531 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
-532 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
-533 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
-534 score_set = float ( 'inf' ) * np . ones (
-535 np . append (
-536 args . beam_size , max_clusters + 1 + np . arange (
-537 look_ahead_seq_length )))
-538 for beam_rank , beam_state in enumerate ( beam_set ):
-539 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
-540 score_set [ beam_rank , :] = np . pad (
-541 beam_score_set ,
-542 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
-543 ( look_ahead_seq_length , 1 )), 'constant' ,
-544 constant_values = float ( 'inf' ))
-545 # find top scores
-546 score_ranked = np . sort ( score_set , axis = None )
-547 score_ranked [ score_ranked == float ( 'inf' )] = 0
-548 score_ranked = np . trim_zeros ( score_ranked )
-549 idx_ranked = np . argsort ( score_set , axis = None )
-550 updated_beam_set = []
-551 for new_beam_rank in range (
-552 np . min (( len ( score_ranked ), args . beam_size ))):
-553 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
-554 score_set . shape )
-555 prev_beam_rank = total_idx [ 0 ] . item ()
-556 cluster_seq = total_idx [ 1 :]
-557 updated_beam_state = self . _update_beam_state (
-558 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
-559 updated_beam_set . append ( updated_beam_state )
-560 beam_set = updated_beam_set
-561 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
-562 return predicted_cluster_id
+ 480 def predict_single ( self , test_sequence , args ):
+481 """Predict labels for a single test sequence using UISRNN model.
+482
+483 Args:
+484 test_sequence: the test observation sequence, which is 2-dim numpy array
+485 of real numbers, of size `N * D`.
+486
+487 - `N`: length of one test utterance.
+488 - `D` : observation dimension.
+489
+490 For example:
+491 ```
+492 test_sequence =
+493 [[2.2 -1.0 3.0 5.6] --> 1st entry of utterance 'iccc'
+494 [0.5 1.8 -3.2 0.4] --> 2nd entry of utterance 'iccc'
+495 [-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
+496 [-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
+497 [0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
+498 ```
+499 Here `N=5`, `D=4`.
+500 args: Inference configurations. See `arguments.py` for details.
+501
+502 Returns:
+503 predicted_cluster_id: predicted speaker id sequence, which is
+504 an array of integers, of size `N`.
+505 For example, `predicted_cluster_id = [0, 1, 0, 0, 1]`
+506
+507 Raises:
+508 TypeError: If test_sequence is of wrong type.
+509 ValueError: If test_sequence has wrong dimension.
+510 """
+511 # check type
+512 if ( not isinstance ( test_sequence , np . ndarray ) or
+513 test_sequence . dtype != float ):
+514 raise TypeError ( 'test_sequence should be a numpy array of float type.' )
+515 # check dimension
+516 if test_sequence . ndim != 2 :
+517 raise ValueError ( 'test_sequence must be 2-dim array.' )
+518 # check size
+519 test_sequence_length , observation_dim = test_sequence . shape
+520 if observation_dim != self . observation_dim :
+521 raise ValueError ( 'test_sequence does not match the dimension specified '
+522 'by args.observation_dim.' )
+523
+524 self . rnn_model . eval ()
+525 test_sequence = np . tile ( test_sequence , ( args . test_iteration , 1 ))
+526 test_sequence = autograd . Variable (
+527 torch . from_numpy ( test_sequence ) . float ()) . to ( self . device )
+528 # bookkeeping for beam search
+529 beam_set = [ BeamState ()]
+530 for num_iter in np . arange ( 0 , args . test_iteration * test_sequence_length ,
+531 args . look_ahead ):
+532 max_clusters = max ([ len ( beam_state . mean_set ) for beam_state in beam_set ])
+533 look_ahead_seq = test_sequence [ num_iter : num_iter + args . look_ahead , :]
+534 look_ahead_seq_length = look_ahead_seq . shape [ 0 ]
+535 score_set = float ( 'inf' ) * np . ones (
+536 np . append (
+537 args . beam_size , max_clusters + 1 + np . arange (
+538 look_ahead_seq_length )))
+539 for beam_rank , beam_state in enumerate ( beam_set ):
+540 beam_score_set = self . _calculate_score ( beam_state , look_ahead_seq )
+541 score_set [ beam_rank , :] = np . pad (
+542 beam_score_set ,
+543 np . tile ([[ 0 , max_clusters - len ( beam_state . mean_set )]],
+544 ( look_ahead_seq_length , 1 )), 'constant' ,
+545 constant_values = float ( 'inf' ))
+546 # find top scores
+547 score_ranked = np . sort ( score_set , axis = None )
+548 score_ranked [ score_ranked == float ( 'inf' )] = 0
+549 score_ranked = np . trim_zeros ( score_ranked )
+550 idx_ranked = np . argsort ( score_set , axis = None )
+551 updated_beam_set = []
+552 for new_beam_rank in range (
+553 np . min (( len ( score_ranked ), args . beam_size ))):
+554 total_idx = np . unravel_index ( idx_ranked [ new_beam_rank ],
+555 score_set . shape )
+556 prev_beam_rank = total_idx [ 0 ] . item ()
+557 cluster_seq = total_idx [ 1 :]
+558 updated_beam_state = self . _update_beam_state (
+559 beam_set [ prev_beam_rank ], look_ahead_seq , cluster_seq )
+560 updated_beam_set . append ( updated_beam_state )
+561 beam_set = updated_beam_set
+562 predicted_cluster_id = beam_set [ 0 ] . trace [ - test_sequence_length :]
+563 return predicted_cluster_id
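A hedged sketch of calling `predict_single()` with the `N=5`, `D=4` example from the docstring. `saved_uisrnn.model` is a hypothetical checkpoint path; loading a trained model this way follows the library's `save()`/`load()` pair.

```
import numpy as np
import uisrnn

model_args, _, inference_args = uisrnn.parse_arguments()
model_args.observation_dim = 4
model = uisrnn.UISRNN(model_args)
model.load('saved_uisrnn.model')  # hypothetical checkpoint path

# The checks above require a float ndarray with ndim == 2 and
# D == args.observation_dim.
test_sequence = np.array(
    [[2.2, -1.0, 3.0, 5.6],
     [0.5, 1.8, -3.2, 0.4],
     [-2.2, 5.0, 1.8, 3.7],
     [-3.8, 0.1, 1.4, 3.3],
     [0.1, 2.7, 3.5, -1.7]])

predicted_cluster_id = model.predict_single(test_sequence, inference_args)
print(predicted_cluster_id)  # e.g. [0, 1, 0, 0, 1]
```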
@@ -2263,9 +2268,12 @@ Inherited Members
[-2.2 5.0 1.8 3.7] --> 3rd entry of utterance 'iccc'
[-3.8 0.1 1.4 3.3] --> 4th entry of utterance 'iccc'
[0.1 2.7 3.5 -1.7]] --> 5th entry of utterance 'iccc'
Here `N=5`, `D=4`.
args: Inference configurations. See arguments.py for details.
Returns:
predicted_cluster_id: predicted speaker id sequence, which is
@@ -2290,33 +2298,33 @@
Inherited Members
- 564 def predict ( self , test_sequences , args ):
-565 """Predict labels for a single or many test sequences using UISRNN model.
-566
-567 Args:
-568 test_sequences: Either a list of test sequences, or a single test
-569 sequence. Each test sequence is a 2-dim numpy array
-570 of real numbers. See `predict_single()` for details.
-571 args: Inference configurations. See `arguments.py` for details.
-572
-573 Returns:
-574 predicted_cluster_ids: Predicted labels for test_sequences.
-575
-576 1. if test_sequences is a list, predicted_cluster_ids will be a list
-577 of the same size, where each element being a 1-dim list of strings.
-578 2. if test_sequences is a single sequence, predicted_cluster_ids will
-579 be a 1-dim list of strings
-580
-581 Raises:
-582 TypeError: If test_sequences is of wrong type.
-583 """
-584 # check type
-585 if isinstance ( test_sequences , np . ndarray ):
-586 return self . predict_single ( test_sequences , args )
-587 if isinstance ( test_sequences , list ):
-588 return [ self . predict_single ( test_sequence , args )
-589 for test_sequence in test_sequences ]
-590 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
+ 565 def predict ( self , test_sequences , args ):
+566 """Predict labels for a single or many test sequences using UISRNN model.
+567
+568 Args:
+569 test_sequences: Either a list of test sequences, or a single test
+570 sequence. Each test sequence is a 2-dim numpy array
+571 of real numbers. See `predict_single()` for details.
+572 args: Inference configurations. See `arguments.py` for details.
+573
+574 Returns:
+575 predicted_cluster_ids: Predicted labels for test_sequences.
+576
+577 1. if test_sequences is a list, predicted_cluster_ids will be a list
+578 of the same size, where each element is a 1-dim list of strings.
+579 2. if test_sequences is a single sequence, predicted_cluster_ids will
+580 be a 1-dim list of strings
+581
+582 Raises:
+583 TypeError: If test_sequences is of wrong type.
+584 """
+585 # check type
+586 if isinstance ( test_sequences , np . ndarray ):
+587 return self . predict_single ( test_sequences , args )
+588 if isinstance ( test_sequences , list ):
+589 return [ self . predict_single ( test_sequence , args )
+590 for test_sequence in test_sequences ]
+591 raise TypeError ( 'test_sequences should be either a list or numpy array.' )
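To make the two return shapes concrete, a short continuation of the `predict_single()` sketch above (`model` and `inference_args` as before):

```
import numpy as np

seq_a = np.random.rand(5, 4)
seq_b = np.random.rand(7, 4)

# A single ndarray dispatches straight to predict_single():
labels_a = model.predict(seq_a, inference_args)  # one 1-dim label list

# A list of ndarrays returns one label list per input, in input order:
labels_all = model.predict([seq_a, seq_b], inference_args)
assert len(labels_all) == 2 and len(labels_all[1]) == 7
```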
@@ -2355,37 +2363,37 @@ Inherited Members
- 593 def parallel_predict ( model , test_sequences , args , num_processes = 4 ):
-594 """Run prediction in parallel using torch.multiprocessing.
-595
-596 This is a beta feature. It makes prediction slower on CPU. But it's reported
-597 that it makes prediction faster on GPU.
-598
-599 Args:
-600 model: instance of UISRNN model
-601 test_sequences: a list of test sequences, or a single test
-602 sequence. Each test sequence is a 2-dim numpy array
-603 of real numbers. See `predict_single()` for details.
-604 args: Inference configurations. See `arguments.py` for details.
-605 num_processes: number of parallel processes.
-606
-607 Returns:
-608 a list of the same size as test_sequences, where each element
-609 being a 1-dim list of strings.
-610
-611 Raises:
-612 TypeError: If test_sequences is of wrong type.
-613 """
-614 if not isinstance ( test_sequences , list ):
-615 raise TypeError ( 'test_sequences must be a list.' )
-616 ctx = multiprocessing . get_context ( 'forkserver' )
-617 model . rnn_model . share_memory ()
-618 pool = ctx . Pool ( num_processes )
-619 results = pool . map (
-620 functools . partial ( model . predict_single , args = args ),
-621 test_sequences )
-622 pool . close ()
-623 return results
+ 594 def parallel_predict ( model , test_sequences , args , num_processes = 4 ):
+595 """Run prediction in parallel using torch.multiprocessing.
+596
+597 This is a beta feature. It makes prediction slower on CPU. But it's reported
+598 that it makes prediction faster on GPU.
+599
+600 Args:
+601 model: instance of UISRNN model
+602 test_sequences: a list of test sequences, or a single test
+603 sequence. Each test sequence is a 2-dim numpy array
+604 of real numbers. See `predict_single()` for details.
+605 args: Inference configurations. See `arguments.py` for details.
+606 num_processes: number of parallel processes.
+607
+608 Returns:
+609 a list of the same size as test_sequences, where each element
+610 being a 1-dim list of strings.
+611
+612 Raises:
+613 TypeError: If test_sequences is of wrong type.
+614 """
+615 if not isinstance ( test_sequences , list ):
+616 raise TypeError ( 'test_sequences must be a list.' )
+617 ctx = multiprocessing . get_context ( 'forkserver' )
+618 model . rnn_model . share_memory ()
+619 pool = ctx . Pool ( num_processes )
+620 results = pool . map (
+621 functools . partial ( model . predict_single , args = args ),
+622 test_sequences )
+623 pool . close ()
+624 return results
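A hedged usage sketch, assuming `parallel_predict` is importable from the package top level. The `__main__` guard matters because the forkserver context re-imports the calling module; the checkpoint path is hypothetical.

```
import numpy as np
import uisrnn

if __name__ == '__main__':  # forkserver workers re-import this module
    model_args, _, inference_args = uisrnn.parse_arguments()
    model_args.observation_dim = 4
    model = uisrnn.UISRNN(model_args)
    model.load('saved_uisrnn.model')  # hypothetical checkpoint path

    test_sequences = [np.random.rand(n, 4) for n in (5, 8, 6)]
    # Expect a speed-up on GPU only; on CPU this is slower than predict().
    results = uisrnn.parallel_predict(
        model, test_sequences, inference_args, num_processes=2)
    assert len(results) == len(test_sequences)
```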
diff --git a/docs/uisrnn/utils.html b/docs/uisrnn/utils.html
index 1433f97..a5e04c5 100644
--- a/docs/uisrnn/utils.html
+++ b/docs/uisrnn/utils.html
@@ -3,14 +3,14 @@
uisrnn.utils API documentation
@@ -30,18 +30,6 @@
API Documentation
21 from torch import autograd
22
23
- 24 class Logger :
- 25 """A class for printing logging information to screen."""
+ 24 def generate_random_string ( length = 6 ):
+ 25 """Generate a random string of upper case letters and digits.
26
- 27 def __init__ ( self , verbosity ):
- 28 self . _verbosity = verbosity
+ 27 Args:
+ 28 length: length of the generated string
29
- 30 def print ( self , level , message ):
- 31 """Print a message if level is not higher than verbosity.
- 32
- 33 Args:
- 34 level: the level of this message, smaller value means more important
- 35 message: the message to be printed
- 36 """
- 37 if level <= self . _verbosity :
- 38 print ( message )
- 39
+ 30 Returns:
+ 31 the generated string
+ 32 """
+ 33 return '' . join ([
+ 34 random . choice ( string . ascii_uppercase + string . digits )
+ 35 for _ in range ( length )])
+ 36
+ 37
+ 38 def enforce_cluster_id_uniqueness ( cluster_ids ):
+ 39 """Enforce uniqueness of cluster id across sequences.
40
- 41 def generate_random_string ( length = 6 ):
- 42 """Generate a random string of upper case letters and digits.
+ 41 Args:
+ 42 cluster_ids: a list of 1-dim list/numpy.ndarray of strings
43
- 44 Args:
- 45 length: length of the generated string
+ 44 Returns:
+ 45 a new list with same length of cluster_ids
46
- 47 Returns:
- 48 the generated string
+ 47 Raises:
+ 48 TypeError: if cluster_ids or its element has wrong type
49 """
- 50 return '' . join ([
- 51 random . choice ( string . ascii_uppercase + string . digits )
- 52 for _ in range ( length )])
- 53
- 54
- 55 def enforce_cluster_id_uniqueness ( cluster_ids ):
- 56 """Enforce uniqueness of cluster id across sequences.
- 57
- 58 Args:
- 59 cluster_ids: a list of 1-dim list/numpy.ndarray of strings
- 60
- 61 Returns:
- 62 a new list with same length of cluster_ids
+ 50 if not isinstance ( cluster_ids , list ):
+ 51 raise TypeError ( 'cluster_ids must be a list' )
+ 52 new_cluster_ids = []
+ 53 for cluster_id in cluster_ids :
+ 54 sequence_id = generate_random_string ()
+ 55 if isinstance ( cluster_id , np . ndarray ):
+ 56 cluster_id = cluster_id . tolist ()
+ 57 if not isinstance ( cluster_id , list ):
+ 58 raise TypeError ( 'Elements of cluster_ids must be list or numpy.ndarray' )
+ 59 new_cluster_id = [ '_' . join ([ sequence_id , s ]) for s in cluster_id ]
+ 60 new_cluster_ids . append ( new_cluster_id )
+ 61 return new_cluster_ids
+ 62
63
- 64 Raises:
- 65 TypeError: if cluster_ids or its element has wrong type
- 66 """
- 67 if not isinstance ( cluster_ids , list ):
- 68 raise TypeError ( 'cluster_ids must be a list' )
- 69 new_cluster_ids = []
- 70 for cluster_id in cluster_ids :
- 71 sequence_id = generate_random_string ()
- 72 if isinstance ( cluster_id , np . ndarray ):
- 73 cluster_id = cluster_id . tolist ()
- 74 if not isinstance ( cluster_id , list ):
- 75 raise TypeError ( 'Elements of cluster_ids must be list or numpy.ndarray' )
- 76 new_cluster_id = [ '_' . join ([ sequence_id , s ]) for s in cluster_id ]
- 77 new_cluster_ids . append ( new_cluster_id )
- 78 return new_cluster_ids
- 79
- 80
- 81 def concatenate_training_data ( train_sequences , train_cluster_ids ,
- 82 enforce_uniqueness = True , shuffle = True ):
- 83 """Concatenate training data.
- 84
- 85 Args:
- 86 train_sequences: a list of 2-dim numpy arrays to be concatenated
- 87 train_cluster_ids: a list of 1-dim list/numpy.ndarray of strings
- 88 enforce_uniqueness: a boolean indicated whether we should enfore uniqueness
- 89 to train_cluster_ids
- 90 shuffle: whether to randomly shuffle input order
- 91
- 92 Returns:
- 93 concatenated_train_sequence: a 2-dim numpy array
- 94 concatenated_train_cluster_id: a list of strings
- 95
- 96 Raises:
- 97 TypeError: if input has wrong type
- 98 ValueError: if sizes/dimensions of input or their elements are incorrect
- 99 """
-100 # check input
-101 if not isinstance ( train_sequences , list ) or not isinstance (
-102 train_cluster_ids , list ):
-103 raise TypeError ( 'train_sequences and train_cluster_ids must be lists' )
-104 if len ( train_sequences ) != len ( train_cluster_ids ):
-105 raise ValueError (
-106 'train_sequences and train_cluster_ids must have same size' )
-107 train_cluster_ids = [
-108 x . tolist () if isinstance ( x , np . ndarray ) else x
-109 for x in train_cluster_ids ]
-110 global_observation_dim = None
-111 for i , ( train_sequence , train_cluster_id ) in enumerate (
-112 zip ( train_sequences , train_cluster_ids )):
-113 train_length , observation_dim = train_sequence . shape
-114 if i == 0 :
-115 global_observation_dim = observation_dim
-116 elif global_observation_dim != observation_dim :
-117 raise ValueError (
-118 'train_sequences must have consistent observation dimension' )
-119 if not isinstance ( train_cluster_id , list ):
-120 raise TypeError (
-121 'Elements of train_cluster_ids must be list or numpy.ndarray' )
-122 if len ( train_cluster_id ) != train_length :
-123 raise ValueError (
-124 'Each train_sequence and its train_cluster_id must have same length' )
+ 64 def concatenate_training_data ( train_sequences , train_cluster_ids ,
+ 65 enforce_uniqueness = True , shuffle = True ):
+ 66 """Concatenate training data.
+ 67
+ 68 Args:
+ 69 train_sequences: a list of 2-dim numpy arrays to be concatenated
+ 70 train_cluster_ids: a list of 1-dim list/numpy.ndarray of strings
+ 71 enforce_uniqueness: a boolean indicating whether we should enforce uniqueness
+ 72 to train_cluster_ids
+ 73 shuffle: whether to randomly shuffle input order
+ 74
+ 75 Returns:
+ 76 concatenated_train_sequence: a 2-dim numpy array
+ 77 concatenated_train_cluster_id: a list of strings
+ 78
+ 79 Raises:
+ 80 TypeError: if input has wrong type
+ 81 ValueError: if sizes/dimensions of input or their elements are incorrect
+ 82 """
+ 83 # check input
+ 84 if not isinstance ( train_sequences , list ) or not isinstance (
+ 85 train_cluster_ids , list ):
+ 86 raise TypeError ( 'train_sequences and train_cluster_ids must be lists' )
+ 87 if len ( train_sequences ) != len ( train_cluster_ids ):
+ 88 raise ValueError (
+ 89 'train_sequences and train_cluster_ids must have same size' )
+ 90 train_cluster_ids = [
+ 91 x . tolist () if isinstance ( x , np . ndarray ) else x
+ 92 for x in train_cluster_ids ]
+ 93 global_observation_dim = None
+ 94 for i , ( train_sequence , train_cluster_id ) in enumerate (
+ 95 zip ( train_sequences , train_cluster_ids )):
+ 96 train_length , observation_dim = train_sequence . shape
+ 97 if i == 0 :
+ 98 global_observation_dim = observation_dim
+ 99 elif global_observation_dim != observation_dim :
+100 raise ValueError (
+101 'train_sequences must have consistent observation dimension' )
+102 if not isinstance ( train_cluster_id , list ):
+103 raise TypeError (
+104 'Elements of train_cluster_ids must be list or numpy.ndarray' )
+105 if len ( train_cluster_id ) != train_length :
+106 raise ValueError (
+107 'Each train_sequence and its train_cluster_id must have same length' )
+108
+109 # enforce uniqueness
+110 if enforce_uniqueness :
+111 train_cluster_ids = enforce_cluster_id_uniqueness ( train_cluster_ids )
+112
+113 # random shuffle
+114 if shuffle :
+115 zipped_input = list ( zip ( train_sequences , train_cluster_ids ))
+116 random . shuffle ( zipped_input )
+117 train_sequences , train_cluster_ids = zip ( * zipped_input )
+118
+119 # concatenate
+120 concatenated_train_sequence = np . concatenate ( train_sequences , axis = 0 )
+121 concatenated_train_cluster_id = [ x for train_cluster_id in train_cluster_ids
+122 for x in train_cluster_id ]
+123 return concatenated_train_sequence , concatenated_train_cluster_id
+124
125
-126 # enforce uniqueness
-127 if enforce_uniqueness :
-128 train_cluster_ids = enforce_cluster_id_uniqueness ( train_cluster_ids )
-129
-130 # random shuffle
-131 if shuffle :
-132 zipped_input = list ( zip ( train_sequences , train_cluster_ids ))
-133 random . shuffle ( zipped_input )
-134 train_sequences , train_cluster_ids = zip ( * zipped_input )
-135
-136 # concatenate
-137 concatenated_train_sequence = np . concatenate ( train_sequences , axis = 0 )
-138 concatenated_train_cluster_id = [ x for train_cluster_id in train_cluster_ids
-139 for x in train_cluster_id ]
-140 return concatenated_train_sequence , concatenated_train_cluster_id
-141
-142
-143 def sample_permuted_segments ( index_sequence , number_samples ):
-144 """Sample sequences with permuted blocks.
-145
-146 Args:
-147 index_sequence: (integer array, size: L)
-148 - subsequence index
-149 For example, index_sequence = [1,2,6,10,11,12].
-150 number_samples: (integer)
-151 - number of subsampled block-preserving permuted sequences.
-152 For example, number_samples = 5
-153
-154 Returns:
-155 sampled_index_sequences: (a list of numpy arrays) - a list of subsampled
-156 block-preserving permuted sequences. For example,
-157 ```
-158 sampled_index_sequences =
-159 [[10,11,12,1,2,6],
-160 [6,1,2,10,11,12],
-161 [1,2,10,11,12,6],
-162 [6,1,2,10,11,12],
-163 [1,2,6,10,11,12]]
-164 ```
-165 The length of "sampled_index_sequences" is "number_samples".
-166 """
-167 segments = []
-168 if len ( index_sequence ) == 1 :
-169 segments . append ( index_sequence )
-170 else :
-171 prev = 0
-172 for i in range ( len ( index_sequence ) - 1 ):
-173 if index_sequence [ i + 1 ] != index_sequence [ i ] + 1 :
-174 segments . append ( index_sequence [ prev :( i + 1 )])
-175 prev = i + 1
-176 if i + 1 == len ( index_sequence ) - 1 :
-177 segments . append ( index_sequence [ prev :])
-178 # sample permutations
-179 sampled_index_sequences = []
-180 for _ in range ( number_samples ):
-181 segments_array = []
-182 permutation = np . random . permutation ( len ( segments ))
-183 for permutation_item in permutation :
-184 segments_array . append ( segments [ permutation_item ])
-185 sampled_index_sequences . append ( np . concatenate ( segments_array ))
-186 return sampled_index_sequences
-187
-188
-189 def resize_sequence ( sequence , cluster_id , num_permutations = None ):
-190 """Resize sequences for packing and batching.
-191
-192 Args:
-193 sequence: (real numpy matrix, size: seq_len*obs_size) - observed sequence
-194 cluster_id: (numpy vector, size: seq_len) - cluster indicator sequence
-195 num_permutations: int - Number of permutations per utterance sampled.
-196
-197 Returns:
-198 sub_sequences: A list of numpy array, with obsevation vector from the same
-199 cluster in the same list.
-200 seq_lengths: The length of each cluster (+1).
-201 """
-202 # merge sub-sequences that belong to a single cluster to a single sequence
-203 unique_id = np . unique ( cluster_id )
-204 sub_sequences = []
-205 seq_lengths = []
-206 if num_permutations and num_permutations > 1 :
-207 for i in unique_id :
-208 idx_set = np . where ( cluster_id == i )[ 0 ]
-209 sampled_idx_sets = sample_permuted_segments ( idx_set , num_permutations )
-210 for j in range ( num_permutations ):
-211 sub_sequences . append ( sequence [ sampled_idx_sets [ j ], :])
-212 seq_lengths . append ( len ( idx_set ) + 1 )
-213 else :
-214 for i in unique_id :
-215 idx_set = np . where ( cluster_id == i )
-216 sub_sequences . append ( sequence [ idx_set , :][ 0 ])
-217 seq_lengths . append ( len ( idx_set [ 0 ]) + 1 )
-218 return sub_sequences , seq_lengths
-219
-220
-221 def pack_sequence (
-222 sub_sequences , seq_lengths , batch_size , observation_dim , device ):
-223 """Pack sequences for training.
+126 def sample_permuted_segments ( index_sequence , number_samples ):
+127 """Sample sequences with permuted blocks.
+128
+129 Args:
+130 index_sequence: (integer array, size: L)
+131 - subsequence index
+132 For example, index_sequence = [1,2,6,10,11,12].
+133 number_samples: (integer)
+134 - number of subsampled block-preserving permuted sequences.
+135 For example, number_samples = 5
+136
+137 Returns:
+138 sampled_index_sequences: (a list of numpy arrays) - a list of subsampled
+139 block-preserving permuted sequences. For example,
+140 ```
+141 sampled_index_sequences =
+142 [[10,11,12,1,2,6],
+143 [6,1,2,10,11,12],
+144 [1,2,10,11,12,6],
+145 [6,1,2,10,11,12],
+146 [1,2,6,10,11,12]]
+147 ```
+148 The length of "sampled_index_sequences" is "number_samples".
+149 """
+150 segments = []
+151 if len ( index_sequence ) == 1 :
+152 segments . append ( index_sequence )
+153 else :
+154 prev = 0
+155 for i in range ( len ( index_sequence ) - 1 ):
+156 if index_sequence [ i + 1 ] != index_sequence [ i ] + 1 :
+157 segments . append ( index_sequence [ prev :( i + 1 )])
+158 prev = i + 1
+159 if i + 1 == len ( index_sequence ) - 1 :
+160 segments . append ( index_sequence [ prev :])
+161 # sample permutations
+162 sampled_index_sequences = []
+163 for _ in range ( number_samples ):
+164 segments_array = []
+165 permutation = np . random . permutation ( len ( segments ))
+166 for permutation_item in permutation :
+167 segments_array . append ( segments [ permutation_item ])
+168 sampled_index_sequences . append ( np . concatenate ( segments_array ))
+169 return sampled_index_sequences
+170
+171
+172 def resize_sequence ( sequence , cluster_id , num_permutations = None ):
+173 """Resize sequences for packing and batching.
+174
+175 Args:
+176 sequence: (real numpy matrix, size: seq_len*obs_size) - observed sequence
+177 cluster_id: (numpy vector, size: seq_len) - cluster indicator sequence
+178 num_permutations: int - Number of permutations per utterance sampled.
+179
+180 Returns:
+181 sub_sequences: A list of numpy arrays, with observation vectors from the same
+182 cluster in the same list.
+183 seq_lengths: The length of each cluster (+1).
+184 """
+185 # merge sub-sequences that belong to a single cluster to a single sequence
+186 unique_id = np . unique ( cluster_id )
+187 sub_sequences = []
+188 seq_lengths = []
+189 if num_permutations and num_permutations > 1 :
+190 for i in unique_id :
+191 idx_set = np . where ( cluster_id == i )[ 0 ]
+192 sampled_idx_sets = sample_permuted_segments ( idx_set , num_permutations )
+193 for j in range ( num_permutations ):
+194 sub_sequences . append ( sequence [ sampled_idx_sets [ j ], :])
+195 seq_lengths . append ( len ( idx_set ) + 1 )
+196 else :
+197 for i in unique_id :
+198 idx_set = np . where ( cluster_id == i )
+199 sub_sequences . append ( sequence [ idx_set , :][ 0 ])
+200 seq_lengths . append ( len ( idx_set [ 0 ]) + 1 )
+201 return sub_sequences , seq_lengths
+202
+203
+204 def pack_sequence (
+205 sub_sequences , seq_lengths , batch_size , observation_dim , device ):
+206 """Pack sequences for training.
+207
+208 Args:
+209 sub_sequences: A list of numpy arrays, with observation vectors from the same
+210 cluster in the same list.
+211 seq_lengths: The length of each cluster (+1).
+212 batch_size: int or None - Run batch learning if batch_size is None. Else,
+213 run online learning with specified batch size.
+214 observation_dim: int - dimension for observation vectors
+215 device: str - Your device. E.g., `cuda:0` or `cpu`.
+216
+217 Returns:
+218 packed_rnn_input: (PackedSequence object) packed rnn input
+219 rnn_truth: ground truth
+220 """
+221 num_clusters = len ( seq_lengths )
+222 sorted_seq_lengths = np . sort ( seq_lengths )[:: - 1 ]
+223 permute_index = np . argsort ( seq_lengths )[:: - 1 ]
224
-225 Args:
-226 sub_sequences: A list of numpy array, with obsevation vector from the same
-227 cluster in the same list.
-228 seq_lengths: The length of each cluster (+1).
-229 batch_size: int or None - Run batch learning if batch_size is None. Else,
-230 run online learning with specified batch size.
-231 observation_dim: int - dimension for observation vectors
-232 device: str - Your device. E.g., `cuda:0` or `cpu`.
-233
-234 Returns:
-235 packed_rnn_input: (PackedSequence object) packed rnn input
-236 rnn_truth: ground truth
-237 """
-238 num_clusters = len ( seq_lengths )
-239 sorted_seq_lengths = np . sort ( seq_lengths )[:: - 1 ]
-240 permute_index = np . argsort ( seq_lengths )[:: - 1 ]
-241
-242 if batch_size is None :
-243 rnn_input = np . zeros (( sorted_seq_lengths [ 0 ],
-244 num_clusters ,
-245 observation_dim ))
-246 for i in range ( num_clusters ):
-247 rnn_input [ 1 : sorted_seq_lengths [ i ], i ,
-248 :] = sub_sequences [ permute_index [ i ]]
-249 rnn_input = autograd . Variable (
-250 torch . from_numpy ( rnn_input ) . float ()) . to ( device )
-251 packed_rnn_input = torch . nn . utils . rnn . pack_padded_sequence (
-252 rnn_input , sorted_seq_lengths , batch_first = False )
-253 else :
-254 mini_batch = np . sort ( np . random . choice ( num_clusters , batch_size ))
-255 rnn_input = np . zeros (( sorted_seq_lengths [ mini_batch [ 0 ]],
-256 batch_size ,
-257 observation_dim ))
-258 for i in range ( batch_size ):
-259 rnn_input [ 1 : sorted_seq_lengths [ mini_batch [ i ]],
-260 i , :] = sub_sequences [ permute_index [ mini_batch [ i ]]]
-261 rnn_input = autograd . Variable (
-262 torch . from_numpy ( rnn_input ) . float ()) . to ( device )
-263 packed_rnn_input = torch . nn . utils . rnn . pack_padded_sequence (
-264 rnn_input , sorted_seq_lengths [ mini_batch ], batch_first = False )
-265 # ground truth is the shifted input
-266 rnn_truth = rnn_input [ 1 :, :, :]
-267 return packed_rnn_input , rnn_truth
-268
-269
-270 def output_result ( model_args , training_args , test_record ):
-271 """Produce a string to summarize the experiment."""
-272 accuracy_array , _ = zip ( * test_record )
-273 total_accuracy = np . mean ( accuracy_array )
-274 output_string = """
-275 Config:
-276 sigma_alpha: {}
-277 sigma_beta: {}
-278 crp_alpha: {}
-279 learning rate: {}
-280 regularization: {}
-281 batch size: {}
-282
-283 Performance:
-284 averaged accuracy: {:.6f}
-285 accuracy numbers for all testing sequences:
-286 """ . strip () . format (
-287 training_args . sigma_alpha ,
-288 training_args . sigma_beta ,
-289 model_args . crp_alpha ,
-290 training_args . learning_rate ,
-291 training_args . regularization_weight ,
-292 training_args . batch_size ,
-293 total_accuracy )
-294 for accuracy in accuracy_array :
-295 output_string += ' \n {:.6f} ' . format ( accuracy )
-296 output_string += ' \n ' + '=' * 80 + ' \n '
-297 filename = 'layer_ {} _ {} _ {:.1f} _result.txt' . format (
-298 model_args . rnn_hidden_size ,
-299 model_args . rnn_depth , model_args . rnn_dropout )
-300 with open ( filename , 'a' ) as file_object :
-301 file_object . write ( output_string )
-302 return output_string
-303
-304
-305 def estimate_transition_bias ( cluster_ids , smooth = 1 ):
-306 """Estimate the transition bias.
-307
-308 Args:
-309 cluster_id: Either a list of cluster indicator sequences, or a single
-310 concatenated sequence. The former is strongly preferred, since the
-311 transition_bias estimated from the latter will be inaccurate.
-312 smooth: int or float - Smoothing coefficient, avoids -inf value in np.log
-313 in the case of a sequence with a single speaker and division by 0 in the
-314 case of empty sequences. Using a small value for smooth decreases the
-315 bias in the calculation of transition_bias but can also lead to underflow
-316 in some remote cases, larger values are safer but less accurate.
-317
-318 Returns:
-319 bias: Flipping coin head probability.
-320 bias_denominator: The denominator of the bias, used for multiple calls to
-321 fit().
-322 """
-323 transit_num = smooth
-324 bias_denominator = 2 * smooth
-325 for cluster_id_seq in cluster_ids :
-326 for entry in range ( len ( cluster_id_seq ) - 1 ):
-327 transit_num += ( cluster_id_seq [ entry ] != cluster_id_seq [ entry + 1 ])
-328 bias_denominator += 1
-329 bias = transit_num / bias_denominator
-330 return bias , bias_denominator
+225 if batch_size is None :
+226 rnn_input = np . zeros (( sorted_seq_lengths [ 0 ],
+227 num_clusters ,
+228 observation_dim ))
+229 for i in range ( num_clusters ):
+230 rnn_input [ 1 : sorted_seq_lengths [ i ], i ,
+231 :] = sub_sequences [ permute_index [ i ]]
+232 rnn_input = autograd . Variable (
+233 torch . from_numpy ( rnn_input ) . float ()) . to ( device )
+234 packed_rnn_input = torch . nn . utils . rnn . pack_padded_sequence (
+235 rnn_input , sorted_seq_lengths , batch_first = False )
+236 else :
+237 mini_batch = np . sort ( np . random . choice ( num_clusters , batch_size ))
+238 rnn_input = np . zeros (( sorted_seq_lengths [ mini_batch [ 0 ]],
+239 batch_size ,
+240 observation_dim ))
+241 for i in range ( batch_size ):
+242 rnn_input [ 1 : sorted_seq_lengths [ mini_batch [ i ]],
+243 i , :] = sub_sequences [ permute_index [ mini_batch [ i ]]]
+244 rnn_input = autograd . Variable (
+245 torch . from_numpy ( rnn_input ) . float ()) . to ( device )
+246 packed_rnn_input = torch . nn . utils . rnn . pack_padded_sequence (
+247 rnn_input , sorted_seq_lengths [ mini_batch ], batch_first = False )
+248 # ground truth is the shifted input
+249 rnn_truth = rnn_input [ 1 :, :, :]
+250 return packed_rnn_input , rnn_truth
+251
+252
+253 def output_result ( model_args , training_args , test_record ):
+254 """Produce a string to summarize the experiment."""
+255 accuracy_array , _ = zip ( * test_record )
+256 total_accuracy = np . mean ( accuracy_array )
+257 output_string = """
+258 Config:
+259 sigma_alpha: {}
+260 sigma_beta: {}
+261 crp_alpha: {}
+262 learning rate: {}
+263 regularization: {}
+264 batch size: {}
+265
+266 Performance:
+267 averaged accuracy: {:.6f}
+268 accuracy numbers for all testing sequences:
+269 """ . strip () . format (
+270 training_args . sigma_alpha ,
+271 training_args . sigma_beta ,
+272 model_args . crp_alpha ,
+273 training_args . learning_rate ,
+274 training_args . regularization_weight ,
+275 training_args . batch_size ,
+276 total_accuracy )
+277 for accuracy in accuracy_array :
+278 output_string += ' \n {:.6f} ' . format ( accuracy )
+279 output_string += ' \n ' + '=' * 80 + ' \n '
+280 filename = 'layer_ {} _ {} _ {:.1f} _result.txt' . format (
+281 model_args . rnn_hidden_size ,
+282 model_args . rnn_depth , model_args . rnn_dropout )
+283 with open ( filename , 'a' ) as file_object :
+284 file_object . write ( output_string )
+285 return output_string
+286
+287
+288 def estimate_transition_bias ( cluster_ids , smooth = 1 ):
+289 """Estimate the transition bias.
+290
+291 Args:
+292 cluster_ids: Either a list of cluster indicator sequences, or a single
+293 concatenated sequence. The former is strongly preferred, since the
+294 transition_bias estimated from the latter will be inaccurate.
+295 smooth: int or float - Smoothing coefficient, avoids -inf value in np.log
+296 in the case of a sequence with a single speaker and division by 0 in the
+297 case of empty sequences. Using a small value for smooth decreases the
+298 bias in the calculation of transition_bias but can also lead to underflow
+299 in some remote cases, larger values are safer but less accurate.
+300
+301 Returns:
+302 bias: Flipping coin head probability.
+303 bias_denominator: The denominator of the bias, used for multiple calls to
+304 fit().
+305 """
+306 transit_num = smooth
+307 bias_denominator = 2 * smooth
+308 for cluster_id_seq in cluster_ids :
+309 for entry in range ( len ( cluster_id_seq ) - 1 ):
+310 transit_num += ( cluster_id_seq [ entry ] != cluster_id_seq [ entry + 1 ])
+311 bias_denominator += 1
+312 bias = transit_num / bias_denominator
+313 return bias , bias_denominator
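The estimate above is the Laplace-smoothed fraction of adjacent label pairs that differ. A standalone re-implementation of that loop, with a worked check:

```
def estimate_transition_bias(cluster_ids, smooth=1):
    # Mirrors the loop above: count label changes over adjacent pairs,
    # with `smooth` pseudo-counts on both numerator and denominator.
    transit_num = smooth
    bias_denominator = 2 * smooth
    for cluster_id_seq in cluster_ids:
        for entry in range(len(cluster_id_seq) - 1):
            transit_num += (cluster_id_seq[entry] != cluster_id_seq[entry + 1])
            bias_denominator += 1
    return transit_num / bias_denominator, bias_denominator

# 1 real speaker change out of 3 adjacent pairs, plus smoothing:
# bias = (1 + 1) / (2 + 3) = 0.4
bias, denom = estimate_transition_bias([['a', 'a', 'b'], ['c', 'c']])
assert (bias, denom) == (0.4, 5)
```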
- class Logger:
-
- 25 class Logger :
-26 """A class for printing logging information to screen."""
-27
-28 def __init__ ( self , verbosity ):
-29 self . _verbosity = verbosity
-30
-31 def print ( self , level , message ):
-32 """Print a message if level is not higher than verbosity.
-33
-34 Args:
-35 level: the level of this message, smaller value means more important
-36 message: the message to be printed
-37 """
-38 if level <= self . _verbosity :
-39 print ( message )
-
- A class for printing logging information to screen.
-
- Logger (verbosity)
-
-28 def __init__ ( self , verbosity ):
-29 self . _verbosity = verbosity
-
- def print (self , level , message ):
-
-31 def print ( self , level , message ):
-32 """Print a message if level is not higher than verbosity.
-33
-34 Args:
-35 level: the level of this message, smaller value means more important
-36 message: the message to be printed
-37 """
-38 if level <= self . _verbosity :
-39 print ( message )
-
- Print a message if level is not higher than verbosity.
-
- Args:
- level: the level of this message, smaller value means more important
- message: the message to be printed
@@ -518,18 +405,18 @@
- 42 def generate_random_string ( length = 6 ):
-43 """Generate a random string of upper case letters and digits.
-44
-45 Args:
-46 length: length of the generated string
-47
-48 Returns:
-49 the generated string
-50 """
-51 return '' . join ([
-52 random . choice ( string . ascii_uppercase + string . digits )
-53 for _ in range ( length )])
+ 25 def generate_random_string ( length = 6 ):
+26 """Generate a random string of upper case letters and digits.
+27
+28 Args:
+29 length: length of the generated string
+30
+31 Returns:
+32 the generated string
+33 """
+34 return '' . join ([
+35 random . choice ( string . ascii_uppercase + string . digits )
+36 for _ in range ( length )])
@@ -555,30 +442,30 @@
- 56 def enforce_cluster_id_uniqueness ( cluster_ids ):
-57 """Enforce uniqueness of cluster id across sequences.
-58
-59 Args:
-60 cluster_ids: a list of 1-dim list/numpy.ndarray of strings
-61
-62 Returns:
-63 a new list with same length of cluster_ids
-64
-65 Raises:
-66 TypeError: if cluster_ids or its element has wrong type
-67 """
-68 if not isinstance ( cluster_ids , list ):
-69 raise TypeError ( 'cluster_ids must be a list' )
-70 new_cluster_ids = []
-71 for cluster_id in cluster_ids :
-72 sequence_id = generate_random_string ()
-73 if isinstance ( cluster_id , np . ndarray ):
-74 cluster_id = cluster_id . tolist ()
-75 if not isinstance ( cluster_id , list ):
-76 raise TypeError ( 'Elements of cluster_ids must be list or numpy.ndarray' )
-77 new_cluster_id = [ '_' . join ([ sequence_id , s ]) for s in cluster_id ]
-78 new_cluster_ids . append ( new_cluster_id )
-79 return new_cluster_ids
+ 39 def enforce_cluster_id_uniqueness ( cluster_ids ):
+40 """Enforce uniqueness of cluster id across sequences.
+41
+42 Args:
+43 cluster_ids: a list of 1-dim list/numpy.ndarray of strings
+44
+45 Returns:
+46 a new list with same length of cluster_ids
+47
+48 Raises:
+49 TypeError: if cluster_ids or its element has wrong type
+50 """
+51 if not isinstance ( cluster_ids , list ):
+52 raise TypeError ( 'cluster_ids must be a list' )
+53 new_cluster_ids = []
+54 for cluster_id in cluster_ids :
+55 sequence_id = generate_random_string ()
+56 if isinstance ( cluster_id , np . ndarray ):
+57 cluster_id = cluster_id . tolist ()
+58 if not isinstance ( cluster_id , list ):
+59 raise TypeError ( 'Elements of cluster_ids must be list or numpy.ndarray' )
+60 new_cluster_id = [ '_' . join ([ sequence_id , s ]) for s in cluster_id ]
+61 new_cluster_ids . append ( new_cluster_id )
+62 return new_cluster_ids
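Concretely, each sequence's labels get a fresh random prefix, so identical labels from different utterances stay distinct. A small sketch (the prefixes in the comment are made up):

```
import numpy as np
from uisrnn import utils

# 'spk0' appears in two different sequences and must not be merged.
cluster_ids = [['spk0', 'spk1'], np.array(['spk0', 'spk0'])]

new_ids = utils.enforce_cluster_id_uniqueness(cluster_ids)
# Something like:
# [['A1B2C3_spk0', 'A1B2C3_spk1'], ['D4E5F6_spk0', 'D4E5F6_spk0']]
assert new_ids[0][0] != new_ids[1][0]
```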
@@ -607,66 +494,66 @@
- 82 def concatenate_training_data ( train_sequences , train_cluster_ids ,
- 83 enforce_uniqueness = True , shuffle = True ):
- 84 """Concatenate training data.
- 85
- 86 Args:
- 87 train_sequences: a list of 2-dim numpy arrays to be concatenated
- 88 train_cluster_ids: a list of 1-dim list/numpy.ndarray of strings
- 89 enforce_uniqueness: a boolean indicated whether we should enfore uniqueness
- 90 to train_cluster_ids
- 91 shuffle: whether to randomly shuffle input order
- 92
- 93 Returns:
- 94 concatenated_train_sequence: a 2-dim numpy array
- 95 concatenated_train_cluster_id: a list of strings
- 96
- 97 Raises:
- 98 TypeError: if input has wrong type
- 99 ValueError: if sizes/dimensions of input or their elements are incorrect
-100 """
-101 # check input
-102 if not isinstance ( train_sequences , list ) or not isinstance (
-103 train_cluster_ids , list ):
-104 raise TypeError ( 'train_sequences and train_cluster_ids must be lists' )
-105 if len ( train_sequences ) != len ( train_cluster_ids ):
-106 raise ValueError (
-107 'train_sequences and train_cluster_ids must have same size' )
-108 train_cluster_ids = [
-109 x . tolist () if isinstance ( x , np . ndarray ) else x
-110 for x in train_cluster_ids ]
-111 global_observation_dim = None
-112 for i , ( train_sequence , train_cluster_id ) in enumerate (
-113 zip ( train_sequences , train_cluster_ids )):
-114 train_length , observation_dim = train_sequence . shape
-115 if i == 0 :
-116 global_observation_dim = observation_dim
-117 elif global_observation_dim != observation_dim :
-118 raise ValueError (
-119 'train_sequences must have consistent observation dimension' )
-120 if not isinstance ( train_cluster_id , list ):
-121 raise TypeError (
-122 'Elements of train_cluster_ids must be list or numpy.ndarray' )
-123 if len ( train_cluster_id ) != train_length :
-124 raise ValueError (
-125 'Each train_sequence and its train_cluster_id must have same length' )
-126
-127 # enforce uniqueness
-128 if enforce_uniqueness :
-129 train_cluster_ids = enforce_cluster_id_uniqueness ( train_cluster_ids )
-130
-131 # random shuffle
-132 if shuffle :
-133 zipped_input = list ( zip ( train_sequences , train_cluster_ids ))
-134 random . shuffle ( zipped_input )
-135 train_sequences , train_cluster_ids = zip ( * zipped_input )
-136
-137 # concatenate
-138 concatenated_train_sequence = np . concatenate ( train_sequences , axis = 0 )
-139 concatenated_train_cluster_id = [ x for train_cluster_id in train_cluster_ids
-140 for x in train_cluster_id ]
-141 return concatenated_train_sequence , concatenated_train_cluster_id
+ 65 def concatenate_training_data ( train_sequences , train_cluster_ids ,
+ 66 enforce_uniqueness = True , shuffle = True ):
+ 67 """Concatenate training data.
+ 68
+ 69 Args:
+ 70 train_sequences: a list of 2-dim numpy arrays to be concatenated
+ 71 train_cluster_ids: a list of 1-dim list/numpy.ndarray of strings
+ 72 enforce_uniqueness: a boolean indicating whether we should enforce uniqueness
+ 73 to train_cluster_ids
+ 74 shuffle: whether to randomly shuffle input order
+ 75
+ 76 Returns:
+ 77 concatenated_train_sequence: a 2-dim numpy array
+ 78 concatenated_train_cluster_id: a list of strings
+ 79
+ 80 Raises:
+ 81 TypeError: if input has wrong type
+ 82 ValueError: if sizes/dimensions of input or their elements are incorrect
+ 83 """
+ 84 # check input
+ 85 if not isinstance ( train_sequences , list ) or not isinstance (
+ 86 train_cluster_ids , list ):
+ 87 raise TypeError ( 'train_sequences and train_cluster_ids must be lists' )
+ 88 if len ( train_sequences ) != len ( train_cluster_ids ):
+ 89 raise ValueError (
+ 90 'train_sequences and train_cluster_ids must have same size' )
+ 91 train_cluster_ids = [
+ 92 x . tolist () if isinstance ( x , np . ndarray ) else x
+ 93 for x in train_cluster_ids ]
+ 94 global_observation_dim = None
+ 95 for i , ( train_sequence , train_cluster_id ) in enumerate (
+ 96 zip ( train_sequences , train_cluster_ids )):
+ 97 train_length , observation_dim = train_sequence . shape
+ 98 if i == 0 :
+ 99 global_observation_dim = observation_dim
+100 elif global_observation_dim != observation_dim :
+101 raise ValueError (
+102 'train_sequences must have consistent observation dimension' )
+103 if not isinstance ( train_cluster_id , list ):
+104 raise TypeError (
+105 'Elements of train_cluster_ids must be list or numpy.ndarray' )
+106 if len ( train_cluster_id ) != train_length :
+107 raise ValueError (
+108 'Each train_sequence and its train_cluster_id must have same length' )
+109
+110 # enforce uniqueness
+111 if enforce_uniqueness :
+112 train_cluster_ids = enforce_cluster_id_uniqueness ( train_cluster_ids )
+113
+114 # random shuffle
+115 if shuffle :
+116 zipped_input = list ( zip ( train_sequences , train_cluster_ids ))
+117 random . shuffle ( zipped_input )
+118 train_sequences , train_cluster_ids = zip ( * zipped_input )
+119
+120 # concatenate
+121 concatenated_train_sequence = np . concatenate ( train_sequences , axis = 0 )
+122 concatenated_train_cluster_id = [ x for train_cluster_id in train_cluster_ids
+123 for x in train_cluster_id ]
+124 return concatenated_train_sequence , concatenated_train_cluster_id
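A small sketch of the happy path, with `shuffle=False` so the output order is deterministic:

```
import numpy as np
from uisrnn import utils

train_sequences = [np.zeros((3, 2)), np.ones((2, 2))]
train_cluster_ids = [['a', 'a', 'b'], ['a', 'b']]

# enforce_uniqueness relabels 'a'/'b' per sequence, so the two
# utterances cannot accidentally share speaker ids.
seq, ids = utils.concatenate_training_data(
    train_sequences, train_cluster_ids,
    enforce_uniqueness=True, shuffle=False)
assert seq.shape == (5, 2) and len(ids) == 5
```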
@@ -701,50 +588,50 @@
- 144 def sample_permuted_segments ( index_sequence , number_samples ):
-145 """Sample sequences with permuted blocks.
-146
-147 Args:
-148 index_sequence: (integer array, size: L)
-149 - subsequence index
-150 For example, index_sequence = [1,2,6,10,11,12].
-151 number_samples: (integer)
-152 - number of subsampled block-preserving permuted sequences.
-153 For example, number_samples = 5
-154
-155 Returns:
-156 sampled_index_sequences: (a list of numpy arrays) - a list of subsampled
-157 block-preserving permuted sequences. For example,
-158 ```
-159 sampled_index_sequences =
-160 [[10,11,12,1,2,6],
-161 [6,1,2,10,11,12],
-162 [1,2,10,11,12,6],
-163 [6,1,2,10,11,12],
-164 [1,2,6,10,11,12]]
-165 ```
-166 The length of "sampled_index_sequences" is "number_samples".
-167 """
-168 segments = []
-169 if len ( index_sequence ) == 1 :
-170 segments . append ( index_sequence )
-171 else :
-172 prev = 0
-173 for i in range ( len ( index_sequence ) - 1 ):
-174 if index_sequence [ i + 1 ] != index_sequence [ i ] + 1 :
-175 segments . append ( index_sequence [ prev :( i + 1 )])
-176 prev = i + 1
-177 if i + 1 == len ( index_sequence ) - 1 :
-178 segments . append ( index_sequence [ prev :])
-179 # sample permutations
-180 sampled_index_sequences = []
-181 for _ in range ( number_samples ):
-182 segments_array = []
-183 permutation = np . random . permutation ( len ( segments ))
-184 for permutation_item in permutation :
-185 segments_array . append ( segments [ permutation_item ])
-186 sampled_index_sequences . append ( np . concatenate ( segments_array ))
-187 return sampled_index_sequences
+ 127 def sample_permuted_segments ( index_sequence , number_samples ):
+128 """Sample sequences with permuted blocks.
+129
+130 Args:
+131 index_sequence: (integer array, size: L)
+132 - subsequence index
+133 For example, index_sequence = [1,2,6,10,11,12].
+134 number_samples: (integer)
+135 - number of subsampled block-preserving permuted sequences.
+136 For example, number_samples = 5
+137
+138 Returns:
+139 sampled_index_sequences: (a list of numpy arrays) - a list of subsampled
+140 block-preserving permuted sequences. For example,
+141 ```
+142 sampled_index_sequences =
+143 [[10,11,12,1,2,6],
+144 [6,1,2,10,11,12],
+145 [1,2,10,11,12,6],
+146 [6,1,2,10,11,12],
+147 [1,2,6,10,11,12]]
+148 ```
+149 The length of "sampled_index_sequences" is "number_samples".
+150 """
+151 segments = []
+152 if len ( index_sequence ) == 1 :
+153 segments . append ( index_sequence )
+154 else :
+155 prev = 0
+156 for i in range ( len ( index_sequence ) - 1 ):
+157 if index_sequence [ i + 1 ] != index_sequence [ i ] + 1 :
+158 segments . append ( index_sequence [ prev :( i + 1 )])
+159 prev = i + 1
+160 if i + 1 == len ( index_sequence ) - 1 :
+161 segments . append ( index_sequence [ prev :])
+162 # sample permutations
+163 sampled_index_sequences = []
+164 for _ in range ( number_samples ):
+165 segments_array = []
+166 permutation = np . random . permutation ( len ( segments ))
+167 for permutation_item in permutation :
+168 segments_array . append ( segments [ permutation_item ])
+169 sampled_index_sequences . append ( np . concatenate ( segments_array ))
+170 return sampled_index_sequences
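The function first splits `index_sequence` into maximal runs of consecutive indices ([1,2], [6], [10,11,12] in the docstring's example), then permutes whole runs. A quick check:

```
import numpy as np
from uisrnn import utils

index_sequence = np.array([1, 2, 6, 10, 11, 12])
samples = utils.sample_permuted_segments(index_sequence, 3)
for s in samples:
    # Runs are reordered but never broken apart, so each sample is a
    # permutation of the original index set.
    assert sorted(s.tolist()) == [1, 2, 6, 10, 11, 12]
```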
@@ -760,15 +647,18 @@
Returns:
sampled_index_sequences: (a list of numpy arrays) - a list of subsampled
block-preserving permuted sequences. For example,

sampled_index_sequences =
[[10,11,12,1,2,6],
[6,1,2,10,11,12],
[1,2,10,11,12,6],
[6,1,2,10,11,12],
[1,2,6,10,11,12]]
The length of "sampled_index_sequences" is "number_samples".
@@ -784,36 +674,36 @@
- 190 def resize_sequence ( sequence , cluster_id , num_permutations = None ):
-191 """Resize sequences for packing and batching.
-192
-193 Args:
-194 sequence: (real numpy matrix, size: seq_len*obs_size) - observed sequence
-195 cluster_id: (numpy vector, size: seq_len) - cluster indicator sequence
-196 num_permutations: int - Number of permutations per utterance sampled.
-197
-198 Returns:
-199 sub_sequences: A list of numpy array, with obsevation vector from the same
-200 cluster in the same list.
-201 seq_lengths: The length of each cluster (+1).
-202 """
-203 # merge sub-sequences that belong to a single cluster to a single sequence
-204 unique_id = np . unique ( cluster_id )
-205 sub_sequences = []
-206 seq_lengths = []
-207 if num_permutations and num_permutations > 1 :
-208 for i in unique_id :
-209 idx_set = np . where ( cluster_id == i )[ 0 ]
-210 sampled_idx_sets = sample_permuted_segments ( idx_set , num_permutations )
-211 for j in range ( num_permutations ):
-212 sub_sequences . append ( sequence [ sampled_idx_sets [ j ], :])
-213 seq_lengths . append ( len ( idx_set ) + 1 )
-214 else :
-215 for i in unique_id :
-216 idx_set = np . where ( cluster_id == i )
-217 sub_sequences . append ( sequence [ idx_set , :][ 0 ])
-218 seq_lengths . append ( len ( idx_set [ 0 ]) + 1 )
-219 return sub_sequences , seq_lengths
+ 173 def resize_sequence ( sequence , cluster_id , num_permutations = None ):
+174 """Resize sequences for packing and batching.
+175
+176 Args:
+177 sequence: (real numpy matrix, size: seq_len*obs_size) - observed sequence
+178 cluster_id: (numpy vector, size: seq_len) - cluster indicator sequence
+179 num_permutations: int - Number of permutations per utterance sampled.
+180
+181 Returns:
+182 sub_sequences: A list of numpy arrays, with observation vectors from the same
+183 cluster in the same list.
+184 seq_lengths: The length of each cluster (+1).
+185 """
+186 # merge sub-sequences that belong to a single cluster to a single sequence
+187 unique_id = np . unique ( cluster_id )
+188 sub_sequences = []
+189 seq_lengths = []
+190 if num_permutations and num_permutations > 1 :
+191 for i in unique_id :
+192 idx_set = np . where ( cluster_id == i )[ 0 ]
+193 sampled_idx_sets = sample_permuted_segments ( idx_set , num_permutations )
+194 for j in range ( num_permutations ):
+195 sub_sequences . append ( sequence [ sampled_idx_sets [ j ], :])
+196 seq_lengths . append ( len ( idx_set ) + 1 )
+197 else :
+198 for i in unique_id :
+199 idx_set = np . where ( cluster_id == i )
+200 sub_sequences . append ( sequence [ idx_set , :][ 0 ])
+201 seq_lengths . append ( len ( idx_set [ 0 ]) + 1 )
+202 return sub_sequences , seq_lengths
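A small sketch of the regrouping (no permutation sampling, i.e. `num_permutations=None`):

```
import numpy as np
from uisrnn import utils

sequence = np.arange(10.0).reshape(5, 2)          # seq_len=5, obs_size=2
cluster_id = np.array(['a', 'b', 'a', 'a', 'b'])  # speaker per observation

sub_sequences, seq_lengths = utils.resize_sequence(sequence, cluster_id)
# Rows of 'a' (indices 0, 2, 3) and 'b' (indices 1, 4) are grouped;
# lengths are cluster size + 1, in np.unique() order.
assert seq_lengths == [4, 3]
```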
@@ -843,53 +733,53 @@
- 222 def pack_sequence (
-223 sub_sequences , seq_lengths , batch_size , observation_dim , device ):
-224 """Pack sequences for training.
+ 205 def pack_sequence (
+206 sub_sequences , seq_lengths , batch_size , observation_dim , device ):
+207 """Pack sequences for training.
+208
+209 Args:
+210 sub_sequences: A list of numpy arrays, with observation vectors from the same
+211 cluster in the same list.
+212 seq_lengths: The length of each cluster (+1).
+213 batch_size: int or None - Run batch learning if batch_size is None. Else,
+214 run online learning with specified batch size.
+215 observation_dim: int - dimension for observation vectors
+216 device: str - Your device. E.g., `cuda:0` or `cpu`.
+217
+218 Returns:
+219 packed_rnn_input: (PackedSequence object) packed rnn input
+220 rnn_truth: ground truth
+221 """
+222 num_clusters = len ( seq_lengths )
+223 sorted_seq_lengths = np . sort ( seq_lengths )[:: - 1 ]
+224 permute_index = np . argsort ( seq_lengths )[:: - 1 ]
225
-226   Args:
-227     sub_sequences: A list of numpy arrays, with observation vectors from
-228       the same cluster in the same list.
-229     seq_lengths: The length of each cluster (+1).
-230     batch_size: int or None - Run batch learning if batch_size is None. Else,
-231       run online learning with specified batch size.
-232     observation_dim: int - dimension for observation vectors
-233     device: str - Your device. E.g., `cuda:0` or `cpu`.
-234
-235   Returns:
-236     packed_rnn_input: (PackedSequence object) packed rnn input
-237     rnn_truth: ground truth
-238   """
-239   num_clusters = len(seq_lengths)
-240   sorted_seq_lengths = np.sort(seq_lengths)[::-1]
-241   permute_index = np.argsort(seq_lengths)[::-1]
-242
-243   if batch_size is None:
-244     rnn_input = np.zeros((sorted_seq_lengths[0],
-245                           num_clusters,
-246                           observation_dim))
-247     for i in range(num_clusters):
-248       rnn_input[1:sorted_seq_lengths[i], i,
-249                 :] = sub_sequences[permute_index[i]]
-250     rnn_input = autograd.Variable(
-251         torch.from_numpy(rnn_input).float()).to(device)
-252     packed_rnn_input = torch.nn.utils.rnn.pack_padded_sequence(
-253         rnn_input, sorted_seq_lengths, batch_first=False)
-254   else:
-255     mini_batch = np.sort(np.random.choice(num_clusters, batch_size))
-256     rnn_input = np.zeros((sorted_seq_lengths[mini_batch[0]],
-257                           batch_size,
-258                           observation_dim))
-259     for i in range(batch_size):
-260       rnn_input[1:sorted_seq_lengths[mini_batch[i]],
-261                 i, :] = sub_sequences[permute_index[mini_batch[i]]]
-262     rnn_input = autograd.Variable(
-263         torch.from_numpy(rnn_input).float()).to(device)
-264     packed_rnn_input = torch.nn.utils.rnn.pack_padded_sequence(
-265         rnn_input, sorted_seq_lengths[mini_batch], batch_first=False)
-266   # ground truth is the shifted input
-267   rnn_truth = rnn_input[1:, :, :]
-268   return packed_rnn_input, rnn_truth
+226   if batch_size is None:
+227     rnn_input = np.zeros((sorted_seq_lengths[0],
+228                           num_clusters,
+229                           observation_dim))
+230     for i in range(num_clusters):
+231       rnn_input[1:sorted_seq_lengths[i], i,
+232                 :] = sub_sequences[permute_index[i]]
+233     rnn_input = autograd.Variable(
+234         torch.from_numpy(rnn_input).float()).to(device)
+235     packed_rnn_input = torch.nn.utils.rnn.pack_padded_sequence(
+236         rnn_input, sorted_seq_lengths, batch_first=False)
+237   else:
+238     mini_batch = np.sort(np.random.choice(num_clusters, batch_size))
+239     rnn_input = np.zeros((sorted_seq_lengths[mini_batch[0]],
+240                           batch_size,
+241                           observation_dim))
+242     for i in range(batch_size):
+243       rnn_input[1:sorted_seq_lengths[mini_batch[i]],
+244                 i, :] = sub_sequences[permute_index[mini_batch[i]]]
+245     rnn_input = autograd.Variable(
+246         torch.from_numpy(rnn_input).float()).to(device)
+247     packed_rnn_input = torch.nn.utils.rnn.pack_padded_sequence(
+248         rnn_input, sorted_seq_lengths[mini_batch], batch_first=False)
+249   # ground truth is the shifted input
+250   rnn_truth = rnn_input[1:, :, :]
+251   return packed_rnn_input, rnn_truth
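
Reviewer note: in both branches above, row 0 of rnn_input stays zero padding, so rnn_truth = rnn_input[1:, :, :] is the input shifted by one step; that is also why resize_sequence reports each cluster length with "+1". A side observation, not part of this change: since PyTorch 0.4, autograd.Variable is a no-op wrapper, so the tensor construction could equivalently be written as

    # equivalent under PyTorch >= 0.4, assuming the same numpy array as above
    rnn_input = torch.from_numpy(rnn_input).float().to(device)
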
@@ -922,39 +812,39 @@
-271 def output_result(model_args, training_args, test_record):
-272   """Produce a string to summarize the experiment."""
-273   accuracy_array, _ = zip(*test_record)
-274   total_accuracy = np.mean(accuracy_array)
-275   output_string = """
-276 Config:
-277   sigma_alpha: {}
-278   sigma_beta: {}
-279   crp_alpha: {}
-280   learning rate: {}
-281   regularization: {}
-282   batch size: {}
-283
-284 Performance:
-285   averaged accuracy: {:.6f}
-286   accuracy numbers for all testing sequences:
-287   """.strip().format(
-288       training_args.sigma_alpha,
-289       training_args.sigma_beta,
-290       model_args.crp_alpha,
-291       training_args.learning_rate,
-292       training_args.regularization_weight,
-293       training_args.batch_size,
-294       total_accuracy)
-295   for accuracy in accuracy_array:
-296     output_string += '\n{:.6f}'.format(accuracy)
-297   output_string += '\n' + '=' * 80 + '\n'
-298   filename = 'layer_{}_{}_{:.1f}_result.txt'.format(
-299       model_args.rnn_hidden_size,
-300       model_args.rnn_depth, model_args.rnn_dropout)
-301   with open(filename, 'a') as file_object:
-302     file_object.write(output_string)
-303   return output_string
+254 def output_result(model_args, training_args, test_record):
+255   """Produce a string to summarize the experiment."""
+256   accuracy_array, _ = zip(*test_record)
+257   total_accuracy = np.mean(accuracy_array)
+258   output_string = """
+259 Config:
+260   sigma_alpha: {}
+261   sigma_beta: {}
+262   crp_alpha: {}
+263   learning rate: {}
+264   regularization: {}
+265   batch size: {}
+266
+267 Performance:
+268   averaged accuracy: {:.6f}
+269   accuracy numbers for all testing sequences:
+270   """.strip().format(
+271       training_args.sigma_alpha,
+272       training_args.sigma_beta,
+273       model_args.crp_alpha,
+274       training_args.learning_rate,
+275       training_args.regularization_weight,
+276       training_args.batch_size,
+277       total_accuracy)
+278   for accuracy in accuracy_array:
+279     output_string += '\n{:.6f}'.format(accuracy)
+280   output_string += '\n' + '=' * 80 + '\n'
+281   filename = 'layer_{}_{}_{:.1f}_result.txt'.format(
+282       model_args.rnn_hidden_size,
+283       model_args.rnn_depth, model_args.rnn_dropout)
+284   with open(filename, 'a') as file_object:
+285     file_object.write(output_string)
+286   return output_string
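
Reviewer note: for reference, the filename template expands from rnn_hidden_size, rnn_depth and rnn_dropout; with hypothetical values 512, 1 and 0.2 it yields 'layer_512_1_0.2_result.txt', and repeated runs append to that same file since it is opened in mode 'a'.
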
@@ -974,32 +864,32 @@
-306 def estimate_transition_bias(cluster_ids, smooth=1):
-307   """Estimate the transition bias.
-308
-309   Args:
-310     cluster_ids: Either a list of cluster indicator sequences, or a single
-311       concatenated sequence. The former is strongly preferred, since the
-312       transition_bias estimated from the latter will be inaccurate.
-313     smooth: int or float - Smoothing coefficient. Avoids a -inf value in
-314       np.log for sequences with a single speaker, and division by 0 for
-315       empty sequences. A small value for smooth decreases the bias in the
-316       calculation of transition_bias, but can also lead to underflow in
-317       some remote cases; larger values are safer but less accurate.
-318
-319   Returns:
-320     bias: Flipping coin head probability.
-321     bias_denominator: The denominator of the bias, used for multiple calls
-322       to fit().
-323   """
-324   transit_num = smooth
-325   bias_denominator = 2 * smooth
-326   for cluster_id_seq in cluster_ids:
-327     for entry in range(len(cluster_id_seq) - 1):
-328       transit_num += (cluster_id_seq[entry] != cluster_id_seq[entry + 1])
-329       bias_denominator += 1
-330   bias = transit_num / bias_denominator
-331   return bias, bias_denominator
+289 def estimate_transition_bias(cluster_ids, smooth=1):
+290   """Estimate the transition bias.
+291
+292   Args:
+293     cluster_ids: Either a list of cluster indicator sequences, or a single
+294       concatenated sequence. The former is strongly preferred, since the
+295       transition_bias estimated from the latter will be inaccurate.
+296     smooth: int or float - Smoothing coefficient. Avoids a -inf value in
+297       np.log for sequences with a single speaker, and division by 0 for
+298       empty sequences. A small value for smooth decreases the bias in the
+299       calculation of transition_bias, but can also lead to underflow in
+300       some remote cases; larger values are safer but less accurate.
+301
+302   Returns:
+303     bias: Flipping coin head probability.
+304     bias_denominator: The denominator of the bias, used for multiple calls
+305       to fit().
+306   """
+307   transit_num = smooth
+308   bias_denominator = 2 * smooth
+309   for cluster_id_seq in cluster_ids:
+310     for entry in range(len(cluster_id_seq) - 1):
+311       transit_num += (cluster_id_seq[entry] != cluster_id_seq[entry + 1])
+312       bias_denominator += 1
+313   bias = transit_num / bias_denominator
+314   return bias, bias_denominator
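
Reviewer note: a worked example of the estimator above, with invented labels:

    cluster_ids = [['A', 'A', 'B', 'B', 'A']]
    # adjacent pairs: (A,A) (A,B) (B,B) (B,A) -> 2 transitions out of 4 pairs
    # with smooth=1: transit_num = 1 + 2 = 3, bias_denominator = 2 + 4 = 6
    # bias = 3 / 6 = 0.5

Passing several utterances concatenated into one sequence would also count the spurious transitions at utterance boundaries, which is why the docstring prefers a list of per-utterance sequences.
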
diff --git a/setup.py b/setup.py
index 64f2265..b8d687b 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 import setuptools
-VERSION = '0.1.0'
+VERSION = '0.1.1'
 with open('README.md', 'r') as file_object:
   LONG_DESCRIPTION = file_object.read()
diff --git a/tests/integration_test.py b/tests/integration_test.py
index 6d02648..5d078a6 100644
--- a/tests/integration_test.py
+++ b/tests/integration_test.py
@@ -109,8 +109,8 @@ def test_four_clusters(self):
     predicted_label = model.predict(test_sequence, inference_args)
     # run evaluation
-    model.logger.print(
-        3, 'Asserting the equivalence between'
+    model.logger.info(
+        'Asserting the equivalence between'
         '\nGround truth: {}\nPredicted: {}'.format(
             test_cluster_id, predicted_label))
     accuracy = uisrnn.compute_sequence_match_accuracy(
@@ -125,8 +125,8 @@ def test_four_clusters(self):
     predicted_label = loaded_model.predict(test_sequence, inference_args)
     # run evaluation with loaded model
-    model.logger.print(
-        3, 'Asserting the equivalence between'
+    model.logger.info(
+        'Asserting the equivalence between'
         '\nGround truth: {}\nPredicted: {}'.format(
             test_cluster_id, predicted_label))
     accuracy = uisrnn.compute_sequence_match_accuracy(
@@ -140,13 +140,13 @@ def test_four_clusters(self):
     model.fit(train_sequence[:100, :], train_cluster_id[:100], training_args)
     transition_bias_2 = model.transition_bias
     self.assertNotAlmostEqual(transition_bias_1, transition_bias_2)
-    model.logger.print(
-        3, 'Asserting transition_bias changed from {} to {}'.format(
+    model.logger.info(
+        'Asserting transition_bias changed from {} to {}'.format(
             transition_bias_1, transition_bias_2))
     # run evaluation
-    model.logger.print(
-        3, 'Asserting the equivalence between'
+    model.logger.info(
+        'Asserting the equivalence between'
         '\nGround truth: {}\nPredicted: {}'.format(
             test_cluster_id, predicted_label))
     accuracy = uisrnn.compute_sequence_match_accuracy(
diff --git a/uisrnn/arguments.py b/uisrnn/arguments.py
index c4eeeba..c539c07 100644
--- a/uisrnn/arguments.py
+++ b/uisrnn/arguments.py
@@ -87,12 +87,12 @@ def parse_arguments():
       'value is None, we will estimate it from training data.')
   model_parser.add_argument(
       '--verbosity',
-      default=2,
+      default=3,
       type=int,
       help='How verbose will the logging information be. Higher value '
       'represents more verbose information. A general guideline: '
-      '0 for errors; 1 for finishing important steps; '
-      '2 for finishing less important steps; 3 or above for debugging '
+      '0 for fatals; 1 for errors; 2 for finishing important steps; '
+      '3 for finishing less important steps; 4 or above for debugging '
       'information.')
   model_parser.add_argument(
       '--enable_cuda',
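
Note on the default change from 2 to 3: the new scale shifts every old level up by one to make room for fatals at level 0, so the new default preserves the old default behavior. Reading the two help strings side by side:

    old 0 (errors)                 -> new 1
    old 1 (important steps)        -> new 2  (old default)
    old 2 (less important steps)   -> new 3  (new default)
    old 3+ (debugging)             -> new 4+
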
diff --git a/uisrnn/uisrnn.py b/uisrnn/uisrnn.py
index e951346..e1a1822 100644
--- a/uisrnn/uisrnn.py
+++ b/uisrnn/uisrnn.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """The UIS-RNN model."""

+import colortimelog
 import functools
 import numpy as np
 import torch
@@ -103,7 +104,7 @@ def __init__(self, args):
     self.transition_bias = args.transition_bias
     self.transition_bias_denominator = 0.0
     self.crp_alpha = args.crp_alpha
-    self.logger = utils.Logger(args.verbosity)
+    self.logger = colortimelog.Logger(args.verbosity)

   def _get_optimizer(self, optimizer, learning_rate):
     """Get optimizer for UISRNN.
diff --git a/uisrnn/utils.py b/uisrnn/utils.py
index 83d760b..6558b47 100644
--- a/uisrnn/utils.py
+++ b/uisrnn/utils.py
@@ -21,23 +21,6 @@
 from torch import autograd


-class Logger:
-  """A class for printing logging information to screen."""
-
-  def __init__(self, verbosity):
-    self._verbosity = verbosity
-
-  def print(self, level, message):
-    """Print a message if level is not higher than verbosity.
-
-    Args:
-      level: the level of this message, smaller value means more important
-      message: the message to be printed
-    """
-    if level <= self._verbosity:
-      print(message)
-
-
 def generate_random_string(length=6):
   """Generate a random string of upper case letters and digits.