diff --git a/pydelfi/delfi.py b/pydelfi/delfi.py
index 70fd2c0b1e..ebf8753ace 100644
--- a/pydelfi/delfi.py
+++ b/pydelfi/delfi.py
@@ -21,7 +21,7 @@ def __init__(self, data, prior, nde, \
                  rank = 0, n_procs = 1, comm = None, red_op = None, \
                  show_plot = True, results_dir = "", progress_bar = True, input_normalization = None, graph_restore_filename = "graph_checkpoint", restore_filename = "restore.pkl", restore = False, save = True):
-        
+
         # Input validation
         for i in range(len(nde)):
@@ -51,7 +51,7 @@ def __init__(self, data, prior, nde, \
         # Data
         self.data = data
         self.D = len(data)
-        
+
         # Prior
         self.prior = prior
@@ -67,7 +67,7 @@ def __init__(self, data, prior, nde, \
         # Tensorflow session for the NDE training
         self.sess = tf.Session(config = tf.ConfigProto())
         self.sess.run(tf.global_variables_initializer())
-        
+
         # Parameter limits
         if param_limits is not None:
             # Set to provided prior limits if provided
@@ -111,12 +111,12 @@ def __init__(self, data, prior, nde, \
         self.x_train = tf.placeholder(tf.float32, shape = (None, self.npar))
         self.y_train = tf.placeholder(tf.float32, shape = (None, self.D))
         self.n_sims = 0
-        
+
         # MCMC chain parameters for EMCEE
         self.nwalkers = nwalkers
         self.posterior_chain_length = posterior_chain_length
         self.proposal_chain_length = proposal_chain_length
-        
+
         # Initialize MCMC chains for posterior and proposal
         if self.asymptotic_posterior is not None:
             self.posterior_samples = np.array([self.asymptotic_posterior.draw() for i in range(self.nwalkers*self.posterior_chain_length)])
@@ -126,16 +126,16 @@ def __init__(self, data, prior, nde, \
             self.proposal_samples = np.array([self.prior.draw() for i in range(self.nwalkers*self.proposal_chain_length)])
         self.posterior_weights = np.ones(len(self.posterior_samples))*1.0/len(self.posterior_samples)
         self.proposal_weights = np.ones(len(self.proposal_samples))*1.0/len(self.proposal_samples)
-        
+
         # Parameter names and ranges for plotting with GetDist
         self.names = param_names
         self.labels = param_names
         self.ranges = dict(zip(param_names, [ [self.lower[i], self.upper[i]] for i in range(self.npar) ]))
         self.show_plot = show_plot
-        
+
         # Results directory
         self.results_dir = results_dir
-        
+
         # Training loss, validation loss
         self.training_loss = [np.array([]) for i in range(self.n_ndes)]
         self.validation_loss = [np.array([]) for i in range(self.n_ndes)]
@@ -155,17 +155,17 @@ def __init__(self, data, prior, nde, \
 
         # Show progress bars?
         self.progress_bar = progress_bar
-        
+
         # Filenames for saving/restoring graph and attributes
         self.graph_restore_filename = results_dir + graph_restore_filename
         self.restore_filename = results_dir + restore_filename
-        
+
         # Save attributes of the object as you go?
         self.save = save
 
         # Restore the graph and dynamic object attributes if restore = True
         if restore == True:
-            
+
             # Restore the graph
             saver = tf.train.Saver()
             saver.restore(self.sess, self.graph_restore_filename)
@@ -175,11 +175,11 @@ def __init__(self, data, prior, nde, \
 
     # Save object attributes
    def saver(self):
-        
+
         f = open(self.restore_filename, 'wb')
         pickle.dump([self.stacking_weights, self.posterior_samples, self.proposal_samples, self.training_loss, self.validation_loss, self.stacked_sequential_training_loss, self.stacked_sequential_validation_loss, self.sequential_nsims, self.ps, self.xs, self.x_mean, self.x_std, self.p_mean, self.p_std], f)
         f.close()
-        
+
     # Divide list of jobs between MPI processes
     def allocate_jobs(self, n_jobs):
         n_j_allocated = 0
@@ -204,12 +204,12 @@ def complete_array(self, target_distrib):
         else:
             target = target_distrib
         return target
-        
+
     # NDE log likelihood (individual NDE)
     def log_likelihood_individual(self, i, theta, data):
-        
+
         lnL = self.nde[i].eval((np.atleast_2d((theta-self.p_mean)/self.p_std), np.atleast_2d((data-self.x_mean)/self.x_std)), self.sess)
-        
+
         return lnL
 
     # NDE log likelihood (stacked)
@@ -225,17 +225,17 @@ def log_likelihood_stacked(self, theta, data):
 
     # Log posterior (stacked)
     def log_posterior_stacked(self, theta, data):
-        
+
         return self.log_likelihood_stacked(theta, data) + self.prior.logpdf(np.atleast_2d(theta))
 
     # Log posterior (individual)
     def log_posterior_individual(self, i, theta, data):
-        
+
         return self.log_likelihood_individual(i, theta, data) + self.prior.logpdf(np.atleast_2d(theta))
-        
+
     # Log posterior
     def log_geometric_mean_proposal_stacked(self, x, data):
-        
+
         return 0.5 * (self.log_likelihood_stacked(x, data) + 2 * self.prior.logpdf(np.atleast_2d(x)) )
 
     # Bayesian optimization acquisition function
@@ -243,19 +243,19 @@ def acquisition(self, theta):
 
         # Compute log_posteriors
         Ls = np.array([self.log_posterior_individual(i, theta) for i in range(self.n_ndes)])
-        
+
         # Check whether prior is zero or not
         return self.log_posterior_stacked(theta)*np.sqrt(np.average((Ls - np.average(Ls, weights = self.stacking_weights, axis=0))**2, weights=self.stacking_weights, axis=0))
-        
+
     # Bayesian optimization training
     def bayesian_optimization_training(self, simulator, compressor, n_batch, n_populations, n_optimizations = 10, \
                                        simulator_args = None, compressor_args = None, plot = False, batch_size = 100, \
                                        validation_split = 0.1, epochs = 300, patience = 20, seed_generator = None, \
                                        save_intermediate_posteriors = False, sub_batch = 1):
-        
+
         # Loop over n_populations
         for i in range(n_populations):
-            
+
             # Find acquisition point...
             print('Finding optimal acquisition point...')
             A_optimal = 0
@@ -265,16 +265,16 @@ def bayesian_optimization_training(self, simulator, compressor, n_batch, n_popul
                 if res.fun < A_optimal:
                     A_optimal = res.fun
                     theta_optimal = res.x
-            
+
             # Array of parameters to run simulations
             ps = np.array([theta_optimal for k in range(n_batch)])
-            
+
             # Run a small batch of simulations at the acquisition point
             xs_batch, ps_batch = self.run_simulation_batch(n_batch, ps, simulator, compressor, simulator_args, compressor_args, seed_generator = seed_generator, sub_batch = sub_batch)
-            
+
             # Augment the training data
             self.add_simulations(xs_batch, ps_batch)
-            
+
             # Re-train the networks
             self.train_ndes(training_data=[self.x_train, self.y_train], batch_size=max(self.n_sims//8, batch_size), validation_split=validation_split, epochs=epochs, patience=patience)
@@ -286,18 +286,18 @@ def bayesian_optimization_training(self, simulator, compressor, n_batch, n_popul
            # Save attributes if save == True
            if self.save == True:
                self.saver()
-    
+
    # Run n_batch simulations
    def run_simulation_batch(self, n_batch, ps, simulator, compressor, simulator_args, compressor_args, seed_generator = None, sub_batch = 1):
-        
+
        # Random seed generator: set to unsigned 32 bit int random numbers as default
        if seed_generator is None:
            seed_generator = lambda: np.random.randint(2147483647)
-        
+
        # Dimension outputs
        data_samples = np.zeros((n_batch*sub_batch, self.D))
        parameter_samples = np.zeros((n_batch*sub_batch, self.npar))
-        
+
        # Run samples assigned to each process, catching exceptions
        # (when simulator returns np.nan).
        i_prop = self.inds_prop[0]
@@ -308,7 +308,7 @@ def run_simulation_batch(self, n_batch, ps, simulator, compressor, simulator_arg
        while i_acpt <= self.inds_acpt[-1]:
            try:
                sims = simulator(ps[i_prop,:], seed_generator(), simulator_args, sub_batch)
-                
+
                # Make sure the sims are the right shape
                if sub_batch == 1 and len(sims) != 1:
                    sims = np.array([sims])
@@ -332,25 +332,25 @@ def run_simulation_batch(self, n_batch, ps, simulator, compressor, simulator_arg
 
    # EMCEE sampler
    def emcee_sample(self, log_likelihood=None, x0=None, burn_in_chain=100, main_chain=1000):
-        
+
        # Set the log likelihood (default to the posterior if none given)
        if log_likelihood is None:
            log_likelihood = lambda x: self.log_posterior_stacked(x, self.data)[0]
-        
+
        # Set up default x0
        if x0 is None:
            x0 = [self.posterior_samples[-i,:] for i in range(self.nwalkers)]
-        
+
        # Set up the sampler
        sampler = emcee.EnsembleSampler(self.nwalkers, self.npar, log_likelihood)
-        
+
        # Burn-in chain
        pos, prob, state = sampler.run_mcmc(x0, burn_in_chain)
        sampler.reset()
-        
+
        # Main chain
        sampler.run_mcmc(pos, main_chain)
-        
+
        return sampler.flatchain
 
    def sequential_training(self, simulator, compressor, n_initial, n_batch, n_populations, proposal = None, \
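Note on emcee_sample above: it wraps emcee's EnsembleSampler with the usual burn-in / reset / main-chain pattern and returns the flattened chain. As a hypothetical usage sketch (not part of this diff; `delfi` stands in for an already-trained Delfi instance):

    # Draw samples from the current stacked posterior approximation.
    # `delfi` is assumed to be a trained pydelfi.delfi.Delfi object.
    samples = delfi.emcee_sample(
        log_likelihood=lambda x: delfi.log_posterior_stacked(x, delfi.data)[0],
        burn_in_chain=100,
        main_chain=1000)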
@@ -366,7 +366,7 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
            proposal = priors.TruncatedGaussian(self.theta_fiducial, 9*self.Finv, self.lower, self.upper)
        else:
            proposal = self.prior
-        
+
        # Generate initial theta values from some broad proposal on
        # master process and share with other processes. Overpropose
        # by a factor of safety to (hopefully) cope gracefully with
@@ -396,19 +396,19 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
            self.stacked_sequential_training_loss.append(np.sum(np.array([self.training_loss[n][-1]*self.stacking_weights[n] for n in range(self.n_ndes)])))
            self.stacked_sequential_validation_loss.append(np.sum(np.array([self.validation_loss[n][-1]*self.stacking_weights[n] for n in range(self.n_ndes)])))
            self.sequential_nsims.append(self.n_sims)
-            
+
            # Generate posterior samples
            if save_intermediate_posteriors:
                print('Sampling approximate posterior...')
                self.posterior_samples = self.emcee_sample(log_likelihood = lambda x: self.log_posterior_stacked(x, self.data)[0], x0=[self.posterior_samples[-i,:] for i in range(self.nwalkers)], \
                                                           main_chain=self.posterior_chain_length)
-                
+
                # Save posterior samples to file
                f = open('{}posterior_samples_0.dat'.format(self.results_dir), 'w')
                np.savetxt(f, self.posterior_samples)
                f.close()
-                
+
                print('Done.')
 
            # If plot == True, plot the current posterior estimate
@@ -416,14 +416,14 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
                self.triangle_plot([self.posterior_samples], \
                                   savefig=True, \
                                   filename='{}seq_train_post_0.pdf'.format(self.results_dir))
-            
+
            # Save attributes if save == True
            if self.save == True:
                self.saver()
 
        # Loop through a number of populations
        for i in range(n_populations):
-            
+
            # Propose theta values on master process and share with
            # other processes. Again, ensure we propose more sets of
            # parameters than needed to cope with bad params.
@@ -431,7 +431,7 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
 
            # Current population
            print('Population {}/{}'.format(i+1, n_populations))
-            
+
            # Sample the current posterior approximation
            print('Sampling proposal density...')
            self.proposal_samples = \
@@ -453,10 +453,10 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
 
            # Train on master only
            if self.rank == 0:
-                
+
                # Augment the training data
                self.add_simulations(xs_batch, ps_batch)
-                
+
                # Train the network on these initial simulations
                self.train_ndes(training_data=[self.x_train, self.y_train], batch_size=max(self.n_sims//8, batch_size), validation_split=0.1, epochs=epochs, patience=patience)
                self.stacked_sequential_training_loss.append(np.sum(np.array([self.training_loss[n][-1]*self.stacking_weights[n] for n in range(self.n_ndes)])))
@@ -469,7 +469,7 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
                    self.posterior_samples = self.emcee_sample(log_likelihood = lambda x: self.log_posterior_stacked(x, self.data)[0], x0=[self.posterior_samples[-i,:] for i in range(self.nwalkers)], \
                                                               main_chain=self.posterior_chain_length)
-                    
+
                    # Save posterior samples to file
                    f = open('{}posterior_samples_{:d}.dat'.format(self.results_dir, i+1), 'w')
                    np.savetxt(f, self.posterior_samples)
@@ -490,22 +490,23 @@ def sequential_training(self, simulator, compressor, n_initial, n_batch, n_popul
            self.sequential_training_plot(savefig=True, filename='{}seq_train_loss.pdf'.format(self.results_dir))
 
    def train_ndes(self, training_data=None, batch_size=100, validation_split=0.1, epochs=500, patience=20, mode='samples'):
-        
+
        # Set the default training data if none
        if training_data is None:
            training_data = [self.x_train, self.y_train]
-        
+
        # Train the networks
        for n in range(self.n_ndes):
 
            # Train the NDE
            val_loss, train_loss = self.trainer[n].train(self.sess, training_data, validation_split = validation_split, epochs=epochs, batch_size=batch_size, progress_bar=self.progress_bar, patience=patience, saver_name=self.graph_restore_filename, mode=mode)
-            
+
            # Save the training and validation losses
            self.training_loss[n] = np.concatenate([self.training_loss[n], train_loss])
            self.validation_loss[n] = np.concatenate([self.validation_loss[n], val_loss])
 
        # Update weights for stacked density estimator
-        self.stacking_weights = np.exp(-np.array([self.training_loss[i][-1] for i in range(self.n_ndes)]))
+        self.stacking_weights = -np.array([self.training_loss[i][-1] for i in range(self.n_ndes)])
+        self.stacking_weights = np.exp(np.add(-np.max(self.stacking_weights), self.stacking_weights))
        self.stacking_weights = self.stacking_weights/sum(self.stacking_weights)
 
        # if save == True, save everything
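Note on the stacking-weight change above: exponentiating raw negative training losses can overflow or underflow when the losses are large in magnitude; the new two-step update is the standard max-subtraction (log-sum-exp style) trick and yields the same normalized weights. A minimal self-contained sketch of the same computation, with placeholder loss values:

    import numpy as np

    # Final training loss per NDE; lower loss => larger stacking weight
    losses = np.array([2.3, 1.7, 5.0])

    logits = -losses                 # unnormalized log-weights
    logits -= np.max(logits)         # max-subtraction keeps exp() in (0, 1]
    weights = np.exp(logits)
    weights /= weights.sum()         # same result as exp(-losses)/sum(exp(-losses)), but overflow-safe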
@@ -513,7 +514,7 @@ def train_ndes(self, training_data=None, batch_size=100, validation_split=0.1, e
            self.saver()
 
    def load_simulations(self, xs_batch, ps_batch):
-        
+
        # Set the input normalizations if None specified
        if self.input_normalization is None:
            self.p_mean = np.mean(ps_batch, axis = 0)
@@ -528,9 +529,9 @@ def load_simulations(self, xs_batch, ps_batch):
        self.x_train = self.ps.astype(np.float32)
        self.y_train = self.xs.astype(np.float32)
        self.n_sims += len(ps_batch)
-    
+
    def add_simulations(self, xs_batch, ps_batch):
-        
+
        ps_batch = (ps_batch - self.p_mean)/self.p_std
        xs_batch = (xs_batch - self.x_mean)/self.x_std
        self.ps = np.concatenate([self.ps, ps_batch])
@@ -538,14 +539,14 @@ def add_simulations(self, xs_batch, ps_batch):
        self.x_train = self.ps.astype(np.float32)
        self.y_train = self.xs.astype(np.float32)
        self.n_sims += len(ps_batch)
-    
+
    def fisher_pretraining(self, n_batch=5000, plot=True, batch_size=100, validation_split=0.1, epochs=1000, patience=20, mode='regression'):
 
        # Train on master only
        if self.rank == 0:
 
            # Generate fisher pre-training data
-            
+
            # Broader proposal
            proposal = priors.TruncatedGaussian(self.theta_fiducial, 9*self.Finv, self.lower, self.upper)
@@ -557,31 +558,31 @@ def fisher_pretraining(self, n_batch=5000, plot=True, batch_size=100, validation
            Ldd = np.linalg.cholesky(Cdd)
            Cddinv = np.linalg.inv(Cdd)
            ln2pidetCdd = np.log(2*np.pi*np.linalg.det(Cdd))
-            
+
            # Sample parameters from some broad proposal
            ps = np.zeros((3*n_batch, self.npar))
            for i in range(0, n_batch):
                # Draws from prior
                ps[i,:] = (self.prior.draw() - self.theta_fiducial)/self.fisher_errors
-                
+
                # Draws from asymptotic posterior
                ps[n_batch + i,:] = (self.asymptotic_posterior.draw() - self.theta_fiducial)/self.fisher_errors
-                
+
                # Drawn from Gaussian with 3x anticipated covariance matrix
                ps[2*n_batch + i,:] = (proposal.draw() - self.theta_fiducial)/self.fisher_errors
-            
+
            # Sample data assuming a Gaussian likelihood
            xs = np.array([pss + np.dot(Ldd, np.random.normal(0, 1, self.npar)) for pss in ps])
-            
+
            # Evaluate the logpdf at those values
            fisher_logpdf_train = np.array([-0.5*np.dot(xs[i,:]-ps[i,:], np.dot(Cddinv, xs[i,:]-ps[i,:])) - 0.5*ln2pidetCdd for i in range(len(xs))])
-            
+
            # Construct the initial training-set
            fisher_x_train = ps.astype(np.float32).reshape((3*n_batch, self.npar))
            fisher_y_train = xs.astype(np.float32).reshape((3*n_batch, self.npar))
-            
+
            # Train the networks depending on the chosen mode (regression = default)
-            
+
            if mode == "regression":
                # Train the networks on these initial simulations
                self.train_ndes(training_data=[fisher_x_train, fisher_y_train, np.atleast_2d(fisher_logpdf_train).reshape(-1,1)], validation_split = validation_split, epochs=epochs, batch_size=batch_size, patience=patience, mode='regression')
@@ -625,7 +626,7 @@ def triangle_plot(self, samples = None, weights = None, savefig = False, filenam
                ax.set_xticklabels(xtl, rotation=45)
        plt.tight_layout()
        plt.subplots_adjust(hspace=0, wspace=0)
-        
+
        if savefig:
            plt.savefig(filename)
        if self.show_plot:
@@ -651,7 +652,7 @@ def sequential_training_plot(self, savefig = False, filename = None):
                'lines.linewidth': .5,
                'axes.linewidth': .5,
                'axes.edgecolor': 'black'}):
-            
+
            # Trace plot of the training and validation loss as a function of the number of simulations ran
            plt.plot(self.sequential_nsims, self.stacked_sequential_training_loss, markersize=5, marker='o', lw=2, alpha=0.7, label = 'training loss')
            plt.plot(self.sequential_nsims, self.stacked_sequential_validation_loss, markersize=5, marker='o', lw=2, alpha=0.7, label = 'validation loss')
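Note on the fisher_pretraining hunk: the pretraining data are drawn from a Gaussian likelihood via a Cholesky factor, i.e. if Cdd = L L^T and z ~ N(0, I) then x = theta + L z has covariance Cdd, and the corresponding Gaussian log-density is used as the regression target. A small illustrative sketch of that construction (placeholder covariance and dimensions, not pydelfi code):

    import numpy as np

    npar = 3
    C = np.diag([0.5, 1.0, 2.0])                          # stand-in for Cdd, the anticipated data covariance
    Lchol = np.linalg.cholesky(C)
    Cinv = np.linalg.inv(C)
    ln2pidetC = np.log(2 * np.pi * np.linalg.det(C))

    theta = np.random.uniform(-1.0, 1.0, npar)            # a (standardized) parameter draw
    x = theta + Lchol @ np.random.normal(0.0, 1.0, npar)  # x ~ N(theta, C)

    # Gaussian log-density, using the same constant term as in the diff
    r = x - theta
    logpdf = -0.5 * r @ Cinv @ r - 0.5 * ln2pidetC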