# -*- coding: utf-8 -*-
import coplot.plot_settings as pls
from . import data_processor as dp
from . import data_simulator as ds
from . import space_updater as su
from . import train, evaluate, optimize, fcnet, nodeframe, utils, cosmic_params
import torch
from torch.autograd import Variable
import torch.multiprocessing as mp
import numpy as np
import matplotlib.pyplot as plt
#%% multilayer perceptron (MLP)
[docs]class OneBranchMLP(train.Train, dp.ParamsScaling, ds.CutParams):
"""Multilayer perceptron (MLP) that is used to predict cosmological parameters with one set of datasets.
Parameters
----------
train_set : list
The training set that contains simulated observational spectra (data) with shape (N, spectra_length)
and simulated parameters of a specific cosmological (or theoretical) model. i.e. [spectra, parameters]
param_names : list
A list which contains the parameter names, e.g. ['H0','Omega_m','ombh2','omch2','tau','As','ns'].
vali_set : list, optional
The validation set that contains simulated observational spectra (data) with shape (N, spectra_length)
and simulated parameters of a specific cosmological (or theoretical) model. The validation set can also be set to None.
i.e. [spectra, parameters] or [None, None]
obs_errors : array-like, optional
Observational errors with shape (spectra_length,). Default: None
cov_matrix : array-like or None, optional
Covariance matrix of the observational data. Default: None
params_dict : dict or None, optional
Information of cosmological parameters that include the labels, the minimum values,
and the maximum values. See :func:`~.cosmic_params.params_dict_zoo`. Default: None
hidden_layer : int, optional
The number of the hidden layer of the network. Default: 3
activation_func : str, optional
Activation function, which can be 'relu', 'leakyrelu', 'prelu', 'rrelu', 'relu6', 'elu', 'celu', 'selu', 'silu',
'sigmoid', 'logsigmoid', 'tanh', 'tanhshrink', 'softsign', or 'softplus' (see :func:`~.element.activation`). Default: 'rrelu'
loss_func : str, optional
The loss function used in the network. Default: 'L1'
noise_type : str, optional
The type of Gaussian noise added to the training set, 'singleNormal' or 'multiNormal'. Default: 'multiNormal'
factor_sigma : float, optional
For the case of 'singleNormal', it is the factor of the observational error (standard deviation),
while for the case of 'multiNormal' it is the standard deviation of the coefficient of the observational error (standard deviation). Default: 0.5
multi_noise : int, optional
The number of realization of noise added to a spectrum. Default: 5
Attributes
----------
spectra_base : array-like, optional
The base value of spectra that is used for data normalization when training the network to ensure that the scaled spectra are ~ 1.,
it is suggested to set the mean of the simulated spectra. The default is the mean of the simulated spectra.
params_base : array-like, optional
The base value of parameters that is used for data normalization when training the network to ensure that the scaled parameters are ~ 1.,
it is suggested to set the mean of the posterior distribution (or the simulated parameters). The default is the mean of the simulated parameters.
lr : float, optional
The learning rate setting of the network. Default: 1e-2
lr_min : float, optional
The minimum of the learning rate. Default: 1e-8
batch_size : int, optional
The batch size setting of the network. Default: 750
auto_batchSize : bool, optional
If True, the batch size will be set automatically in the training process, otherwise, use the setting of ``batch_size``. Default: True
epoch : int, optional
The number of epoch of the training process. Default: 2000
base_epoch : int, optional
The base number (or the minimum number) of epoch. Default: 1000
auto_epoch : bool, optional
If True, the epoch will be set automatically in the training process, otherwise, use the setting of ``epoch``. Default: True
scale_spectra : bool, optional
If True, the input data (measurements) will be scaled based on the base values of the data. It is recommended to set to True. Default: True
scale_params : bool, optional
If True, the target data (cosmological parameters) will be scaled based on the base values of parameters.
See :class:`~.data_processor.ParamsScaling`. It is recommended to set to True. Default: True
norm_inputs : bool, optional
If True, the input data of the network will be normalized. Default: True
norm_target : bool, optional
If True, the target data (cosmological parameters) will be normalized. Default: True
independent_norm : bool, optional
If True, the target data (cosmological parameters) will be normalized independently. This only works when ``norm_target`` is True.
It is recommended to set to False. Default: False
norm_type : str, optional
The method of normalization, 'z_score', 'minmax', or 'mean' (see :class:`~.data_processor.Normalize`). Default: 'z_score'
spaceSigma_min : int, optional
The minimum parameter space to be learned, e.g. for spaceSigma_min=5, the parameter space to be learned is :math:`[-5\sigma, +5\sigma]`. Default: 5
transfer_learning : bool, optional
If True, the network will be initialized using the well-trained network of the previous step. Default: False
"""
def __init__(self, train_set, param_names, vali_set=[None,None], obs_errors=None, cov_matrix=None, params_dict=None,
hidden_layer=3, activation_func='rrelu', loss_func='L1',
noise_type='multiNormal', factor_sigma=0.5, multi_noise=5):
#data
self.spectra, self.params = train_set
self.spectra_base = np.mean(self.spectra, axis=0)
self.params_base = np.mean(self.params, axis=0)
self.param_names = param_names
self.params_n = len(param_names)
self.spectra_vali, self.params_vali = vali_set
# self.obs_data = obs_data #updated, update for multi-branch network !!!, remove?
# self.obs_errors = obs_data[:,2], remove?
self.obs_errors = obs_errors
self.cov_matrix = cov_matrix
self.cholesky_factor = self._cholesky_factor(cov_matrix)
self.params_dict = params_dict
p_property = cosmic_params.ParamsProperty(param_names, params_dict=params_dict)
self.params_limit = p_property.params_limit
#ANN model
self.hidden_layer = hidden_layer
self.activation_func = activation_func
self.loss_func = train.loss_funcs(name=loss_func) #train.Train should be updated
self.lr = 1e-2
self.lr_min = 1e-8
self.batch_size = 750
self.auto_batchSize = True
self.epoch = 2000
self.base_epoch = 1000
self.auto_epoch = True
self.fix_initialize = False
self.print_info = False
#data preprocessing
self.noise_type = noise_type
self.factor_sigma = factor_sigma
self.multi_noise = multi_noise
self.scale_spectra = True
self.scale_params = True
self.norm_inputs = True
self.norm_target = True
self.independent_norm = False
self.norm_type = 'z_score'
#training
self.spaceSigma_min = 5
self.auto_repeat_n = False
self.burn_in = False
self.burnIn_step = None
self.transfer_learning = False
self.randn_num = round(abs(np.random.randn()/5.), 5)
def _cholesky_factor(self, cov_matrix):
if cov_matrix is None:
return None
else:
return np.linalg.cholesky(cov_matrix) #cov=LL^T
# return cov_matrix #test
def _get_params_tot(self):
if self.params_vali is None:
return self.params
else:
return np.concatenate((self.params, self.params_vali), axis=0)
def _nodes(self):
self.node_in = self.spectra.shape[1]
self.node_out = self.params.shape[1]
return nodeframe.decreasingNode(node_in=self.node_in,node_out=self.node_out,hidden_layer=self.hidden_layer)
def _net(self):
if self.fix_initialize:
torch.manual_seed(1000) #Fixed parameter initialization
self.nodes = self._nodes()
self.net = fcnet.FcNet(nodes=self.nodes, activation_func=self.activation_func)
if self.print_info:
print(self.net)
def _check_batchSize(self):
if self.batch_size > len(self.params)*self.multi_noise:
self.batch_size = len(self.params)*self.multi_noise
print('The batch size is set to %s'%(self.batch_size))
def _auto_batchSize(self):
if self.burn_in:
#here <=2.5 is based on experiments
if self.spaceSigma_min<=2.5:
self.batch_size = 500
else:
self.batch_size = len(self.params)//4
else:
self.batch_size = len(self.params)//2
#make sure batch size will not too large
if self.batch_size>1250:
self.batch_size = 1250
def _auto_epoch(self):
if not self.burn_in:
self.epoch = self.base_epoch
def _auto_repeat_n(self, repeat_n):
if self.burn_in:
return repeat_n
else:
return 1
[docs] def statistic(self):
if self.scale_spectra:
self.spectra_statistic = dp.Statistic(self.spectra/self.spectra_base).statistic()
else:
self.spectra_statistic = dp.Statistic(self.spectra).statistic()
if self.scale_params:
self.params = self.scaling(self.params)
if self.spectra_vali is not None:
self.params_vali = self.scaling(self.params_vali)
self.params_tot = self._get_params_tot() #updated
if self.independent_norm:
# self.params_statistic = [dp.Statistic(self.params[:,i]).statistic() for i in range(self.params_n)]
#updated
self.params_statistic = [dp.Statistic(self.params_tot[:,i]).statistic() for i in range(self.params_n)]
else:
# self.params_statistic = dp.Statistic(self.params).statistic()
#updated
self.params_statistic = dp.Statistic(self.params_tot).statistic()
[docs] def load_wellTrainedNet(self, path='ann', randn_num='0.123'):
randn_num = str(randn_num)
print('\nLoading the well trained network that has random number {}'.format(randn_num))
file_path = evaluate.FilePath(filedir=path, randn_num=randn_num).filePath()
self.trained_net = torch.load(file_path)[0]#
self.transfer_learning = True
[docs] def copyLayers_fromTrainedNet(self):
print('\nCopying hyperparameters of a well trained network to the network')
self.net.load_state_dict(self.trained_net.state_dict())
[docs] def transfer_data(self):
if self.use_GPU:
self.spectra = dp.numpy2cuda(self.spectra)
self.params = dp.numpy2cuda(self.params)
if self.cholesky_factor is None:
self.obs_errors = dp.numpy2cuda(self.obs_errors)
else:
self.cholesky_factor = dp.numpy2cuda(self.cholesky_factor)
self.spectra_base_torch = dp.numpy2cuda(self.spectra_base)
#vali_set
if self.spectra_vali is not None:
self.spectra_vali = dp.numpy2cuda(self.spectra_vali)
self.params_vali = dp.numpy2cuda(self.params_vali)
else:
self.spectra = dp.numpy2torch(self.spectra)
self.params = dp.numpy2torch(self.params)
if self.cholesky_factor is None:
self.obs_errors = dp.numpy2torch(self.obs_errors)
else:
self.cholesky_factor = dp.numpy2torch(self.cholesky_factor)
self.spectra_base_torch = dp.numpy2torch(self.spectra_base)
#vali_set
if self.spectra_vali is not None:
self.spectra_vali = dp.numpy2torch(self.spectra_vali)
self.params_vali = dp.numpy2torch(self.params_vali)
[docs] def preprocessing_target(self, params, a=0, b=1):
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
params[:,i] = dp.Normalize(params[:,i], self.params_statistic[i], norm_type=self.norm_type, a=a, b=b).norm()
else:
params = dp.Normalize(params, self.params_statistic, norm_type=self.norm_type, a=a, b=b).norm()
return params
[docs] def preprocessing_target_inv(self, params, a=0, b=1):
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
params[:,i] = dp.InverseNormalize(params[:,i], self.params_statistic[i], norm_type=self.norm_type, a=a, b=b).inverseNorm()
else:
params = dp.InverseNormalize(params, self.params_statistic, norm_type=self.norm_type, a=a, b=b).inverseNorm()
if self.scale_params:
params = self.inverseScaling(params)
return params
[docs] def train(self, repeat_n=3, showIter_n=100):
"""Train the network
Parameters
----------
repeat_n : int, optional
The number of repeat feed to the network for each batch size data, which will increase the number of iterations in each epoch. Default: 3
showIter_n : int, optional
The number of epoch that show the training information. Default: 100
Returns
-------
object
The network object.
array-like
The loss.
"""
# change showIter_n to showEpoch_n???
self._net()
if self.transfer_learning:
self.copyLayers_fromTrainedNet()
self.transfer_net()
self.optimizer = self._optimizer(name='Adam')
if self.auto_batchSize:
self._auto_batchSize()
self._check_batchSize()
# print('batch size: %s'%self.batch_size)
if self.auto_epoch:
self._auto_epoch()
if self.auto_repeat_n:
repeat_n = self._auto_repeat_n(repeat_n)
self.iteration = self.multi_noise * len(self.spectra)//self.batch_size * repeat_n
self.statistic()
self.transfer_data()
self.train_loss = []
self.vali_loss = []
# np.random.seed(1000)#
print('randn_num: %s'%self.randn_num)
for subsample_num in range(1, self.epoch+1):
self.inputs, self.target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
self.inputs = self.preprocessing_input(self.inputs, self.spectra_base_torch)
self.target = self.preprocessing_target(self.target)
_, loss_ = self.train_1(self.inputs, self.target, repeat_n=1, set_seed=False, lr_decay=False, print_info=False)
loss_mean = np.mean(loss_)
self.train_loss.append(loss_mean)
#vali_loss
if self.spectra_vali is not None:
self.inputs_vali, self.target_vali = ds.AddGaussianNoise(self.spectra_vali,params=self.params_vali,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
self.inputs_vali = self.preprocessing_input(self.inputs_vali, self.spectra_base_torch)
self.target_vali = self.preprocessing_target(self.target_vali)
self.net.eval()
pred_vali = self.net(Variable(self.inputs_vali))
_vali_loss = self.loss_func(pred_vali, Variable(self.target_vali, requires_grad=False))
self.vali_loss.append(_vali_loss.item())
self.net.train()
if subsample_num%showIter_n==0:
if self.spectra_vali is None:
print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.optimizer.param_groups[0]['lr']))
else:
print('(epoch:%s/%s; train_loss/vali_loss:%.5f/%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.vali_loss[-1], self.optimizer.param_groups[0]['lr']))
lrdc = optimize.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
self.optimizer.param_groups[0]['lr'] = lrdc.exp()
if self.use_multiGPU:
self.net = self.net.module.cpu()
else:
self.net = self.net.cpu()
self.train_loss = np.array(self.train_loss)
self.vali_loss = np.array(self.vali_loss)
return self.net, self.train_loss, self.vali_loss
def _predict(self, inputs, use_GPU=False, in_type='torch'):
"""Make predictions using a well-trained network.
Parameters
----------
inputs : numpy array or torch tensor
The inputs of the network.
use_GPU : bool
If True, calculate using GPU, otherwise, calculate using CPU.
in_type : str
The data type of the inputs, 'numpy' or 'torch'.
"""
if use_GPU:
self.net = self.net.cuda()
if in_type=='numpy':
inputs = dp.numpy2cuda(inputs)
elif in_type=='torch':
inputs = dp.torch2cuda(inputs)
else:
if in_type=='numpy':
inputs = dp.numpy2torch(inputs)
self.net.eval() #this works for the batch normalization layers
pred = self.net(Variable(inputs))
if use_GPU:
pred = dp.cuda2numpy(pred.data)
else:
pred = dp.torch2numpy(pred.data)
#reshape chain
if len(pred.shape)==1:
pred = pred.reshape(-1, 1)
return pred
[docs] def predict(self, spectra, use_GPU=False, in_type='torch'):
if len(spectra.shape)==1:
spectra = spectra.reshape(1, -1) #for one spectrum
spectra = self.preprocessing_input(spectra, dp.numpy2torch(self.spectra_base))
self.pred_params = self._predict(spectra, use_GPU=use_GPU, in_type=in_type)
self.pred_params = self.preprocessing_target_inv(self.pred_params)
return self.pred_params
[docs] def predict_chain(self, obs_data, cov_matrix=None, chain_leng=10000, use_GPU=False):
# torch.manual_seed(1000)#
# limit parameters using params_limit???, do not set limit here, set limit in chain_ann
obs_data = dp.numpy2torch(obs_data)
obs_best, obs_errors = obs_data[:,1], obs_data[:,2]
self.obs_best_multi = torch.ones((chain_leng, len(obs_best))) * obs_best
if cov_matrix is None:
cholesky_factor = None
else:
cholesky_factor = dp.numpy2torch(np.linalg.cholesky(cov_matrix))
self.obs_best_multi = ds.AddGaussianNoise(self.obs_best_multi, obs_errors=obs_errors, cholesky_factor=cholesky_factor, noise_type='singleNormal', factor_sigma=1, use_GPU=False).noisySpectra()
self.chain = self.predict(self.obs_best_multi, use_GPU=use_GPU, in_type='torch')
self.chain = self.cut_params(self.chain) #remove non-physical parameters
return self.chain
[docs] def predict_params(self, sim_spectra, use_GPU=False):
sim_spectra = dp.numpy2torch(sim_spectra)
params = self.predict(sim_spectra, use_GPU=use_GPU, in_type='torch')
return params
[docs] def save_net(self, path='ann', sample=None):
if sample is None:
fileName = 'net_train%s_batch%s_epoch%s_%s.pt'%(len(self.params),self.batch_size,self.epoch,self.randn_num)
else:
fileName = 'net-%s_train%s_batch%s_epoch%s_%s.pt'%(sample,len(self.params),self.batch_size,self.epoch,self.randn_num)
utils.saveTorchPt(path+'/net', fileName, self.net)
[docs] def save_loss(self, path='ann', sample=None):
if sample is None:
fileName = 'loss_train%s_batch%s_epoch%s_%s'%(len(self.params),self.batch_size,self.epoch,self.randn_num)
else:
fileName = 'loss-%s_train%s_batch%s_epoch%s_%s'%(sample,len(self.params),self.batch_size,self.epoch,self.randn_num)
utils.savenpy(path+'/loss', fileName, [self.train_loss, self.vali_loss])
[docs] def save_chain(self, path='ann', sample=None):
if sample is None:
fileName = 'chain_train%s_batch%s_epoch%s_%s'%(len(self.params),self.batch_size,self.epoch,self.randn_num)
else:
fileName = 'chain-%s_train%s_batch%s_epoch%s_%s'%(sample,len(self.params),self.batch_size,self.epoch,self.randn_num)
utils.savenpy(path+'/chains', fileName, self.chain)
[docs] def save_hparams(self, path='ann', sample=None):
if sample is None:
fileName = 'hparams_train%s_batch%s_epoch%s_%s'%(len(self.params),self.batch_size,self.epoch,self.randn_num)
else:
fileName = 'hparams-%s_train%s_batch%s_epoch%s_%s'%(sample,len(self.params),self.batch_size,self.epoch,self.randn_num)
utils.savenpy(path+'/hparams', fileName, [self.spectra_statistic, self.params_statistic, self.spectra_base, self.params_base, self.param_names, self.params_dict,
self.scale_spectra, self.scale_params, self.norm_inputs, self.norm_target, self.independent_norm, self.norm_type,
self.burnIn_step])
[docs] def plot_loss(self, alpha=0.6, show_logLoss=False):
vali_loss_size = len(self.vali_loss)
train_loss_mean = np.mean(self.train_loss[-200:])
train_loss_min = np.min(self.train_loss[-200:])
train_loss_max = np.max(self.train_loss[-200:])
if vali_loss_size==0:
print ('The average of last 200 training set losses: %.5f\n'%(train_loss_mean))
else:
print ('The aveage of last 200 training/validation set losses: %.5f/%.5f\n'%(train_loss_mean, np.mean(self.vali_loss[-200:])))
vali_loss_min = np.min(self.vali_loss[-200:])
vali_loss_max = np.min(self.vali_loss[-200:])
x = np.linspace(1, len(self.train_loss), len(self.train_loss))
if show_logLoss:
panel = pls.PlotSettings(set_fig=True, figsize=(6*2, 4.5))
panel.setting(location=[1,2,1], labels=['Epochs','Loss'])
else:
panel = pls.PlotSettings(set_fig=True)
panel.setting(location=[1,1,1], labels=['Epochs','Loss'])
plt.plot(x, self.train_loss, label='Training set')
if vali_loss_size==0:
loss_min, loss_max = train_loss_min, train_loss_max
else:
plt.plot(x, self.vali_loss, label='Validation set', alpha=alpha)
loss_min, loss_max = min(train_loss_min, vali_loss_min), max(train_loss_max, vali_loss_max)
loss_diff = loss_max - loss_min
fraction_loss = 0.18
fraction_low = 0.08
ylim_tot = loss_diff / fraction_loss
delta_low = fraction_low * ylim_tot
ylim_min = loss_min - delta_low
ylim_max = ylim_min + ylim_tot
plt.xlim(1, len(self.train_loss))
plt.ylim(ylim_min, ylim_max)
panel.set_legend()
if show_logLoss:
panel.setting(location=[1,2,2], labels=['Epochs','Loss'])
plt.loglog(x, self.train_loss, label='Training set')
if vali_loss_size!=0:
plt.loglog(x, self.vali_loss, label='Validation set', alpha=alpha)
plt.xlim(1, len(self.train_loss))
panel.set_legend()
return panel.fig, panel.ax
[docs]class LoadNet(object):
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
[docs] def load_net(self):
file_path = evaluate.FilePath(filedir=self.path+'/net', randn_num=self.randn_num).filePath()
self.net = torch.load(file_path)
[docs]class LoadLoss(object):
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
[docs] def load_loss(self):
file_path = evaluate.FilePath(filedir=self.path+'/loss', randn_num=self.randn_num, suffix='.npy').filePath()
self.train_loss, self.vali_loss = np.load(file_path, allow_pickle=True)
[docs]class LoadChain(object):
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
[docs] def load_chain(self):
file_path = evaluate.FilePath(filedir=self.path+'/chains', randn_num=self.randn_num, suffix='.npy').filePath()
self.chain = np.load(file_path)
[docs]class LoadHparams(object):
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
[docs] def load_hparams(self):
file_path = evaluate.FilePath(filedir=self.path+'/hparams', randn_num=self.randn_num, suffix='.npy').filePath()
self.spectra_statistic, self.params_statistic, self.spectra_base, self.params_base, self.param_names, self.params_dict, self.scale_spectra, self.scale_params, self.norm_inputs, self.norm_target, self.independent_norm, self.norm_type, self.burnIn_step = np.load(file_path, allow_pickle=True)
self.params_n = len(self.param_names)
p_property = cosmic_params.ParamsProperty(self.param_names, params_dict=self.params_dict)
self.params_limit = p_property.params_limit
[docs]class RePredictOBMLP(OneBranchMLP, LoadNet, LoadLoss, LoadChain, LoadHparams):
"""Repredict cosmological parameters using the saved networks.
Parameters
----------
path : str
The path of the results saved. Default: 'ann'
randn_num : str or int
A random number that identifies the saved results.
"""
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
#%% multibranch network
[docs]class MultiBranchMLP(OneBranchMLP):
"""Multibranch network that is used to predict cosmological parameters with multiple sets of datasets.
Parameters
----------
train_set : list
The training set that contains simulated observational spectra (data) which is a list spectra with shape [(N,spectra_length_1), (N,spectra_length_2), ...]
and simulated parameters of a specific cosmological (or theoretical) model. i.e. [spectra, parameters]
param_names : list
A list which contains the parameter names, e.g. ['H0','Omega_m','ombh2','omch2','tau','As','ns'].
vali_set : list, optional
The validation set that contains simulated observational spectra (data) which is a list spectra with shape [(N,spectra_length_1), (N,spectra_length_2), ...]
and simulated parameters of a specific cosmological (or theoretical) model. The validation set can also be set to None.
i.e. [spectra, parameters] or [None, None]
obs_errors : list, optional
Observational errors, it is a list of errors with shape [(spectra_length_1,), (spectra_length_2,), ...]. Default: None
cov_matrix : list or None, optional
A list of covariance matrix with shape [(spectra_length_1, spectra_length_1), (spectra_length_2, spectra_length_2), ...].
If there is no covariance for some observations, the covariance matrix should be set to None. e.g. [cov_matrix_1, None, cov_matrix_3]. Default: None
params_dict : dict or None, optional
Information of cosmological parameters that include the labels, the minimum values,
and the maximum values. See :func:`~.cosmic_params.params_dict_zoo`. Default: None
branch_hiddenLayer : int, optional
The number of the hidden layer for the branch part of the network. Default: 2
trunk_hiddenLayer : int, optional
The number of the hidden layer for the trunk part of the network. Default: 1
activation_func : str, optional
Activation function, which can be 'relu', 'leakyrelu', 'prelu', 'rrelu', 'relu6', 'elu', 'celu', 'selu', 'silu',
'sigmoid', 'logsigmoid', 'tanh', 'tanhshrink', 'softsign', or 'softplus' (see :func:`~.element.activation`). Default: 'rrelu'
loss_func : str, optional
The loss function used in the network. Default: 'L1'
noise_type : str, optional
The type of Gaussian noise added to the training set, 'singleNormal' or 'multiNormal'. Default: 'multiNormal'
factor_sigma : float, optional
For the case of 'singleNormal', it is the factor of the observational error (standard deviation),
while for the case of 'multiNormal' it is the standard deviation of the coefficient of the observational error (standard deviation). Default: 0.5
multi_noise : int, optional
The number of realization of noise added to a spectrum. Default: 5
Attributes
----------
spectra_base : array-like, optional
The base value of spectra that is used for data normalization when training the network to ensure that the scaled spectra are ~ 1.,
it is suggested to set the mean of the simulated spectra. The default is the mean of the simulated spectra.
params_base : array-like, optional
The base value of parameters that is used for data normalization when training the network to ensure that the scaled parameters are ~ 1.,
it is suggested to set the mean of the posterior distribution (or the simulated parameters). The default is the mean of the simulated parameters.
lr : float, optional
The learning rate setting of the network. Default: 1e-2
lr_branch : float, optional
The learning rate setting of the branch part. Default: 1e-2
lr_min : float, optional
The minimum of the learning rate. Default: 1e-8
batch_size : int, optional
The batch size setting of the network. Default: 750
auto_batchSize : bool, optional
If True, the batch size will be set automatically in the training process, otherwise, use the setting of ``batch_size``. Default: True
epoch : int, optional
The number of epoch of the training process. Default: 2000
epoch_branch : int, optional
The number of epoch for the branch part. This only works when training the branch part. Default: 2000
base_epoch : int, optional
The base number (or the minimum number) of epoch. Default: 1000
auto_epoch : bool, optional
If True, the epoch will be set automatically in the training process, otherwise, use the setting of ``epoch``. Default: True
scale_spectra : bool, optional
If True, the input data (measurements) will be scaled based on the base values of the data. It is recommended to set to True. Default: True
scale_params : bool, optional
If True, the target data (cosmological parameters) will be scaled based on the base values of parameters.
See :class:`~.data_processor.ParamsScaling`. It is recommended to set to True. Default: True
norm_inputs : bool, optional
If True, the input data of the network will be normalized. Default: True
norm_target : bool, optional
If True, the target data (cosmological parameters) will be normalized. Default: True
independent_norm : bool, optional
If True, the target data (cosmological parameters) will be normalized independently. This only works when ``norm_target`` is True.
It is recommended to set to False. Default: False
norm_type : str, optional
The method of normalization, 'z_score', 'minmax', or 'mean' (see :class:`~.data_processor.Normalize`). Default: 'z_score'
spaceSigma_min : int, optional
The minimum parameter space to be learned, e.g. for spaceSigma_min=5, the parameter space to be learned is :math:`[-5\sigma, +5\sigma]`. Default: 5
transfer_learning : bool, optional
If True, the network will be initialized using the well-trained network of the previous step. Default: False
Note
----
It is suggested to set lr and lr_branch the same value.
"""
def __init__(self, train_set, param_names, vali_set=[None,None], obs_errors=None, cov_matrix=None, params_dict=None,
branch_hiddenLayer=2, trunk_hiddenLayer=1, activation_func='rrelu', loss_func='L1',
noise_type='multiNormal', factor_sigma=0.5, multi_noise=5):
#data
self.spectra, self.params = train_set
self.branch_n = len(train_set[0])
self.spectra_base = [np.mean(self.spectra[i], axis=0) for i in range(self.branch_n)]
self.params_base = np.mean(self.params, axis=0)
self.param_names = param_names
self.params_n = len(param_names)
self.spectra_vali, self.params_vali = vali_set
self.obs_errors = self._obs_errors(obs_errors)
self.cholesky_factor = self._cholesky_factor(cov_matrix)
self.params_dict = params_dict
p_property = cosmic_params.ParamsProperty(param_names, params_dict=params_dict)
self.params_limit = p_property.params_limit
#ANN model
self.branch_hiddenLayer = branch_hiddenLayer
self.trunk_hiddenLayer = trunk_hiddenLayer
self.activation_func = activation_func
self.loss_func = train.loss_funcs(name=loss_func)
self.lr = 1e-2
self.lr_branch = 1e-2
self.lr_min = 1e-8
self.batch_size = 750
self.auto_batchSize = True
self.epoch = 2000
self.epoch_branch = 2000
self.base_epoch = 1000
self.auto_epoch = True
self.fix_initialize = False
self.print_info = False
#data preprocessing
self.noise_type = noise_type
self.factor_sigma = factor_sigma
self.multi_noise = multi_noise
self.scale_spectra = True
self.scale_params = True
self.norm_inputs = True
self.norm_target = True
self.independent_norm = False
self.norm_type = 'z_score'
#training
self.spaceSigma_min = 5
self.auto_repeat_n = False
self.burn_in = False
self.burnIn_step = None
self.transfer_learning = False
self.randn_num = round(abs(np.random.randn()/5.), 5)
def _obs_errors(self, errors):
if errors is None:
return [None for i in range(self.branch_n)]
else:
return errors
def _cholesky_factor(self, cov_matrix):
if cov_matrix is None:
return [None for i in range(self.branch_n)]
else:
cholesky_f = []
for i in range(self.branch_n):
if cov_matrix[i] is None:
cholesky_f.append(None)
else:
cholesky_f.append(np.linalg.cholesky(cov_matrix[i]))
# cholesky_f.append(cov_matrix[i]) #test
return cholesky_f
def _nodes(self):
self.nodes_in = []
self.node_out = self.params.shape[1]
for i in range(self.branch_n):
self.nodes_in.append(self.spectra[i].shape[1])
self.fc_hidden = self.branch_hiddenLayer*2 + 1
# self.fc_hidden = self.branch_hiddenLayer + self.trunk_hiddenLayer + 1 #also works, but not necessary
def _net(self):
if self.fix_initialize:
torch.manual_seed(1000) #Fixed parameter initialization
self._nodes()
for i in range(self.branch_n):
exec('self.branch_net%s=fcnet.FcNet(node_in=self.nodes_in[i], node_out=self.node_out,\
hidden_layer=self.fc_hidden, activation_func=self.activation_func)'%(i+1))
self.net = fcnet.MultiBranchFcNet(nodes_in=self.nodes_in, node_out=self.node_out, branch_hiddenLayer=self.branch_hiddenLayer,
trunk_hiddenLayer=self.trunk_hiddenLayer, nodes_all=None, activation_func=self.activation_func)
if self.print_info:
print(self.net)
#change the branch net & trunk net (contain training) to use multiple GPUs ???
[docs] def transfer_branchNet(self, device=None):
if self.use_GPU:
for i in range(1, self.branch_n+1):
exec('self.branch_net%s = self.branch_net%s.cuda(device)'%(i,i))
def _copyLayer_fromBranch(self, branch_index=None):
if branch_index is None:
print('\nCopying hyperparameters of the branch networks to the multibranch network')
for i in range(1, self.branch_n+1):
for j in range(self.branch_hiddenLayer+1):
eval('self.net.branch%s[j*3].load_state_dict(self.branch_net%s.fc[j*3].state_dict())'%(i, i))#copy Linear
eval('self.net.branch%s[j*3+1].load_state_dict(self.branch_net%s.fc[j*3+1].state_dict())'%(i, i))#copy BN
else:
print('Copying hyperparameters of the branch network {} to the multibranch network\n'.format(branch_index))
for j in range(self.branch_hiddenLayer+1):
eval('self.net.branch%s[j*3].load_state_dict(self.branch_net%s.fc[j*3].state_dict())'%(branch_index, branch_index))#copy Linear
eval('self.net.branch%s[j*3+1].load_state_dict(self.branch_net%s.fc[j*3+1].state_dict())'%(branch_index, branch_index))#copy BN
[docs] def statistic(self):
if self.scale_spectra:
self.spectra_statistic = [dp.Statistic(self.spectra[i]/self.spectra_base[i]).statistic() for i in range(self.branch_n)]
#test
# sp_comb = np.concatenate(self.spectra, axis=1)/np.concatenate(self.spectra_base)
# self.spectra_statistic = [dp.Statistic(sp_comb).statistic() for i in range(self.branch_n)]
else:
self.spectra_statistic = [dp.Statistic(self.spectra[i]).statistic() for i in range(self.branch_n)]
#test
# sp_comb = np.concatenate(self.spectra, axis=1)
# self.spectra_statistic = [dp.Statistic(sp_comb).statistic() for i in range(self.branch_n)]
if self.scale_params:
self.params = self.scaling(self.params)
if self.spectra_vali is not None:
self.params_vali = self.scaling(self.params_vali)
self.params_tot = self._get_params_tot() #updated
if self.independent_norm:
# self.params_statistic = [dp.Statistic(self.params[:,i]).statistic() for i in range(self.params_n)]
#updated
self.params_statistic = [dp.Statistic(self.params_tot[:,i]).statistic() for i in range(self.params_n)]
else:
# self.params_statistic = dp.Statistic(self.params).statistic()
#updated
self.params_statistic = dp.Statistic(self.params_tot).statistic()
[docs] def transfer_data(self):
if self.use_GPU:
self.spectra = [dp.numpy2cuda(self.spectra[i]) for i in range(self.branch_n)]
self.params = dp.numpy2cuda(self.params)
for i in range(self.branch_n):
if self.cholesky_factor[i] is None:
self.obs_errors[i] = dp.numpy2cuda(self.obs_errors[i])
else:
self.cholesky_factor[i] = dp.numpy2cuda(self.cholesky_factor[i])
self.spectra_base_torch = [dp.numpy2cuda(self.spectra_base[i]) for i in range(self.branch_n)]
# self.spectra_base_torch = self.spectra_base #test
#vali_set
if self.spectra_vali is not None:
self.spectra_vali = [dp.numpy2cuda(self.spectra_vali[i]) for i in range(self.branch_n)]
self.params_vali = dp.numpy2cuda(self.params_vali)
else:
self.spectra = [dp.numpy2torch(self.spectra[i]) for i in range(self.branch_n)]
self.params = dp.numpy2torch(self.params)
for i in range(self.branch_n):
if self.cholesky_factor[i] is None:
self.obs_errors[i] = dp.numpy2torch(self.obs_errors[i])
else:
self.cholesky_factor[i] = dp.numpy2torch(self.cholesky_factor[i])
self.spectra_base_torch = [dp.numpy2torch(self.spectra_base[i]) for i in range(self.branch_n)]
#vali_set
if self.spectra_vali is not None:
self.spectra_vali = [dp.numpy2torch(self.spectra_vali[i]) for i in range(self.branch_n)]
self.params_vali = dp.numpy2torch(self.params_vali)
# def transfer_subData(self, device=None):
# if self.use_GPU:
# self.inputs = dp.numpy2cuda(self.inputs, device=device)
# self.target = dp.numpy2cuda(self.target, device=device)
# self.error = dp.numpy2cuda(self.error, device=device)
# else:
# self.inputs = dp.numpy2torch(self.inputs)
# self.target = dp.numpy2torch(self.target)
# self.error = dp.numpy2torch(self.error)
def _train_branch(self, rank, repeat_n, showIter_n, device):
optimizer = torch.optim.Adam(eval('self.branch_net%s.parameters()'%(rank+1)), lr=self.lr_branch)
iteration = self.multi_noise*len(self.spectra[0])//self.batch_size * repeat_n
self.inputs = self.spectra[rank]
self.target = self.params
self.error = self.obs_errors[rank]
self.cholesky_f = self.cholesky_factor[rank]
# self.transfer_subData(device=device)
print('Training the branch network %s'%(rank+1))
for subsample_num in range(1, self.epoch_branch+1):
_inputs, _target = ds.AddGaussianNoise(self.inputs,params=self.target,obs_errors=self.error,cholesky_factor=self.cholesky_f,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
if self.scale_spectra:
_inputs = _inputs / self.spectra_base_torch[rank] #to be tested !!!
if self.norm_inputs:
_inputs = dp.Normalize(_inputs, self.spectra_statistic[rank], norm_type=self.norm_type).norm()
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
_target[:,i] = dp.Normalize(_target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
else:
_target = dp.Normalize(_target, self.params_statistic, norm_type=self.norm_type).norm()
for iter_mid in range(1, iteration+1):
batch_index = np.random.choice(len(_inputs), self.batch_size, replace=False)
xx = _inputs[batch_index]
yy = _target[batch_index]
xx = Variable(xx)
yy = Variable(yy, requires_grad=False)
_predicted = eval('self.branch_net%s(xx)'%(rank+1))
_loss = self.loss_func(_predicted, yy)
optimizer.zero_grad()
_loss.backward()
optimizer.step()
if subsample_num%showIter_n==0:
print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr']))
lrdc = optimize.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr_branch,lr_min=self.lr_min)
optimizer.param_groups[0]['lr'] = lrdc.exp()
#############################################################################
# Note: hyperparameters must be transferred in the subprocess.
#
# Variables defined in the subprocess can not be called by the main process,
# but, the hyperparameters of "self.branch_net%s"%i can be copied to "self.net",
# the reason may be that hyperparameters of the network shared the memory.
#############################################################################
#print(eval("self.branch_net%s.fc[3].state_dict()['bias'][:5]"%(rank+1)))
self._copyLayer_fromBranch(branch_index=rank+1)
def _train_branchNet(self, repeat_n=3, showIter_n=10):
#############################################################################
# Note: variables used in the subprocess (in the function self._train_branch)
# should be defined before using "mp.spawn", and variables defined in the
# subprocess can not be called by the main process.
#
# # the following lines have the same function as "mp.spawn"
# mp.set_start_method('spawn') #this is important
# processes = []
# for rank in range(self.branch_n):
# p = mp.Process(target=self._train_branch, args=(rank, repeat_n, showIter_n, device))
# p.start()
# processes.append(p)
# for p in processes:
# p.join()
#############################################################################
#this means that the branch networks can only be trained on 1 GPU, how to train them on muliple GPUs?
device = None
#Note: all networks should be transfered to GPU when using "mp.spawn" to train the branch networks
self.transfer_branchNet(device=device)
mp.spawn(self._train_branch, nprocs=self.branch_n, args=(repeat_n, showIter_n, device), join=True)
def _train_trunk(self, repeat_n=3, showIter_n=100, fix_lr=1e-4, reduce_fix_lr=False):
branch_p = []
for i in range(1, self.branch_n+1):
branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':fix_lr}"%i)) #lr=fix_lr
optimizer = torch.optim.Adam(branch_p + [{'params':self.net.trunk.parameters()}], lr=self.lr)
print('Training the trunk network')
for subsample_num in range(1, self.epoch_branch+1):
self.inputs, self.target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
self.inputs = self.preprocessing_multiBranch_input(self.inputs, self.spectra_base_torch)
self.target = self.preprocessing_multiBranch_target(self.target)
for iter_mid in range(1, self.iteration+1):
batch_index = np.random.choice(len(self.inputs[0]), self.batch_size, replace=False)
xx = [self.inputs[i][batch_index] for i in range(self.branch_n)]
yy = self.target[batch_index]
xx = [Variable(xx[i]) for i in range(self.branch_n)]
yy = Variable(yy, requires_grad=False)
_predicted = self.net(xx)
_loss = self.loss_func(_predicted, yy)
optimizer.zero_grad()
_loss.backward()
optimizer.step()
if subsample_num%showIter_n==0:
print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr'], optimizer.param_groups[-1]['lr']))
lrdc_t = optimize.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr,lr_min=self.lr_min)
optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
#test
if reduce_fix_lr:
lrdc_b = optimize.LrDecay(subsample_num,iteration=self.epoch_branch,lr=fix_lr,lr_min=self.lr_min)#change to lr=self.lr ?
for i in range(self.branch_n):
optimizer.param_groups[i]['lr'] = lrdc_b.exp()
[docs] def preprocessing_multiBranch_target(self, params, a=0, b=1):
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
params[:,i] = dp.Normalize(params[:,i], self.params_statistic[i], norm_type=self.norm_type, a=a, b=b).norm()
else:
params = dp.Normalize(params, self.params_statistic, norm_type=self.norm_type, a=a, b=b).norm()
return params
[docs] def train(self, repeat_n=3, showIter_n=100, train_branch=True, parallel=True, train_trunk=False, fix_lr=1e-4, reduce_fix_lr=False):
self._net()
if self.transfer_learning==True and train_branch==False:
self.copyLayers_fromTrainedNet()
self.transfer_net(prints=self.print_info)
#branch_p = [eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i) for i in range(1,self.branch_n+1)] #this will raise an error in python3.X
#however, the following lines run well for both python2.X and python3.X, why?
branch_p = []
for i in range(1, self.branch_n+1):
branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i))
self.optimizer = torch.optim.Adam(branch_p + [{'params':self.net.trunk.parameters()}], lr=self.lr)
#added
if self.auto_batchSize:
self._auto_batchSize()
self._check_batchSize()
# print('batch size: %s'%self.batch_size)
if self.auto_epoch:
self._auto_epoch()
if self.auto_repeat_n:
repeat_n = self._auto_repeat_n(repeat_n)
self.iteration = self.multi_noise*len(self.spectra[0])//self.batch_size * repeat_n
# print('repeat_n:%s'%repeat_n)
self.statistic()
self.transfer_data()
# np.random.seed(1000)#
print('randn_num: {}'.format(self.randn_num))
if train_branch:
if parallel:
self._train_branchNet(repeat_n=repeat_n, showIter_n=showIter_n)
else:
self.transfer_branchNet()
for rank in range(self.branch_n):
self._train_branch(rank, repeat_n, showIter_n, None)
if train_trunk:
self._train_trunk(repeat_n=repeat_n, showIter_n=showIter_n, fix_lr=fix_lr, reduce_fix_lr=reduce_fix_lr)
self.train_loss = [] #loss
self.vali_loss = []
print('\nTraining the multibranch network')
for subsample_num in range(1, self.epoch+1):
self.inputs, self.target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
self.inputs = self.preprocessing_multiBranch_input(self.inputs, self.spectra_base_torch)
self.target = self.preprocessing_multiBranch_target(self.target)
running_loss = []
for iter_mid in range(1, self.iteration+1):
batch_index = np.random.choice(len(self.inputs[0]), self.batch_size, replace=False)
xx = [self.inputs[i][batch_index] for i in range(self.branch_n)]
yy = self.target[batch_index]
xx = [Variable(xx[i]) for i in range(self.branch_n)]
yy = Variable(yy, requires_grad=False)
_predicted = self.net(xx)
_loss = self.loss_func(_predicted, yy)
self.optimizer.zero_grad()
_loss.backward()
self.optimizer.step()
running_loss.append(_loss.item())
loss_mean = np.mean(running_loss)
self.train_loss.append(loss_mean)
#vali_set
if self.spectra_vali is not None:
self.inputs_vali, self.target_vali = ds.AddGaussianNoise(self.spectra_vali,params=self.params_vali,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
self.inputs_vali = self.preprocessing_multiBranch_input(self.inputs_vali, self.spectra_base_torch)
self.target_vali = self.preprocessing_multiBranch_target(self.target_vali)
self.net.eval()
pred_vali = self.net([Variable(self.inputs_vali[i]) for i in range(self.branch_n)])
_loss_vali = self.loss_func(pred_vali, Variable(self.target_vali, requires_grad=False))
self.vali_loss.append(_loss_vali.item())
self.net.train()
if subsample_num%showIter_n==0:
if self.lr==self.lr_branch:
if self.spectra_vali is None:
print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.optimizer.param_groups[0]['lr']))
else:
print('(epoch:%s/%s; train_loss/vali_loss:%.5f/%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.vali_loss[-1], self.optimizer.param_groups[0]['lr']))
else:
if self.spectra_vali is None:
print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch, loss_mean, self.optimizer.param_groups[0]['lr'], self.optimizer.param_groups[-1]['lr']))
else:
print('(epoch:%s/%s; train_loss/vali_loss:%.5f/%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch, loss_mean, self.vali_loss[-1], self.optimizer.param_groups[0]['lr'], self.optimizer.param_groups[-1]['lr']))
lrdc_t = optimize.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
self.optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
lrdc_b = optimize.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
for i in range(self.branch_n):
self.optimizer.param_groups[i]['lr'] = lrdc_b.exp()
if self.use_multiGPU:
self.net = self.net.module.cpu()
else:
self.net = self.net.cpu()
self.train_loss = np.array(self.train_loss)
self.vali_loss = np.array(self.vali_loss)
return self.net, self.train_loss, self.vali_loss
#test,
def _train_netBranch(self, repeat_n=3, showIter_n=100):
#branch_p = [eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i) for i in range(1,self.branch_n+1)] #this will raise an error in python3.X
#however, the following lines run well for both python2.X and python3.X, why?
branch_p = []
for i in range(1, self.branch_n+1):
branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i))
trunk_p = [{'params':self.net.trunk.parameters(), 'lr':0}]
optimizer = torch.optim.Adam(branch_p + trunk_p)
print('\nTraining the branch parts of the network')
for subsample_num in range(1, self.epoch_branch+1):
_inputs, _target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
if self.scale_spectra:
_inputs = [_inputs[i]/self.spectra_base_torch[i] for i in range(self.branch_n)]
if self.norm_inputs:
_inputs = [dp.Normalize(_inputs[i], self.spectra_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
_target[:,i] = dp.Normalize(_target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
else:
_target = dp.Normalize(_target, self.params_statistic, norm_type=self.norm_type).norm()
for iter_mid in range(1, self.iteration+1):
batch_index = np.random.choice(len(_inputs[0]), self.batch_size, replace=False)
xx = [_inputs[i][batch_index] for i in range(self.branch_n)]
yy = _target[batch_index]
xx = [Variable(xx[i]) for i in range(self.branch_n)]
yy = Variable(yy, requires_grad=False)
_predicted = self.net(xx)
_loss = self.loss_func(_predicted, yy)
optimizer.zero_grad()
_loss.backward()
optimizer.step()
if subsample_num%showIter_n==0:
print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr'], optimizer.param_groups[-1]['lr']))
# lrdc_t = optimize.LrDecay(subsample_num,iteration=epoch,lr=self.lr,lr_min=self.lr_min)
# self.optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
lrdc_b = optimize.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
for i in range(self.branch_n):
optimizer.param_groups[i]['lr'] = lrdc_b.exp()
#test,
def _train_netTrunk(self, repeat_n=3, showIter_n=100):
#branch_p = [eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i) for i in range(1,self.branch_n+1)] #this will raise an error in python3.X
#however, the following lines run well for both python2.X and python3.X, why?
branch_p = []
for i in range(1, self.branch_n+1):
branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':0}"%i))
trunk_p = [{'params':self.net.trunk.parameters(), 'lr':self.lr_branch}]
optimizer = torch.optim.Adam(branch_p + trunk_p)
print('\nTraining the trunk part of the network')
for subsample_num in range(1, self.epoch_branch+1):
_inputs, _target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
if self.scale_spectra:
_inputs = [_inputs[i]/self.spectra_base_torch[i] for i in range(self.branch_n)]
if self.norm_inputs:
_inputs = [dp.Normalize(_inputs[i], self.spectra_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
_target[:,i] = dp.Normalize(_target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
else:
_target = dp.Normalize(_target, self.params_statistic, norm_type=self.norm_type).norm()
for iter_mid in range(1, self.iteration+1):
batch_index = np.random.choice(len(_inputs[0]), self.batch_size, replace=False)
xx = [_inputs[i][batch_index] for i in range(self.branch_n)]
yy = _target[batch_index]
xx = [Variable(xx[i]) for i in range(self.branch_n)]
yy = Variable(yy, requires_grad=False)
_predicted = self.net(xx)
_loss = self.loss_func(_predicted, yy)
optimizer.zero_grad()
_loss.backward()
optimizer.step()
if subsample_num%showIter_n==0:
print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr'], optimizer.param_groups[-1]['lr']))
lrdc_t = optimize.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
# lrdc_b = optimize.LrDecay(subsample_num,iteration=epoch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
# for i in range(self.branch_n):
# optimizer.param_groups[i]['lr'] = lrdc_b.exp()
#test,
[docs] def train_branch_trunk(self, repeat_n=3, showIter_n=100, train_branch_trunk=True):
self._net()
if self.transfer_learning==True:
self.copyLayers_fromTrainedNet()
self.transfer_net(prints=self.print_info)
self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.lr)
#added
if self.auto_batchSize:
self._auto_batchSize()
self._check_batchSize()
# print('batch size: %s'%self.batch_size)
if self.auto_epoch:
self._auto_epoch()
if self.auto_repeat_n:
repeat_n = self._auto_repeat_n(repeat_n)
self.iteration = self.multi_noise*len(self.spectra[0])//self.batch_size * repeat_n
# print('repeat_n:%s'%repeat_n)
self.statistic()
self.transfer_data()
# np.random.seed(1000)#
print('randn_num: {}'.format(self.randn_num))
if train_branch_trunk:
self._train_netBranch(repeat_n=repeat_n, showIter_n=showIter_n)
self._train_netTrunk(repeat_n=repeat_n, showIter_n=showIter_n)
self.loss = []
print('\nTraining the multibranch network')
for subsample_num in range(1, self.epoch+1):
self.inputs, self.target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
if self.scale_spectra:
self.inputs = [self.inputs[i]/self.spectra_base_torch[i] for i in range(self.branch_n)]
if self.norm_inputs:
self.inputs = [dp.Normalize(self.inputs[i], self.spectra_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
self.target[:,i] = dp.Normalize(self.target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
else:
self.target = dp.Normalize(self.target, self.params_statistic, norm_type=self.norm_type).norm()
for iter_mid in range(1, self.iteration+1):
batch_index = np.random.choice(len(self.inputs[0]), self.batch_size, replace=False)
xx = [self.inputs[i][batch_index] for i in range(self.branch_n)]
yy = self.target[batch_index]
xx = [Variable(xx[i]) for i in range(self.branch_n)]
yy = Variable(yy, requires_grad=False)
_predicted = self.net(xx)
_loss = self.loss_func(_predicted, yy)
self.optimizer.zero_grad()
_loss.backward()
self.optimizer.step()
self.loss.append(_loss.item())
if subsample_num%showIter_n==0:
if self.lr==self.lr_branch:
print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, self.loss[-1], self.optimizer.param_groups[0]['lr']))
else:
print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch, self.loss[-1], self.optimizer.param_groups[0]['lr'], self.optimizer.param_groups[-1]['lr']))
lrdc = optimize.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
self.optimizer.param_groups[0]['lr'] = lrdc.exp()
if self.use_multiGPU:
self.net = self.net.module.cpu()
else:
self.net = self.net.cpu()
self.loss = np.array(self.loss)
return self.net, self.loss
#to be updated, to remove params net
def _predict(self, inputs, in_type='numpy'):
# in_type: 'numpy' or 'torch'
self.net.eval() #this works for the batch normalization layers
if in_type=='numpy':
inputs = [dp.numpy2torch(inputs[i]) for i in range(len(inputs))]
inputs = [Variable(inputs[i]) for i in range(len(inputs))]
pred = self.net(inputs)
pred = dp.torch2numpy(pred.data)
#reshape chain
if len(pred.shape)==1:
pred = pred.reshape(-1, 1)
return pred
[docs] def predict(self, spectra, in_type='torch'):
# spectra: [spectra1, spectra2, ...]
if len(spectra[0].shape)==1:
spectra = [spectra[i].reshape(1, -1) for i in range(len(spectra))] #for one spectrum
spectra = self.preprocessing_multiBranch_input(spectra, [dp.numpy2torch(self.spectra_base[i]) for i in range(len(spectra))])
self.pred_params = self._predict(spectra, in_type=in_type)
self.pred_params = self.preprocessing_target_inv(self.pred_params)
return self.pred_params
[docs] def predict_chain(self, obs_data, cov_matrix=None, chain_leng=10000):
# obs_data: observational spectrum in a list [spectra1, spectra2, ...], each element has shape (N, 3)
# torch.manual_seed(1000)#
if cov_matrix is None:
cov_matrix = [None for i in range(len(obs_data))]
obs_data = [dp.numpy2torch(obs_data[i]) for i in range(len(obs_data))]
obs_best = [obs_data[i][:,1] for i in range(len(obs_data))]
obs_errors = [obs_data[i][:,2] for i in range(len(obs_data))]
obs_best_multi = [torch.ones((chain_leng, len(obs_best[i]))) * obs_best[i] for i in range(len(obs_data))]
cholesky_factor = []
for i in range(len(obs_data)):
if cov_matrix[i] is None:
cholesky_factor.append(None)
else:
cholesky_factor.append(dp.numpy2torch(np.linalg.cholesky(cov_matrix[i])))
obs_best_multi = ds.AddGaussianNoise(obs_best_multi, obs_errors=obs_errors, cholesky_factor=cholesky_factor, noise_type='singleNormal', factor_sigma=1, use_GPU=False).noisySpectra()
self.chain = self.predict(obs_best_multi, in_type='torch')
self.chain = self.cut_params(self.chain) #remove non-physical parameters
return self.chain
[docs] def save_net(self, path='ann', sample='TT'):
if sample is None:
fileName = 'net_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s.pt'%(self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
else:
fileName = 'net-%s_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s.pt'%(sample,self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
utils.saveTorchPt(path+'/net', fileName, self.net)
[docs] def save_loss(self, path='ann', sample='TT'):
if sample is None:
fileName = 'loss_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s'%(self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
else:
fileName = 'loss-%s_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s'%(sample,self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
utils.savenpy(path+'/loss', fileName, [self.train_loss, self.vali_loss])
[docs] def save_chain(self, path='ann', sample='TT'):
if sample is None:
fileName = 'chain_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s'%(self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
else:
fileName = 'chain-%s_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s'%(sample,self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
utils.savenpy(path+'/chains', fileName, self.chain)
[docs] def save_hparams(self, path='ann', sample='TT'):
if sample is None:
fileName = 'hparams_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s'%(self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
else:
fileName = 'hparams-%s_branch%s_train%s_batch%s_epoch%s_epochBranch%s_%s'%(sample,self.branch_n,len(self.params),self.batch_size,self.epoch,self.epoch_branch,self.randn_num)
utils.savenpy(path+'/hparams', fileName, [self.spectra_statistic, self.params_statistic, self.spectra_base, self.params_base, self.param_names, self.params_dict,
self.scale_spectra, self.scale_params, self.norm_inputs, self.norm_target, self.independent_norm, self.norm_type,
self.burnIn_step])
[docs]class LoadHparams_MB(object):
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
[docs] def load_hparams(self):
file_path = evaluate.FilePath(filedir=self.path+'/hparams', randn_num=self.randn_num, suffix='.npy').filePath()
self.spectra_statistic, self.params_statistic, self.spectra_base, self.params_base, self.param_names, self.params_dict, self.scale_spectra, self.scale_params, self.norm_inputs, self.norm_target, self.independent_norm, self.norm_type, self.burnIn_step = np.load(file_path, allow_pickle=True)
self.params_n = len(self.param_names)
p_property = cosmic_params.ParamsProperty(self.param_names, params_dict=self.params_dict)
self.params_limit = p_property.params_limit
self.branch_n = len(self.spectra_statistic)
[docs]class RePredictMBMLP(MultiBranchMLP, LoadNet, LoadLoss, LoadChain, LoadHparams_MB):
"""Repredict cosmological parameters using the saved networks.
Parameters
----------
path : str
The path of the results saved. Default: 'ann'
randn_num : str or int
A random number that identifies the saved results.
"""
def __init__(self, path='ann', randn_num='0.123'):
self.path = path
self.randn_num = str(randn_num)
#!!!
# 1. check how to reduce training time, when using multi_noise
# 2. whether to use transfer learning?
#%% test a new model for multiple data sets
[docs]class MultiDataSetMLP(OneBranchMLP):
def __init__(self, spectra, parameters, param_names, obs_data, cov_matrix=None, params_dict=None,
hidden_layer=3, activation_func='rrelu', loss_func='L1',
noise_type='multiNormal', factor_sigma=0.5, multi_noise=5):
#data
self.spectra = spectra
self.branch_n = len(spectra)
self.spectra_base = [np.mean(spectra[i], axis=0) for i in range(self.branch_n)]
self.params = parameters
self.params_base = np.mean(parameters, axis=0)
self.param_names = param_names
self.params_n = len(param_names)
self.obs_data = obs_data #updated, update for multi-branch network !!!
self.obs_errors = [obs_data[i][:,2] for i in range(len(obs_data))]
self.cov_matrix = cov_matrix
self.cholesky_factor = self._cholesky_factor(cov_matrix)
self.params_dict = params_dict
p_property = cosmic_params.ParamsProperty(param_names, params_dict=params_dict)
self.params_limit = p_property.params_limit
#ANN model
self.hidden_layer = hidden_layer
self.activation_func = activation_func
self.loss_func = train.loss_funcs(name=loss_func) #train.Train should be updated
self.lr = 1e-2
self.lr_min = 1e-8
self.batch_size = 750
self.auto_batchSize = True
self.epoch = 2000
self.base_epoch = 1000
self.auto_epoch = True
self.fix_initialize = False
self.print_info = False
#data preprocessing
self.noise_type = noise_type
self.factor_sigma = factor_sigma
self.multi_noise = multi_noise
self.scale_spectra = True
self.scale_params = True
self.norm_inputs = True
self.norm_target = True
self.independent_norm = False
self.norm_type = 'z_score'
#training
self.spaceSigma_min = 5
self.auto_repeat_n = False
self.burn_in = False
self.burnIn_step = None
self.transfer_learning = False
self.randn_num = round(abs(np.random.randn()/5.), 5)
def _cholesky_factor(self, cov_matrix):
if cov_matrix is None:
return [None for i in range(self.branch_n)]
else:
cholesky_f = []
for i in range(self.branch_n):
if cov_matrix[i] is None:
cholesky_f.append(None)
else:
cholesky_f.append(np.linalg.cholesky(cov_matrix[i]))
return cholesky_f
def _nodes(self):
self.node_in = sum([self.spectra[i].shape[1] for i in range(self.branch_n)])
self.node_out = self.params.shape[1]
return nodeframe.decreasingNode(node_in=self.node_in,node_out=self.node_out,hidden_layer=self.hidden_layer)
[docs] def statistic(self):
if self.scale_spectra:
self.spectra_statistic = [dp.Statistic(self.spectra[i]/self.spectra_base[i]).statistic() for i in range(self.branch_n)]
else:
self.spectra_statistic = [dp.Statistic(self.spectra[i]).statistic() for i in range(self.branch_n)]
if self.scale_params:
self.params = self.scaling(self.params)
if self.independent_norm:
self.params_statistic = [dp.Statistic(self.params[:,i]).statistic() for i in range(self.params_n)]
else:
self.params_statistic = dp.Statistic(self.params).statistic()
[docs] def transfer_data(self):
if self.use_GPU:
self.spectra = [dp.numpy2cuda(self.spectra[i]) for i in range(self.branch_n)]
self.params = dp.numpy2cuda(self.params)
for i in range(self.branch_n):
if self.cholesky_factor[i] is None:
self.obs_errors[i] = dp.numpy2cuda(self.obs_errors[i])
else:
self.cholesky_factor[i] = dp.numpy2cuda(self.cholesky_factor[i])
self.spectra_base_torch = [dp.numpy2cuda(self.spectra_base[i]) for i in range(self.branch_n)]
else:
self.spectra = [dp.numpy2torch(self.spectra[i]) for i in range(self.branch_n)]
self.params = dp.numpy2torch(self.params)
for i in range(self.branch_n):
if self.cholesky_factor[i] is None:
self.obs_errors[i] = dp.numpy2torch(self.obs_errors[i])
else:
self.cholesky_factor[i] = dp.numpy2torch(self.cholesky_factor[i])
self.spectra_base_torch = [dp.numpy2torch(self.spectra_base[i]) for i in range(self.branch_n)]
[docs] def train(self, repeat_n=3, showIter_n=100):
self._net()
if self.transfer_learning:
self.copyLayers_fromTrainedNet()
self.transfer_net(prints=self.print_info)
self.optimizer = self._optimizer(name='Adam')
if self.auto_batchSize:
self._auto_batchSize()
self._check_batchSize()
# print('batch size: %s'%self.batch_size)
if self.auto_epoch:
self._auto_epoch()
if self.auto_repeat_n:
repeat_n = self._auto_repeat_n(repeat_n)
self.iteration = self.multi_noise * len(self.spectra[0])//self.batch_size * repeat_n
self.statistic()
self.transfer_data()
self.loss = []
print('randn_num: %s'%self.randn_num)
for subsample_num in range(1, self.epoch+1):
self.inputs, self.target = ds.AddGaussianNoise(self.spectra,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
if self.scale_spectra:
self.inputs = [self.inputs[i]/self.spectra_base_torch[i] for i in range(self.branch_n)]
if self.norm_inputs:
self.inputs = [dp.Normalize(self.inputs[i], self.spectra_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
self.target[:,i] = dp.Normalize(self.target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
else:
self.target = dp.Normalize(self.target, self.params_statistic, norm_type=self.norm_type).norm()
self.inputs = torch.cat(self.inputs, dim=1)
for iter_mid in range(1, self.iteration+1):
batch_index = np.random.choice(len(self.inputs), self.batch_size, replace=False)
xx = self.inputs[batch_index]
yy = self.target[batch_index]
xx = Variable(xx)
yy = Variable(yy, requires_grad=False)
_predicted = self.net(xx)
_loss = self.loss_func(_predicted, yy)
self.optimizer.zero_grad()
_loss.backward()
self.optimizer.step()
self.loss.append(_loss.item())
if subsample_num%showIter_n==0:
print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, self.loss[-1], self.optimizer.param_groups[0]['lr']))
lrdc = optimize.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
self.optimizer.param_groups[0]['lr'] = lrdc.exp()
if self.use_multiGPU:
self.net = self.net.module.cpu()
else:
self.net = self.net.cpu()
self.loss = np.array(self.loss)
return self.net, self.loss
[docs] def predict(self, spectra, in_type='torch'):
# spectra: [spectra1, spectra2, ...]
if len(spectra[0].shape)==1:
spectra = [spectra[i].reshape(1, -1) for i in range(len(spectra))] #for one spectrum
if self.scale_spectra:
spectra = [spectra[i]/dp.numpy2torch(self.spectra_base[i]) for i in range(len(spectra))]
if self.norm_inputs:
spectra = [dp.Normalize(spectra[i], self.spectra_statistic[i], norm_type=self.norm_type).norm() for i in range(len(spectra))]
spectra = torch.cat(spectra, dim=1) #
self.pred_params = self._predict(spectra, in_type=in_type)
if self.norm_target:
if self.independent_norm:
for i in range(self.params_n):
self.pred_params[:,i] = dp.InverseNormalize(self.pred_params[:,i], self.params_statistic[i], norm_type=self.norm_type).inverseNorm()
else:
self.pred_params = dp.InverseNormalize(self.pred_params, self.params_statistic, norm_type=self.norm_type).inverseNorm()
if self.scale_params:
self.pred_params = self.inverseScaling(self.pred_params)
return self.pred_params
[docs] def predict_chain(self, obs_data, cov_matrix=None, chain_leng=10000):
# obs_data: observational spectrum in a list [spectra1, spectra2, ...], each element has shape (N, 3)
# torch.manual_seed(1000)#
if cov_matrix is None:
cov_matrix = [None for i in range(len(obs_data))]
obs_data = [dp.numpy2torch(obs_data[i]) for i in range(len(obs_data))]
obs_best = [obs_data[i][:,1] for i in range(len(obs_data))]
obs_errors = [obs_data[i][:,2] for i in range(len(obs_data))]
obs_best_multi = [torch.ones((chain_leng, len(obs_best[i]))) * obs_best[i] for i in range(len(obs_data))]
cholesky_factor = []
for i in range(len(obs_data)):
if cov_matrix[i] is None:
cholesky_factor.append(None)
else:
cholesky_factor.append(dp.numpy2torch(np.linalg.cholesky(cov_matrix[i])))
obs_best_multi = ds.AddGaussianNoise(obs_best_multi, obs_errors=obs_errors, cholesky_factor=cholesky_factor, noise_type='singleNormal', factor_sigma=1, use_GPU=False).noisySpectra()
self.chain = self.predict(obs_best_multi, in_type='torch')
self.chain = self.cut_params(self.chain) #remove non-physical parameters
return self.chain