Source code for hzdplugins.deepmdplugins.submit

import json
[docs]class deepmdInputGenerator():
    """
    deepmdInputGenerator will help us to generate the input file for the deepmd-kit
    """

    def __init__(self, model={}, learning_rate={}, loss={}, training={}):
        """
        In the input file of deepmd-kit, there are four major components: (1) model (2) learning rate (3) loss function (4) training

        :param model: represent the model of the system, including the descriptor, defaults to {}
        :type model: python dictionary, optional
        :param learning_rate: the learning rate of neural network, defaults to {}
        :type learning_rate: python dictionary, optional
        :param loss: the parameters for the loss functions, defaults to {}
        :type loss: python dictionary, optional
        :param training: the parameters for the training, defaults to {}
        :type training: python dictionary, optional
        """
        self.model = model
        self.learning_rate = learning_rate
        self.loss = loss 
        self.training = training

[docs]    def setDescriptor_se_a(self, sel, rcut=6.0, rcut_smth=0.5, neuron=[10, 20, 40], axis_neuron=4, activation_function='tanh', resnet_dt=False, type_one_side=False, precision='float64', trainable=True, seed=1, exclude_types=[], set_davg_zero=False):
        """The `se_a` is (I think) the most popular descriptor, so that I should only used it in here.

        :param sel: a list of integers. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of typi-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-ff radius. 
        :type sel: python list 
        :param rcut: the cut-off radius, defaults to 6.0
        :type rcut: python float, optional
        :param rcut_smth: the 1/r term is smoothed from `rcut` to `rcut_smth`, defaults to 0.5
        :type rcut_smth: python float, optional
        :param neuron: number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built. defaults to [10, 20, 40]. This list could influence the accuracy of our model.
        :type neuron: python list, optional
        :param axis_neuron: size of the submatrix of G (embedding matrix), defaults to 4
        :type axis_neuron: python int, optional
        :param activation_function: the activation function in the embedding net, supported activation functions are: `relu`, `relu6`, `softplus`, `sigmoid`, `tanh`, `gelu`, defaults to 'tanh'
        :type activation_function: python str, optional
        :param seed: random seed for parameter initialization. Usually it is set to be 1.
        :type seed: python int.
        :param resnet_dt: whether to use a `timestep` in the skip connection, defaults to False
        :type resnet_dt: bool, optional
        :param type_one_side: try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets, defaults to False
        :type type_one_side: bool, optional
        :param precision: the precision of the float numbers, defaults to 'float64'
        :type precision: str, optional
        :param trainable: if the parameters in the embedding net is trainable, defaults to True
        :type trainable: bool, optional
        :param exclude_types: the excluded types, defaults to []
        :type exclude_types: list, optional
        :param set_davg_zero: set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used, defaults to False
        :type set_davg_zero: bool, optional
        :return: the descriptor (feature vector) for each atom 
        :rtype: a python list? 
        """
        tmp_descriptor = {}
        tmp_descriptor['type'] = 'se_a'
        tmp_descriptor['sel'] = sel 
        tmp_descriptor['rcut'] = rcut
        tmp_descriptor['rcut_smth'] = rcut_smth
        tmp_descriptor['neuron'] = neuron 
        tmp_descriptor['axis_neuron'] = axis_neuron
        tmp_descriptor['activation_function'] = activation_function
        tmp_descriptor['resnet_dt'] = resnet_dt
        tmp_descriptor['type_one_side'] = type_one_side
        tmp_descriptor['precision'] = precision
        tmp_descriptor['trainable'] = trainable
        tmp_descriptor['seed'] = seed 
        tmp_descriptor['exclude_types'] = exclude_types
        tmp_descriptor['set_davg_zero'] = set_davg_zero
        return tmp_descriptor

[docs]    def setFittingNet(self, fntype = 'ener', numb_fparam = 0, numb_aparm = 0, neuron = [120, 120, 120], activation_function = 'tanh', precision = 'float64', resnet_dt = True, trainable = True, rcond = 0.001, seed = 1, atom_ener = []):
        """The fitting of physical properties

        :param fntype: the types of the fitting, usually we like to fit the energy (potential energy surface), but deep potential can also fit `dipole`, `polar` and also `global_polar`, defaults to 'ener'.
        :type fntype: python str, optional
        :param numb_fparam: the dimension of the frame parameter. If set to > 0, file `fparam.npy` should be included to provided the input fparams, defaults to 0
        :type numb_fparam: int, optional
        :param numb_aparm: the dimension of atomic parameter. If set to > 0, the `aparam.npy` should be included to provided the input aparams, defaults to 0
        :type numb_aparm: int, optional
        :param neuron: the number of neurons in each hidden layers of the fitting net, defaults to [120, 120, 120]
        :type neuron: list, optional
        :param activation_function: the activation function used in the neural network, defaults to 'tanh'
        :type activation_function: str, optional
        :param precision: the precision for float numbers, defaults to 'float64'
        :type precision: str, optional
        :param resnet_dt: whether to use a `timestep` in the skip connection, defaults to True
        :type resnet_dt: bool, optional
        :param trainable: whether the parameter in the fitting net is trainable, defaults to True
        :type trainable: bool, optional
        :param rcond: the condition number used to determine the initial energy shift for each type of atoms, defaults to 0.001
        :type rcond: float, optional
        :param seed: random seed for parameter initialization of the fitting net, defaults to 1
        :type seed: int, optional
        :param atom_ener: specify the atomic energy in vacuum for each type, defaults to []
        :type atom_ener: list, optional
        :return: a dictionary that can be prepared in the mode stages 
        :rtype: python dictionary 
        """
        tmp_fitting_net = {}
        tmp_fitting_net['type'] =  fntype
        tmp_fitting_net['numb_fparam'] = numb_fparam
        tmp_fitting_net['numb_aparam'] = numb_aparm
        tmp_fitting_net['neuron'] = neuron 
        tmp_fitting_net['activation_function'] = activation_function
        tmp_fitting_net['precision'] = precision
        tmp_fitting_net['resnet_dt'] = resnet_dt
        tmp_fitting_net['trainable'] = trainable
        tmp_fitting_net['rcond'] = rcond 
        tmp_fitting_net['seed'] = seed 
        tmp_fitting_net['atom_ener'] = atom_ener
        return tmp_fitting_net
        
[docs]    def setModel(self, type_map, descriptor, fitting_net):
        """This function can help us set the parameters for the model part

        :param type_map: a list of elements investigated in here
        :type type_map: python list
        :param descriptor: the parameters for the descriptor, this can be generated by using self.setDescriptor_se_a() method
        :type descriptor: python dictionary
        :param fitting_net: the parameters for the fitting new, this can be generated by using self.setFittingNet() method
        :type fitting_net: python dictionary
        """
        tmp_model = {}
        tmp_model['type_map'] = type_map
        tmp_model['descriptor'] = descriptor
        tmp_model['fitting_net'] = fitting_net
        self.model = tmp_model
    
[docs]    def setLearningRate(self, lr_type='exp', decay_steps=5000, start_lr=0.001, stop_lr=1e-08):
        tmp_learningrate = {}
        tmp_learningrate['type'] = lr_type
        tmp_learningrate['decay_steps'] = decay_steps
        tmp_learningrate['start_lr'] = start_lr
        tmp_learningrate['stop_lr'] = stop_lr
        self.learning_rate = tmp_learningrate

[docs]    def setLoss(self, start_pref_e=0.02, limit_pref_e=1.0, start_pref_f=1000, limit_pref_f=1.0, start_pref_v=0.0, limit_pref_v=0.0):
        tmp_loss = {}
        tmp_loss['start_pref_e'] = start_pref_e 
        tmp_loss['limit_pref_e'] = start_pref_e
        tmp_loss['start_pref_f'] = start_pref_f
        tmp_loss['limit_pref_f'] = limit_pref_f
        tmp_loss['start_pref_v'] = start_pref_v
        tmp_loss['limit_pref_v'] = limit_pref_v
        self.loss = tmp_loss

[docs]    def setTraining(self, systems=['./'], set_prefix='set', stop_batch=1000000, seed=1, numb_test=10, disp_file='lcurve.out', disp_freq=50, save_freq=100, save_ckpt='model.ckpt', load_ckpt='model.ckpt', disp_training=True, time_training=True, profiling=True, profiling_file='timeline.json'):
        """This function can help us generate the parameters for the training section

         
        :type validation_data: python dictionary
        :param stop_batch: the number of batches that we want to train the model, defaults to 1000000
        :type stop_batch: int, optional
        :param seed: the seed for generating the random number, defaults to 1
        :type seed: int, optional
        :param disp_file: the output file for the error, defaults to 'lcurve.out'
        :type disp_file: str, optional
        :param disp_freq: output every `disp_freq` steps, defaults to '500'
        :type disp_freq: str, optional
        :param save_freq: steps for saving the training data, defaults to '1000'
        :type save_freq: str, optional
        :param save_ckpt: the file for storing the training stage, defaults to 'model.ckpt'
        :type save_ckpt: str, optional
        :param time_training: output the training time for each step, defaults to True
        :type time_training: bool, optional
        :param profiling: output the profile (cpu / gpu usages for training), defaults to True
        :type profiling: bool, optional
        :param profiling_file: filename of the profiling data, defaults to 'timeline.json'
        :type profiling_file: str, optional
        """
        tmp_training = {}
        tmp_training['systems'] = systems
        tmp_training['set_prefix'] = set_prefix
        tmp_training['stop_batch'] = stop_batch
        tmp_training['seed'] = seed 
        tmp_training['numb_test'] = numb_test
        tmp_training['disp_file'] = disp_file
        tmp_training['disp_freq'] = disp_freq
        tmp_training['save_freq'] = save_freq
        tmp_training['save_ckpt'] = save_ckpt
        tmp_training['disp_training'] = disp_training
        tmp_training['time_training'] = time_training
        tmp_training['profiling'] = profiling
        tmp_training['profiling_file'] = profiling_file
        self.training = tmp_training
 
[docs]    def output(self):
        """
        This function can output the current stages of the input file for deepmd-kit, very useful for checking which parameter is still missing
        """
        tmp_dict = {}
        tmp_dict['model'] = self.model
        tmp_dict['learning_rate'] = self.learning_rate
        tmp_dict['loss'] = self.loss
        tmp_dict['training'] = self.training 
        return tmp_dict

[docs]    def export(self, filename):
        """`export` function can help us generate the json file

        :param filename: the name of the json file 
        :type filename: python string 
        """

        print('The items in model:{}; learning_rate:{}, loss:{}, training:{}'.format(len(self.model), len(self.learning_rate), len(self.loss), len(self.training)))
        
        tmp_dict = {
            'model': self.model,
            'learning_rate': self.learning_rate,
            'loss': self.loss,
            'training': self.training
        }
        f = open(filename,'w+')
        json.dump(tmp_dict, f, sort_keys=True, indent=4)

[docs]class dpgenInputGenerator():
    """
    dpgenInputGenerator will help us to generate the input json file for the dpgen package.
    """

[docs]class lammpsInputGenerator():
    """
    `lammpsInputGenerator` will help us to generate the input file for LAMMPS (if we want to combine the deepmd with lammps)

    There are four parts of the input file for LAMMPS:
    * Initilization
    * System definition
    * Simulation settings
    * Run a simulation
    """

    def __init__(self, sys_name, data_file, dp_graph_file, temperature, timestep, run_num):
        self.dict = {}
        self.dict['boundary'] = 'p p p'
        self.dict['units'] = 'metal'
        self.dict['atom_style'] = 'atomic'
        self.dict['neighbor'] = '2.0 bin'
        self.dict['neigh_modify'] = 'every 10 delay 0 check no'
        self.dict['read_data'] = data_file 
        self.dict['pair_style'] = 'deepmd {}'.format(dp_graph_file)
        self.dict['pair_coeff'] = ''
        self.dict['velocity'] = 'all create {} 23456789'.format(temperature)
        self.dict['minimize'] = '1.0e-8 1.0e-6 100000 100000'
        self.dict['fix'] = '1 all nvt temp {} {} 0.5'.format(temperature, temperature)
        self.dict['timestep'] = str(timestep)
        self.dict['thermo_style'] = 'custom step pe ke etotal temp press vol'
        self.dict['thermo'] = '100'
        self.dict['dump'] = 'hzd_dump all custom 1000 geo.xyz type x y z'
        self.dict['run'] = str(run_num) 
        self.dict['write_data'] = '{}.dat'.format(sys_name)
        self.dict['write_restart'] = '{}.rest'.format(sys_name)

[docs]    def output(self):
        for k, v in self.dict.items():
            print(k, v) 

[docs]    def export(self, filename):
        f = open(filename, 'w')
        for k, v in self.dict.items():
            f.writelines('{} {}\n'.format(k, v))
        f.close()