Hyper-parameter scans

Here we consider how different optimization algorithms behave (for this system) and how their hyper-parameters influence the resulting cluster expansion. We use the cutoffs [8.0, 6.5, 6.0] and consider the following optimization algorithms:

  • ARDR (Automatic Relevance Determination Regression)

  • RFE (Recursive Feature Elimination)

  • LASSO (Least Absolute Shrinkage and Selection Operator)

  • Adaptive-LASSO

From the analysis below one can conclude that around 20 to 30 non-zero ECIs are a good choice with cutoffs [8.0, 6.5, 6.0]. We also note here that LASSO performs quite poorly compared to the other methods, whereas the other three methods all yield similar results.

[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ase.db import connect
from icet import ClusterSpace, StructureContainer
from trainstation import CrossValidationEstimator

# seaborn is optional; it is used here only to set the plotting context.
try:
    import seaborn as sns
except ImportError:
    print('seaborn is not installed; using default matplotlib styling')
else:
    # Kept outside the try body so that only a failing import is handled here.
    sns.set_context('notebook')


def get_row(cve):
    """Collect the summary metrics of a cross-validation estimator in a dict.

    Parameters
    ----------
    cve
        Object exposing the attributes ``rmse_validation``, ``rmse_train``,
        ``n_parameters``, ``n_nonzero_parameters`` and ``model.BIC``.

    Returns
    -------
    dict
        One record with the keys ``rmse_validation``, ``rmse_train``, ``BIC``,
        ``n_parameters`` and ``n_nonzero_parameters`` (in that order).
    """
    return {
        'rmse_validation': cve.rmse_validation,
        'rmse_train': cve.rmse_train,
        'BIC': cve.model.BIC,
        'n_parameters': cve.n_parameters,
        'n_nonzero_parameters': cve.n_nonzero_parameters,
    }

[2]:
# Cutoffs passed to the cluster space
cutoffs = [8.0, 6.5, 6.0]

# Open the reference database; the entry with id=1 is used as the primitive structure
db = connect('../../tutorial/reference_data.db')
primitive_structure = db.get(id=1).toatoms()

# Build the cluster space and add every database entry to a structure container,
# storing its mixing energy as the property to fit
cs = ClusterSpace(
    structure=primitive_structure,
    cutoffs=cutoffs,
    chemical_symbols=['Ag', 'Pd'],
)
sc = StructureContainer(cluster_space=cs)
for entry in db.select():
    sc.add_structure(
        structure=entry.toatoms(),
        user_tag=entry.tag,
        properties={'mixing_energy': entry.mixing_energy},
    )

# Fit data (A, y) for the 'mixing_energy' property, reused by all scans
A, y = sc.get_fit_data(key='mixing_energy')

Scanning ARDR hyper-parameter

ARDR has the hyper-parameter `threshold_lambda`, which controls the sparsity of the solution.

[3]:
# Scan over the ARDR threshold_lambda hyper-parameter
lambda_values = [
    250, 500, 1000, 1400, 2000, 2500, 4500, 7500,
    13000, 18000, 25000, 40000, 60000, 90000, 200000,
]
records = []
for threshold in lambda_values:
    estimator = CrossValidationEstimator((A, y), fit_method='ardr', threshold_lambda=threshold)
    estimator.validate()
    estimator.train()
    # One record per value: the metrics from get_row plus the scanned value
    records.append({**get_row(estimator), 'threshold_lambda': threshold})
df_ardr = pd.DataFrame(records)

Scanning RFE hyper-parameter

RFE has the hyper-parameter n_features which controls the sparsity of the solution.

[4]:
# Scan over the RFE n_features hyper-parameter: 10, 14, 18, ... below len(cs)
nf_values = np.arange(10, len(cs), 4)
records = []
for n_selected in nf_values:
    estimator = CrossValidationEstimator((A, y), fit_method='rfe', n_features=n_selected)
    estimator.validate()
    estimator.train()
    entry = get_row(estimator)
    print(entry)
    records.append(entry)
df_rfe = pd.DataFrame(records)
{'rmse_validation': 0.002798344450116252, 'rmse_train': 0.002683403161977985, 'BIC': -7354.707739792168, 'n_parameters': 64, 'n_nonzero_parameters': 10}
{'rmse_validation': 0.0023145778033947244, 'rmse_train': 0.0021922571247087823, 'BIC': -7570.183866056698, 'n_parameters': 64, 'n_nonzero_parameters': 14}
{'rmse_validation': 0.0021973373681505614, 'rmse_train': 0.0020374384922598526, 'BIC': -7618.165863997955, 'n_parameters': 64, 'n_nonzero_parameters': 18}
{'rmse_validation': 0.0021057053328768344, 'rmse_train': 0.0019395259773153275, 'BIC': -7661.051031863528, 'n_parameters': 64, 'n_nonzero_parameters': 22}
{'rmse_validation': 0.0020280745500089077, 'rmse_train': 0.0018518706714521532, 'BIC': -7685.110319218685, 'n_parameters': 64, 'n_nonzero_parameters': 26}
{'rmse_validation': 0.0020160090433279216, 'rmse_train': 0.0018127681433093307, 'BIC': -7690.311903614312, 'n_parameters': 64, 'n_nonzero_parameters': 30}
{'rmse_validation': 0.001995087671145074, 'rmse_train': 0.0017808454418820883, 'BIC': -7687.224112465629, 'n_parameters': 64, 'n_nonzero_parameters': 34}
{'rmse_validation': 0.0019723136505239228, 'rmse_train': 0.0017625302601417293, 'BIC': -7674.515701930791, 'n_parameters': 64, 'n_nonzero_parameters': 38}
{'rmse_validation': 0.001980221782136884, 'rmse_train': 0.001752432921969732, 'BIC': -7655.1225628361835, 'n_parameters': 64, 'n_nonzero_parameters': 42}
{'rmse_validation': 0.001968234318822529, 'rmse_train': 0.0017459746377072555, 'BIC': -7633.32071777841, 'n_parameters': 64, 'n_nonzero_parameters': 46}
{'rmse_validation': 0.0019659194017986642, 'rmse_train': 0.0017423794967478504, 'BIC': -7609.777547105853, 'n_parameters': 64, 'n_nonzero_parameters': 50}
{'rmse_validation': 0.001966988837928071, 'rmse_train': 0.0017407213959834919, 'BIC': -7585.784504933986, 'n_parameters': 64, 'n_nonzero_parameters': 54}
{'rmse_validation': 0.0019677644104134848, 'rmse_train': 0.0017402356917872596, 'BIC': -7560.295995196741, 'n_parameters': 64, 'n_nonzero_parameters': 58}
{'rmse_validation': 0.0019690062027653494, 'rmse_train': 0.0017400702788613342, 'BIC': -7534.563254440424, 'n_parameters': 64, 'n_nonzero_parameters': 62}

Scanning LASSO

[5]:
# Scan over the LASSO alpha hyper-parameter on a logarithmic grid
alpha_values = np.logspace(-5, -0.5, 20)
records = []
for alpha_value in alpha_values:
    estimator = CrossValidationEstimator(
        (A, y),
        max_iter=50000,
        fit_method='lasso',
        alpha=alpha_value,
    )
    estimator.validate()
    estimator.train()
    # Metrics from get_row plus the scanned alpha value
    entry = {**get_row(estimator), 'alpha': alpha_value}
    print(entry)
    records.append(entry)
df_lasso = pd.DataFrame(records)
{'rmse_validation': 0.001968113853657245, 'rmse_train': 0.0017400585214774424, 'BIC': -7528.120330641913, 'n_parameters': 64, 'n_nonzero_parameters': 63, 'alpha': 1e-05}
{'rmse_validation': 0.00196748489187155, 'rmse_train': 0.001740075837948438, 'BIC': -7528.1128117539, 'n_parameters': 64, 'n_nonzero_parameters': 63, 'alpha': 1.725210549942039e-05}
{'rmse_validation': 0.001966399230367668, 'rmse_train': 0.0017401195325501665, 'BIC': -7521.649049770729, 'n_parameters': 64, 'n_nonzero_parameters': 64, 'alpha': 2.9763514416313192e-05}
{'rmse_validation': 0.0019647094450454014, 'rmse_train': 0.0017402294453499902, 'BIC': -7521.566491938609, 'n_parameters': 64, 'n_nonzero_parameters': 64, 'alpha': 5.1348329074375493e-05}
{'rmse_validation': 0.001961979315327377, 'rmse_train': 0.0017404941606713371, 'BIC': -7534.267236507214, 'n_parameters': 64, 'n_nonzero_parameters': 62, 'alpha': 8.858667904100833e-05}
{'rmse_validation': 0.0019579068381050295, 'rmse_train': 0.001741129678082679, 'BIC': -7533.831587850884, 'n_parameters': 64, 'n_nonzero_parameters': 62, 'alpha': 0.00015283067326587687}
{'rmse_validation': 0.0019519780649841652, 'rmse_train': 0.0017427711592992278, 'BIC': -7552.049810599044, 'n_parameters': 64, 'n_nonzero_parameters': 59, 'alpha': 0.00026366508987303583}
{'rmse_validation': 0.0019476977392845238, 'rmse_train': 0.0017465948943124587, 'BIC': -7575.5480122989065, 'n_parameters': 64, 'n_nonzero_parameters': 55, 'alpha': 0.00045487779470037773}
{'rmse_validation': 0.001944784017116787, 'rmse_train': 0.0017546628613150559, 'BIC': -7563.173612770661, 'n_parameters': 64, 'n_nonzero_parameters': 56, 'alpha': 0.0007847599703514606}
{'rmse_validation': 0.0019426846914566524, 'rmse_train': 0.001770253730881873, 'BIC': -7610.759431657655, 'n_parameters': 64, 'n_nonzero_parameters': 47, 'alpha': 0.0013538761800225433}
{'rmse_validation': 0.0019653401674951604, 'rmse_train': 0.0018020430396815412, 'BIC': -7602.747573750417, 'n_parameters': 64, 'n_nonzero_parameters': 45, 'alpha': 0.002335721469090121}
{'rmse_validation': 0.0020313018266476532, 'rmse_train': 0.0018767582331051286, 'BIC': -7577.302571260845, 'n_parameters': 64, 'n_nonzero_parameters': 41, 'alpha': 0.0040296113202004}
{'rmse_validation': 0.0021470782334785535, 'rmse_train': 0.002012960053483511, 'BIC': -7530.235916217105, 'n_parameters': 64, 'n_nonzero_parameters': 35, 'alpha': 0.0069519279617756054}
{'rmse_validation': 0.0023909321913676896, 'rmse_train': 0.0022784616508545715, 'BIC': -7413.568434409249, 'n_parameters': 64, 'n_nonzero_parameters': 29, 'alpha': 0.011993539462092343}
{'rmse_validation': 0.002775270345678115, 'rmse_train': 0.0026825220266335935, 'BIC': -7249.732517244797, 'n_parameters': 64, 'n_nonzero_parameters': 23, 'alpha': 0.02069138081114788}
{'rmse_validation': 0.0034046494165508963, 'rmse_train': 0.003320242201387385, 'BIC': -7049.399612344974, 'n_parameters': 64, 'n_nonzero_parameters': 13, 'alpha': 0.035696988468260624}
{'rmse_validation': 0.004366174644743007, 'rmse_train': 0.004281444080531599, 'BIC': -6745.338178627839, 'n_parameters': 64, 'n_nonzero_parameters': 11, 'alpha': 0.06158482110660261}
{'rmse_validation': 0.0062240513439747016, 'rmse_train': 0.006133683810005989, 'BIC': -6309.579632518047, 'n_parameters': 64, 'n_nonzero_parameters': 9, 'alpha': 0.10624678308940409}
{'rmse_validation': 0.00876566344071705, 'rmse_train': 0.008701411499691733, 'BIC': -5884.716739611459, 'n_parameters': 64, 'n_nonzero_parameters': 7, 'alpha': 0.18329807108324336}
{'rmse_validation': 0.012698006188672891, 'rmse_train': 0.01269135605017289, 'BIC': -5419.718881571171, 'n_parameters': 64, 'n_nonzero_parameters': 6, 'alpha': 0.31622776601683794}
[6]:
# Scan over the adaptive-LASSO alpha hyper-parameter on a logarithmic grid
alpha_values = np.logspace(-5, -1.5, 20)
records = []
for alpha_value in alpha_values:
    estimator = CrossValidationEstimator(
        (A, y),
        max_iter=50000,
        fit_method='adaptive-lasso',
        alpha=alpha_value,
    )
    estimator.validate()
    estimator.train()
    # Metrics from get_row plus the scanned alpha value
    entry = {**get_row(estimator), 'alpha': alpha_value}
    print(entry)
    records.append(entry)
df_adlasso = pd.DataFrame(records)
{'rmse_validation': 0.0019663470998611673, 'rmse_train': 0.001740216265055334, 'BIC': -7560.272185961742, 'n_parameters': 64, 'n_nonzero_parameters': 58, 'alpha': 1e-05}
{'rmse_validation': 0.0019641902390184116, 'rmse_train': 0.0017403582897293386, 'BIC': -7560.23568024784, 'n_parameters': 64, 'n_nonzero_parameters': 58, 'alpha': 1.5283067326587687e-05}
{'rmse_validation': 0.001962023557717041, 'rmse_train': 0.0017406179609130857, 'BIC': -7566.541913746687, 'n_parameters': 64, 'n_nonzero_parameters': 57, 'alpha': 2.3357214690901213e-05}
{'rmse_validation': 0.0019606395408323166, 'rmse_train': 0.0017412436684415664, 'BIC': -7585.209905482276, 'n_parameters': 64, 'n_nonzero_parameters': 54, 'alpha': 3.5696988468260624e-05}
{'rmse_validation': 0.00196096282914115, 'rmse_train': 0.001742494457786217, 'BIC': -7597.421552630661, 'n_parameters': 64, 'n_nonzero_parameters': 52, 'alpha': 5.4555947811685143e-05}
{'rmse_validation': 0.001957241589180568, 'rmse_train': 0.0017444088173038078, 'BIC': -7608.844059669129, 'n_parameters': 64, 'n_nonzero_parameters': 50, 'alpha': 8.337822234717882e-05}
{'rmse_validation': 0.0019523193272483763, 'rmse_train': 0.0017486042529944426, 'BIC': -7643.0476165637, 'n_parameters': 64, 'n_nonzero_parameters': 44, 'alpha': 0.00012742749857031334}
{'rmse_validation': 0.0019538567103684584, 'rmse_train': 0.0017549543972147363, 'BIC': -7659.0637804937505, 'n_parameters': 64, 'n_nonzero_parameters': 41, 'alpha': 0.00019474830399087572}
{'rmse_validation': 0.0019579145919251303, 'rmse_train': 0.0017649758393761493, 'BIC': -7661.515779214831, 'n_parameters': 64, 'n_nonzero_parameters': 40, 'alpha': 0.00029763514416313193}
{'rmse_validation': 0.0019740970937807416, 'rmse_train': 0.0017836506039064152, 'BIC': -7685.994784874949, 'n_parameters': 64, 'n_nonzero_parameters': 34, 'alpha': 0.00045487779470037773}
{'rmse_validation': 0.0020014362835092466, 'rmse_train': 0.0018180674548355636, 'BIC': -7694.006048095806, 'n_parameters': 64, 'n_nonzero_parameters': 27, 'alpha': 0.0006951927961775605}
{'rmse_validation': 0.0020004670695963605, 'rmse_train': 0.00187667898481482, 'BIC': -7698.35143324549, 'n_parameters': 64, 'n_nonzero_parameters': 22, 'alpha': 0.001062467830894041}
{'rmse_validation': 0.0020687242543100336, 'rmse_train': 0.0019348143731209976, 'BIC': -7666.153617565288, 'n_parameters': 64, 'n_nonzero_parameters': 22, 'alpha': 0.001623776739188721}
{'rmse_validation': 0.0022116362512679564, 'rmse_train': 0.002083130733168835, 'BIC': -7591.057758390146, 'n_parameters': 64, 'n_nonzero_parameters': 16, 'alpha': 0.002481628922836824}
{'rmse_validation': 0.0023142589677141705, 'rmse_train': 0.002222523551420671, 'BIC': -7545.737647194027, 'n_parameters': 64, 'n_nonzero_parameters': 14, 'alpha': 0.00379269019073225}
{'rmse_validation': 0.002424033448299887, 'rmse_train': 0.0023085226806188654, 'BIC': -7506.35225874992, 'n_parameters': 64, 'n_nonzero_parameters': 13, 'alpha': 0.005796393953384968}
{'rmse_validation': 0.0025143383574155673, 'rmse_train': 0.0024381014073525135, 'BIC': -7443.306528207596, 'n_parameters': 64, 'n_nonzero_parameters': 12, 'alpha': 0.008858667904100823}
{'rmse_validation': 0.003064254593080284, 'rmse_train': 0.0029123055982127315, 'BIC': -7208.03988949416, 'n_parameters': 64, 'n_nonzero_parameters': 9, 'alpha': 0.013538761800225433}
{'rmse_validation': 0.0034506753311372477, 'rmse_train': 0.003357628550616386, 'BIC': -7062.006441184525, 'n_parameters': 64, 'n_nonzero_parameters': 7, 'alpha': 0.02069138081114788}
{'rmse_validation': 0.0036206941039860626, 'rmse_train': 0.003572585942426887, 'BIC': -6997.9768630944845, 'n_parameters': 64, 'n_nonzero_parameters': 7, 'alpha': 0.03162277660168379}
[7]:

# Plotting: validation RMSE (top panel) and BIC (bottom panel) versus the
# number of nonzero parameters, one curve per optimization algorithm.
fig = plt.figure(figsize=(7, 9))
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

xlim = [0, len(cs)]
ylim1 = [1.5, 5]
ylim2 = [-7800, -7000]

scans = [
    (df_ardr, 'ARDR'),
    (df_rfe, 'RFE'),
    (df_lasso, 'LASSO'),
    (df_adlasso, 'adaptive-LASSO'),
]
for df, label in scans:
    # RMSE is scaled by 1000 to match the meV/atom axis label
    ax1.plot(df.n_nonzero_parameters, 1000 * df.rmse_validation, '-o', label=label)
    ax2.plot(df.n_nonzero_parameters, df.BIC, '-o', label=label)
ax1.legend()

ax1.set_xlim(xlim)
ax2.set_xlim(xlim)
ax1.set_ylim(ylim1)
ax2.set_ylim(ylim2)

ax2.set_xlabel('Number of nonzero parameters')
ax2.set_ylabel('BIC')
ax1.set_ylabel('RMSE validation (meV/atom)')

fig.tight_layout()
plt.show()
../_images/advanced_topics_training_hyper_parameter_scans_10_0.png