Selecting cutoffs

Selecting cutoffs for a cluster expansion (CE) can typically be done by systematically increasing the cutoffs and monitoring either the RMSE over the validation set or an information criterion. A simple approach is to first scan the second-order cutoff, pick its optimal value, use that value while scanning the third-order cutoff, and so on.

We also recommend that, once all cutoffs have been scanned, one goes back and rescans, e.g., the second-order cutoff using the finalized third- and fourth-order cutoffs, because this might lead to a slightly different optimal choice than the previous scans.

The scans can take more or less time depending on the fitting algorithm and on how densely one scans the cutoffs.

[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ase.db import connect
from icet import ClusterSpace, StructureContainer
from trainstation import CrossValidationEstimator

# seaborn is optional here: it is only used to set the plotting context
try:
    import seaborn as sns
    sns.set_context('notebook')
except ImportError:
    # seaborn is not installed; continue without it and just print a notice
    print('sad')


def get_fit_data(cutoffs):
    """
    Assemble the fit matrix and the target energies for a set of cutoffs.

    A cluster space is built on the module-level ``primitive_structure``
    with the given ``cutoffs`` and the species Ag and Pd. Every row returned
    by ``db.select()`` (``db`` is a module-level database handle) is then
    added to a structure container together with its mixing energy, and the
    container's fit data for the ``'mixing_energy'`` property is returned.
    """
    cluster_space = ClusterSpace(
        structure=primitive_structure,
        cutoffs=cutoffs,
        chemical_symbols=['Ag', 'Pd'],
    )
    container = StructureContainer(cluster_space=cluster_space)

    for entry in db.select():
        container.add_structure(
            structure=entry.toatoms(),
            user_tag=entry.tag,
            properties={'mixing_energy': entry.mixing_energy},
        )

    fit_data = container.get_fit_data(key='mixing_energy')
    return fit_data


def train_ce(cutoffs):
    """
    Fit a cluster expansion for the given cutoffs and report its fit metrics.

    The fit data is obtained from ``get_fit_data(cutoffs)`` and handed to a
    ``CrossValidationEstimator`` configured with the module-level
    ``fit_method``, shuffle-split validation and 100 splits. The estimator is
    validated and then trained.

    Returns a dict with the keys ``'rmse_validation'``, ``'rmse_train'``,
    ``'BIC'``, ``'n_parameters'`` and ``'n_nonzero_parameters'``, in that
    order.
    """
    fit_matrix, targets = get_fit_data(cutoffs)
    estimator = CrossValidationEstimator(
        (fit_matrix, targets),
        fit_method=fit_method,
        validation_method='shuffle-split',
        n_splits=100,
    )
    estimator.validate()
    estimator.train()

    # key order is kept because it determines the column order of the scan tables
    return {
        'rmse_validation': estimator.rmse_validation,
        'rmse_train': estimator.rmse_train,
        'BIC': estimator.model.BIC,
        'n_parameters': estimator.n_parameters,
        'n_nonzero_parameters': estimator.n_nonzero_parameters,
    }
[2]:
# parameters
# fit_method is read as a global by train_ce
fit_method = 'least-squares'
# cutoff grids in steps of 0.5; np.arange excludes the stop value, so the
# 2nd order grid uses 15.01 to include 15.0, while the 3rd and 4th order
# grids end at 7.5 and 7.0, respectively
c2_vals = np.arange(4.0, 15.01, 0.5)
c3_vals = np.arange(4.0, 8.0, 0.5)
c4_vals = np.arange(4.0, 7.5, 0.5)

# setup CS and SC
# db and primitive_structure are read as globals by get_fit_data
db = connect('../../tutorial/reference_data.db')
primitive_structure = db.get(id=1).toatoms()  # primitive structure

Second order cutoff

First we scan the second-order cutoff, and find a good value of about 8 Å.

[3]:
# Scan 2nd order cutoff
# One cluster expansion is trained per value in c2_vals, using pairs only
records = []
for c2 in c2_vals:
    cutoffs = [c2]
    row = dict(c2=c2)
    row.update(train_ce(cutoffs))  # scanned cutoff first, then the fit metrics
    print(row)
    records.append(row)

df2 = pd.DataFrame(records)
# 2nd order cutoff that is kept fixed in the higher-order scans
c2_final = 8.0
{'c2': 4.0, 'rmse_validation': 0.007906443595761958, 'rmse_train': 0.007789759276035674, 'BIC': -6047.517734565386, 'n_parameters': 3, 'n_nonzero_parameters': 3}
{'c2': 4.5, 'rmse_validation': 0.005905154878101159, 'rmse_train': 0.005828416062622854, 'BIC': -6403.742696884292, 'n_parameters': 4, 'n_nonzero_parameters': 4}
{'c2': 5.0, 'rmse_validation': 0.005905154878101159, 'rmse_train': 0.005828416062622854, 'BIC': -6403.742696884292, 'n_parameters': 4, 'n_nonzero_parameters': 4}
{'c2': 5.5, 'rmse_validation': 0.005368637463082624, 'rmse_train': 0.005301294721455475, 'BIC': -6515.807921095053, 'n_parameters': 5, 'n_nonzero_parameters': 5}
{'c2': 6.0, 'rmse_validation': 0.005374143211234686, 'rmse_train': 0.005298007547654692, 'BIC': -6510.007120837804, 'n_parameters': 6, 'n_nonzero_parameters': 6}
{'c2': 6.5, 'rmse_validation': 0.0053455453155681, 'rmse_train': 0.00526482284974105, 'BIC': -6511.343329356486, 'n_parameters': 7, 'n_nonzero_parameters': 7}
{'c2': 7.0, 'rmse_validation': 0.0053455453155681, 'rmse_train': 0.00526482284974105, 'BIC': -6511.343329356486, 'n_parameters': 7, 'n_nonzero_parameters': 7}
{'c2': 7.5, 'rmse_validation': 0.005235590768222653, 'rmse_train': 0.005153782744508843, 'BIC': -6531.588343530676, 'n_parameters': 8, 'n_nonzero_parameters': 8}
{'c2': 8.0, 'rmse_validation': 0.005231453386506078, 'rmse_train': 0.005144172261542513, 'BIC': -6527.503437114905, 'n_parameters': 9, 'n_nonzero_parameters': 9}
{'c2': 8.5, 'rmse_validation': 0.005234003786061428, 'rmse_train': 0.005140138101446462, 'BIC': -6522.00004191754, 'n_parameters': 10, 'n_nonzero_parameters': 10}
{'c2': 9.0, 'rmse_validation': 0.005237354663143693, 'rmse_train': 0.00511943476791182, 'BIC': -6513.826519533814, 'n_parameters': 12, 'n_nonzero_parameters': 12}
{'c2': 9.5, 'rmse_validation': 0.005223442662848543, 'rmse_train': 0.005100598569962429, 'BIC': -6511.948543444877, 'n_parameters': 13, 'n_nonzero_parameters': 13}
{'c2': 10.0, 'rmse_validation': 0.005229976678641013, 'rmse_train': 0.005098615358473966, 'BIC': -6505.885374799127, 'n_parameters': 14, 'n_nonzero_parameters': 14}
{'c2': 10.5, 'rmse_validation': 0.005239075446830428, 'rmse_train': 0.005080075057786764, 'BIC': -6490.727570945517, 'n_parameters': 17, 'n_nonzero_parameters': 17}
{'c2': 11.0, 'rmse_validation': 0.005239075446830428, 'rmse_train': 0.005080075057786764, 'BIC': -6490.727570945517, 'n_parameters': 17, 'n_nonzero_parameters': 17}
{'c2': 11.5, 'rmse_validation': 0.0052418880226640305, 'rmse_train': 0.005073923857770292, 'BIC': -6485.676498188947, 'n_parameters': 18, 'n_nonzero_parameters': 18}
{'c2': 12.0, 'rmse_validation': 0.005243593500777188, 'rmse_train': 0.005058328567498962, 'BIC': -6470.031477233707, 'n_parameters': 21, 'n_nonzero_parameters': 21}
{'c2': 12.5, 'rmse_validation': 0.00526032243755337, 'rmse_train': 0.005055539997452015, 'BIC': -6457.581716077217, 'n_parameters': 23, 'n_nonzero_parameters': 23}
{'c2': 13.0, 'rmse_validation': 0.005278777764013501, 'rmse_train': 0.005049813417556868, 'BIC': -6439.389093461534, 'n_parameters': 26, 'n_nonzero_parameters': 26}
{'c2': 13.5, 'rmse_validation': 0.005287644610389536, 'rmse_train': 0.00504931519212303, 'BIC': -6432.972526973013, 'n_parameters': 27, 'n_nonzero_parameters': 27}
{'c2': 14.0, 'rmse_validation': 0.005302548496671004, 'rmse_train': 0.005046395984100731, 'BIC': -6420.609832182944, 'n_parameters': 29, 'n_nonzero_parameters': 29}
{'c2': 14.5, 'rmse_validation': 0.0053376031987636356, 'rmse_train': 0.005042205129069571, 'BIC': -6395.4333713581555, 'n_parameters': 33, 'n_nonzero_parameters': 33}
{'c2': 15.0, 'rmse_validation': 0.005345857128167708, 'rmse_train': 0.005041101796608185, 'BIC': -6389.139951155822, 'n_parameters': 34, 'n_nonzero_parameters': 34}
[4]:
# plot 2nd order cutoff scan
# Three stacked panels (RMSE, BIC, number of parameters) versus the 2nd order cutoff
fig = plt.figure(figsize=(7, 8))
gs = plt.GridSpec(3, 1, hspace=0)
ax1, ax2, ax3 = [plt.subplot(gs[panel, 0]) for panel in range(3)]

# pad the x-range by 0.5 on either side of the scanned values
xlim = [df2['c2'].min() - 0.5, df2['c2'].max() + 0.5]

# top panel: validation and training RMSE, scaled by 1000 to match the meV/atom label
ax1.plot(df2['c2'], 1000 * df2['rmse_validation'], '-o', label='validation')
ax1.plot(df2['c2'], 1000 * df2['rmse_train'], '--s', label='train')
ax1.set_ylabel('RMSE (meV/atom)')
ax1.legend()

# middle panel: BIC
ax2.plot(df2['c2'], df2['BIC'], '-o')
ax2.set_ylabel('BIC')

# bottom panel: total and nonzero number of parameters
ax3.plot(df2['c2'], df2['n_parameters'], '--s', label='Total')
ax3.plot(df2['c2'], df2['n_nonzero_parameters'], '-o', label='Nonzero')
ax3.legend()
ax3.set_ylabel('Number of parameters')

for ax in (ax1, ax2, ax3):
    ax.set_xlim(xlim)

# only the bottom panel shows x tick labels and the x-axis label
for ax in (ax1, ax2):
    ax.set_xticklabels([])
ax3.set_xlabel('2nd order cutoff (Å)')

fig.tight_layout()
plt.show()
../_images/advanced_topics_training_cutoffs_selection_5_0.png

Third-order cutoff

For the third-order cutoff we find a value of about 6.5 Å.

[5]:
# Scan 3rd order cutoff
# The 2nd order cutoff is kept fixed at c2_final while the 3rd order cutoff is varied
records = []
for c3 in c3_vals:
    cutoffs = [c2_final, c3]
    row = dict(c2=c2_final, c3=c3)
    row.update(train_ce(cutoffs))  # cutoffs first, then the fit metrics
    print(row)
    records.append(row)

df3 = pd.DataFrame(records)
# 3rd order cutoff that is kept fixed in the 4th order scan
c3_final = 6.5
{'c2': 8.0, 'c3': 4.0, 'rmse_validation': 0.0036006241653608402, 'rmse_train': 0.00359040151755787, 'BIC': -6972.463131530305, 'n_parameters': 10, 'n_nonzero_parameters': 10}
{'c2': 8.0, 'c3': 4.5, 'rmse_validation': 0.0034315681103231272, 'rmse_train': 0.0034184603576390958, 'BIC': -7027.516895058285, 'n_parameters': 11, 'n_nonzero_parameters': 11}
{'c2': 8.0, 'c3': 5.0, 'rmse_validation': 0.0034315681103231272, 'rmse_train': 0.0034184603576390958, 'BIC': -7027.516895058285, 'n_parameters': 11, 'n_nonzero_parameters': 11}
{'c2': 8.0, 'c3': 5.5, 'rmse_validation': 0.003114043128626342, 'rmse_train': 0.003076076644589442, 'BIC': -7126.827552809291, 'n_parameters': 16, 'n_nonzero_parameters': 16}
{'c2': 8.0, 'c3': 6.0, 'rmse_validation': 0.0025468284304101057, 'rmse_train': 0.0024953392388428175, 'BIC': -7356.177908650355, 'n_parameters': 21, 'n_nonzero_parameters': 21}
{'c2': 8.0, 'c3': 6.5, 'rmse_validation': 0.00246434860087697, 'rmse_train': 0.002375115708994107, 'BIC': -7365.550248283435, 'n_parameters': 29, 'n_nonzero_parameters': 29}
{'c2': 8.0, 'c3': 7.0, 'rmse_validation': 0.00246434860087697, 'rmse_train': 0.002375115708994107, 'BIC': -7365.550248283435, 'n_parameters': 29, 'n_nonzero_parameters': 29}
{'c2': 8.0, 'c3': 7.5, 'rmse_validation': 0.0024639072483062987, 'rmse_train': 0.0023579588158507084, 'BIC': -7354.88189840891, 'n_parameters': 32, 'n_nonzero_parameters': 32}
[6]:
# plot 3rd order cutoff scan
# Three stacked panels (RMSE, BIC, number of parameters) versus the 3rd order cutoff
fig = plt.figure(figsize=(7, 8))
gs = plt.GridSpec(3, 1, hspace=0)
ax1, ax2, ax3 = [plt.subplot(gs[panel, 0]) for panel in range(3)]

# pad the x-range by 0.5 on either side of the scanned values
xlim = [df3['c3'].min() - 0.5, df3['c3'].max() + 0.5]

# top panel: validation and training RMSE, scaled by 1000 to match the meV/atom label
ax1.plot(df3['c3'], 1000 * df3['rmse_validation'], '-o', label='validation')
ax1.plot(df3['c3'], 1000 * df3['rmse_train'], '--s', label='train')
ax1.set_ylabel('RMSE (meV/atom)')
ax1.legend()

# middle panel: BIC
ax2.plot(df3['c3'], df3['BIC'], '-o')
ax2.set_ylabel('BIC')

# bottom panel: total and nonzero number of parameters
ax3.plot(df3['c3'], df3['n_parameters'], '--s', label='Total')
ax3.plot(df3['c3'], df3['n_nonzero_parameters'], '-o', label='Nonzero')
ax3.legend()
ax3.set_ylabel('Number of parameters')

for ax in (ax1, ax2, ax3):
    ax.set_xlim(xlim)

# only the bottom panel shows x tick labels and the x-axis label
for ax in (ax1, ax2):
    ax.set_xticklabels([])
ax3.set_xlabel('3rd order cutoff (Å)')

fig.tight_layout()
plt.show()
../_images/advanced_topics_training_cutoffs_selection_8_0.png

Fourth-order cutoff

For the fourth-order cutoff we find a value of about 6.0 Å. We note that for fourth-order cutoffs of 6.5 Å and above we get a condition number warning, which indicates that the linear problem we are solving is ill-conditioned and thus we cannot trust the resulting cluster expansion for these cutoffs.

[7]:
# Scan 4th order cutoff
# The 2nd and 3rd order cutoffs are kept fixed at c2_final and c3_final
# while the 4th order cutoff is varied
records = []
for c4 in c4_vals:
    cutoffs = [c2_final, c3_final, c4]
    row = train_ce(cutoffs)
    # put the cutoffs first so they become the leading columns of the table
    row = {'c2': c2_final, 'c3': c3_final, 'c4': c4, **row}
    print(row)
    records.append(row)

df4 = pd.DataFrame(records)
# 6.0 gives the lowest validation RMSE and BIC in the scan; 6.5 and 7.0
# trigger condition number warnings and are therefore not selected
c4_final = 6.0
{'c2': 8.0, 'c3': 6.5, 'c4': 4.0, 'rmse_validation': 0.0024571627124155165, 'rmse_train': 0.0023630776797101266, 'BIC': -7365.394485289505, 'n_parameters': 30, 'n_nonzero_parameters': 30}
{'c2': 8.0, 'c3': 6.5, 'c4': 4.5, 'rmse_validation': 0.0024261109989691743, 'rmse_train': 0.0023286213678193977, 'BIC': -7370.751087283575, 'n_parameters': 32, 'n_nonzero_parameters': 32}
{'c2': 8.0, 'c3': 6.5, 'c4': 5.0, 'rmse_validation': 0.0024261109989691743, 'rmse_train': 0.0023286213678193977, 'BIC': -7370.751087283575, 'n_parameters': 32, 'n_nonzero_parameters': 32}
{'c2': 8.0, 'c3': 6.5, 'c4': 5.5, 'rmse_validation': 0.002207642637192658, 'rmse_train': 0.0020448371179085027, 'BIC': -7446.803640224018, 'n_parameters': 45, 'n_nonzero_parameters': 45}
{'c2': 8.0, 'c3': 6.5, 'c4': 6.0, 'rmse_validation': 0.0019437648978908096, 'rmse_train': 0.0017430310908495975, 'BIC': -7521.687921832897, 'n_parameters': 64, 'n_nonzero_parameters': 64}
Condition number is large, 2.3366780145843172e+16
Condition number is large, 2.2983507138842772e+16
Condition number is large, 2.7320534418143132e+16
Condition number is large, 2.3850777029795024e+16
Condition number is large, 3.2041642489013868e+16
Condition number is large, 2.8072752047122964e+16
Condition number is large, 2.7285099338188428e+16
Condition number is large, 2.8943246285900292e+16
Condition number is large, 2.4292211913313376e+16
Condition number is large, 2.246767301788247e+16
Condition number is large, 2.4002071320630164e+16
Condition number is large, 2.2593076154262184e+16
Condition number is large, 3.0420515445288116e+16
Condition number is large, 3.575687165207429e+16
Condition number is large, 2.798088544900558e+16
Condition number is large, 2.8544938903252228e+16
Condition number is large, 3.0099196573961964e+16
Condition number is large, 2.577855257203065e+16
Condition number is large, 3.0750651692689164e+16
Condition number is large, 2.4723979644596484e+16
Condition number is large, 3.177736050094163e+16
Condition number is large, 3.2071550247692196e+16
Condition number is large, 3.0039198508873292e+16
Condition number is large, 2.3342378910732804e+16
Condition number is large, 2.743384186286969e+16
Condition number is large, 2.9406477208313316e+16
Condition number is large, 3.0053778322918736e+16
Condition number is large, 3.0557982884486096e+16
Condition number is large, 2.828893931744529e+16
Condition number is large, 2.6584332230283332e+16
Condition number is large, 2.6761476081522188e+16
Condition number is large, 2.6731184261995356e+16
Condition number is large, 3.2312143269876476e+16
Condition number is large, 2.7835615334291612e+16
Condition number is large, 2.9773363380407444e+16
Condition number is large, 2.8732603213800836e+16
Condition number is large, 2.2461256802319332e+16
Condition number is large, 2.253523128466033e+16
Condition number is large, 2.2167500400574696e+16
Condition number is large, 2.359175867843177e+16
Condition number is large, 2.955576856248458e+16
Condition number is large, 2.9625815798380084e+16
Condition number is large, 2.132272414233711e+16
Condition number is large, 2.7473681843127276e+16
Condition number is large, 2.211723179120394e+16
Condition number is large, 2.2345667895285868e+16
Condition number is large, 3.4442989104845656e+16
Condition number is large, 3.520596565772002e+16
Condition number is large, 2.6259222063565404e+16
Condition number is large, 2.9639050669832184e+16
Condition number is large, 3.0603713113878028e+16
Condition number is large, 2.79976704352863e+16
Condition number is large, 3.2529667609829036e+16
Condition number is large, 2.9015766036122644e+16
Condition number is large, 1.941497466690872e+16
Condition number is large, 2.7666298317954836e+16
Condition number is large, 2.9473421944182624e+16
Condition number is large, 2.705369829577059e+16
Condition number is large, 2.802168200503224e+16
Condition number is large, 3.092585765615843e+16
Condition number is large, 2.9870343532228132e+16
Condition number is large, 2.782075845843869e+16
Condition number is large, 2.613127946062695e+16
Condition number is large, 2.6058958991745464e+16
Condition number is large, 2.601373667057441e+16
Condition number is large, 2.7836684777379332e+16
Condition number is large, 2.5832404629001104e+16
Condition number is large, 2.7580034283573864e+16
Condition number is large, 2.6786377327908844e+16
Condition number is large, 2.3140709563102708e+16
Condition number is large, 2.3822988008802612e+16
Condition number is large, 2.1885067797751736e+16
Condition number is large, 2.804610973690078e+16
Condition number is large, 2.7074147280799452e+16
Condition number is large, 2.7894132871354092e+16
Condition number is large, 2.5278392892758656e+16
Condition number is large, 2.770854198520622e+16
Condition number is large, 2.979669770583427e+16
Condition number is large, 2.443728046142252e+16
Condition number is large, 2.6553110948574196e+16
Condition number is large, 2.304498602179855e+16
Condition number is large, 2.682529880827576e+16
Condition number is large, 2.3633182513983776e+16
Condition number is large, 2.596936939490044e+16
Condition number is large, 2.90481600376124e+16
Condition number is large, 3.0969292420511428e+16
Condition number is large, 2.564002446925404e+16
Condition number is large, 3.0508457189455836e+16
Condition number is large, 2.7362491948752784e+16
Condition number is large, 2.832978892218459e+16
Condition number is large, 2.4263734732564676e+16
Condition number is large, 2.212045422924315e+16
Condition number is large, 2.6843536415831108e+16
Condition number is large, 2.17948734217367e+16
Condition number is large, 2.4155910722038036e+16
Condition number is large, 2.775473953265896e+16
Condition number is large, 2.513344024847881e+16
Condition number is large, 2.7183128388488716e+16
Condition number is large, 2.4715981431213484e+16
Condition number is large, 2.688895695996603e+16
Condition number is large, 1.5444615431338644e+16
{'c2': 8.0, 'c3': 6.5, 'c4': 6.5, 'rmse_validation': 0.0020851723316147423, 'rmse_train': 0.0015731717545720528, 'BIC': -7298.181541634836, 'n_parameters': 117, 'n_nonzero_parameters': 117}
Condition number is large, 2.3366780145843172e+16
Condition number is large, 2.2983507138842772e+16
Condition number is large, 2.7320534418143132e+16
Condition number is large, 2.3850777029795024e+16
Condition number is large, 3.2041642489013868e+16
Condition number is large, 2.8072752047122964e+16
Condition number is large, 2.7285099338188428e+16
Condition number is large, 2.8943246285900292e+16
Condition number is large, 2.4292211913313376e+16
Condition number is large, 2.246767301788247e+16
Condition number is large, 2.4002071320630164e+16
Condition number is large, 2.2593076154262184e+16
Condition number is large, 3.0420515445288116e+16
Condition number is large, 3.575687165207429e+16
Condition number is large, 2.798088544900558e+16
Condition number is large, 2.8544938903252228e+16
Condition number is large, 3.0099196573961964e+16
Condition number is large, 2.577855257203065e+16
Condition number is large, 3.0750651692689164e+16
Condition number is large, 2.4723979644596484e+16
Condition number is large, 3.177736050094163e+16
Condition number is large, 3.2071550247692196e+16
Condition number is large, 3.0039198508873292e+16
Condition number is large, 2.3342378910732804e+16
Condition number is large, 2.743384186286969e+16
Condition number is large, 2.9406477208313316e+16
Condition number is large, 3.0053778322918736e+16
Condition number is large, 3.0557982884486096e+16
Condition number is large, 2.828893931744529e+16
Condition number is large, 2.6584332230283332e+16
Condition number is large, 2.6761476081522188e+16
Condition number is large, 2.6731184261995356e+16
Condition number is large, 3.2312143269876476e+16
Condition number is large, 2.7835615334291612e+16
Condition number is large, 2.9773363380407444e+16
Condition number is large, 2.8732603213800836e+16
Condition number is large, 2.2461256802319332e+16
Condition number is large, 2.253523128466033e+16
Condition number is large, 2.2167500400574696e+16
Condition number is large, 2.359175867843177e+16
Condition number is large, 2.955576856248458e+16
Condition number is large, 2.9625815798380084e+16
Condition number is large, 2.132272414233711e+16
Condition number is large, 2.7473681843127276e+16
Condition number is large, 2.211723179120394e+16
Condition number is large, 2.2345667895285868e+16
Condition number is large, 3.4442989104845656e+16
Condition number is large, 3.520596565772002e+16
Condition number is large, 2.6259222063565404e+16
Condition number is large, 2.9639050669832184e+16
Condition number is large, 3.0603713113878028e+16
Condition number is large, 2.79976704352863e+16
Condition number is large, 3.2529667609829036e+16
Condition number is large, 2.9015766036122644e+16
Condition number is large, 1.941497466690872e+16
Condition number is large, 2.7666298317954836e+16
Condition number is large, 2.9473421944182624e+16
Condition number is large, 2.705369829577059e+16
Condition number is large, 2.802168200503224e+16
Condition number is large, 3.092585765615843e+16
Condition number is large, 2.9870343532228132e+16
Condition number is large, 2.782075845843869e+16
Condition number is large, 2.613127946062695e+16
Condition number is large, 2.6058958991745464e+16
Condition number is large, 2.601373667057441e+16
Condition number is large, 2.7836684777379332e+16
Condition number is large, 2.5832404629001104e+16
Condition number is large, 2.7580034283573864e+16
Condition number is large, 2.6786377327908844e+16
Condition number is large, 2.3140709563102708e+16
Condition number is large, 2.3822988008802612e+16
Condition number is large, 2.1885067797751736e+16
Condition number is large, 2.804610973690078e+16
Condition number is large, 2.7074147280799452e+16
Condition number is large, 2.7894132871354092e+16
Condition number is large, 2.5278392892758656e+16
Condition number is large, 2.770854198520622e+16
Condition number is large, 2.979669770583427e+16
Condition number is large, 2.443728046142252e+16
Condition number is large, 2.6553110948574196e+16
Condition number is large, 2.304498602179855e+16
Condition number is large, 2.682529880827576e+16
Condition number is large, 2.3633182513983776e+16
Condition number is large, 2.596936939490044e+16
Condition number is large, 2.90481600376124e+16
Condition number is large, 3.0969292420511428e+16
Condition number is large, 2.564002446925404e+16
Condition number is large, 3.0508457189455836e+16
Condition number is large, 2.7362491948752784e+16
Condition number is large, 2.832978892218459e+16
Condition number is large, 2.4263734732564676e+16
Condition number is large, 2.212045422924315e+16
Condition number is large, 2.6843536415831108e+16
Condition number is large, 2.17948734217367e+16
Condition number is large, 2.4155910722038036e+16
Condition number is large, 2.775473953265896e+16
Condition number is large, 2.513344024847881e+16
Condition number is large, 2.7183128388488716e+16
Condition number is large, 2.4715981431213484e+16
Condition number is large, 2.688895695996603e+16
Condition number is large, 1.5444615431338644e+16
{'c2': 8.0, 'c3': 6.5, 'c4': 7.0, 'rmse_validation': 0.0020851723316147423, 'rmse_train': 0.0015731717545720528, 'BIC': -7298.181541634836, 'n_parameters': 117, 'n_nonzero_parameters': 117}
[8]:
# plot 4th order cutoff scan
# Three stacked panels (RMSE, BIC, number of parameters) versus the 4th order cutoff
fig = plt.figure(figsize=(7, 8))
gs = plt.GridSpec(3, 1, hspace=0)
ax1, ax2, ax3 = [plt.subplot(gs[panel, 0]) for panel in range(3)]

# pad the x-range by 0.5 on either side of the scanned values
xlim = [df4['c4'].min() - 0.5, df4['c4'].max() + 0.5]

# top panel: validation and training RMSE, scaled by 1000 to match the meV/atom label
ax1.plot(df4['c4'], 1000 * df4['rmse_validation'], '-o', label='validation')
ax1.plot(df4['c4'], 1000 * df4['rmse_train'], '--s', label='train')
ax1.set_ylabel('RMSE (meV/atom)')
ax1.legend()

# middle panel: BIC
ax2.plot(df4['c4'], df4['BIC'], '-o')
ax2.set_ylabel('BIC')

# bottom panel: total and nonzero number of parameters
ax3.plot(df4['c4'], df4['n_parameters'], '--s', label='Total')
ax3.plot(df4['c4'], df4['n_nonzero_parameters'], '-o', label='Nonzero')
ax3.legend()
ax3.set_ylabel('Number of parameters')

for ax in (ax1, ax2, ax3):
    ax.set_xlim(xlim)

# only the bottom panel shows x tick labels and the x-axis label
for ax in (ax1, ax2):
    ax.set_xticklabels([])
ax3.set_xlabel('4th order cutoff (Å)')

fig.tight_layout()
plt.show()
../_images/advanced_topics_training_cutoffs_selection_11_0.png