Selecting cutoffs¶
Selecting cutoffs for a CE can typically be done by systematically increasing cutoffs and checking the RMSE over the validation set or an information criterion. A simple approach is to first scan the second order cutoff, find the optimal second order cutoff, use this to scan the third order cutoff and so on.
We also recommend that, once all cutoffs have been scanned, one goes back and rescans, e.g., the second order cutoff using the finalized cutoffs for the third and fourth order, because this might lead to a slightly different optimal choice than the previous scans.
The scans can take more or less time depending on the fitting algorithm and on how densely one scans the cutoffs.
[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ase.db import connect
from icet import ClusterSpace, StructureContainer
from trainstation import CrossValidationEstimator
try:
import seaborn as sns
sns.set_context('notebook')
except ImportError:
print('sad')
def get_fit_data(cutoffs):
    """
    Assemble the fit matrix and target energies for one set of cutoffs.

    A cluster space for Ag-Pd is set up on the module-level
    ``primitive_structure`` with the given ``cutoffs``.  Every row returned
    by ``db.select()`` (``db`` is a module-level database handle) is then
    added to a structure container together with its ``mixing_energy``.
    The container's fit data for the ``'mixing_energy'`` key is returned.
    """
    cluster_space = ClusterSpace(
        structure=primitive_structure,
        cutoffs=cutoffs,
        chemical_symbols=['Ag', 'Pd'],
    )
    container = StructureContainer(cluster_space=cluster_space)
    for entry in db.select():
        container.add_structure(
            structure=entry.toatoms(),
            user_tag=entry.tag,
            properties={'mixing_energy': entry.mixing_energy},
        )
    fit_data = container.get_fit_data(key='mixing_energy')
    return fit_data
def train_ce(cutoffs):
    """
    Fit a cluster expansion for the given cutoffs and report fit metrics.

    The fit data is obtained from ``get_fit_data(cutoffs)`` and passed to a
    ``CrossValidationEstimator`` that uses the module-level ``fit_method``
    and shuffle-split validation with 100 splits.  The estimator is
    validated and then trained.

    Returns a dict with the keys ``rmse_validation``, ``rmse_train``,
    ``BIC``, ``n_parameters`` and ``n_nonzero_parameters``.
    """
    fit_matrix, targets = get_fit_data(cutoffs)
    estimator = CrossValidationEstimator(
        (fit_matrix, targets),
        fit_method=fit_method,
        validation_method='shuffle-split',
        n_splits=100,
    )
    estimator.validate()
    estimator.train()
    return {
        'rmse_validation': estimator.rmse_validation,
        'rmse_train': estimator.rmse_train,
        'BIC': estimator.model.BIC,
        'n_parameters': estimator.n_parameters,
        'n_nonzero_parameters': estimator.n_nonzero_parameters,
    }
[2]:
# Scan parameters: the fit method used by train_ce and the candidate cutoff
# values for the 2nd, 3rd and 4th order scans (step of 0.5 in each case).
fit_method = 'least-squares'
c2_vals = np.arange(4.0, 15.01, 0.5)  # stop of 15.01 so that 15.0 itself is included
c3_vals = np.arange(4.0, 8.0, 0.5)  # stop is exclusive: last value is 7.5
c4_vals = np.arange(4.0, 7.5, 0.5)  # stop is exclusive: last value is 7.0
# Open the reference database and read the primitive structure; both `db` and
# `primitive_structure` are used as globals by get_fit_data.
db = connect('../../tutorial/reference_data.db')
primitive_structure = db.get(id=1).toatoms() # row with id=1 is taken to be the primitive structure
Second order cutoff¶
First we scan the second-order cutoff and find a good value of about 8 Å.
[3]:
# Scan 2nd order cutoff: train one pair-only model per candidate value
records = []
for c2 in c2_vals:
    cutoffs = [c2]
    # put the scanned cutoff first, followed by the fit metrics
    row = dict(c2=c2)
    row.update(train_ce(cutoffs))
    print(row)
    records.append(row)
df2 = pd.DataFrame(data=records)

# second order cutoff carried over to the higher-order scans
c2_final = 8.0
{'c2': 4.0, 'rmse_validation': 0.007906443595761958, 'rmse_train': 0.007789759276035674, 'BIC': -6047.517734565386, 'n_parameters': 3, 'n_nonzero_parameters': 3}
{'c2': 4.5, 'rmse_validation': 0.005905154878101159, 'rmse_train': 0.005828416062622854, 'BIC': -6403.742696884292, 'n_parameters': 4, 'n_nonzero_parameters': 4}
{'c2': 5.0, 'rmse_validation': 0.005905154878101159, 'rmse_train': 0.005828416062622854, 'BIC': -6403.742696884292, 'n_parameters': 4, 'n_nonzero_parameters': 4}
{'c2': 5.5, 'rmse_validation': 0.005368637463082624, 'rmse_train': 0.005301294721455475, 'BIC': -6515.807921095053, 'n_parameters': 5, 'n_nonzero_parameters': 5}
{'c2': 6.0, 'rmse_validation': 0.005374143211234686, 'rmse_train': 0.005298007547654692, 'BIC': -6510.007120837804, 'n_parameters': 6, 'n_nonzero_parameters': 6}
{'c2': 6.5, 'rmse_validation': 0.0053455453155681, 'rmse_train': 0.00526482284974105, 'BIC': -6511.343329356486, 'n_parameters': 7, 'n_nonzero_parameters': 7}
{'c2': 7.0, 'rmse_validation': 0.0053455453155681, 'rmse_train': 0.00526482284974105, 'BIC': -6511.343329356486, 'n_parameters': 7, 'n_nonzero_parameters': 7}
{'c2': 7.5, 'rmse_validation': 0.005235590768222653, 'rmse_train': 0.005153782744508843, 'BIC': -6531.588343530676, 'n_parameters': 8, 'n_nonzero_parameters': 8}
{'c2': 8.0, 'rmse_validation': 0.005231453386506078, 'rmse_train': 0.005144172261542513, 'BIC': -6527.503437114905, 'n_parameters': 9, 'n_nonzero_parameters': 9}
{'c2': 8.5, 'rmse_validation': 0.005234003786061428, 'rmse_train': 0.005140138101446462, 'BIC': -6522.00004191754, 'n_parameters': 10, 'n_nonzero_parameters': 10}
{'c2': 9.0, 'rmse_validation': 0.005237354663143693, 'rmse_train': 0.00511943476791182, 'BIC': -6513.826519533814, 'n_parameters': 12, 'n_nonzero_parameters': 12}
{'c2': 9.5, 'rmse_validation': 0.005223442662848543, 'rmse_train': 0.005100598569962429, 'BIC': -6511.948543444877, 'n_parameters': 13, 'n_nonzero_parameters': 13}
{'c2': 10.0, 'rmse_validation': 0.005229976678641013, 'rmse_train': 0.005098615358473966, 'BIC': -6505.885374799127, 'n_parameters': 14, 'n_nonzero_parameters': 14}
{'c2': 10.5, 'rmse_validation': 0.005239075446830428, 'rmse_train': 0.005080075057786764, 'BIC': -6490.727570945517, 'n_parameters': 17, 'n_nonzero_parameters': 17}
{'c2': 11.0, 'rmse_validation': 0.005239075446830428, 'rmse_train': 0.005080075057786764, 'BIC': -6490.727570945517, 'n_parameters': 17, 'n_nonzero_parameters': 17}
{'c2': 11.5, 'rmse_validation': 0.0052418880226640305, 'rmse_train': 0.005073923857770292, 'BIC': -6485.676498188947, 'n_parameters': 18, 'n_nonzero_parameters': 18}
{'c2': 12.0, 'rmse_validation': 0.005243593500777188, 'rmse_train': 0.005058328567498962, 'BIC': -6470.031477233707, 'n_parameters': 21, 'n_nonzero_parameters': 21}
{'c2': 12.5, 'rmse_validation': 0.00526032243755337, 'rmse_train': 0.005055539997452015, 'BIC': -6457.581716077217, 'n_parameters': 23, 'n_nonzero_parameters': 23}
{'c2': 13.0, 'rmse_validation': 0.005278777764013501, 'rmse_train': 0.005049813417556868, 'BIC': -6439.389093461534, 'n_parameters': 26, 'n_nonzero_parameters': 26}
{'c2': 13.5, 'rmse_validation': 0.005287644610389536, 'rmse_train': 0.00504931519212303, 'BIC': -6432.972526973013, 'n_parameters': 27, 'n_nonzero_parameters': 27}
{'c2': 14.0, 'rmse_validation': 0.005302548496671004, 'rmse_train': 0.005046395984100731, 'BIC': -6420.609832182944, 'n_parameters': 29, 'n_nonzero_parameters': 29}
{'c2': 14.5, 'rmse_validation': 0.0053376031987636356, 'rmse_train': 0.005042205129069571, 'BIC': -6395.4333713581555, 'n_parameters': 33, 'n_nonzero_parameters': 33}
{'c2': 15.0, 'rmse_validation': 0.005345857128167708, 'rmse_train': 0.005041101796608185, 'BIC': -6389.139951155822, 'n_parameters': 34, 'n_nonzero_parameters': 34}
[4]:
# plot 2nd order cutoff scan: three stacked panels over the same cutoff axis
fig = plt.figure(figsize=(7, 8))
gs = plt.GridSpec(3, 1, hspace=0)
ax1, ax2, ax3 = [plt.subplot(gs[i, 0]) for i in range(3)]
xlim = [df2.c2.min()-0.5, df2.c2.max()+0.5]

# top panel: RMSE, scaled by 1000 to match the meV/atom axis label
for column, fmt, label in [('rmse_validation', '-o', 'validation'),
                           ('rmse_train', '--s', 'train')]:
    ax1.plot(df2.c2, 1000 * df2[column], fmt, label=label)
ax1.legend()
ax1.set_ylabel('RMSE (meV/atom)')

# middle panel: Bayesian information criterion
ax2.plot(df2.c2, df2.BIC, '-o')
ax2.set_ylabel('BIC')

# bottom panel: total and nonzero number of parameters
for column, fmt, label in [('n_parameters', '--s', 'Total'),
                           ('n_nonzero_parameters', '-o', 'Nonzero')]:
    ax3.plot(df2.c2, df2[column], fmt, label=label)
ax3.set_ylabel('Number of parameters')
ax3.legend()

# common x-range; tick labels are shown only on the bottom panel
for ax in (ax1, ax2, ax3):
    ax.set_xlim(xlim)
for ax in (ax1, ax2):
    ax.set_xticklabels([])
ax3.set_xlabel('2nd order cutoff (Å)')

fig.tight_layout()
plt.show()
Third-order cutoff¶
For the third-order cutoff we find a value of about 6.5 Å.
[5]:
# Scan 3rd order cutoff with the 2nd order cutoff fixed at c2_final
records = []
for c3 in c3_vals:
    cutoffs = [c2_final, c3]
    # put the cutoffs first, followed by the fit metrics
    row = dict(c2=c2_final, c3=c3)
    row.update(train_ce(cutoffs))
    print(row)
    records.append(row)
df3 = pd.DataFrame(data=records)

# third order cutoff carried over to the fourth-order scan
c3_final = 6.5
{'c2': 8.0, 'c3': 4.0, 'rmse_validation': 0.0036006241653608402, 'rmse_train': 0.00359040151755787, 'BIC': -6972.463131530305, 'n_parameters': 10, 'n_nonzero_parameters': 10}
{'c2': 8.0, 'c3': 4.5, 'rmse_validation': 0.0034315681103231272, 'rmse_train': 0.0034184603576390958, 'BIC': -7027.516895058285, 'n_parameters': 11, 'n_nonzero_parameters': 11}
{'c2': 8.0, 'c3': 5.0, 'rmse_validation': 0.0034315681103231272, 'rmse_train': 0.0034184603576390958, 'BIC': -7027.516895058285, 'n_parameters': 11, 'n_nonzero_parameters': 11}
{'c2': 8.0, 'c3': 5.5, 'rmse_validation': 0.003114043128626342, 'rmse_train': 0.003076076644589442, 'BIC': -7126.827552809291, 'n_parameters': 16, 'n_nonzero_parameters': 16}
{'c2': 8.0, 'c3': 6.0, 'rmse_validation': 0.0025468284304101057, 'rmse_train': 0.0024953392388428175, 'BIC': -7356.177908650355, 'n_parameters': 21, 'n_nonzero_parameters': 21}
{'c2': 8.0, 'c3': 6.5, 'rmse_validation': 0.00246434860087697, 'rmse_train': 0.002375115708994107, 'BIC': -7365.550248283435, 'n_parameters': 29, 'n_nonzero_parameters': 29}
{'c2': 8.0, 'c3': 7.0, 'rmse_validation': 0.00246434860087697, 'rmse_train': 0.002375115708994107, 'BIC': -7365.550248283435, 'n_parameters': 29, 'n_nonzero_parameters': 29}
{'c2': 8.0, 'c3': 7.5, 'rmse_validation': 0.0024639072483062987, 'rmse_train': 0.0023579588158507084, 'BIC': -7354.88189840891, 'n_parameters': 32, 'n_nonzero_parameters': 32}
[6]:
# plot 3rd order cutoff scan: three stacked panels over the same cutoff axis
fig = plt.figure(figsize=(7, 8))
gs = plt.GridSpec(3, 1, hspace=0)
ax1, ax2, ax3 = [plt.subplot(gs[i, 0]) for i in range(3)]
xlim = [df3.c3.min()-0.5, df3.c3.max()+0.5]

# top panel: RMSE, scaled by 1000 to match the meV/atom axis label
for column, fmt, label in [('rmse_validation', '-o', 'validation'),
                           ('rmse_train', '--s', 'train')]:
    ax1.plot(df3.c3, 1000 * df3[column], fmt, label=label)
ax1.legend()
ax1.set_ylabel('RMSE (meV/atom)')

# middle panel: Bayesian information criterion
ax2.plot(df3.c3, df3.BIC, '-o')
ax2.set_ylabel('BIC')

# bottom panel: total and nonzero number of parameters
for column, fmt, label in [('n_parameters', '--s', 'Total'),
                           ('n_nonzero_parameters', '-o', 'Nonzero')]:
    ax3.plot(df3.c3, df3[column], fmt, label=label)
ax3.set_ylabel('Number of parameters')
ax3.legend()

# common x-range; tick labels are shown only on the bottom panel
for ax in (ax1, ax2, ax3):
    ax.set_xlim(xlim)
for ax in (ax1, ax2):
    ax.set_xticklabels([])
ax3.set_xlabel('3rd order cutoff (Å)')

fig.tight_layout()
plt.show()
Fourth-order cutoff¶
For the fourth-order cutoff we find a value of about 6.0 Å. We note that for fourth-order cutoffs of 6.5 Å and above we get condition number warnings, which indicate that the linear problem we are solving is ill-conditioned, and thus we cannot trust the resulting cluster expansions for these cutoffs.
[7]:
# Scan 4th order cutoff with the 2nd and 3rd order cutoffs fixed at
# c2_final and c3_final
records = []
for c4 in c4_vals:
    cutoffs = [c2_final, c3_final, c4]
    row = train_ce(cutoffs)
    # put the cutoffs first, followed by the fit metrics
    row = {'c2': c2_final, 'c3': c3_final, 'c4': c4, **row}
    print(row)
    records.append(row)
df4 = pd.DataFrame(records)

# Fourth order cutoff selected from the scan: validation RMSE and BIC are
# both lowest at 6.0, while the fits at 6.5 and 7.0 emit "Condition number
# is large" warnings and give a higher validation RMSE and BIC.
c4_final = 6.0
{'c2': 8.0, 'c3': 6.5, 'c4': 4.0, 'rmse_validation': 0.0024571627124155165, 'rmse_train': 0.0023630776797101266, 'BIC': -7365.394485289505, 'n_parameters': 30, 'n_nonzero_parameters': 30}
{'c2': 8.0, 'c3': 6.5, 'c4': 4.5, 'rmse_validation': 0.0024261109989691743, 'rmse_train': 0.0023286213678193977, 'BIC': -7370.751087283575, 'n_parameters': 32, 'n_nonzero_parameters': 32}
{'c2': 8.0, 'c3': 6.5, 'c4': 5.0, 'rmse_validation': 0.0024261109989691743, 'rmse_train': 0.0023286213678193977, 'BIC': -7370.751087283575, 'n_parameters': 32, 'n_nonzero_parameters': 32}
{'c2': 8.0, 'c3': 6.5, 'c4': 5.5, 'rmse_validation': 0.002207642637192658, 'rmse_train': 0.0020448371179085027, 'BIC': -7446.803640224018, 'n_parameters': 45, 'n_nonzero_parameters': 45}
{'c2': 8.0, 'c3': 6.5, 'c4': 6.0, 'rmse_validation': 0.0019437648978908096, 'rmse_train': 0.0017430310908495975, 'BIC': -7521.687921832897, 'n_parameters': 64, 'n_nonzero_parameters': 64}
Condition number is large, 2.3366780145843172e+16
Condition number is large, 2.2983507138842772e+16
Condition number is large, 2.7320534418143132e+16
Condition number is large, 2.3850777029795024e+16
Condition number is large, 3.2041642489013868e+16
Condition number is large, 2.8072752047122964e+16
Condition number is large, 2.7285099338188428e+16
Condition number is large, 2.8943246285900292e+16
Condition number is large, 2.4292211913313376e+16
Condition number is large, 2.246767301788247e+16
Condition number is large, 2.4002071320630164e+16
Condition number is large, 2.2593076154262184e+16
Condition number is large, 3.0420515445288116e+16
Condition number is large, 3.575687165207429e+16
Condition number is large, 2.798088544900558e+16
Condition number is large, 2.8544938903252228e+16
Condition number is large, 3.0099196573961964e+16
Condition number is large, 2.577855257203065e+16
Condition number is large, 3.0750651692689164e+16
Condition number is large, 2.4723979644596484e+16
Condition number is large, 3.177736050094163e+16
Condition number is large, 3.2071550247692196e+16
Condition number is large, 3.0039198508873292e+16
Condition number is large, 2.3342378910732804e+16
Condition number is large, 2.743384186286969e+16
Condition number is large, 2.9406477208313316e+16
Condition number is large, 3.0053778322918736e+16
Condition number is large, 3.0557982884486096e+16
Condition number is large, 2.828893931744529e+16
Condition number is large, 2.6584332230283332e+16
Condition number is large, 2.6761476081522188e+16
Condition number is large, 2.6731184261995356e+16
Condition number is large, 3.2312143269876476e+16
Condition number is large, 2.7835615334291612e+16
Condition number is large, 2.9773363380407444e+16
Condition number is large, 2.8732603213800836e+16
Condition number is large, 2.2461256802319332e+16
Condition number is large, 2.253523128466033e+16
Condition number is large, 2.2167500400574696e+16
Condition number is large, 2.359175867843177e+16
Condition number is large, 2.955576856248458e+16
Condition number is large, 2.9625815798380084e+16
Condition number is large, 2.132272414233711e+16
Condition number is large, 2.7473681843127276e+16
Condition number is large, 2.211723179120394e+16
Condition number is large, 2.2345667895285868e+16
Condition number is large, 3.4442989104845656e+16
Condition number is large, 3.520596565772002e+16
Condition number is large, 2.6259222063565404e+16
Condition number is large, 2.9639050669832184e+16
Condition number is large, 3.0603713113878028e+16
Condition number is large, 2.79976704352863e+16
Condition number is large, 3.2529667609829036e+16
Condition number is large, 2.9015766036122644e+16
Condition number is large, 1.941497466690872e+16
Condition number is large, 2.7666298317954836e+16
Condition number is large, 2.9473421944182624e+16
Condition number is large, 2.705369829577059e+16
Condition number is large, 2.802168200503224e+16
Condition number is large, 3.092585765615843e+16
Condition number is large, 2.9870343532228132e+16
Condition number is large, 2.782075845843869e+16
Condition number is large, 2.613127946062695e+16
Condition number is large, 2.6058958991745464e+16
Condition number is large, 2.601373667057441e+16
Condition number is large, 2.7836684777379332e+16
Condition number is large, 2.5832404629001104e+16
Condition number is large, 2.7580034283573864e+16
Condition number is large, 2.6786377327908844e+16
Condition number is large, 2.3140709563102708e+16
Condition number is large, 2.3822988008802612e+16
Condition number is large, 2.1885067797751736e+16
Condition number is large, 2.804610973690078e+16
Condition number is large, 2.7074147280799452e+16
Condition number is large, 2.7894132871354092e+16
Condition number is large, 2.5278392892758656e+16
Condition number is large, 2.770854198520622e+16
Condition number is large, 2.979669770583427e+16
Condition number is large, 2.443728046142252e+16
Condition number is large, 2.6553110948574196e+16
Condition number is large, 2.304498602179855e+16
Condition number is large, 2.682529880827576e+16
Condition number is large, 2.3633182513983776e+16
Condition number is large, 2.596936939490044e+16
Condition number is large, 2.90481600376124e+16
Condition number is large, 3.0969292420511428e+16
Condition number is large, 2.564002446925404e+16
Condition number is large, 3.0508457189455836e+16
Condition number is large, 2.7362491948752784e+16
Condition number is large, 2.832978892218459e+16
Condition number is large, 2.4263734732564676e+16
Condition number is large, 2.212045422924315e+16
Condition number is large, 2.6843536415831108e+16
Condition number is large, 2.17948734217367e+16
Condition number is large, 2.4155910722038036e+16
Condition number is large, 2.775473953265896e+16
Condition number is large, 2.513344024847881e+16
Condition number is large, 2.7183128388488716e+16
Condition number is large, 2.4715981431213484e+16
Condition number is large, 2.688895695996603e+16
Condition number is large, 1.5444615431338644e+16
{'c2': 8.0, 'c3': 6.5, 'c4': 6.5, 'rmse_validation': 0.0020851723316147423, 'rmse_train': 0.0015731717545720528, 'BIC': -7298.181541634836, 'n_parameters': 117, 'n_nonzero_parameters': 117}
Condition number is large, 2.3366780145843172e+16
Condition number is large, 2.2983507138842772e+16
Condition number is large, 2.7320534418143132e+16
Condition number is large, 2.3850777029795024e+16
Condition number is large, 3.2041642489013868e+16
Condition number is large, 2.8072752047122964e+16
Condition number is large, 2.7285099338188428e+16
Condition number is large, 2.8943246285900292e+16
Condition number is large, 2.4292211913313376e+16
Condition number is large, 2.246767301788247e+16
Condition number is large, 2.4002071320630164e+16
Condition number is large, 2.2593076154262184e+16
Condition number is large, 3.0420515445288116e+16
Condition number is large, 3.575687165207429e+16
Condition number is large, 2.798088544900558e+16
Condition number is large, 2.8544938903252228e+16
Condition number is large, 3.0099196573961964e+16
Condition number is large, 2.577855257203065e+16
Condition number is large, 3.0750651692689164e+16
Condition number is large, 2.4723979644596484e+16
Condition number is large, 3.177736050094163e+16
Condition number is large, 3.2071550247692196e+16
Condition number is large, 3.0039198508873292e+16
Condition number is large, 2.3342378910732804e+16
Condition number is large, 2.743384186286969e+16
Condition number is large, 2.9406477208313316e+16
Condition number is large, 3.0053778322918736e+16
Condition number is large, 3.0557982884486096e+16
Condition number is large, 2.828893931744529e+16
Condition number is large, 2.6584332230283332e+16
Condition number is large, 2.6761476081522188e+16
Condition number is large, 2.6731184261995356e+16
Condition number is large, 3.2312143269876476e+16
Condition number is large, 2.7835615334291612e+16
Condition number is large, 2.9773363380407444e+16
Condition number is large, 2.8732603213800836e+16
Condition number is large, 2.2461256802319332e+16
Condition number is large, 2.253523128466033e+16
Condition number is large, 2.2167500400574696e+16
Condition number is large, 2.359175867843177e+16
Condition number is large, 2.955576856248458e+16
Condition number is large, 2.9625815798380084e+16
Condition number is large, 2.132272414233711e+16
Condition number is large, 2.7473681843127276e+16
Condition number is large, 2.211723179120394e+16
Condition number is large, 2.2345667895285868e+16
Condition number is large, 3.4442989104845656e+16
Condition number is large, 3.520596565772002e+16
Condition number is large, 2.6259222063565404e+16
Condition number is large, 2.9639050669832184e+16
Condition number is large, 3.0603713113878028e+16
Condition number is large, 2.79976704352863e+16
Condition number is large, 3.2529667609829036e+16
Condition number is large, 2.9015766036122644e+16
Condition number is large, 1.941497466690872e+16
Condition number is large, 2.7666298317954836e+16
Condition number is large, 2.9473421944182624e+16
Condition number is large, 2.705369829577059e+16
Condition number is large, 2.802168200503224e+16
Condition number is large, 3.092585765615843e+16
Condition number is large, 2.9870343532228132e+16
Condition number is large, 2.782075845843869e+16
Condition number is large, 2.613127946062695e+16
Condition number is large, 2.6058958991745464e+16
Condition number is large, 2.601373667057441e+16
Condition number is large, 2.7836684777379332e+16
Condition number is large, 2.5832404629001104e+16
Condition number is large, 2.7580034283573864e+16
Condition number is large, 2.6786377327908844e+16
Condition number is large, 2.3140709563102708e+16
Condition number is large, 2.3822988008802612e+16
Condition number is large, 2.1885067797751736e+16
Condition number is large, 2.804610973690078e+16
Condition number is large, 2.7074147280799452e+16
Condition number is large, 2.7894132871354092e+16
Condition number is large, 2.5278392892758656e+16
Condition number is large, 2.770854198520622e+16
Condition number is large, 2.979669770583427e+16
Condition number is large, 2.443728046142252e+16
Condition number is large, 2.6553110948574196e+16
Condition number is large, 2.304498602179855e+16
Condition number is large, 2.682529880827576e+16
Condition number is large, 2.3633182513983776e+16
Condition number is large, 2.596936939490044e+16
Condition number is large, 2.90481600376124e+16
Condition number is large, 3.0969292420511428e+16
Condition number is large, 2.564002446925404e+16
Condition number is large, 3.0508457189455836e+16
Condition number is large, 2.7362491948752784e+16
Condition number is large, 2.832978892218459e+16
Condition number is large, 2.4263734732564676e+16
Condition number is large, 2.212045422924315e+16
Condition number is large, 2.6843536415831108e+16
Condition number is large, 2.17948734217367e+16
Condition number is large, 2.4155910722038036e+16
Condition number is large, 2.775473953265896e+16
Condition number is large, 2.513344024847881e+16
Condition number is large, 2.7183128388488716e+16
Condition number is large, 2.4715981431213484e+16
Condition number is large, 2.688895695996603e+16
Condition number is large, 1.5444615431338644e+16
{'c2': 8.0, 'c3': 6.5, 'c4': 7.0, 'rmse_validation': 0.0020851723316147423, 'rmse_train': 0.0015731717545720528, 'BIC': -7298.181541634836, 'n_parameters': 117, 'n_nonzero_parameters': 117}
[8]:
# plot 4th order cutoff scan: three stacked panels over the same cutoff axis
fig = plt.figure(figsize=(7, 8))
gs = plt.GridSpec(3, 1, hspace=0)
ax1, ax2, ax3 = [plt.subplot(gs[i, 0]) for i in range(3)]
xlim = [df4.c4.min()-0.5, df4.c4.max()+0.5]

# top panel: RMSE, scaled by 1000 to match the meV/atom axis label
for column, fmt, label in [('rmse_validation', '-o', 'validation'),
                           ('rmse_train', '--s', 'train')]:
    ax1.plot(df4.c4, 1000 * df4[column], fmt, label=label)
ax1.legend()
ax1.set_ylabel('RMSE (meV/atom)')

# middle panel: Bayesian information criterion
ax2.plot(df4.c4, df4.BIC, '-o')
ax2.set_ylabel('BIC')

# bottom panel: total and nonzero number of parameters
for column, fmt, label in [('n_parameters', '--s', 'Total'),
                           ('n_nonzero_parameters', '-o', 'Nonzero')]:
    ax3.plot(df4.c4, df4[column], fmt, label=label)
ax3.set_ylabel('Number of parameters')
ax3.legend()

# common x-range; tick labels are shown only on the bottom panel
for ax in (ax1, ax2, ax3):
    ax.set_xlim(xlim)
for ax in (ax1, ax2):
    ax.set_xticklabels([])
ax3.set_xlabel('4th order cutoff (Å)')

fig.tight_layout()
plt.show()