Script example¶
There is also a script that runs through all the steps in the usage guide, but as a standalone Python program. This may be easier to edit, and makes it easier to find the relevant functions.
While the script is rather lengthy, much of it consists of comments explaining the steps, plus a fair amount of bookkeeping and some plotting functionality. The actual steps aren't that many.
The full script is included below, but can also be found as
extras/single-script.py
.
Full code¶
import sys
from pathlib import Path
import math
from copy import deepcopy
import toml
import pandas as pd
import matplotlib.pyplot as plt
from kcs.config import read_config, default_config
from kcs.utils.atlist import atlist
from kcs.extraction import calc as extract
from kcs.utils.io import read_averaged_data, concat_cubes
from kcs.utils.attributes import get as get_attrs
from kcs.utils.matching import match
from kcs.tas_change import calc as calc_tas_change
from kcs.tas_change.plot import plot as plot_tas_change
from kcs.steering import calc as calc_steering
from kcs.steering.core import normalize_average_dataset
from kcs.steering.plot import plot as plot_steering
from kcs.change_perc import calc as calc_change_perc
from kcs.change_perc.plot import plot as plot_change_perc
from kcs.resample import calc as resample
# Read the configuration: a config file may be supplied on the command line;
# otherwise fall back to the default user configuration
# (kcs-config.toml in the working directory).
try:
    config_file = sys.argv[1]
except IndexError:
    config_file = None
read_config(config_file)
# default_config is now updated

# Read the dataset files from our @-lists.
cmip_lists = {'tas': "@cmip6-tas.list", 'pr': "@cmip6-pr.list"}
ecearth_lists = {'tas': "@ecearth-tas.list", 'pr': "@ecearth-pr.list"}
cmip_files = {}
for key, fname in cmip_lists.items():
    cmip_files[key] = list(atlist(fname))
ecearth_files = {}
for key, fname in ecearth_lists.items():
    ecearth_files[key] = list(atlist(fname))
# Alternatively, we could use e.g. glob:
# import glob
# files_pr = glob.glob("/data/cmip6/*/Amon/pr/*/*/*.nc")
# etcetera.
# Step 0: extract and average the areas for both the CMIP and EC-EARTH data.
# NOTE(review): the "(unknown)" fragment in the templates looks like a
# placeholder lost in extraction -- confirm against extras/single-script.py.
data = {'cmip': {'global': {}, 'nlpoint': {}},
        'ecearth': {'global': {}, 'nlpoint': {}}}
# Areas are given as a dictionary: {name: definition}.
# A definition can be taken from default_config; conveniently, None is the
# definition for the global area.
global_area = {'global': None}
nlpoint_area = {'nlpoint': default_config['areas']['nlpoint']}
# Almost all extract() parameters have a proper default; the template
# parameter is just made explicit here.
input_files = {'cmip': cmip_files, 'ecearth': ecearth_files}
templates = {'cmip': "data/cmip6/{var}-{area}-averaged/(unknown).nc",
             'ecearth': "data/ecearth/{var}-{area}-averaged/(unknown).nc"}
for name in ('cmip', 'ecearth'):
    template = templates[name]
    # Globally, only temperature is needed; for the NL point, both variables.
    data[name]['global']['tas'] = extract(input_files[name]['tas'], global_area,
                                          ignore_common_warnings=True,
                                          template=template)
    for var in {'tas', 'pr'}:
        data[name]['nlpoint'][var] = extract(input_files[name][var], nlpoint_area,
                                             ignore_common_warnings=True,
                                             template=template)
# Step 1-pre: read previously created data.
datasets = {'cmip': {'global': {}, 'nlpoint': {}},
            'ecearth': {'global': {}, 'nlpoint': {}}}
# Flip this to True to read the extracted files from disk via @-lists
# instead of re-using the in-memory results of step 0.
skipped_above = False
if skipped_above:
    # Use pre-defined @-lists.
    cmip_lists = {'global-tas': "@cmip6-tas-global-averaged.list",
                  'nlpoint-pr': "@cmip6-pr-nlpoint-averaged.list",
                  'nlpoint-tas': "@cmip6-tas-nlpoint-averaged.list"}
    ecearth_lists = {'global-tas': "@ecearth-tas-global-averaged-short.list",
                     'nlpoint-pr': "@ecearth-pr-nlpoint-averaged-short.list",
                     'nlpoint-tas': "@ecearth-tas-nlpoint-averaged-short.list"}
    cmip_files = {key: list(atlist(Path(fname))) for key, fname in cmip_lists.items()}
    ecearth_files = {key: list(atlist(Path(fname))) for key, fname in ecearth_lists.items()}
    for area in {'global', 'nlpoint'}:
        for var in {'tas', 'pr'}:
            if area == 'global' and var == 'pr':
                # No global precipitation list exists.
                continue
            key = f"{area}-{var}"
            datasets['cmip'][area][var] = read_averaged_data(cmip_files[key])
            # Limit to eight datasets for testing purposes.
            datasets['ecearth'][area][var] = read_averaged_data(ecearth_files[key][:8])
else:
    # Attach attributes to the in-memory cubes from step 0.
    for name, areas in data.items():
        for area, variables in areas.items():
            for var, items in variables.items():
                datasets[name][area][var] = get_attrs(
                    [item.cube for item in items],
                    [Path(item.path) for item in items])
# Step 1a: calculate the annual global tas change from the CMIP ensemble.
dataset = datasets['cmip']['global']['tas']
# Pair up historical and matching scenario runs.
dataset = match(dataset)
cols = ['model', 'experiment', 'realization', 'index_match_run']
#print(dataset[cols])
print(dataset.columns)
tas_change_percentiles, dataset = \
    calc_tas_change(dataset, reference_period=[1990, 2019], relative=False)
datasets['cmip']['global']['tas'] = dataset
print(tas_change_percentiles)
print(dataset.columns)
print(tas_change_percentiles.index.dtype)
# Raw string for the ylabel: "\c" is an invalid escape sequence and raises
# a SyntaxWarning on Python >= 3.12 (the literal value itself is unchanged;
# the labels further down already use raw strings).
plot_tas_change(tas_change_percentiles, "tas_change_cmip.png",
                xlabel="Year", ylabel=r"Temperature increase [${}^{\circ}$C]",
                title="Yearly annual temperature change",
                legend=True, smooth=10)
#tas_change_percentiles.to_csv("tas_change_cmip.csv", index_label="date")
#tas_change_percentiles = pd.read_csv("tas_change_cmip.csv")
# Step 1b: calculate the steering table for the scenarios.
scenarios = []
for epoch in {2050, 2085}:
    # Note: percentiles need to be strings.
    scenarios.extend([{'name': 'G', 'percentile': '10', 'epoch': epoch},
                      {'name': 'W', 'percentile': '90', 'epoch': epoch}])
dataset = datasets['ecearth']['global']['tas']
steering = calc_steering(dataset, tas_change_percentiles, scenarios,
                         rolling_mean=10, reference_period=[1990, 2019])
print(steering)
steering = pd.DataFrame(steering)
print(steering)
# Normalize the EC-EARTH data for the plot.
ecearth_data = normalize_average_dataset(dataset['cube'], relative=False,
                                         reference_period=[1990, 2019])
# Raw string for the ylabel: "\c" is an invalid escape sequence and raises
# a SyntaxWarning on Python >= 3.12 (the literal value itself is unchanged).
plot_steering(tas_change_percentiles, steering, "tas_change_cmip_steering.png",
              extra_data=ecearth_data, reference_epoch=2005,
              xlabel="Year", ylabel=r"Temperature increase [${}^{\circ}$C]",
              title="Yearly annual temperature change",
              legend=True, smooth=10)
#steering.to_csv("steering.csv", index=False)
#steering = pd.read_csv("steering.csv")
#steering_table['period'] = steering_table['period'].apply(
#    lambda x: tuple(map(int, x.strip('()').split(','))))
# Step 2: calculate the percentile changes for the NL point, and compare
# the CMIP distributions with the (resampled) scenario runs.
# The nested loops below are essentially the same as the
# kcs.change_perc.runall helper module.
area = 'nlpoint'
seasons = ['djf', 'mam', 'jja', 'son']
distrs = {}
writecsv = False
columns = ['mean', '5', '10', '50', '90', '95']
xlabels = ['ave', 'P05', 'P10', 'P50', 'P90', 'P95']
for var in ['tas', 'pr']:
    relative = var == 'pr'  # precipitation changes are relative (%)
    if var == 'pr':
        text = 'prec'
        ylabel = "Change (%)"
    elif var == 'tas':
        text = 't2m'
        ylabel = r"Change (${}^{\circ}$C)"
    distrs[var] = {}
    dataset = datasets['cmip'][area][var]
    dataset = match(dataset)
    # NOTE(review): cmip_dataset is not used below -- presumably a leftover;
    # kept so behavior (and any caching side effect) is unchanged.
    cmip_dataset = concat_cubes(dataset)
    ecearth_dataset = datasets['ecearth'][area][var]
    for epoch, steering_group in steering.groupby('epoch'):
        distrs[var][epoch] = {}
        # 30-year window centered on the epoch (inclusive bounds).
        # BUG FIX: the original reused the single name `period` for both this
        # CMIP window and the matched EC-EARTH period read from the steering
        # table below, so later iterations could pass the leftover EC-EARTH
        # period to the CMIP calculation. Distinct names keep them apart.
        cmip_period = epoch - 15 + 1, epoch + 15
        for season in seasons:
            perc_distr, perc = calc_change_perc(
                dataset.copy(), season, cmip_period,
                relative=relative, reference_period=[1990, 2019])
            # Save the CMIP percentile distributions for later (step 3 plots).
            distrs[var][epoch][season] = perc_distr
            if writecsv:
                filename = f"{var}_{epoch}_{season}_perc_distr.csv"
                perc_distr.to_csv(filename, index=True)
                filename = f"{var}_{epoch}_{season}_perc.csv"
                perc.to_csv(filename, index=True)
            scenarios = {}
            for _, row in steering_group.iterrows():
                # Dedicated name: `data` would shadow the step-0 results.
                ecearth_copy = ecearth_dataset.copy()
                name = row['name'].rstrip('0123456789')  # remove the year part
                ecearth_period = row['period']  # matched EC-EARTH period
                print(ecearth_period)
                _, scenarios[name] = calc_change_perc(
                    ecearth_copy, season, ecearth_period, relative=relative,
                    reference_period=[1990, 2019])
                if writecsv:
                    filename = f"{var}_{epoch}_{season}_{name}_perc.csv"
                    scenarios[name].to_csv(filename, index=False)
            labels = {
                'title': '',
                'text': text,
                'y': ylabel,
                'x': '',
                'epoch': epoch,
            }
            plot_change_perc(perc_distr, labels, limits=None, columns=columns, xlabels=xlabels,
                             scenarios=scenarios)
            plt.tight_layout()
            filename = f"{var}_{epoch}_{season}.png"
            plt.savefig(filename, bbox_inches='tight')
from pprint import pprint
pprint(distrs)
# Step 3: resample the EC-EARTH data.
# Concatenate tas & pr, since we'll need both in resampling step 2.
dataset = pd.concat([datasets['ecearth']['nlpoint'][var] for var in ['tas', 'pr']])
print(dataset.columns)
print(dataset[['var', 'experiment', 'model']])
# Align the steering-table column names with what resample() expects
# (renaming columns that don't exist is a harmless no-op).
steering = steering.rename(columns={'target year': 'epoch', 'resampling period': 'period',
                                    'name': 'scenario'})
# Precipitation sub-scenarios for resampling step 1: precipitation change
# per degree of temperature change.
precip_table = pd.DataFrame({'subscenario': ['L', 'H'],
                             'precip_change_per_t': [4, 8]})
# Outer product of the steering table and the precipitation sub-scenarios.
# how='cross' (pandas >= 1.2) replaces the old trick of merging on a dummy
# key column, and no longer mutates steering/precip_table in the process.
table = pd.merge(steering, precip_table, how='cross')
# Scale the precipitation change with each scenario's temperature change.
table['precip_change'] = table['model_delta_t'] * table['precip_change_per_t']
# We should end up with nstep1 samples after this step.
nstep1 = 1000
# Load the conditions for resampling step 2 from a config file.
# We could define them in code, but that would become quite lengthy.
with open('step2_conditions.toml') as fh:
    conditions = toml.load(fh)
# Transform the epoch keys to integers, since TOML only has string keys.
conditions = {key: {prkey: {int(epoch): value for epoch, value in prvalue.items()}
                    for prkey, prvalue in conditions[key].items()}
              for key in conditions}
from pprint import pprint
pprint(conditions)
# Number of final resampled datasets and penalties for resampling step 3.
nstep3 = 8
penalties = {1: 0.0, 2: 0.0, 3: 1.0, 4: 3.0}
# Fill up the penalties with infinity up to the number of resampled datasets.
for count in range(max(penalties) + 1, nstep3 + 1):
    penalties[count] = math.inf
print(table)
indices, diffs = resample(dataset, table, conditions, penalties,
                          nstep1=nstep1, nstep3=nstep3, nsections=6,
                          reference_period=[1990, 2019], relative=['pr'])
# Show the percentile differences per (epoch, scenario, sub-scenario) key.
for key in diffs:
    print(key)
    for var in diffs[key]:
        for season in diffs[key][var]:
            print(var, season)
            print(diffs[key][var][season])
# Plot the resampled changes on top of the CMIP percentile distributions.
columns = ['mean', '5', '10', '50', '90', '95']
xlabels = ['ave', 'P05', 'P10', 'P50', 'P90', 'P95']
# Group the resampled differences by epoch, variable and season, keyed by
# "<scenario>_<pr-subscenario>".  The structure is derived from `diffs`
# itself (instead of hard-coding the epochs {2050, 2085} and variables),
# so this keeps working when the steering table uses other epochs.
scenarios = {}
for key, diff in diffs.items():
    epoch, scenario, pr_scenario = key
    epoch = int(epoch)  # keys arrive as strings
    for var in diff:
        for season in diff[var]:
            (scenarios.setdefault(epoch, {})
                      .setdefault(var, {})
                      .setdefault(season, {}))[f"{scenario}_{pr_scenario}"] = diff[var][season]
for key, value in diffs.items():
    epoch, scenario, pr_scenario = key
    epoch = int(epoch)
    for var, value2 in value.items():
        relative = var == 'pr'
        if var == 'pr':
            text = 'precip'
            ylabel = "Change (%)"
            ylimits = [-50, 50]
        elif var == 'tas':
            text = 't2m'
            ylabel = r"Change (${}^{\circ}$C)"
            ylimits = [-1, 5]
        for season, diff in value2.items():
            # Compare against the CMIP distribution computed in step 2.
            distr = distrs[var][epoch][season]
            labels = {
                'title': '',
                'text': f"{text}, {season}",
                'y': ylabel,
                'x': '',
                'epoch': epoch,
            }
            plot_change_perc(distr, labels, limits=ylimits,
                             columns=columns, xlabels=xlabels,
                             scenarios=scenarios[epoch][var][season],
                             only_scenario_mean=True)
            plt.tight_layout()
            filename = f"resampled_{var}_change_{epoch}_{season}_nlpoint.png"
            plt.savefig(filename, bbox_inches='tight')
            plt.close()