ACINN workshop, Tue 07.02.2017
Fabien Maussion
Slides: http://fabienmaussion.info/acinn_xarray_workshop
Notebook: On GitHub
Documentation: http://xarray.pydata.org
Repository: https://github.com/pydata/xarray
Initial release: 03.05.2014
Latest release: v0.9.1 (20.01.2017)
53 contributors (latest release: 24)
Umbrellas: Python for data & NumFOCUS (but no funding...)
import numpy as np
a = np.array([[1, 3, 9], [2, 8, 4]])
a
array([[1, 3, 9], [2, 8, 4]])
a[1, 2]
4
a.mean(axis=0)
array([ 1.5, 5.5, 6.5])
import xarray as xr
da = xr.DataArray(a, dims=['lat', 'lon'],
coords={'lon':[11, 12, 13], 'lat':[1, 2]})
da
<xarray.DataArray (lat: 2, lon: 3)> array([[1, 3, 9], [2, 8, 4]]) Coordinates: * lon (lon) int64 11 12 13 * lat (lat) int64 1 2
da.sel(lon=13, lat=2).values
array(4)
da.mean(dim='lat')
<xarray.DataArray (lon: 3)> array([ 1.5, 5.5, 6.5]) Coordinates: * lon (lon) int64 11 12 13
f = 'ERA-Int-MonthlyAvg-4D-TUVWZ.nc'
ds = xr.open_dataset(f)
ds
<xarray.Dataset> Dimensions: (latitude: 241, level: 15, longitude: 480, month: 12) Coordinates: * latitude (latitude) float32 90.0 89.25 88.5 87.75 87.0 ... * level (level) int32 50 100 150 200 300 400 500 600 ... * longitude (longitude) float32 -180.0 -179.25 -178.5 ... * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12 Data variables: u (month, level, latitude, longitude) float64 10.38 ... v (month, level, latitude, longitude) float64 5.594 ... w (month, level, latitude, longitude) float64 -0.0003052 ... z (month, level, latitude, longitude) float64 1.888e+05 ... t (month, level, latitude, longitude) float64 201.1 ... Attributes: Conventions: CF-1.0 Info: Monthly ERA-Interim data. Downloaded and edited by fabien.maussion@uibk.ac.at
ds.t.sel(month=8, level=850)
<xarray.DataArray 't' (latitude: 241, longitude: 480)> [115680 values with dtype=float64] Coordinates: * latitude (latitude) float32 90.0 89.25 88.5 87.75 87.0 ... level int32 850 * longitude (longitude) float32 -180.0 -179.25 -178.5 ... month int64 8 Attributes: units: K long_name: Temperature standard_name: air_temperature
ds.t.isel(month=7, level=11)
<xarray.DataArray 't' (latitude: 241, longitude: 480)> [115680 values with dtype=float64] Coordinates: * latitude (latitude) float32 90.0 89.25 88.5 87.75 87.0 ... level int32 850 * longitude (longitude) float32 -180.0 -179.25 -178.5 ... month int64 8 Attributes: units: K long_name: Temperature standard_name: air_temperature
ds.t.sel(level=1001, latitude=47.26, longitude=11.38, method='nearest')
<xarray.DataArray 't' (month: 12)> array([ 278.921875, 279.09375 , 282.140625, 285.359375, 290.132812, 293.390625, 295.84375 , 295.867188, 292.21875 , 288.484375, 283.117188, 280.054688]) Coordinates: latitude float32 47.25 level int32 1000 longitude float32 11.25 * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12 Attributes: units: K long_name: Temperature standard_name: air_temperature
ds.t[7, 11, :, :]
<xarray.DataArray 't' (latitude: 241, longitude: 480)> [115680 values with dtype=float64] Coordinates: * latitude (latitude) float32 90.0 89.25 88.5 87.75 87.0 ... level int32 850 * longitude (longitude) float32 -180.0 -179.25 -178.5 ... month int64 8 Attributes: units: K long_name: Temperature standard_name: air_temperature
ds.u.mean(dim=['month', 'longitude']).plot.contourf(levels=13)
plt.ylim([1000, 100]);
u_avg = ds.u.mean(dim=['month', 'longitude'])
u_avg_masked = u_avg.where(u_avg > 12)
u_avg_masked.plot.contourf(levels=13)
plt.ylim([1000, 100]);
a = xr.DataArray(np.arange(3), dims='time',
coords={'time':np.arange(3)})
b = xr.DataArray(np.arange(4), dims='space',
coords={'space':np.arange(4)})
a + b
<xarray.DataArray (time: 3, space: 4)> array([[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]) Coordinates: * time (time) int64 0 1 2 * space (space) int64 0 1 2 3
a = xr.DataArray(np.arange(3), dims='time',
coords={'time':np.arange(3)})
b = xr.DataArray(np.arange(5), dims='time',
coords={'time':np.arange(5)+1})
a + b
<xarray.DataArray (time: 2)> array([1, 3]) Coordinates: * time (time) int64 1 2
ts = ds.t.sel(level=1001, latitude=47.26, longitude=11.38, method='nearest')
ts.plot();
import cartopy.crs as ccrs
ax = plt.axes(projection=ccrs.Robinson())
ds.z.sel(level=1000, month=8).plot(ax=ax, transform=ccrs.PlateCarree());
ax.coastlines();
Opening all files in a directory...
mfs = '/home/mowglie/disk/Data/Gridded/GPM/3BDAY_sorted/*.nc'
dsmf = xr.open_mfdataset(mfs)
... results in a consolidated dataset ...
dsmf
<xarray.Dataset> Dimensions: (lat: 732, lon: 620, time: 672) Coordinates: * lat (lat) float32 -56.95 -56.85 -56.75 ... * lon (lon) float32 -93.55 -93.45 -93.35 ... * time (time) datetime64[ns] 2014-03-31 ... Data variables: precipitationCal (time, lat, lon) float64 0.0 0.0 0.3 ... precipitationHQ (time, lat, lon) float64 0.0 0.0 0.325 ...
... on which all usual operations can be applied:
dsmf = dsmf.sel(time='2015')
dsmf
<xarray.Dataset> Dimensions: (lat: 732, lon: 620, time: 365) Coordinates: * lat (lat) float32 -56.95 -56.85 -56.75 ... * lon (lon) float32 -93.55 -93.45 -93.35 ... * time (time) datetime64[ns] 2015-01-01 ... Data variables: precipitationCal (time, lat, lon) float64 0.0 0.0 0.0 ... precipitationHQ (time, lat, lon) float64 0.0 0.0 0.0 ...
Yes, even computations!
ts = dsmf.precipitationCal.mean(dim=['lon', 'lat'])
ts
<xarray.DataArray 'precipitationCal' (time: 365)> dask.array<mean_ag..., shape=(365,), dtype=float64, chunksize=(1,)> Coordinates: * time (time) datetime64[ns] 2015-01-01 2015-01-02 ...
Computations are done "lazily"
No actual computation has happened yet:
ts.data
dask.array<mean_ag..., shape=(365,), dtype=float64, chunksize=(1,)>
But they can be triggered:
ts = ts.load()
ts
<xarray.DataArray 'precipitationCal' (time: 365)> array([ 2.297214, 3.00098 , 2.532836, ..., 2.516468, 2.334409, 3.469001]) Coordinates: * time (time) datetime64[ns] 2015-01-01 2015-01-02 ...
For more information: http://xarray.pydata.org/en/stable/dask.html
ts.plot();
ts.rolling(time=31, center=True).mean().plot();
Taken from: http://ajdawson.github.io/eofs/examples/nao_xarray.html
from eofs.xarray import Eof
from eofs.examples import example_data_path
# Read geopotential height data using the xarray module
filename = example_data_path('hgt_djf.nc')
z_djf = xr.open_dataset(filename)['z']
# Compute anomalies by removing the time-mean.
z_djf = z_djf - z_djf.mean(dim='time')
# Create an EOF solver to do the EOF analysis.
coslat = np.cos(np.deg2rad(z_djf.coords['latitude'].values)).clip(0., 1.)
solver = Eof(z_djf, weights=np.sqrt(coslat)[..., np.newaxis])
# Get the leading EOF
eof1 = solver.eofsAsCovariance(neofs=1)
# Leading EOF expressed as covariance in the European/Atlantic domain
ax = plt.axes(projection=ccrs.Orthographic(central_longitude=-20, central_latitude=60))
ax.coastlines() ; ax.set_global()
eof1[0, 0].plot.contourf(ax=ax, levels=np.linspace(-75, 75, 11),
cmap=plt.cm.RdBu_r, add_colorbar=False,
transform=ccrs.PlateCarree())
ax.set_title('EOF1 expressed as covariance', fontsize=16);
http://salem.readthedocs.io/en/latest/
Try it out:
pip install salem
# importing salem adds a new "toolbox" to xarray objects
import salem
pday = dsmf.precipitationCal.sel(time='2015-02-01')
cm = pday.salem.quick_map(cmap='Blues', vmax=100);
shdf = salem.read_shapefile(salem.get_demo_file('world_borders.shp'))
shdf = shdf.loc[shdf['CNTRY_NAME'].isin(['Peru'])]
dsmfperu = dsmf.salem.subset(shape=shdf, margin=10)
pday = dsmfperu.precipitationCal.sel(time='2015-02-01')
cm = pday.salem.quick_map(cmap='Blues', vmax=100);
dsmfperu = dsmfperu.salem.roi(shape=shdf)
pday = dsmfperu.precipitationCal.sel(time='2015-02-01')
cm = pday.salem.quick_map(cmap='Blues', vmax=100);
prpc_a = dsmfperu.precipitationCal.sum(dim=['time']).load()
prpc_a.salem.quick_map(cmap='Blues', vmax=5000);