#!/usr/bin/env python
# Copyright 2017 ARC Centre of Excellence for Climate Systems Science
# author: Scott Wales <scott.wales@unimelb.edu.au>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from .base import Base
from .cfnetcdf import File as CFFile, Variable, cf_variable_link
from sqlalchemy.dialects.postgresql import UUID, ARRAY
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, ForeignKey, Table, join, ForeignKeyConstraint
from sqlalchemy.orm import relationship, column_property
from sqlalchemy.types import Text, Boolean, Integer, Date
# Association table tying a CMIP5 file (identified by its metadata hash) to a
# dataset, a dataset version and a variable id. It is the ``secondary`` table
# for the File relationships and for Version.files / Timeseries.files below.
cmip5_attributes_links = Table(
    'cmip5_attributes_links',
    Base.metadata,
    # The hash carries two foreign keys: the CMIP5 attributes row and the
    # path row of the same file
    Column(
        'md_hash',
        UUID,
        ForeignKey('cmip5_attributes.md_hash'),
        ForeignKey('paths.pa_hash'),
        primary_key=True,
    ),
    Column('dataset_id', UUID, ForeignKey('cmip5_dataset.dataset_id')),
    Column('version_id', UUID, ForeignKey('cmip5_version.version_id')),
    # No foreign key is declared on the variable id
    Column('variable_id', UUID),
    Column('variable_list', ARRAY(Text)),
)
# Link table between a dataset and (version_id, variable_id) pairs; it is the
# ``secondary`` table of Dataset.variables.
# NOTE(review): presumably holds only the newest version of each variable, as
# the table name suggests - confirm against the database definition.
cmip5_latest_version = Table(
    'cmip5_latest_version',
    Base.metadata,
    Column('dataset_id', UUID, ForeignKey('cmip5_dataset.dataset_id')),
    # (version_id, variable_id) together form the primary key
    Column(
        'version_id',
        UUID,
        ForeignKey('cmip5_version.version_id'),
        primary_key=True,
    ),
    Column('variable_id', UUID, primary_key=True),
)
class File(CFFile):
    """
    The attributes of a single CMIP5 output file

    Mapped to the ``cmip5_attributes`` table and registered under the
    ``'CMIP5'`` polymorphic identity of :class:`CFFile`.

    Relationships:

    .. attribute:: dataset

        :class:`Dataset`: The dataset this file is part of

    .. attribute:: version

        :class:`Version`: This file's dataset version

    .. attribute:: warnings

        [:class:`Warning`]: Warnings associated with this file

    .. attribute:: timeseries

        :class:`Timeseries` holding all files in the dataset with the same
        variables

    Attributes:

    .. attribute:: experiment_id
    .. attribute:: frequency
    .. attribute:: institute_id
    .. attribute:: model_id
    .. attribute:: modeling_realm
    .. attribute:: product
    .. attribute:: table_id
    .. attribute:: tracking_id
    .. attribute:: version_number
    .. attribute:: realization
    .. attribute:: initialization_method
    .. attribute:: physics_version
    """
    __tablename__ = 'cmip5_attributes'

    # Primary key, shared with the raw CF attributes row of the same file
    md_hash = Column(
        UUID,
        ForeignKey('cf_attributes_raw.md_hash'),
        primary_key=True,
    )

    # CMIP5 metadata fields, all stored as text
    experiment_id = Column(Text)
    frequency = Column(Text)
    institute_id = Column(Text)
    model_id = Column(Text)
    modeling_realm = Column(Text)
    product = Column(Text)
    table_id = Column(Text)
    tracking_id = Column(Text)
    version_number = Column(Text)
    realization = Column(Text)
    initialization_method = Column(Text)
    physics_version = Column(Text)

    # Every relationship below is routed through cmip5_attributes_links
    dataset = relationship(
        'Dataset',
        secondary=cmip5_attributes_links,
        uselist=False,
    )

    version = relationship(
        'cmip5.Version',
        secondary=cmip5_attributes_links,
        uselist=False,
        back_populates='files',
    )

    # Warnings are attached to versions, so join on the link's version_id
    warnings = relationship(
        'cmip5.Warning',
        secondary=cmip5_attributes_links,
        secondaryjoin=(
            'cmip5_attributes_links.c.version_id == cmip5.Warning.version_id'
        ),
    )

    # Disabled link to the legacy model, kept for reference:
    #@ old_version = relationship(
    #@     'old_cmip5.Model.Version',
    #@     secondary=cmip5_attributes_links,
    #@     viewonly=True)

    # A timeseries is identified by the (version_id, variable_id) pair
    timeseries = relationship(
        'Timeseries',
        secondary=cmip5_attributes_links,
        secondaryjoin=(
            'and_('
            'Timeseries.version_id == cmip5_attributes_links.c.version_id,'
            'Timeseries.variable_id == cmip5_attributes_links.c.variable_id'
            ')'
        ),
        uselist=False,
        back_populates='files',
    )

    __mapper_args__ = {'polymorphic_identity': 'CMIP5'}
class Path(Base):
    """
    An indexed table of CMIP5 paths only

    Mapped to ``cmip5_path``; each row is keyed by the file hash and stores
    the file's path in the ``pa_path`` column.
    """
    __tablename__ = 'cmip5_path'

    # The hash references both the CMIP5 attributes table and the links table
    pa_hash = Column(
        UUID,
        ForeignKey('cmip5_attributes.md_hash'),
        ForeignKey('cmip5_attributes_links.md_hash'),
        primary_key=True,
    )

    # Exposed as ``path`` on the class, stored as ``pa_path`` in the table
    path = Column('pa_path', Text)

    # Disabled relationship back to the File, kept for reference:
    # file = relationship('cmip5.File',
    #         viewonly=True,
    #         primaryjoin='cmip5.Path.pa_hash == foreign(cmip5.File.md_hash)')
class Version(Base):
    """
    A version of an ESGF dataset

    Over time files within a dataset get updated, due to bug fixes and
    processing improvements. This results in multiple versions of files getting
    published to ESGF
    """
    __tablename__ = 'cmip5_version'

    version_id = Column(UUID, primary_key=True)

    # Declared as UUID (previously Text) so the column type matches the
    # referenced primary key ``cmip5_dataset.dataset_id`` and every other
    # ``dataset_id`` column in this module. A Text column cannot carry a
    # foreign key to a uuid column in PostgreSQL.
    dataset_id = Column(UUID, ForeignKey('cmip5_dataset.dataset_id'))

    #: str: Version number
    version_number = Column(Text)

    #: boolean: True if this is the latest version available
    is_latest = Column(Boolean)

    #: :class:`Dataset`: Dataset associated with this version
    dataset = relationship('Dataset', back_populates='versions')

    #: :class:`VersionOverride`: Errata information for this version
    override = relationship('VersionOverride', uselist=False)

    #: list[:class:`File`]: Files belonging to this dataset version
    files = relationship(
        'cmip5.File',
        secondary=cmip5_attributes_links,
        back_populates='version')

    #: list[:class:`Warning`]: Warnings attached to the dataset by users,
    #: ordered by the date they were added
    warnings = relationship(
        'cmip5.Warning',
        order_by='cmip5.Warning.added_on',
        back_populates='dataset_version')

    #: list[:class:`Timeseries`]: Variables available in this version
    #: (read-only relationship)
    variables = relationship(
        'Timeseries',
        back_populates='version',
        viewonly=True)

    def open(self):
        """
        Open all variables in the dataset

        Not implemented yet: the method currently does nothing and returns
        ``None``.
        """
        pass
class VersionOverride(Base):
    """
    Errata for a CMIP5 dataset version, for cases when the published version_id
    is unset or incorrect

    Editing this table will automatically update the corresponding
    :class:`Version`::

        v = session.query(Version).first()
        v.override = VersionOverride(version_number='v20120101')
        session.add(v)
    """
    __tablename__ = 'cmip5_override_version'

    # One override row per version: the primary key is also the foreign key
    version_id = Column(
        UUID,
        ForeignKey('cmip5_version.version_id'),
        primary_key=True,
    )

    #: str: New version number
    version_number = Column(Text)

    #: boolean: True if this is the latest version available
    is_latest = Column(Boolean)
class Dataset(Base):
    """
    A CMIP5 Dataset, as you'd find listed on ESGF
    """
    __tablename__ = 'cmip5_dataset'

    dataset_id = Column(UUID, primary_key=True)

    #: str: ID of the experiment
    experiment_id = Column(Text)

    #: str: ID of the institute that ran the experiment
    institute_id = Column(Text)

    #: str: ID of the model used
    model_id = Column(Text)

    #: str: Model component - atmos, land, ocean, etc.
    modeling_realm = Column(Text)

    #: str: Data output frequency
    frequency = Column(Text)

    #: str: Ensemble member
    ensemble_member = Column(Text)

    #: str: MIP Table
    mip_table = Column(Text)

    #: list[:class:`Version`]: Available versions of this dataset, in release order
    versions = relationship(
        'cmip5.Version',
        back_populates='dataset',
        order_by='(cmip5.Version.is_latest, cmip5.Version.version_number)')

    #: list[:class:`Timeseries`]: The most recent versions of the variables in this dataset
    variables = relationship(
        'Timeseries',
        secondary=cmip5_latest_version,
        secondaryjoin='and_(Timeseries.version_id == cmip5_latest_version.c.version_id,'
                      'Timeseries.variable_id == cmip5_latest_version.c.variable_id)',
        back_populates='dataset')

    @property
    def latest_version(self):
        """
        The latest :class:`Version` for this dataset

        This is the last entry of :attr:`versions`, which is ordered by
        ``(is_latest, version_number)``.

        Raises:
            IndexError: if the dataset has no versions
        """
        return self.versions[-1]

    def drstree_path(self):
        """
        Get the drs tree path to variables within this dataset

        Returns:
            str: ``<base>/<model_id>/<experiment_id>/<frequency>/``
            ``<modeling_realm>/<ensemble_member>`` under the fixed DRS root
        """
        # Imported here because the module-level imports do not provide
        # ``os``; without it this method raised NameError.
        import os

        base = '/g/data1/ua6/DRSv2/CMIP5'
        return os.path.join(
            base,
            self.model_id,
            self.experiment_id,
            self.frequency,
            self.modeling_realm,
            self.ensemble_member)
class Warning(Base):
    """
    A free-text warning attached to a dataset :class:`Version`

    Note that within this module the class name shadows the builtin
    ``Warning`` exception.
    """
    __tablename__ = 'cmip5_warning'

    id = Column(Integer, primary_key=True)

    # The version this warning applies to
    version_id = Column(UUID, ForeignKey('cmip5_version.version_id'))

    #: str: Warning text
    warning = Column(Text)

    #: str: Who added the warning
    added_by = Column(Text)

    #: date: Date the warning was added
    added_on = Column(Date)

    #: :class:`Version`: The dataset version the warning is attached to
    dataset_version = relationship(
        'cmip5.Version',
        back_populates='warnings',
    )

    def __str__(self):
        """
        Format the warning as ``<added_on> (<added_by>): <warning>``
        """
        fields = (self.added_on, self.added_by, self.warning)
        return u'%s (%s): %s' % fields
class Timeseries(Base):
    """
    All the files at a given Dataset, Variable and Version

    Rows are keyed by ``(version_id, variable_id)``; the same pair links the
    timeseries to its files through ``cmip5_attributes_links``.
    """
    __tablename__ = 'cmip5_timeseries_link'

    dataset_id = Column(UUID, ForeignKey('cmip5_dataset.dataset_id'))
    version_id = Column(UUID, ForeignKey('cmip5_version.version_id'), primary_key=True)
    variable_id = Column(UUID, primary_key=True)
    variable_list = Column(ARRAY(Text))

    # Composite foreign key so the ``files`` relationship can derive its
    # join to the link table from (version_id, variable_id)
    __table_args__ = (
        ForeignKeyConstraint(
            ['version_id', 'variable_id'],
            ['cmip5_attributes_links.version_id', 'cmip5_attributes_links.variable_id'],
        ),
    )

    #: Dataset this timeseries is part of
    dataset = relationship('Dataset', back_populates='variables')

    #: Dataset version of this timeseries
    version = relationship('cmip5.Version', back_populates='variables', viewonly=True)

    #: List of files in this timeseries
    files = relationship(
        'cmip5.File',
        secondary=cmip5_attributes_links,
        # primaryjoin = 'and_('
        #     'Timeseries.version_id == cmip5_attributes_links.c.version_id,'
        #     'Timeseries.variable_id == cmip5_attributes_links.c.variable_id'
        #     ')',
        back_populates='timeseries')

    #: Warnings of the associated version, proxied through ``version``
    warnings = association_proxy('version', 'warnings')

    def open(self):
        """
        Open all files in the set

        Calls ``open()`` on every entry of :attr:`files` and concatenates the
        results along the ``'time'`` dimension with :func:`xarray.concat`.

        NOTE(review): relies on :class:`File` providing an ``open()`` method
        (presumably inherited from the CF file base class) - confirm.
        """
        # Imported here because the module-level imports do not provide
        # ``xarray``; without it this method raised NameError.
        import xarray

        opened = [member.open() for member in self.files]
        return xarray.concat(opened, 'time')