Source code for ARCCSSive.model.old_cmip5

#!/usr/bin/env python
# Copyright 2017 ARC Centre of Excellence for Climate Systems Science
# author: Scott Wales <scott.wales@unimelb.edu.au>
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from __future__ import print_function, unicode_literals
from sqlalchemy import Column, Integer, Text, Boolean, ForeignKey, Date, UniqueConstraint
from sqlalchemy import ForeignKeyConstraint
from sqlalchemy import select, func, join
from sqlalchemy.orm import relationship
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm.session import object_session
from ARCCSSive.data import *

from ARCCSSive.model.base import Base
import ARCCSSive.model.cmip5 as cmip5

import os
import glob

class Instance(Base):
    """
    One variable of one CMIP5 dataset

    Rows are keyed by the pair (``dataset_id``, ``variable``).

    **Relationships:**

    .. attribute:: versions

        list[:class:`Version`]: Every available version of this dataset
        and variable, ordered by ``is_latest`` and then ``version``

    .. attribute:: latest_version

        :class:`Version`: The most recent version of this dataset

    .. attribute:: files

        list[:class:`ARCCSSive.model.cmip5.File`]: All files of this dataset
        and variable, whatever version they belong to

    **Attributes:**

    .. attribute:: variable

        Variable name

    .. attribute:: experiment

        CMIP experiment

    .. attribute:: mip

        MIP table specifying output frequency and realm

    .. attribute:: model

        Model that generated the dataset

    .. attribute:: ensemble

        Ensemble member

    .. attribute:: realm

        Realm: ie atmos, ocean
    """
    __tablename__ = 'old_cmip5_instance'

    # Composite primary key: (dataset_id, variable)
    dataset_id = Column(
        UUID,
        ForeignKey('cmip5_dataset.dataset_id'),
        primary_key=True,
    )
    variable = Column(Text, primary_key=True)
    variable_id = Column(UUID)
    experiment = Column(Text)
    mip = Column(Text)
    model = Column(Text)
    ensemble = Column(Text)
    realm = Column(Text)

    # (dataset_id, variable_id) is declared as a foreign key into both link
    # tables that the `secondary=` relationships below go through.
    __table_args__ = (
        ForeignKeyConstraint(
            ['dataset_id', 'variable_id'],
            ['cmip5_latest_version.dataset_id',
             'cmip5_latest_version.variable_id'],
        ),
        ForeignKeyConstraint(
            ['dataset_id', 'variable_id'],
            ['cmip5_attributes_links.dataset_id',
             'cmip5_attributes_links.variable_id'],
        ),
    )

    # Read-only list of versions, matched on dataset id and variable name.
    versions = relationship(
        'old_cmip5.Version',
        order_by='(old_cmip5.Version.is_latest, old_cmip5.Version.version)',
        primaryjoin=(
            'and_(old_cmip5.Instance.dataset_id == old_cmip5.Version.dataset_id,'
            'old_cmip5.Instance.variable == old_cmip5.Version.variable_name)'
        ),
        viewonly=True,
    )

    # Scalar (uselist=False), read-only link resolved through the
    # cmip5_latest_version table.
    latest_version = relationship(
        'old_cmip5.Version',
        secondary=cmip5.cmip5_latest_version,
        uselist=False,
        viewonly=True,
    )

    # Read-only list of files resolved through cmip5_attributes_links.
    files = relationship(
        'model.cmip5.File',
        secondary=cmip5.cmip5_attributes_links,
        viewonly=True,
    )

    # NOTE: missing versions are labelled NA in the database and v20110427 in
    # drstree (the CMOR documentation date); ordering by version does not work
    # when the version is NA.
    #
    # NOTE: an earlier, now disabled, implementation of latest() worked on the
    # `versions` list directly: it returned the versions flagged `is_latest`,
    # or failing that the version(s) carrying the newest date stamp. The method
    # below relies on the `latest_version` relationship instead.

    def latest(self):
        """
        Returns the latest version of this instance, wrapped in a list

        :returns: Single-element list holding :attr:`latest_version`
        """
        newest = self.latest_version
        return [newest]

    def filenames(self):
        """
        Returns the file names of the latest version of this variable

        Delegates to the ``filenames()`` method of :attr:`latest_version`.

        :returns: List of file names
        """
        newest = self.latest_version
        return newest.filenames()

    def drstree_path(self):
        """
        Returns the drstree path of the latest version of this instance

        The path is the DRS root followed by model, experiment, frequency,
        realm, ensemble and variable, and ends in ``/latest``.

        :returns: Path string
        """
        # Previous root was "/g/data1/ua6/drstree/CMIP5/"; it should eventually
        # be passed in through a DRSTREE environment variable.
        # The root below points to the temporary location of the new drstree.
        drs_root = "/g/data1/ua6/DRSv2/CMIP5/"
        # `mip_dict` is not defined in this module; presumably it is provided
        # by the `from ARCCSSive.data import *` star-import. The first element
        # of the entry for this MIP table is used as the frequency.
        frequency = mip_dict[self.mip][0]
        components = [
            self.model,
            self.experiment,
            frequency,
            self.realm,
            self.ensemble,
            self.variable,
        ]
        return drs_root + "/".join(components) + "/latest"
# Add alias to deprecated name Variable = Instance
class Version(Base):
    """
    A version of a model run's variable

    Rows are keyed by the pair (``version_id``, ``variable_name``).

    **Relationships:**

    .. attribute:: variable

        :class:`Instance`: Dataset and variable this version is attached to

    .. attribute:: warnings

        [:class:`ARCCSSive.model.cmip5.Warning`]: Warnings attached to this
        dataset version

    .. attribute:: files

        [:class:`ARCCSSive.model.cmip5.File`]: Files belonging to this
        dataset version

    **Attributes:**

    .. attribute:: version

        Version identifier

    .. attribute:: path

        Path to the output directory

    .. testsetup::

        >>> cmip5 = getfixture('session')
        >>> instance = cmip5.query(Instance).filter_by(dataset_id = 'c6d75f4c-793b-5bcc-28ab-1af81e4b679d', variable='tas').one()
        >>> version = instance.latest()
        >>> version = instance.versions[-1]
    """
    __tablename__ = 'old_cmip5_version'

    dataset_id = Column(UUID, ForeignKey('cmip5_dataset.dataset_id'))
    version_id = Column(
        UUID,
        ForeignKey('cmip5_version.version_id'),
        primary_key=True,
    )
    variable_id = Column(UUID)
    # Stored in the database column named 'variable'; the Python attribute
    # `variable` is used for the relationship to Instance further down.
    variable_name = Column('variable', Text, primary_key=True)

    version = Column(Text)
    is_latest = Column(Boolean)

    # NOTE: columns and relationships of an earlier revision (integer id,
    # instance_id, path, checked_on, to_update, and the VersionWarning /
    # VersionFile relationships) are disabled and not mapped here.

    __table_args__ = (
        ForeignKeyConstraint(
            ['variable', 'dataset_id'],
            ['old_cmip5_instance.variable',
             'old_cmip5_instance.dataset_id'],
        ),
        ForeignKeyConstraint(
            ['version_id', 'variable_id', 'dataset_id'],
            ['cmip5_latest_version.version_id',
             'cmip5_latest_version.variable_id',
             'cmip5_latest_version.dataset_id'],
        ),
        ForeignKeyConstraint(
            ['version_id', 'variable_id', 'dataset_id'],
            ['cmip5_attributes_links.version_id',
             'cmip5_attributes_links.variable_id',
             'cmip5_attributes_links.dataset_id'],
        ),
    )

    # Scalar, read-only link to the timeseries with the same dataset and
    # version ids whose variable_list contains this variable name.
    timeseries = relationship(
        'cmip5.Timeseries',
        primaryjoin='and_('
        'old_cmip5.Version.dataset_id == foreign(cmip5.Timeseries.dataset_id),'
        'old_cmip5.Version.version_id == foreign(cmip5.Timeseries.version_id),'
        'cmip5.Timeseries.variable_list.any(old_cmip5.Version.variable_name)'
        ')',
        uselist=False,
        viewonly=True,
    )

    new_version = relationship('cmip5.Version')
    # `warnings` is read through `new_version` rather than stored here.
    warnings = association_proxy('new_version', 'warnings')

    files = relationship(
        'cmip5.File',
        secondary=cmip5.cmip5_attributes_links,
        viewonly=True,
    )

    variable = relationship('old_cmip5.Instance', viewonly=True)

    # Read-only list of paths, sorted by path.
    paths = relationship(
        'cmip5.Path',
        secondary=cmip5.cmip5_attributes_links,
        order_by='cmip5.Path.path',
        viewonly=True,
    )

    @property
    def path(self):
        """
        Directory holding the files of this version

        Computed as the directory name of the first entry of :attr:`paths`;
        the ``[0]`` lookup fails if :attr:`paths` is empty.
        """
        first = self.paths[0]
        return os.path.dirname(first.path)

    def glob(self):
        """
        Get the glob string matching the CMIP5 filename

        The pattern is built from the variable, mip, model, experiment and
        ensemble of the attached :class:`Instance`.

        :returns: Glob pattern string

        .. testsetup::

            >>> import six
            >>> cmip5 = getfixture('session')
            >>> version = cmip5.query(Version).filter_by(version_id = 'ed04fb7a-79e2-5b2f-2569-42abffd322db', variable_name='tas').one()

        >>> six.print_(version.glob())
        tas_day_ACCESS1.3_rcp45_r1i1p1*.nc
        """
        inst = self.variable
        return '%s_%s_%s_%s_%s*.nc' % (
            inst.variable,
            inst.mip,
            inst.model,
            inst.experiment,
            inst.ensemble,
        )

    def build_filepaths(self):
        """
        Returns the list of files matching this version

        :returns: List of full file paths, one per entry of :attr:`paths`

        .. testsetup::

            >>> import six
            >>> import pprint
            >>> cmip5 = getfixture('session')
            >>> version = cmip5.query(Version).filter_by(version_id = 'ed04fb7a-79e2-5b2f-2569-42abffd322db', variable_name='tas').one()

        >>> pprint.pprint(version.build_filepaths())
        ['/g/data1/ua6/unofficial-ESG-replica/tmp/tree/pcmdi9.llnl.gov/thredds/fileServer/cmip5_css02_data/cmip5/output1/CSIRO-BOM/ACCESS1-3/rcp45/day/atmos/day/r1i1p1/tas/1/tas_day_ACCESS1-3_rcp45_r1i1p1_20060101-20301231.nc',
         '/g/data1/ua6/unofficial-ESG-replica/tmp/tree/pcmdi9.llnl.gov/thredds/fileServer/cmip5_css02_data/cmip5/output1/CSIRO-BOM/ACCESS1-3/rcp45/day/atmos/day/r1i1p1/tas/1/tas_day_ACCESS1-3_rcp45_r1i1p1_20310101-20551231.nc',
         '/g/data1/ua6/unofficial-ESG-replica/tmp/tree/pcmdi9.llnl.gov/thredds/fileServer/cmip5_css02_data/cmip5/output1/CSIRO-BOM/ACCESS1-3/rcp45/day/atmos/day/r1i1p1/tas/1/tas_day_ACCESS1-3_rcp45_r1i1p1_20560101-20801231.nc',
         '/g/data1/ua6/unofficial-ESG-replica/tmp/tree/pcmdi9.llnl.gov/thredds/fileServer/cmip5_css02_data/cmip5/output1/CSIRO-BOM/ACCESS1-3/rcp45/day/atmos/day/r1i1p1/tas/1/tas_day_ACCESS1-3_rcp45_r1i1p1_20810101-21001231.nc']
        """
        return [entry.path for entry in self.paths]

    def filenames(self):
        """
        Returns the list of filenames for this version

        :returns: List of file names (base names of :attr:`paths`)

        .. testsetup::

            >>> cmip5 = getfixture('session')
            >>> version = cmip5.query(Version).filter_by(version_id = 'ed04fb7a-79e2-5b2f-2569-42abffd322db', variable_name='tas').one()

        >>> sorted(version.filenames())
        ['tas_day_ACCESS1-3_rcp45_r1i1p1_20060101-20301231.nc', 'tas_day_ACCESS1-3_rcp45_r1i1p1_20310101-20551231.nc', 'tas_day_ACCESS1-3_rcp45_r1i1p1_20560101-20801231.nc', 'tas_day_ACCESS1-3_rcp45_r1i1p1_20810101-21001231.nc']
        """
        return [os.path.basename(entry.path) for entry in self.paths]

    def tracking_ids(self):
        """
        Returns the list of tracking_ids for files in this version

        :returns: List of tracking_ids, one per entry of :attr:`files`

        .. testsetup::

            >>> cmip5 = getfixture('session')
            >>> version = cmip5.query(Version).filter_by(version_id = 'ed04fb7a-79e2-5b2f-2569-42abffd322db', variable_name='tas').one()

        >>> sorted(version.tracking_ids())
        ['54779e2d-41fb-4671-bbdf-2170385afa3b', '800713b7-c303-4618-aef9-f72548d5ada6', 'd2813685-9c7c-4527-8186-44a8f19d31dd', 'f810f58d-329e-4934-bb1c-28c5c314e073']
        """
        return [entry.tracking_id for entry in self.files]

    def drstree_path(self):
        """
        Returns the drstree path for this particular version

        The path is the ``drstree_path()`` of the attached :class:`Instance`
        with its trailing ``latest`` component replaced by this version's
        identifier.

        :returns: Path string
        """
        # A missing version is filed under v20110427 in drstree.
        version = self.version if self.version is not None else 'v20110427'

        latest_path = self.variable.drstree_path()
        marker = 'latest'
        if latest_path.endswith(marker):
            # Swap only the final component: a blanket str.replace would also
            # rewrite any model/experiment/variable name containing 'latest'.
            return latest_path[:-len(marker)] + version
        # Unexpected layout: keep the historical substitution behaviour.
        return latest_path.replace(marker, version)