# -*- coding: utf-8 -*-
"""
Set of functions to download CMEMS files using FTP (for static mask data) and MOTU (for subsetted variable data).

"""
# import modules
from subprocess import Popen, PIPE, CalledProcessError
import xml.etree.ElementTree as ET
import logging
import ftplib
import re
import pandas as pd
from datetime import datetime
from pathlib import Path
import glob
import os
#local imports
from pynemo.utils import cmems_errors as errors

logger = logging.getLogger(__name__)
# TODO: Fix double spacing issue on CMEMS download log entries.
'''
This function checks to see if the MOTU client is installed in the PyNEMO python environment. If it is not installed,
error code 1 is returned. If it is installed, the version number of the installed client is returned as a string.
'''
def chk_motu():
    """Check whether the MOTU client can be run and report its version.

    Runs ``motuclient --version`` and inspects what it prints.

    Returns:
        int: 1 when the ``motuclient`` executable cannot be launched, or when
            its output does not contain ``motuclient-python``.
        str: whatever the client wrote to stderr, if anything; otherwise the
            version text, taken from the first ``v`` in the output to its end
            (or the whole output when it contains no ``v``).
    """
    try:
        proc = Popen(['motuclient', '--version'], stdout=PIPE, stderr=PIPE,
                     universal_newlines=True)
    except OSError:
        # The executable is missing or not runnable: report "not installed"
        # through the documented status code instead of crashing the caller.
        return 1

    stdout, stderr = proc.communicate()
    stdout = stdout.strip()
    stderr = stderr.strip()

    if stderr:
        return stderr

    if 'motuclient-python' not in stdout:
        return 1

    idx = stdout.find('v')
    if idx == -1:
        return stdout
    # The output is already stripped, so slice to the very end; slicing to -1
    # would cut off the last character of the version number.
    return stdout[idx:]


'''
CMEMS holds mask data for the model grids as an FTP download only, i.e. it can't be used with the MOTU subsetter.
This code logs on to the FTP server and downloads the requested files. The config bdy file needs to provide the location and
filenames to download. These can be found using an FTP server or the CMEMS web portal. The credentials for the
FTP connection (and MOTU client) are stored in a credentials file called CMEMS_cred.py located in the utils folder.
If the download is successful a zero status code is returned. Otherwise an error message is returned in the form of a string.
'''
def get_static(args):
    """Download the static (mask) files listed in the settings over FTP.

    Args:
        args: settings mapping. Keys read here: ``ftp_server`` (host name),
            ``static_dir`` (remote directory), ``static_filenames``
            (whitespace separated file names) and ``cmems_dir`` (local
            destination prefix; the file name is appended to it directly).

    Returns:
        int: 0 when every file was downloaded.
        str: an error message when the credentials module cannot be imported
            or when the FTP server reports an error while connecting,
            changing directory or downloading.
    """
    try:
        from pynemo.utils import CMEMS_cred
    except ImportError:
        logger.error('Unable to import CMEMS credentials, see Readme for instructions on adding to PyNEMO')
        return 'Unable to import credential file, have you created one?'

    try:
        logger.info('connecting to FTP host......')
        ftp = ftplib.FTP(host=args['ftp_server'], user=CMEMS_cred.user, passwd=CMEMS_cred.pwd)
    except ftplib.error_temp:
        return 'temporary error in FTP connection, please try running PyNEMO again........'
    except (ftplib.error_perm, ftplib.error_reply, ftplib.error_proto) as err:
        # Return the message text, not the exception object, so callers that
        # test for a string (and search it for known error keys) see it.
        return str(err)
    # TODO: provide better returns for the various FTP errors
    # TODO: Check there is enough space to download as well.....
    # TODO: Handle timeouts etc as well......

    # The context manager closes the connection on every exit path,
    # including the early error returns below.
    with ftp:
        try:
            logger.info('navigating to download directoy.......')
            ftp.cwd(args['static_dir'])
            logger.info('generating download filename list......')
            # split() without an argument ignores repeated/trailing spaces,
            # which would otherwise produce empty file names.
            for f in args['static_filenames'].split():
                logger.info('downloading '+f+' now......')
                # Close each local file as soon as its transfer finishes.
                with open(args['cmems_dir']+f, 'wb') as target:
                    ftp.retrbinary("RETR " + f, target.write)
        except ftplib.error_temp:
            return 'temporary error in FTP download, please try running PyNEMO again........'
        except (ftplib.error_perm, ftplib.error_reply, ftplib.error_proto) as err:
            return str(err)

    return 0

'''
The FTP download results in the whole product grid being downloaded, so this needs to be subset to match the data downloads.
This function uses the CDO library to subset the netcdf file. Therefore CDO should be installed on the operating system.
For each of the defined static files this function subsets based on the defined extent in the settings bdy file.
If 'Abort' is in the string returned by CDO then this is returned as an error string.
If 'Abort' is not in the returned string this indicates success, and a zero status code is returned.
'''
def subset_static(args):
    """Cut every downloaded static file down to the configured lon/lat box.

    For each name in ``args['static_filenames']`` (space separated) the CDO
    executable at ``args['cdo_loc']`` is run with a ``sellonlatbox`` operator
    built from the longitude/latitude limits in ``args``. The input is
    ``cmems_dir + name``; the output is ``cmems_dir + dl_prefix + '_'`` plus
    the part of the name after its last underscore.

    Returns:
        int: 0 when every file was processed without CDO reporting anything.
        str: CDO's stderr text if it contains 'Abort', or its stdout text if
            that is not empty (processing stops at that file).
    """
    logger.info('subsetting static files now......')

    cdo_exe = args['cdo_loc']
    bounds = [args['longitude_min'], args['longitude_max'],
              args['latitude_min'], args['latitude_max']]
    operator = 'sellonlatbox,' + ','.join(str(b) for b in bounds)
    folder = args['cmems_dir']

    for name in args['static_filenames'].split(' '):
        suffix = name.split('_')[-1]
        command = [cdo_exe, operator, folder + name,
                   folder + args['dl_prefix'] + '_' + suffix]
        out, err = Popen(command, stdout=PIPE, stderr=PIPE,
                         universal_newlines=True).communicate()
        out, err = out.strip(), err.strip()

        # CDO appears to report on stderr, so that is where an abort shows up;
        # any non-empty stdout is handed back to the caller as well.
        if 'Abort' in err:
            return err
        if out:
            return out
    return 0

'''
Request CMEMS data in monthly, weekly or daily intervals. Depending on the character passed to the function as 'F',
the function will split the requests into monthly, weekly or daily intervals. The function splits the requested period into
the relevant intervals and passes each interval into the request_cmems function below. If request_cmems returns a zero then
the next interval is downloaded. If there is an error then the string containing the error is returned. Part of request_cmems
is that it returns a 1 if the interval is too large.
'''
def _cmems_intervals(date_min, date_max, F):
    """Split date_min..date_max into (start, end) 'YYYY-MM-DD' string pairs.

    'M' gives one pair per calendar month touched by the period, 'W' gives
    consecutive 7-day windows counted from date_min, and 'D' gives one pair
    per day. Every pair is clipped to the requested period, so the pairs
    cover it exactly once with no gaps and no overrun.
    """
    fmt = '%Y-%m-%d'
    first = pd.Timestamp(date_min).normalize()
    last = pd.Timestamp(date_max).normalize()
    if first > last:
        return []

    if F == 'M':
        months = pd.period_range(first, last, freq='M')
        starts = months.to_timestamp(how='start')
        # how='end' yields the last instant of the month; keep only the date.
        ends = months.to_timestamp(how='end').normalize()
    elif F == 'W':
        starts = pd.date_range(first, last, freq='7D')
        ends = starts + pd.Timedelta(days=6)
    else:
        starts = pd.date_range(first, last, freq='D')
        ends = starts

    return [(max(start, first).strftime(fmt), min(end, last).strftime(fmt))
            for start, end in zip(starts, ends)]


def MWD_request_cmems(args, date_min, date_max, F):
    """Download a period from CMEMS in monthly, weekly or daily pieces.

    Args:
        args: settings mapping, passed unchanged to ``request_cmems``.
        date_min: first day of the period (anything ``pandas.Timestamp``
            accepts, e.g. a 'YYYY-MM-DD' string).
        date_max: last day of the period, same form as ``date_min``.
        F: 'M', 'W' or 'D' to select monthly, weekly or daily requests.

    Returns:
        int: 0 when every request succeeded (also when the period is empty);
            1 when a monthly or weekly request was reported as too big.
        str: the error text of the first failed request, or a message saying
            that ``F`` is not one of 'M', 'W' or 'D'.
        None: when a daily request is still too big (an error is logged).
    """
    labels = {'M': 'month', 'W': 'week', 'D': 'day'}
    # Compare against the individual letters: a substring test against 'MWD'
    # would also accept values such as 'MW' or ''.
    if F not in ('M', 'W', 'D'):
        time_int_err = 'incorrect string used to define time download interval please use M, W or D'
        logger.error(time_int_err)
        return time_int_err

    label = labels[F]
    intervals = _cmems_intervals(date_min, date_max, F)
    total = str(len(intervals))

    for count, (start, end) in enumerate(intervals, 1):
        result = request_cmems(args, start, end)
        if isinstance(result, str):
            logger.error('CMEMS ' + label + ' request ' + str(count) + 'of' + total
                         + ' unsuccessful: Error Msg below')
            logger.error(result)
            return result
        if result == 1:
            if F == 'D':
                # A day is the smallest interval available, so there is
                # nothing finer to fall back to.
                logger.error('CMEMS day request still too big, please make domain smaller, or use less variables')
                return
            return 1
        if result == 0:
            logger.info('CMEMS ' + label + ' request ' + str(count) + 'of' + total + ' successful')

    return 0

'''
Main request cmems download function. First tries to import CMEMS credentials as per the FTP function. This function reads
the defined NCML file from the bdy file. This gives the number of variables to populate the MOTU download config file.
For each variable, the name and grid type are pulled from the NCML and populated into a python dictionary.

For each item in the dictionary the relevant request is populated in the CMEMS config file. The first request has
a size flag applied and an xml file is downloaded containing details of the request.
If the request is valid a field in the XML is set to OK and then the request is repeated with the size flag removed,
resulting in the download of the relevant netcdf file. The console information is parsed to check for errors
and for confirmation of the success of the download. If there are errors the error string is returned, otherwise a
success message is written to the log file. If the request is too big then a 1 error code is returned.
Otherwise if all requests are successful then a zero status code is returned.
'''
def _run_motuclient(cmd, done_msg, error_msg=None):
    """Run a motuclient command and copy its console output to the log.

    Returns None on success. Returns ``error_msg`` (or the offending line
    when ``error_msg`` is None) as soon as a line contains 'Error', or the
    exit code as a string when the process ends with a non-zero status.
    """
    with Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True) as proc:
        for raw in proc.stdout:
            # Strip the line ending (and skip blank lines) so the log is not
            # double spaced.
            line = raw.replace("[ INFO]", "").strip()
            if not line:
                continue
            logger.info(line)
            if 'Error' in line:
                return line if error_msg is None else error_msg
            if 'Done' in line:
                logger.info(done_msg)
    if proc.returncode != 0:
        return str(proc.returncode)
    return None


def request_cmems(args, date_min, date_max):
    """Download the variables described by the NCML file for one date range.

    The NCML file at ``args['src_dir']`` is parsed to group variable names by
    grid (the upper-case letters found in each entry's ``regExp`` attribute).
    For each grid the template at ``args['cmems_config_template']`` is filled
    in and written to ``args['cmems_config']``, then ``motuclient`` is run
    twice: first with ``--size`` to obtain an XML size report, and, if that
    report's ``msg`` attribute contains 'OK', again to download the data.
    A grid whose output file already exists is skipped with a warning.

    Args:
        args: settings mapping (paths, server, product, extent and depth
            limits, download prefix).
        date_min: start date string written into the request.
        date_max: end date string written into the request.

    Returns:
        int: 0 when all requests completed; 1 when the size report says the
            request is too big.
        str: an error message (credentials import failure, an error line in
            the motuclient output, a non-zero motuclient exit code, an
            unparsable size report or an unrecognised size message).
    """
    try:
        from pynemo.utils import CMEMS_cred
    except ImportError:
        logger.error('Unable to import CMEMS credentials, see Readme for instructions on adding to PyNEMO')
        return 'Unable to import credentials file'

    # Elements are indexed directly: Element.getchildren() no longer exists
    # in Python 3.9 and later.
    ncml_root = ET.parse(args['src_dir']).getroot()
    var_group = ncml_root[0]
    num_var = len(var_group)
    logger.info('number of variables requested is '+str(num_var))
    grids = {}
    locs = {}

    for n in range(num_var):
        entry = var_group[n][0]
        source = entry[0].attrib
        var_name = ncml_root[n+1].attrib['name']
        logger.info('Variable '+ str(n+1)+' is '+entry.attrib['name']+' (Variable name: '+var_name+')')
        grid = ''.join(re.findall('([A-Z])', source['regExp']))
        logger.info('It is on the '+grid+' grid')

        grids.setdefault(grid, []).append(var_name)
        # The first six characters of the location are dropped; presumably a
        # URL-style prefix such as 'file:/' -- TODO confirm against the NCML.
        locs.setdefault(grid, source['location'][6:])

    for key in grids:
        stem = args['dl_prefix']+'_'+str(date_min)+'_'+str(date_max)+'_'+str(key)
        variables = ' '.join(grids[key])

        if Path(locs[key] + stem + '.nc').is_file():
            logger.warning('filename of download already exists, please check file is valid, skipping to next item......')
            continue

        with open(args['cmems_config_template'], 'r') as template:
            filedata = template.read()

        # Placeholder tokens in the template and the values that replace
        # them, applied in this order.
        replacements = (
            ('J90TBS4Q1UCT4CM7', CMEMS_cred.user),
            ('Z8UKFNXA5OIZRXCK', CMEMS_cred.pwd),
            ('DSGJJGWODV2F8TFU', args['motu_server']),
            ('S7L40ACQHANTAC6Y', args['cmems_model']),
            ('4LC8ALR9T96XN08U', args['cmems_product']),
            ('M49OAWI14XESWY1K', date_min),
            ('DBT3J4GH2O19Q75P', date_max),
            ('3M2FJJE5JW1EN4C1', str(args['latitude_min'])),
            ('OXI2PXSTJG5PV6OW', str(args['latitude_max'])),
            ('DWUJ65Y233FQFW3F', str(args['longitude_min'])),
            ('K0UQJJDJOKX14DPS', str(args['longitude_max'])),
            ('FNO0GZ1INQDATAXA', str(args['depth_min'])),
            ('EI6GB1FHTMCIPOZC', str(args['depth_max'])),
            ('4Y4LMQLAKP10YFUE', ','.join(grids[key])),
            ('QFCN2P56ZQSA7YNK', locs[key]),
            ('YSLTB459ZW0P84GE', stem + '.nc'),
        )
        for token, value in replacements:
            filedata = filedata.replace(token, value)

        with open(args['cmems_config'], 'w') as config:
            config.write(filedata)

        # First pass: ask only for the size report of the request.
        failure = _run_motuclient(
            ['motuclient', '--size', '--config-file', args['cmems_config']],
            'download of request xml file for variable ' + variables + ' successful',
            'Error found in CMEMS download report, please check downloaded data')
        if failure is not None:
            return failure

        logger.info('checking size of request for variables '+variables)
        try:
            size_report = ET.parse(locs[key] + stem + '.xml').getroot()
        except ET.ParseError:
            return 'Parse Error in XML file, This generally occurs when CMEMS service is down and returns an unexpected XML.'

        logger.info('size of request ' + size_report.attrib['size']+'Kb')
        verdict = size_report.attrib['msg']

        if 'OK' in verdict:
            logger.info('request valid, downloading now......')
            # Second pass: same config without --size downloads the data.
            failure = _run_motuclient(
                ['motuclient', '--config-file', args['cmems_config']],
                'download of request data file for variable ' + variables + ' successful')
            if failure is not None:
                return failure
        elif 'too big' in verdict:
            return 1
        else:
            return 'unable to determine if size request is valid (too big or not)'

    return 0

'''
Function to check errors from both FTP or MOTU. This checks a python file containing dictionaries for different error types
(currently FTP and MOTU) against the error from the download. If the error code is present as a key, then the system will retry
or exit. This depends on which error dictionary the key is in, i.e. restart errors result in a restart and critical errors result
in sys exit. The number of restarts is defined in the BDY file and once expired results in sys exit. Finally, if the error is
not in the type dictionaries the system will exit. Unlogged errors can be added so that a restart or exit occurs when they occur.
'''
def err_parse(error, err_type):
    """Classify a download error against the known FTP or MOTU error keys.

    Args:
        error: error text returned by a download function.
        err_type: 'FTP' or 'MOTU', selecting which key collections in the
            ``errors`` module are consulted.

    Returns:
        0 when a retry key occurs in ``error``, 1 when a critical key occurs
        in it, 2 when no known key matches, and None when ``err_type`` is
        neither 'FTP' nor 'MOTU'.
    """
    def _known(keys):
        # True as soon as one of the keys is found inside the error text.
        return any(key in error for key in keys)

    if err_type == 'FTP':
        if _known(errors.FTP_retry):
            logger.info('retrying FTP download....')
            return 0
        if _known(errors.FTP_critical):
            logger.info('critical FTP error, stopping')
            return 1
        logger.critical('unlogged FTP error, stopping')
        logger.critical(error)
        return 2

    if err_type == 'MOTU':
        if _known(errors.MOTU_retry):
            logger.info('non critical error')
            logger.info('restarting download....')
            return 0
        if _known(errors.MOTU_critical):
            logger.critical('critical error found: please see below')
            logger.critical(error)
            return 1
        logger.critical('unlogged error: please see below')
        logger.critical(error)
        return 2

    return None

'''
Function to clean up the download process. This is called before every sys exit and at the end of the download function.
At the moment it only removes the xml size files that are no longer required. Other functionality may be added in the future.
'''
def clean_up(settings):
    """Delete the size-check XML files left in the download directory.

    Every path matching ``settings['cmems_dir'] + "*.xml"`` is removed. If a
    removal raises ``OSError`` the remaining files are left in place and an
    info message is logged instead of the success message.

    Returns:
        None in all cases.
    """
    try:
        leftovers = glob.glob(settings['cmems_dir'] + "*.xml")
        for xml_path in leftovers:
            os.remove(xml_path)
    except OSError:
        logger.info('no xml files found to remove')
    else:
        logger.info('removed size check xml files successfully')
    return None