From 4f102aed89d32de5ae45a92a2edab2ac5a953c8a Mon Sep 17 00:00:00 2001 From: thopri <thopri@noc.ac.uk> Date: Mon, 30 Mar 2020 13:31:15 +0100 Subject: [PATCH] added skip existing files to CMEMS download function --- inputs/namelist_cmems.bdy | 2 +- pynemo/nemo_bdy_dl_cmems.py | 97 +++++++++++++++++++++---------------- pynemo/profile.py | 12 ++--- 3 files changed, 61 insertions(+), 50 deletions(-) diff --git a/inputs/namelist_cmems.bdy b/inputs/namelist_cmems.bdy index 2883afb..531ad64 100644 --- a/inputs/namelist_cmems.bdy +++ b/inputs/namelist_cmems.bdy @@ -56,7 +56,7 @@ sn_cmems_dir = '/Users/thopri/Projects/PyNEMO/inputs/' ! where to download CMEMS input files (static and variable) ln_download_static = .false. ln_subset_static = .false. - nn_num_retry = 4 ! how many times to retry CMEMS download after non critical errors? + nn_num_retry = 1 ! how many times to retry CMEMS download after non critical errors? !------------------------------------------------------------------------------ ! CMEMS MOTU Configuration (for Boundary Data) !------------------------------------------------------------------------------ diff --git a/pynemo/nemo_bdy_dl_cmems.py b/pynemo/nemo_bdy_dl_cmems.py index e1642a7..54fcbc5 100644 --- a/pynemo/nemo_bdy_dl_cmems.py +++ b/pynemo/nemo_bdy_dl_cmems.py @@ -11,6 +11,7 @@ import ftplib import re import pandas as pd from datetime import datetime +from pathlib import Path import glob import os #local imports @@ -18,6 +19,7 @@ from pynemo.utils import cmems_errors as errors logger = logging.getLogger(__name__) # TODO: Fix double spacing issue on CMEMS download log entries. +# TODO: Add some sort of file check so CMEMS files that are already successfully downloaded aren't redownloaded ''' This function checks to see if the MOTU client is installed on the PyNEMO python environment. If it is not installed error code 1 is returned . 
If it is installed the version number of the installed client is returned as a string @@ -51,6 +53,7 @@ def get_static(args): logger.error('Unable to import CMEMS credentials, see Readme for instructions on adding to PyNEMO') return 'Unable to import credential file, have you created one?' try: + logger.info('connecting to FTP host......') ftp = ftplib.FTP(host=args['ftp_server'], user=CMEMS_cred.user, passwd=CMEMS_cred.pwd) except ftplib.error_temp: return 'temporary error in FTP connection, please try running PyNEMO again........' @@ -64,10 +67,13 @@ def get_static(args): # TODO: add try excepts to handle issues with files being missing etc. # TODO: Check there is enough space to download as well..... # TODO: Handle timeouts etc as well...... + logger.info('navigating to download directoy.......') ftp.cwd(args['static_dir']) + logger.info('generating download filename list......') filenames = args['static_filenames'].split(' ') for f in filenames: try: + logger.info('downloading '+f+' now......') ftp.retrbinary("RETR " + f, open(args['cmems_dir']+f, 'wb').write) except ftplib.error_temp: return 'temporary error in FTP download, please try running PyNEMO again........' 
@@ -243,48 +249,55 @@ def request_cmems(args, date_min, date_max): filedata = filedata.replace('4Y4LMQLAKP10YFUE', ','.join(grids[key])) filedata = filedata.replace('QFCN2P56ZQSA7YNK', locs[key]) filedata = filedata.replace('YSLTB459ZW0P84GE', args['dl_prefix']+'_'+str(date_min)+'_'+str(date_max)+'_'+str(key)+'.nc') - - with open(args['cmems_config'], 'w') as file: - file.write(filedata) - - with Popen(['motuclient', '--size','--config-file', args['cmems_config']], stdout=PIPE, bufsize=1, universal_newlines=True) as p: - for line in p.stdout: - line = line.replace("[ INFO]", "") - logger.info(line) - if 'Error' in line: - return 'Error found in CMEMS download report, please check downloaded data' - if 'Done' in line: - logger.info('download of request xml file for variable ' + ' '.join(grids[key]) + ' successful') - if p.returncode != 0: - return str(p.returncode) - - logger.info('checking size of request for variables '+' '.join(grids[key])) - xml = locs[key]+args['dl_prefix']+'_'+str(date_min)+'_'+str(date_max)+'_'+str(key)+ '.xml' - try: - root = ET.parse(xml).getroot() - except ET.ParseError: - return 'Parse Error in XML file, This generally occurs when CMEMS service is down and returns an unexpected XML.' 
- - logger.info('size of request ' + root.attrib['size']) - - if 'OK' in root.attrib['msg']: - logger.info('request valid, downloading now......') - - with Popen(['motuclient', '--config-file', args['cmems_config']], stdout=PIPE, bufsize=1, universal_newlines=True) as p: - for line in p.stdout: - line = line.replace("[ INFO]", "") - logger.info(line) - if 'Error' in line: - return 'Error found in CMEMS download report, please check downloaded data' - if 'Done' in line: - logger.info('download of request xml file for variable ' + ' '.join(grids[key]) + ' successful') - if p.returncode != 0: - return str(p.returncode) - - elif 'too big' in root.attrib['msg']: - return 1 - else: - return 'unable to determine if size request is valid (too big or not)' + + file_chk = Path(locs[key] + args['dl_prefix'] + '_' + str(date_min) + '_' + str(date_max) + '_' + str(key) + '.nc') + + if file_chk.is_file() == True: + logger.warning('filename of download already exists, please check file is valid, skipping to next item......') + + if file_chk.is_file() == False: + + with open(args['cmems_config'], 'w') as file: + file.write(filedata) + + with Popen(['motuclient', '--size','--config-file', args['cmems_config']], stdout=PIPE, bufsize=1, universal_newlines=True) as p: + for line in p.stdout: + line = line.replace("[ INFO]", "") + logger.info(line) + if 'Error' in line: + return 'Error found in CMEMS download report, please check downloaded data' + if 'Done' in line: + logger.info('download of request xml file for variable ' + ' '.join(grids[key]) + ' successful') + if p.returncode != 0: + return str(p.returncode) + + logger.info('checking size of request for variables '+' '.join(grids[key])) + xml = locs[key]+args['dl_prefix']+'_'+str(date_min)+'_'+str(date_max)+'_'+str(key)+ '.xml' + try: + root = ET.parse(xml).getroot() + except ET.ParseError: + return 'Parse Error in XML file, This generally occurs when CMEMS service is down and returns an unexpected XML.' 
+ + logger.info('size of request ' + root.attrib['size']+'Kb') + + if 'OK' in root.attrib['msg']: + logger.info('request valid, downloading now......') + + with Popen(['motuclient', '--config-file', args['cmems_config']], stdout=PIPE, bufsize=1, universal_newlines=True) as p: + for line in p.stdout: + line = line.replace("[ INFO]", "") + logger.info(line) + if 'Error' in line: + return 'Error found in CMEMS download report, please check downloaded data' + if 'Done' in line: + logger.info('download of request data file for variable ' + ' '.join(grids[key]) + ' successful') + if p.returncode != 0: + return str(p.returncode) + + elif 'too big' in root.attrib['msg']: + return 1 + else: + return 'unable to determine if size request is valid (too big or not)' return 0 diff --git a/pynemo/profile.py b/pynemo/profile.py index 12f69dd..a172225 100644 --- a/pynemo/profile.py +++ b/pynemo/profile.py @@ -74,10 +74,6 @@ class Grid(object): logger = logging.getLogger(__name__) logging.basicConfig(filename='nrct.log', level=logging.INFO) -# define a Handler which writes INFO messages or higher to the sys.stderr -console = logging.StreamHandler() -console.setLevel(logging.INFO) - def download_cmems(setup_filepath=0): ''' CMEMS download function. @@ -92,6 +88,7 @@ def download_cmems(setup_filepath=0): :param mask_gui: :return: ''' + logger.info('============================================') logger.info('Start CMEMS download Logging: ' + time.asctime()) logger.info('============================================') @@ -122,6 +119,7 @@ def download_cmems(setup_filepath=0): dl_cmems.clean_up(settings) sys.exit(static) dl_cmems.clean_up(settings) + # subset downloaded static grid files to match downloaded CMEMS data if settings['subset_static'] == False: logger.info('CMEMS subset static data not requested') @@ -277,9 +275,9 @@ def download_cmems(setup_filepath=0): sys.exit(dy_dl) # end of messy if statements to split requests into months, weeks and days as needed. 
dl_cmems.clean_up(settings) - + logger.info('============================================') logger.info('End CMEMS download: ' + time.asctime()) - logger.info('==========================================') + logger.info('============================================') def process_bdy(setup_filepath=0, mask_gui=False): @@ -297,7 +295,7 @@ def process_bdy(setup_filepath=0, mask_gui=False): """ # Start Logger - + logger.info('============================================') logger.info('Start NRCT Logging: '+time.asctime()) logger.info('============================================') -- GitLab