#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 20 08:05:50 2019

Tests for mdf_reader: reading from different input sources, chunking,
external data model schemas and data source handling.

@author: iregon
"""
import os
from io import StringIO

import numpy as np
import pandas as pd

import mdf_reader
import mdf_reader.common.pandas_TextParser_hdlr as pandas_TextParser_hdlr

funPath = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(funPath, 'data')
schema_lib = os.path.join(os.path.dirname(funPath), 'schemas', 'lib')


# A. TESTS TO READ DATA FROM DIFFERENT INPUTS
# -----------------------------------------------------------------------------
# FROM FILE: WITH AND WITHOUT SUPPLEMENTAL
def imma1_buoys_nosupp():
    schema = 'imma1'
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    return mdf_reader.read(data_file_path, data_model=schema)


def imma1_buoys_supp():
    schema = 'imma1'
    schema_supp = 'cisdm_dbo_imma1'
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    supp_section = 'c99'
    supp_model = schema_supp
    return mdf_reader.read(data_file_path, data_model=schema,
                           supp_section=supp_section, supp_model=supp_model)


# FROM DATA FRAME: WITH AND WITHOUT SUPPLEMENTAL
def td11_deck187_nosupp():
    schema = 'td11'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    TextParser = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = TextParser.loc[TextParser[0].str[0:3] == deck]
    deck_data.index = range(0, len(deck_data))
    return mdf_reader.read(deck_data, data_model=schema)


def td11_deck187_supp():
    schema = 'td11'
    schema_supp = 'deck187_td11'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    TextParser = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = TextParser.loc[TextParser[0].str[0:3] == deck]
    deck_data.index = range(0, len(deck_data))
    supp_section = 'supplemental'
    supp_model = schema_supp
    return mdf_reader.read(deck_data, data_model=schema,
                           supp_section=supp_section, supp_model=supp_model)
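
# Added illustration (not part of the original tests): the readers above are
# expected to return the parsed reports as a pandas DataFrame whose columns are
# a (section, element) MultiIndex (e.g. 'core', 'c99'), which is what the
# comparisons in section B below rely on. A small helper to eyeball that
# structure might look like this; the helper name is ours and the column layout
# is an assumption based on how the data is accessed elsewhere in this module.
def _describe_sections(data):
    # Print the data model sections present in the parsed output and a preview.
    print('sections:', list(data.columns.get_level_values(0).unique()))
    print(data.head())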
# B. TESTS TO ASSESS CHUNKING
# -----------------------------------------------------------------------------
# FROM FILE: WITH AND WITHOUT SUPPLEMENTAL
def read_imma1_buoys_nosupp_chunks():
    data_model = 'imma1'
    chunksize = 10000
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    return mdf_reader.read(data_file_path, data_model=data_model, chunksize=chunksize)


def read_imma1_buoys_supp_chunks():
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    chunksize = 10000
    data_model = 'imma1'
    supp_section = 'c99'
    supp_model = 'cisdm_dbo_imma1'
    return mdf_reader.read(data_file_path, data_model=data_model,
                           supp_section=supp_section, supp_model=supp_model,
                           chunksize=chunksize)


def assess_read_from_file_supp_chunk_options():
    # The non-chunked references come from the section A readers above.
    nosupp_nochunk = imma1_buoys_nosupp()
    supp_nochunk = imma1_buoys_supp()

    io_nosupp_chunk = read_imma1_buoys_nosupp_chunks()
    nosupp_chunk = pd.DataFrame()
    for df in io_nosupp_chunk:
        nosupp_chunk = pd.concat([nosupp_chunk, df])

    io_supp_chunk = read_imma1_buoys_supp_chunks()
    supp_chunk = pd.DataFrame()
    for df in io_supp_chunk:
        supp_chunk = pd.concat([supp_chunk, df])

    print('Checking differences in core data when adding supplemental data with no chunking')
    if not nosupp_nochunk.drop('c99', axis=1, level=0).equals(supp_nochunk.drop('c99', axis=1, level=0)):
        print('...ERROR: differences found')
    else:
        print('...OK')

    print('\nChecking differences in core data when adding supplemental data with chunking')
    if not nosupp_chunk.drop('c99', axis=1, level=0).equals(supp_chunk.drop('c99', axis=1, level=0)):
        print('...ERROR: differences found')
    else:
        print('...OK')

    print('\nChecking differences in data when chunking with no supplemental')
    if not nosupp_nochunk.equals(nosupp_chunk):
        print('...ERROR: differences found')
    else:
        print('...OK')

    print('\nChecking differences in full data when chunking with supplemental')
    if not supp_nochunk.equals(supp_chunk):
        print('...ERROR: differences found')
    else:
        print('...OK')
    return


# FROM PD.IO.PARSERS.TEXTFILEREADER: WITH AND WITHOUT SUPPLEMENTAL
def read_td11_deck187_nosupp_chunks():
    data_model = 'td11'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    TextParser = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = TextParser.loc[TextParser[0].str[0:3] == deck]
    deck_data.index = range(0, len(deck_data))
    output_buffer = StringIO()
    deck_data.to_csv(output_buffer, header=False, index=False)
    chunksize = 10000
    output_buffer.seek(0)
    TextParser = pd.read_fwf(output_buffer, widths=[100000], chunksize=chunksize, header=None)
    return mdf_reader.read(TextParser, data_model=data_model)


def read_td11_deck187_supp_chunks():
    data_model = 'td11'
    supp_model = 'deck187_td11'
    supp_section = 'supplemental'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    TextParser = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = TextParser.loc[TextParser[0].str[0:3] == deck]
    deck_data.index = range(0, len(deck_data))
    output_buffer = StringIO()
    deck_data.to_csv(output_buffer, header=False, index=False)
    chunksize = 10000
    output_buffer.seek(0)
    TextParser = pd.read_fwf(output_buffer, widths=[100000], chunksize=chunksize, header=None)
    return mdf_reader.read(TextParser, data_model=data_model,
                           supp_section=supp_section, supp_model=supp_model)
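
# Added sketch (not in the original module): the chunked readers above return an
# iterable of DataFrame chunks, which assess_read_from_file_supp_chunk_options()
# rebuilds by concatenating chunk by chunk. The same pattern as a small helper,
# assuming every chunk is a DataFrame sharing the same column layout:
def _concat_chunks(parser):
    # Rebuild a single DataFrame from a chunked (TextFileReader-like) output.
    return pd.concat(list(parser))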
# C. TESTS TO READ DATA MODEL SCHEMA FROM EXTERNAL SOURCE
# -----------------------------------------------------------------------------
def read_imma1_buoys_supp_external_models():
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    schema = 'imma1'
    schema_supp = 'cisdm_dbo_imma1'
    data_model_path = os.path.join(schema_lib, schema)
    supp_section = 'c99'
    supp_model_path = os.path.join(schema_lib, schema_supp)
    return mdf_reader.read(data_file_path, data_model_path=data_model_path,
                           supp_section=supp_section, supp_model_path=supp_model_path)


# D. CHECK DATA SOURCES
# -----------------------------------------------------------------------------
def check_data_sources():
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    data_ioStringIO = StringIO()
    data_model = 'imma1'
    with open(data_file_path, 'r') as fileO:
        data_ioStringIO.writelines(fileO.readlines())
    data_ioStringIO.seek(0)
    data_pandas_df = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    data_pandas_tfr = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t",
                                  chunksize=1000)
    sources = {'data_file_path': data_file_path,
               'data_ioStringIO': data_ioStringIO,
               'data_pandas_df': data_pandas_df,
               'data_pandas_tfr': data_pandas_tfr}
    for source in sources.keys():
        print('Reading from source {} ....'.format(source))
        try:
            data = mdf_reader.read(sources.get(source), data_model=data_model, sections=['core'])
            if source == 'data_pandas_tfr':
                data_c = data.get_chunk()
                print(data_c['core']['SST'][0])
            else:
                print(data['core']['SST'][0])
            print('.....OK')
        except Exception as e:
            print('ERROR: {}'.format(e))
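
# Added sketch (not part of the original module): a minimal driver so the checks
# can be run from the command line. It assumes the sample data files referenced
# above ('meds_2010-07_subset.imma', 'AZH1.ascii') are present under data_path
# and that the named schemas are available to mdf_reader.
if __name__ == '__main__':
    assess_read_from_file_supp_chunk_options()
    check_data_sources()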