Commit d01a68b9 authored by Irene Perez Gonzalez

removed all

parent 077c27ee
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 20 08:05:50 2019
@author: iregon
"""
import os
from io import StringIO

import pandas as pd

import mdf_reader

funPath = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(funPath, 'data')
schema_lib = os.path.join(os.path.dirname(funPath), 'schemas', 'lib')
# A. TESTS TO READ FROM DATA FROM DIFFERENT INPUTS
# -----------------------------------------------------------------------------
# FROM FILE: WITH AND WITHOUT SUPPLEMENTAL
def read_imma1_buoys_nosupp():
    schema = 'imma1'
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    return mdf_reader.read(data_file_path, data_model=schema)

def read_imma1_buoys_supp():
    schema = 'imma1'
    schema_supp = 'cisdm_dbo_imma1'
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    supp_section = 'c99'
    supp_model = schema_supp
    return mdf_reader.read(data_file_path, data_model=schema,
                           supp_section=supp_section, supp_model=supp_model)
# FROM DATA FRAME: WITH AND WITHOUT SUPPLEMENTAL
def read_td11_deck187_nosupp():
    schema = 'td11'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    # read_fwf without chunksize returns a DataFrame with a single wide column
    df = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = df.loc[df[0].str[0:3] == deck]
    deck_data.index = range(len(deck_data))
    return mdf_reader.read(deck_data, data_model=schema)

def read_td11_deck187_supp():
    schema = 'td11'
    schema_supp = 'deck187_td11'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    df = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = df.loc[df[0].str[0:3] == deck]
    deck_data.index = range(len(deck_data))
    supp_section = 'supplemental'
    supp_model = schema_supp
    return mdf_reader.read(deck_data, data_model=schema,
                           supp_section=supp_section, supp_model=supp_model)
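
# Hedged usage sketch (not part of the original tests): a quick look at the
# structure the readers above return. The section/field MultiIndex on the
# columns is an assumption inferred from the access patterns used in sections
# B and D below (data['core']['SST'], .drop('c99', axis=1, level=0)).
def example_inspect_output():
    data = read_imma1_buoys_supp()
    # Level 0 of the column MultiIndex should list the parsed sections
    print('Sections parsed:', data.columns.get_level_values(0).unique().tolist())
    print(data['core'].head())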
# B. TESTS TO ASSESS CHUNKING
# -----------------------------------------------------------------------------
# FROM FILE: WITH AND WITHOUT SUPPLEMENTAL
def read_imma1_buoys_nosupp_chunks():
    data_model = 'imma1'
    chunksize = 10000
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    return mdf_reader.read(data_file_path, data_model=data_model, chunksize=chunksize)

def read_imma1_buoys_supp_chunks():
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    chunksize = 10000
    data_model = 'imma1'
    supp_section = 'c99'
    supp_model = 'cisdm_dbo_imma1'
    return mdf_reader.read(data_file_path, data_model=data_model,
                           supp_section=supp_section, supp_model=supp_model,
                           chunksize=chunksize)
def assess_read_from_file_supp_chunk_options():
    nosupp_nochunk = read_imma1_buoys_nosupp()
    supp_nochunk = read_imma1_buoys_supp()
    io_nosupp_chunk = read_imma1_buoys_nosupp_chunks()
    nosupp_chunk = pd.DataFrame()
    for df in io_nosupp_chunk:
        nosupp_chunk = pd.concat([nosupp_chunk, df])
    io_supp_chunk = read_imma1_buoys_supp_chunks()
    supp_chunk = pd.DataFrame()
    for df in io_supp_chunk:
        supp_chunk = pd.concat([supp_chunk, df])
    print('Checking differences in core data when adding supplemental data with no chunking')
    if not nosupp_nochunk.drop('c99', axis=1, level=0).equals(supp_nochunk.drop('c99', axis=1, level=0)):
        print('...ERROR: differences found')
    else:
        print('...OK')
    print('\nChecking differences in core data when adding supplemental data with chunking')
    if not nosupp_chunk.drop('c99', axis=1, level=0).equals(supp_chunk.drop('c99', axis=1, level=0)):
        print('...ERROR: differences found')
    else:
        print('...OK')
    print('\nChecking differences in data when chunking with no supplemental')
    if not nosupp_nochunk.equals(nosupp_chunk):
        print('...ERROR: differences found')
    else:
        print('...OK')
    print('\nChecking differences in full data when chunking with supplemental')
    if not supp_nochunk.equals(supp_chunk):
        print('...ERROR: differences found')
    else:
        print('...OK')
# FROM PANDAS.IO.PARSERS.TEXTFILEREADER: WITH AND WITHOUT SUPPLEMENTAL
def read_td11_deck187_nosupp_chunks():
    data_model = 'td11'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    df = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = df.loc[df[0].str[0:3] == deck]
    deck_data.index = range(len(deck_data))
    # Round-trip the deck subset through a buffer to obtain a chunked TextFileReader
    output_buffer = StringIO()
    deck_data.to_csv(output_buffer, header=False, index=False)
    chunksize = 10000
    output_buffer.seek(0)
    TextParser = pd.read_fwf(output_buffer, widths=[100000], chunksize=chunksize, header=None)
    return mdf_reader.read(TextParser, data_model=data_model)

def read_td11_deck187_supp_chunks():
    data_model = 'td11'
    supp_model = 'deck187_td11'
    supp_section = 'supplemental'
    deck = '187'
    data_file_path = os.path.join(data_path, 'AZH1.ascii')
    df = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    deck_data = df.loc[df[0].str[0:3] == deck]
    deck_data.index = range(len(deck_data))
    output_buffer = StringIO()
    deck_data.to_csv(output_buffer, header=False, index=False)
    chunksize = 10000
    output_buffer.seek(0)
    TextParser = pd.read_fwf(output_buffer, widths=[100000], chunksize=chunksize, header=None)
    return mdf_reader.read(TextParser, data_model=data_model,
                           supp_section=supp_section, supp_model=supp_model)
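
# Hedged counterpart (not in the original file) of
# assess_read_from_file_supp_chunk_options() for the td11 TextFileReader
# inputs: assumes the chunked reads yield DataFrames that concatenate back
# into the same frame as the unchunked reads, as with the imma1 checks above.
def assess_read_from_tfr_supp_chunk_options():
    nosupp_nochunk = read_td11_deck187_nosupp()
    supp_nochunk = read_td11_deck187_supp()
    nosupp_chunk = pd.concat([df for df in read_td11_deck187_nosupp_chunks()])
    supp_chunk = pd.concat([df for df in read_td11_deck187_supp_chunks()])
    print('Checking differences in td11 data when chunking with no supplemental')
    print('...OK' if nosupp_nochunk.equals(nosupp_chunk) else '...ERROR: differences found')
    print('\nChecking differences in full td11 data when chunking with supplemental')
    print('...OK' if supp_nochunk.equals(supp_chunk) else '...ERROR: differences found')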
# C. TESTS TO READ DATA MODEL SCHEMA FROM EXTERNAL SOURCE
# -----------------------------------------------------------------------------
def read_imma1_buoys_supp_external_models():
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    schema = 'imma1'
    schema_supp = 'cisdm_dbo_imma1'
    data_model_path = os.path.join(schema_lib, schema)
    supp_section = 'c99'
    supp_model_path = os.path.join(schema_lib, schema_supp)
    return mdf_reader.read(data_file_path, data_model_path=data_model_path,
                           supp_section=supp_section, supp_model_path=supp_model_path)
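
# Hedged cross-check (not in the original file): reading via explicit schema
# paths should match reading via the registered model names used in section A,
# assuming both resolve to the same schema files under schemas/lib as the
# paths above suggest.
def assess_external_vs_internal_models():
    internal = read_imma1_buoys_supp()
    external = read_imma1_buoys_supp_external_models()
    print('External vs internal schema read match:',
          'OK' if internal.equals(external) else 'ERROR: differences found')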
# D. CHECK DATA SOURCES -------------------------------------------------------
def check_data_sources():
    data_file_path = os.path.join(data_path, 'meds_2010-07_subset.imma')
    data_ioStringIO = StringIO()
    data_model = 'imma1'
    with open(data_file_path, 'r') as fileO:
        data_ioStringIO.writelines(fileO.readlines())
    data_ioStringIO.seek(0)
    data_pandas_df = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t")
    data_pandas_tfr = pd.read_fwf(data_file_path, widths=[100000], header=None, delimiter="\t",
                                  chunksize=1000)
    sources = {'data_file_path': data_file_path, 'data_ioStringIO': data_ioStringIO,
               'data_pandas_df': data_pandas_df, 'data_pandas_tfr': data_pandas_tfr}
    for name, source in sources.items():
        print('Reading from source {} ....'.format(name))
        try:
            data = mdf_reader.read(source, data_model=data_model, sections=['core'])
            if name == 'data_pandas_tfr':
                data_c = data.get_chunk()
                print(data_c['core']['SST'][0])
            else:
                print(data['core']['SST'][0])
            print('.....OK')
        except Exception as e:
            print('ERROR: {}'.format(e))
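
# Hedged entry point (an assumption: the original script may instead be driven
# from an interactive session), running the self-contained checks above.
if __name__ == '__main__':
    assess_read_from_file_supp_chunk_options()
    check_data_sources()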