properties.py 2.13 KB
Newer Older
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
1 2 3 4 5 6 7 8 9 10 11 12 13
#!/usr/bin/env python3
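# -*- coding: utf-8 -*-
"""
Shared constants for the package.

Defines the location and names of the supported data models, the groupings
of data type names (numeric, object-like, and their pandas dtype
equivalents), the conversion arguments tied to each data type, and
miscellaneous settings such as the internal buffer delimiter.
"""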
import glob
import os
import io
import pandas as pd


# Supported formats, sources and internal data models -------------------------
14
# Directory holding one sub-directory per data model, next to this module.
schema_path = os.path.join(os.path.dirname(__file__), 'data_models', 'lib')
# A data model counts as supported when its directory contains a JSON file
# whose name (up to the first dot) equals the directory name, i.e.
# <schema_path>/<model>/<model>.json. Path components are taken with os.path
# helpers rather than by splitting on "/", so the match also works where the
# platform path separator is not "/".
supported_data_models = [
    os.path.basename(os.path.dirname(schema_file))
    for schema_file in glob.glob(os.path.join(schema_path, '*', '*.json'))
    if os.path.basename(schema_file).split('.')[0]
    == os.path.basename(os.path.dirname(schema_file))
]
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
16 17 18 19

# Data types ------------------------------------------------------------------
# Names of the numpy integer dtypes: signed first, then unsigned, each in
# increasing bit width.
numpy_integers = [
    kind + width
    for kind in ('int', 'uint')
    for width in ('8', '16', '32', '64')
]
# Names of the numpy floating point dtypes, in increasing bit width.
numpy_floats = ['float' + width for width in ('16', '32', '64')]
20 21 22 23 24

# Maps each lowercase integer dtype name to its capitalised counterpart
# ('int8' -> 'Int8', 'uint8' -> 'UInt8', ...); signed entries come first,
# then unsigned, each in increasing bit width.
pandas_nan_integers = {
    prefix.lower() + width: prefix + width
    for prefix in ('Int', 'UInt')
    for width in ('8', '16', '32', '64')
}

Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
25 26
# Every numeric type name: the numpy integers, the numpy floats and then the
# values of pandas_nan_integers, in that order. A new list is built so the
# source lists are left untouched.
numeric_types = (
    numpy_integers
    + numpy_floats
    + list(pandas_nan_integers.values())
)
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44

# Type names that are not numeric.
object_types = ['str', 'object', 'key', 'datetime']

# Object-like types followed by the numpy integer and float names
# (a new list; the source lists are not modified).
data_types = object_types + numpy_integers + numpy_floats

# Dtype name associated with every type: object-like types all map to
# 'object', while each numeric type maps to itself.
pandas_dtypes = dict.fromkeys(object_types, 'object')
pandas_dtypes.update(zip(numeric_types, numeric_types))

# ....and how they are managed
# Per data type, the list of conversion argument names tied to it.
data_type_conversion_args = {}
for dtype in numeric_types:
    # A fresh list per key, so entries never share the same list object.
    data_type_conversion_args[dtype] = ['scale', 'offset']
data_type_conversion_args.update({
    'str': ['disable_white_strip'],
    'object': ['disable_white_strip'],
    'key': ['disable_white_strip'],
    'datetime': ['datetime_format'],
})
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
48 49 50

# Misc ------------------------------------------------------------------------
# Small numeric tolerance (presumably for float comparisons; its use is
# outside this section -- confirm against callers).
tol = 1e-10
iregon's avatar
iregon committed
51
# Placeholder level name (presumably used where no real section level
# exists -- confirm against callers).
dummy_level = "_SECTION_"
iregon's avatar
iregon committed
52
# Maximum length of reports in the initial read
MAX_FULL_REPORT_WIDTH = 10 ** 5
iregon's avatar
iregon committed
54 55 56 57 58 59
# Delimiter used internally when writing to buffers: the Unicode character
# 'END OF TEXT' (U+0003). It is considered safe because it is not expected
# to appear inside a string. Its UTF-8 encoding is a single byte, so it is
# supported by the pandas 'c' engine, which is faster than the python engine.
internal_delimiter = "\u0003"