decoders.py 4.19 KB
Newer Older
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
1 2

import numpy as np
3
import pandas as pd
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
4
import string
iregon's avatar
iregon committed
5 6

from .. import properties
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60


#for importer, modname, ispkg in pkgutil.walk_packages(path=package.__path__,prefix=package.__name__+'.',onerror=lambda x: None):
#    print(modname.split(".")[-1])
# TO DECODE FROM OBJECT TO INTEGER
#
# Decodes input object type pd.series to a specified data type
#
# On missing data, the resulting numeric dtype is promoted to accommodate np.nan:
# Promotion dtype for storing NAs: integer is cast to float64
# (https://pandas.pydata.org/pandas-docs/version/0.22/gotchas.html#nan-integer-na-values-and-na-type-promotions)
#
# return base10.astype(self.dtype, casting = 'safe')
# safe casting specified, otherwise converts np.nan to some number depending on dtype.

# Lookup tables for signed-overpunch decoding, built once at import time
# instead of on every call (the decoder is applied element by element).
# In TDF-11 overpunched and plain fields are mixed, so plain digits are
# included in both tables.
_OVERPUNCH_NUMBER = {digit: digit for digit in string.digits}
# A-I carry the digits 1-9 (positive), J-R carry the digits 1-9 (negative).
_OVERPUNCH_NUMBER.update({string.ascii_uppercase[i]: str(i + 1) for i in range(0, 9)})
_OVERPUNCH_NUMBER.update({string.ascii_uppercase[i]: str(i - 8) for i in range(9, 18)})
_OVERPUNCH_NUMBER.update({'{': '0', '<': '0', '}': '0', '!': '0'})

_OVERPUNCH_FACTOR = {digit: 1 for digit in string.digits}
_OVERPUNCH_FACTOR.update({letter: 1 for letter in string.ascii_uppercase[0:9]})
_OVERPUNCH_FACTOR.update({letter: -1 for letter in string.ascii_uppercase[9:18]})
_OVERPUNCH_FACTOR.update({'{': 1, '<': 1, '}': -1, '!': -1})


def signed_overpunch_i(x):
    """Decode a single signed-overpunch element into a signed integer.

    Every character of ``x`` is mapped to a digit and to a sign factor
    (+1 or -1). The digits are concatenated and read as a base-10 integer,
    which is then multiplied by the product of all sign factors.

    Parameters
    ----------
    x : str or float
        Element to decode. NaN (anything for which ``x == x`` is false) and
        the empty string are treated as missing data.

    Returns
    -------
    numpy integer or float
        The decoded value, or ``np.nan`` when the element is missing or
        cannot be decoded. Decoding failures are reported on stdout and
        never raised.
    """
    # Pre-initialised so the error report below can tell which conversion
    # steps were completed before the failure.
    number = None
    factor = None
    try:
        if not x == x:
            # NaN is the only value that differs from itself: missing data
            return np.nan
        chars = list(x)
        # Unknown characters map to np.nan, which makes str.join raise a
        # TypeError; the element is then reported and decoded as np.nan.
        number = "".join([_OVERPUNCH_NUMBER.get(char, np.nan) for char in chars])
        factor = np.prod([_OVERPUNCH_FACTOR.get(char, np.nan) for char in chars])
        if not (factor and number and factor == factor):
            # Empty input (no digits collected) is missing data
            return np.nan
        return factor * int(number)
    except Exception as e:
        print('ERROR decoding element: {}'.format(x))
        print(e)
        print('Conversion sequence:')
        if number is not None:
            print('number base conversion: {}'.format(number))
        if factor is not None:
            print('factor conversion: {}'.format(factor))
        return np.nan

class df_decoders():
    """Decoders that turn encoded column data into a pandas Series.

    The ``dtype`` passed to the constructor is accepted but not used: every
    decoder returns a Series of dtype ``object``.
    """

    def __init__(self, dtype):
        # Return as object, conversion to actual type in converters only!
        self.dtype = 'object'

    def signed_overpunch(self, data):
        """Decode signed-overpunch elements of ``data``.

        Applies ``signed_overpunch_i`` to every element through
        ``np.vectorize`` with a float output type, and wraps the result in a
        new Series of dtype ``self.dtype``.
        """
        decoded_numeric = np.vectorize(signed_overpunch_i, otypes=[float])(data)
        return pd.Series(decoded_numeric, dtype=self.dtype)

    def base36(self, data):
        """Decode base-36 elements of ``data`` to base-10 strings.

        NaN and falsy elements (e.g. the empty string) become ``np.nan``.
        Elements that are not valid base-36 literals make ``int`` raise
        ``ValueError``, which is not caught here.
        """
        # Caution: int(str(np.nan),36) ==> 30191
        # so NaN has to be filtered out (i == i) before the conversion.
        base10 = [str(int(str(i), 36)) if i == i and i else np.nan for i in data]
        return pd.Series(base10, dtype=self.dtype)
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99

# Registry of decoders: decoder name -> {data type (or 'key') -> bound method}.
# A separate df_decoders instance is created for every entry.
decoders = {}

for _decoder_name in ('signed_overpunch', 'base36'):
    _by_dtype = {}
    for dtype in properties.numeric_types:
        _by_dtype[dtype] = getattr(df_decoders(dtype), _decoder_name)
    _by_dtype['key'] = getattr(df_decoders('key'), _decoder_name)
    decoders[_decoder_name] = _by_dtype


## Now add the file format specific decoders
#import pkgutil
#import importlib
#from mdf_reader import fs_decoders
#package=fs_decoders
#for importer, modname, ispkg in pkgutil.walk_packages(path=package.__path__,prefix=package.__name__+'.',onerror=lambda x: None):
#    file_format = modname.split(".")[-1]
#    try:
#        file_format_decoders = importlib.import_module(modname, package=None).decoders
#        for decoder in file_format_decoders.keys():
#            decoders[".".join([file_format,decoder])] = file_format_decoders.get(decoder)
#    except Exception as e:
#        logging.error("Error loading {0} decoders: {1}".format(modname,e))
#