import numpy as np
import string

from .. import properties

# for importer, modname, ispkg in pkgutil.walk_packages(path=package.__path__,prefix=package.__name__+'.',onerror=lambda x: None):
#     print(modname.split(".")[-1])

# TO DECODE FROM OBJECT TO INTEGER
#
# Decodes input of object type (pd.Series) to a specified data type.
#
# On missing data, the resulting data type for numerics is promoted to
# accommodate np.nan:
#   Promotion dtype for storing NAs: integer cast to float64
#   (https://pandas.pydata.org/pandas-docs/version/0.22/gotchas.html#nan-integer-na-values-and-na-type-promotions)
#
# The decoders return ``array.astype(self.dtype, casting='safe')``: safe
# casting is requested because an unsafe cast would convert np.nan to some
# arbitrary number depending on dtype.

# Overpunch lookup tables, built once at import time instead of on every
# decoded element.
# In TDF-11 there is a mix of overpunch and no overpunch, so plain digits
# are part of the tables as well.
_POSITIVE_LETTERS = string.ascii_uppercase[:9]    # 'A'..'I' -> +1..+9
_NEGATIVE_LETTERS = string.ascii_uppercase[9:18]  # 'J'..'R' -> -1..-9
_POSITIVE_ZEROS = '{<'
_NEGATIVE_ZEROS = '}!'

# character -> decimal digit (as a string) it stands for
_OVERPUNCH_NUMBER = {digit: digit for digit in string.digits}
_OVERPUNCH_NUMBER.update(
    {letter: str(value) for value, letter in enumerate(_POSITIVE_LETTERS, 1)})
_OVERPUNCH_NUMBER.update(
    {letter: str(value) for value, letter in enumerate(_NEGATIVE_LETTERS, 1)})
_OVERPUNCH_NUMBER.update(
    {symbol: '0' for symbol in _POSITIVE_ZEROS + _NEGATIVE_ZEROS})

# character -> sign factor (+1 or -1) it contributes
_OVERPUNCH_FACTOR = {digit: 1 for digit in string.digits}
_OVERPUNCH_FACTOR.update({letter: 1 for letter in _POSITIVE_LETTERS})
_OVERPUNCH_FACTOR.update({letter: -1 for letter in _NEGATIVE_LETTERS})
_OVERPUNCH_FACTOR.update({symbol: 1 for symbol in _POSITIVE_ZEROS})
_OVERPUNCH_FACTOR.update({symbol: -1 for symbol in _NEGATIVE_ZEROS})


def signed_overpunch_i(x):
    """Decode a single signed-overpunch element to a number.

    Every character of ``x`` is mapped to a decimal digit and to a sign
    factor; the result is the integer formed by the digits multiplied by the
    product of all the sign factors.

    Parameters
    ----------
    x : str or NaN
        Element to decode. NaN, empty and blank strings are treated as
        missing data.

    Returns
    -------
    number or float
        The decoded signed value, or ``np.nan`` if the element is missing or
        cannot be decoded. Decoding failures are reported on stdout and never
        raised.
    """
    # NaN is the only value that is not equal to itself.
    if not x == x:
        return np.nan
    # Blanks are missing data: return quietly instead of reporting an error.
    if isinstance(x, str) and not x.strip():
        return np.nan

    number = None
    factor = None
    try:
        for char in x:
            if char not in _OVERPUNCH_NUMBER:
                raise ValueError(
                    'invalid overpunch character {!r}'.format(char))
        number = "".join([_OVERPUNCH_NUMBER[char] for char in x])
        factor = np.prod([_OVERPUNCH_FACTOR[char] for char in x])
        return factor * int(number)
    except Exception as e:
        # Best effort: report the failing element and flag it as missing.
        print('ERROR decoding element: {}'.format(x))
        print(e)
        print('Conversion sequence:')
        if number is not None:
            print('number base conversion: {}'.format(number))
        if factor is not None:
            print('factor conversion: {}'.format(factor))
        return np.nan


class df_decoders():
    """Array decoders that try to return data as a requested dtype.

    Parameters
    ----------
    dtype : str
        Requested output dtype. Any value that is not listed in
        ``properties.numeric_types`` is replaced by ``'object'``.
    """

    def __init__(self, dtype):
        self.dtype = dtype if dtype in properties.numeric_types else 'object'

    def signed_overpunch(self, data):
        """Decode signed-overpunch elements of ``data``.

        Elements are decoded one by one with ``signed_overpunch_i`` into a
        float array (so that missing values can be stored as NaN), which is
        then safely cast to ``self.dtype``.

        Returns
        -------
        numpy.ndarray
            The decoded values as ``self.dtype`` if a safe cast is possible,
            otherwise the float array.
        """
        decoded_numeric = np.vectorize(signed_overpunch_i, otypes=[float])(data)
        try:
            return decoded_numeric.astype(self.dtype, casting='safe')
        except (TypeError, ValueError):
            # Safe casting refused (e.g. float to integer): keep the floats.
            return decoded_numeric

    def base36(self, data):
        """Decode base-36 elements of ``data`` to base 10.

        NaN and falsy elements (e.g. empty strings) are decoded as
        ``np.nan``. When ``self.dtype`` is ``'object'`` the decoded values
        are returned as strings, otherwise as numbers.

        Returns
        -------
        numpy.ndarray
            The decoded values as ``self.dtype`` if a safe cast is possible,
            otherwise as the dtype numpy inferred for them.

        Raises
        ------
        ValueError
            If a non-missing element is not a valid base-36 literal.
        """
        # Missing data has to be filtered out explicitly:
        # int(str(np.nan), 36) ==> 30191
        as_text = self.dtype == 'object'
        converted = []
        for item in data:
            if item == item and item:
                value = int(str(item), 36)
                # astype('object') does not convert element-wise: the column
                # would be dtype 'object' while its elements were still
                # 'int', and str methods in downstream converters failed.
                # Hence the explicit str() here.
                converted.append(str(value) if as_text else value)
            else:
                converted.append(np.nan)

        if as_text:
            # Explicit object dtype: letting numpy infer the dtype of a mix
            # of str and np.nan yields a unicode array in which missing
            # values become the literal string 'nan'.
            base10 = np.array(converted, dtype=object)
        else:
            base10 = np.array(converted)

        try:
            return base10.astype(self.dtype, casting='safe')
        except (TypeError, ValueError):
            # Safe casting refused (e.g. float to integer): keep as inferred.
            return base10


def _decoders_by_dtype(method_name):
    """Map every numeric dtype, plus 'key', to the named decoder method."""
    registry = dict()
    for numeric_type in properties.numeric_types:
        registry[numeric_type] = getattr(df_decoders(numeric_type), method_name)
    registry['key'] = getattr(df_decoders('key'), method_name)
    return registry


# decoders[<encoding>][<dtype>] -> bound decoder method
decoders = dict()
decoders['signed_overpunch'] = _decoders_by_dtype('signed_overpunch')
decoders['base36'] = _decoders_by_dtype('base36')

## Now add the file format specific decoders
#import pkgutil
#import importlib
#from mdf_reader import fs_decoders
#package=fs_decoders
#for importer, modname, ispkg in pkgutil.walk_packages(path=package.__path__,prefix=package.__name__+'.',onerror=lambda x: None):
#    file_format = modname.split(".")[-1]
#    try:
#        file_format_decoders = importlib.import_module(modname, package=None).decoders
#        for decoder in file_format_decoders.keys():
#            decoders[".".join([file_format,decoder])] = file_format_decoders.get(decoder)
#    except Exception as e:
#        logging.error("Error loading {0} decoders: {1}".format(modname,e))
#