code_tables.py 6 KB
Newer Older
Irene Perez Gonzalez's avatar
Irene Perez Gonzalez committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 13 15:14:51 2018
"""
from __future__ import print_function
from __future__ import absolute_import
# Import required libraries

import sys
import json
import datetime
import copy
import numpy as np
import pandas as pd
import os
import glob
import shutil
from copy import deepcopy
from pandas.io.json.normalize import nested_to_record
import ast

# Interpreter flag: True when running under Python 3 or newer, False otherwise.
py3 = sys.version_info[0] >= 3


#https://stackoverflow.com/questions/10756427/loop-through-all-nested-dictionary-values
#def print_nested(d):
#    if isinstance(d, dict):
#        for k, v in d.items():
#            print_nested(v)
#    elif hasattr(d, '__iter__') and not isinstance(d, str):
#        for item in d:
#            print_nested(item)
#    elif isinstance(d, str):
#        print(d)
#
#    else:
#        print(d)
# Directory that contains this module; bundled resources are located
# relative to it.
toolPath = os.path.dirname(os.path.abspath(__file__))
# <module dir>/lib
table_lib = os.path.join(toolPath, 'lib')
# <module dir>/lib/templates/code_tables: where the JSON templates live.
templates_path = os.path.join(toolPath, 'lib', 'templates', 'code_tables')

def templates():
    """List the names of the available code table templates.

    Returns:
        list of str: the base name, without its extension, of every
        ``*.json`` file found in ``templates_path``, in the order
        ``glob.glob`` reports them.
    """
    tables = glob.glob(os.path.join(templates_path, '*.json'))
    # splitext removes only the trailing extension, so a template stored as
    # 'a.b.json' is listed as 'a.b' (not 'a') and can be found again by name.
    return [os.path.splitext(os.path.basename(x))[0] for x in tables]

def copy_template(table, out_dir=None, out_path=None):
    """Copy a code table template to a destination chosen by the caller.

    The outcome (success or error) is reported on stdout; nothing is
    returned.

    Args:
        table: Name of the template to copy; must be one of the names
            returned by ``templates()``.
        out_dir: Directory to copy into. The copy is named
            ``<table>.json``. Only used when ``out_path`` is not given.
        out_path: Full path of the copy. Takes precedence over ``out_dir``.

    Returns:
        None
    """
    tables = templates()
    if table not in tables:
        print('copy_template ERROR:')
        print('\tRequested template {} must be a valid name.'.format(table))
        print('\tValid names are: {}'.format(", ".join(tables)))
        return

    # Without any destination os.path.join(None, ...) would raise an opaque
    # TypeError; report the problem in the same style as the other errors.
    if not out_path and out_dir is None:
        print('copy_template ERROR:')
        print('\tEither out_dir or out_path must be provided.')
        return

    table_path = os.path.join(templates_path, table + '.json')
    table_out = out_path if out_path else os.path.join(out_dir, table + '.json')
    shutil.copyfile(table_path, table_out)

    if os.path.isfile(table_out):
        print('Schema template {0} copied to {1}'.format(table, table_out))
    else:
        print('copy_template ERROR:')
        print('\tError copying table template {0} copied to {1}'.format(table, table_out))
    return

def _parse_range_key(key):
    """Parse a ``range_key(lower,upper[,step])`` key into integer bounds.

    ``upper`` may be the literal ``yyyy``, which stands for the current year.
    Parsing problems are reported on stdout.

    Args:
        key: The dictionary key, starting with ``range_key``.

    Returns:
        tuple: ``(lower, upper, step)`` as ints, or None if the key could
        not be parsed.
    """
    # Drop the leading 'range_key(' (10 characters) and the closing ')'.
    range_params = [p.strip() for p in key[10:-1].split(",")]

    try:
        lower = int(range_params[0])
    except (ValueError, TypeError) as e:
        print("Lower bound parsing error in range key: ", key)
        print("Error is:")
        print(e)
        return None

    # A missing upper bound is reported like any other unparsable bound
    # instead of raising an IndexError.
    upper_token = range_params[1] if len(range_params) > 1 else None
    if upper_token == 'yyyy':
        upper = datetime.date.today().year
    else:
        try:
            upper = int(upper_token)
        except (ValueError, TypeError) as e:
            print("Upper bound parsing error in range key: ", key)
            print("Error is:")
            print(e)
            return None

    step = 1
    if len(range_params) > 2:
        try:
            step = int(range_params[2])
            if step == 0:
                raise ValueError("range step must not be zero")
        except (ValueError, TypeError) as e:
            print("Range step parsing error in range key: ", key)
            print("Error is:")
            print(e)
            return None

    return lower, upper, step


def expand_integer_range_key(d):
    """Expand ``range_key(lower,upper[,step])`` keys of a nested dict in place.

    Every key that starts with ``range_key`` is replaced by one key per
    integer in ``range(lower, upper + 1, step)`` (as a string), each holding
    its own deep copy of the original value. Nested dictionaries are
    processed recursively; anything that is not a dict is left untouched.
    A range key that cannot be parsed is reported on stdout and left as is.

    Args:
        d: Object to process. Only dicts are modified.

    Returns:
        None. ``d`` is modified in place.
    """
    if not isinstance(d, dict):
        return

    # Iterate over a snapshot: the dict is modified while expanding.
    for k, v in list(d.items()):
        # Keys are not guaranteed to be strings (e.g. evaluated '_keys').
        if not (isinstance(k, str) and k.startswith('range_key')):
            continue
        bounds = _parse_range_key(k)
        if bounds is None:
            continue
        lower, upper, step = bounds
        for i_range in range(lower, upper + 1, step):
            # Deep copy, otherwise the repetitions are linked and act as one!
            d[str(i_range)] = deepcopy(v)
        d.pop(k, None)

    # Recurse once into every value, including the freshly expanded copies,
    # so nested range keys are expanded whatever their sibling keys are.
    for v in list(d.values()):
        expand_integer_range_key(v)


def eval_dict_items(item):
    """Convert the string form of a Python literal back into its value.

    Args:
        item: Usually a string such as ``"1"`` or ``"('a', 'b')"``.

    Returns:
        The value produced by ``ast.literal_eval(item)``, or ``item`` itself,
        unchanged, when it cannot be evaluated as a literal.
    """
    try:
        return ast.literal_eval(item)
    except Exception:
        # Best effort: anything that is not a valid literal is kept as is.
        # ``Exception`` (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return item

def read_table(table_path):
    """Load a code table from a JSON file and expand its range keys.

    If a companion file with the same path but a ``.keys`` extension exists,
    it is loaded as JSON too and stored under ``table['_keys']``, with every
    key and every listed item passed through ``eval_dict_items``.

    Args:
        table_path: Path to the JSON file holding the code table.

    Returns:
        dict: The loaded table, after ``expand_integer_range_key`` has been
        applied to it.
    """
    with open(table_path) as fileObj:
        table = json.load(fileObj)

    # splitext only strips the file extension, so paths without an extension
    # or with dots in a directory name still map to '<path>.keys'.
    keys_path = os.path.splitext(table_path)[0] + '.keys'
    if os.path.isfile(keys_path):
        with open(keys_path) as fileObj:
            raw_keys = json.load(fileObj)
        table['_keys'] = {}
        for name, items in raw_keys.items():
            key = eval_dict_items(name)
            table['_keys'][key] = [eval_dict_items(item) for item in items]

    expand_integer_range_key(table)
    return table

def table_keys(table):
    """Return the keys of a code table.

    Args:
        table: Code table dictionary.

    Returns:
        list: When ``table`` has a non-empty ``'_keys'`` entry, one list of
        key components per entry of the flattened table (``'_keys'`` itself
        excluded). Otherwise, the plain list of the table's top-level keys.
    """
    if not table.get('_keys'):
        return list(table.keys())

    separator = '∿' # something hopefully not in keys...
    # Work on a copy so the caller's table keeps its '_keys' entry.
    stripped = deepcopy(table)
    stripped.pop('_keys')
    # nested_to_record flattens the nested dicts, joining the keys of each
    # level with the separator; splitting recovers the individual keys.
    flattened = nested_to_record(stripped, sep=separator)
    return [joined.split(separator) for joined in flattened.keys()]


def get_nested(table,*args):
    """Look up a value in a nested dictionary by a sequence of keys.

    Equivalent to ``table.get(args[0]).get(args[1])...``.

    TODO: add an optional parameter to choose which item of the final entry
    to return (value, lower, ...), once the code tables carry them; the same
    would have to be added to table_value_from_keys.

    Args:
        table: The (nested) dictionary to search.
        *args: The keys to follow, outermost first. With no keys, ``table``
            itself is returned.

    Returns:
        The value found, or None if a key is missing or the lookup fails at
        any level.
    """
    # Kept from the original implementation: the keys go through a numpy
    # array, so keys of mixed types are coerced to a common type
    # (e.g. ``1`` and ``'a'`` both become strings).
    z = np.array([*args])
    try:
        node = table
        for key in z:
            node = node.get(key)
        return node
    except Exception:
        # E.g. an intermediate level is None or is not a dictionary.
        return None

def table_value_from_keys(table,df):
    """Look up the code table value for every row of keys.

    Args:
        table: The (nested) code table dictionary.
        df: pd.DataFrame or Series holding the keys. For a DataFrame, each
            column gives one nesting level (in column order) and its values
            are converted to str before the lookup.

    Returns:
        The result of applying the vectorized ``get_nested`` element-wise.
    """
    # A nested get cannot be vectorized directly, so it is wrapped in a
    # function (get_nested) and that function is vectorized.
    v_nested_get = np.vectorize(get_nested)
    if isinstance(df, pd.DataFrame):
        # One positional argument per column; unpacking works for any column
        # label, including plain strings.
        key_columns = [df[x].astype(str) for x in df.columns]
        return v_nested_get(table, *key_columns)
    # NOTE: unlike the DataFrame case, the values are not converted to str
    # here (original author's note: "have to do likewise in not DataFrame").
    return v_nested_get(table, df)