Goal: apply fn to every element of an arbitrarily nested iterable (tuple, list, dict, np.ndarray), including to iterables. Ex:
def fn1(x, key): # ignore `key` for now
return str(x) if not isinstance(x, Iterable) else x
def fn2(x, key):
return x ** 2 if isinstance(x, (int, float, np.generic)) else x
arr = np.random.randint(0, 9, size=(2, 2))
obj = (1, {'a': 3, 'b': 4, 'c': ('5', 6., (7, 8)), 'd': 9}, arr)
deepmap(obj, fn1) == ('1', {'a': '3', 'b': '4', 'c': ('5', '6.0', ('7', '8')), 'd': '9'},
array([[6, 1], [5, 4]]))
deepmap(obj, fn2) == (1, {'a': 9, 'b': 16, 'c': ('5', 36.0, (49, 64)), 'd': 81},
array([[36, 1], [25, 16]]))
deeplen should also be a special case of deepmap, with proper fn (just a demo, don't care for optimizing this):
def deeplen(obj):
count = [0]
def fn(x, key):
if not isinstance(x, Iterable) or isinstance(x, str):
count[0] += 1
return x
deepmap(obj, fn)
return count[0]
deeplen(obj) == 12
My working implementation, along tests, below. Unsure this works without OrderedDict (Python >=3.6 default); it does if key order doesn't change even as values are changed (but no keys inserted/popped). A resolution is appending actual Mapping keys to key instead of their index, but this complicates implementing. (As for why pass key to fn: we can implement e.g. deepequal, comparing obj against another obj; this requires key info. Also deepcopy.)
Any improvements? I haven't tested exhaustively - maybe there are objects for which this fails, hence subject to extendability. Performance/readability could be better also.
Visualization: deepmap traverses a nest left-to-right, inward-first: (code)
- Purple:
fnapplied to iterable / element - Blue:
tuple()applied to list - Green:
list()applied to tuple - Grey: iterable exit; dropping
key[-1]and decrementingdepth- no operation - Green + Purple:
fn(item)is only assigned tolist-cast tuple; no 'intermediate' state - Empty: see placement of
printin linked code; this is to show effect of list+fn
Implementation: live demo
from collections.abc import Mapping
def deepmap(obj, fn):
def deepget(obj, key=None, drop_keys=0):
if not key or not obj:
return obj
if drop_keys != 0:
key = key[:-drop_keys]
for k in key:
if isinstance(obj, Mapping):
k = list(obj)[k] # get key by index (OrderedDict, Python >=3.6)
obj = obj[k]
return obj
def dkey(x, k):
return list(x)[k] if isinstance(x, Mapping) else k
def nonempty_iter(item):
# do not enter empty iterable, since nothing to 'iterate' or apply fn to
try:
list(iter(item))
except:
return False
return not isinstance(item, str) and len(item) > 0
def _process_key(obj, key, depth, revert_tuple_keys, recursive=False):
container = deepget(obj, key, 1)
item = deepget(obj, key, 0)
if nonempty_iter(item) and not recursive:
depth += 1
if len(key) == depth:
if key[-1] == len(container) - 1: # iterable end reached
depth -= 1 # exit iterable
key = key[:-1] # drop iterable key
if key in revert_tuple_keys:
supercontainer = deepget(obj, key, 1)
k = dkey(supercontainer, key[-1])
supercontainer[k] = tuple(deepget(obj, key))
revert_tuple_keys.pop(revert_tuple_keys.index(key))
if depth == 0 or len(key) == 0:
key = None # exit flag
else:
# recursively exit iterables, decrementing depth
# and dropping last key with each recursion
key, depth = _process_key(obj, key, depth, revert_tuple_keys,
recursive=True)
else: # iterate next element
key[-1] += 1
elif depth > len(key):
key.append(0) # iterable entry
return key, depth
key = [0]
depth = 1
revert_tuple_keys = []
if not nonempty_iter(obj): # nothing to do here
raise ValueError(f"input must be a non-empty iterable - got: {obj}")
elif isinstance(obj, tuple):
obj = list(obj)
revert_tuple_keys.append(None) # revert to tuple at function exit
while key is not None:
container = deepget(obj, key, 1)
item = deepget(obj, key, 0)
if isinstance(container, tuple):
ls = list(container) # cast to list to enable mutating
ls[key[-1]] = fn(item, key)
supercontainer = deepget(obj, key, 2)
k = dkey(supercontainer, key[-2])
supercontainer[k] = ls
revert_tuple_keys.append(key[:-1]) # revert to tuple at iterable exit
else:
k = dkey(container, key[-1])
container[k] = fn(item, key)
key, depth = _process_key(obj, key, depth, revert_tuple_keys)
if None in revert_tuple_keys:
obj = tuple(obj)
return obj
Testing:
import numpy as np
from collections.abc import Iterable
from copy import deepcopy
from time import time
from deepmap import deepmap
def fn1(x, key):
return str(x) if not isinstance(x, Iterable) else x
def fn2(x, key):
return x ** 2 if isinstance(x, (int, float, np.generic)) else x
def fn3(x, key):
return str(x)
def make_bigobj():
arrays = [np.random.randn(100, 100), np.random.uniform(30, 40, 10)]
lists = [[1, 2, '3', '4', 5, [6, 7]] * 555, {'a': 1, 'b': arrays[0]}]
dicts = {'x': [1, {2: [3, 4]}, [5, '6', {'7': 8}] * 99] * 55,
'b': [{'a': 5, 'b': 3}] * 333, ('k', 'g'): (5, 9, [1, 2])}
tuples = (1, (2, {3: np.array([4., 5.])}, (6, 7, 8, 9) * 21) * 99,
(10, (11,) * 5) * 666)
return {'arrays': arrays, 'lists': lists,
'dicts': dicts, 'tuples': tuples}
def deeplen(obj):
count = [0]
def fn(x, key):
if not isinstance(x, Iterable) or isinstance(x, str):
count[0] += 1
return x
deepmap(obj, fn)
return count[0]
#### CORRECTNESS ##############################################################
np.random.seed(4)
arr = np.random.randint(0, 9, (2, 2))
obj = (1, {'a': 3, 'b': 4, 'c': ('5', 6., (7, 8)), 'd': 9}, {}, arr)
out1 = deepmap(deepcopy(obj), fn1)
assert str(out1) == ("('1', {'a': '3', 'b': '4', 'c': ('5', '6.0', ('7', '8'))"
", 'd': '9'}, {}, array([[7, 5],\n [1, 8]]))")
out2 = deepmap(deepcopy(obj), fn2)
assert str(out2) == ("(1, {'a': 9, 'b': 16, 'c': ('5', 36.0, (49, 64)), "
"'d': 81}, {}, array([[49, 25],\n [ 1, 64]]))")
out3 = deepmap(deepcopy(obj), fn3)
assert str(out3) == (r"""('1', "{'a': 3, 'b': 4, 'c': ('5', 6.0, (7, 8)), """
r"""'d': 9}", '{}', '[[7 5]\n [1 8]]')""")
try:
deepmap([], fn1)
except ValueError:
pass
except:
print("Failed to catch invalid input")
#### PERFORMANCE ##############################################################
bigobj = make_bigobj()
_bigobj = deepcopy(bigobj)
t0 = time()
assert deeplen(bigobj) == 53676
print("deeplen: {:.3f} sec".format(time() - t0))
assert str(bigobj) == str(_bigobj) # deeplen should not mutate `bigobj`
bigobj = deepcopy(_bigobj)
t0 = time()
deepmap(bigobj, fn1)
print("deepmap-fn1: {:.3f} sec".format(time() - t0))
# deepmap-fn2 takes too long
deeplen: 0.856 sec
deepmap-fn1: 0.851 sec
Updates:
deepmapfailed with empty iterables, e.g.[],{}- fixed, tests updated. Also added invalid input handling.Improved
nonempty_iterto usetry-iter, and applied it as check onobjat input. Unsure what test to add as my use-case is TensorFlow - a more general suggestion is welcome.Improved
nonempty_iteragain.try-iteris insufficient to determine whetherobjis Python-iterable; some objects (e.g. TensorFlowTensor) support creating generators but not consuming them without dedicated methods. It does become a question of what exactly we're mapping and with what methods; in that case, object-specific treatment is required.- This can be considered a limitation rather than improvement, as it doesn't allow non-Pythonic iteration (non-indices/hash keys) - but an exception will be thrown unless improvement #2 below is implemented, and it's an unlikely edge case anyway.
Possible improvements:
fncould also apply to Mappable.keys()instead of.values()only- Only Pythonic iterables are iterated with current implementation (see Update 3). More general access and assignment specifiers can be implemented - but code beyond
deepgetwould require modifying (e.g.supercontainer[k]).

deepmap((1, 2, [3, 4]), fn3)returns('1', 2', '[3, 4]'). I expected('1', '2', ['3', '4']).deepmapshould apply the function (fn3) recursively to all the nested elements (e.g., the 3 and 4) not just to the top level elements (e.g. [3, 4]). Right? \$\endgroup\$fnto every element of an arbitrarily nested iterable (tuple, list, dict, np.ndarray), including to iterables" - so[3, 4]is cast to string before it can be iterated;fn1yields the behavior you describe. This is intended, as it gives more flexibility to what we can do withfn. \$\endgroup\$deepmapand the function. How doesdeepmapdetermine whether to recursively apply a function to an iterable, a lafn1, versus to apply the function to the whole iterable, a lafn3. Does the recursion occur when the function returns the argument unchanged? In this call,deepmap((1, 2, [3, 4]), fn3), the first argument is an iterable, sofn3should be applied to the iterable as a whole and return"(1, 2, [3, 4])". \$\endgroup\$fn, on logic that it is the "object" whose internals we're mapping, and that we can justfn(obj)if needed, nodeepmapnecessary. \$\endgroup\$