Skip to content

Commit 927c599

Browse files
committed
ENH(sideffects): DIFFER from regular DATA...
to allow the same name to be used as regular and sideffect data node. + ENH: sideffect-strclass now contains its class-name. + DOC: a lot, comply with docstrings 1-liners; changes also on optionals. + ENH(TCs): +x2 elaborate TCs, check SEs differ from data.
1 parent 9e1c9fe commit 927c599

File tree

2 files changed

+129
-51
lines changed

2 files changed

+129
-51
lines changed

graphkit/modifiers.py

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111

1212
class optional(str):
1313
"""
14-
Input values in ``needs`` may be designated as optional using this modifier.
15-
If this modifier is applied to an input value, that value will be input to
16-
the ``operation`` if it is available. The function underlying the
17-
``operation`` should have a parameter with the same name as the input value
18-
in ``needs``, and the input value will be passed as a keyword argument if
19-
it is available.
14+
An optional need signifies that the function's argument may not receive a value.
15+
16+
Only input values in ``needs`` may be designated as optional using this modifier.
17+
An ``operation`` will receive a value for an optional need only if it is available
18+
in the graph at the time of its invocation.
19+
The ``operation``'s function should have a defaulted parameter with the same name
20+
as the optional, and the input value will be passed as a keyword argument,
21+
if it is available.
2022
2123
Here is an example of an operation that uses an optional argument::
2224
@@ -34,7 +36,7 @@ class optional(str):
3436
NetworkOperation(name='mygraph',
3537
needs=[optional('a'), optional('b'), optional('c')],
3638
provides=['sum'])
37-
39+
3840
>>> # The graph works with and without 'c' provided as input.
3941
>>> graph({'a': 5, 'b': 2, 'c': 4})['sum']
4042
11
@@ -51,22 +53,25 @@ def __repr__(self):
5153

5254
class sideffect(str):
5355
"""
54-
Inputs & outputs in ``needs`` & ``provides`` may be designated as *sideffects*
55-
using this modifier. *Tokens* work as usual while solving the DAG but
56-
they are never assigned any values to/from the ``operation`` functions.
57-
Specifically:
56+
A sideffect data-dependency participates in the graph but is never given/asked in functions.
57+
58+
Both inputs & outputs in ``needs`` & ``provides`` may be designated as *sideffects*
59+
using this modifier. *Sideffects* work as usual while solving the graph but
60+
they do not interact with the ``operation``'s function; specifically:
5861
5962
- input sideffects are NOT fed into the function;
6063
- output sideffects are NOT expected from the function.
6164
62-
Their purpose is to describe functions that have modify internal state
63-
their arguments ("side-effects").
64-
Note that an ``operation`` with just a single *sideffect* output return
65-
no value at all, but it would still be called for its side-effects only.
65+
.. note::
66+
an ``operation`` with just a single *sideffect* output returns no value at all,
67+
but it would still be called for its side-effect only.
6668
69+
Their purpose is to describe operations that modify the internal state of
70+
some of their arguments ("side-effects").
6771
A typical use case is to signify columns required to produce new ones in
6872
pandas dataframes::
6973
74+
7075
>>> from graphkit import operation, compose, sideffect
7176
7277
>>> # Function appending a new dataframe column from two pre-existing ones.
@@ -81,16 +86,42 @@ class sideffect(str):
8186
... provides=[sideffect('sum')])(addcolumns)
8287
... )
8388
>>> graph
84-
NetworkOperation(name='mygraph', needs=[optional('df'), optional('a'), optional('b')], provides=[sideffect('sum')])
89+
NetworkOperation(name='mygraph', needs=[optional('df'), optional('sideffect(a)'), optional('sideffect(b)')], provides=['sideffect(sum)'])
8590
8691
>>> # The graph works with and without 'c' provided as input.
8792
>>> df = pd.DataFrame({'a': [5], 'b': [2]}) # doctest: +SKIP
8893
>>> graph({'df': df})['sum'] == 11 # doctest: +SKIP
8994
True
9095
96+
Note that regular data in *needs* and *provides* do not match same-named *sideffects*.
97+
That is, in the following operation, the ``price`` input is different from
98+
the ``sideffect(price)`` output:
99+
100+
>>> def upd_prices(sales_df, prices):
101+
... sales_df["Prices"] = prices
102+
103+
>>> operation(fn=upd_prices,
104+
... name="upd_prices",
105+
... needs=["sales_df", "price"],
106+
... provides=[sideffect("price")])
107+
operation(name='upd_prices', needs=['sales_df', 'price'], provides=['sideffect(price)'], fn=upd_prices)
108+
109+
.. note::
110+
An ``operation`` with only *sideffect* outputs has a function that returns
111+
no value at all (like the one above). Such operations would still be called for
112+
their side-effects.
113+
114+
.. tip::
115+
You may associate sideffects with other data to convey their relationships,
116+
simply by including their names in the string - in the end, it's just a string -
117+
but no enforcement will happen from *graphkit*.
118+
119+
>>> sideffect("price[sales_df]")
120+
'sideffect(price[sales_df])'
121+
91122
"""
92123

93124
__slots__ = () # avoid __dict__ on instances
94125

95-
def __repr__(self):
96-
return "sideffect('%s')" % self
126+
def __new__(cls, name):
127+
return super(sideffect, cls).__new__(cls, "sideffect(%s)" % name)

test/test_graphkit.py

Lines changed: 80 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -552,46 +552,93 @@ def addplusplus(a, b, c=0):
552552
assert results["sum"] == sum(named_inputs.values())
553553

554554

555-
def test_sideffects():
556-
# Function without return value.
557-
def extend(box):
558-
box.extend([1, 2])
555+
# Function without return value.
556+
def _box_extend(box, *args):
557+
box.extend([1, 2])
559558

560-
def increment(box):
561-
for i in range(len(box)):
562-
box[i] += 1
563559

564-
# Designate `a`, `b` as sideffect inp/out arguments.
565-
graph = compose("mygraph")(
566-
operation(
567-
name="extend",
568-
needs=["box", sideffect("a")],
569-
provides=[sideffect("b")],
570-
)(extend),
571-
operation(
572-
name="increment",
573-
needs=["box", sideffect("b")],
574-
provides=sideffect("c"),
575-
)(increment),
576-
)
560+
def _box_increment(box):
561+
for i in range(len(box)):
562+
box[i] += 1
563+
577564

578-
assert graph({"box": [0], "a": True})["box"] == [1, 2, 3]
565+
@pytest.mark.parametrize("bools", range(4))
566+
def test_sideffect_no_real_data(bools):
567+
reverse = bools >> 0 & 1
568+
parallel = bools >> 1 & 1
579569

580-
# Reverse order of functions.
581-
graph = compose("mygraph")(
570+
ops = [
582571
operation(
583-
name="increment",
584-
needs=["box", sideffect("a")],
585-
provides=sideffect("b"),
586-
)(increment),
572+
name="extend", needs=["box", sideffect("a")], provides=[sideffect("b")]
573+
)(_box_extend),
587574
operation(
588-
name="extend",
589-
needs=["box", sideffect("b")],
590-
provides=[sideffect("c")],
591-
)(extend),
592-
)
575+
name="increment", needs=["box", sideffect("b")], provides=sideffect("c")
576+
)(_box_increment),
577+
]
578+
if reverse:
579+
ops = reversed(ops)
580+
# Designate `a`, `b` as sideffect inp/out arguments.
581+
graph = compose("mygraph")(*ops)
582+
if parallel:
583+
graph.set_execution_method("parallel")
584+
585+
# Normal data must not match sideffects
586+
with pytest.raises(ValueError, match="Unknown output node"):
587+
graph({"box": [0], "a": True}, outputs=["a"])
588+
with pytest.raises(ValueError, match="Unknown output node"):
589+
graph({"box": [0], "a": True}, outputs=["b"])
590+
591+
sol = graph({"box": [0], "a": True})
592+
# Nothing run if no sideffect inputs given.
593+
assert not graph.net.last_plan.executed
594+
assert sol == {"box": [0], "a": True}
595+
596+
# Nothing run if no sideffect inputs given.
597+
sol = graph({"box": [0], "a": True}, outputs=["box", sideffect("b")])
598+
assert not graph.net.last_plan.executed
599+
assert sol == {"box": [0]}
600+
601+
## OK INPUT SIDEFFECTS
602+
#
603+
# ok, no asked out
604+
sol = graph({"box": [0], sideffect("a"): True})
605+
assert sol == {"box": [1, 2, 3], sideffect("a"): True}
606+
#
607+
# bad, not asked the out-sideffect
608+
sol = graph({"box": [0], sideffect("a"): True}, "box")
609+
assert sol == {"box": [0]}
610+
#
611+
# ok, asked the 1st out-sideffect
612+
sol = graph({"box": [0], sideffect("a"): True}, ["box", sideffect("b")])
613+
assert sol == {"box": [0, 1, 2]}
614+
#
615+
# ok, asked the 2nd out-sideffect
616+
sol = graph({"box": [0], sideffect("a"): True}, ["box", sideffect("c")])
617+
assert sol == {"box": [1, 2, 3]}
618+
619+
620+
@pytest.mark.parametrize("bools", range(4))
621+
def test_sideffect_real_input(bools):
622+
reverse = bools >> 0 & 1
623+
parallel = bools >> 1 & 1
624+
625+
ops = [
626+
operation(name="extend", needs=["box", "a"], provides=[sideffect("b")])(
627+
_box_extend
628+
),
629+
operation(name="increment", needs=["box", sideffect("b")], provides="c")(
630+
_box_increment
631+
),
632+
]
633+
if reverse:
634+
ops = reversed(ops)
635+
# `a` is regular input data here; only `b` is a sideffect (output of "extend", input of "increment").
636+
graph = compose("mygraph")(*ops)
637+
if parallel:
638+
graph.set_execution_method("parallel")
593639

594-
assert graph({"box": [0], "a": None})["box"] == [1, 1, 2]
640+
assert graph({"box": [0], "a": True}) == {"a": True, "box": [1, 2, 3], "c": None}
641+
assert graph({"box": [0], "a": True}, ["box", "c"]) == {"box": [1, 2, 3], "c": None}
595642

596643

597644
@pytest.mark.xfail(

0 commit comments

Comments
 (0)