html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,issue,performed_via_github_app
https://github.com/simonw/datasette/issues/878#issuecomment-970712713,https://api.github.com/repos/simonw/datasette/issues/878,970712713,IC_kwDOBm6k_c452-aJ,9599,2021-11-16T21:54:33Z,2021-11-16T21:54:33Z,OWNER,I'm going to continue working on this in a PR.,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970705738,https://api.github.com/repos/simonw/datasette/issues/878,970705738,IC_kwDOBm6k_c4528tK,9599,2021-11-16T21:44:31Z,2021-11-16T21:44:31Z,OWNER,Wrote a TIL about what I learned using `TopologicalSorter`: https://til.simonwillison.net/python/graphlib-topologicalsorter,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970673085,https://api.github.com/repos/simonw/datasette/issues/878,970673085,IC_kwDOBm6k_c4520u9,9599,2021-11-16T20:58:24Z,2021-11-16T20:58:24Z,OWNER,"New test:
```python
class Complex(AsyncBase):
def __init__(self):
self.log = []
async def d(self):
await asyncio.sleep(random() * 0.1)
print(""LOG: d"")
self.log.append(""d"")
async def c(self):
await asyncio.sleep(random() * 0.1)
print(""LOG: c"")
self.log.append(""c"")
async def b(self, c, d):
print(""LOG: b"")
self.log.append(""b"")
async def a(self, b, c):
print(""LOG: a"")
self.log.append(""a"")
async def go(self, a):
print(""LOG: go"")
self.log.append(""go"")
return self.log
@pytest.mark.asyncio
async def test_complex():
result = await Complex().go()
# 'c' should only be called once
assert tuple(result) in (
# c and d could happen in either order
(""c"", ""d"", ""b"", ""a"", ""go""),
(""d"", ""c"", ""b"", ""a"", ""go""),
)
```
And this code passes it:
```python
import asyncio
from functools import wraps
import inspect
try:
import graphlib
except ImportError:
from . import vendored_graphlib as graphlib
class AsyncMeta(type):
def __new__(cls, name, bases, attrs):
# Decorate any items that are 'async def' methods
_registry = {}
new_attrs = {""_registry"": _registry}
for key, value in attrs.items():
if inspect.iscoroutinefunction(value) and not value.__name__ == ""resolve"":
new_attrs[key] = make_method(value)
_registry[key] = new_attrs[key]
else:
new_attrs[key] = value
# Gather graph for later dependency resolution
graph = {
key: {
p
for p in inspect.signature(method).parameters.keys()
if p != ""self"" and not p.startswith(""_"")
}
for key, method in _registry.items()
}
new_attrs[""_graph""] = graph
return super().__new__(cls, name, bases, new_attrs)
def make_method(method):
parameters = inspect.signature(method).parameters.keys()
@wraps(method)
async def inner(self, _results=None, **kwargs):
print(""\n{}.{}({}) _results={}"".format(self, method.__name__, kwargs, _results))
# Any parameters not provided by kwargs are resolved from registry
to_resolve = [p for p in parameters if p not in kwargs and p != ""self""]
missing = [p for p in to_resolve if p not in self._registry]
assert (
not missing
), ""The following DI parameters could not be found in the registry: {}"".format(
missing
)
results = {}
results.update(kwargs)
if to_resolve:
resolved_parameters = await self.resolve(to_resolve, _results)
results.update(resolved_parameters)
return_value = await method(self, **results)
if _results is not None:
_results[method.__name__] = return_value
return return_value
return inner
class AsyncBase(metaclass=AsyncMeta):
async def resolve(self, names, results=None):
print(""\n resolve: "", names)
if results is None:
results = {}
# Come up with an execution plan, just for these nodes
ts = graphlib.TopologicalSorter()
to_do = set(names)
done = set()
while to_do:
item = to_do.pop()
dependencies = self._graph[item]
ts.add(item, *dependencies)
done.add(item)
# Add any not-done dependencies to the queue
to_do.update({k for k in dependencies if k not in done})
ts.prepare()
plan = []
while ts.is_active():
node_group = ts.get_ready()
plan.append(node_group)
ts.done(*node_group)
print(""plan:"", plan)
results = {}
for node_group in plan:
awaitables = [
self._registry[name](
self,
_results=results,
**{k: v for k, v in results.items() if k in self._graph[name]},
)
for name in node_group
]
print("" results = "", results)
print("" awaitables: "", awaitables)
awaitable_results = await asyncio.gather(*awaitables)
results.update(
{p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)}
)
print("" End of resolve(), returning"", results)
return {key: value for key, value in results.items() if key in names}
```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970660299,https://api.github.com/repos/simonw/datasette/issues/878,970660299,IC_kwDOBm6k_c452xnL,9599,2021-11-16T20:39:43Z,2021-11-16T20:42:27Z,OWNER,"But that does seem to be the plan that `TopologicalSorter` provides:
```python
graph = {""go"": {""a""}, ""a"": {""b"", ""c""}, ""b"": {""c"", ""d""}}
ts = TopologicalSorter(graph)
ts.prepare()
while ts.is_active():
nodes = ts.get_ready()
print(nodes)
ts.done(*nodes)
```
Outputs:
```
('c', 'd')
('b',)
('a',)
('go',)
```
Also:
```python
graph = {""go"": {""d"", ""e"", ""f""}, ""d"": {""b"", ""c""}, ""b"": {""c""}}
ts = TopologicalSorter(graph)
ts.prepare()
while ts.is_active():
nodes = ts.get_ready()
print(nodes)
ts.done(*nodes)
```
Gives:
```
('e', 'f', 'c')
('b',)
('d',)
('go',)
```
I'm confident that `TopologicalSorter` is the way to do this. I think I need to rewrite my code to call it once to get that plan, then `await asyncio.gather(*nodes)` in turn to execute it.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970657874,https://api.github.com/repos/simonw/datasette/issues/878,970657874,IC_kwDOBm6k_c452xBS,9599,2021-11-16T20:36:01Z,2021-11-16T20:36:01Z,OWNER,"My goal here is to calculate the most efficient way to resolve the different nodes, running them in parallel where possible.
So for this class:
```python
class Complex(AsyncBase):
async def d(self):
pass
async def c(self):
pass
async def b(self, c, d):
pass
async def a(self, b, c):
pass
async def go(self, a):
pass
```
A call to `go()` should do this:
- `c` and `d` in parallel
- `b`
- `a`
- `go`","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970655927,https://api.github.com/repos/simonw/datasette/issues/878,970655927,IC_kwDOBm6k_c452wi3,9599,2021-11-16T20:33:11Z,2021-11-16T20:33:11Z,OWNER,"What should be happening here instead is it should resolve the full graph and notice that `c` is depended on by both `b` and `a` - so it should run `c` first, then run the next ones in parallel.
So maybe the algorithm I'm inheriting from https://docs.python.org/3/library/graphlib.html isn't the correct algorithm?","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970655304,https://api.github.com/repos/simonw/datasette/issues/878,970655304,IC_kwDOBm6k_c452wZI,9599,2021-11-16T20:32:16Z,2021-11-16T20:32:16Z,OWNER,"This code is really fiddly. I just got to this version:
```python
import asyncio
from functools import wraps
import inspect
try:
import graphlib
except ImportError:
from . import vendored_graphlib as graphlib
class AsyncMeta(type):
def __new__(cls, name, bases, attrs):
# Decorate any items that are 'async def' methods
_registry = {}
new_attrs = {""_registry"": _registry}
for key, value in attrs.items():
if inspect.iscoroutinefunction(value) and not value.__name__ == ""resolve"":
new_attrs[key] = make_method(value)
_registry[key] = new_attrs[key]
else:
new_attrs[key] = value
# Gather graph for later dependency resolution
graph = {
key: {
p
for p in inspect.signature(method).parameters.keys()
if p != ""self"" and not p.startswith(""_"")
}
for key, method in _registry.items()
}
new_attrs[""_graph""] = graph
return super().__new__(cls, name, bases, new_attrs)
def make_method(method):
@wraps(method)
async def inner(self, _results=None, **kwargs):
print(""inner - _results="", _results)
parameters = inspect.signature(method).parameters.keys()
# Any parameters not provided by kwargs are resolved from registry
to_resolve = [p for p in parameters if p not in kwargs and p != ""self""]
missing = [p for p in to_resolve if p not in self._registry]
assert (
not missing
), ""The following DI parameters could not be found in the registry: {}"".format(
missing
)
results = {}
results.update(kwargs)
if to_resolve:
resolved_parameters = await self.resolve(to_resolve, _results)
results.update(resolved_parameters)
return_value = await method(self, **results)
if _results is not None:
_results[method.__name__] = return_value
return return_value
return inner
class AsyncBase(metaclass=AsyncMeta):
async def resolve(self, names, results=None):
print(""\n resolve: "", names)
if results is None:
results = {}
# Resolve them in the correct order
ts = graphlib.TopologicalSorter()
for name in names:
ts.add(name, *self._graph[name])
ts.prepare()
async def resolve_nodes(nodes):
print("" resolve_nodes"", nodes)
print("" (current results = {})"".format(repr(results)))
awaitables = [
self._registry[name](
self,
_results=results,
**{k: v for k, v in results.items() if k in self._graph[name]},
)
for name in nodes
if name not in results
]
print("" awaitables: "", awaitables)
awaitable_results = await asyncio.gather(*awaitables)
results.update(
{p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)}
)
if not ts.is_active():
# Nothing has dependencies - just resolve directly
print("" no dependencies, resolve directly"")
await resolve_nodes(names)
else:
# Resolve in topological order
while ts.is_active():
nodes = ts.get_ready()
print("" ts.get_ready() returned nodes:"", nodes)
await resolve_nodes(nodes)
for node in nodes:
ts.done(node)
print("" End of resolve(), returning"", results)
return {key: value for key, value in results.items() if key in names}
```
With this test:
```python
class Complex(AsyncBase):
def __init__(self):
self.log = []
async def c(self):
print(""LOG: c"")
self.log.append(""c"")
async def b(self, c):
print(""LOG: b"")
self.log.append(""b"")
async def a(self, b, c):
print(""LOG: a"")
self.log.append(""a"")
async def go(self, a):
print(""LOG: go"")
self.log.append(""go"")
return self.log
@pytest.mark.asyncio
async def test_complex():
result = await Complex().go()
# 'c' should only be called once
assert result == [""c"", ""b"", ""a"", ""go""]
```
This test sometimes passes, and sometimes fails!
Output for a pass:
```
tests/test_asyncdi.py inner - _results= None
resolve: ['a']
ts.get_ready() returned nodes: ('c', 'b')
resolve_nodes ('c', 'b')
(current results = {})
awaitables: [, ]
inner - _results= {}
LOG: c
inner - _results= {'c': None}
resolve: ['c']
ts.get_ready() returned nodes: ('c',)
resolve_nodes ('c',)
(current results = {'c': None})
awaitables: []
End of resolve(), returning {'c': None}
LOG: b
ts.get_ready() returned nodes: ('a',)
resolve_nodes ('a',)
(current results = {'c': None, 'b': None})
awaitables: []
inner - _results= {'c': None, 'b': None}
LOG: a
End of resolve(), returning {'c': None, 'b': None, 'a': None}
LOG: go
```
Output for a fail:
```
tests/test_asyncdi.py inner - _results= None
resolve: ['a']
ts.get_ready() returned nodes: ('b', 'c')
resolve_nodes ('b', 'c')
(current results = {})
awaitables: [, ]
inner - _results= {}
resolve: ['c']
ts.get_ready() returned nodes: ('c',)
resolve_nodes ('c',)
(current results = {})
awaitables: []
inner - _results= {}
LOG: c
inner - _results= {'c': None}
LOG: c
End of resolve(), returning {'c': None}
LOG: b
ts.get_ready() returned nodes: ('a',)
resolve_nodes ('a',)
(current results = {'c': None, 'b': None})
awaitables: []
inner - _results= {'c': None, 'b': None}
LOG: a
End of resolve(), returning {'c': None, 'b': None, 'a': None}
LOG: go
F
=================================================================================================== FAILURES ===================================================================================================
_________________________________________________________________________________________________ test_complex _________________________________________________________________________________________________
@pytest.mark.asyncio
async def test_complex():
result = await Complex().go()
# 'c' should only be called once
> assert result == [""c"", ""b"", ""a"", ""go""]
E AssertionError: assert ['c', 'c', 'b', 'a', 'go'] == ['c', 'b', 'a', 'go']
E At index 1 diff: 'c' != 'b'
E Left contains one more item: 'go'
E Use -v to get the full diff
tests/test_asyncdi.py:48: AssertionError
================== short test summary info ================================
FAILED tests/test_asyncdi.py::test_complex - AssertionError: assert ['c', 'c', 'b', 'a', 'go'] == ['c', 'b', 'a', 'go']
```
I figured out why this is happening.
`a` requires `b` and `c`
`b` also requires `c`
The code decides to run `b` and `c` in parallel.
If `c` completes first, then when `b` runs it gets to use the already-calculated result for `c` - so it doesn't need to call `c` again.
If `b` gets to that point before `c` does, it also needs to call `c`.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,
https://github.com/simonw/datasette/issues/878#issuecomment-970624197,https://api.github.com/repos/simonw/datasette/issues/878,970624197,IC_kwDOBm6k_c452ozF,9599,2021-11-16T19:49:05Z,2021-11-16T19:49:05Z,OWNER,"Here's the latest version of my weird dependency injection async class:
```python
import inspect
class AsyncMeta(type):
def __new__(cls, name, bases, attrs):
# Decorate any items that are 'async def' methods
_registry = {}
new_attrs = {""_registry"": _registry}
for key, value in attrs.items():
if inspect.iscoroutinefunction(value) and not value.__name__ == ""resolve"":
new_attrs[key] = make_method(value)
_registry[key] = new_attrs[key]
else:
new_attrs[key] = value
# Topological sort of _registry by parameter dependencies
graph = {
key: {
p for p in inspect.signature(method).parameters.keys()
if p != ""self"" and not p.startswith(""_"")
}
for key, method in _registry.items()
}
new_attrs[""_graph""] = graph
return super().__new__(cls, name, bases, new_attrs)
def make_method(method):
@wraps(method)
async def inner(self, **kwargs):
parameters = inspect.signature(method).parameters.keys()
# Any parameters not provided by kwargs are resolved from registry
to_resolve = [p for p in parameters if p not in kwargs and p != ""self""]
missing = [p for p in to_resolve if p not in self._registry]
assert (
not missing
), ""The following DI parameters could not be found in the registry: {}"".format(
missing
)
results = {}
results.update(kwargs)
results.update(await self.resolve(to_resolve))
return await method(self, **results)
return inner
bad = [0]
class AsyncBase(metaclass=AsyncMeta):
async def resolve(self, names):
print("" resolve({})"".format(names))
results = {}
# Resolve them in the correct order
ts = TopologicalSorter()
ts2 = TopologicalSorter()
print("" names = "", names)
print("" self._graph = "", self._graph)
for name in names:
if self._graph[name]:
ts.add(name, *self._graph[name])
ts2.add(name, *self._graph[name])
print("" static_order ="", tuple(ts2.static_order()))
ts.prepare()
while ts.is_active():
print("" is_active, i = "", bad[0])
bad[0] += 1
if bad[0] > 20:
print("" Infinite loop?"")
break
nodes = ts.get_ready()
print("" Do nodes:"", nodes)
awaitables = [self._registry[name](self, **{
k: v for k, v in results.items() if k in self._graph[name]
}) for name in nodes]
print("" awaitables: "", awaitables)
awaitable_results = await asyncio.gather(*awaitables)
results.update({
p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)
})
print(results)
for node in nodes:
ts.done(node)
return results
```
Example usage:
```python
class Foo(AsyncBase):
async def graa(self, boff):
print(""graa"")
return 5
async def boff(self):
print(""boff"")
return 8
async def other(self, boff, graa):
print(""other"")
return 5 + boff + graa
foo = Foo()
await foo.other()
```
Output:
```
resolve(['boff', 'graa'])
names = ['boff', 'graa']
self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}}
static_order = ('boff', 'graa')
is_active, i = 0
Do nodes: ('boff',)
awaitables: []
resolve([])
names = []
self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}}
static_order = ()
boff
{'boff': 8}
is_active, i = 1
Do nodes: ('graa',)
awaitables: []
resolve([])
names = []
self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}}
static_order = ()
graa
{'boff': 8, 'graa': 5}
other
18
```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,