html_url,issue_url,id,node_id,user,user_label,created_at,updated_at,author_association,body,reactions,issue,issue_label,performed_via_github_app https://github.com/simonw/datasette/issues/878#issuecomment-970712713,https://api.github.com/repos/simonw/datasette/issues/878,970712713,IC_kwDOBm6k_c452-aJ,9599,simonw,2021-11-16T21:54:33Z,2021-11-16T21:54:33Z,OWNER,I'm going to continue working on this in a PR.,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970705738,https://api.github.com/repos/simonw/datasette/issues/878,970705738,IC_kwDOBm6k_c4528tK,9599,simonw,2021-11-16T21:44:31Z,2021-11-16T21:44:31Z,OWNER,Wrote a TIL about what I learned using `TopologicalSorter`: https://til.simonwillison.net/python/graphlib-topologicalsorter,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970673085,https://api.github.com/repos/simonw/datasette/issues/878,970673085,IC_kwDOBm6k_c4520u9,9599,simonw,2021-11-16T20:58:24Z,2021-11-16T20:58:24Z,OWNER,"New test: ```python class Complex(AsyncBase): def __init__(self): self.log = [] async def d(self): await asyncio.sleep(random() * 0.1) print(""LOG: d"") self.log.append(""d"") async def c(self): await asyncio.sleep(random() * 0.1) print(""LOG: c"") self.log.append(""c"") async def b(self, c, d): print(""LOG: b"") self.log.append(""b"") async def a(self, b, c): print(""LOG: a"") self.log.append(""a"") async def go(self, a): print(""LOG: go"") self.log.append(""go"") return self.log @pytest.mark.asyncio async def test_complex(): result = await Complex().go() # 'c' should only be called once assert tuple(result) in ( # c and d could happen in either order (""c"", ""d"", ""b"", ""a"", ""go""), (""d"", ""c"", ""b"", ""a"", ""go""), ) ``` And this code passes it: ```python import asyncio from functools import wraps import inspect try: import graphlib except ImportError: from . import vendored_graphlib as graphlib class AsyncMeta(type): def __new__(cls, name, bases, attrs): # Decorate any items that are 'async def' methods _registry = {} new_attrs = {""_registry"": _registry} for key, value in attrs.items(): if inspect.iscoroutinefunction(value) and not value.__name__ == ""resolve"": new_attrs[key] = make_method(value) _registry[key] = new_attrs[key] else: new_attrs[key] = value # Gather graph for later dependency resolution graph = { key: { p for p in inspect.signature(method).parameters.keys() if p != ""self"" and not p.startswith(""_"") } for key, method in _registry.items() } new_attrs[""_graph""] = graph return super().__new__(cls, name, bases, new_attrs) def make_method(method): parameters = inspect.signature(method).parameters.keys() @wraps(method) async def inner(self, _results=None, **kwargs): print(""\n{}.{}({}) _results={}"".format(self, method.__name__, kwargs, _results)) # Any parameters not provided by kwargs are resolved from registry to_resolve = [p for p in parameters if p not in kwargs and p != ""self""] missing = [p for p in to_resolve if p not in self._registry] assert ( not missing ), ""The following DI parameters could not be found in the registry: {}"".format( missing ) results = {} results.update(kwargs) if to_resolve: resolved_parameters = await self.resolve(to_resolve, _results) results.update(resolved_parameters) return_value = await method(self, **results) if _results is not None: _results[method.__name__] = return_value return return_value return inner class AsyncBase(metaclass=AsyncMeta): async def resolve(self, names, results=None): print(""\n resolve: "", names) if results is None: results = {} # Come up with an execution plan, just for these nodes ts = graphlib.TopologicalSorter() to_do = set(names) done = set() while to_do: item = to_do.pop() dependencies = self._graph[item] ts.add(item, *dependencies) done.add(item) # Add any not-done dependencies to the queue to_do.update({k for k in dependencies if k not in done}) ts.prepare() plan = [] while ts.is_active(): node_group = ts.get_ready() plan.append(node_group) ts.done(*node_group) print(""plan:"", plan) results = {} for node_group in plan: awaitables = [ self._registry[name]( self, _results=results, **{k: v for k, v in results.items() if k in self._graph[name]}, ) for name in node_group ] print("" results = "", results) print("" awaitables: "", awaitables) awaitable_results = await asyncio.gather(*awaitables) results.update( {p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)} ) print("" End of resolve(), returning"", results) return {key: value for key, value in results.items() if key in names} ```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970660299,https://api.github.com/repos/simonw/datasette/issues/878,970660299,IC_kwDOBm6k_c452xnL,9599,simonw,2021-11-16T20:39:43Z,2021-11-16T20:42:27Z,OWNER,"But that does seem to be the plan that `TopographicalSorter` provides: ```python graph = {""go"": {""a""}, ""a"": {""b"", ""c""}, ""b"": {""c"", ""d""}} ts = TopologicalSorter(graph) ts.prepare() while ts.is_active(): nodes = ts.get_ready() print(nodes) ts.done(*nodes) ``` Outputs: ``` ('c', 'd') ('b',) ('a',) ('go',) ``` Also: ```python graph = {""go"": {""d"", ""e"", ""f""}, ""d"": {""b"", ""c""}, ""b"": {""c""}} ts = TopologicalSorter(graph) ts.prepare() while ts.is_active(): nodes = ts.get_ready() print(nodes) ts.done(*nodes) ``` Gives: ``` ('e', 'f', 'c') ('b',) ('d',) ('go',) ``` I'm confident that `TopologicalSorter` is the way to do this. I think I need to rewrite my code to call it once to get that plan, then `await asyncio.gather(*nodes)` in turn to execute it.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970657874,https://api.github.com/repos/simonw/datasette/issues/878,970657874,IC_kwDOBm6k_c452xBS,9599,simonw,2021-11-16T20:36:01Z,2021-11-16T20:36:01Z,OWNER,"My goal here is to calculate the most efficient way to resolve the different nodes, running them in parallel where possible. So for this class: ```python class Complex(AsyncBase): async def d(self): pass async def c(self): pass async def b(self, c, d): pass async def a(self, b, c): pass async def go(self, a): pass ``` A call to `go()` should do this: - `c` and `d` in parallel - `b` - `a` - `go`","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970655927,https://api.github.com/repos/simonw/datasette/issues/878,970655927,IC_kwDOBm6k_c452wi3,9599,simonw,2021-11-16T20:33:11Z,2021-11-16T20:33:11Z,OWNER,"What should be happening here instead is it should resolve the full graph and notice that `c` is depended on by both `b` and `a` - so it should run `c` first, then run the next ones in parallel. So maybe the algorithm I'm inheriting from https://docs.python.org/3/library/graphlib.html isn't the correct algorithm?","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970655304,https://api.github.com/repos/simonw/datasette/issues/878,970655304,IC_kwDOBm6k_c452wZI,9599,simonw,2021-11-16T20:32:16Z,2021-11-16T20:32:16Z,OWNER,"This code is really fiddly. I just got to this version: ```python import asyncio from functools import wraps import inspect try: import graphlib except ImportError: from . import vendored_graphlib as graphlib class AsyncMeta(type): def __new__(cls, name, bases, attrs): # Decorate any items that are 'async def' methods _registry = {} new_attrs = {""_registry"": _registry} for key, value in attrs.items(): if inspect.iscoroutinefunction(value) and not value.__name__ == ""resolve"": new_attrs[key] = make_method(value) _registry[key] = new_attrs[key] else: new_attrs[key] = value # Gather graph for later dependency resolution graph = { key: { p for p in inspect.signature(method).parameters.keys() if p != ""self"" and not p.startswith(""_"") } for key, method in _registry.items() } new_attrs[""_graph""] = graph return super().__new__(cls, name, bases, new_attrs) def make_method(method): @wraps(method) async def inner(self, _results=None, **kwargs): print(""inner - _results="", _results) parameters = inspect.signature(method).parameters.keys() # Any parameters not provided by kwargs are resolved from registry to_resolve = [p for p in parameters if p not in kwargs and p != ""self""] missing = [p for p in to_resolve if p not in self._registry] assert ( not missing ), ""The following DI parameters could not be found in the registry: {}"".format( missing ) results = {} results.update(kwargs) if to_resolve: resolved_parameters = await self.resolve(to_resolve, _results) results.update(resolved_parameters) return_value = await method(self, **results) if _results is not None: _results[method.__name__] = return_value return return_value return inner class AsyncBase(metaclass=AsyncMeta): async def resolve(self, names, results=None): print(""\n resolve: "", names) if results is None: results = {} # Resolve them in the correct order ts = graphlib.TopologicalSorter() for name in names: ts.add(name, *self._graph[name]) ts.prepare() async def resolve_nodes(nodes): print("" resolve_nodes"", nodes) print("" (current results = {})"".format(repr(results))) awaitables = [ self._registry[name]( self, _results=results, **{k: v for k, v in results.items() if k in self._graph[name]}, ) for name in nodes if name not in results ] print("" awaitables: "", awaitables) awaitable_results = await asyncio.gather(*awaitables) results.update( {p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)} ) if not ts.is_active(): # Nothing has dependencies - just resolve directly print("" no dependencies, resolve directly"") await resolve_nodes(names) else: # Resolve in topological order while ts.is_active(): nodes = ts.get_ready() print("" ts.get_ready() returned nodes:"", nodes) await resolve_nodes(nodes) for node in nodes: ts.done(node) print("" End of resolve(), returning"", results) return {key: value for key, value in results.items() if key in names} ``` With this test: ```python class Complex(AsyncBase): def __init__(self): self.log = [] async def c(self): print(""LOG: c"") self.log.append(""c"") async def b(self, c): print(""LOG: b"") self.log.append(""b"") async def a(self, b, c): print(""LOG: a"") self.log.append(""a"") async def go(self, a): print(""LOG: go"") self.log.append(""go"") return self.log @pytest.mark.asyncio async def test_complex(): result = await Complex().go() # 'c' should only be called once assert result == [""c"", ""b"", ""a"", ""go""] ``` This test sometimes passes, and sometimes fails! Output for a pass: ``` tests/test_asyncdi.py inner - _results= None resolve: ['a'] ts.get_ready() returned nodes: ('c', 'b') resolve_nodes ('c', 'b') (current results = {}) awaitables: [, ] inner - _results= {} LOG: c inner - _results= {'c': None} resolve: ['c'] ts.get_ready() returned nodes: ('c',) resolve_nodes ('c',) (current results = {'c': None}) awaitables: [] End of resolve(), returning {'c': None} LOG: b ts.get_ready() returned nodes: ('a',) resolve_nodes ('a',) (current results = {'c': None, 'b': None}) awaitables: [] inner - _results= {'c': None, 'b': None} LOG: a End of resolve(), returning {'c': None, 'b': None, 'a': None} LOG: go ``` Output for a fail: ``` tests/test_asyncdi.py inner - _results= None resolve: ['a'] ts.get_ready() returned nodes: ('b', 'c') resolve_nodes ('b', 'c') (current results = {}) awaitables: [, ] inner - _results= {} resolve: ['c'] ts.get_ready() returned nodes: ('c',) resolve_nodes ('c',) (current results = {}) awaitables: [] inner - _results= {} LOG: c inner - _results= {'c': None} LOG: c End of resolve(), returning {'c': None} LOG: b ts.get_ready() returned nodes: ('a',) resolve_nodes ('a',) (current results = {'c': None, 'b': None}) awaitables: [] inner - _results= {'c': None, 'b': None} LOG: a End of resolve(), returning {'c': None, 'b': None, 'a': None} LOG: go F =================================================================================================== FAILURES =================================================================================================== _________________________________________________________________________________________________ test_complex _________________________________________________________________________________________________ @pytest.mark.asyncio async def test_complex(): result = await Complex().go() # 'c' should only be called once > assert result == [""c"", ""b"", ""a"", ""go""] E AssertionError: assert ['c', 'c', 'b', 'a', 'go'] == ['c', 'b', 'a', 'go'] E At index 1 diff: 'c' != 'b' E Left contains one more item: 'go' E Use -v to get the full diff tests/test_asyncdi.py:48: AssertionError ================== short test summary info ================================ FAILED tests/test_asyncdi.py::test_complex - AssertionError: assert ['c', 'c', 'b', 'a', 'go'] == ['c', 'b', 'a', 'go'] ``` I figured out why this is happening. `a` requires `b` and `c` `b` also requires `c` The code decides to run `b` and `c` in parallel. If `c` completes first, then when `b` runs it gets to use the already-calculated result for `c` - so it doesn't need to call `c` again. If `b` gets to that point before `c` does it also needs to call `c`.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins", https://github.com/simonw/datasette/issues/878#issuecomment-970624197,https://api.github.com/repos/simonw/datasette/issues/878,970624197,IC_kwDOBm6k_c452ozF,9599,simonw,2021-11-16T19:49:05Z,2021-11-16T19:49:05Z,OWNER,"Here's the latest version of my weird dependency injection async class: ```python import inspect class AsyncMeta(type): def __new__(cls, name, bases, attrs): # Decorate any items that are 'async def' methods _registry = {} new_attrs = {""_registry"": _registry} for key, value in attrs.items(): if inspect.iscoroutinefunction(value) and not value.__name__ == ""resolve"": new_attrs[key] = make_method(value) _registry[key] = new_attrs[key] else: new_attrs[key] = value # Topological sort of _registry by parameter dependencies graph = { key: { p for p in inspect.signature(method).parameters.keys() if p != ""self"" and not p.startswith(""_"") } for key, method in _registry.items() } new_attrs[""_graph""] = graph return super().__new__(cls, name, bases, new_attrs) def make_method(method): @wraps(method) async def inner(self, **kwargs): parameters = inspect.signature(method).parameters.keys() # Any parameters not provided by kwargs are resolved from registry to_resolve = [p for p in parameters if p not in kwargs and p != ""self""] missing = [p for p in to_resolve if p not in self._registry] assert ( not missing ), ""The following DI parameters could not be found in the registry: {}"".format( missing ) results = {} results.update(kwargs) results.update(await self.resolve(to_resolve)) return await method(self, **results) return inner bad = [0] class AsyncBase(metaclass=AsyncMeta): async def resolve(self, names): print("" resolve({})"".format(names)) results = {} # Resolve them in the correct order ts = TopologicalSorter() ts2 = TopologicalSorter() print("" names = "", names) print("" self._graph = "", self._graph) for name in names: if self._graph[name]: ts.add(name, *self._graph[name]) ts2.add(name, *self._graph[name]) print("" static_order ="", tuple(ts2.static_order())) ts.prepare() while ts.is_active(): print("" is_active, i = "", bad[0]) bad[0] += 1 if bad[0] > 20: print("" Infinite loop?"") break nodes = ts.get_ready() print("" Do nodes:"", nodes) awaitables = [self._registry[name](self, **{ k: v for k, v in results.items() if k in self._graph[name] }) for name in nodes] print("" awaitables: "", awaitables) awaitable_results = await asyncio.gather(*awaitables) results.update({ p[0].__name__: p[1] for p in zip(awaitables, awaitable_results) }) print(results) for node in nodes: ts.done(node) return results ``` Example usage: ```python class Foo(AsyncBase): async def graa(self, boff): print(""graa"") return 5 async def boff(self): print(""boff"") return 8 async def other(self, boff, graa): print(""other"") return 5 + boff + graa foo = Foo() await foo.other() ``` Output: ``` resolve(['boff', 'graa']) names = ['boff', 'graa'] self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}} static_order = ('boff', 'graa') is_active, i = 0 Do nodes: ('boff',) awaitables: [] resolve([]) names = [] self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}} static_order = () boff {'boff': 8} is_active, i = 1 Do nodes: ('graa',) awaitables: [] resolve([]) names = [] self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}} static_order = () graa {'boff': 8, 'graa': 5} other 18 ```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",648435885,"New pattern for views that return either JSON or HTML, available for plugins",