github: issue_comments: 8,883 rows where user = 9599 sorted by updated

8,883 rows where user = 9599 sorted by updated_at descending

Search:

descending

id	html_url	issue_url	node_id	user	created_at	updated_at ▲	author_association	body	reactions	issue
973635157	https://github.com/simonw/datasette/issues/878#issuecomment-973635157	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c46CH5V	simonw 9599	2021-11-19T01:07:08Z	2021-11-19T01:07:08Z	OWNER	This exercise is proving so useful in getting my head around how the enormous and complex `TableView` class works again. Here's where I've got to now - I'm systematically working through the variables that are returned for HTML and for JSON copying across code to get it to work: ```python from datasette.database import QueryInterrupted from datasette.utils import escape_sqlite from datasette.utils.asgi import Response, NotFound, Forbidden from datasette.views.base import DatasetteError from datasette import hookimpl from asyncinject import AsyncInject, inject from pprint import pformat class Table(AsyncInject): @inject async def database(self, request, datasette): # TODO: all that nasty hash resolving stuff can go here db_name = request.url_vars["db_name"] try: db = datasette.databases[db_name] except KeyError: raise NotFound(f"Database '{db_name}' does not exist") return db @inject async def table_and_format(self, request, database, datasette): table_and_format = request.url_vars["table_and_format"] # TODO: be a lot smarter here if "." in table_and_format: return table_and_format.split(".", 2) else: return table_and_format, "html" @inject async def main(self, request, database, table_and_format, datasette): # TODO: if this is actually a canned query, dispatch to it table, format = table_and_format is_view = bool(await database.get_view_definition(table)) table_exists = bool(await database.table_exists(table)) if not is_view and not table_exists: raise NotFound(f"Table not found: {table}") await check_permissions( datasette, request, [ ("view-table", (database.name, table)), ("view-database", database.name), "view-instance", ], ) private = not await datasette.permission_allowed( None, "view-table", (database.name, table), default=True ) pks = await database.primary_keys(table) table_columns = await database.table_columns(table) specified_columns = await columns_to_select(datasette, database, table, request) select_specified_columns = ", ".join( escape_sqlite(t) for t in specified_columns ) select_all_columns = ", ".join(escape_sqlite(t) for t in table_columns) use_rowid = not pks and not is_view if use_rowid: select_specified_columns = f"rowid, {select_specified_columns}" select_all_columns = f"rowid, {select_all_columns}" order_by = "rowid" order_by_pks = "rowid" else: order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) order_by = order_by_pks if is_view: order_by = "" nocount = request.args.get("_nocount") nofacet = request.args.get("_nofacet") if request.args.get("_shape") in ("array", "object"): nocount = True nofacet = True # Next, a TON of SQL to build where_params and filters and suchlike # skipping that and jumping straight to... where_clauses = [] where_clause = "" if where_clauses: where_clause = f"where {' and '.join(where_clauses)} " from_sql = "from {table_name} {where}".format( table_name=escape_sqlite(table), where=("where {} ".format(" and ".join(where_clauses))) if where_clauses else "", ) from_sql_params ={} params = {} count_sql = f"select count(*) {from_sql}" sql_no_order_no_limit = ( "select {select_all_columns} from {table_name} {where}".format( select_all_columns=select_all_columns, table_name=escape_sqlite(table), where=where_clause, ) ) page_size = 100 offset = " offset 0" sql = "select {select_specified_columns} from {table_name} {where}{order_by} limit {page_size}{offset}".format( select_specified_columns=select_specified_columns, table_name=escape_sqlite(table), where=where_clause, order_by=order_by, page_size=page_size + 1, offset=offset, ) # Fetch rows results = await database.execute(sql, params, truncate=True) columns = [r[0] for r in results.description] rows = list(results.rows) # Fetch count filtered_table_rows_count = None if count_sql: try: count_rows = list(await database.execute(count_sql, from_sql_params)) filtered_table_rows_count = count_rows[0][0] except QueryInterrupted: pass vars = { "json": { # THIS STUFF is from the regular JSON "database": database.name, "table": table, "is_view": is_view, # "human_description_en": human_description_en, "rows": rows[:page_size], "truncated": results.truncated, "filtered_table_rows_count": filtered_table_rows_count, # "expanded_columns": expanded_columns, # "expandable_columns": expandable_columns, "columns": columns, "primary_keys": pks, # "units": units, "query": {"sql": sql, "params": params}, # "facet_results": facet_results, # "suggested_facets": suggested_facets, # "next": next_value and str(next_value) or None, # "next_url": next_url, "private": private, "allow_execute_sql": await datasette.permission_allowed( request.actor, "execute-sql", database, default=True ), }, "html": { # ... this is the HTML special stuff # "table_actions": table_actions, # "supports_search": bool(fts_table), # "search": search or "", "use_rowid": use_rowid, # "filters": filters, # "display_columns": display_columns, # "filter_columns": filter_columns, # "display_rows": display_rows, # "facets_timed_out": facets_timed_out, # "sorted_facet_results": sorted( # facet_results.values(), # key=lambda f: (len(f["results"]), f["name"]), # reverse=True, # ), # "show_facet_counts": special_args.get("_facet_size") == "max", # "extra_wheres_for_ui": extra_wheres_for_ui, # "form_hidden_args": form_hidden_args, # "is_sortable": any(c["sortable"] for c in display_columns), # "path_with_replaced_args": path_with_replaced_args, # "path_with_removed_args": path_with_removed_args, # "append_querystring": append_querystring, "request": request, # "sort": sort, # "sort_desc": sort_desc, "disable_sort": is_view, # "custom_table_templates": [ # f"_table-{to_css_class(database)}-{to_css_class(table)}.html", # f"_table-table-{to_css_class(database)}-{to_css_class(table)}.html", # "_table.html", # ], # "metadata": metadata, # "view_definition": await db.get_view_definition(table), # "table_definition": await db.get_table_definition(table), }, } # I'm just trying to get HTML to work for the moment if format == "json": return Response.json(dict(vars, locals=locals()), default=repr) else: return Response.html(repr(vars["html"])) async def view(self, request, datasette): return await self.main(request=request, datasette=datasette) @hookimpl def register_routes(): return [ (r"/t/(?P<db_name>[^/]+)/(?P<table_and_format>[^/]+?$)", Table().view), ] async def check_permissions(datasette, request, permissions): """permissions is a list of (action, resource) tuples or 'action' strings""" for permission in permissions: if isinstance(permission, str): action = permission resource = None elif isinstance(permission, (tuple, list)) and len(permission) == 2: action, resource = permission else: assert ( False ), "permission should be string or tuple of two items: {}".format( repr(permission) ) ok = await datasette.permission_allowed( request.actor, action, resource=resource, default=None, ) if ok is not None: if ok: return else: raise Forbidden(action) async def columns_to_select(datasette, database, table, request): table_columns = await database.table_columns(table) pks = await database.primary_keys(table) columns = list(table_columns) if "_col" in request.args: columns = list(pks) _cols = request.args.getlist("_col") bad_columns = [column for column in _cols if column not in table_columns] if bad_columns: raise DatasetteError( "_col={} - invalid columns".format(", ".join(bad_columns)), status=400, ) # De-duplicate maintaining order: columns.extend(dict.fromkeys(_cols)) if "_nocol" in request.args: # Return all columns EXCEPT these bad_columns = [ column for column in request.args.getlist("_nocol") if (column not in table_columns) or (column in pks) ] if bad_columns: raise DatasetteError( "_nocol={} - invalid columns".format(", ".join(bad_columns)), status=400, ) tmp_columns = [ column for column in columns if column not in request.args.getlist("_nocol") ] columns = tmp_columns return columns ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
973568285	https://github.com/simonw/datasette/issues/878#issuecomment-973568285	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c46B3kd	simonw 9599	2021-11-19T00:29:20Z	2021-11-19T00:29:20Z	OWNER	This is working! ```python from datasette.utils.asgi import Response from datasette import hookimpl import html from asyncinject import AsyncInject, inject class Table(AsyncInject): @inject async def database(self, request): return request.url_vars["db_name"] `@inject async def main(self, request, database): return Response.html("Database: {}".format( html.escape(database) )) async def view(self, request): return await self.main(request=request)` @hookimpl def register_routes(): return [ (r"/t/(?P<db_name>[^/]+)/(?P<table_and_format>[^/]+?$)", Table().view), ] `` This project will definitely show me if I actually like theasyncinject` patterns or not.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
973564260	https://github.com/simonw/datasette/issues/878#issuecomment-973564260	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c46B2lk	simonw 9599	2021-11-19T00:27:06Z	2021-11-19T00:27:06Z	OWNER	Problem: the fancy `asyncinject` stuff inteferes with the fancy Datasette thing that introspects view functions to look for what parameters they take: ```python class Table(asyncinject.AsyncInjectAll): async def view(self, request): return Response.html("Hello from {}".format( html.escape(repr(request.url_vars)) )) @hookimpl def register_routes(): return [ (r"/t/(?P<db_name>[^/]+)/(?P<table_and_format>[^/]+?$)", Table().view), ] ``` This failed with error: "Table.view() takes 1 positional argument but 2 were given" So I'm going to use `AsyncInject` and have the `view` function NOT use the `@inject` decorator.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
973554024	https://github.com/simonw/datasette/issues/878#issuecomment-973554024	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c46B0Fo	simonw 9599	2021-11-19T00:21:20Z	2021-11-19T00:21:20Z	OWNER	That's annoying: it looks like plugins can't use `register_routes()` to over-ride default routes within Datasette itself. This didn't work: ```python from datasette.utils.asgi import Response from datasette import hookimpl import html async def table(request): return Response.html("Hello from {}".format( html.escape(repr(request.url_vars)) )) @hookimpl def register_routes(): return [ (r"/(?P<db_name>[^/]+)/(?P<table_and_format>[^/]+?$)", table), ] `` I'll use a/t/` prefix for the moment, but this is probably something I'll fix in Datasette itself later.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
973542284	https://github.com/simonw/datasette/issues/878#issuecomment-973542284	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c46BxOM	simonw 9599	2021-11-19T00:16:44Z	2021-11-19T00:16:44Z	OWNER	`Development % cookiecutter gh:simonw/datasette-plugin You've downloaded /Users/simon/.cookiecutters/datasette-plugin before. Is it okay to delete and re-download it? [yes]: yes plugin_name []: table-new description []: New implementation of TableView, see https://github.com/simonw/datasette/issues/878 hyphenated [table-new]: underscored [table_new]: github_username []: simonw author_name []: Simon Willison include_static_directory []: include_templates_directory []:`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
973527870	https://github.com/simonw/datasette/issues/878#issuecomment-973527870	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c46Bts-	simonw 9599	2021-11-19T00:13:43Z	2021-11-19T00:13:43Z	OWNER	New plan: I'm going to build a brand new implementation of `TableView` starting out as a plugin, using the `register_routes()` plugin hook. It will reuse the existing HTML template but will be a completely new Python implementation, based on `asyncinject`. I'm going to start by just getting the table to show up on the page - then I'll add faceting, suggested facets, filters and so-on. Bonus: I'm going to see if I can get it to work for arbitrary SQL queries too (stretch goal).	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
971209475	https://github.com/simonw/datasette/issues/878#issuecomment-971209475	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c4543sD	simonw 9599	2021-11-17T05:41:42Z	2021-11-17T05:41:42Z	OWNER	I'm going to build a brand new implementation of the `TableView` class that doesn't subclass `BaseView` at all, instead using `asyncinject`. If I'm lucky that will clean up the grungiest part of the codebase. I can maybe even run the tests against old `TableView` and `TableView2` to check that they behave the same.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
971057553	https://github.com/simonw/datasette/issues/878#issuecomment-971057553	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c454SmR	simonw 9599	2021-11-17T01:40:45Z	2021-11-17T01:40:45Z	OWNER	I shipped that code as a new library, `asyncinject`: https://pypi.org/project/asyncinject/ - I'll open a new PR to attempt to refactor `TableView` to use it.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
971056169	https://github.com/simonw/datasette/pull/1512#issuecomment-971056169	https://api.github.com/repos/simonw/datasette/issues/1512	IC_kwDOBm6k_c454SQp	simonw 9599	2021-11-17T01:39:44Z	2021-11-17T01:39:44Z	OWNER	Closing this PR because I shipped the code in it as a separate library instead.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for async view classes 1055402144
971055677	https://github.com/simonw/datasette/pull/1512#issuecomment-971055677	https://api.github.com/repos/simonw/datasette/issues/1512	IC_kwDOBm6k_c454SI9	simonw 9599	2021-11-17T01:39:25Z	2021-11-17T01:39:25Z	OWNER	https://github.com/simonw/asyncinject version 0.1a0 is now live on PyPI: https://pypi.org/project/asyncinject/	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for async view classes 1055402144
971010724	https://github.com/simonw/datasette/pull/1512#issuecomment-971010724	https://api.github.com/repos/simonw/datasette/issues/1512	IC_kwDOBm6k_c454HKk	simonw 9599	2021-11-17T01:12:22Z	2021-11-17T01:12:22Z	OWNER	I'm going to extract out the `asyncinject` stuff into a separate library.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for async view classes 1055402144
970861628	https://github.com/simonw/datasette/pull/1512#issuecomment-970861628	https://api.github.com/repos/simonw/datasette/issues/1512	IC_kwDOBm6k_c453iw8	simonw 9599	2021-11-16T23:46:07Z	2021-11-16T23:46:07Z	OWNER	I made the changes locally and tested them with Python 3.6 like so: `cd /tmp mkdir v cd v pipenv shell --python=python3.6 cd ~/Dropbox/Development/datasette pip install -e '.[test]' pytest tests/test_asyncdi.py`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for async view classes 1055402144
970857411	https://github.com/simonw/datasette/pull/1512#issuecomment-970857411	https://api.github.com/repos/simonw/datasette/issues/1512	IC_kwDOBm6k_c453hvD	simonw 9599	2021-11-16T23:43:21Z	2021-11-16T23:43:21Z	OWNER	`E File "/home/runner/work/datasette/datasette/datasette/utils/vendored_graphlib.py", line 56 E if (result := self._node2info.get(node)) is None: E ^ E SyntaxError: invalid syntax` Oh no - the vendored code I use has `:=` so doesn't work on Python 3.6! Will have to backport it more thoroughly.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for async view classes 1055402144
970855084	https://github.com/simonw/datasette/issues/1513#issuecomment-970855084	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453hKs	simonw 9599	2021-11-16T23:41:46Z	2021-11-16T23:41:46Z	OWNER	Conclusion: using a giant convoluted CTE and UNION ALL query to attempt to calculate facets at the same time as retrieving rows is a net LOSS for performance! Very surprised to see that.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970853917	https://github.com/simonw/datasette/issues/1513#issuecomment-970853917	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453g4d	simonw 9599	2021-11-16T23:41:01Z	2021-11-16T23:41:01Z	OWNER	One very interesting difference between the two: on the single giant query page: `json { "request_duration_ms": 376.4317020000476, "sum_trace_duration_ms": 370.0828700000329, "num_traces": 5 }` And on the page that uses separate queries: `json { "request_duration_ms": 819.012272000009, "sum_trace_duration_ms": 201.52852100000018, "num_traces": 19 }` The separate pages page takes 819ms total to render the page, but spends 201ms across 19 SQL queries. The single big query takes 376ms total to render the page, spending 370ms in 5 queries Those 5 queries, if you're interested ```sql select database_name, schema_version from databases PRAGMA schema_version PRAGMA schema_version explain with cte as (\r\n select rowid, date, county, state, fips, cases, deaths\r\n from ny_times_us_counties\r\n),\r\ntruncated as (\r\n select null as _facet, null as facet_name, null as facet_count, rowid, date, county, state, fips, cases, deaths\r\n from cte order by date desc limit 4\r\n),\r\nstate_facet as (\r\n select 'state' as _facet, state as facet_name, count() as facet_count,\r\n null, null, null, null, null, null, null\r\n from cte group by facet_name order by facet_count desc limit 3\r\n),\r\nfips_facet as (\r\n select 'fips' as _facet, fips as facet_name, count() as facet_count,\r\n null, null, null, null, null, null, null\r\n from cte group by facet_name order by facet_count desc limit 3\r\n),\r\ncounty_facet as (\r\n select 'county' as _facet, county as facet_name, count() as facet_count,\r\n null, null, null, null, null, null, null\r\n from cte group by facet_name order by facet_count desc limit 3\r\n)\r\nselect from truncated\r\nunion all select * from state_facet\r\nunion all select * from fips_facet\r\nunion all select * from county_facet with cte as (\r\n select rowid, date, county, state, fips, cases, deaths\r\n from ny_times_us_counties\r\n),\r\ntruncated as (\r\n select null as _facet, null as facet_name, null as facet_count, rowid, date, county, state, fips, cases, deaths\r\n from cte order by date desc limit 4\r\n),\r\nstate_facet as (\r\n select 'state' as _facet, state as facet_name, count() as facet_count,\r\n null, null, null, null, null, null, null\r\n from cte group by facet_name order by facet_count desc limit 3\r\n),\r\nfips_facet as (\r\n select 'fips' as _facet, fips as facet_name, count() as facet_count,\r\n null, null, null, null, null, null, null\r\n from cte group by facet_name order by facet_count desc limit 3\r\n),\r\ncounty_facet as (\r\n select 'county' as _facet, county as facet_name, count() as facet_count,\r\n null, null, null, null, null, null, null\r\n from cte group by facet_name order by facet_count desc limit 3\r\n)\r\nselect from truncated\r\nunion all select * from state_facet\r\nunion all select * from fips_facet\r\nunion all select * from county_facet ``` All of that additional non-SQL overhead must be stuff relating to Python and template rendering code running on the page. I'm really surprised at how much overhead that is! This is worth researching separately.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970845844	https://github.com/simonw/datasette/issues/1513#issuecomment-970845844	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453e6U	simonw 9599	2021-11-16T23:35:38Z	2021-11-16T23:35:38Z	OWNER	I tried adding `cases > 10000` but the SQL query now takes too long - so moving this to my laptop. `cd /tmp wget https://covid-19.datasettes.com/covid.db datasette covid.db \ --setting facet_time_limit_ms 10000 \ --setting sql_time_limit_ms 10000 \ --setting trace_debug 1` `http://127.0.0.1:8006/covid/ny_times_us_counties?_trace=1&_facet_size=3&_size=2&cases__gt=10000` shows in the traces: json [ { "type": "sql", "start": 12.693033525, "end": 12.694056904, "duration_ms": 1.0233789999993803, "traceback": [ " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/base.py\", line 262, in get\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/base.py\", line 477, in view_get\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/table.py\", line 705, in data\n ], "database": "covid", "sql": "select rowid, date, county, state, fips, cases, deaths from ny_times_us_counties where \"cases\" > :p0 order by rowid limit 3", "params": { "p0": 10000 } }, { "type": "sql", "start": 12.694285093, "end": 12.814936275, "duration_ms": 120.65118200000136, "traceback": [ " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/base.py\", line 262, in get\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/base.py\", line 477, in view_get\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/table.py\", line 723, in data\n ], "database": "covid", "sql": "select count() from ny_times_us_counties where \"cases\" > :p0", "params": { "p0": 10000 } }, { "type": "sql", "start": 12.818812089, "end": 12.851172544, "duration_ms": 32.360455000000954, "traceback": [ " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/table.py\", line 856, in data\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/facets.py\", line 164, in suggest\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/app.py\", line 634, in execute\n ], "database": "covid", "sql": "select county, count() as n from (\n select rowid, date, county, state, fips, cases, deaths from ny_times_us_counties where \"cases\" > :p0 \n "params": { "p0": 10000 } }, { "type": "sql", "start": 12.851418868, "end": 12.871268359, "duration_ms": 19.84949100000044, "traceback": [ " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/table.py\", line 856, in data\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/facets.py\", line 164, in suggest\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/app.py\", line 634, in execute\n ], "database": "covid", "sql": "select state, count() as n from (\n select rowid, date, county, state, fips, cases, deaths from ny_times_us_counties where \"cases\" > :p0 \n "params": { "p0": 10000 } }, { "type": "sql", "start": 12.871497655, "end": 12.897715027, "duration_ms": 26.217371999999628, "traceback": [ " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/views/table.py\", line 856, in data\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/facets.py\", line 164, in suggest\n " File \"/usr/local/Cellar/datasette/0.58.1/libexec/lib/python3.9/site-packages/datasette/app.py\", line 634, in execute\n ], "database": "covid", "sql": "select fips, count() as n from (\n select rowid, date, county, state, fips, cases, deaths from ny_times_us_counties where \"cases\" > :p0 \n "params": { "p0": 10000 } } ] So that's: `fetch rows: 1.0233789999993803 ms count: 120.65118200000136 ms facet county: 32.360455000000954 ms facet state: 19.84949100000044 ms facet fips: 26.217371999999628 ms` = 200.1 ms total return await self.view_get(\n", response_or_template_contexts = await self.data(\n", results = await db.execute(sql, params, truncate=True, *extra_args)\n" return await self.view_get(\n", response_or_template_contexts = await self.data(\n", count_rows = list(await db.execute(count_sql, from_sql_params))\n" suggested_facets.extend(await facet.suggest())\n", distinct_values = await self.ds.execute(\n", return await self.databases[db_name].execute(\n" ) where county is not null\n group by county\n limit 4", suggested_facets.extend(await facet.suggest())\n", distinct_values = await self.ds.execute(\n", return await self.databases[db_name].execute(\n" ) where state is not null\n group by state\n limit 4", suggested_facets.extend(await facet.suggest())\n", distinct_values = await self.ds.execute(\n", return await self.databases[db_name].execute(\n" ) where fips is not null\n group by fips\n limit 4", Compared to: http://127.0.0.1:8006/covid?sql=with+cte+as+(%0D%0A++select+rowid%2C+date%2C+county%2C+state%2C+fips%2C+cases%2C+deaths%0D%0A++from+ny_times_us_counties%0D%0A)%2C%0D%0Atruncated+as+(%0D%0A++select+null+as+_facet%2C+null+as+facet_name%2C+null+as+facet_count%2C+rowid%2C+date%2C+county%2C+state%2C+fips%2C+cases%2C+deaths%0D%0A++from+cte+order+by+date+desc+limit+4%0D%0A)%2C%0D%0Astate_facet+as+(%0D%0A++select+%27state%27+as+_facet%2C+state+as+facet_name%2C+count()+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A)%2C%0D%0Afips_facet+as+(%0D%0A++select+%27fips%27+as+_facet%2C+fips+as+facet_name%2C+count()+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A)%2C%0D%0Acounty_facet+as+(%0D%0A++select+%27county%27+as+_facet%2C+county+as+facet_name%2C+count()+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A)%0D%0Aselect++from+truncated%0D%0Aunion+all+select++from+state_facet%0D%0Aunion+all+select++from+fips_facet%0D%0Aunion+all+select++from+county_facet&_trace=1 Which is 353ms total. The separate queries ran faster! Really surprising result there.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970828568	https://github.com/simonw/datasette/issues/1513#issuecomment-970828568	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453asY	simonw 9599	2021-11-16T23:27:11Z	2021-11-16T23:27:11Z	OWNER	One last experiment: I'm going to try running an expensive query in the CTE portion.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970827674	https://github.com/simonw/datasette/issues/1513#issuecomment-970827674	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453aea	simonw 9599	2021-11-16T23:26:58Z	2021-11-16T23:26:58Z	OWNER	With trace. https://covid-19.datasettes.com/covid/ny_times_us_counties?_trace=1&_facet_size=3&_size=2&_trace=1 shows the following: `fetch rows: 0.41762600005768036 ms facet state: 284.30423800000426 ms facet county: 273.2565999999679 ms facet fips: 197.80996999998024 ms` = 755.78843400001ms total It didn't run a count because that's the homepage and the count is cached. So I dropped the count from the query and ran it: https://covid-19.datasettes.com/covid?sql=with+cte+as+(%0D%0A++select+rowid%2C+date%2C+county%2C+state%2C+fips%2C+cases%2C+deaths%0D%0A++from+ny_times_us_counties%0D%0A)%2C%0D%0Atruncated+as+(%0D%0A++select+null+as+_facet%2C+null+as+facet_name%2C+null+as+facet_count%2C+rowid%2C+date%2C+county%2C+state%2C+fips%2C+cases%2C+deaths%0D%0A++from+cte+order+by+date+desc+limit+4%0D%0A)%2C%0D%0Astate_facet+as+(%0D%0A++select+%27state%27+as+_facet%2C+state+as+facet_name%2C+count()+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A)%2C%0D%0Afips_facet+as+(%0D%0A++select+%27fips%27+as+_facet%2C+fips+as+facet_name%2C+count()+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A)%2C%0D%0Acounty_facet+as+(%0D%0A++select+%27county%27+as+_facet%2C+county+as+facet_name%2C+count()+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A)%0D%0Aselect++from+truncated%0D%0Aunion+all+select++from+state_facet%0D%0Aunion+all+select++from+fips_facet%0D%0Aunion+all+select+*+from+county_facet&_trace=1 Shows 649.4359889999259 ms for the query - compared to 755.78843400001ms for the separate. So it saved about 100ms. Still not a huge difference though!	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970780866	https://github.com/simonw/datasette/issues/1513#issuecomment-970780866	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453PDC	simonw 9599	2021-11-16T23:01:57Z	2021-11-16T23:01:57Z	OWNER	One disadvantage to this approach: if you have a SQL time limit of 1s and it takes 0.9s to return the rows but then 0.5s to calculate each of the requested facets the entire query will exceed the time limit. Could work around this by catching that error and then re-running the query just for the rows, but that would result in the user having to wait longer for the results. Could try to remember if that has happened using an in-memory Python data structure and skip the faceting optimization if it's caused problems in the past? That seems a bit gross. Maybe this becomes an opt-in optimization you can request in your `metadata.json` setting for that table, which massively increases the time limit? That's a bit weird too - now there are two separate implementations of the faceting logic, which had better have a REALLY big pay-off to be worth maintaining. What if we kept the query that returns the rows to be displayed on the page separate from the facets, but then executed all of the facets together using this method such that the `cte` only (presumably) has to be calculated once? That would still lead to multiple facets potentially exceeding the SQL time limit when single facets would not have. Maybe a better optimization would be to move facets to happening via `fetch()` calls from the client, so the user gets to see their rows instantly and the facets then appear as and when they are available (though it would cause page jank).	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970766486	https://github.com/simonw/datasette/issues/1513#issuecomment-970766486	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453LiW	simonw 9599	2021-11-16T22:52:56Z	2021-11-16T22:56:07Z	OWNER	https://covid-19.datasettes.com/covid is 805.2MB https://covid-19.datasettes.com/covid/ny_times_us_counties?_trace=1&_facet_size=3&_size=2 Equivalent SQL: https://covid-19.datasettes.com/covid?sql=with+cte+as+%28%0D%0A++select+rowid%2C+date%2C+county%2C+state%2C+fips%2C+cases%2C+deaths%0D%0A++from+ny_times_us_counties%0D%0A%29%2C%0D%0Atruncated+as+%28%0D%0A++select+null+as+_facet%2C+null+as+facet_name%2C+null+as+facet_count%2C+rowid%2C+date%2C+county%2C+state%2C+fips%2C+cases%2C+deaths%0D%0A++from+cte+order+by+date+desc+limit+4%0D%0A%29%2C%0D%0Astate_facet+as+%28%0D%0A++select+%27state%27+as+_facet%2C+state+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A%29%2C%0D%0Afips_facet+as+%28%0D%0A++select+%27fips%27+as+_facet%2C+fips+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A%29%2C%0D%0Acounty_facet+as+%28%0D%0A++select+%27county%27+as+_facet%2C+county+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A%29%2C%0D%0Atotal_count+as+%28%0D%0A++select+%27COUNT%27+as+_facet%2C+%27%27+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte%0D%0A%29%0D%0Aselect++from+truncated%0D%0Aunion+all+select++from+state_facet%0D%0Aunion+all+select++from+fips_facet%0D%0Aunion+all+select++from+county_facet%0D%0Aunion+all+select++from+total_count sql with cte as ( select rowid, date, county, state, fips, cases, deaths from ny_times_us_counties ), truncated as ( select null as _facet, null as facet_name, null as facet_count, rowid, date, county, state, fips, cases, deaths from cte order by date desc limit 4 ), state_facet as ( select 'state' as _facet, state as facet_name, count() as facet_count, null, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), fips_facet as ( select 'fips' as _facet, fips as facet_name, count() as facet_count, null, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), county_facet as ( select 'county' as _facet, county as facet_name, count() as facet_count, null, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), total_count as ( select 'COUNT' as _facet, '' as facet_name, count() as facet_count, null, null, null, null, null, null, null from cte ) select from truncated union all select * from state_facet union all select * from fips_facet union all select * from county_facet union all select * from total_count _facet \| facet_name \| facet_count \| rowid \| date \| county \| state \| fips \| cases \| deaths -- \| -- \| -- \| -- \| -- \| -- \| -- \| -- \| -- \| -- \| \| \| 1917344 \| 2021-11-15 \| Autauga \| Alabama \| 1001 \| 10407 \| 154 \| \| \| 1917345 \| 2021-11-15 \| Baldwin \| Alabama \| 1003 \| 37875 \| 581 \| \| \| 1917346 \| 2021-11-15 \| Barbour \| Alabama \| 1005 \| 3648 \| 79 \| \| \| 1917347 \| 2021-11-15 \| Bibb \| Alabama \| 1007 \| 4317 \| 92 state \| Texas \| 148028 \| \| \| \| \| \| \| state \| Georgia \| 96249 \| \| \| \| \| \| \| state \| Virginia \| 79315 \| \| \| \| \| \| \| fips \| \| 17580 \| \| \| \| \| \| \| fips \| 53061 \| 665 \| \| \| \| \| \| \| fips \| 17031 \| 662 \| \| \| \| \| \| \| county \| Washington \| 18666 \| \| \| \| \| \| \| county \| Unknown \| 15840 \| \| \| \| \| \| \| county \| Jefferson \| 15637 \| \| \| \| \| \| \| COUNT \| \| 1920593 \| \| \| \| \| \| \|	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970770304	https://github.com/simonw/datasette/issues/1513#issuecomment-970770304	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453MeA	simonw 9599	2021-11-16T22:55:19Z	2021-11-16T22:55:19Z	OWNER	(One thing I really like about this pattern is that it should work exactly the same when used to facet the results of arbitrary SQL queries as it does when faceting results from the table page.)	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970767952	https://github.com/simonw/datasette/issues/1513#issuecomment-970767952	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453L5Q	simonw 9599	2021-11-16T22:53:52Z	2021-11-16T22:53:52Z	OWNER	It's going to take another 15 minutes for the build to finish and deploy the version with `_trace=1`: https://github.com/simonw/covid-19-datasette/actions/runs/1469150112	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970758179	https://github.com/simonw/datasette/issues/1513#issuecomment-970758179	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453Jgj	simonw 9599	2021-11-16T22:47:38Z	2021-11-16T22:47:38Z	OWNER	Trace now enabled: https://global-power-plants.datasettes.com/global-power-plants/global-power-plants?_facet_size=3&_size=2&_nocount=1&_trace=1 Here are the relevant traces: json [ { "type": "sql", "start": 31.214430154, "end": 31.214817089, "duration_ms": 0.3869350000016425, "traceback": [ " File \"/usr/local/lib/python3.8/site-packages/datasette/views/base.py\", line 262, in get\n return await self.view_get(\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/views/base.py\", line 477, in view_get\n response_or_template_contexts = await self.data(\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/views/table.py\", line 705, in data\n results = await db.execute(sql, params, truncate=True, *extra_args)\n" ], "database": "global-power-plants", "sql": "select rowid, country, country_long, name, gppd_idnr, capacity_mw, latitude, longitude, primary_fuel, other_fuel1, other_fuel2, other_fuel3, commissioning_year, owner, source, url, geolocation_source, wepp_id, year_of_capacity_data, generation_gwh_2013, generation_gwh_2014, generation_gwh_2015, generation_gwh_2016, generation_gwh_2017, generation_data_source, estimated_generation_gwh from [global-power-plants] order by rowid limit 3", "params": {} }, { "type": "sql", "start": 31.215234586, "end": 31.220110342, "duration_ms": 4.875756000000564, "traceback": [ " File \"/usr/local/lib/python3.8/site-packages/datasette/views/table.py\", line 760, in data\n ) = await facet.facet_results()\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/facets.py\", line 212, in facet_results\n facet_rows_results = await self.ds.execute(\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/app.py\", line 634, in execute\n return await self.databases[db_name].execute(\n" ], "database": "global-power-plants", "sql": "select country_long as value, count() as count from (\n select rowid, country, country_long, name, gppd_idnr, capacity_mw, latitude, longitude, primary_fuel, other_fuel1, other_fuel2, other_fuel3, commissioning_year, owner, source, url, geolocation_source, wepp_id, year_of_capacity_data, generation_gwh_2013, generation_gwh_2014, generation_gwh_2015, generation_gwh_2016, generation_gwh_2017, generation_data_source, estimated_generation_gwh from [global-power-plants] \n )\n where country_long is not null\n group by country_long order by count desc, value limit 4", "params": [] }, { "type": "sql", "start": 31.221062485, "end": 31.228968364, "duration_ms": 7.905878999999061, "traceback": [ " File \"/usr/local/lib/python3.8/site-packages/datasette/views/table.py\", line 760, in data\n ) = await facet.facet_results()\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/facets.py\", line 212, in facet_results\n facet_rows_results = await self.ds.execute(\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/app.py\", line 634, in execute\n return await self.databases[db_name].execute(\n" ], "database": "global-power-plants", "sql": "select owner as value, count() as count from (\n select rowid, country, country_long, name, gppd_idnr, capacity_mw, latitude, longitude, primary_fuel, other_fuel1, other_fuel2, other_fuel3, commissioning_year, owner, source, url, geolocation_source, wepp_id, year_of_capacity_data, generation_gwh_2013, generation_gwh_2014, generation_gwh_2015, generation_gwh_2016, generation_gwh_2017, generation_data_source, estimated_generation_gwh from [global-power-plants] \n )\n where owner is not null\n group by owner order by count desc, value limit 4", "params": [] }, { "type": "sql", "start": 31.229809757, "end": 31.253902162, "duration_ms": 24.09240499999754, "traceback": [ " File \"/usr/local/lib/python3.8/site-packages/datasette/views/table.py\", line 760, in data\n ) = await facet.facet_results()\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/facets.py\", line 212, in facet_results\n facet_rows_results = await self.ds.execute(\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/app.py\", line 634, in execute\n return await self.databases[db_name].execute(\n" ], "database": "global-power-plants", "sql": "select primary_fuel as value, count() as count from (\n select rowid, country, country_long, name, gppd_idnr, capacity_mw, latitude, longitude, primary_fuel, other_fuel1, other_fuel2, other_fuel3, commissioning_year, owner, source, url, geolocation_source, wepp_id, year_of_capacity_data, generation_gwh_2013, generation_gwh_2014, generation_gwh_2015, generation_gwh_2016, generation_gwh_2017, generation_data_source, estimated_generation_gwh from [global-power-plants] \n )\n where primary_fuel is not null\n group by primary_fuel order by count desc, value limit 4", "params": [] }, { "type": "sql", "start": 31.255699745, "end": 31.256243889, "duration_ms": 0.544143999999136, "traceback": [ " File \"/usr/local/lib/python3.8/site-packages/datasette/facets.py\", line 145, in suggest\n row_count = await self.get_row_count()\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/facets.py\", line 132, in get_row_count\n await self.ds.execute(\n", " File \"/usr/local/lib/python3.8/site-packages/datasette/app.py\", line 634, in execute\n return await self.databases[db_name].execute(\n" ], "database": "global-power-plants", "sql": "select count() from (select rowid, country, country_long, name, gppd_idnr, capacity_mw, latitude, longitude, primary_fuel, other_fuel1, other_fuel2, other_fuel3, commissioning_year, owner, source, url, geolocation_source, wepp_id, year_of_capacity_data, generation_gwh_2013, generation_gwh_2014, generation_gwh_2015, generation_gwh_2016, generation_gwh_2017, generation_data_source, estimated_generation_gwh from [global-power-plants] )", "params": [] } ] `fetch rows: 0.3869350000016425 ms facet country_long: 4.875756000000564 ms facet owner: 7.905878999999061 ms facet primary_fuel: 24.09240499999754 ms count: 0.544143999999136 ms` Total = 37.8ms I modified the query to include the total count as well: https://global-power-plants.datasettes.com/global-power-plants?sql=with+cte+as+%28%0D%0A++select+rowid%2C+country%2C+country_long%2C+name%2C+owner%2C+primary_fuel%0D%0A++from+%5Bglobal-power-plants%5D%0D%0A%29%2C%0D%0Atruncated+as+%28%0D%0A++select+null+as+_facet%2C+null+as+facet_name%2C+null+as+facet_count%2C+rowid%2C+country%2C+country_long%2C+name%2C+owner%2C+primary_fuel%0D%0A++from+cte+order+by+rowid+limit+4%0D%0A%29%2C%0D%0Acountry_long_facet+as+%28%0D%0A++select+%27country_long%27+as+_facet%2C+country_long+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A%29%2C%0D%0Aowner_facet+as+%28%0D%0A++select+%27owner%27+as+_facet%2C+owner+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A%29%2C%0D%0Aprimary_fuel_facet+as+%28%0D%0A++select+%27primary_fuel%27+as+_facet%2C+primary_fuel+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte+group+by+facet_name+order+by+facet_count+desc+limit+3%0D%0A%29%2C%0D%0Atotal_count+as+%28%0D%0A++select+%27COUNT%27+as+_facet%2C+%27%27+as+facet_name%2C+count%28%29+as+facet_count%2C%0D%0A++null%2C+null%2C+null%2C+null%2C+null%2C+null%0D%0A++from+cte%0D%0A%29%0D%0Aselect++from+truncated%0D%0Aunion+all+select++from+country_long_facet%0D%0Aunion+all+select++from+owner_facet%0D%0Aunion+all+select++from+primary_fuel_facet%0D%0Aunion+all+select++from+total_count&_trace=1 sql with cte as ( select rowid, country, country_long, name, owner, primary_fuel from [global-power-plants] ), truncated as ( select null as _facet, null as facet_name, null as facet_count, rowid, country, country_long, name, owner, primary_fuel from cte order by rowid limit 4 ), country_long_facet as ( select 'country_long' as _facet, country_long as facet_name, count() as facet_count, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), owner_facet as ( select 'owner' as _facet, owner as facet_name, count() as facet_count, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), primary_fuel_facet as ( select 'primary_fuel' as _facet, primary_fuel as facet_name, count() as facet_count, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), total_count as ( select 'COUNT' as _facet, '' as facet_name, count() as facet_count, null, null, null, null, null, null from cte ) select * from truncated union all select * from country_long_facet union all select * from owner_facet union all select * from primary_fuel_facet union all select * from total_count The trace says that query took 34.801436999998714 ms. To my huge surprise, this convoluted optimization only shaves the sum query time down from 37.8ms to 34.8ms! That entire database file is just 11.1 MB though. Maybe it would make a meaningful difference on something larger?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970742415	https://github.com/simonw/datasette/issues/1513#issuecomment-970742415	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453FqP	simonw 9599	2021-11-16T22:37:14Z	2021-11-16T22:37:14Z	OWNER	The query takes 42.794ms to run. Here's the equivalent page using separate queries: https://global-power-plants.datasettes.com/global-power-plants/global-power-plants?_facet_size=3&_size=2&_nocount=1 Annoyingly I can't disable facet suggestions but keep facets. I'm going to turn on tracing so I can see how long the separate queries took.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970738130	https://github.com/simonw/datasette/issues/1513#issuecomment-970738130	https://api.github.com/repos/simonw/datasette/issues/1513	IC_kwDOBm6k_c453EnS	simonw 9599	2021-11-16T22:32:19Z	2021-11-16T22:32:19Z	OWNER	I came up with the following query which seems to work! sql with cte as ( select rowid, country, country_long, name, owner, primary_fuel from [global-power-plants] ), truncated as ( select null as _facet, null as facet_name, null as facet_count, rowid, country, country_long, name, owner, primary_fuel from cte order by rowid limit 4 ), country_long_facet as ( select 'country_long' as _facet, country_long as facet_name, count() as facet_count, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), owner_facet as ( select 'owner' as _facet, owner as facet_name, count() as facet_count, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ), primary_fuel_facet as ( select 'primary_fuel' as _facet, primary_fuel as facet_name, count() as facet_count, null, null, null, null, null, null from cte group by facet_name order by facet_count desc limit 3 ) select from truncated union all select * from country_long_facet union all select * from owner_facet union all select * from primary_fuel_facet (Limits should be 101, 31, 31, 31 but I reduced size to get a shorter example table). Results look like this: _facet \| facet_name \| facet_count \| rowid \| country \| country_long \| name \| owner \| primary_fuel -- \| -- \| -- \| -- \| -- \| -- \| -- \| -- \| -- \| \| \| 1 \| AFG \| Afghanistan \| Kajaki Hydroelectric Power Plant Afghanistan \| \| Hydro \| \| \| 2 \| AFG \| Afghanistan \| Kandahar DOG \| \| Solar \| \| \| 3 \| AFG \| Afghanistan \| Kandahar JOL \| \| Solar \| \| \| 4 \| AFG \| Afghanistan \| Mahipar Hydroelectric Power Plant Afghanistan \| \| Hydro country_long \| United States of America \| 8688 \| \| \| \| \| \| country_long \| China \| 4235 \| \| \| \| \| \| country_long \| United Kingdom \| 2603 \| \| \| \| \| \| owner \| \| 14112 \| \| \| \| \| \| owner \| Lightsource Renewable Energy \| 120 \| \| \| \| \| \| owner \| Cypress Creek Renewables \| 109 \| \| \| \| \| \| primary_fuel \| Solar \| 9662 \| \| \| \| \| \| primary_fuel \| Hydro \| 7155 \| \| \| \| \| \| primary_fuel \| Wind \| 5188 \| \| \| \| \| \| This is a neat proof of concept.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Research: CTEs and union all to calculate facets AND query at the same time 1055469073
970718337	https://github.com/simonw/datasette/pull/1512#issuecomment-970718337	https://api.github.com/repos/simonw/datasette/issues/1512	IC_kwDOBm6k_c452_yB	simonw 9599	2021-11-16T22:02:30Z	2021-11-16T22:02:30Z	OWNER	I've decided to make the clever `asyncio` dependency injection opt-in - so you can either decorate with `@inject` or you can set `inject_all = True` on the class - for example: ```python import asyncio from datasette.utils.asyncdi import AsyncBase, inject class Simple(AsyncBase): def init(self): self.log = [] `@inject async def two(self): self.log.append("two") @inject async def one(self, two): self.log.append("one") return self.log async def not_inject(self, one, two): return one + two` class Complex(AsyncBase): inject_all = True `def __init__(self): self.log = [] async def b(self): self.log.append("b") async def a(self, b): self.log.append("a") async def go(self, a): self.log.append("go") return self.log` ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for async view classes 1055402144
970712713	https://github.com/simonw/datasette/issues/878#issuecomment-970712713	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c452-aJ	simonw 9599	2021-11-16T21:54:33Z	2021-11-16T21:54:33Z	OWNER	I'm going to continue working on this in a PR.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970705738	https://github.com/simonw/datasette/issues/878#issuecomment-970705738	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c4528tK	simonw 9599	2021-11-16T21:44:31Z	2021-11-16T21:44:31Z	OWNER	Wrote a TIL about what I learned using `TopologicalSorter`: https://til.simonwillison.net/python/graphlib-topologicalsorter	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970673085	https://github.com/simonw/datasette/issues/878#issuecomment-970673085	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c4520u9	simonw 9599	2021-11-16T20:58:24Z	2021-11-16T20:58:24Z	OWNER	New test: ```python class Complex(AsyncBase): def init(self): self.log = [] `async def d(self): await asyncio.sleep(random() * 0.1) print("LOG: d") self.log.append("d") async def c(self): await asyncio.sleep(random() * 0.1) print("LOG: c") self.log.append("c") async def b(self, c, d): print("LOG: b") self.log.append("b") async def a(self, b, c): print("LOG: a") self.log.append("a") async def go(self, a): print("LOG: go") self.log.append("go") return self.log` @pytest.mark.asyncio async def test_complex(): result = await Complex().go() # 'c' should only be called once assert tuple(result) in ( # c and d could happen in either order ("c", "d", "b", "a", "go"), ("d", "c", "b", "a", "go"), ) `And this code passes it:`python import asyncio from functools import wraps import inspect try: import graphlib except ImportError: from . import vendored_graphlib as graphlib class AsyncMeta(type): def new(cls, name, bases, attrs): # Decorate any items that are 'async def' methods registry = {} new_attrs = {"_registry": _registry} for key, value in attrs.items(): if inspect.iscoroutinefunction(value) and not value.__name__ == "resolve": new_attrs[key] = make_method(value) _registry[key] = new_attrs[key] else: new_attrs[key] = value # Gather graph for later dependency resolution graph = { key: { p for p in inspect.signature(method).parameters.keys() if p != "self" and not p.startswith("") } for key, method in _registry.items() } new_attrs["_graph"] = graph return super().new(cls, name, bases, new_attrs) def make_method(method): parameters = inspect.signature(method).parameters.keys() @wraps(method) async def inner(self, _results=None, kwargs): print("\n{}.{}({}) _results={}".format(self, method.__name__, kwargs, _results)) # Any parameters not provided by kwargs are resolved from registry to_resolve = [p for p in parameters if p not in kwargs and p != "self"] missing = [p for p in to_resolve if p not in self._registry] assert ( not missing ), "The following DI parameters could not be found in the registry: {}".format( missing ) results = {} results.update(kwargs) if to_resolve: resolved_parameters = await self.resolve(to_resolve, _results) results.update(resolved_parameters) return_value = await method(self, results) if _results is not None: _results[method.__name__] = return_value return return_value return inner class AsyncBase(metaclass=AsyncMeta): async def resolve(self, names, results=None): print("\n resolve: ", names) if results is None: results = {} # Come up with an execution plan, just for these nodes ts = graphlib.TopologicalSorter() to_do = set(names) done = set() while to_do: item = to_do.pop() dependencies = self._graph[item] ts.add(item, dependencies) done.add(item) # Add any not-done dependencies to the queue to_do.update({k for k in dependencies if k not in done}) ts.prepare() plan = [] while ts.is_active(): node_group = ts.get_ready() plan.append(node_group) ts.done(node_group) print("plan:", plan) results = {} for node_group in plan: awaitables = [ self._registry[name]( self, _results=results, *{k: v for k, v in results.items() if k in self._graph[name]}, ) for name in node_group ] print(" results = ", results) print(" awaitables: ", awaitables) awaitable_results = await asyncio.gather(awaitables) results.update( {p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)} ) print(" End of resolve(), returning", results) return {key: value for key, value in results.items() if key in names} ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970660299	https://github.com/simonw/datasette/issues/878#issuecomment-970660299	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c452xnL	simonw 9599	2021-11-16T20:39:43Z	2021-11-16T20:42:27Z	OWNER	But that does seem to be the plan that `TopographicalSorter` provides: ```python graph = {"go": {"a"}, "a": {"b", "c"}, "b": {"c", "d"}} ts = TopologicalSorter(graph) ts.prepare() while ts.is_active(): nodes = ts.get_ready() print(nodes) ts.done(*nodes) `Outputs:` ('c', 'd') ('b',) ('a',) ('go',) `Also:`python graph = {"go": {"d", "e", "f"}, "d": {"b", "c"}, "b": {"c"}} ts = TopologicalSorter(graph) ts.prepare() while ts.is_active(): nodes = ts.get_ready() print(nodes) ts.done(nodes) `Gives:` ('e', 'f', 'c') ('b',) ('d',) ('go',) `` I'm confident thatTopologicalSorter`is the way to do this. I think I need to rewrite my code to call it once to get that plan, then`await asyncio.gather(nodes)` in turn to execute it.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970657874	https://github.com/simonw/datasette/issues/878#issuecomment-970657874	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c452xBS	simonw 9599	2021-11-16T20:36:01Z	2021-11-16T20:36:01Z	OWNER	My goal here is to calculate the most efficient way to resolve the different nodes, running them in parallel where possible. So for this class: ```python class Complex(AsyncBase): async def d(self): pass `async def c(self): pass async def b(self, c, d): pass async def a(self, b, c): pass async def go(self, a): pass` `` A call togo()` should do this: `c` and `d` in parallel `b` `a` `go`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970655927	https://github.com/simonw/datasette/issues/878#issuecomment-970655927	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c452wi3	simonw 9599	2021-11-16T20:33:11Z	2021-11-16T20:33:11Z	OWNER	What should be happening here instead is it should resolve the full graph and notice that `c` is depended on by both `b` and `a` - so it should run `c` first, then run the next ones in parallel. So maybe the algorithm I'm inheriting from https://docs.python.org/3/library/graphlib.html isn't the correct algorithm?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970655304	https://github.com/simonw/datasette/issues/878#issuecomment-970655304	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c452wZI	simonw 9599	2021-11-16T20:32:16Z	2021-11-16T20:32:16Z	OWNER	This code is really fiddly. I just got to this version: ```python import asyncio from functools import wraps import inspect try: import graphlib except ImportError: from . import vendored_graphlib as graphlib class AsyncMeta(type): def new(cls, name, bases, attrs): # Decorate any items that are 'async def' methods registry = {} new_attrs = {"_registry": _registry} for key, value in attrs.items(): if inspect.iscoroutinefunction(value) and not value.__name__ == "resolve": new_attrs[key] = make_method(value) _registry[key] = new_attrs[key] else: new_attrs[key] = value # Gather graph for later dependency resolution graph = { key: { p for p in inspect.signature(method).parameters.keys() if p != "self" and not p.startswith("") } for key, method in _registry.items() } new_attrs["_graph"] = graph return super().new(cls, name, bases, new_attrs) def make_method(method): @wraps(method) async def inner(self, _results=None, kwargs): print("inner - _results=", _results) parameters = inspect.signature(method).parameters.keys() # Any parameters not provided by kwargs are resolved from registry to_resolve = [p for p in parameters if p not in kwargs and p != "self"] missing = [p for p in to_resolve if p not in self._registry] assert ( not missing ), "The following DI parameters could not be found in the registry: {}".format( missing ) results = {} results.update(kwargs) if to_resolve: resolved_parameters = await self.resolve(to_resolve, _results) results.update(resolved_parameters) return_value = await method(self, results) if _results is not None: _results[method.name] = return_value return return_value `return inner` class AsyncBase(metaclass=AsyncMeta): async def resolve(self, names, results=None): print("\n resolve: ", names) if results is None: results = {} # Resolve them in the correct order ts = graphlib.TopologicalSorter() for name in names: ts.add(name, self._graph[name]) ts.prepare() async def resolve_nodes(nodes): print(" resolve_nodes", nodes) print(" (current results = {})".format(repr(results))) awaitables = [ self._registry[name]( self, _results=results, {k: v for k, v in results.items() if k in self._graph[name]}, ) for name in nodes if name not in results ] print(" awaitables: ", awaitables) awaitable_results = await asyncio.gather(awaitables) results.update( {p[0].__name__: p[1] for p in zip(awaitables, awaitable_results)} ) if not ts.is_active(): # Nothing has dependencies - just resolve directly print(" no dependencies, resolve directly") await resolve_nodes(names) else: # Resolve in topological order while ts.is_active(): nodes = ts.get_ready() print(" ts.get_ready() returned nodes:", nodes) await resolve_nodes(nodes) for node in nodes: ts.done(node) print(" End of resolve(), returning", results) return {key: value for key, value in results.items() if key in names} `With this test:`python class Complex(AsyncBase): def init(self): self.log = [] `async def c(self): print("LOG: c") self.log.append("c") async def b(self, c): print("LOG: b") self.log.append("b") async def a(self, b, c): print("LOG: a") self.log.append("a") async def go(self, a): print("LOG: go") self.log.append("go") return self.log` @pytest.mark.asyncio async def test_complex(): result = await Complex().go() # 'c' should only be called once assert result == ["c", "b", "a", "go"] ``` This test sometimes passes, and sometimes fails! Output for a pass: ``` tests/test_asyncdi.py inner - _results= None resolve: ['a'] ts.get_ready() returned nodes: ('c', 'b') resolve_nodes ('c', 'b') (current results = {}) awaitables: [<coroutine object Complex.c at 0x1074ac890>, <coroutine object Complex.b at 0x1074ac820>] inner - _results= {} LOG: c inner - _results= {'c': None} resolve: ['c'] ts.get_ready() returned nodes: ('c',) resolve_nodes ('c',) (current results = {'c': None}) awaitables: [] End of resolve(), returning {'c': None} LOG: b ts.get_ready() returned nodes: ('a',) resolve_nodes ('a',) (current results = {'c': None, 'b': None}) awaitables: [<coroutine object Complex.a at 0x1074ac7b0>] inner - _results= {'c': None, 'b': None} LOG: a End of resolve(), returning {'c': None, 'b': None, 'a': None} LOG: go `Output for a fail:` tests/test_asyncdi.py inner - _results= None resolve: ['a'] ts.get_ready() returned nodes: ('b', 'c') resolve_nodes ('b', 'c') (current results = {}) awaitables: [<coroutine object Complex.b at 0x10923c890>, <coroutine object Complex.c at 0x10923c820>] inner - _results= {} resolve: ['c'] ts.get_ready() returned nodes: ('c',) resolve_nodes ('c',) (current results = {}) awaitables: [<coroutine object Complex.c at 0x10923c6d0>] inner - _results= {} LOG: c inner - _results= {'c': None} LOG: c End of resolve(), returning {'c': None} LOG: b ts.get_ready() returned nodes: ('a',) resolve_nodes ('a',) (current results = {'c': None, 'b': None}) awaitables: [<coroutine object Complex.a at 0x10923c6d0>] inner - _results= {'c': None, 'b': None} LOG: a End of resolve(), returning {'c': None, 'b': None, 'a': None} LOG: go F =================================================================================================== FAILURES =================================================================================================== _______________ test_complex _________________ `@pytest.mark.asyncio async def test_complex(): result = await Complex().go() # 'c' should only be called once` `assert result == ["c", "b", "a", "go"]` E AssertionError: assert ['c', 'c', 'b', 'a', 'go'] == ['c', 'b', 'a', 'go'] E At index 1 diff: 'c' != 'b' E Left contains one more item: 'go' E Use -v to get the full diff tests/test_asyncdi.py:48: AssertionError ================== short test summary info ================================ FAILED tests/test_asyncdi.py::test_complex - AssertionError: assert ['c', 'c', 'b', 'a', 'go'] == ['c', 'b', 'a', 'go'] ``` I figured out why this is happening. `a` requires `b` and `c` `b` also requires `c` The code decides to run `b` and `c` in parallel. If `c` completes first, then when `b` runs it gets to use the already-calculated result for `c` - so it doesn't need to call `c` again. If `b` gets to that point before `c` does it also needs to call `c`.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970624197	https://github.com/simonw/datasette/issues/878#issuecomment-970624197	https://api.github.com/repos/simonw/datasette/issues/878	IC_kwDOBm6k_c452ozF	simonw 9599	2021-11-16T19:49:05Z	2021-11-16T19:49:05Z	OWNER	Here's the latest version of my weird dependency injection async class: ```python import inspect class AsyncMeta(type): def new(cls, name, bases, attrs): # Decorate any items that are 'async def' methods _registry = {} new_attrs = {"_registry": _registry} for key, value in attrs.items(): if inspect.iscoroutinefunction(value) and not value.name == "resolve": new_attrs[key] = make_method(value) _registry[key] = new_attrs[key] else: new_attrs[key] = value `# Topological sort of _registry by parameter dependencies graph = { key: { p for p in inspect.signature(method).parameters.keys() if p != "self" and not p.startswith("_") } for key, method in _registry.items() } new_attrs["_graph"] = graph return super().__new__(cls, name, bases, new_attrs)` def make_method(method): @wraps(method) async def inner(self, kwargs): parameters = inspect.signature(method).parameters.keys() # Any parameters not provided by kwargs are resolved from registry to_resolve = [p for p in parameters if p not in kwargs and p != "self"] missing = [p for p in to_resolve if p not in self._registry] assert ( not missing ), "The following DI parameters could not be found in the registry: {}".format( missing ) results = {} results.update(kwargs) results.update(await self.resolve(to_resolve)) return await method(self, results) `return inner` bad = [0] class AsyncBase(metaclass=AsyncMeta): async def resolve(self, names): print(" resolve({})".format(names)) results = {} # Resolve them in the correct order ts = TopologicalSorter() ts2 = TopologicalSorter() print(" names = ", names) print(" self._graph = ", self._graph) for name in names: if self._graph[name]: ts.add(name, self._graph[name]) ts2.add(name, self._graph[name]) print(" static_order =", tuple(ts2.static_order())) ts.prepare() while ts.is_active(): print(" is_active, i = ", bad[0]) bad[0] += 1 if bad[0] > 20: print(" Infinite loop?") break nodes = ts.get_ready() print(" Do nodes:", nodes) awaitables = [self._registryname for name in nodes] print(" awaitables: ", awaitables) awaitable_results = await asyncio.gather(*awaitables) results.update({ p[0].name: p[1] for p in zip(awaitables, awaitable_results) }) print(results) for node in nodes: ts.done(node) `return results` `Example usage:`python class Foo(AsyncBase): async def graa(self, boff): print("graa") return 5 async def boff(self): print("boff") return 8 async def other(self, boff, graa): print("other") return 5 + boff + graa foo = Foo() await foo.other() `Output:` resolve(['boff', 'graa']) names = ['boff', 'graa'] self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}} static_order = ('boff', 'graa') is_active, i = 0 Do nodes: ('boff',) awaitables: [<coroutine object Foo.boff at 0x10bd81a40>] resolve([]) names = [] self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}} static_order = () boff {'boff': 8} is_active, i = 1 Do nodes: ('graa',) awaitables: [<coroutine object Foo.graa at 0x10d66b340>] resolve([]) names = [] self._graph = {'graa': {'boff'}, 'boff': set(), 'other': {'graa', 'boff'}} static_order = () graa {'boff': 8, 'graa': 5} other 18 ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	New pattern for views that return either JSON or HTML, available for plugins 648435885
970554697	https://github.com/simonw/datasette/issues/782#issuecomment-970554697	https://api.github.com/repos/simonw/datasette/issues/782	IC_kwDOBm6k_c452X1J	simonw 9599	2021-11-16T18:32:03Z	2021-11-16T18:32:03Z	OWNER	I'm going to take another look at this: - https://github.com/simonw/datasette/issues/878	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Redesign default .json format 627794879
970553780	https://github.com/simonw/datasette/issues/782#issuecomment-970553780	https://api.github.com/repos/simonw/datasette/issues/782	IC_kwDOBm6k_c452Xm0	simonw 9599	2021-11-16T18:30:51Z	2021-11-16T18:30:58Z	OWNER	OK, I'm ready to start working on this today. I'm going to go with a default representation that looks like this: `json { "rows": [ {"id": 1, "name": "One"}, {"id": 2, "name": "Two"} ], "next_url": null }` Note that there's no `count` - all it provides is the current selection of results and an indication as to how the next can be retrieved (`null` if there are no more results). I'll implement `?_extra=` to provide everything else.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Redesign default .json format 627794879
970544733	https://github.com/simonw/datasette/issues/1509#issuecomment-970544733	https://api.github.com/repos/simonw/datasette/issues/1509	IC_kwDOBm6k_c452VZd	simonw 9599	2021-11-16T18:22:32Z	2021-11-16T18:22:32Z	OWNER	This is mainly happening here: - https://github.com/simonw/datasette/issues/782	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Datasette 1.0 JSON API (and documentation) 1054243511
969621662	https://github.com/simonw/datasette/issues/448#issuecomment-969621662	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45y0Ce	simonw 9599	2021-11-16T01:32:04Z	2021-11-16T01:32:04Z	OWNER	Tests are failing and I think it's because the facets come back in different orders, need a tie-breaker. https://github.com/simonw/datasette/runs/4219325197?check_suite_focus=true	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969616626	https://github.com/simonw/datasette/issues/1176#issuecomment-969616626	https://api.github.com/repos/simonw/datasette/issues/1176	IC_kwDOBm6k_c45yyzy	simonw 9599	2021-11-16T01:29:13Z	2021-11-16T01:29:13Z	OWNER	I'm inclined to create a Sphinx reference documentation page for this, as I did for `sqlite-utils` here: https://sqlite-utils.datasette.io/en/stable/reference.html	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Policy on documenting "public" datasette.utils functions 779691739
969613166	https://github.com/simonw/datasette/issues/1012#issuecomment-969613166	https://api.github.com/repos/simonw/datasette/issues/1012	IC_kwDOBm6k_c45yx9u	simonw 9599	2021-11-16T01:27:25Z	2021-11-16T01:27:25Z	OWNER	Requested here: https://github.com/pypa/trove-classifiers/pull/85	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	For 1.0 update trove classifier in setup.py 718540751
969602825	https://github.com/simonw/datasette/issues/1012#issuecomment-969602825	https://api.github.com/repos/simonw/datasette/issues/1012	IC_kwDOBm6k_c45yvcJ	simonw 9599	2021-11-16T01:21:14Z	2021-11-16T01:21:14Z	OWNER	I'd been wondering how to get new classifiers into Trove - thanks, I'll give this a go.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	For 1.0 update trove classifier in setup.py 718540751
969600859	https://github.com/simonw/datasette/issues/1511#issuecomment-969600859	https://api.github.com/repos/simonw/datasette/issues/1511	IC_kwDOBm6k_c45yu9b	simonw 9599	2021-11-16T01:20:13Z	2021-11-16T01:20:13Z	OWNER	See: - #830	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Review plugin hooks for Datasette 1.0 1054246919
969582098	https://github.com/simonw/datasette/issues/448#issuecomment-969582098	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yqYS	simonw 9599	2021-11-16T01:10:28Z	2021-11-16T01:10:28Z	OWNER	Also note that this demo data is using a SQL view to create the JSON arrays - the view is defined as such: `sql CREATE VIEW ads_with_targets as select ads.*, json_group_array(targets.name) as target_names from ads join ad_targets on ad_targets.ad_id = ads.id join targets on ad_targets.target_id = targets.id group by ad_targets.ad_id;` So running JSON faceting on top of that view is a pretty big ask!	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969578466	https://github.com/simonw/datasette/issues/448#issuecomment-969578466	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45ypfi	simonw 9599	2021-11-16T01:08:29Z	2021-11-16T01:08:29Z	OWNER	Actually with the cache warmed up it looks like the facet query is taking 150ms which is good enough.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969572281	https://github.com/simonw/datasette/issues/448#issuecomment-969572281	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yn-5	simonw 9599	2021-11-16T01:05:11Z	2021-11-16T01:05:11Z	OWNER	I tried this and it seems to work correctly: python for source_and_config in self.get_configs(): config = source_and_config["config"] source = source_and_config["source"] column = config.get("column") or config["simple"] facet_sql = """ with inner as ({sql}), deduped_array_items as ( select distinct j.value, inner.* from json_each([inner].{col}) j join inner ) select value as value, count() as count from deduped_array_items group by value order by count() desc limit {limit} """.format( col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1 ) The queries are very slow though - I had to bump up to 2s time limit even against only a view returning 3,499 rows.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969557008	https://github.com/simonw/datasette/issues/448#issuecomment-969557008	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45ykQQ	simonw 9599	2021-11-16T00:56:09Z	2021-11-16T00:59:59Z	OWNER	This looks like it might work: `sql with inner as ( select * from ads_with_targets where :p0 in ( select value from json_each([ads_with_targets].[target_names]) ) ), deduped_array_items as ( select distinct j.value, inner.* from json_each([inner].[target_names]) j join inner ) select value, count() from deduped_array_items group by value order by count() desc`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969557972	https://github.com/simonw/datasette/issues/448#issuecomment-969557972	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45ykfU	simonw 9599	2021-11-16T00:56:58Z	2021-11-16T00:56:58Z	OWNER	It uses a CTE which were introduced in SQLite 3.8 - and AWS Lambda Python 3.9 still provides 3.7 - but I've checked and I can use `pysqlite3-binary` to work around that there so I'm OK relying on CTEs for this.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969449772	https://github.com/simonw/datasette/issues/448#issuecomment-969449772	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yKEs	simonw 9599	2021-11-15T23:48:37Z	2021-11-15T23:48:37Z	OWNER	Given this query: https://json-view-facet-bug-demo-j7hipcg4aq-uc.a.run.app/russian-ads?sql=select%0D%0A++j.value+as+value%2C%0D%0A++count%28%29+as+count%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++id%2C%0D%0A++++++file%2C%0D%0A++++++clicks%2C%0D%0A++++++impressions%2C%0D%0A++++++text%2C%0D%0A++++++url%2C%0D%0A++++++spend_amount%2C%0D%0A++++++spend_currency%2C%0D%0A++++++created%2C%0D%0A++++++ended%2C%0D%0A++++++target_names%0D%0A++++from%0D%0A++++++ads_with_targets%0D%0A++++where%0D%0A++++++%3Ap0+in+%28%0D%0A++++++++select%0D%0A++++++++++value%0D%0A++++++++from%0D%0A++++++++++json_each%28%5Bads_with_targets%5D.%5Btarget_names%5D%29%0D%0A++++++%29%0D%0A++%29%0D%0A++join+json_each%28target_names%29+j%0D%0Agroup+by%0D%0A++j.value%0D%0Aorder+by%0D%0A++count+desc%2C%0D%0A++value%0D%0Alimit%0D%0A++31&p0=people_who_match%3Ainterests%3AAfrican-American+culture `sql select j.value as value, count() as count from ( select id, file, clicks, impressions, text, url, spend_amount, spend_currency, created, ended, target_names from ads_with_targets where :p0 in ( select value from json_each([ads_with_targets].[target_names]) ) ) join json_each(target_names) j group by j.value order by count desc, value limit 31` How can I return a count of the number of documents containing each tag, but not the number of total tags that match including duplicates?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969446972	https://github.com/simonw/datasette/issues/448#issuecomment-969446972	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yJY8	simonw 9599	2021-11-15T23:46:13Z	2021-11-15T23:46:13Z	OWNER	It looks like the problem here is that some of the tags occur more than once in the documents: So they get counted more than once, hence the 182 count for something that couldn't possibly return more than 172 documents.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969442215	https://github.com/simonw/datasette/issues/448#issuecomment-969442215	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yIOn	simonw 9599	2021-11-15T23:42:03Z	2021-11-15T23:42:03Z	OWNER	I think this code is wrong in the `ArrayFacet` class: https://github.com/simonw/datasette/blob/502c02fa6dde6a8bb840af6c4c8cf858aa1db687/datasette/facets.py#L357-L364	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969440918	https://github.com/simonw/datasette/issues/448#issuecomment-969440918	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yH6W	simonw 9599	2021-11-15T23:40:17Z	2021-11-15T23:40:35Z	OWNER	Applied that fix to the `arraycontains` filter but I'm still getting bad results for the faceting: Should never get 182 results on a page that faceting against only 172 items.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969436930	https://github.com/simonw/datasette/issues/448#issuecomment-969436930	https://api.github.com/repos/simonw/datasette/issues/448	IC_kwDOBm6k_c45yG8C	simonw 9599	2021-11-15T23:31:58Z	2021-11-15T23:31:58Z	OWNER	I think this SQL recipe may work instead: `sql select * from ads_with_targets where 'people_who_match:interests:African-American Civil Rights Movement (1954—68)' in ( select value from json_each(target_names) ) and 'interests:Martin Luther King III' in ( select value from json_each(target_names) )` https://json-view-facet-bug-demo-j7hipcg4aq-uc.a.run.app/russian-ads?sql=select%0D%0A++*%0D%0Afrom%0D%0A++ads_with_targets%0D%0Awhere%0D%0A++%27people_who_match%3Ainterests%3AAfrican-American+Civil+Rights+Movement+%281954%E2%80%9468%29%27+in+%28%0D%0A++++select%0D%0A++++++value%0D%0A++++from%0D%0A++++++json_each%28target_names%29%0D%0A++%29%0D%0A++and+%27interests%3AMartin+Luther+King+III%27+in+%28%0D%0A++++select%0D%0A++++++value%0D%0A++++from%0D%0A++++++json_each%28target_names%29%0D%0A++%29&interests=&African=&Martin=	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	_facet_array should work against views 440222719
969433734	https://github.com/simonw/datasette/issues/519#issuecomment-969433734	https://api.github.com/repos/simonw/datasette/issues/519	IC_kwDOBm6k_c45yGKG	simonw 9599	2021-11-15T23:26:11Z	2021-11-15T23:26:11Z	OWNER	I'm happy with this as the goals for 1.0. I'm going to close this issue and create three tracking tickets for the three key themes: https://github.com/simonw/datasette/issues/1509 https://github.com/simonw/datasette/issues/1510 https://github.com/simonw/datasette/issues/1511	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Decide what goes into Datasette 1.0 459590021
968470212	https://github.com/simonw/sqlite-utils/issues/329#issuecomment-968470212	https://api.github.com/repos/simonw/sqlite-utils/issues/329	IC_kwDOCGYnMM45ua7E	simonw 9599	2021-11-15T02:49:28Z	2021-11-15T02:49:28Z	OWNER	I was going to replace all of the `validate_column_names()` bits with something that fixed them instead, but I think I have a better idea: I'm only going to apply the fix for the various '.insert()` methods that create the initial tables. I'll keep the `validate_column_names()` where they are at the moment. Once you've inserted the data and created the tables it will be up to you to use the new, correct column names. This avoids the whole issue of needing to rewrite parameters, and solves the immediate problem which is consuming CSV files with bad column names.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Rethink approach to [ and ] in column names (currently throws error) 1005891028
968458837	https://github.com/simonw/sqlite-utils/issues/329#issuecomment-968458837	https://api.github.com/repos/simonw/sqlite-utils/issues/329	IC_kwDOCGYnMM45uYJV	simonw 9599	2021-11-15T02:21:15Z	2021-11-15T02:21:15Z	OWNER	I'm not going to implement a fix that rewrites the `pk` and `column_order` and other parameters - at least not yet. The main thing I'm trying to fix here is what happens when you attempt to import a CSV file with `[ ]` in the column names, which should be unaffected by that second challenge.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Rethink approach to [ and ] in column names (currently throws error) 1005891028
968453129	https://github.com/simonw/sqlite-utils/issues/329#issuecomment-968453129	https://api.github.com/repos/simonw/sqlite-utils/issues/329	IC_kwDOCGYnMM45uWwJ	simonw 9599	2021-11-15T02:07:46Z	2021-11-15T02:07:46Z	OWNER	If I replace `validate_column_names(row.keys())` with `fix_column_names(row)` I need to decide what to do about things like `pk=` and `column_order=`. What should the following do? `python table.insert({"foo[bar]": 4}, pk="foo[bar]", column_order=["foo[bar]"])` Should it spot the old column names in the `pk=` and `column_order=` parameters and pretend that `foo_bar_` was passed instead?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Rethink approach to [ and ] in column names (currently throws error) 1005891028
968451954	https://github.com/simonw/sqlite-utils/issues/329#issuecomment-968451954	https://api.github.com/repos/simonw/sqlite-utils/issues/329	IC_kwDOCGYnMM45uWdy	simonw 9599	2021-11-15T02:05:29Z	2021-11-15T02:05:29Z	OWNER	I could even have those replacement characters be properties of the `Database` class, so uses can sub-class and change them. I'm not going to do this, it's unnecessary extra complexity and it means the function that fixes the column names needs to have access to the current `Database` instance.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Rethink approach to [ and ] in column names (currently throws error) 1005891028
968450579	https://github.com/simonw/sqlite-utils/issues/339#issuecomment-968450579	https://api.github.com/repos/simonw/sqlite-utils/issues/339	IC_kwDOCGYnMM45uWIT	simonw 9599	2021-11-15T02:02:34Z	2021-11-15T02:02:34Z	OWNER	Documentation: https://github.com/simonw/sqlite-utils/blob/54a2269e91ce72b059618662ed133a85f3d42e4a/docs/python-api.rst#working-with-lookup-tables	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	`table.lookup()` option to populate additional columns when creating a record 1053122092
968435041	https://github.com/simonw/sqlite-utils/issues/339#issuecomment-968435041	https://api.github.com/repos/simonw/sqlite-utils/issues/339	IC_kwDOCGYnMM45uSVh	simonw 9599	2021-11-15T01:44:42Z	2021-11-15T01:44:42Z	OWNER	`lookup(column_values, extra_values)` is one option. `column_values` isn't actually a great name for the first parameter any more, since the second parameter also takes column values. The first parameter is now all about the unique lookup values. Maybe this: `lookup(lookup_values, extra_values)`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	`table.lookup()` option to populate additional columns when creating a record 1053122092
968434594	https://github.com/simonw/sqlite-utils/issues/339#issuecomment-968434594	https://api.github.com/repos/simonw/sqlite-utils/issues/339	IC_kwDOCGYnMM45uSOi	simonw 9599	2021-11-15T01:43:10Z	2021-11-15T01:43:10Z	OWNER	What should I call this parameter? Django has a similar feature where it calls them `defaults=` (for `get_or_create()`) but I'm not a huge fan of that name.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	`table.lookup()` option to populate additional columns when creating a record 1053122092
968434425	https://github.com/simonw/sqlite-utils/issues/339#issuecomment-968434425	https://api.github.com/repos/simonw/sqlite-utils/issues/339	IC_kwDOCGYnMM45uSL5	simonw 9599	2021-11-15T01:42:36Z	2021-11-15T01:42:36Z	OWNER	Here's the current signature of `table.lookup()`: https://github.com/simonw/sqlite-utils/blob/9cda5b070f885a7995f0c307bcc4f45f0812994a/sqlite_utils/db.py#L2716-L2729 I'm going to add a second positional argument which can provide a dictionary of column->value to use when creating the original table and populating the initial row. If the row already exists, those columns will be ignored entirely.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	`table.lookup()` option to populate additional columns when creating a record 1053122092
968401459	https://github.com/simonw/sqlite-utils/pull/322#issuecomment-968401459	https://api.github.com/repos/simonw/sqlite-utils/issues/322	IC_kwDOCGYnMM45uKIz	simonw 9599	2021-11-15T00:26:42Z	2021-11-15T00:26:42Z	OWNER	This relates to the fact that dictionaries, lists and tuples get special treatment and are converted to JSON strings, using this code: https://github.com/simonw/sqlite-utils/blob/e8d958109ee290cfa1b44ef7a39629bb50ab673e/sqlite_utils/db.py#L2937-L2947 So the `COLUMN_TYPE_MAPPING` should include those too - right now it looks like this: https://github.com/simonw/sqlite-utils/blob/e8d958109ee290cfa1b44ef7a39629bb50ab673e/sqlite_utils/db.py#L165-L188	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Add dict type to be mapped as TEXT in sqllite 979612115
968384988	https://github.com/simonw/sqlite-utils/pull/324#issuecomment-968384988	https://api.github.com/repos/simonw/sqlite-utils/issues/324	IC_kwDOCGYnMM45uGHc	simonw 9599	2021-11-14T23:25:16Z	2021-11-14T23:25:16Z	OWNER	Yes this was absolutely the intention! Thanks, I wonder how often I've made that mistake in other projects?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Use python-dateutil package instead of dateutils 988013247
968384005	https://github.com/simonw/sqlite-utils/issues/331#issuecomment-968384005	https://api.github.com/repos/simonw/sqlite-utils/issues/331	IC_kwDOCGYnMM45uF4F	simonw 9599	2021-11-14T23:19:29Z	2021-11-14T23:20:32Z	OWNER	Tested it like this, against a freshly built `.tar.gz` package from my development environment: (w) w % mypy . hello.py:1: error: Skipping analyzing "sqlite_utils": found module but no type hints or library stubs hello.py:1: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports Found 1 error in 1 file (checked 1 source file) (w) w % pip install ~/Dropbox/Development/sqlite-utils/dist/sqlite-utils-3.17.1.tar.gz Processing /Users/simon/Dropbox/Development/sqlite-utils/dist/sqlite-utils-3.17.1.tar.gz ... Successfully installed sqlite-utils-3.17.1 (w) w % mypy . Success: no issues found in 1 source file I tested against the `.whl` too. My `hello.py` script contained this: ```python import sqlite_utils from typing import cast if name == "main": db = sqlite_utils.Database(memory=True) table = cast(sqlite_utils.db.Table, db["foo"]) table.insert({"id": 5}) print(list(db.query("select * from foo"))) That `cast()` is necessary because without it you get this error: (w) w % mypy . hello.py:7: error: Item "View" of "Union[Table, View]" has no attribute "insert" ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Mypy error: found module but no type hints or library stubs 1026794056
968381939	https://github.com/simonw/sqlite-utils/issues/331#issuecomment-968381939	https://api.github.com/repos/simonw/sqlite-utils/issues/331	IC_kwDOCGYnMM45uFXz	simonw 9599	2021-11-14T23:06:20Z	2021-11-14T23:06:20Z	OWNER	Thanks - I didn't know this was needed!	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Mypy error: found module but no type hints or library stubs 1026794056
968380675	https://github.com/simonw/sqlite-utils/issues/332#issuecomment-968380675	https://api.github.com/repos/simonw/sqlite-utils/issues/332	IC_kwDOCGYnMM45uFED	simonw 9599	2021-11-14T22:57:56Z	2021-11-14T22:57:56Z	OWNER	This is a great idea.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	`sqlite-utils memory --flatten` option to flatten nested JSON 1028056713
968380387	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968380387	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uE_j	simonw 9599	2021-11-14T22:55:56Z	2021-11-14T22:55:56Z	OWNER	OK, this should fix it.	{ "total_count": 1, "+1": 0, "-1": 0, "laugh": 0, "hooray": 1, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968371112	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968371112	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uCuo	simonw 9599	2021-11-14T21:57:43Z	2021-11-14T22:21:31Z	OWNER	`create_index(..., find_unique_name=)` is good. Default to false. `index_foreign_keys` can set it to true.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968361671	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968361671	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAbH	simonw 9599	2021-11-14T20:54:53Z	2021-11-14T21:01:14Z	OWNER	I'm leaning towards `table.create_index(columns, ignore_existing_name=True)` now. Or `resolve_existing_name` - or `skip_existing_name`? "ignore" sounds like it might not create the index if the name exists, but we want to still create the index but pick a new name.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968362285	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968362285	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAkt	simonw 9599	2021-11-14T20:59:44Z	2021-11-14T20:59:44Z	OWNER	I think I'll attempt to create the index and re-try if it fails with that error.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968362214	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968362214	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAjm	simonw 9599	2021-11-14T20:59:15Z	2021-11-14T20:59:15Z	OWNER	How to figure out if an index name is already in use? `PRAGMA index_list(t)` requires a table name. This does it: `sql SELECT name FROM sqlite_master WHERE type = 'index';`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968361409	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968361409	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAXB	simonw 9599	2021-11-14T20:52:55Z	2021-11-14T20:52:55Z	OWNER	Looking at the method signature: https://github.com/simonw/sqlite-utils/blob/92aa5c9c5d26b0889c8c3d97c76a908d5f8af211/sqlite_utils/db.py#L1518-L1524 `if_not_exists` just adds a `IF NOT EXISTS` clause here: https://github.com/simonw/sqlite-utils/blob/92aa5c9c5d26b0889c8c3d97c76a908d5f8af211/sqlite_utils/db.py#L1549-L1561	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968361285	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968361285	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAVF	simonw 9599	2021-11-14T20:51:57Z	2021-11-14T20:51:57Z	OWNER	SQLite will happily create multiple identical indexes on a table, using more disk space each time: ```pycon import sqlite_utils db = sqlite_utils.Database("dupes.db") db["t"].insert_all({"id": i} for i in range(10000)) <Table t (id)> # dupes.db is 98304 bytes >>> db["t"].create_index(["id"]) <Table t (id)> # dupes.db is 204800 bytes >>> db["t"].indexes [Index(seq=0, name='idx_t_id', unique=0, origin='c', partial=0, columns=['id'])] >>> db["t"].create_index(["id"], index_name="t_idx_t_id_2") <Table t (id)> # 311296 bytes >>> db["t"].create_index(["id"], index_name="t_idx_t_id_3") <Table t (id)> # 417792 bytes >>> db.vacuum() # Still 417792 bytes ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968360538	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968360538	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAJa	simonw 9599	2021-11-14T20:46:56Z	2021-11-14T20:46:56Z	OWNER	I'm tempted to not provide an opt-out option either: if you call `table.create_index(...)` without specifying an index name I think the tool should create the index for you, quietly picking an index name that works. But... it feels wasteful to create an index that exactly duplicates an existing index. Would SQLite even let you do that or would it notice and NOT double the amount of disk space used for that index?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968360387	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968360387	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45uAHD	simonw 9599	2021-11-14T20:45:44Z	2021-11-14T20:45:44Z	OWNER	What would such an option be called? Some options: `table.create_index([fk.column], force=True)` - not obvious what `force` means here `table.create_index([fk.column], ignore_existing_name=True)` - not obvious what `ignore` means here `table.create_index([fk.column], pick_unique_name=True)` - bit verbose If the user doesn't pass in an explicit name it seems like their intent is "just create me the index, I don't care what name you use" - so actually perhaps the default behaviour here should be to pick a new unique name if that name is already in use. Then maybe there should be an option to disable that - some options there: `table.create_index([fk.column], error_on_existing_index_name=True)` - too verbose `table.create_index([fk.column], force=False)` - not clear what `force` means	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968359868	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968359868	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45t_-8	simonw 9599	2021-11-14T20:41:42Z	2021-11-14T20:41:42Z	OWNER	The "index idx_generators_eia860_report_date already exists" error suggests that the problem here is actually one of an index name collision. `python table.create_index([fk.column])` This will derive a name for the index automatically from the name of the table and the name of the passed in columns: https://github.com/simonw/sqlite-utils/blob/92aa5c9c5d26b0889c8c3d97c76a908d5f8af211/sqlite_utils/db.py#L1536-L1539 So perhaps `.create_index()` should grow an extra option that creates the index even if the name already exists, by finding a new name.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968359137	https://github.com/simonw/sqlite-utils/issues/335#issuecomment-968359137	https://api.github.com/repos/simonw/sqlite-utils/issues/335	IC_kwDOCGYnMM45t_zh	simonw 9599	2021-11-14T20:37:00Z	2021-11-14T20:37:00Z	OWNER	This is strange - the code already checks that an index doesn't exist before attempting to create it: https://github.com/simonw/sqlite-utils/blob/92aa5c9c5d26b0889c8c3d97c76a908d5f8af211/sqlite_utils/db.py#L893-L902	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils index-foreign-keys fails due to pre-existing index 1042569687
968210842	https://github.com/simonw/datasette/issues/1507#issuecomment-968210842	https://api.github.com/repos/simonw/datasette/issues/1507	IC_kwDOBm6k_c45tbma	simonw 9599	2021-11-14T05:41:55Z	2021-11-14T05:41:55Z	OWNER	Here's the build with that fix: https://readthedocs.org/projects/datasette/builds/15268498/ It passed and published the docs: https://docs.datasette.io/en/latest/changelog.html	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	ReadTheDocs build failed for 0.59.2 release 1052851176
968210222	https://github.com/simonw/datasette/issues/1507#issuecomment-968210222	https://api.github.com/repos/simonw/datasette/issues/1507	IC_kwDOBm6k_c45tbcu	simonw 9599	2021-11-14T05:34:14Z	2021-11-14T05:34:14Z	OWNER	Here's the new build using Python 3: https://readthedocs.org/projects/datasette/builds/15268482/ It's still broken. Here's one of many issue threads about it, this one has a workaround fix: https://github.com/readthedocs/readthedocs.org/issues/8616#issuecomment-952034858 For future readers, the solution for this problem is to pin `docutils<0.18` in your `requirements.txt` file, and have a `.readthedocs.yaml` file with these contents: ``` version: 2 python: install: - requirements: docs/requirements.txt ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	ReadTheDocs build failed for 0.59.2 release 1052851176
968209957	https://github.com/simonw/datasette/issues/1507#issuecomment-968209957	https://api.github.com/repos/simonw/datasette/issues/1507	IC_kwDOBm6k_c45tbYl	simonw 9599	2021-11-14T05:31:07Z	2021-11-14T05:31:07Z	OWNER	Looks like ReadTheDocs builds started failing for `latest` a few weeks ago:	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	ReadTheDocs build failed for 0.59.2 release 1052851176
968209731	https://github.com/simonw/datasette/issues/1507#issuecomment-968209731	https://api.github.com/repos/simonw/datasette/issues/1507	IC_kwDOBm6k_c45tbVD	simonw 9599	2021-11-14T05:28:41Z	2021-11-14T05:28:41Z	OWNER	I will try adding a `.readthedocs.yml` file: https://docs.readthedocs.io/en/stable/config-file/v2.html#python-version This might work: ``` version: 2 build: os: ubuntu-20.04 tools: python: "3.9" sphinx: configuration: docs/conf.py ```	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	ReadTheDocs build failed for 0.59.2 release 1052851176
968209616	https://github.com/simonw/datasette/issues/1507#issuecomment-968209616	https://api.github.com/repos/simonw/datasette/issues/1507	IC_kwDOBm6k_c45tbTQ	simonw 9599	2021-11-14T05:27:22Z	2021-11-14T05:27:22Z	OWNER	https://blog.readthedocs.com/default-python-3/ they started defaulting new projects to Python 3 back in Feb 2019 but clearly my project was created before then.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	ReadTheDocs build failed for 0.59.2 release 1052851176
968209560	https://github.com/simonw/datasette/issues/1507#issuecomment-968209560	https://api.github.com/repos/simonw/datasette/issues/1507	IC_kwDOBm6k_c45tbSY	simonw 9599	2021-11-14T05:26:36Z	2021-11-14T05:26:36Z	OWNER	It looks like my builds there still run on Python 2! git clone --no-single-branch --depth 50 https://github.com/simonw/datasette . git checkout --force de1e031713f47fbd51eb7239db3e7e6025fbf81a git clean -d -f -f python2.7 -mvirtualenv /home/docs/checkouts/readthedocs.org/user_builds/datasette/envs/0.59.2 /home/docs/checkouts/readthedocs.org/user_builds/datasette/envs/0.59.2/bin/python -m pip install --upgrade --no-cache-dir pip setuptools /home/docs/checkouts/readthedocs.org/user_builds/datasette/envs/0.59.2/bin/python -m pip install --upgrade --no-cache-dir mock==1.0.1 pillow==5.4.1 alabaster>=0.7,<0.8,!=0.7.5 commonmark==0.8.1 recommonmark==0.5.0 sphinx<2 sphinx-rtd-theme<0.5 readthedocs-sphinx-ext<2.2 cat docs/conf.py	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	ReadTheDocs build failed for 0.59.2 release 1052851176
968207906	https://github.com/simonw/datasette/issues/1503#issuecomment-968207906	https://api.github.com/repos/simonw/datasette/issues/1503	IC_kwDOBm6k_c45ta4i	simonw 9599	2021-11-14T05:08:26Z	2021-11-14T05:08:26Z	OWNER	Error: ``` def test_table_html_filter_form_column_options( path, expected_column_options, app_client ): response = app_client.get(path) assert response.status == 200 form = Soup(response.body, "html.parser").find("form") column_options = [ o.attrs.get("value") or o.string for o in form.select("select[name=_filter_column] option") ] `assert expected_column_options == column_options` E AssertionError: assert ['- column -'...wid', 'value'] == ['- column -', 'value'] E At index 1 diff: 'rowid' != 'value' E Left contains one more item: 'value' E Use -v to get the full diff `` This is becauserowid` isn't a table column but IS returned by the query used on that page. My solution: start with the query columns, but then add any table columns that were not already returned by the query to the end of the `filter_columns` list.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	`?_nocol=` removes that column from the filter interface 1050163432
968192980	https://github.com/simonw/datasette/issues/1506#issuecomment-968192980	https://api.github.com/repos/simonw/datasette/issues/1506	IC_kwDOBm6k_c45tXPU	simonw 9599	2021-11-14T02:22:40Z	2021-11-14T02:22:40Z	OWNER	I think the answer is to spot this case and link to `?_item_exact=x` instead of `?_item=x` - it looks like that's already recommended in the documentation here: https://docs.datasette.io/en/stable/json_api.html#column-filter-arguments ?column__exact=value or ?_column=value Returns rows where the specified column exactly matches the value. So maybe the facet selection rendering logic needs to spot this and link correctly to it?	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Columns beginning with an underscore do not facet correctly 1052826038
962411119	https://github.com/simonw/sqlite-utils/issues/336#issuecomment-962411119	https://api.github.com/repos/simonw/sqlite-utils/issues/336	IC_kwDOCGYnMM45XTpv	simonw 9599	2021-11-06T07:21:04Z	2021-11-06T07:21:04Z	OWNER	I've never used `DEFAULT 'CURRENT_TIMESTAMP'` myself so this one should be an interesting bug to explore.	{ "total_count": 1, "+1": 1, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-util tranform --column-order mangles columns of type "timestamp" 1044267332
955367409	https://github.com/simonw/sqlite-utils/issues/206#issuecomment-955367409	https://api.github.com/repos/simonw/sqlite-utils/issues/206	IC_kwDOCGYnMM448b_x	simonw 9599	2021-10-30T15:50:39Z	2021-10-30T15:50:39Z	OWNER	What's the error message? Sometimes I pipe JSON through `jq` to check if it's valid: `cat my.json \| jq`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	sqlite-utils should suggest --csv if JSON parsing fails 761915790
953508979	https://github.com/simonw/datasette/issues/1497#issuecomment-953508979	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c441WRz	simonw 9599	2021-10-28T05:13:49Z	2021-10-28T05:13:49Z	OWNER	Wrote about this in my weeknotes: https://simonwillison.net/2021/Oct/28/weeknotes-kubernetes-web-components/	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950417375	https://github.com/simonw/datasette/issues/1497#issuecomment-950417375	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjff	simonw 9599	2021-10-24T23:36:54Z	2021-10-24T23:36:54Z	OWNER	Tried fixing this by pushing a new `latest` tag from my laptop: (datasette) datasette % docker pull datasetteproject/datasette:0.59.1 0.59.1: Pulling from datasetteproject/datasette 7d63c13d9b9b: Already exists 6ad2a11ca37b: Already exists e9edbe81a001: Already exists 36629b83aba2: Already exists 7338abefe51c: Already exists 6b825daddc6c: Pull complete d7508b065a21: Pull complete Digest: sha256:dc134f65bec40ed4ea7049188fe1e3915b8e6c3fd999b17effe8ec24868b979c Status: Downloaded newer image for datasetteproject/datasette:0.59.1 docker.io/datasetteproject/datasette:0.59.1 (datasette) datasette % docker tag datasetteproject/datasette:0.59.1 datasetteproject/datasette:latest (datasette) datasette % docker push datasetteproject/datasette:latest The push refers to repository [docker.io/datasetteproject/datasette] d668c99b6ff1: Layer already exists aa20c9013575: Layer already exists c97eebf2b227: Layer already exists 284a6c64b82c: Layer already exists 388eedeb736e: Layer already exists 2feece0964b8: Layer already exists e8b689711f21: Layer already exists errors: denied: requested access to the resource is denied unauthorized: authentication required So I logged in with `docker login`: `(datasette) datasette % docker login Login with your Docker ID to push and pull images from Docker Hub. If you don't have a Docker ID, head over to https://hub.docker.com to create one. Username: datasetteproject Password:` And ran the push again and it worked: `(datasette) datasette % docker push datasetteproject/datasette:latest The push refers to repository [docker.io/datasetteproject/datasette] d668c99b6ff1: Layer already exists aa20c9013575: Layer already exists c97eebf2b227: Layer already exists 284a6c64b82c: Layer already exists 388eedeb736e: Layer already exists 2feece0964b8: Layer already exists e8b689711f21: Layer already exists latest: digest: sha256:dc134f65bec40ed4ea7049188fe1e3915b8e6c3fd999b17effe8ec24868b979c size: 1793` https://hub.docker.com/layers/datasetteproject/datasette/latest/images/sha256-dc134f65bec40ed4ea7049188fe1e3915b8e6c3fd999b17effe8ec24868b979c?context=explore	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950416802	https://github.com/simonw/datasette/issues/1497#issuecomment-950416802	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjWi	simonw 9599	2021-10-24T23:32:39Z	2021-10-24T23:32:39Z	OWNER	That's because the `publish.yml` workflow ends with this, which isn't in the `push_docker_tag.yml` workflow: https://github.com/simonw/datasette/blob/2c31d1cd9cd3b63458ccbe391866499fa3f44978/.github/workflows/publish.yml#L117-L119	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950416682	https://github.com/simonw/datasette/issues/1497#issuecomment-950416682	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjUq	simonw 9599	2021-10-24T23:31:51Z	2021-10-24T23:31:51Z	OWNER	One catch: the `latest` tag on Docker Hub is still three months old.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950416659	https://github.com/simonw/datasette/issues/1497#issuecomment-950416659	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjUT	simonw 9599	2021-10-24T23:31:41Z	2021-10-24T23:31:41Z	OWNER	Published `0.59.1` as well: https://github.com/simonw/datasette/runs/3991214225?check_suite_focus=true Result: https://hub.docker.com/layers/datasetteproject/datasette/0.59.1/images/sha256-dc134f65bec40ed4ea7049188fe1e3915b8e6c3fd999b17effe8ec24868b979c?context=explore	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950416460	https://github.com/simonw/datasette/issues/1497#issuecomment-950416460	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjRM	simonw 9599	2021-10-24T23:30:10Z	2021-10-24T23:30:10Z	OWNER	Testing that newly published image: % docker run -p 8002:8001 -v `pwd`:/mnt \ datasetteproject/datasette:0.59 datasette -p 8001 -h 0.0.0.0 /mnt/fixtures.db Unable to find image 'datasetteproject/datasette:0.59' locally 0.59: Pulling from datasetteproject/datasette 7d63c13d9b9b: Already exists 6ad2a11ca37b: Already exists e9edbe81a001: Already exists 36629b83aba2: Already exists 7338abefe51c: Already exists 6d71b6b88b82: Pull complete 8c4da3c56bdc: Pull complete Digest: sha256:038decc28e0ea84b281ecc0058fe8eba7aa99596e5a2177ff714092ad03294ed Status: Downloaded newer image for datasetteproject/datasette:0.59 INFO: Started server process [1] INFO: Waiting for application startup. INFO: Application startup complete. INFO: Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit) and `http://localhost:8002/versions.json` returns: `json { "python": { "version": "3.9.7", "full": "3.9.7 (default, Oct 12 2021, 02:43:43) \n[GCC 10.2.1 20210110]" }, "datasette": { "version": "0.59" }, "asgi": "3.0", "uvicorn": "0.15.0", "sqlite": { "version": "3.34.1"`	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950416061	https://github.com/simonw/datasette/issues/1497#issuecomment-950416061	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjK9	simonw 9599	2021-10-24T23:27:18Z	2021-10-24T23:27:18Z	OWNER	That worked: https://hub.docker.com/layers/datasetteproject/datasette/0.59/images/sha256-038decc28e0ea84b281ecc0058fe8eba7aa99596e5a2177ff714092ad03294ed?context=explore	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950415822	https://github.com/simonw/datasette/issues/1497#issuecomment-950415822	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pjHO	simonw 9599	2021-10-24T23:25:45Z	2021-10-24T23:25:45Z	OWNER	I'm going to attempt to publish `0.59` to Docker Hub using https://github.com/simonw/datasette/blob/2c31d1cd9cd3b63458ccbe391866499fa3f44978/.github/workflows/push_docker_tag.yml - if that works I'll push `0.59.1` as well.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950415129	https://github.com/simonw/datasette/issues/1497#issuecomment-950415129	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pi8Z	simonw 9599	2021-10-24T23:21:33Z	2021-10-24T23:21:33Z	OWNER	That fixed it! Resulting image is 249MB which is a very slight size reduction (I think previous was 259MB (uncompressed).	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950413185	https://github.com/simonw/datasette/issues/1497#issuecomment-950413185	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44pieB	simonw 9599	2021-10-24T23:16:25Z	2021-10-24T23:18:30Z	OWNER	Debian stable these days is "bullseye" - https://www.debian.org/releases/ - which has the version of SpatiaLite that I was previously pulling in from Sid: https://packages.debian.org/bullseye/libsqlite3-mod-spatialite So upgrading to the 3.9.7-slim-bullseye base image may help. https://hub.docker.com/layers/python/library/python/3.9.7-slim-bullseye/images/sha256-67af5f544115124dc6d6da1d9d2815aa9825f6fd4aa6710adb0ec1725280fb89?context=explore	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950412628	https://github.com/simonw/datasette/issues/1497#issuecomment-950412628	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44piVU	simonw 9599	2021-10-24T23:13:20Z	2021-10-24T23:13:27Z	OWNER	I think the root cause here is that I'm using a Debian Buster base image and then installing SpatiaLite from Debian unstable (sid) - as described in this comment: https://github.com/simonw/datasette/issues/1249#issuecomment-804309510 That's has worked fine in the past, but Sid is unstable - and this seems to be one of those instabilities.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950411417	https://github.com/simonw/datasette/issues/1497#issuecomment-950411417	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44piCZ	simonw 9599	2021-10-24T23:06:45Z	2021-10-24T23:11:14Z	OWNER	Same errors with `3.9.7`: ``` 5 41.46 /usr/bin/perl: error while loading shared libraries: libcrypt.so.1: cannot open shared object file: No such file or directory 5 41.46 dpkg: error processing package libc6:amd64 (--configure): 5 41.46 installed libc6:amd64 package post-installation script subprocess returned error exit status 127 5 41.47 Errors were encountered while processing: 5 41.47 libc6:amd64 5 41.50 E: Sub-process /usr/bin/dpkg returned an error code (1) `` I'm suspicious of this part of theDockerfile`: https://github.com/simonw/datasette/blob/e6e44372b34414eac2f36a4c1120af4f755aa423/Dockerfile#L1-L18	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001
950411912	https://github.com/simonw/datasette/issues/1497#issuecomment-950411912	https://api.github.com/repos/simonw/datasette/issues/1497	IC_kwDOBm6k_c44piKI	simonw 9599	2021-10-24T23:09:41Z	2021-10-24T23:09:41Z	OWNER	Here that is in the Debian bug tracker: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=993755	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }	Publish to Docker Hub failing with "libcrypt.so.1: cannot open shared object file" 1034535001

Advanced export

JSON shape: default, array, newline-delimited, object

CREATE TABLE [issue_comments] (
   [html_url] TEXT,
   [issue_url] TEXT,
   [id] INTEGER PRIMARY KEY,
   [node_id] TEXT,
   [user] INTEGER REFERENCES [users]([id]),
   [created_at] TEXT,
   [updated_at] TEXT,
   [author_association] TEXT,
   [body] TEXT,
   [reactions] TEXT,
   [issue] INTEGER REFERENCES [issues]([id])
, [performed_via_github_app] TEXT);
CREATE INDEX [idx_issue_comments_issue]
                ON [issue_comments] ([issue]);
CREATE INDEX [idx_issue_comments_user]
                ON [issue_comments] ([user]);

issue_comments

8,883 rows where user = 9599 sorted by updated_at descending

5 41.46 /usr/bin/perl: error while loading shared libraries: libcrypt.so.1: cannot open shared object file: No such file or directory

5 41.46 dpkg: error processing package libc6:amd64 (--configure):

5 41.46 installed libc6:amd64 package post-installation script subprocess returned error exit status 127

5 41.47 Errors were encountered while processing:

5 41.47 libc6:amd64

5 41.50 E: Sub-process /usr/bin/dpkg returned an error code (1)

Advanced export