html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,issue,performed_via_github_app
https://github.com/simonw/datasette/issues/1843#issuecomment-1302574330,https://api.github.com/repos/simonw/datasette/issues/1843,1302574330,IC_kwDOBm6k_c5No7T6,9599,2022-11-03T19:30:22Z,2022-11-03T19:30:22Z,OWNER,"This is affecting me a lot at the moment, on my laptop (runs fine in CI).
Here's a change to `conftest.py` which highlights the problem - it causes a failure the moment there are more than 5 open files according to `psutil`:
```diff
diff --git a/tests/conftest.py b/tests/conftest.py
index f4638a14..21d433c1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,7 @@
import httpx
import os
import pathlib
+import psutil
import pytest
import re
import subprocess
@@ -192,3 +193,8 @@ def ds_unix_domain_socket_server(tmp_path_factory):
yield ds_proc, uds
# Shut it down at the end of the pytest session
ds_proc.terminate()
+
+
+def pytest_runtest_teardown(item: pytest.Item) -> None:
+ open_files = psutil.Process().open_files()
+ assert len(open_files) < 5
```
The first error I get from this with `pytest --pdb -x` is here:
```
tests/test_api.py ............E
>>>>> traceback >>>>>
item =
def pytest_runtest_teardown(item: pytest.Item) -> None:
open_files = psutil.Process().open_files()
> assert len(open_files) < 5
E AssertionError: assert 5 < 5
E + where 5 = len([popenfile(path='/private/var/folders/wr/hn3206rs1yzgq3r49bz8nvnh0000gn/T/tmpfglrt4p2/fixtures.db', fd=14), popenfile(... fd=19), popenfile(path='/private/var/folders/wr/hn3206rs1yzgq3r49bz8nvnh0000gn/T/tmphdi5b250/fixtures.dot.db', fd=20)])
/Users/simon/Dropbox/Development/datasette/tests/conftest.py:200: AssertionError
>>>>> entering PDB >>>>>
>>>>> PDB post_mortem (IO-capturing turned off) >>>>>
> /Users/simon/Dropbox/Development/datasette/tests/conftest.py(200)pytest_runtest_teardown()
-> assert len(open_files) < 5
```
That's this test:
https://github.com/simonw/datasette/blob/2ec5583629005b32cb0877786f9681c5d43ca33f/tests/test_api.py#L656-L673
Which uses this fixture:
https://github.com/simonw/datasette/blob/2ec5583629005b32cb0877786f9681c5d43ca33f/tests/fixtures.py#L228-L231
Which calls this function:
https://github.com/simonw/datasette/blob/2ec5583629005b32cb0877786f9681c5d43ca33f/tests/fixtures.py#L105-L122
So now I'm suspicious that, even though the fixture is meant to be session scoped, the way I'm using `with tempfile.TemporaryDirectory() as tmpdir:` is causing a whole load of files to be created and held open which are not later closed.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1408757705,
https://github.com/simonw/datasette/issues/1843#issuecomment-1302634332,https://api.github.com/repos/simonw/datasette/issues/1843,1302634332,IC_kwDOBm6k_c5NpJ9c,9599,2022-11-03T20:34:56Z,2022-11-03T20:34:56Z,OWNER,"Confirmed that calling `conn.close()` on each SQLite file-based connection is the way to fix this problem.
I'm adding a `db.close()` method (sync, not async - I tried async first but it was really hard to cause every thread in the pool to close its threadlocal database connection) which loops through all known open file-based connections and closes them.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1408757705,
https://github.com/simonw/datasette/issues/1843#issuecomment-1302678384,https://api.github.com/repos/simonw/datasette/issues/1843,1302678384,IC_kwDOBm6k_c5NpUtw,9599,2022-11-03T21:21:59Z,2022-11-03T21:21:59Z,OWNER,"I added extra debug info to `/-/threads` to see this for myself:
```diff
diff --git a/datasette/app.py b/datasette/app.py
index 02bd38f1..16579e28 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -969,6 +969,13 @@ class Datasette:
""threads"": [
{""name"": t.name, ""ident"": t.ident, ""daemon"": t.daemon} for t in threads
],
+ ""file_connections"": {
+ db.name: [
+ [dict(r) for r in conn.execute(""pragma database_list"").fetchall()]
+ for conn in db._all_file_connections
+ ]
+ for db in self.databases.values()
+ },
}
# Only available in Python 3.7+
if hasattr(asyncio, ""all_tasks""):
```
Output after hitting refresh on a few `/fixtures` tables to ensure more threads started:
```
""file_connections"": {
""_internal"": [],
""fixtures"": [
[
{
""seq"": 0,
""name"": ""main"",
""file"": ""/Users/simon/Dropbox/Development/datasette/fixtures.db""
}
],
[
{
""seq"": 0,
""name"": ""main"",
""file"": ""/Users/simon/Dropbox/Development/datasette/fixtures.db""
}
],
[
{
""seq"": 0,
""name"": ""main"",
""file"": ""/Users/simon/Dropbox/Development/datasette/fixtures.db""
}
]
]
},
```
I decided not to ship this feature though as it leaks the names of internal database files.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1408757705,
https://github.com/simonw/datasette/issues/1843#issuecomment-1302679026,https://api.github.com/repos/simonw/datasette/issues/1843,1302679026,IC_kwDOBm6k_c5NpU3y,9599,2022-11-03T21:22:42Z,2022-11-03T21:22:42Z,OWNER,Docs for the new `db.close()` method: https://docs.datasette.io/en/latest/internals.html#db-close,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1408757705,
https://github.com/simonw/datasette/issues/1851#issuecomment-1294224185,https://api.github.com/repos/simonw/datasette/issues/1851,1294224185,IC_kwDOBm6k_c5NJEs5,9599,2022-10-27T23:18:24Z,2022-11-03T23:26:05Z,OWNER,"So new API design is:
```
POST /db/table/-/insert
Authorization: Bearer xxx
Content-Type: application/json
{
""row"": {
""id"": 1,
""name"": ""New record""
}
}
```
Returns:
```
201 Created
{
""row"": [{
""id"": 1,
""name"": ""New record""
}]
}
```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1421544654,
https://github.com/simonw/datasette/issues/1855#issuecomment-1301594495,https://api.github.com/repos/simonw/datasette/issues/1855,1301594495,IC_kwDOBm6k_c5NlMF_,9599,2022-11-03T03:11:17Z,2022-11-03T03:11:17Z,OWNER,"Maybe the way to do this is through a new standard mechanism on the actor: a set of additional restrictions, e.g.:
```
{
""id"": ""root"",
""_r"": {
""a"": [""ir"", ""ur"", ""dr""],
""d"": {
""fixtures"": [""ir"", ""ur"", ""dr""]
},
""t"": {
""fixtures"": {
""searchable"": [""ir""]
}
}
}
```
`""a""` is ""all permissions"" - these apply to everything.
`""d""` permissions only apply to the specified database.
`""t""` permissions only apply to the specified table.
The way this works is there's a default [permission_allowed(datasette, actor, action, resource)](https://docs.datasette.io/en/stable/plugin_hooks.html#id25) hook which only consults these, and crucially just says NO if those rules do not match.
In this way it would apply as an extra layer of permission rules over the defaults (which for this `root` instance would all return yes).","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1423336089,
https://github.com/simonw/datasette/issues/1855#issuecomment-1301646493,https://api.github.com/repos/simonw/datasette/issues/1855,1301646493,IC_kwDOBm6k_c5NlYyd,9599,2022-11-03T05:11:06Z,2022-11-03T05:11:06Z,OWNER,"Built a prototype of the above:
```diff
diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py
index 32b0c758..f68aa38f 100644
--- a/datasette/default_permissions.py
+++ b/datasette/default_permissions.py
@@ -6,8 +6,8 @@ import json
import time
-@hookimpl(tryfirst=True)
-def permission_allowed(datasette, actor, action, resource):
+@hookimpl(tryfirst=True, specname=""permission_allowed"")
+def permission_allowed_default(datasette, actor, action, resource):
async def inner():
if action in (
""permissions-debug"",
@@ -57,6 +57,44 @@ def permission_allowed(datasette, actor, action, resource):
return inner
+@hookimpl(specname=""permission_allowed"")
+def permission_allowed_actor_restrictions(actor, action, resource):
+ if actor is None:
+ return None
+ _r = actor.get(""_r"")
+ if not _r:
+ # No restrictions, so we have no opinion
+ return None
+ action_initials = """".join([word[0] for word in action.split(""-"")])
+ # If _r is defined then we use those to further restrict the actor
+ # Crucially, we only use this to say NO (return False) - we never
+ # use it to return YES (True) because that might over-ride other
+ # restrictions placed on this actor
+ all_allowed = _r.get(""a"")
+ if all_allowed is not None:
+ assert isinstance(all_allowed, list)
+ if action_initials in all_allowed:
+ return None
+ # How about for the current database?
+ if action in (""view-database"", ""view-database-download"", ""execute-sql""):
+ database_allowed = _r.get(""d"", {}).get(resource)
+ if database_allowed is not None:
+ assert isinstance(database_allowed, list)
+ if action_initials in database_allowed:
+ return None
+ # Or the current table? That's any time the resource is (database, table)
+ if not isinstance(resource, str) and len(resource) == 2:
+ database, table = resource
+ table_allowed = _r.get(""t"", {}).get(database, {}).get(table)
+ # TODO: What should this do for canned queries?
+ if table_allowed is not None:
+ assert isinstance(table_allowed, list)
+ if action_initials in table_allowed:
+ return None
+ # This action is not specifically allowed, so reject it
+ return False
+
+
@hookimpl
def actor_from_request(datasette, request):
prefix = ""dstok_""
```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1423336089,
https://github.com/simonw/datasette/issues/1855#issuecomment-1301646670,https://api.github.com/repos/simonw/datasette/issues/1855,1301646670,IC_kwDOBm6k_c5NlY1O,9599,2022-11-03T05:11:26Z,2022-11-03T05:11:26Z,OWNER,That still needs comprehensive tests before I land it.,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1423336089,
https://github.com/simonw/datasette/issues/1863#issuecomment-1302759174,https://api.github.com/repos/simonw/datasette/issues/1863,1302759174,IC_kwDOBm6k_c5NpocG,9599,2022-11-03T22:40:47Z,2022-11-03T22:40:47Z,OWNER,"I'm considering Pydantic for this, see:
- https://github.com/simonw/datasette/issues/1882#issuecomment-1302716350
In particular the `create_model()` method: https://pydantic-docs.helpmanual.io/usage/models/#dynamic-model-creation
This would give me good validation. It would also, weirdly, give me the ability to output JSON schema. Maybe I could have this as the JSON schema for a row?
`/db/table/-/json-schema`","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1425029242,
https://github.com/simonw/datasette/issues/1863#issuecomment-1302760382,https://api.github.com/repos/simonw/datasette/issues/1863,1302760382,IC_kwDOBm6k_c5Npou-,9599,2022-11-03T22:42:47Z,2022-11-03T22:42:47Z,OWNER,"```python
print(create_model('document', id=(int, ...), title=(str, None)).schema_json(indent=2))
```
```json
{
""title"": ""document"",
""type"": ""object"",
""properties"": {
""id"": {
""title"": ""Id"",
""type"": ""integer""
},
""title"": {
""title"": ""Title"",
""type"": ""string""
}
},
""required"": [
""id""
]
}
```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1425029242,
https://github.com/simonw/datasette/issues/1863#issuecomment-1302760549,https://api.github.com/repos/simonw/datasette/issues/1863,1302760549,IC_kwDOBm6k_c5Npoxl,9599,2022-11-03T22:43:04Z,2022-11-03T23:21:31Z,OWNER,"The `id=(int, ...)` thing is weird, but is apparently Pydantic syntax for a required field?
https://cs.github.com/starlite-api/starlite/blob/28ddc847c4cb072f0d5d21a9ecd5259711f12ec9/docs/usage/11-data-transfer-objects.md#L161 confirms:
> 1. For required fields use a tuple of type + ellipsis, for example `(str, ...)`.
> 2. For optional fields use a tuple of type + `None`, for example `(str, None)`
> 3. To set a default value use a tuple of type + default value, for example `(str, ""Hello World"")`","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1425029242,
https://github.com/simonw/datasette/issues/1863#issuecomment-1302785086,https://api.github.com/repos/simonw/datasette/issues/1863,1302785086,IC_kwDOBm6k_c5Npuw-,9599,2022-11-03T23:24:33Z,2022-11-03T23:24:56Z,OWNER,"Thinking more about validation: I'm considering if this should validate that columns which are defined as SQLite foreign keys are being updated to values that exist in those other tables.
I like the sound of this. It seems like a sensible default behaviour for Datasette. And it fits with the fact that Datasette treats foreign keys specially elsewhere in the interface.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1425029242,
https://github.com/simonw/datasette/issues/1863#issuecomment-1302790013,https://api.github.com/repos/simonw/datasette/issues/1863,1302790013,IC_kwDOBm6k_c5Npv99,9599,2022-11-03T23:32:30Z,2022-11-03T23:32:30Z,OWNER,"I'm not going to allow updates to primary keys. If you need to do that, you can instead delete the record and then insert a new one with the new primary keys you wanted - or maybe use a custom SQL query.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1425029242,
https://github.com/simonw/datasette/issues/1881#issuecomment-1301635340,https://api.github.com/repos/simonw/datasette/issues/1881,1301635340,IC_kwDOBm6k_c5NlWEM,9599,2022-11-03T04:46:41Z,2022-11-03T04:46:41Z,OWNER,"Built this prototype:
![prototype](https://user-images.githubusercontent.com/9599/199649219-f146e43b-bfb5-45e6-9777-956f21a79887.gif)
In building it I realized I needed to know which permissions took a table, a database, both or neither. So I had to bake that into the code.
Here's the prototype so far (which includes a prototype of the logic for the `_r` field on actor, see #1855):
```diff
diff --git a/datasette/default_permissions.py b/datasette/default_permissions.py
index 32b0c758..f68aa38f 100644
--- a/datasette/default_permissions.py
+++ b/datasette/default_permissions.py
@@ -6,8 +6,8 @@ import json
import time
-@hookimpl(tryfirst=True)
-def permission_allowed(datasette, actor, action, resource):
+@hookimpl(tryfirst=True, specname=""permission_allowed"")
+def permission_allowed_default(datasette, actor, action, resource):
async def inner():
if action in (
""permissions-debug"",
@@ -57,6 +57,44 @@ def permission_allowed(datasette, actor, action, resource):
return inner
+@hookimpl(specname=""permission_allowed"")
+def permission_allowed_actor_restrictions(actor, action, resource):
+ if actor is None:
+ return None
+ _r = actor.get(""_r"")
+ if not _r:
+ # No restrictions, so we have no opinion
+ return None
+ action_initials = """".join([word[0] for word in action.split(""-"")])
+ # If _r is defined then we use those to further restrict the actor
+ # Crucially, we only use this to say NO (return False) - we never
+ # use it to return YES (True) because that might over-ride other
+ # restrictions placed on this actor
+ all_allowed = _r.get(""a"")
+ if all_allowed is not None:
+ assert isinstance(all_allowed, list)
+ if action_initials in all_allowed:
+ return None
+ # How about for the current database?
+ if action in (""view-database"", ""view-database-download"", ""execute-sql""):
+ database_allowed = _r.get(""d"", {}).get(resource)
+ if database_allowed is not None:
+ assert isinstance(database_allowed, list)
+ if action_initials in database_allowed:
+ return None
+ # Or the current table? That's any time the resource is (database, table)
+ if not isinstance(resource, str) and len(resource) == 2:
+ database, table = resource
+ table_allowed = _r.get(""t"", {}).get(database, {}).get(table)
+ # TODO: What should this do for canned queries?
+ if table_allowed is not None:
+ assert isinstance(table_allowed, list)
+ if action_initials in table_allowed:
+ return None
+ # This action is not specifically allowed, so reject it
+ return False
+
+
@hookimpl
def actor_from_request(datasette, request):
prefix = ""dstok_""
diff --git a/datasette/templates/allow_debug.html b/datasette/templates/allow_debug.html
index 0f1b30f0..ae43f0f5 100644
--- a/datasette/templates/allow_debug.html
+++ b/datasette/templates/allow_debug.html
@@ -35,7 +35,7 @@ p.message-warning {