
issue_comments


26 rows where "updated_at" is on date 2022-09-26 sorted by updated_at descending



issue (14 values)

  • Ability to merge databases and tables 5
  • Setting to turn off table row counts entirely 3
  • Preserve query on timeout 3
  • Stream all results for arbitrary SQL and canned queries 2
  • `sqlite-utils install` command 2
  • Document how to use Just 2
  • Switch to keyword-only arguments for a bunch of internal methods 2
  • query result page is using 400mb of browser memory 40x size of html page and 400x size of csv data 1
  • Research: demonstrate if parallel SQL queries are worthwhile 1
  • Ability to insert multi-line files 1
  • Expose `sql` and `params` arguments to various plugin hooks 1
  • Idea: ability to pass extra variables to `--convert` scripts 1
  • Tiny typographical error in install/uninstall docs 1
  • Release Datasette 0.63a0 1

user (4 values)

  • simonw 19
  • fgregg 4
  • eyeseast 2
  • nelsonjchen 1

author_association (3 values)

  • OWNER 19
  • CONTRIBUTOR 6
  • NONE 1
Columns: id, html_url, issue_url, node_id, user, created_at, updated_at ▲, author_association, body, reactions, issue, performed_via_github_app
1258760299 https://github.com/simonw/datasette/issues/1822#issuecomment-1258760299 https://api.github.com/repos/simonw/datasette/issues/1822 IC_kwDOBm6k_c5LByhr simonw 9599 2022-09-26T23:25:12Z 2022-09-26T23:25:55Z OWNER

A start:

```diff
diff --git a/datasette/utils/asgi.py b/datasette/utils/asgi.py
index 8a2fa060..41ade961 100644
--- a/datasette/utils/asgi.py
+++ b/datasette/utils/asgi.py
@@ -118,7 +118,7 @@ class Request:
         return dict(parse_qsl(body.decode("utf-8"), keep_blank_values=True))
 
     @classmethod
-    def fake(cls, path_with_query_string, method="GET", scheme="http", url_vars=None):
+    def fake(cls, path_with_query_string, *, method="GET", scheme="http", url_vars=None):
         """Useful for constructing Request objects for tests"""
         path, _, query_string = path_with_query_string.partition("?")
         scope = {
@@ -204,7 +204,7 @@ class AsgiWriter:
         )
 
 
-async def asgi_send_json(send, info, status=200, headers=None):
+async def asgi_send_json(send, info, *, status=200, headers=None):
     headers = headers or {}
     await asgi_send(
         send,
@@ -215,7 +215,7 @@ async def asgi_send_json(send, info, status=200, headers=None):
     )
 
 
-async def asgi_send_html(send, html, status=200, headers=None):
+async def asgi_send_html(send, html, *, status=200, headers=None):
     headers = headers or {}
     await asgi_send(
         send,
@@ -226,7 +226,7 @@ async def asgi_send_html(send, html, status=200, headers=None):
     )
 
 
-async def asgi_send_redirect(send, location, status=302):
+async def asgi_send_redirect(send, location, *, status=302):
     await asgi_send(
         send,
         "",
@@ -236,12 +236,12 @@ async def asgi_send_redirect(send, location, status=302):
     )
 
 
-async def asgi_send(send, content, status, headers=None, content_type="text/plain"):
+async def asgi_send(send, content, status, *, headers=None, content_type="text/plain"):
     await asgi_start(send, status, headers, content_type)
     await send({"type": "http.response.body", "body": content.encode("utf-8")})
 
 
-async def asgi_start(send, status, headers=None, content_type="text/plain"):
+async def asgi_start(send, status, *, headers=None, content_type="text/plain"):
     headers = headers or {}
     # Remove any existing content-type header
     headers = {k: v for k, v in headers.items() if k.lower() != "content-type"}
@@ -259,7 +259,7 @@ async def asgi_start(send, status, headers=None, content_type="text/plain"):
 
 async def asgi_send_file(
-    send, filepath, filename=None, content_type=None, chunk_size=4096, headers=None
+    send, filepath, filename=None, *, content_type=None, chunk_size=4096, headers=None
 ):
     headers = headers or {}
     if filename:
@@ -284,7 +284,7 @@ async def asgi_send_file(
     )
 
 
-def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None):
+def asgi_static(root_path, *, chunk_size=4096, headers=None, content_type=None):
     root_path = Path(root_path)
 
     async def inner_static(request, send):
@@ -313,7 +313,7 @@ def asgi_static(root_path, chunk_size=4096, headers=None, content_type=None):
 
 class Response:
-    def __init__(self, body=None, status=200, headers=None, content_type="text/plain"):
+    def __init__(self, body=None, *, status=200, headers=None, content_type="text/plain"):
         self.body = body
         self.status = status
         self.headers = headers or {}
@@ -346,6 +346,7 @@ class Response:
         self,
         key,
         value="",
+        *,
         max_age=None,
         expires=None,
         path="/",
@@ -374,7 +375,7 @@ class Response:
         self._set_cookie_headers.append(cookie.output(header="").strip())
 
     @classmethod
-    def html(cls, body, status=200, headers=None):
+    def html(cls, body, *, status=200, headers=None):
         return cls(
             body,
             status=status,
@@ -383,7 +384,7 @@ class Response:
         )
 
     @classmethod
-    def text(cls, body, status=200, headers=None):
+    def text(cls, body, *, status=200, headers=None):
         return cls(
             str(body),
             status=status,
@@ -392,7 +393,7 @@ class Response:
         )
 
     @classmethod
-    def json(cls, body, status=200, headers=None, default=None):
+    def json(cls, body, *, status=200, headers=None, default=None):
         return cls(
             json.dumps(body, default=default),
             status=status,
@@ -401,7 +402,7 @@ class Response:
         )
 
     @classmethod
-    def redirect(cls, path, status=302, headers=None):
+    def redirect(cls, path, *, status=302, headers=None):
         headers = headers or {}
         headers["Location"] = path
         return cls("", status=status, headers=headers)
@@ -412,6 +413,7 @@ class AsgiFileDownload:
         self,
         filepath,
         filename=None,
+        *,
         content_type="application/octet-stream",
         headers=None,
     ):
```

```diff
diff --git a/datasette/app.py b/datasette/app.py
index 03d1dacc..4d4e5584 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -190,6 +190,7 @@ class Datasette:
     def __init__(
         self,
         files=None,
+        *,
         immutables=None,
         cache_headers=True,
         cors=False,
```
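
(As an aside, a minimal sketch, not from the issue itself, of what that `*` marker enforces: parameters after it can only be passed by keyword, so ambiguous positional calls fail loudly.)

```python
# Sketch: the effect of the "*" marker added throughout the diff above.
def redirect_old(path, status=302, headers=None):
    return (path, status, headers)

def redirect_new(path, *, status=302, headers=None):
    return (path, status, headers)

print(redirect_old("/new", 301))         # 301 silently binds to status
print(redirect_new("/new", status=301))  # keyword required, intent explicit
# redirect_new("/new", 301) would raise TypeError: takes 1 positional argument
```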

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Switch to keyword-only arguments for a bunch of internal methods 1386854246  
1258757544 https://github.com/simonw/datasette/issues/1822#issuecomment-1258757544 https://api.github.com/repos/simonw/datasette/issues/1822 IC_kwDOBm6k_c5LBx2o simonw 9599 2022-09-26T23:21:23Z 2022-09-26T23:21:23Z OWNER

Everything on https://docs.datasette.io/en/stable/internals.html that uses keyword arguments should do this I think.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Switch to keyword-only arguments for a bunch of internal methods 1386854246  
1258756231 https://github.com/simonw/datasette/issues/1817#issuecomment-1258756231 https://api.github.com/repos/simonw/datasette/issues/1817 IC_kwDOBm6k_c5LBxiH simonw 9599 2022-09-26T23:19:34Z 2022-09-26T23:19:34Z OWNER

This is a good idea - it's something I should do before Datasette 1.0.

I was a tiny bit worried about compatibility (Datasette is 3.7+) but it looks like they have been in Python since 3.0!

{
    "total_count": 1,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 1,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Expose `sql` and `params` arguments to various plugin hooks 1384273985  
1258754105 https://github.com/simonw/datasette/issues/1819#issuecomment-1258754105 https://api.github.com/repos/simonw/datasette/issues/1819 IC_kwDOBm6k_c5LBxA5 simonw 9599 2022-09-26T23:16:15Z 2022-09-26T23:16:15Z OWNER

Demo: https://latest.datasette.io/_memory?sql=with+recursive+counter(x)+as+(%0D%0A++select+0%0D%0A++++union%0D%0A++select+x+%2B+1+from+counter%0D%0A)%2C%0D%0Ablah+as+(select+*+from+counter+limit+5000000)%0D%0Aselect+count(*)+from+blah
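
(URL-decoded, the query in that demo link is:)

```sql
with recursive counter(x) as (
  select 0
    union
  select x + 1 from counter
),
blah as (select * from counter limit 5000000)
select count(*) from blah
```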

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Preserve query on timeout 1385026210  
1258746600 https://github.com/simonw/datasette/issues/1819#issuecomment-1258746600 https://api.github.com/repos/simonw/datasette/issues/1819 IC_kwDOBm6k_c5LBvLo simonw 9599 2022-09-26T23:05:40Z 2022-09-26T23:05:40Z OWNER

Implementing it like this, so at least you can copy and paste the SQL query back out again.

I'm not doing a full textarea because this error can be raised in multiple places, including on the table page itself. It's not just an error associated with the manual query page.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Preserve query on timeout 1385026210  
1258738740 https://github.com/simonw/datasette/issues/1818#issuecomment-1258738740 https://api.github.com/repos/simonw/datasette/issues/1818 IC_kwDOBm6k_c5LBtQ0 nelsonjchen 5363 2022-09-26T22:52:45Z 2022-09-26T22:55:57Z NONE

thoughts on order of precedence to use:

  • sqlite-utils count, if present. closest thing to a standard i guess.
  • row(max_id) if like, the first and/or last x amount of rows ids are all contiguous. kind of a cheap/dumb/imperfect heuristic to see if the table is dump/not dump. if the check passes, still stick on est. after the display.
  • count(*) if enabled in datasette
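
(A hedged sketch of that precedence as a fallback chain; `estimate_row_count` and the elided contiguity check are hypothetical, not real Datasette or sqlite-utils APIs.)

```python
import sqlite_utils

def estimate_row_count(db: sqlite_utils.Database, table: str):
    """Hypothetical fallback chain following the precedence suggested above."""
    # 1. sqlite-utils cached count, if the _counts table is present
    if "_counts" in db.table_names():
        row = db.execute(
            "select count from _counts where [table] = ?", [table]
        ).fetchone()
        if row:
            return row[0], "exact (cached)"
    # 2. max(rowid) as a cheap estimate (the rowid-contiguity heuristic
    #    is elided here); display it flagged as "est."
    max_rowid = db.execute(f"select max(rowid) from [{table}]").fetchone()[0]
    if max_rowid is not None:
        return max_rowid, "est."
    # 3. Full count(*), only if enabled in datasette's settings
    return db.execute(f"select count(*) from [{table}]").fetchone()[0], "exact"
```
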
{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Setting to turn off table row counts entirely 1384549993  
1258738435 https://github.com/simonw/datasette/issues/1819#issuecomment-1258738435 https://api.github.com/repos/simonw/datasette/issues/1819 IC_kwDOBm6k_c5LBtMD simonw 9599 2022-09-26T22:52:19Z 2022-09-26T22:52:19Z OWNER

This is a good idea.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Preserve query on timeout 1385026210  
1258735747 https://github.com/simonw/datasette/issues/1818#issuecomment-1258735747 https://api.github.com/repos/simonw/datasette/issues/1818 IC_kwDOBm6k_c5LBsiD simonw 9599 2022-09-26T22:47:59Z 2022-09-26T22:47:59Z OWNER

Another option here is to tie into a feature I built in sqlite-utils with this problem in mind but never introduced on the Datasette side of things: https://sqlite-utils.datasette.io/en/stable/python-api.html#cached-table-counts-using-triggers
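
(For reference, a rough sketch of that sqlite-utils feature, assuming its documented `enable_counts()` / `cached_counts()` API and a hypothetical database file:)

```python
import sqlite_utils

db = sqlite_utils.Database("github.db")  # hypothetical database

# Adds insert/update/delete triggers that keep a _counts table in sync
db.enable_counts()

# Reads the cached values instead of running count(*) against each table
print(db.cached_counts())            # e.g. {"issue_comments": 26, ...}
print(db.cached_counts(["issues"]))  # restrict to specific tables
```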

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Setting to turn off table row counts entirely 1384549993  
1258735283 https://github.com/simonw/datasette/issues/1818#issuecomment-1258735283 https://api.github.com/repos/simonw/datasette/issues/1818 IC_kwDOBm6k_c5LBsaz simonw 9599 2022-09-26T22:47:19Z 2022-09-26T22:47:19Z OWNER

That's a really interesting idea: for a lot of databases (those made out of straight imports from CSV) max(rowid) would indeed reflect the size of the table, but would be a MUCH faster operation than attempting a count(*).
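
(A quick illustrative sketch, standard library only, with a hypothetical table name:)

```python
import sqlite3
import time

conn = sqlite3.connect("data.db")  # hypothetical CSV-import database

def timed(sql):
    start = time.perf_counter()
    value = conn.execute(sql).fetchone()[0]
    return value, time.perf_counter() - start

# max(rowid) is answered straight from the rowid b-tree, no table scan
print(timed("select max(rowid) from mytable"))
# count(*) must visit every row (or a full index) to be exact
print(timed("select count(*) from mytable"))
```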

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Setting to turn off table row counts entirely 1384549993  
1258712931 https://github.com/simonw/sqlite-utils/issues/491#issuecomment-1258712931 https://api.github.com/repos/simonw/sqlite-utils/issues/491 IC_kwDOCGYnMM5LBm9j eyeseast 25778 2022-09-26T22:31:58Z 2022-09-26T22:31:58Z CONTRIBUTOR

Right. The backup command will copy tables completely, but in the case of conflicting table names, the destination gets overwritten silently. That might not be what you want here.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Ability to merge databases and tables 1383646615  
1258697384 https://github.com/simonw/sqlite-utils/issues/491#issuecomment-1258697384 https://api.github.com/repos/simonw/sqlite-utils/issues/491 IC_kwDOCGYnMM5LBjKo simonw 9599 2022-09-26T22:12:45Z 2022-09-26T22:12:45Z OWNER

That feels like a slightly different command to me - maybe `sqlite-utils backup data.db data-backup.db`? It doesn't have any of the mechanics for merging tables together. Could be a useful feature separately though.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Ability to merge databases and tables 1383646615  
1258692555 https://github.com/simonw/datasette/issues/1821#issuecomment-1258692555 https://api.github.com/repos/simonw/datasette/issues/1821 IC_kwDOBm6k_c5LBh_L simonw 9599 2022-09-26T22:06:39Z 2022-09-26T22:06:39Z OWNER
  • https://github.com/simonw/datasette/actions/runs/3131344150
  • https://github.com/simonw/datasette/releases/tag/0.63a0
  • https://pypi.org/project/datasette/0.63a0/
{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Release Datasette 0.63a0 1386734383  
1258521333 https://github.com/simonw/sqlite-utils/issues/494#issuecomment-1258521333 https://api.github.com/repos/simonw/sqlite-utils/issues/494 IC_kwDOCGYnMM5LA4L1 simonw 9599 2022-09-26T19:32:36Z 2022-09-26T19:32:36Z OWNER

Tweeted about it too: https://twitter.com/simonw/status/1574481628507668480

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Document how to use Just 1386593843  
1258516872 https://github.com/simonw/sqlite-utils/issues/494#issuecomment-1258516872 https://api.github.com/repos/simonw/sqlite-utils/issues/494 IC_kwDOCGYnMM5LA3GI simonw 9599 2022-09-26T19:28:36Z 2022-09-26T19:28:36Z OWNER

New documentation: https://sqlite-utils.datasette.io/en/latest/contributing.html#using-just-and-pipenv

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Document how to use Just 1386593843  
1258508215 https://github.com/simonw/sqlite-utils/issues/491#issuecomment-1258508215 https://api.github.com/repos/simonw/sqlite-utils/issues/491 IC_kwDOCGYnMM5LA0-3 eyeseast 25778 2022-09-26T19:22:14Z 2022-09-26T19:22:14Z CONTRIBUTOR

This might be fairly straightforward using SQLite's backup utility: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
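
(A minimal sketch of that standard-library API, with hypothetical file names. Note it copies the whole database rather than merging, which matches the silent-overwrite caveat discussed above.)

```python
import sqlite3

# Copy an entire database using SQLite's online backup API
src = sqlite3.connect("data.db")
dest = sqlite3.connect("data-backup.db")
with dest:
    src.backup(dest)  # replaces the destination's contents wholesale
dest.close()
src.close()
```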

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Ability to merge databases and tables 1383646615  
1258479462 https://github.com/simonw/sqlite-utils/issues/483#issuecomment-1258479462 https://api.github.com/repos/simonw/sqlite-utils/issues/483 IC_kwDOCGYnMM5LAt9m simonw 9599 2022-09-26T19:04:29Z 2022-09-26T19:04:43Z OWNER

Documentation:

  • https://sqlite-utils.datasette.io/en/latest/cli.html#cli-install
  • https://sqlite-utils.datasette.io/en/latest/cli.html#cli-uninstall
  • https://sqlite-utils.datasette.io/en/latest/cli-reference.html#install
  • https://sqlite-utils.datasette.io/en/latest/cli-reference.html#uninstall
{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
`sqlite-utils install` command 1363765916  
1258476455 https://github.com/simonw/sqlite-utils/issues/493#issuecomment-1258476455 https://api.github.com/repos/simonw/sqlite-utils/issues/493 IC_kwDOCGYnMM5LAtOn simonw 9599 2022-09-26T19:01:49Z 2022-09-26T19:01:49Z OWNER

I tried the tips in https://stackoverflow.com/questions/15258831/how-to-handle-two-dashes-in-rest (not the settings change though, because I might want smart quotes elsewhere) and they didn't work.

Maybe I should disable smart quotes entirely?

I feel like there should be an escaping trick that works here though. I tried `insert -\-convert` but it didn't help.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Tiny typographical error in install/uninstall docs 1386562662  
1258451968 https://github.com/simonw/sqlite-utils/issues/483#issuecomment-1258451968 https://api.github.com/repos/simonw/sqlite-utils/issues/483 IC_kwDOCGYnMM5LAnQA simonw 9599 2022-09-26T18:37:54Z 2022-09-26T18:40:41Z OWNER

The implementation of this can be an almost exact copy of Datasette's, which was added in this commit: https://github.com/simonw/datasette/commit/01fe5b740171bfaea3752fc5754431dac53777e3

Current code for that is here: https://github.com/simonw/datasette/blob/0.62/datasette/cli.py#L319-L340 - which has since been improved to use the `from runpy import run_module` function.
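
(A sketch of that `runpy` pattern for an in-process `pip install`, simplified from the linked code:)

```python
import sys
from runpy import run_module

def install(*packages):
    # Run "pip install <packages>" in-process, so the packages land in
    # the same environment as the running tool (simplified sketch)
    sys.argv = ["pip", "install"] + list(packages)
    run_module("pip", run_name="__main__")
```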

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
`sqlite-utils install` command 1363765916  
1258450447 https://github.com/simonw/sqlite-utils/issues/491#issuecomment-1258450447 https://api.github.com/repos/simonw/sqlite-utils/issues/491 IC_kwDOCGYnMM5LAm4P simonw 9599 2022-09-26T18:36:23Z 2022-09-26T18:36:23Z OWNER

This is also the kind of feature that would need to express itself in both the Python library and the CLI utility.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Ability to merge databases and tables 1383646615  
1258449887 https://github.com/simonw/sqlite-utils/issues/491#issuecomment-1258449887 https://api.github.com/repos/simonw/sqlite-utils/issues/491 IC_kwDOCGYnMM5LAmvf simonw 9599 2022-09-26T18:35:50Z 2022-09-26T18:35:50Z OWNER

This is a really interesting idea.

I'm nervous about needing to set the rules for how duplicate tables should be merged though. This feels like a complex topic - one where there isn't necessarily an obviously "correct" way of doing it, but where different problems that people are solving might need different merging approaches.

Likewise, merging isn't just a database-to-database thing at that point - I could see a need for merging two tables using similar rules to those used for merging two databases.

So I think I'd want to have some good concrete use-cases in mind before trying to design how something like this should work. Will leave this thread open for people to drop those in!

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Ability to merge databases and tables 1383646615  
1258446128 https://github.com/simonw/sqlite-utils/issues/492#issuecomment-1258446128 https://api.github.com/repos/simonw/sqlite-utils/issues/492 IC_kwDOCGYnMM5LAl0w simonw 9599 2022-09-26T18:32:14Z 2022-09-26T18:33:19Z OWNER

This idea would make more sense if there was a good mechanism to say "run the conversion script held in this file" as opposed to passing it as an option. That would also mean no longer having to remember bash escaping rules (see tip)!

shot-scraper has that for `--javascript`, using the `--input` option: https://shot-scraper.datasette.io/en/stable/javascript.html#shot-scraper-javascript-help

Maybe `--convert-script` would work here? Or `--convert-file`? It should accept `-` for stdin too.
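
(A sketch of how that could look with click, which already maps `-` to stdin for `click.File` parameters; `--convert-file` here is just one of the names floated above, not an existing flag.)

```python
import click

@click.command()
@click.option(
    "--convert-file",
    type=click.File("r"),  # click treats "-" as sys.stdin automatically
    help="File containing the conversion code",
)
def insert(convert_file):
    convert_code = convert_file.read() if convert_file else None
    click.echo(convert_code or "no conversion supplied")

if __name__ == "__main__":
    insert()
```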

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Idea: ability to pass extra variables to `--convert` scripts 1386530156  
1258437060 https://github.com/simonw/sqlite-utils/issues/490#issuecomment-1258437060 https://api.github.com/repos/simonw/sqlite-utils/issues/490 IC_kwDOCGYnMM5LAjnE simonw 9599 2022-09-26T18:24:44Z 2022-09-26T18:24:44Z OWNER

Just saw your great write-up on this: https://jeqo.github.io/notes/2022-09-24-ingest-logs-sqlite/

{
    "total_count": 1,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 1,
    "rocket": 0,
    "eyes": 0
}
Ability to insert multi-line files 1382457780  
1258337011 https://github.com/simonw/datasette/issues/526#issuecomment-1258337011 https://api.github.com/repos/simonw/datasette/issues/526 IC_kwDOBm6k_c5LALLz fgregg 536941 2022-09-26T16:49:48Z 2022-09-26T16:49:48Z CONTRIBUTOR

i think the smallest change that gets close to what i want is to change the behavior so that `max_returned_rows` is not applied in the execute method when we are asking for a csv of a query.

there are some infelicities with that approach, but i'll make a PR to make it easier to discuss.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Stream all results for arbitrary SQL and canned queries 459882902  
1258167564 https://github.com/simonw/datasette/issues/526#issuecomment-1258167564 https://api.github.com/repos/simonw/datasette/issues/526 IC_kwDOBm6k_c5K_h0M fgregg 536941 2022-09-26T14:57:44Z 2022-09-26T15:08:36Z CONTRIBUTOR

reading the database execute method i have a few questions.

https://github.com/simonw/datasette/blob/cb1e093fd361b758120aefc1a444df02462389a3/datasette/database.py#L229-L242


unless i'm missing something (which is very likely!!), the `max_returned_rows` argument doesn't actually offer any protection against running very expensive queries.

It's not like adding a `LIMIT max_rows` argument. it makes sense that it isn't, because the query could already have a `LIMIT` clause. Doing something like `select * from (query) limit {max_returned_rows}` might be protective, but wouldn't always be.

Instead the code executes the full original query, and if it still has time it fetches out the first `max_rows + 1` rows.

this does offer some protection against memory exhaustion, as you won't hydrate a huge result set into python (however, there are data flow patterns that could avoid that too)

given the current architecture, i don't see how creating a new connection would be of use?


If we just removed the `max_returned_rows` limitation, then i think most things would be fine, except for the QueryViews. Right now, rendering just 5000 rows takes a lot of client-side memory, so some form of pagination would be required.
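
(For reference, a simplified sketch of the fetch-and-truncate pattern the comment describes; names follow the linked `database.py`.)

```python
def execute_with_truncation(conn, sql, params, max_returned_rows, truncate=True):
    # The full query runs regardless; the limit only caps how many rows
    # get hydrated back into Python, plus one extra to detect truncation
    cursor = conn.execute(sql, params if params is not None else {})
    if max_returned_rows and truncate:
        rows = cursor.fetchmany(max_returned_rows + 1)
        truncated = len(rows) > max_returned_rows
        rows = rows[:max_returned_rows]
    else:
        rows = cursor.fetchall()
        truncated = False
    return rows, truncated
```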

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Stream all results for arbitrary SQL and canned queries 459882902  
1258166572 https://github.com/simonw/datasette/issues/1655#issuecomment-1258166572 https://api.github.com/repos/simonw/datasette/issues/1655 IC_kwDOBm6k_c5K_hks fgregg 536941 2022-09-26T14:57:04Z 2022-09-26T14:57:04Z CONTRIBUTOR

I think that paginating, even in javascript, could be very helpful. Maybe render json or csv into the page and let javascript load that into the dom?

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
query result page is using 400mb of browser memory 40x size of html page and 400x size of csv data 1163369515  
1258129113 https://github.com/simonw/datasette/issues/1727#issuecomment-1258129113 https://api.github.com/repos/simonw/datasette/issues/1727 IC_kwDOBm6k_c5K_YbZ fgregg 536941 2022-09-26T14:30:11Z 2022-09-26T14:48:31Z CONTRIBUTOR

from your analysis, it seems like the GIL is blocking on loading the data from sqlite into python (particularly in the `fetchmany` call).

this is probably a simplistic idea, but what if you had the python code in the execute method iterate over the cursor and yield out rows or small chunks of rows?

something like:

```python
with sqlite_timelimit(conn, time_limit_ms):
    try:
        cursor = conn.cursor()
        cursor.execute(sql, params if params is not None else {})
    except:
        ...
    max_returned_rows = self.ds.max_returned_rows
    if max_returned_rows == page_size:
        max_returned_rows += 1
    if max_returned_rows and truncate:
        for i, row in enumerate(cursor):
            yield row
            if i == max_returned_rows - 1:
                break
    else:
        for row in cursor:
            yield row
        truncated = False
```

this kind of thing works well with a postgres server-side cursor, but i'm not sure if it will hold for sqlite.

you would still spend about the same amount of time in python and would be contending for the gil, but it could be non-blocking.

depending on the data flow, this could also have some benefit for memory (data stays in more compact sqlite-land until you need it).

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
Research: demonstrate if parallel SQL queries are worthwhile 1217759117  


CREATE TABLE [issue_comments] (
   [html_url] TEXT,
   [issue_url] TEXT,
   [id] INTEGER PRIMARY KEY,
   [node_id] TEXT,
   [user] INTEGER REFERENCES [users]([id]),
   [created_at] TEXT,
   [updated_at] TEXT,
   [author_association] TEXT,
   [body] TEXT,
   [reactions] TEXT,
   [issue] INTEGER REFERENCES [issues]([id])
, [performed_via_github_app] TEXT);
CREATE INDEX [idx_issue_comments_issue]
                ON [issue_comments] ([issue]);
CREATE INDEX [idx_issue_comments_user]
                ON [issue_comments] ([user]);