{"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248621072", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248621072, "node_id": "IC_kwDOCGYnMM5KbHIQ", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T20:56:09Z", "updated_at": "2022-09-15T20:56:09Z", "author_association": "OWNER", "body": "Prototype so far:\r\n```diff\r\ndiff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py\r\nindex 767b170..d96c507 100644\r\n--- a/sqlite_utils/cli.py\r\n+++ b/sqlite_utils/cli.py\r\n@@ -1762,6 +1762,17 @@ def query(\r\n is_flag=True,\r\n help=\"Analyze resulting tables and output results\",\r\n )\r\n+@click.option(\"--key\", help=\"read data from this key of the root object\")\r\n+@click.option(\r\n+ \"--auto-key\",\r\n+ is_flag=True,\r\n+ help=\"Find a key in the root object that is a list of objects\",\r\n+)\r\n+@click.option(\r\n+ \"--analyze\",\r\n+ is_flag=True,\r\n+ help=\"Analyze resulting tables and output results\",\r\n+)\r\n @load_extension_option\r\n def memory(\r\n paths,\r\n@@ -1784,6 +1795,8 @@ def memory(\r\n schema,\r\n dump,\r\n save,\r\n+ key,\r\n+ auto_key,\r\n analyze,\r\n load_extension,\r\n ):\r\n@@ -1838,7 +1851,9 @@ def memory(\r\n csv_table = stem\r\n stem_counts[stem] = stem_counts.get(stem, 1) + 1\r\n csv_fp = csv_path.open(\"rb\")\r\n- rows, format_used = rows_from_file(csv_fp, format=format, encoding=encoding)\r\n+ rows, format_used = rows_from_file(\r\n+ csv_fp, format=format, encoding=encoding, key=key, auto_key=auto_key\r\n+ )\r\n tracker = None\r\n if format_used in (Format.CSV, Format.TSV) and not no_detect_types:\r\n tracker = TypeTracker()\r\ndiff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py\r\nindex 8754554..2e69c26 100644\r\n--- a/sqlite_utils/utils.py\r\n+++ b/sqlite_utils/utils.py\r\n@@ -231,6 +231,8 @@ def rows_from_file(\r\n encoding: Optional[str] = None,\r\n ignore_extras: Optional[bool] = False,\r\n extras_key: Optional[str] = None,\r\n+ key: 
Optional[str] = None,\r\n+ auto_key: Optional[bool] = False,\r\n ) -> Tuple[Iterable[dict], Format]:\r\n \"\"\"\r\n Load a sequence of dictionaries from a file-like object containing one of four different formats.\r\n@@ -271,13 +273,31 @@ def rows_from_file(\r\n :param encoding: the character encoding to use when reading CSV/TSV data\r\n :param ignore_extras: ignore any extra fields on rows\r\n :param extras_key: put any extra fields in a list with this key\r\n+ :param key: read data from this key of the root object\r\n+ :param auto_key: find a key in the root object that is a list of objects\r\n \"\"\"\r\n if ignore_extras and extras_key:\r\n raise ValueError(\"Cannot use ignore_extras= and extras_key= together\")\r\n+ if key and auto_key:\r\n+ raise ValueError(\"Cannot use key= and auto_key= together\")\r\n if format == Format.JSON:\r\n decoded = json.load(fp)\r\n if isinstance(decoded, dict):\r\n- decoded = [decoded]\r\n+ if auto_key:\r\n+ list_keys = [\r\n+ k\r\n+ for k in decoded\r\n+ if isinstance(decoded[k], list)\r\n+ and decoded[k]\r\n+ and all(isinstance(o, dict) for o in decoded[k])\r\n+ ]\r\n+ if len(list_keys) == 1:\r\n+ decoded = decoded[list_keys[0]]\r\n+ elif key:\r\n+ # Raises KeyError, I think that's OK\r\n+ decoded = decoded[key]\r\n+ if not isinstance(decoded, list):\r\n+ decoded = [decoded]\r\n if not isinstance(decoded, list):\r\n raise RowsFromFileBadJSON(\"JSON must be a list or a dictionary\")\r\n return decoded, Format.JSON\r\n@@ -305,7 +325,9 @@ def rows_from_file(\r\n first_bytes = buffered.peek(2048).strip()\r\n if first_bytes.startswith(b\"[\") or first_bytes.startswith(b\"{\"):\r\n # TODO: Detect newline-JSON\r\n- return rows_from_file(buffered, format=Format.JSON)\r\n+ return rows_from_file(\r\n+ buffered, format=Format.JSON, key=key, auto_key=auto_key\r\n+ )\r\n else:\r\n dialect = csv.Sniffer().sniff(\r\n first_bytes.decode(encoding or \"utf-8-sig\", \"ignore\")\r\n```", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, 
\"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248522618", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248522618, "node_id": "IC_kwDOCGYnMM5KavF6", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T19:29:20Z", "updated_at": "2022-09-15T19:29:20Z", "author_association": "OWNER", "body": "I think refactoring `sqlite-utils insert` to use `rows_from_file` needs to happen as part of this work.", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248512739", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248512739, "node_id": "IC_kwDOCGYnMM5Kasrj", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T19:18:24Z", "updated_at": "2022-09-15T19:21:01Z", "author_association": "OWNER", "body": "Why doesn't `sqlite-utils insert` use the `rows_from_file` function I wonder?\r\n\r\nhttps://github.com/simonw/sqlite-utils/issues/279#issuecomment-864207841 says:\r\n\r\n> I can refactor `sqlite-utils insert` to use this new code too.\r\n\r\nMaybe I forgot to do that?", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list 
of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248501824", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248501824, "node_id": "IC_kwDOCGYnMM5KaqBA", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T19:10:48Z", "updated_at": "2022-09-15T19:10:48Z", "author_association": "OWNER", "body": "This feels pretty good:\r\n```\r\n% sqlite-utils memory ~/Downloads/CVR_Export_20220908084311/*.json --schema --auto-key\r\nCREATE TABLE [BallotTypeContestManifest] (\r\n [BallotTypeId] INTEGER,\r\n [ContestId] INTEGER\r\n);\r\nCREATE VIEW t1 AS select * from [BallotTypeContestManifest];\r\nCREATE VIEW t AS select * from [BallotTypeContestManifest];\r\nCREATE TABLE [BallotTypeManifest] (\r\n [Description] TEXT,\r\n [Id] INTEGER,\r\n [ExternalId] TEXT\r\n);\r\n```", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248484094", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248484094, "node_id": "IC_kwDOCGYnMM5Kalr-", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T18:56:31Z", "updated_at": "2022-09-15T18:56:31Z", "author_association": "OWNER", "body": "Actually I quite like `--key X` - it could work for single nested objects too. 
You could insert a single record like this:\r\n\r\n```json\r\n{\r\n \"record\": {\r\n \"id\": 1\r\n }\r\n}\r\n```\r\n```\r\nsqlite-utils insert db.db records record.json --key record\r\n``` ", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248481303", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248481303, "node_id": "IC_kwDOCGYnMM5KalAX", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T18:54:30Z", "updated_at": "2022-09-15T18:55:14Z", "author_association": "OWNER", "body": "Maybe this would make more sense as a mechanism where you can say \"Use the data in the key called X\" - but there's a special option for \"figure out that key automatically\".\r\n\r\nThe syntax then could be:\r\n\r\n`--list-key List`\r\n\r\nOr for automatic detection:\r\n\r\n`--list-key-auto`\r\n\r\nCould also go with `--key List` and `--key-auto` - but would that be as obvious as `--list-key`?", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248479485", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248479485, "node_id": "IC_kwDOCGYnMM5Kakj9", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T18:52:52Z", "updated_at": "2022-09-15T18:53:45Z", "author_association": "OWNER", "body": "The 
most similar option I have at the moment is probably `--flatten`. What would good names for this option be?\r\n\r\n- `--auto-list`\r\n- `--auto-key`\r\n- `--inner-key`\r\n- `--auto-json`\r\n- `--find-list`\r\n- `--find-key`\r\n\r\nThose are all bad.\r\n\r\nAnother option: introduce a new explicit format for it. Right now the explicit formats you can use are:\r\n\r\nhttps://github.com/simonw/sqlite-utils/blob/d9b9e075f07a20f1137cd2e34ed5d3f1a3db4ad8/docs/cli-reference.rst#L153-L158\r\n\r\nSo I could add a `:autojson` format.", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248475718", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248475718, "node_id": "IC_kwDOCGYnMM5KajpG", "user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T18:49:05Z", "updated_at": "2022-09-15T18:49:53Z", "author_association": "OWNER", "body": "Here's how I used my prototype to build [that Gist](https://gist.github.com/simonw/0e6901974a14ab7d56c2746a04d72c8c):\r\n\r\n sqlite-utils memory ~/Downloads/CVR_Export_20220908084311/*.json --schema > database.sql\r\n", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null} {"html_url": "https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248474806", "issue_url": "https://api.github.com/repos/simonw/sqlite-utils/issues/489", "id": 1248474806, "node_id": "IC_kwDOCGYnMM5Kaja2", 
"user": {"value": 9599, "label": "simonw"}, "created_at": "2022-09-15T18:48:09Z", "updated_at": "2022-09-15T18:48:09Z", "author_association": "OWNER", "body": "Built a prototype of this that works really well:\r\n```diff\r\n diff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py\r\nindex c0b7bf1..f9a482c 100644\r\n--- a/sqlite_utils/utils.py\r\n+++ b/sqlite_utils/utils.py\r\n@@ -272,7 +272,19 @@ def rows_from_file(\r\n if format == Format.JSON:\r\n decoded = json.load(fp)\r\n if isinstance(decoded, dict):\r\n- decoded = [decoded]\r\n+ # TODO: Solve for if this isn't what people want\r\n+ # Does it have just one key that is a list of dicts?\r\n+ list_keys = [\r\n+ k\r\n+ for k in decoded\r\n+ if isinstance(decoded[k], list)\r\n+ and decoded[k]\r\n+ and all(isinstance(o, dict) for o in decoded[k])\r\n+ ]\r\n+ if len(list_keys) == 1:\r\n+ decoded = decoded[list_keys[0]]\r\n+ else:\r\n+ decoded = [decoded]\r\n if not isinstance(decoded, list):\r\n raise RowsFromFileBadJSON(\"JSON must be a list or a dictionary\")\r\n return decoded, Format.JSON\r\n```\r\nI used that to build this: https://gist.github.com/simonw/0e6901974a14ab7d56c2746a04d72c8c\r\n\r\nOne problem though: right now, if you do this `sqlite-utils` treats it as a single object and adds a `tags` column with JSON in it:\r\n```\r\necho '{\"title\": \"Hi\", \"tags\": [{\"t\": \"one\"}]}' | sqlite-utils insert db.db t -\r\n```\r\nIf I implement this new mechanism the above line would behave differently - which would be a backwards incompatible change.\r\n\r\nSo I probably need some kind of opt-in mechanism for this. And I need a good name for it.", "reactions": "{\"total_count\": 0, \"+1\": 0, \"-1\": 0, \"laugh\": 0, \"hooray\": 0, \"confused\": 0, \"heart\": 0, \"rocket\": 0, \"eyes\": 0}", "issue": {"value": 1374939463, "label": "Ability to load JSON records held in a file with a single top level key that is a list of objects"}, "performed_via_github_app": null}