home / github / issue_comments

Menu
  • Search all tables
  • GraphQL API

issue_comments: 743701422

This data as json

html_url issue_url id node_id user created_at updated_at author_association body reactions issue performed_via_github_app
https://github.com/simonw/sqlite-utils/issues/207#issuecomment-743701422 https://api.github.com/repos/simonw/sqlite-utils/issues/207 743701422 MDEyOklzc3VlQ29tbWVudDc0MzcwMTQyMg== 9599 2020-12-12T04:37:14Z 2020-12-12T04:38:25Z OWNER

Prototype: ```python from collections import namedtuple

# Summary record produced by analyze_column() for a single column.
ColumnDetails = namedtuple(
    "ColumnDetails",
    (
        "column",
        "num_null",
        "num_blank",
        "num_distinct",
        "most_common",
        "least_common",
    ),
)


def analyze_column(db, table, column, values=10):
    """Summarize the contents of one column of a SQLite table.

    Args:
        db: Object exposing ``execute(sql)`` that returns a cursor-like
            result supporting ``fetchone()`` and ``fetchall()``.
        table: Table name. Interpolated into the SQL inside ``[...]``
            without further escaping, so it must come from a trusted source.
        column: Column name; same quoting caveat as ``table``.
        values: Maximum number of rows to return in each of the
            ``most_common`` / ``least_common`` lists.

    Returns:
        A ``ColumnDetails`` tuple. ``num_null`` counts NULL rows,
        ``num_blank`` counts rows equal to the empty string and
        ``num_distinct`` is ``count(distinct column)``. ``most_common`` and
        ``least_common`` are lists of ``(value, count)`` pairs, or ``None``
        for both when the column holds exactly one distinct value.
    """
    # count() with no arguments is accepted by SQLite as a row count.
    num_null = db.execute(
        "select count() from [{}] where [{}] is null".format(table, column)
    ).fetchone()[0]
    num_blank = db.execute(
        "select count() from [{}] where [{}] = ''".format(table, column)
    ).fetchone()[0]
    num_distinct = db.execute(
        "select count(distinct [{}]) from [{}]".format(column, table)
    ).fetchone()[0]

    most_common = None
    least_common = None
    if num_distinct != 1:
        most_common = [
            (r[0], r[1])
            for r in db.execute(
                "select [{}], count() from [{}] group by [{}] "
                "order by count() desc limit {}".format(
                    column, table, column, values
                )
            ).fetchall()
        ]
        # count(distinct ...) skips NULL, but "group by" yields NULL as a
        # group of its own, so include it when deciding whether the query
        # above already returned every group.
        num_groups = num_distinct + (1 if num_null else 0)
        if num_groups <= values:
            # No need to run the query if it will just return the results
            # in reverse order
            least_common = most_common[::-1]
        else:
            least_common = [
                (r[0], r[1])
                for r in db.execute(
                    "select [{}], count() from [{}] group by [{}] "
                    "order by count() limit {}".format(
                        column, table, column, values
                    )
                ).fetchall()
            ]
    return ColumnDetails(
        column, num_null, num_blank, num_distinct, most_common, least_common
    )

def analyze_table(db, table):
    """Print an analyze_column() summary for each column of ``table``.

    ``db[table].columns`` is iterated in the order it yields columns, and
    each column's ``name`` attribute is passed to ``analyze_column``.
    Nothing is returned; each summary is written with ``print``.
    """
    for col in db[table].columns:
        print(analyze_column(db, table, col.name))

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
763283616  
Powered by Datasette · Queries took 1.317ms · About: github-to-sqlite