elvish/website/tools/mkdsidx.py
Qi Xiao c1b403e46a website: Remove the dependency on SQLite3 CLI for generating docset.
Python bundles SQLite3, so just use that.
2024-01-22 16:16:56 +00:00

40 lines
1.2 KiB
Python

import glob
import os
import sys
import sqlite3
import urllib.parse
import bs4
# SQL executed before any row is inserted. It drops a searchIndex table left
# over from an earlier run, recreates it, and adds a unique index over
# (name, type, path) so that "INSERT OR IGNORE" silently skips duplicates.
PRELUDE = "\n".join((
    "DROP TABLE IF EXISTS searchIndex;",
    "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);",
    "CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);",
))
def main(args):
    """Build the searchIndex SQLite table from anchors found in HTML files.

    Every ``*.html`` file directly inside the HTML directory is parsed, and
    each ``<a class="dashAnchor">`` element contributes one row. The last two
    ``/``-separated components of the anchor's ``name`` attribute are the
    entry type and the percent-encoded symbol; the row's path is
    ``<filename>#<anchor name>``.

    Args:
        args: argv-style list of exactly three items: the program name, the
            directory holding the HTML files, and the path of the SQLite
            database file to write.

    Raises:
        SystemExit: with status 1 (after printing a usage line) when ``args``
            does not have exactly three items.
        ValueError: when an anchor name has fewer than two ``/``-separated
            components.
    """
    if len(args) != 3:
        print('Usage: mkdsidx.py dir-of-html-files output-db-file')
        sys.exit(1)
    html_dir, output_db = args[1:]

    rows = []
    for filename in glob.glob('*.html', root_dir=html_dir):
        with open(os.path.join(html_dir, filename)) as f:
            soup = bs4.BeautifulSoup(f, 'html.parser')
        for anchor in soup.find_all('a', class_='dashAnchor'):
            name = anchor['name']
            entry_type, symbol = name.split('/')[-2:]
            # The symbol is percent-encoded inside the anchor name; the path
            # keeps the anchor name exactly as it appears in the HTML.
            symbol = urllib.parse.unquote(symbol)
            rows.append((symbol, entry_type, '%s#%s' % (filename, name)))

    conn = sqlite3.connect(output_db)
    try:
        # "with conn" commits on success and rolls back on error, but it does
        # not close the connection; the finally clause does that.
        with conn:
            conn.executescript(PRELUDE)
            # Bound parameters instead of string interpolation: a decoded
            # symbol or a filename may contain a single quote, which would
            # otherwise corrupt (or inject into) the generated SQL.
            conn.executemany(
                'INSERT OR IGNORE INTO searchIndex(name, type, path) '
                'VALUES (?, ?, ?)',
                rows)
    finally:
        conn.close()
# Script entry point: hand the raw command line (program name included) to
# main() only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main(sys.argv)