mirror of
https://github.com/go-sylixos/elvish.git
synced 2024-11-27 23:11:20 +08:00
c1b403e46a
Python bundles SQLite3, so just use that.
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import glob
|
|
import os
|
|
import sys
|
|
import sqlite3
|
|
import urllib.parse
|
|
|
|
import bs4
|
|
|
|
# SQL script that (re)creates an empty ``searchIndex`` table.
#
# Run once before any rows are inserted. Dropping the table first makes the
# script safe to re-run against an existing database file. The unique index
# over (name, type, path) is what lets ``INSERT OR IGNORE`` silently skip
# duplicate entries.
PRELUDE = """
DROP TABLE IF EXISTS searchIndex;
CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);
CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);
""".strip()
|
|
|
|
def main(args):
    """Build the ``searchIndex`` SQLite database from a directory of HTML files.

    Every ``<a class="dashAnchor" name="...">`` element found in the
    ``*.html`` files directly inside the input directory becomes one row.
    The anchor name is split on ``/``: the last component (URL-decoded) is
    stored as the entry name, the second-to-last as the entry type, and
    ``<filename>#<anchor name>`` as the path.

    Args:
        args: argv-style list ``[program, html_dir, output_db]``.

    Raises:
        SystemExit: with status 1 (after printing a usage line) when
            ``args`` does not have exactly three elements.
        ValueError: when an anchor name contains no ``/`` and therefore
            cannot be split into a type and a symbol.
    """
    if len(args) != 3:
        print('Usage: mkdsidx.py dir-of-html-files output-db-file')
        sys.exit(1)
    html_dir, output_db = args[1:]

    rows = []
    # Sorted so that row ids in the output database are reproducible;
    # glob order is otherwise filesystem-dependent.
    for filename in sorted(glob.glob('*.html', root_dir=html_dir)):
        # Open as bytes and let BeautifulSoup detect the document encoding
        # instead of depending on the locale's default text encoding.
        with open(os.path.join(html_dir, filename), 'rb') as f:
            soup = bs4.BeautifulSoup(f, 'html.parser')
        for anchor in soup.find_all('a', class_='dashAnchor'):
            name = anchor['name']
            parts = name.split('/')
            if len(parts) < 2:
                raise ValueError(
                    'anchor name %r in %s is not of the form .../type/symbol'
                    % (name, filename))
            entry_type, symbol = parts[-2:]
            rows.append((
                urllib.parse.unquote(symbol),
                entry_type,
                '%s#%s' % (filename, name),
            ))

    conn = sqlite3.connect(output_db)
    try:
        conn.executescript(PRELUDE)
        # Bound parameters rather than string interpolation: symbols and
        # anchor names may contain quotes, which would otherwise corrupt
        # (or inject into) the SQL text.
        with conn:  # commits on success, rolls back on error
            conn.executemany(
                'INSERT OR IGNORE INTO searchIndex(name, type, path) '
                'VALUES (?, ?, ?)',
                rows)
    finally:
        # The connection context manager only ends the transaction; the
        # connection itself has to be closed explicitly.
        conn.close()
|
|
|
|
# Script entry point: hand the raw command line (including the program
# name in position 0) to main().
if __name__ == "__main__":
    main(sys.argv)
|