mirror of
https://github.com/qwjyh/browser-history-merger.git
synced 2024-11-21 14:40:12 +09:00
init commit with init
add
subcommands
This commit is contained in:
commit
b3c9013f75
8 changed files with 452 additions and 0 deletions
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
# python generated files
|
||||
__pycache__/
|
||||
*.py[oc]
|
||||
build/
|
||||
dist/
|
||||
wheels/
|
||||
*.egg-info
|
||||
|
||||
# venv
|
||||
.venv
|
1
.python-version
Normal file
1
.python-version
Normal file
|
@ -0,0 +1 @@
|
|||
3.12.2
|
18
README.md
Normal file
18
README.md
Normal file
|
@ -0,0 +1,18 @@
|
|||
# browser-history-merger
|
||||
|
||||
Merge browser histories into a single database.
|
||||
|
||||
# Usage
|
||||
## Initialization
|
||||
Before the first use with each browser on each device, run
|
||||
```sh
|
||||
browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database
|
||||
```
|
||||
`browser-id` must be a unique name that identifies the combination of browser and machine.
|
||||
|
||||
## Add histories
|
||||
Then add histories to the database by
|
||||
```sh
|
||||
browser-history-merger path/to/merged.db add browser-id
|
||||
```
|
||||
|
28
pyproject.toml
Normal file
28
pyproject.toml
Normal file
|
@ -0,0 +1,28 @@
|
|||
[project]
|
||||
name = "browser-history-merger"
|
||||
version = "0.1.0"
|
||||
description = "Simple tool to merge browser histories into a single database"
|
||||
authors = [
|
||||
{ name = "qwjyh", email = "urataw421@gmail.com" }
|
||||
]
|
||||
dependencies = []
|
||||
readme = "README.md"
|
||||
license = { text = "MIT License" }
|
||||
requires-python = ">= 3.10"
|
||||
|
||||
[project.scripts]
|
||||
"browser-history-merger" = "browser_history_merger:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.rye]
|
||||
managed = true
|
||||
dev-dependencies = []
|
||||
|
||||
[tool.hatch.metadata]
|
||||
allow-direct-references = true
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/browser_history_merger"]
|
10
requirements-dev.lock
Normal file
10
requirements-dev.lock
Normal file
|
@ -0,0 +1,10 @@
|
|||
# generated by rye
|
||||
# use `rye lock` or `rye sync` to update this lockfile
|
||||
#
|
||||
# last locked with the following flags:
|
||||
# pre: false
|
||||
# features: []
|
||||
# all-features: false
|
||||
# with-sources: false
|
||||
|
||||
-e file:.
|
10
requirements.lock
Normal file
10
requirements.lock
Normal file
|
@ -0,0 +1,10 @@
|
|||
# generated by rye
|
||||
# use `rye lock` or `rye sync` to update this lockfile
|
||||
#
|
||||
# last locked with the following flags:
|
||||
# pre: false
|
||||
# features: []
|
||||
# all-features: false
|
||||
# with-sources: false
|
||||
|
||||
-e file:.
|
371
src/browser_history_merger/__init__.py
Normal file
371
src/browser_history_merger/__init__.py
Normal file
|
@ -0,0 +1,371 @@
|
|||
#!/usr/bin/env python
|
||||
import argparse
|
||||
import logging
|
||||
import socket
|
||||
import sqlite3
|
||||
from typing import Literal
|
||||
|
||||
|
||||
def init_db(
    root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
):
    """
    Create the root db schema if it is missing and register a new browser.

    The tables `browsers`, `urls`, `visits` and `transition_type` are created
    only when no table named `browsers` exists yet. Afterwards one row is
    inserted into `browsers` for `args.name`, holding the local hostname, a
    `visits_time_max` of 0 and `args.database` as the source database path.

    Args:
        root_con: connection to the root (merged) database; used to commit.
        root_cur: cursor on `root_con` used for all statements.
        args: parsed CLI arguments; `args.name` and `args.database` are read.

    Raises:
        ValueError: if `args.name` is already present in `browsers`.
    """
    print("Initialize db")
    logging.info("Initializing db")

    # The existence of the `browsers` table is the marker for an already
    # initialized root db.
    res = root_cur.execute(
        """
        SELECT
            *
        FROM
            sqlite_master
        WHERE
            type = 'table' AND name='browsers'
        """
    )

    if res.fetchone() is None:
        print("Creating root db")
        root_cur.execute(
            """
            CREATE TABLE browsers (
                id INTEGER PRIMARY KEY,
                name LONGVARCHAR NOT NULL UNIQUE,
                hostname LONGVARCHAR,
                visits_time_max INTEGER NOT NULL,
                database_path LONGVARCHAR NOT NULL
            )
            """
        )
        root_cur.execute(
            """
            CREATE TABLE urls (
                id INTEGER,
                browser INTEGER NOT NULL,
                original_id INTEGER,
                url LONGVARCHAR,
                title LONGVARCHAR,
                PRIMARY KEY("id" AUTOINCREMENT),
                FOREIGN KEY("browser") REFERENCES "browsers"("id")
            )
            """
        )
        # `visits` table
        # - id: visits id
        # - browser:
        # - url:
        # - title: urls.title at the time when the `add` is executed
        # - visit_time: usec with chromium offset
        #
        # The two FOREIGN KEY constraints are separated by a comma; the
        # original statement omitted it and relied on SQLite accepting that.
        root_cur.execute(
            """
            CREATE TABLE visits (
                id INTEGER,
                browser INTEGER NOT NULL,
                original_id INTEGER,
                url_id INTEGER NOT NULL,
                url LONGVARCHAR NOT NULL,
                title LONGVARCHAR,
                visit_time INTEGER NOT NULL,
                from_visit INTEGER,
                transition_qualifier INTEGER DEFAULT 0,
                transition_type INTEGER,
                PRIMARY KEY("id" AUTOINCREMENT),
                FOREIGN KEY("browser") REFERENCES "browsers"("id"),
                FOREIGN KEY("transition_type") REFERENCES "transition_type"("id")
            )
            """
        )
        # `transition_type`: lookup table of the ids stored in
        # `visits.transition_type`.
        root_cur.execute(
            """
            CREATE TABLE transition_type (
                id INTEGER NOT NULL,
                name LONGVARCHAR,
                PRIMARY KEY("id")
            )
            """
        )
        visit_types = [
            (1, "link"),
            (2, "typed"),
            (3, "auto_bookmark"),
            (4, "auto_subframe"),
            (5, "manual_subframe"),
            (6, "generated"),
            (7, "auto_toplevel"),
            (8, "form_submit"),
            (9, "reload"),
            (10, "keyword"),
            (11, "keyword_generated"),
            (12, "redirect_permanent"),
            (13, "redirect_temporary"),
            (14, "download"),
            (0, "unknown"),
        ]
        root_cur.executemany(
            """
            INSERT INTO transition_type VALUES(?, ?)
            """,
            visit_types,
        )
        root_con.commit()

    # Refuse to register the same browser name twice.
    res = root_cur.execute(
        """
        SELECT
            browsers.name
        FROM
            browsers
        WHERE
            browsers.name = (?)
        """,
        [args.name],
    )
    if res.fetchone() is not None:
        print(f"The name {args.name} is already used")
        raise ValueError("The provided name for the browser is already used")

    # id is NULL so SQLite assigns the INTEGER PRIMARY KEY; visits_time_max
    # starts at 0.
    root_cur.execute(
        """
        INSERT INTO browsers VALUES(NULL, ?, ?, 0, ?)
        """,
        [args.name, socket.gethostname(), args.database],
    )
    root_con.commit()
|
||||
|
||||
|
||||
def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]:
    """
    Classify the history database behind `cur` by its table names.

    A database containing a table called `urls` is reported as "chromium";
    every other database is reported as "firefox".
    """
    urls_table = cur.execute(
        """
        SELECT
            *
        FROM
            sqlite_master
        WHERE
            type='table' AND name='urls'
        """
    ).fetchone()
    if urls_table is None:
        return "firefox"
    return "chromium"
|
||||
|
||||
|
||||
def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str]:
    """
    Look up a registered browser in the root db.

    Args:
        root_cur: cursor on the root (merged) database.
        name: value of `browsers.name` to look up.

    Returns:
        `(id, visits_time_max, database_path)` of the matching `browsers` row.

    Raises:
        ValueError: if no row in `browsers` has the given name.
    """
    res = root_cur.execute(
        """
        SELECT
            id,
            visits_time_max,
            database_path
        FROM
            browsers
        WHERE
            browsers.name = (?)
        """,
        (name,),
    )
    row = res.fetchone()
    if row is None:
        # Unpacking None would fail with an unhelpful TypeError; report the
        # actual problem instead.
        raise ValueError(f"No browser named {name!r} is registered in the root db")
    browser_id, visits_time_max, database_path = row
    return (browser_id, visits_time_max, database_path)
|
||||
|
||||
|
||||
def convert_chromium_transition_type(transition_qualifier: int) -> int:
    """
    Map a chromium transition value to a `transition_type` id.

    Only the value modulo 0x100 is inspected: 0..10 map to ids 1..11, and
    anything else maps to 0 ("unknown").
    """
    core = transition_qualifier % 0x100
    if 0 <= core <= 10:
        return core + 1
    return 0  # unknown
|
||||
|
||||
|
||||
def convert_firefox_transition_type(transition_type: int) -> int:
    """
    Map a firefox `visit_type` value to a `transition_type` id.

    Values 1..4 are kept as they are, 8 -> 5, 9 -> 9, 5 -> 12, 6 -> 13 and
    7 -> 14. Every other value maps to 0.
    """
    # 1..4 are passed through unchanged.
    if 1 <= transition_type <= 4:
        return transition_type
    remapped = {
        8: 5,
        9: 9,
        5: 12,
        6: 13,
        7: 14,
    }
    return remapped.get(transition_type, 0)
|
||||
|
||||
|
||||
def add_db(
    root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
):
    """
    Import the new visits of a registered browser into the root db.

    The browser registered as `args.name` is looked up, its history database
    is opened read-only, and every visit whose `visit_time` is greater than
    the stored `visits_time_max` is copied into `visits`, together with the
    url rows those visits point to (into `urls`). Finally
    `browsers.visits_time_max` is set to the largest `visit_time` stored for
    that browser.

    Only chromium-style source databases are supported.

    Args:
        root_con: connection to the root (merged) database; used to commit.
        root_cur: cursor on `root_con` used for all root db statements.
        args: parsed CLI arguments; `args.name` and `args.root_db` are read.

    Raises:
        RuntimeError: if the source database is detected as firefox.
    """
    # Local imports: these names are not among the module-level imports.
    from contextlib import closing
    from urllib.parse import quote

    print("Add history to root db")
    browser_id, visits_time_max, database_path = get_browser_info(root_cur, args.name)
    logging.info(f"{browser_id=}, {visits_time_max=}")

    logging.info(f"Source: {database_path}")
    logging.info(f"Root: {args.root_db}")

    # Percent-encode the path so that characters such as `?`, `#` or `%` are
    # not interpreted as URI syntax; SQLite decodes %HH escapes in the path.
    dburi = f"file:{quote(database_path)}?mode=ro&nolock=1"
    logging.info(f"DB uri: {dburi}")

    # `closing` guarantees the source connection is released on every path,
    # including the "not implemented" error below.
    with closing(sqlite3.connect(dburi, uri=True)) as con:
        cur = con.cursor()

        db_type = get_db_type(cur)
        logging.info(f"DB type: {db_type}")

        if db_type == "firefox":
            logging.error("Not implemented")
            raise RuntimeError("Not implemented")

        # chromium
        select_url_toupdate_sql = """
            SELECT
                urls.id,
                urls.url,
                urls.title
            FROM
                visits,
                urls
            WHERE
                visits.visit_time > (?)
                AND visits.url = urls.id
            """
        select_visit_sql = """
            SELECT
                visits.id,
                visits.url,
                urls.url,
                urls.title,
                visits.visit_time,
                visits.from_visit,
                visits.transition
            FROM
                visits,
                urls
            WHERE
                visits.visit_time > (?)
                AND visits.url = urls.id
            """

        res = cur.execute(select_url_toupdate_sql, [visits_time_max])
        updating_urls = (
            (browser_id, original_id, url, title) for original_id, url, title in res
        )
        # NOTE(review): `urls` has no unique key besides its autoincrement id,
        # so with a NULL id this REPLACE always inserts a new row.
        root_cur.executemany(
            """
            REPLACE INTO urls
            VALUES(NULL, ?, ?, ?, ?)
            """,
            updating_urls,
        )
        print(f"Wrote {root_cur.rowcount} urls")
        root_con.commit()
        logging.info("updated urls in new visits")

        res = cur.execute(select_visit_sql, [visits_time_max])
        new_visits = (
            (
                browser_id,
                original_id,
                url_id,
                url,
                title,
                visit_time,
                from_visit,
                # The raw chromium transition value is stored unchanged as the
                # qualifier; the type id is derived from it.
                transition,
                convert_chromium_transition_type(transition),
            )
            for original_id, url_id, url, title, visit_time, from_visit, transition in res
        )
        root_cur.executemany(
            """
            INSERT INTO visits
            VALUES(NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            new_visits,
        )
        print(f"Wrote {root_cur.rowcount} visits")
        root_con.commit()
        logging.info("added new visits")

    # update visits_time_max
    res = root_cur.execute(
        """
        SELECT
            max(visits.visit_time)
        FROM
            visits
        WHERE
            visits.browser = (?)
        """,
        [browser_id],
    )
    (new_urls_time_max,) = res.fetchone()
    logging.info(f"{new_urls_time_max=}")
    if new_urls_time_max is None:
        # max() yields NULL when this browser has no visits at all; writing
        # that would violate NOT NULL on browsers.visits_time_max, so the
        # stored value is left as it is.
        logging.info("No visits stored for this browser; visits_time_max unchanged")
        return
    root_cur.execute(
        """
        UPDATE
            browsers
        SET
            visits_time_max = (?)
        WHERE
            browsers.id = (?)
        """,
        (new_urls_time_max, browser_id),
    )
    root_con.commit()
    logging.info("Updated browser information")
|
||||
|
||||
|
||||
def main() -> int:
    """
    Command line entry point.

    Parses the arguments, configures logging from `-v/--verbosity`, opens the
    root db at `root_db` and runs the selected subcommand (`init` or `add`).

    Returns:
        0 after the subcommand ran, 1 when no subcommand was given (the help
        text is printed in that case).
    """
    # Local import: `contextlib` is not among the module-level imports.
    from contextlib import closing

    parser = argparse.ArgumentParser(description="Browser history merger")
    parser.add_argument("root_db", help="Merged database path")
    parser.add_argument(
        "-v", "--verbosity", action="count", default=0, help="Increase log verbosity"
    )
    subparsers = parser.add_subparsers()

    parser_init = subparsers.add_parser("init", help="Initialize root db")
    parser_init.add_argument("name", help="Unique name for the browser")
    parser_init.add_argument("database", help="Path to the browser's history db")
    parser_init.set_defaults(func=init_db)

    parse_add = subparsers.add_parser("add", help="Add history to root db")
    parse_add.add_argument(
        "name", help="Source browser name(which was added to root db before)"
    )
    parse_add.set_defaults(func=add_db)

    args = parser.parse_args()

    # No -v: warnings only, -v: info, -vv or more: debug.
    log_levels = {0: logging.WARN, 1: logging.INFO}
    logging.basicConfig(level=log_levels.get(args.verbosity, logging.DEBUG))
    logging.debug(f"{args=}")

    # Bail out before touching the database: connecting can create an empty
    # file at `root_db` even though nothing would be done with it.
    if not hasattr(args, "func"):
        parser.print_help()
        return 1

    # `closing` releases the connection even when the subcommand raises.
    with closing(sqlite3.connect(args.root_db)) as root_con:
        root_cur = root_con.cursor()
        args.func(root_con, root_cur, args)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status; calling
    # main() bare would always exit with 0.
    raise SystemExit(main())
|
4
src/browser_history_merger/__main__.py
Normal file
4
src/browser_history_merger/__main__.py
Normal file
|
@ -0,0 +1,4 @@
|
|||
"""Run the command line interface via ``python -m browser_history_merger``."""
import sys

import browser_history_merger

# Use main()'s return value as the process exit status.
exit_status = browser_history_merger.main()
sys.exit(exit_status)
|
Loading…
Reference in a new issue