diff --git a/.python-version b/.python-version index 8531a3b..7ecaebe 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12.2 +system@3.13.1 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cbc04c9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog + +## [Unreleased] + +### Added +- Firefox support + +### Changed +- Explicitly close databases +- Update readme +- Add changelog + +## [0.1.0] - 2024-06-07 + +### Added +- initial release +- `init` subcommand +- `add` subcommand +- Chromium support + +[unreleased]: https://github.com/qwjyh/browser-history-merger/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/qwjyh/browser-history-merger/releases/tag/v0.1.0 diff --git a/README.md b/README.md index e8191bd..f178212 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,55 @@ Merge browser histories into a single database. -# Usage -## Initialization +## Usage +### Initialization For the first execution on each device and browser, do ```sh -browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database +browser-history-merger path/to/merged.db init browser-id /abs/path/to/browser/history/database ``` `browser-id` should be unique to identify browser and machine. -## Add histories +### Add histories Then add histories to the database by ```sh browser-history-merger path/to/merged.db add browser-id ``` +## Supported environments +Python 3.12 (uses only the standard library) + +- Chromium + - Tested: + - chrome on windows, linux + - brave on windows, linux + - vivaldi on linux +- Firefox + - Tested: + - firefox on windows + +## Tips +The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. 
+ +### Example SQL to see the history + +```sql +SELECT + browsers.name, + visits.title, + visits.url, + datetime(visits.visit_time / 1000000 - 11644473600, 'unixepoch') +FROM + visits, + browsers +WHERE + visits.browser = browsers.id +ORDER BY + visits.visit_time + DESC LIMIT 0, 100 +``` + +## Todo +- exporting + - JSON output + - browser list +- multiple profiles? diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index 417052b..493b9fe 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -3,8 +3,11 @@ import argparse import logging import socket import sqlite3 -from typing import Literal +from typing import Literal, Tuple +# ref: https://en.wikiversity.org/wiki/Chromium_browsing_history_database +# Time offset of chromium to unixepoch +CHROMIUM_TIME_OFFSET = 11644473600 * 1_000_000 def init_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace @@ -132,6 +135,43 @@ def init_db( ) root_con.commit() + # cleanup + root_con.close() + + +def open_browser_db(database_path: str) -> Tuple[sqlite3.Connection, Literal["firefox", "chromium"]]: + dburi = f"file:{database_path}?mode=ro&nolock=1" + logging.info(f"DB uri: {dburi}") + con = sqlite3.connect(dburi, uri=True) + cur = con.cursor() + + logging.debug(f"{con=}") + logging.debug(f"{cur=}") + try: + res = cur.execute( + """ + SELECT + * + FROM + sqlite_master + WHERE + type='table' AND name='urls' + """ + ) + res.fetchone() + except sqlite3.OperationalError as e: + if "unable to open database file" in str(e): + # might be firefox + logging.debug("Failed to open db while executing SELECT from sqlite_master") + dburi = f"file:{database_path}?mode=ro" + con = sqlite3.connect(dburi, uri=True) + cur = con.cursor() + else: + raise e + db_type = get_db_type(cur) + logging.info(f"DB type: {db_type}") + return con, db_type + def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]: res = 
cur.execute( @@ -148,7 +188,7 @@ def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]: return db_type -def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str]: +def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str] | None: res = root_cur.execute( """ SELECT @@ -162,7 +202,10 @@ def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str """, (name,), ) - browser_id, visits_time_max, database_path = res.fetchone() + res_one = res.fetchone() + if res_one is None: + return None + browser_id, visits_time_max, database_path = res_one return (browser_id, visits_time_max, database_path) @@ -198,28 +241,67 @@ def convert_firefox_transition_type(transition_type: int) -> int: return 0 +def convert_firefox_datetime_to_choromium(time: str) -> str: + """ + Convert time in Firefox to Chromium format. + """ + num = int(time) + return str(num + CHROMIUM_TIME_OFFSET) + + def add_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace ): print("Add history to root db") - browser_id, visits_time_max, database_path = get_browser_info(root_cur, args.name) + browser_info = get_browser_info(root_cur, args.name) + if browser_info is None: + print(f"browser {args.name} is not registered to the database") + logging.debug("Cleaning up (closing db connection)") + root_con.close() + exit(1) + browser_id, visits_time_max, database_path = browser_info logging.info(f"{browser_id=}, {visits_time_max=}") logging.info(f"Source: {database_path}") logging.info(f"Root: {args.root_db}") - dburi = f"file:{database_path}?mode=ro&nolock=1" - logging.info(f"DB uri: {dburi}") - con = sqlite3.connect(dburi, uri=True) + con, db_type = open_browser_db(database_path) cur = con.cursor() - db_type = get_db_type(cur) - logging.info(f"DB type: {db_type}") - match db_type: case "firefox": - logging.error("Not implemented") - raise RuntimeError("Not implemented") + select_url_toupdate_sql = """ + 
SELECT + moz_places.id, + moz_places.url, + moz_places.title + FROM + moz_historyvisits, + moz_places + WHERE + moz_historyvisits.visit_date > (?) - 11644473600000000 + AND moz_historyvisits.place_id = moz_places.id + """ + select_visit_sql = """ + SELECT + moz_historyvisits.id, + moz_historyvisits.place_id, + moz_places.url, + moz_places.title, + moz_historyvisits.visit_date, + moz_historyvisits.from_visit, + moz_historyvisits.visit_type + FROM + moz_historyvisits, + moz_places + WHERE + moz_historyvisits.visit_date > (?) - 11644473600000000 + AND moz_historyvisits.place_id = moz_places.id + """ + convert_transition_type = convert_firefox_transition_type + # Firefox doesn't have transition_qualifier + convert_transition_qualifier = lambda _: None + convert_visit_time = convert_firefox_datetime_to_choromium case "chromium": select_url_toupdate_sql = """ SELECT @@ -251,6 +333,7 @@ def add_db( """ convert_transition_type = convert_chromium_transition_type convert_transition_qualifier = lambda x: x + convert_visit_time = lambda x: x res = cur.execute(select_url_toupdate_sql, [visits_time_max]) updating_urls = ( ( @@ -269,8 +352,6 @@ def add_db( updating_urls, ) print(f"Wrote {root_cur.rowcount} urls") - root_con.commit() - print(f"Wrote {root_cur.rowcount} urls") logging.info("updated urls in new visits") res = cur.execute(select_visit_sql, [visits_time_max]) new_visits = ( @@ -280,7 +361,7 @@ def add_db( url_id, url, title, - visit_time, + convert_visit_time(visit_time), from_visit, convert_transition_qualifier(transition), convert_transition_type(transition), @@ -295,8 +376,6 @@ def add_db( new_visits, ) print(f"Wrote {root_cur.rowcount} visits") - root_con.commit() - print(f"Wrote {root_cur.rowcount} visits") logging.info("added new visits") # update visits_time_max @@ -312,7 +391,7 @@ def add_db( [browser_id], ) (new_urls_time_max,) = res.fetchone() - logging.info(f"{new_urls_time_max=}") + logging.info(f"{new_urls_time_max=} (in chromium format)") 
root_cur.execute( """ UPDATE @@ -327,6 +406,10 @@ def add_db( root_con.commit() logging.info("Updated browser information") + # cleanup + root_con.close() + con.close() + def main() -> int: parser = argparse.ArgumentParser(description="Browser history merger")