From a67e500c59fbc9868149683568b2819b8c7c9b44 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Fri, 7 Jun 2024 19:01:52 +0900 Subject: [PATCH 01/10] remove unnecessary prints --- src/browser_history_merger/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index 417052b..6f612db 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -269,8 +269,6 @@ def add_db( updating_urls, ) print(f"Wrote {root_cur.rowcount} urls") - root_con.commit() - print(f"Wrote {root_cur.rowcount} urls") logging.info("updated urls in new visits") res = cur.execute(select_visit_sql, [visits_time_max]) new_visits = ( @@ -295,8 +293,6 @@ def add_db( new_visits, ) print(f"Wrote {root_cur.rowcount} visits") - root_con.commit() - print(f"Wrote {root_cur.rowcount} visits") logging.info("added new visits") # update visits_time_max From 521570282097fadaaf2ab90b009959593ab5a3f4 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Fri, 7 Jun 2024 19:06:43 +0900 Subject: [PATCH 02/10] add note on modularity --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e8191bd..702892c 100644 --- a/README.md +++ b/README.md @@ -16,3 +16,6 @@ Then add histories to the database by browser-history-merger path/to/merged.db add browser-id ``` +# Tips +The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. 
+ From 5970e4594a5bed4c28faa8dbb866d62bb4948e0c Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:02:02 +0900 Subject: [PATCH 03/10] add Firefox support --- requirements-dev.lock | 1 + requirements.lock | 1 + src/browser_history_merger/__init__.py | 98 +++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 8f23096..d795436 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,5 +6,6 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false -e file:. diff --git a/requirements.lock b/requirements.lock index 8f23096..d795436 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,5 +6,6 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false -e file:. diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index 6f612db..e47c3fa 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -3,8 +3,11 @@ import argparse import logging import socket import sqlite3 -from typing import Literal +from typing import Literal, Tuple +# ref: https://en.wikiversity.org/wiki/Chromium_browsing_history_database +# Time offset of chromium to unixepoch +CHROMIUM_TIME_OFFSET = 11644473600 * 1_000_000 def init_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace @@ -132,6 +135,43 @@ def init_db( ) root_con.commit() + # cleanup + root_con.close() + + +def open_browser_db(database_path: str) -> Tuple[sqlite3.Connection, Literal["firefox", "chromium"]]: + dburi = f"file:{database_path}?mode=ro&nolock=1" + logging.info(f"DB uri: {dburi}") + con = sqlite3.connect(dburi, uri=True) + cur = con.cursor() + + logging.debug(f"{con=}") + logging.debug(f"{cur=}") + try: + res = cur.execute( + """ + SELECT + * + FROM + sqlite_master + WHERE + type='table' AND name='urls' + """ + ) + res.fetchone() + except 
sqlite3.OperationalError as e: + if "unable to open database file" in str(e): + # might be firefox + logging.debug("Failed to open db while executing SELECT from sqlite_master") + dburi = f"file:{database_path}?mode=ro" + con = sqlite3.connect(dburi, uri=True) + cur = con.cursor() + else: + raise e + db_type = get_db_type(cur) + logging.info(f"DB type: {db_type}") + return con, db_type + def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]: res = cur.execute( @@ -198,6 +238,14 @@ def convert_firefox_transition_type(transition_type: int) -> int: return 0 +def convert_firefox_datetime_to_choromium(time: str) -> str: + """ + Convert time in Firefox to Chromium format. + """ + num = int(time) + return str(num + CHROMIUM_TIME_OFFSET) + + def add_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace ): @@ -208,18 +256,43 @@ def add_db( logging.info(f"Source: {database_path}") logging.info(f"Root: {args.root_db}") - dburi = f"file:{database_path}?mode=ro&nolock=1" - logging.info(f"DB uri: {dburi}") - con = sqlite3.connect(dburi, uri=True) + con, db_type = open_browser_db(database_path) cur = con.cursor() - db_type = get_db_type(cur) - logging.info(f"DB type: {db_type}") - match db_type: case "firefox": - logging.error("Not implemented") - raise RuntimeError("Not implemented") + select_url_toupdate_sql = """ + SELECT + moz_places.id, + moz_places.url, + moz_places.title + FROM + moz_historyvisits, + moz_places + WHERE + moz_historyvisits.visit_date > (?) + AND moz_historyvisits.place_id = moz_places.id + """ + select_visit_sql = """ + SELECT + moz_historyvisits.id, + moz_historyvisits.place_id, + moz_places.url, + moz_places.title, + moz_historyvisits.visit_date, + moz_historyvisits.from_visit, + moz_historyvisits.visit_type + FROM + moz_historyvisits, + moz_places + WHERE + moz_historyvisits.visit_date > (?) 
+ AND moz_historyvisits.place_id = moz_places.id + """ + convert_transition_type = convert_firefox_transition_type + # Firefox doesn't have transition_qualifier + convert_transition_qualifier = lambda _: None + convert_visit_time = convert_firefox_datetime_to_choromium case "chromium": select_url_toupdate_sql = """ SELECT @@ -251,6 +324,7 @@ def add_db( """ convert_transition_type = convert_chromium_transition_type convert_transition_qualifier = lambda x: x + convert_visit_time = lambda x: x res = cur.execute(select_url_toupdate_sql, [visits_time_max]) updating_urls = ( ( @@ -278,7 +352,7 @@ def add_db( url_id, url, title, - visit_time, + convert_visit_time(visit_time), from_visit, convert_transition_qualifier(transition), convert_transition_type(transition), @@ -323,6 +397,10 @@ def add_db( root_con.commit() logging.info("Updated browser information") + # cleanup + root_con.close() + con.close() + def main() -> int: parser = argparse.ArgumentParser(description="Browser history merger") From 1c7e8c892c0ebff0caf8bd556dedbcb1c22fa794 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:13:57 +0900 Subject: [PATCH 04/10] update documents --- CHANGELOG.md | 22 ++++++++++++++++++++++ README.md | 20 ++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cbc04c9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog + +## [Unreleased] + +### Added +- Firefox support + +### Changed +- explicit close of databases +- Updated readme +- Add changelog + +## [0.1.0] - 2024-06-07 + +### Added +- initial release +- `init` subcommand +- `add` subcommand +- Chromium support + +[unreleased]: https://github.com/qwjyh/browser-history-merger/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/qwjyh/browser-history-merger/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 702892c..adf0461 100644 --- a/README.md +++ b/README.md @@ -2,20 
+2,32 @@ Merge browser histories into a single database. -# Usage -## Initialization +## Usage +### Initialization For the first execution on each device and browser, do ```sh browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database ``` `browser-id` should be unique to identify browser and machine. -## Add histories +### Add histories Then add histories to the database by ```sh browser-history-merger path/to/merged.db add browser-id ``` -# Tips +## Supported environments +Python 3.12 (works with standard libraries only) + +- Chromium + - Tested: + - chrome on windows, linux + - brave on windows, linux + - vivaldi on linux +- Firefox + - Tested: + - firefox on windows + +## Tips The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. From c24f02fe948b26baf6ab4db2c209d16812121440 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:16:46 +0900 Subject: [PATCH 05/10] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index adf0461..95132c7 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Merge browser histories into a single database. ### Initialization For the first execution on each device and browser, do ```sh -browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database +browser-history-merger path/to/merged.db init browser-id /abs/path/to/browser/history/database ``` `browser-id` should be unique to identify browser and machine. 
From c25ff128955784d7d7950af604f79dcab3d1d333 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:51:28 +0900 Subject: [PATCH 06/10] Fix firefox time filtering --- src/browser_history_merger/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index e47c3fa..a44a76e 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -270,7 +270,7 @@ def add_db( moz_historyvisits, moz_places WHERE - moz_historyvisits.visit_date > (?) + moz_historyvisits.visit_date > (?) - 11644473600000000 AND moz_historyvisits.place_id = moz_places.id """ select_visit_sql = """ @@ -286,7 +286,7 @@ def add_db( moz_historyvisits, moz_places WHERE - moz_historyvisits.visit_date > (?) + moz_historyvisits.visit_date > (?) - 11644473600000000 AND moz_historyvisits.place_id = moz_places.id """ convert_transition_type = convert_firefox_transition_type @@ -382,7 +382,7 @@ def add_db( [browser_id], ) (new_urls_time_max,) = res.fetchone() - logging.info(f"{new_urls_time_max=}") + logging.info(f"{new_urls_time_max=} (in chromium format)") root_cur.execute( """ UPDATE From 395ddb72cd32f56b1998b6023a82236487511be5 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:55:31 +0900 Subject: [PATCH 07/10] add Todo to README --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 95132c7..90c51aa 100644 --- a/README.md +++ b/README.md @@ -31,3 +31,8 @@ Python 3.12 (works with standard libraries only) ## Tips The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. +## Todo +- exporting + - JSON output + - browser list +- multiple profiles? 
From 4fd448e29e5eee78259f94bc01fee45010ee9409 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 18:11:43 +0900 Subject: [PATCH 08/10] add sql example --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 90c51aa..f178212 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,24 @@ Python 3.12 (works with standard libraries only) ## Tips The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. +### Example SQL to see the history + +```sql +SELECT + browsers.name, + visits.title, + visits.url, + datetime(visits.visit_time / 1000000 - 11644473600, 'unixepoch') +FROM + visits, + browsers +WHERE + visits.browser = browsers.id +ORDER by + visits.visit_time + DESC LIMIT 0, 100 +``` + ## Todo - exporting - JSON output From 7486098a11b1d9016cdf0e9efc088c0d61470599 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Wed, 8 Jan 2025 14:29:40 +0900 Subject: [PATCH 09/10] update python version (system@3.13.1) --- .python-version | 2 +- requirements-dev.lock | 1 - requirements.lock | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.python-version b/.python-version index 8531a3b..7ecaebe 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12.2 +system@3.13.1 diff --git a/requirements-dev.lock b/requirements-dev.lock index d795436..8f23096 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,6 +6,5 @@ # features: [] # all-features: false # with-sources: false -# generate-hashes: false -e file:. diff --git a/requirements.lock b/requirements.lock index d795436..8f23096 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,6 +6,5 @@ # features: [] # all-features: false # with-sources: false -# generate-hashes: false -e file:. 
From eb98ce47cbbf36389df5899b98cc975f0a4f61ed Mon Sep 17 00:00:00 2001 From: qwjyh Date: Wed, 8 Jan 2025 14:30:05 +0900 Subject: [PATCH 10/10] update error handling for not registered browser --- src/browser_history_merger/__init__.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index a44a76e..493b9fe 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -188,7 +188,7 @@ def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]: return db_type -def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str]: +def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str] | None: res = root_cur.execute( """ SELECT @@ -202,7 +202,10 @@ def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str """, (name,), ) - browser_id, visits_time_max, database_path = res.fetchone() + res_one = res.fetchone() + if res_one is None: + return None + browser_id, visits_time_max, database_path = res_one return (browser_id, visits_time_max, database_path) @@ -250,7 +253,13 @@ def add_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace ): print("Add history to root db") - browser_id, visits_time_max, database_path = get_browser_info(root_cur, args.name) + browser_info = get_browser_info(root_cur, args.name) + if browser_info is None: + print(f"browser {args.name} is not registered to the database") + logging.debug("Cleaning up (closing db connection)") + root_con.close() + exit(1) + browser_id, visits_time_max, database_path = browser_info logging.info(f"{browser_id=}, {visits_time_max=}") logging.info(f"Source: {database_path}")