From 5970e4594a5bed4c28faa8dbb866d62bb4948e0c Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:02:02 +0900 Subject: [PATCH 1/6] add Firefox support --- requirements-dev.lock | 1 + requirements.lock | 1 + src/browser_history_merger/__init__.py | 98 +++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 8f23096..d795436 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,5 +6,6 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false -e file:. diff --git a/requirements.lock b/requirements.lock index 8f23096..d795436 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,5 +6,6 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false -e file:. diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index 6f612db..e47c3fa 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -3,8 +3,11 @@ import argparse import logging import socket import sqlite3 -from typing import Literal +from typing import Literal, Tuple +# ref: https://en.wikiversity.org/wiki/Chromium_browsing_history_database +# Time offset of chromium to unixepoch +CHROMIUM_TIME_OFFSET = 11644473600 * 1_000_000 def init_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace @@ -132,6 +135,43 @@ def init_db( ) root_con.commit() + # cleanup + root_con.close() + + +def open_browser_db(database_path: str) -> Tuple[sqlite3.Connection, Literal["firefox", "chromium"]]: + dburi = f"file:{database_path}?mode=ro&nolock=1" + logging.info(f"DB uri: {dburi}") + con = sqlite3.connect(dburi, uri=True) + cur = con.cursor() + + logging.debug(f"{con=}") + logging.debug(f"{cur=}") + try: + res = cur.execute( + """ + SELECT + * + FROM + sqlite_master + WHERE + type='table' AND name='urls' + """ + ) + res.fetchone() + except sqlite3.OperationalError as e: + if "unable to open database file" in str(e): + # might be firefox + logging.debug("Failed to open db while executing SELECT from sqlite_master") + dburi = f"file:{database_path}?mode=ro" + con = sqlite3.connect(dburi, uri=True) + cur = con.cursor() + else: + raise e + db_type = get_db_type(cur) + logging.info(f"DB type: {db_type}") + return con, db_type + def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]: res = cur.execute( @@ -198,6 +238,14 @@ def convert_firefox_transition_type(transition_type: int) -> int: return 0 +def convert_firefox_datetime_to_choromium(time: str) -> str: + """ + Convert time in Firefox to Chromium format. + """ + num = int(time) + return str(num + CHROMIUM_TIME_OFFSET) + + def add_db( root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace ): @@ -208,18 +256,43 @@ def add_db( logging.info(f"Source: {database_path}") logging.info(f"Root: {args.root_db}") - dburi = f"file:{database_path}?mode=ro&nolock=1" - logging.info(f"DB uri: {dburi}") - con = sqlite3.connect(dburi, uri=True) + con, db_type = open_browser_db(database_path) cur = con.cursor() - db_type = get_db_type(cur) - logging.info(f"DB type: {db_type}") - match db_type: case "firefox": - logging.error("Not implemented") - raise RuntimeError("Not implemented") + select_url_toupdate_sql = """ + SELECT + moz_places.id, + moz_places.url, + moz_places.title + FROM + moz_historyvisits, + moz_places + WHERE + moz_historyvisits.visit_date > (?) + AND moz_historyvisits.place_id = moz_places.id + """ + select_visit_sql = """ + SELECT + moz_historyvisits.id, + moz_historyvisits.place_id, + moz_places.url, + moz_places.title, + moz_historyvisits.visit_date, + moz_historyvisits.from_visit, + moz_historyvisits.visit_type + FROM + moz_historyvisits, + moz_places + WHERE + moz_historyvisits.visit_date > (?) + AND moz_historyvisits.place_id = moz_places.id + """ + convert_transition_type = convert_firefox_transition_type + # Firefox doesn't have transition_qualifier + convert_transition_qualifier = lambda _: None + convert_visit_time = convert_firefox_datetime_to_choromium case "chromium": select_url_toupdate_sql = """ SELECT @@ -251,6 +324,7 @@ def add_db( """ convert_transition_type = convert_chromium_transition_type convert_transition_qualifier = lambda x: x + convert_visit_time = lambda x: x res = cur.execute(select_url_toupdate_sql, [visits_time_max]) updating_urls = ( ( @@ -278,7 +352,7 @@ def add_db( url_id, url, title, - visit_time, + convert_visit_time(visit_time), from_visit, convert_transition_qualifier(transition), convert_transition_type(transition), @@ -323,6 +397,10 @@ def add_db( root_con.commit() logging.info("Updated browser information") + # cleanup + root_con.close() + con.close() + def main() -> int: parser = argparse.ArgumentParser(description="Browser history merger") From 1c7e8c892c0ebff0caf8bd556dedbcb1c22fa794 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:13:57 +0900 Subject: [PATCH 2/6] update documents --- CHANGELOG.md | 22 ++++++++++++++++++++++ README.md | 20 ++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cbc04c9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog + +## [Unreleased] + +### Added +- Firefox support + +### Changed +- explicit close of databases +- Updated readme +- Add changelog + +## [0.1.0] - 2024-06-07 + +### Added +- initial release +- `init` subcommand +- `add` subcommand +- Chromium support + +[unreleased]: https://github.com/qwjyh/browser-history-merger/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/qwjyh/browser-history-merger/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 702892c..adf0461 100644 --- a/README.md +++ b/README.md @@ -2,20 +2,32 @@ Merge browser histories into a single database. -# Usage -## Initialization +## Usage +### Initialization For the first execution on each device and browser, do ```sh browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database ``` `browser-id` should be unique to identify browser and machine. -## Add histories +### Add histories Then add histories to the database by ```sh browser-history-merger path/to/merged.db add browser-id ``` -# Tips +## Supported environments +Python 3.12 (works with standard libraries only) + +- Chromium + - Tested: + - chrome on windows, linux + - brave on windows, linux + - vivaldi on linux +- Firefox + - Tested: + - firefox on windows + +## Tips The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. From c24f02fe948b26baf6ab4db2c209d16812121440 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:16:46 +0900 Subject: [PATCH 3/6] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index adf0461..95132c7 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Merge browser histories into a single database. ### Initialization For the first execution on each device and browser, do ```sh -browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database +browser-history-merger path/to/merged.db init browser-id /abs/path/to/browser/history/database ``` `browser-id` should be unique to identify browser and machine. From c25ff128955784d7d7950af604f79dcab3d1d333 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:51:28 +0900 Subject: [PATCH 4/6] Fix firefox time filtering --- src/browser_history_merger/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/browser_history_merger/__init__.py b/src/browser_history_merger/__init__.py index e47c3fa..a44a76e 100644 --- a/src/browser_history_merger/__init__.py +++ b/src/browser_history_merger/__init__.py @@ -270,7 +270,7 @@ def add_db( moz_historyvisits, moz_places WHERE - moz_historyvisits.visit_date > (?) + moz_historyvisits.visit_date > (?) - 11644473600000000 AND moz_historyvisits.place_id = moz_places.id """ select_visit_sql = """ @@ -286,7 +286,7 @@ def add_db( moz_historyvisits, moz_places WHERE - moz_historyvisits.visit_date > (?) + moz_historyvisits.visit_date > (?) - 11644473600000000 AND moz_historyvisits.place_id = moz_places.id """ convert_transition_type = convert_firefox_transition_type @@ -382,7 +382,7 @@ def add_db( [browser_id], ) (new_urls_time_max,) = res.fetchone() - logging.info(f"{new_urls_time_max=}") + logging.info(f"{new_urls_time_max=} (in chromium format)") root_cur.execute( """ UPDATE From 395ddb72cd32f56b1998b6023a82236487511be5 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 17:55:31 +0900 Subject: [PATCH 5/6] add Todo to README --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 95132c7..90c51aa 100644 --- a/README.md +++ b/README.md @@ -31,3 +31,8 @@ Python 3.12 (works with standard libraries only) ## Tips The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. +## Todo +- exporting + - JSON output + - browser list +- multiple profiles? From 4fd448e29e5eee78259f94bc01fee45010ee9409 Mon Sep 17 00:00:00 2001 From: qwjyh Date: Sun, 9 Jun 2024 18:11:43 +0900 Subject: [PATCH 6/6] add sql example --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 90c51aa..f178212 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,24 @@ Python 3.12 (works with standard libraries only) ## Tips The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script. +### Example SQL to see the history + +```sql +SELECT + browsers.name, + visits.title, + visits.url, + datetime(visits.visit_time / 1000000 - 11644473600, 'unixepoch') +FROM + visits, + browsers +WHERE + visits.browser = browsers.id +ORDER by + visits.visit_time + DESC LIMIT 0, 100 +``` + ## Todo - exporting - JSON output