Compare commits

...

10 commits
v0.1.0 ... main

Author SHA1 Message Date
eb98ce47cb update error handling for not registered browser 2025-01-08 14:30:05 +09:00
7486098a11 update python version (system@3.13.1) 2025-01-08 14:29:40 +09:00
4fd448e29e add sql example 2024-06-09 18:11:43 +09:00
395ddb72cd add Todo to README 2024-06-09 17:55:31 +09:00
c25ff12895 Fix firefox time filtering 2024-06-09 17:51:28 +09:00
c24f02fe94 update README 2024-06-09 17:16:46 +09:00
1c7e8c892c update documents 2024-06-09 17:13:57 +09:00
5970e4594a add Firefox support 2024-06-09 17:02:02 +09:00
5215702820 add note on modularity 2024-06-07 19:06:43 +09:00
a67e500c59 remove unnecssary prints 2024-06-07 19:01:52 +09:00
4 changed files with 166 additions and 23 deletions

View file

@ -1 +1 @@
3.12.2
system@3.13.1

22
CHANGELOG.md Normal file
View file

@ -0,0 +1,22 @@
# Changelog
## [Unreleased]
### Added
- Firefox support
### Changed
- explicit close of databases
- Updated readme
- Add changelog
## [0.1.0] - 2024-06-07
### Added
- initial release
- `init` subcommand
- `add` subcommand
- Chromium support
[unreleased]: https://github.com/qwjyh/browser-history-merger/compare/v0.1.0...HEAD
[0.1.0]: https://github.com/qwjyh/browser-history-merger/releases/tag/v0.1.0

View file

@ -2,17 +2,55 @@
Merge browser histories into a single database.
# Usage
## Initialization
## Usage
### Initialization
The first time you use a given browser on a given device, run
```sh
browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database
browser-history-merger path/to/merged.db init browser-id /abs/path/to/browser/history/database
```
`browser-id` should be a unique name identifying the combination of browser and machine.
## Add histories
### Add histories
Then add histories to the database by
```sh
browser-history-merger path/to/merged.db add browser-id
```
## Supported environments
Python 3.12 (uses only the standard library)
- Chromium
- Tested:
- chrome on windows, linux
- brave on windows, linux
- vivaldi on linux
- Firefox
- Tested:
- firefox on windows
## Tips
The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script.
### Example SQL to see the history
```sql
SELECT
browsers.name,
visits.title,
visits.url,
datetime(visits.visit_time / 1000000 - 11644473600, 'unixepoch')
FROM
visits,
browsers
WHERE
visits.browser = browsers.id
ORDER by
visits.visit_time
DESC LIMIT 0, 100
```
## Todo
- exporting
- JSON output
- browser list
- multiple profiles?

View file

@ -3,8 +3,11 @@ import argparse
import logging
import socket
import sqlite3
from typing import Literal
from typing import Literal, Tuple
# ref: https://en.wikiversity.org/wiki/Chromium_browsing_history_database
# Time offset of chromium to unixepoch
# This value is added to a Firefox timestamp to obtain the Chromium-format
# timestamp stored in the merged database.
# NOTE(review): the factor 1_000_000 presumably converts the offset from
# seconds to microseconds -- confirm against the reference above.
CHROMIUM_TIME_OFFSET = 11644473600 * 1_000_000
def init_db(
root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
@ -132,6 +135,43 @@ def init_db(
)
root_con.commit()
# cleanup
root_con.close()
def open_browser_db(database_path: str) -> Tuple[sqlite3.Connection, Literal["firefox", "chromium"]]:
    """Open a browser history database read-only and detect its type.

    The database is first opened with ``mode=ro&nolock=1`` and probed with a
    query against ``sqlite_master``. If the probe fails with "unable to open
    database file", the first connection is closed and the database is
    reopened with ``mode=ro`` only (this case is treated as a possible
    Firefox database).

    Args:
        database_path: Path to the browser's history database file.

    Returns:
        A tuple of the open connection and the detected database type.
        The caller is responsible for closing the connection.

    Raises:
        sqlite3.OperationalError: If the probe query fails for any reason
            other than "unable to open database file". Any connection opened
            here is closed before the error propagates.
    """
    dburi = f"file:{database_path}?mode=ro&nolock=1"
    logging.info(f"DB uri: {dburi}")
    con = sqlite3.connect(dburi, uri=True)
    cur = con.cursor()
    logging.debug(f"{con=}")
    logging.debug(f"{cur=}")
    try:
        # Probe query: forces SQLite to actually read the file so that an
        # unusable connection is detected here rather than later.
        res = cur.execute(
            """
            SELECT
                *
            FROM
                sqlite_master
            WHERE
                type='table' AND name='urls'
            """
        )
        res.fetchone()
    except sqlite3.OperationalError as e:
        # The first connection is unusable either way; close it so it is
        # not leaked when we reconnect or re-raise.
        con.close()
        if "unable to open database file" not in str(e):
            raise
        # might be firefox
        logging.debug("Failed to open db while executing SELECT from sqlite_master")
        dburi = f"file:{database_path}?mode=ro"
        con = sqlite3.connect(dburi, uri=True)
        cur = con.cursor()

    try:
        db_type = get_db_type(cur)
    except Exception:
        # Do not leak the connection if type detection fails; the caller
        # never receives it in that case.
        con.close()
        raise
    logging.info(f"DB type: {db_type}")
    return con, db_type
def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]:
res = cur.execute(
@ -148,7 +188,7 @@ def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]:
return db_type
def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str]:
def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str] | None:
res = root_cur.execute(
"""
SELECT
@ -162,7 +202,10 @@ def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str
""",
(name,),
)
browser_id, visits_time_max, database_path = res.fetchone()
res_one = res.fetchone()
if res_one is None:
return None
browser_id, visits_time_max, database_path = res_one
return (browser_id, visits_time_max, database_path)
@ -198,28 +241,67 @@ def convert_firefox_transition_type(transition_type: int) -> int:
return 0
def convert_firefox_datetime_to_choromium(time: str) -> str:
    """Translate a Firefox timestamp into its Chromium-format equivalent.

    The input is parsed as an integer, shifted by ``CHROMIUM_TIME_OFFSET``,
    and returned as a decimal string.
    """
    shifted = int(time) + CHROMIUM_TIME_OFFSET
    return f"{shifted}"
def add_db(
root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
):
print("Add history to root db")
browser_id, visits_time_max, database_path = get_browser_info(root_cur, args.name)
browser_info = get_browser_info(root_cur, args.name)
if browser_info is None:
print(f"browser {args.name} is not registered to the database")
logging.debug("Cleaning up (closing db connection)")
root_con.close()
exit(1)
browser_id, visits_time_max, database_path = browser_info
logging.info(f"{browser_id=}, {visits_time_max=}")
logging.info(f"Source: {database_path}")
logging.info(f"Root: {args.root_db}")
dburi = f"file:{database_path}?mode=ro&nolock=1"
logging.info(f"DB uri: {dburi}")
con = sqlite3.connect(dburi, uri=True)
con, db_type = open_browser_db(database_path)
cur = con.cursor()
db_type = get_db_type(cur)
logging.info(f"DB type: {db_type}")
match db_type:
case "firefox":
logging.error("Not implemented")
raise RuntimeError("Not implemented")
select_url_toupdate_sql = """
SELECT
moz_places.id,
moz_places.url,
moz_places.title
FROM
moz_historyvisits,
moz_places
WHERE
moz_historyvisits.visit_date > (?) - 11644473600000000
AND moz_historyvisits.place_id = moz_places.id
"""
select_visit_sql = """
SELECT
moz_historyvisits.id,
moz_historyvisits.place_id,
moz_places.url,
moz_places.title,
moz_historyvisits.visit_date,
moz_historyvisits.from_visit,
moz_historyvisits.visit_type
FROM
moz_historyvisits,
moz_places
WHERE
moz_historyvisits.visit_date > (?) - 11644473600000000
AND moz_historyvisits.place_id = moz_places.id
"""
convert_transition_type = convert_firefox_transition_type
# Firefox doesn't have transition_qualifier
convert_transition_qualifier = lambda _: None
convert_visit_time = convert_firefox_datetime_to_choromium
case "chromium":
select_url_toupdate_sql = """
SELECT
@ -251,6 +333,7 @@ def add_db(
"""
convert_transition_type = convert_chromium_transition_type
convert_transition_qualifier = lambda x: x
convert_visit_time = lambda x: x
res = cur.execute(select_url_toupdate_sql, [visits_time_max])
updating_urls = (
(
@ -269,8 +352,6 @@ def add_db(
updating_urls,
)
print(f"Wrote {root_cur.rowcount} urls")
root_con.commit()
print(f"Wrote {root_cur.rowcount} urls")
logging.info("updated urls in new visits")
res = cur.execute(select_visit_sql, [visits_time_max])
new_visits = (
@ -280,7 +361,7 @@ def add_db(
url_id,
url,
title,
visit_time,
convert_visit_time(visit_time),
from_visit,
convert_transition_qualifier(transition),
convert_transition_type(transition),
@ -295,8 +376,6 @@ def add_db(
new_visits,
)
print(f"Wrote {root_cur.rowcount} visits")
root_con.commit()
print(f"Wrote {root_cur.rowcount} visits")
logging.info("added new visits")
# update visits_time_max
@ -312,7 +391,7 @@ def add_db(
[browser_id],
)
(new_urls_time_max,) = res.fetchone()
logging.info(f"{new_urls_time_max=}")
logging.info(f"{new_urls_time_max=} (in chromium format)")
root_cur.execute(
"""
UPDATE
@ -327,6 +406,10 @@ def add_db(
root_con.commit()
logging.info("Updated browser information")
# cleanup
root_con.close()
con.close()
def main() -> int:
parser = argparse.ArgumentParser(description="Browser history merger")