mirror of
https://github.com/qwjyh/browser-history-merger.git
synced 2025-04-20 03:25:56 +09:00
Compare commits
10 commits
Author | SHA1 | Date | |
---|---|---|---|
eb98ce47cb | |||
7486098a11 | |||
4fd448e29e | |||
395ddb72cd | |||
c25ff12895 | |||
c24f02fe94 | |||
1c7e8c892c | |||
5970e4594a | |||
5215702820 | |||
a67e500c59 |
4 changed files with 166 additions and 23 deletions
|
@ -1 +1 @@
|
||||||
3.12.2
|
system@3.13.1
|
||||||
|
|
22
CHANGELOG.md
Normal file
22
CHANGELOG.md
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# Changelog
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Firefox support
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Explicitly close databases
|
||||||
|
- Updated readme
|
||||||
|
- Add changelog
|
||||||
|
|
||||||
|
## [0.1.0] - 2024-06-07
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- initial release
|
||||||
|
- `init` subcommand
|
||||||
|
- `add` subcommand
|
||||||
|
- Chromium support
|
||||||
|
|
||||||
|
[unreleased]: https://github.com/qwjyh/browser-history-merger/compare/v0.1.0...HEAD
|
||||||
|
[0.1.0]: https://github.com/qwjyh/browser-history-merger/releases/tag/v0.1.0
|
46
README.md
46
README.md
|
@ -2,17 +2,55 @@
|
||||||
|
|
||||||
Merge browser histories into a single database.
|
Merge browser histories into a single database.
|
||||||
|
|
||||||
# Usage
|
## Usage
|
||||||
## Initialization
|
### Initialization
|
||||||
For the first execution on each device and browser, do
|
For the first execution on each device and browser, do
|
||||||
```sh
|
```sh
|
||||||
browser-history-merger path/to/merged.db init browser-id path/to/browser/history/database
|
browser-history-merger path/to/merged.db init browser-id /abs/path/to/browser/history/database
|
||||||
```
|
```
|
||||||
`browser-id` should be unique to identify browser and machine.
|
`browser-id` should be unique to identify browser and machine.
|
||||||
|
|
||||||
## Add histories
|
### Add histories
|
||||||
Then add histories to the database by
|
Then add histories to the database by
|
||||||
```sh
|
```sh
|
||||||
browser-history-merger path/to/merged.db add browser-id
|
browser-history-merger path/to/merged.db add browser-id
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Supported environments
|
||||||
|
Python 3.12 (requires only the standard library)
|
||||||
|
|
||||||
|
- Chromium
|
||||||
|
- Tested:
|
||||||
|
- chrome on windows, linux
|
||||||
|
- brave on windows, linux
|
||||||
|
- vivaldi on linux
|
||||||
|
- Firefox
|
||||||
|
- Tested:
|
||||||
|
- firefox on windows
|
||||||
|
|
||||||
|
## Tips
|
||||||
|
The program is a single file `./src/browser_history_merger/__init__.py` and can be used as a script.
|
||||||
|
|
||||||
|
### Example SQL to see the history
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT
|
||||||
|
browsers.name,
|
||||||
|
visits.title,
|
||||||
|
visits.url,
|
||||||
|
datetime(visits.visit_time / 1000000 - 11644473600, 'unixepoch')
|
||||||
|
FROM
|
||||||
|
visits,
|
||||||
|
browsers
|
||||||
|
WHERE
|
||||||
|
visits.browser = browsers.id
|
||||||
|
ORDER BY
|
||||||
|
visits.visit_time
|
||||||
|
DESC LIMIT 0, 100
|
||||||
|
```
|
||||||
|
|
||||||
|
## Todo
|
||||||
|
- exporting
|
||||||
|
- JSON output
|
||||||
|
- browser list
|
||||||
|
- multiple profiles?
|
||||||
|
|
|
@ -3,8 +3,11 @@ import argparse
|
||||||
import logging
|
import logging
|
||||||
import socket
|
import socket
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from typing import Literal
|
from typing import Literal, Tuple
|
||||||
|
|
||||||
|
# ref: https://en.wikiversity.org/wiki/Chromium_browsing_history_database
|
||||||
|
# Time offset of chromium to unixepoch
|
||||||
|
CHROMIUM_TIME_OFFSET = 11644473600 * 1_000_000
|
||||||
|
|
||||||
def init_db(
|
def init_db(
|
||||||
root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
|
root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
|
||||||
|
@ -132,6 +135,43 @@ def init_db(
|
||||||
)
|
)
|
||||||
root_con.commit()
|
root_con.commit()
|
||||||
|
|
||||||
|
# cleanup
|
||||||
|
root_con.close()
|
||||||
|
|
||||||
|
|
||||||
|
def open_browser_db(database_path: str) -> Tuple[sqlite3.Connection, Literal["firefox", "chromium"]]:
    """
    Open a browser history database read-only and detect its type.

    The database is first opened with the URI options `mode=ro&nolock=1` and
    probed with a query against `sqlite_master`. If that probe fails with
    "unable to open database file", the first connection is closed and the
    database is reopened with `mode=ro` only.

    Args:
        database_path: Path to the browser history database; it is
            interpolated into a `file:` URI as-is.

    Returns:
        A tuple of the open connection and the database type reported by
        `get_db_type`. The caller is responsible for closing the connection.

    Raises:
        sqlite3.OperationalError: If the probe fails for any reason other
            than "unable to open database file".
    """
    dburi = f"file:{database_path}?mode=ro&nolock=1"
    logging.info(f"DB uri: {dburi}")
    con = sqlite3.connect(dburi, uri=True)
    cur = con.cursor()

    logging.debug(f"{con=}")
    logging.debug(f"{cur=}")
    try:
        res = cur.execute(
            """
            SELECT
                *
            FROM
                sqlite_master
            WHERE
                type='table' AND name='urls'
            """
        )
        res.fetchone()
    except sqlite3.OperationalError as e:
        # The probing connection is unusable either way; release it so the
        # handle is not leaked when we retry or re-raise.
        con.close()
        if "unable to open database file" not in str(e):
            raise
        # might be firefox
        logging.debug("Failed to open db while executing SELECT from sqlite_master")
        dburi = f"file:{database_path}?mode=ro"
        con = sqlite3.connect(dburi, uri=True)
        cur = con.cursor()

    try:
        db_type = get_db_type(cur)
    except Exception:
        # Do not hand back (or leak) a connection whose type is unknown.
        con.close()
        raise
    logging.info(f"DB type: {db_type}")
    return con, db_type
|
||||||
|
|
||||||
|
|
||||||
def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]:
|
def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]:
|
||||||
res = cur.execute(
|
res = cur.execute(
|
||||||
|
@ -148,7 +188,7 @@ def get_db_type(cur: sqlite3.Cursor) -> Literal["firefox", "chromium"]:
|
||||||
return db_type
|
return db_type
|
||||||
|
|
||||||
|
|
||||||
def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str]:
|
def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str] | None:
|
||||||
res = root_cur.execute(
|
res = root_cur.execute(
|
||||||
"""
|
"""
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -162,7 +202,10 @@ def get_browser_info(root_cur: sqlite3.Cursor, name: str) -> tuple[int, int, str
|
||||||
""",
|
""",
|
||||||
(name,),
|
(name,),
|
||||||
)
|
)
|
||||||
browser_id, visits_time_max, database_path = res.fetchone()
|
res_one = res.fetchone()
|
||||||
|
if res_one is None:
|
||||||
|
return None
|
||||||
|
browser_id, visits_time_max, database_path = res_one
|
||||||
return (browser_id, visits_time_max, database_path)
|
return (browser_id, visits_time_max, database_path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -198,28 +241,67 @@ def convert_firefox_transition_type(transition_type: int) -> int:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def convert_firefox_datetime_to_choromium(time: str) -> str:
    """
    Shift a Firefox visit timestamp onto the Chromium time base.

    The value is parsed as an integer, `CHROMIUM_TIME_OFFSET` is added to it,
    and the sum is returned as a decimal string.
    """
    shifted = CHROMIUM_TIME_OFFSET + int(time)
    return f"{shifted}"
|
||||||
|
|
||||||
|
|
||||||
def add_db(
|
def add_db(
|
||||||
root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
|
root_con: sqlite3.Connection, root_cur: sqlite3.Cursor, args: argparse.Namespace
|
||||||
):
|
):
|
||||||
print("Add history to root db")
|
print("Add history to root db")
|
||||||
browser_id, visits_time_max, database_path = get_browser_info(root_cur, args.name)
|
browser_info = get_browser_info(root_cur, args.name)
|
||||||
|
if browser_info is None:
|
||||||
|
print(f"browser {args.name} is not registered to the database")
|
||||||
|
logging.debug("Cleaning up (closing db connection)")
|
||||||
|
root_con.close()
|
||||||
|
exit(1)
|
||||||
|
browser_id, visits_time_max, database_path = browser_info
|
||||||
logging.info(f"{browser_id=}, {visits_time_max=}")
|
logging.info(f"{browser_id=}, {visits_time_max=}")
|
||||||
|
|
||||||
logging.info(f"Source: {database_path}")
|
logging.info(f"Source: {database_path}")
|
||||||
logging.info(f"Root: {args.root_db}")
|
logging.info(f"Root: {args.root_db}")
|
||||||
|
|
||||||
dburi = f"file:{database_path}?mode=ro&nolock=1"
|
con, db_type = open_browser_db(database_path)
|
||||||
logging.info(f"DB uri: {dburi}")
|
|
||||||
con = sqlite3.connect(dburi, uri=True)
|
|
||||||
cur = con.cursor()
|
cur = con.cursor()
|
||||||
|
|
||||||
db_type = get_db_type(cur)
|
|
||||||
logging.info(f"DB type: {db_type}")
|
|
||||||
|
|
||||||
match db_type:
|
match db_type:
|
||||||
case "firefox":
|
case "firefox":
|
||||||
logging.error("Not implemented")
|
select_url_toupdate_sql = """
|
||||||
raise RuntimeError("Not implemented")
|
SELECT
|
||||||
|
moz_places.id,
|
||||||
|
moz_places.url,
|
||||||
|
moz_places.title
|
||||||
|
FROM
|
||||||
|
moz_historyvisits,
|
||||||
|
moz_places
|
||||||
|
WHERE
|
||||||
|
moz_historyvisits.visit_date > (?) - 11644473600000000
|
||||||
|
AND moz_historyvisits.place_id = moz_places.id
|
||||||
|
"""
|
||||||
|
select_visit_sql = """
|
||||||
|
SELECT
|
||||||
|
moz_historyvisits.id,
|
||||||
|
moz_historyvisits.place_id,
|
||||||
|
moz_places.url,
|
||||||
|
moz_places.title,
|
||||||
|
moz_historyvisits.visit_date,
|
||||||
|
moz_historyvisits.from_visit,
|
||||||
|
moz_historyvisits.visit_type
|
||||||
|
FROM
|
||||||
|
moz_historyvisits,
|
||||||
|
moz_places
|
||||||
|
WHERE
|
||||||
|
moz_historyvisits.visit_date > (?) - 11644473600000000
|
||||||
|
AND moz_historyvisits.place_id = moz_places.id
|
||||||
|
"""
|
||||||
|
convert_transition_type = convert_firefox_transition_type
|
||||||
|
# Firefox doesn't have transition_qualifier
|
||||||
|
convert_transition_qualifier = lambda _: None
|
||||||
|
convert_visit_time = convert_firefox_datetime_to_choromium
|
||||||
case "chromium":
|
case "chromium":
|
||||||
select_url_toupdate_sql = """
|
select_url_toupdate_sql = """
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -251,6 +333,7 @@ def add_db(
|
||||||
"""
|
"""
|
||||||
convert_transition_type = convert_chromium_transition_type
|
convert_transition_type = convert_chromium_transition_type
|
||||||
convert_transition_qualifier = lambda x: x
|
convert_transition_qualifier = lambda x: x
|
||||||
|
convert_visit_time = lambda x: x
|
||||||
res = cur.execute(select_url_toupdate_sql, [visits_time_max])
|
res = cur.execute(select_url_toupdate_sql, [visits_time_max])
|
||||||
updating_urls = (
|
updating_urls = (
|
||||||
(
|
(
|
||||||
|
@ -269,8 +352,6 @@ def add_db(
|
||||||
updating_urls,
|
updating_urls,
|
||||||
)
|
)
|
||||||
print(f"Wrote {root_cur.rowcount} urls")
|
print(f"Wrote {root_cur.rowcount} urls")
|
||||||
root_con.commit()
|
|
||||||
print(f"Wrote {root_cur.rowcount} urls")
|
|
||||||
logging.info("updated urls in new visits")
|
logging.info("updated urls in new visits")
|
||||||
res = cur.execute(select_visit_sql, [visits_time_max])
|
res = cur.execute(select_visit_sql, [visits_time_max])
|
||||||
new_visits = (
|
new_visits = (
|
||||||
|
@ -280,7 +361,7 @@ def add_db(
|
||||||
url_id,
|
url_id,
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
visit_time,
|
convert_visit_time(visit_time),
|
||||||
from_visit,
|
from_visit,
|
||||||
convert_transition_qualifier(transition),
|
convert_transition_qualifier(transition),
|
||||||
convert_transition_type(transition),
|
convert_transition_type(transition),
|
||||||
|
@ -295,8 +376,6 @@ def add_db(
|
||||||
new_visits,
|
new_visits,
|
||||||
)
|
)
|
||||||
print(f"Wrote {root_cur.rowcount} visits")
|
print(f"Wrote {root_cur.rowcount} visits")
|
||||||
root_con.commit()
|
|
||||||
print(f"Wrote {root_cur.rowcount} visits")
|
|
||||||
logging.info("added new visits")
|
logging.info("added new visits")
|
||||||
|
|
||||||
# update visits_time_max
|
# update visits_time_max
|
||||||
|
@ -312,7 +391,7 @@ def add_db(
|
||||||
[browser_id],
|
[browser_id],
|
||||||
)
|
)
|
||||||
(new_urls_time_max,) = res.fetchone()
|
(new_urls_time_max,) = res.fetchone()
|
||||||
logging.info(f"{new_urls_time_max=}")
|
logging.info(f"{new_urls_time_max=} (in chromium format)")
|
||||||
root_cur.execute(
|
root_cur.execute(
|
||||||
"""
|
"""
|
||||||
UPDATE
|
UPDATE
|
||||||
|
@ -327,6 +406,10 @@ def add_db(
|
||||||
root_con.commit()
|
root_con.commit()
|
||||||
logging.info("Updated browser information")
|
logging.info("Updated browser information")
|
||||||
|
|
||||||
|
# cleanup
|
||||||
|
root_con.close()
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
parser = argparse.ArgumentParser(description="Browser history merger")
|
parser = argparse.ArgumentParser(description="Browser history merger")
|
||||||
|
|
Loading…
Add table
Reference in a new issue