#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import sys
import argparse
from pathlib import Path
from datetime import date
# ===== Utilities: month names and ordinals =====
# English month names in calendar order (index 0 is January).
MONTHS_FULL = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
# Three-letter abbreviations in the same order as MONTHS_FULL.
MONTHS_ABBR = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
# Lookup tables between month names and 1-based month numbers.
MONTH_TO_NUM = {name: number for number, name in enumerate(MONTHS_FULL, start=1)}
ABBR_TO_NUM = {name: number for number, name in enumerate(MONTHS_ABBR, start=1)}
NUM_TO_MONTH = {number: name for number, name in enumerate(MONTHS_FULL, start=1)}
def ordinal(n: int) -> str:
    """Return ``n`` followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, ...)."""
    # Numbers whose last two digits are 11, 12 or 13 always take "th".
    last_two = n % 100
    if 11 <= last_two <= 13:
        return f"{n}th"
    by_last_digit = {1: "st", 2: "nd", 3: "rd"}
    last_digit = n % 10
    if last_digit in by_last_digit:
        return f"{n}{by_last_digit[last_digit]}"
    return f"{n}th"
def canonical_stem(d: date) -> str:
    """Return the canonical page name for ``d``.

    The shape is "<full month name> <day with ordinal suffix>, <year>"
    (the "MMMM do, yyyy" target format; note the lowercase ordinal suffix).
    """
    month_name = NUM_TO_MONTH[d.month]
    day_label = ordinal(d.day)
    return "{} {}, {}".format(month_name, day_label, d.year)
# ===== Parse assorted date strings (used to normalize [[...]] links) =====
def parse_date_token(s: str):
    """Parse the text of a link into a ``date``, or return ``None``.

    Recognized forms (surrounding whitespace is ignored):

    * full month name, day with optional st/nd/rd/th, comma, 4-digit year;
    * abbreviated month name (optionally followed by a dot), same layout;
    * 4-digit year, month, day separated by ``.``, ``_``, ``/`` or ``-``.

    Month names are matched case-insensitively. ``None`` is returned when no
    form matches or when the numbers do not form a real calendar date.
    """
    text = s.strip()

    # Both month-name layouts share one template; only the name table and the
    # optional dot after an abbreviation differ.
    month_forms = (
        (MONTHS_FULL, MONTH_TO_NUM, ""),
        (MONTHS_ABBR, ABBR_TO_NUM, r"\.?"),
    )
    for names, lookup, dot in month_forms:
        pattern = (
            r"^\s*(" + "|".join(names) + r")" + dot
            + r"\s+(\d{1,2})(?:st|nd|rd|th)?,\s*(\d{4})\s*$"
        )
        found = re.match(pattern, text, flags=re.IGNORECASE)
        if found is None:
            continue
        name, day_text, year_text = found.groups()
        month = lookup[name.capitalize()]
        day, year = int(day_text), int(year_text)
        try:
            return date(year, month, day)
        except ValueError:
            # Matched the layout but is not a real date (e.g. day 31 in a
            # 30-day month): give up without trying the remaining forms.
            return None

    numeric = re.match(r"^\s*(\d{4})[._/\-](\d{1,2})[._/\-](\d{1,2})\s*$", text)
    if numeric is None:
        return None
    year, month, day = (int(part) for part in numeric.groups())
    try:
        return date(year, month, day)
    except ValueError:
        return None
# ===== Link normalization: [[date]] -> [[MMMM do, yyyy]] =====
# Matches a plain [[...]] reference. The inner text may not contain "]", "|"
# or "#", so references containing those characters never match.
REF_PATTERN = re.compile(r"\[\[\s*([^\]\|#]+?)\s*\]\]")


def rewrite_date_page_refs(text: str):
    """Rewrite every ``[[<date>]]`` reference in ``text`` to canonical form.

    Each reference matched by ``REF_PATTERN`` whose inner text parses as a
    date is replaced by ``[[<canonical_stem(date)>]]``; anything else is left
    untouched.

    Returns:
        ``(new_text, replaced)`` where ``replaced`` counts only the
        references whose text actually changed. References that were already
        canonical are not counted, so callers that write the file when
        ``replaced > 0`` do not rewrite unchanged files.
    """
    replaced = 0

    def _repl(m):
        nonlocal replaced
        d = parse_date_token(m.group(1))
        if d is None:
            return m.group(0)
        canonical = f"[[{canonical_stem(d)}]]"
        # Only count real modifications; an already-canonical link is a no-op.
        if canonical != m.group(0):
            replaced += 1
        return canonical

    new_text = REF_PATTERN.sub(_repl, text)
    return new_text, replaced
# ===== Safe renaming (avoid name collisions when adding the "merged " prefix) =====
def prefixed_unique_path(p: Path, prefix="merged ") -> Path:
    """Return a sibling path of ``p`` named ``prefix + p.name`` that does not exist.

    If the prefixed name is already taken, " (1)", " (2)", ... is inserted
    before the suffix until an unused name is found. Nothing is created or
    renamed here; only existence is checked.
    """
    first_choice = p.with_name(prefix + p.name)
    base, extension = first_choice.stem, first_choice.suffix
    candidate = first_choice
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = first_choice.with_name(f"{base} ({attempt}){extension}")
    return candidate
# ===== Extract the date from a file name =====
def parse_date_from_stem(stem: str):
    """Parse a file stem into ``(date, style)``, or return ``None``.

    ``style`` is one of:

    * ``"canonical"``  - the stem is exactly ``canonical_stem(date)``;
    * ``"long_month"`` - full month name, but not the exact canonical
      spelling (no ordinal suffix, wrong suffix, zero-padded day, odd
      spacing, ...);
    * ``"abbr_month"`` - abbreviated month name (case-insensitive, optional
      dot);
    * ``"iso"``        - ``YYYY-MM-DD`` with ``.``, ``_`` or ``-`` separators.

    ``None`` is returned when the stem matches no form or is not a real
    calendar date.
    """
    s = stem.strip()

    # Full month names are matched case-sensitively on purpose: the lookup in
    # MONTH_TO_NUM uses the matched text as-is.
    m = re.match(rf"^({'|'.join(MONTHS_FULL)})\s+(\d{{1,2}})(?:st|nd|rd|th)?,\s*(\d{{4}})$", s)
    if m:
        month = MONTH_TO_NUM[m.group(1)]
        day = int(m.group(2))
        year = int(m.group(3))
        try:
            d = date(year, month, day)
        except ValueError:
            return None
        # A stem is canonical only when it is byte-for-byte the target name.
        # Merely containing an ordinal suffix is not enough: names such as
        # "January 01st, 2024" or "January 1th, 2024" must be treated as
        # sources so they get renamed/merged instead of being kept as targets.
        style = "canonical" if stem == canonical_stem(d) else "long_month"
        return d, style

    m = re.match(rf"^({'|'.join(MONTHS_ABBR)})\.?\s+(\d{{1,2}})(?:st|nd|rd|th)?,\s*(\d{{4}})$", s, flags=re.IGNORECASE)
    if m:
        month = ABBR_TO_NUM[m.group(1).capitalize()]
        day = int(m.group(2))
        year = int(m.group(3))
        try:
            return date(year, month, day), "abbr_month"
        except ValueError:
            return None

    m = re.match(r"^(\d{4})[._-](\d{1,2})[._-](\d{1,2})$", s)
    if m:
        y, mo, d = map(int, m.groups())
        try:
            return date(y, mo, d), "iso"
        except ValueError:
            return None

    return None
# ===== File reading and writing =====
def read_text(path: Path) -> str:
    """Return the contents of ``path`` decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped (``errors="ignore"``) rather
    than raising.
    """
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()
def write_text(path: Path, text: str):
    """Replace the contents of ``path`` with ``text`` encoded as UTF-8."""
    with path.open("w", encoding="utf-8") as handle:
        handle.write(text)
def append_transformed(src: Path, dest: Path) -> tuple[int,int]:
    """Append the link-normalized contents of ``src`` to ``dest``.

    A single newline is written first when ``dest`` already holds data, and
    the appended text is always terminated with a newline.

    Returns:
        ``(length of the normalized text, number of links reported as
        replaced)``. The length does not include the added newlines.
    """
    fixed, replaced = rewrite_date_page_refs(read_text(src))

    # Decide on the separator up front: a missing destination and an empty
    # one are treated the same (no leading newline).
    needs_separator = dest.exists() and dest.stat().st_size > 0

    pieces = []
    if needs_separator:
        pieces.append("\n")
    pieces.append(fixed)
    if not fixed.endswith("\n"):
        pieces.append("\n")

    with dest.open("a", encoding="utf-8") as out:
        out.write("".join(pieces))
    return len(fixed), replaced
def rewrite_links_in_file(path: Path) -> int:
    """Normalize the date links inside ``path`` in place.

    The file is written back only when at least one link was reported as
    replaced. Returns that replacement count.
    """
    updated, count = rewrite_date_page_refs(read_text(path))
    if count <= 0:
        return count
    write_text(path, updated)
    return count
# ===== Scan for candidate files =====
def find_candidates(root: Path):
    """Group the date-named ``.md`` files directly under ``root`` by date.

    The scan is not recursive. Files whose name starts with "merged "
    (case-insensitive) and files whose stem does not parse as a date are
    ignored.

    Returns:
        ``{date: {"canonical": Path | None, "sources": [Path, ...]}}``.
        Dates that only have a canonical file are kept too; the caller
        decides which dates need work.
    """
    found = {}
    for md_file in root.glob("*.md"):
        already_merged = md_file.name.lower().startswith("merged ")
        parsed = None if already_merged else parse_date_from_stem(md_file.stem)
        if not parsed:
            continue
        when, style = parsed
        if when not in found:
            found[when] = {"canonical": None, "sources": []}
        entry = found[when]
        if style != "canonical":
            entry["sources"].append(md_file)
        else:
            entry["canonical"] = md_file
    return found
def _date_status(info: dict) -> str:
    """Return the status label printed for one date bucket."""
    canonical, sources = info["canonical"], info["sources"]
    if canonical and not sources:
        return "OK 已是 MMMM do, yyyy(跳過)"
    if canonical:
        return f"合併 {len(sources)} 來源"
    if len(sources) == 1:
        return "改名為 MMMM do, yyyy"
    return f"合併 {len(sources)} 來源(並建立目標)"


def _rename_single_source(src: Path, target: Path, d: date, dry_run: bool) -> None:
    """Normalize links in the only source file and rename it to ``target``."""
    print(f"[SINGLE] 僅有來源:{src.name}")
    if dry_run:
        print(f" [REWRITE] 將在檔內正規化 [[日期]] → [[{canonical_stem(d)}]]")
        print(f" [RENAME] '{src.name}' → '{target.name}'")
        return
    replaced = rewrite_links_in_file(src)
    if replaced:
        print(f" [REWRITE] 已正規化 {replaced} 處 [[日期]]")
    src.rename(target)
    print(f" [RENAMED] '{src.name}' → '{target.name}'")
    # Safety net: run the normalization once more on the renamed file.
    replaced_t = rewrite_links_in_file(target)
    if replaced_t:
        print(f" [TARGET REWRITE] {target.name} 內正規化 {replaced_t} 處 [[日期]]")


def _merge_sources(sources: list, target: Path, will_create: bool, dry_run: bool) -> None:
    """Append every source to ``target`` and rename each with a "merged " prefix."""
    if will_create:
        print(f"[TARGET] {target.name}(將建立)")
    else:
        print(f"[TARGET] {target.name}")

    # Work out all new names before touching any file.
    planned = [(src, prefixed_unique_path(src)) for src in sources]

    if dry_run:
        print(f" [TARGET REWRITE] 將正規化 {target.name} 內的 [[日期]](若存在)")
        for src, new_path in planned:
            print(f" [COPY] '{src.name}' → '{target.name}'(先正規化連結)")
            print(f" [RENAME] '{src.name}' → '{new_path.name}'")
        return

    if will_create:
        target.write_text("", encoding="utf-8")
        print(f"[CREATE TARGET] 已建立:{target.name}")
    replaced_t = rewrite_links_in_file(target)
    if replaced_t:
        print(f" [TARGET REWRITE] {target.name} 內正規化 {replaced_t} 處 [[日期]]")

    total_chars = 0
    total_refs = 0
    for src, new_path in planned:
        appended_len, replaced_refs = append_transformed(src, target)
        total_chars += appended_len
        total_refs += replaced_refs
        print(f" [COPIED] '{src.name}' → '{target.name}' (+{appended_len} chars, fixed {replaced_refs} refs)")
        src.rename(new_path)
        print(f" [RENAMED] '{src.name}' → '{new_path.name}'")
    print(f"[SUMMARY] 合併 {len(planned)} 個來源,總附加 ~{total_chars} 字元、正規化 {total_refs} 個 [[日期]] → {target.name}")


def process_dates(root: Path, mapping: dict, limit: int, dry_run: bool):
    """Rename or merge the date files described by ``mapping``.

    Args:
        root: Folder in which a missing canonical target file is created.
        mapping: ``{date: {"canonical": Path | None, "sources": [Path, ...]}}``.
        limit: Maximum number of dates to process, newest first. Negative
            values are treated as 0 (nothing is processed).
        dry_run: When true, only print the planned actions.

    Dates that already have a canonical file and no sources are skipped.
    A date with a single source and no existing target is renamed; every
    other date is merged into the target and its sources are renamed with a
    "merged " prefix.
    """
    if not mapping:
        print("沒有可處理的日期檔。")
        return

    dates_sorted = sorted(mapping, reverse=True)
    # Dates needing work are those with sources to merge or with no canonical
    # file yet. A canonical file without sources is already correct.
    work_dates = [
        d for d in dates_sorted
        if not (mapping[d]["canonical"] and not mapping[d]["sources"])
    ]
    if not work_dates:
        print("找不到需要處理的日期(最新的都已是 MMMM do, yyyy 且無重複)。")
        return

    # A negative limit would slice from the end of the list and silently
    # process "all but the oldest N" dates; treat it as "process nothing".
    if limit is not None and limit < 0:
        limit = 0
    selected = work_dates[:limit]
    selected_set = set(selected)  # constant-time membership for the report

    print("日期狀態(由新到舊):")
    for d in dates_sorted:
        tag = " ← 將處理" if d in selected_set else ""
        print(f"- {d.isoformat()}:{_date_status(mapping[d])}{tag}")

    for d in selected:
        info = mapping[d]
        target = info["canonical"] if info["canonical"] else root / f"{canonical_stem(d)}.md"
        will_create = (info["canonical"] is None) and (not target.exists())
        sources_sorted = sorted(info["sources"], key=lambda p: p.name.lower())
        print(f"\n=== {d.isoformat()} ===")

        # Single source and no target yet: plain rename, no "merged " copy.
        if info["canonical"] is None and len(sources_sorted) == 1 and will_create:
            _rename_single_source(sources_sorted[0], target, d, dry_run)
            continue

        # Otherwise a target exists or there are several sources: merge.
        _merge_sources(sources_sorted, target, will_create, dry_run)

    print(f"\n完成!此次處理日期數量:{len(selected)}。")
def main():
    """Command-line entry point: parse arguments, scan the folder, process dates.

    Exits with status 1 when the given root is not an existing directory, and
    with argparse's usage error (status 2) when ``--limit`` is negative.
    """
    ap = argparse.ArgumentParser(description="合併/正規化日記到 MMMM do, yyyy;跳過已是正確格式且無重複的最新日期。")
    ap.add_argument("root", nargs="?", default=".", help="要處理的資料夾(預設:目前資料夾)")
    # The help text takes the default from argparse so it cannot drift from
    # the real value (it previously claimed 1 while the default was 1000).
    ap.add_argument("-n","--limit", type=int, default=1000, help="一次處理的『日期數量』(預設:%(default)s)")
    ap.add_argument("--dry-run", action="store_true", help="僅列出將進行的動作,不修改檔案")
    args = ap.parse_args()

    # A negative count would be used as a slice bound downstream and select
    # an unexpected subset of dates, so reject it up front.
    if args.limit < 0:
        ap.error("--limit 不可為負數")

    root = Path(args.root).expanduser().resolve()
    # The tool scans a folder, so an existing regular file is rejected too.
    if not root.is_dir():
        print(f"找不到資料夾:{root}")
        sys.exit(1)

    mapping = find_candidates(root)
    process_dates(root, mapping, limit=args.limit, dry_run=args.dry_run)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
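# (Web-page residue, not part of the script: "No comments:")
# (Web-page residue, not part of the script: "Post a comment")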