my-recipes/backend/backup_db.py
2026-01-26 16:55:58 +02:00

214 lines
6.1 KiB
Python

"""
Database backup script for R2 storage
Exports PostgreSQL database, compresses it, and uploads to Cloudflare R2
"""
import os
import subprocess
import gzip
import shutil
from datetime import datetime
from pathlib import Path
import boto3
from botocore.config import Config
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# R2 Configuration — all four are required; main() validates their presence
# before any work starts (no defaults: missing credentials must fail loudly).
R2_ENDPOINT = os.getenv("R2_ENDPOINT")
R2_ACCESS_KEY = os.getenv("R2_ACCESS_KEY")
R2_SECRET_KEY = os.getenv("R2_SECRET_KEY")
R2_BUCKET = os.getenv("R2_BUCKET")

# Database Configuration — defaults suit local development; override via env
# in any other environment (the password default is NOT production-safe).
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "recipes_db")
DB_USER = os.getenv("DB_USER", "recipes_user")
DB_PASSWORD = os.getenv("DB_PASSWORD", "recipes_password")

# Backup directory — dumps land next to this script; created on import so the
# functions below can assume it exists.
BACKUP_DIR = Path(__file__).parent / "backups"
BACKUP_DIR.mkdir(exist_ok=True)
def create_db_dump():
    """Dump the PostgreSQL database to a timestamped ``.sql`` file.

    Returns:
        Path: location of the newly written dump inside ``BACKUP_DIR``.

    Raises:
        subprocess.CalledProcessError: if ``pg_dump`` exits non-zero.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = BACKUP_DIR / f"recipes_db_{stamp}.sql"
    print(f"Creating database dump: {out_path}")

    # pg_dump takes the password via the PGPASSWORD env var, not a CLI flag.
    dump_env = {**os.environ, 'PGPASSWORD': DB_PASSWORD}

    pg_dump_cmd = [
        "pg_dump",
        "-h", DB_HOST,
        "-p", DB_PORT,
        "-U", DB_USER,
        "-d", DB_NAME,
        "-f", str(out_path),
        "--no-owner",  # Don't include ownership commands
        "--no-acl",    # Don't include access privileges
    ]

    try:
        subprocess.run(pg_dump_cmd, env=dump_env, check=True,
                       capture_output=True, text=True)
    except subprocess.CalledProcessError as exc:
        print(f"✗ Error creating database dump: {exc.stderr}")
        raise
    print(f"✓ Database dump created: {out_path}")
    return out_path
def compress_file(file_path):
    """Gzip-compress *file_path*, delete the original, and print size stats.

    Args:
        file_path (Path): file to compress; removed once the ``.gz`` copy
            has been fully written.

    Returns:
        Path: path of the new ``<name>.gz`` file alongside the original.
    """
    compressed_file = Path(str(file_path) + ".gz")
    print(f"Compressing {file_path.name}...")

    # BUG FIX: capture the size *before* unlinking. The previous code stat'ed
    # the file after deleting it, so original_size was always 0 and the
    # report always showed "Original: 0.00 KB" with a 0.0% reduction.
    original_size = file_path.stat().st_size

    with open(file_path, 'rb') as f_in:
        with gzip.open(compressed_file, 'wb', compresslevel=9) as f_out:
            shutil.copyfileobj(f_in, f_out)

    # Remove uncompressed file — the .gz copy is complete at this point.
    file_path.unlink()

    compressed_size = compressed_file.stat().st_size
    ratio = (1 - compressed_size / original_size) * 100 if original_size > 0 else 0
    print(f"✓ Compressed to {compressed_file.name}")
    print(f" Original: {original_size / 1024:.2f} KB")
    print(f" Compressed: {compressed_size / 1024:.2f} KB")
    print(f" Ratio: {ratio:.1f}% reduction")
    return compressed_file
def upload_to_r2(file_path):
    """Upload a backup file to the Cloudflare R2 bucket.

    The object key is the file's basename; backup date and database name are
    attached as object metadata. Prints and re-raises any upload error.

    Returns:
        bool: True on successful upload.
    """
    print(f"Uploading {file_path.name} to R2...")

    # R2 speaks the S3 protocol: v4 signatures + path-style addressing.
    client = boto3.client(
        's3',
        endpoint_url=R2_ENDPOINT,
        aws_access_key_id=R2_ACCESS_KEY,
        aws_secret_access_key=R2_SECRET_KEY,
        config=Config(signature_version='s3v4',
                      s3={'addressing_style': 'path'}),
    )

    try:
        client.upload_file(
            str(file_path),
            R2_BUCKET,
            file_path.name,
            ExtraArgs={'Metadata': {
                'backup-date': datetime.now().isoformat(),
                'db-name': DB_NAME,
            }},
        )
    except Exception as err:
        print(f"✗ Error uploading to R2: {err}")
        raise
    print(f"✓ Uploaded to R2: s3://{R2_BUCKET}/{file_path.name}")
    return True
def list_r2_backups():
    """Print every object in the R2 backup bucket, newest first.

    Errors are printed and swallowed: a failed listing is informational and
    must not abort an otherwise successful backup run.
    """
    print(f"\nListing backups in R2 bucket: {R2_BUCKET}")
    client = boto3.client(
        's3',
        endpoint_url=R2_ENDPOINT,
        aws_access_key_id=R2_ACCESS_KEY,
        aws_secret_access_key=R2_SECRET_KEY,
        config=Config(signature_version='s3v4',
                      s3={'addressing_style': 'path'}),
    )
    try:
        listing = client.list_objects_v2(Bucket=R2_BUCKET)
        if 'Contents' not in listing:
            # An empty bucket response simply omits the Contents key.
            print("No backups found")
            return
        entries = listing['Contents']
        print(f"\nFound {len(entries)} backup(s):")
        for entry in sorted(entries, key=lambda o: o['LastModified'],
                            reverse=True):
            megabytes = entry['Size'] / (1024 * 1024)
            print(f" - {entry['Key']}")
            print(f" Size: {megabytes:.2f} MB")
            print(f" Date: {entry['LastModified']}")
    except Exception as err:
        print(f"✗ Error listing backups: {err}")
def cleanup_old_local_backups(keep_last=3):
    """Delete local ``.sql.gz`` backups beyond the *keep_last* newest.

    Args:
        keep_last (int): number of most-recent archives to retain.
    """
    # Sort newest-first by modification time so the tail is the stale set.
    archives = sorted(BACKUP_DIR.glob("*.sql.gz"),
                      key=lambda p: p.stat().st_mtime,
                      reverse=True)
    if len(archives) > keep_last:
        print(f"\nCleaning up old local backups (keeping last {keep_last})...")
        for stale in archives[keep_last:]:
            print(f" Removing: {stale.name}")
            stale.unlink()
def main():
    """Run the full backup pipeline: dump, compress, upload, list, prune.

    Raises:
        ValueError: if any R2 credential is missing from the environment.
        Exception: re-raises whatever step failure occurred, after logging.
    """
    banner = "=" * 60
    print(banner)
    print("Database Backup to Cloudflare R2")
    print(banner)
    print()
    try:
        # Fail fast before touching the database if R2 is misconfigured.
        if not all([R2_ENDPOINT, R2_ACCESS_KEY, R2_SECRET_KEY, R2_BUCKET]):
            raise ValueError("Missing R2 credentials in environment variables")

        sql_dump = create_db_dump()
        gz_archive = compress_file(sql_dump)
        upload_to_r2(gz_archive)
        list_r2_backups()
        cleanup_old_local_backups(keep_last=3)

        print("\n" + banner)
        print("✓ Backup completed successfully!")
        print(banner)
    except Exception as exc:
        print("\n" + banner)
        print(f"✗ Backup failed: {exc}")
        print(banner)
        raise
# Script entry point: run the backup only when executed directly, not on import.
if __name__ == "__main__":
    main()