Feature/database backups (#1040)

* add annotations to docs

* alchemy data dumper

* initial tests

* sourcery refactor

* db backups/restore

* potential postgres fix

* potential postgres fix

* this is terrible

* potential pg fix

* cleanup

* remove unused import

* fix comparison

* generate frontend types

* update timestamp and add directory filter

* rewrite to new admin-api

* update backup routers

* add file_token response helper

* update imports

* remove test_backup
This commit is contained in:
Hayden
2022-03-13 15:42:22 -08:00
committed by GitHub
parent 2d1ef7173d
commit 8eefa05393
32 changed files with 756 additions and 229 deletions

View File

View File

@@ -0,0 +1,138 @@
import datetime
import json
from pathlib import Path
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from sqlalchemy import MetaData, create_engine
from sqlalchemy.engine import base
from sqlalchemy.orm import Session, sessionmaker
from mealie.services._base_service import BaseService
class AlchemyExporter(BaseService):
    """Dump and restore an entire SQLAlchemy database to/from JSON-serializable dictionaries."""

    connection_str: str
    engine: base.Engine
    meta: MetaData

    # Column names whose string values are parsed back into datetime objects on restore.
    # NOTE(review): "update_at" looks like a typo for "updated_at" — confirm against the schema.
    look_for_datetime = {"created_at", "update_at", "date_updated", "timestamp", "expires_at"}
    # Column names whose string values are parsed back into date objects on restore.
    look_for_date = {"date_added", "date"}

    class DateTimeParser(BaseModel):
        # Pydantic coerces ISO-format strings into proper date/datetime values.
        date: datetime.date = None
        time: datetime.datetime = None

    def __init__(self, connection_str: str) -> None:
        super().__init__()
        self.connection_str = connection_str
        self.engine = create_engine(connection_str)
        self.meta = MetaData()
        self.session_maker = sessionmaker(bind=self.engine)

    @staticmethod
    def convert_to_datetime(data: dict) -> dict:
        """
        walks the dictionary to convert all things that look like timestamps to datetime objects
        used in the context of reading a json file into a database via SQLAlchemy.
        """
        for key, value in data.items():
            if isinstance(value, dict):
                # Fix: recurse into the nested dict and store it under its key. The original
                # re-bound the local `data` to the recursion result, so all previously
                # converted keys were dropped and the wrong dictionary was returned.
                data[key] = AlchemyExporter.convert_to_datetime(value)
            elif isinstance(value, list):  # assume that this is a list of dictionaries
                data[key] = [AlchemyExporter.convert_to_datetime(item) for item in value]
            elif isinstance(value, str):
                if key in AlchemyExporter.look_for_datetime:
                    data[key] = AlchemyExporter.DateTimeParser(time=value).time
                if key in AlchemyExporter.look_for_date:
                    data[key] = AlchemyExporter.DateTimeParser(date=value).date
        return data

    @staticmethod
    def _compare_schemas(schema1: dict, schema2: dict) -> bool:
        try:
            # validate alembic version(s) are the same
            return schema1["alembic_version"] == schema2["alembic_version"]
        except KeyError:
            return False

    @staticmethod
    def validate_schemas(schema1: Path | dict, schema2: Path | dict) -> bool:
        """
        Validates that the schemas of the two database dumps match. In practice,
        this means validating that the alembic version is the same
        """

        def extract_json(file: Path) -> dict:
            with open(file) as f:
                return json.loads(f.read())

        if isinstance(schema1, Path):
            schema1 = extract_json(schema1)

        if isinstance(schema2, Path):
            schema2 = extract_json(schema2)

        return AlchemyExporter._compare_schemas(schema1, schema2)

    def dump_schema(self) -> dict:
        """
        Returns the schema of the SQLAlchemy database as a python dictionary. This dictionary is wrapped by
        jsonable_encoder to ensure that the object can be converted to a json string.
        """
        self.meta.reflect(bind=self.engine)

        all_tables = self.meta.tables.values()

        # Tables are keyed by name with empty row lists; only alembic_version rows are included.
        results = {
            **{table.name: [] for table in all_tables},
            "alembic_version": [dict(row) for row in self.engine.execute("SELECT * FROM alembic_version").fetchall()],
        }

        return jsonable_encoder(results)

    def dump(self) -> dict[str, list[dict]]:
        """
        Returns the entire SQLAlchemy database as a python dictionary. This dictionary is wrapped by
        jsonable_encoder to ensure that the object can be converted to a json string.
        """
        self.meta.reflect(bind=self.engine)  # http://docs.sqlalchemy.org/en/rel_0_9/core/reflection.html

        result = {
            table.name: [dict(row) for row in self.engine.execute(table.select())] for table in self.meta.sorted_tables
        }

        return jsonable_encoder(result)

    def restore(self, db_dump: dict) -> None:
        """Restores all data from dictionary into the database"""
        data = AlchemyExporter.convert_to_datetime(db_dump)

        self.meta.reflect(bind=self.engine)
        for table_name, rows in data.items():
            if not rows:
                continue

            table = self.meta.tables[table_name]
            # Clear existing rows before inserting the dump so restore is idempotent.
            self.engine.execute(table.delete())
            self.engine.execute(table.insert(), rows)

    def drop_all(self) -> None:
        """Drops all data from the database"""
        self.meta.reflect(bind=self.engine)
        with self.session_maker() as session:
            session: Session

            is_postgres = self.settings.DB_ENGINE == "postgres"

            try:
                if is_postgres:
                    # Disable foreign-key enforcement so tables can be cleared in any order.
                    session.execute("SET session_replication_role = 'replica'")

                for table in self.meta.sorted_tables:
                    session.execute(f"DELETE FROM {table.name}")
            finally:
                if is_postgres:
                    session.execute("SET session_replication_role = 'origin'")
                session.commit()

View File

@@ -0,0 +1,45 @@
import json
import shutil
import tempfile
from pathlib import Path
class BackupContents:
    """Lightweight view over an unpacked backup directory."""

    def __init__(self, file: Path) -> None:
        self.base = file
        self.data_directory = self.base / "data"
        self.tables = self.base / "database.json"

    def validate(self) -> bool:
        """Return True only when the backup has a data directory and a database.json file."""
        return self.base.is_dir() and self.data_directory.is_dir() and self.tables.is_file()

    def read_tables(self) -> dict:
        """Load the database table dump (database.json) and return it as a dictionary."""
        return json.loads(self.tables.read_text())
class BackupFile:
    """Context manager that unpacks a backup zip into a temporary directory.

    Entering yields a BackupContents over the unpacked tree; exiting removes
    the temporary directory again.
    """

    temp_dir: Path | None

    def __init__(self, file: Path) -> None:
        self.zip = file
        # Fix: initialize eagerly so __exit__ is safe even when __enter__ never
        # ran (the original left the attribute unset until __enter__, so an
        # early __exit__ raised AttributeError).
        self.temp_dir = None

    def __enter__(self) -> BackupContents:
        self.temp_dir = Path(tempfile.mkdtemp())
        shutil.unpack_archive(str(self.zip), str(self.temp_dir))
        return BackupContents(self.temp_dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Clean up the unpacked files; runs even when the body raised.
        if self.temp_dir and self.temp_dir.is_dir():
            shutil.rmtree(self.temp_dir)

        self.temp_dir = None

View File

@@ -0,0 +1,98 @@
import datetime
import json
import shutil
from pathlib import Path
from zipfile import ZipFile
from mealie.services._base_service import BaseService
from mealie.services.backups_v2.alchemy_exporter import AlchemyExporter
from mealie.services.backups_v2.backup_file import BackupFile
class BackupV2(BaseService):
    """Creates and restores full application backups (database dump plus data directory)."""

    def __init__(self, db_url: str | None = None) -> None:
        super().__init__()

        # Fall back to the configured database URL when none is supplied.
        self.db_url = db_url or self.settings.DB_URL
        self.db_exporter = AlchemyExporter(self.db_url)

    def _sqlite(self) -> None:
        """Copy the raw SQLite database file aside as a safety net before a restore."""
        db_file = self.settings.DB_URL.removeprefix("sqlite:///")

        # Create a backup of the SQLite database
        # NOTE(review): the copy lands in the current working directory rather
        # than BACKUP_DIR — confirm this is intentional.
        timestamp = datetime.datetime.now().strftime("%Y.%m.%d")
        shutil.copy(db_file, f"mealie_{timestamp}.bak.db")

    def _postgres(self) -> None:
        # No pre-restore safety copy is implemented for Postgres yet.
        pass

    def backup(self) -> Path:
        """Dump the database and data directory into a timestamped zip.

        Returns:
            Path of the zip file written to BACKUP_DIR.
        """
        # sourcery skip: merge-nested-ifs, reintroduce-else, remove-redundant-continue
        exclude = {"mealie.db", "mealie.log", ".secret"}
        exclude_ext = {".zip"}
        exclude_dirs = {"backups"}

        timestamp = datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")

        backup_name = f"mealie_{timestamp}.zip"
        backup_file = self.directories.BACKUP_DIR / backup_name

        database_json = self.db_exporter.dump()

        with ZipFile(backup_file, "w") as zip_file:
            zip_file.writestr("database.json", json.dumps(database_json))

            for data_file in self.directories.DATA_DIR.glob("**/*"):
                if data_file.name in exclude:
                    continue

                if data_file.is_file() and data_file.suffix not in exclude_ext:
                    # Skip anything inside excluded directories (e.g. previous backups).
                    if data_file.parent.name in exclude_dirs:
                        continue

                    zip_file.write(data_file, f"data/{data_file.relative_to(self.directories.DATA_DIR)}")

        return backup_file

    def _copy_data(self, data_path: Path) -> None:
        """Replace each top-level directory in DATA_DIR with its counterpart from the backup."""
        for f in data_path.iterdir():
            if f.is_file():
                continue

            dest = self.directories.DATA_DIR / f.name
            # Fix: only remove the destination when it exists; the unconditional
            # rmtree raised FileNotFoundError when restoring into a fresh data
            # directory that lacked one of the subdirectories.
            if dest.is_dir():
                shutil.rmtree(dest)
            shutil.copytree(f, dest)

    def restore(self, backup_path: Path) -> None:
        """Restore the database and data directory from a backup zip.

        Raises:
            ValueError: when the archive is missing the data directory or database.json.
        """
        # fixed log typo: was "initially backup restore"
        self.logger.info("initiating backup restore")

        backup = BackupFile(backup_path)

        # Take a safety copy of the current database before wiping it.
        if self.settings.DB_ENGINE == "sqlite":
            self._sqlite()
        elif self.settings.DB_ENGINE == "postgres":
            self._postgres()

        with backup as contents:
            if not contents.validate():
                # fixed: message was missing its closing parenthesis
                self.logger.error(
                    "Invalid backup file. file does not contain required elements (data directory and database.json)"
                )
                raise ValueError("Invalid backup file")

            # Purge the Database
            self.logger.info("dropping all database tables")
            self.db_exporter.drop_all()

            database_json = contents.read_tables()

            self.logger.info("importing database tables")
            self.db_exporter.restore(database_json)
            self.logger.info("database tables imported successfully")

            self.logger.info("restoring data directory")
            self._copy_data(contents.data_directory)
            self.logger.info("data directory restored successfully")

            self.logger.info("backup restore complete")