mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-02-01 21:43:28 -05:00
feat: Import + Translate recipe images with OpenAI (#3974)
Co-authored-by: Johan Lindell <johan@lindell.me> Co-authored-by: boc-the-git <3479092+boc-the-git@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
from .openai import OpenAIDataInjection, OpenAIService
|
||||
from .openai import OpenAIDataInjection, OpenAIImageExternal, OpenAILocalImage, OpenAIService
|
||||
|
||||
__all__ = [
|
||||
"OpenAIDataInjection",
|
||||
"OpenAIImageExternal",
|
||||
"OpenAILocalImage",
|
||||
"OpenAIService",
|
||||
]
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import base64
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
|
||||
@@ -9,6 +11,7 @@ from openai.resources.chat.completions import ChatCompletion
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from mealie.core.config import get_app_settings
|
||||
from mealie.pkgs import img
|
||||
|
||||
from .._base_service import BaseService
|
||||
|
||||
@@ -39,6 +42,37 @@ class OpenAIDataInjection(BaseModel):
|
||||
return value
|
||||
|
||||
|
||||
class OpenAIImageBase(BaseModel, ABC):
    """Abstract model for an image attached to an OpenAI chat request.

    Subclasses supply the image location through ``get_image_url``;
    ``build_message`` wraps that value in an ``image_url`` content part.
    """

    @abstractmethod
    def get_image_url(self) -> str:
        """Return the URL string placed in the ``image_url`` content part."""
        ...

    def build_message(self) -> dict:
        """Return the ``image_url`` content-part dict for this image."""
        image_url = {"url": self.get_image_url()}
        return {"type": "image_url", "image_url": image_url}
|
||||
|
||||
|
||||
class OpenAIImageExternal(OpenAIImageBase):
    """An image identified by a URL string that is used exactly as given."""

    # URL returned verbatim by get_image_url().
    url: str

    def get_image_url(self) -> str:
        """Return the stored ``url`` without modification."""
        external_url = self.url
        return external_url
|
||||
|
||||
|
||||
class OpenAILocalImage(OpenAIImageBase):
    """An image file on disk that is sent inline as a base64 ``data:`` URL."""

    # Name used as the stem of the converted file's name.
    filename: str
    # Location of the source image file.
    path: Path

    def get_image_url(self) -> str:
        """Return the image as a ``data:image/webp;base64,...`` URL.

        The source file is passed to ``img.PillowMinifier.to_webp`` with a
        destination of ``<filename>-min-original.webp`` in the source file's
        directory; the file at the returned path is then read and
        base64-encoded.
        """
        webp_dest = self.path.parent.joinpath(f"{self.filename}-min-original.webp")
        webp_file = img.PillowMinifier.to_webp(self.path, dest=webp_dest)

        with open(webp_file, "rb") as handle:
            raw_bytes = handle.read()

        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        return f"data:image/webp;base64,{encoded}"
|
||||
|
||||
|
||||
class OpenAIService(BaseService):
|
||||
PROMPTS_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "prompts"
|
||||
|
||||
@@ -50,6 +84,7 @@ class OpenAIService(BaseService):
|
||||
self.model = settings.OPENAI_MODEL
|
||||
self.workers = settings.OPENAI_WORKERS
|
||||
self.send_db_data = settings.OPENAI_SEND_DATABASE_DATA
|
||||
self.enable_image_services = settings.OPENAI_ENABLE_IMAGE_SERVICES
|
||||
|
||||
self.get_client = lambda: AsyncOpenAI(
|
||||
base_url=settings.OPENAI_BASE_URL,
|
||||
@@ -99,7 +134,7 @@ class OpenAIService(BaseService):
|
||||
return "\n".join(content_parts)
|
||||
|
||||
async def _get_raw_response(
|
||||
self, prompt: str, message: str, temperature=0.2, force_json_response=True
|
||||
self, prompt: str, content: list[dict], temperature=0.2, force_json_response=True
|
||||
) -> ChatCompletion:
|
||||
client = self.get_client()
|
||||
return await client.chat.completions.create(
|
||||
@@ -110,7 +145,7 @@ class OpenAIService(BaseService):
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": message,
|
||||
"content": content,
|
||||
},
|
||||
],
|
||||
model=self.model,
|
||||
@@ -118,10 +153,26 @@ class OpenAIService(BaseService):
|
||||
response_format={"type": "json_object"} if force_json_response else NOT_GIVEN,
|
||||
)
|
||||
|
||||
async def get_response(self, prompt: str, message: str, temperature=0.2, force_json_response=True) -> str | None:
|
||||
async def get_response(
|
||||
self,
|
||||
prompt: str,
|
||||
message: str,
|
||||
*,
|
||||
images: list[OpenAIImageBase] | None = None,
|
||||
temperature=0.2,
|
||||
force_json_response=True,
|
||||
) -> str | None:
|
||||
"""Send data to OpenAI and return the response message content"""
|
||||
if images and not self.enable_image_services:
|
||||
self.logger.warning("OpenAI image services are disabled, ignoring images")
|
||||
images = None
|
||||
|
||||
try:
|
||||
response = await self._get_raw_response(prompt, message, temperature, force_json_response)
|
||||
user_messages = [{"type": "text", "text": message}]
|
||||
for image in images or []:
|
||||
user_messages.append(image.build_message())
|
||||
|
||||
response = await self._get_raw_response(prompt, user_messages, temperature, force_json_response)
|
||||
if not response.choices:
|
||||
return None
|
||||
return response.choices[0].message.content
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
You are a bot that reads an image, or a set of images, and parses it into recipe JSON. You will receive one or more images from the user and you need to extract the recipe data and return it as JSON that is valid against the recipe schema. The recipe schema will be included at the bottom of this message.
|
||||
|
||||
It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unable to extract data due to insufficient input, you may reply with a completely empty JSON object (represented by a pair of curly braces: {}).
|
||||
|
||||
Do not under any circumstances insert data not found directly in the image. Ingredients, instructions, and notes should come directly from the image and not be generated or otherwise made up. It is illegal for you to create information not found directly in the image.
|
||||
|
||||
Your response must be in valid JSON in the provided Recipe definition below. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema.
|
||||
|
||||
The user message that you receive will be one or more images. Assume all images provided belong to a single recipe, not multiple recipes. The recipe may consist of printed text or handwritten text. It may be rotated or not properly cropped. It is your job to figure out which part of the image is the important content and extract it.
|
||||
|
||||
The text you receive in the provided image or images may not be in English. The user may provide a language for you to translate the recipe into. If the user doesn't ask for a translation, you should preserve the text as-is without translating or otherwise modifying it. Otherwise, you should translate all text (recipe name, ingredients, instructions, etc.) to the requested language.
|
||||
@@ -82,7 +82,7 @@ class OpenAIParser(ABCIngredientParser):
|
||||
# re-combine chunks into one response
|
||||
responses_json = await asyncio.gather(*tasks)
|
||||
responses = [
|
||||
OpenAIIngredients.model_validate_json(response_json) for response_json in responses_json if responses_json
|
||||
OpenAIIngredients.parse_openai_response(response_json) for response_json in responses_json if responses_json
|
||||
]
|
||||
if not responses:
|
||||
raise Exception("No response from OpenAI")
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
@@ -11,24 +12,29 @@ from fastapi import UploadFile
|
||||
from slugify import slugify
|
||||
|
||||
from mealie.core import exceptions
|
||||
from mealie.core.config import get_app_settings
|
||||
from mealie.core.dependencies.dependencies import get_temporary_path
|
||||
from mealie.lang.providers import Translator
|
||||
from mealie.pkgs import cache
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.repos.repository_generic import RepositoryGeneric
|
||||
from mealie.schema.openai.recipe import OpenAIRecipe
|
||||
from mealie.schema.recipe.recipe import CreateRecipe, Recipe
|
||||
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
|
||||
from mealie.schema.recipe.recipe_notes import RecipeNote
|
||||
from mealie.schema.recipe.recipe_settings import RecipeSettings
|
||||
from mealie.schema.recipe.recipe_step import RecipeStep
|
||||
from mealie.schema.recipe.recipe_timeline_events import RecipeTimelineEventCreate, TimelineEventType
|
||||
from mealie.schema.recipe.request_helpers import RecipeDuplicate
|
||||
from mealie.schema.user.user import GroupInDB, PrivateUser, UserRatingCreate
|
||||
from mealie.services._base_service import BaseService
|
||||
from mealie.services.openai import OpenAIDataInjection, OpenAILocalImage, OpenAIService
|
||||
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
||||
|
||||
from .template_service import TemplateService
|
||||
|
||||
|
||||
class RecipeService(BaseService):
|
||||
class RecipeServiceBase(BaseService):
|
||||
def __init__(self, repos: AllRepositories, user: PrivateUser, group: GroupInDB, translator: Translator):
|
||||
self.repos = repos
|
||||
self.user = user
|
||||
@@ -39,6 +45,8 @@ class RecipeService(BaseService):
|
||||
|
||||
super().__init__()
|
||||
|
||||
|
||||
class RecipeService(RecipeServiceBase):
|
||||
def _get_recipe(self, data: str | UUID, key: str | None = None) -> Recipe:
|
||||
recipe = self.repos.recipes.by_group(self.group.id).get_one(data, key)
|
||||
if recipe is None:
|
||||
@@ -250,6 +258,26 @@ class RecipeService(BaseService):
|
||||
|
||||
return recipe
|
||||
|
||||
async def create_from_images(self, images: list[UploadFile], translate_language: str | None = None) -> Recipe:
    """Create a recipe from one or more uploaded images.

    Each upload is copied into a temporary directory, the local copies are
    handed to ``OpenAIRecipeService.build_recipe_from_images``, the resulting
    recipe is created, and the bytes of the first image are written as the
    recipe image.

    Args:
        images: Uploaded image files; all are treated as one recipe.
        translate_language: Optional language forwarded to the OpenAI recipe
            service.

    Returns:
        The newly created recipe.

    Raises:
        ValueError: If ``images`` is empty.
    """
    # Fail before anything is created: without this, an empty upload would
    # only surface as an IndexError after the recipe already exists.
    if not images:
        raise ValueError("At least one image is required")

    openai_recipe_service = OpenAIRecipeService(self.repos, self.user, self.group, self.translator)
    with get_temporary_path() as temp_path:
        local_images: list[Path] = []
        for index, image in enumerate(images):
            # The upload filename is client-supplied (and may be missing):
            # keep only its final path component so it cannot point outside
            # temp_path, and prefix the index so uploads that share a name
            # do not overwrite each other.
            safe_name = f"{index}-{Path(image.filename or 'image').name}"
            local_path = temp_path.joinpath(safe_name)
            with local_path.open("wb") as buffer:
                shutil.copyfileobj(image.file, buffer)
            local_images.append(local_path)

        recipe_data = await openai_recipe_service.build_recipe_from_images(
            local_images, translate_language=translate_language
        )

        recipe = self.create_one(recipe_data)
        data_service = RecipeDataService(recipe.id)

        # Read the first image while the temporary directory is still open.
        with open(local_images[0], "rb") as f:
            data_service.write_image(f.read(), "webp")
    return recipe
|
||||
|
||||
def duplicate_one(self, old_slug: str, dup_data: RecipeDuplicate) -> Recipe:
|
||||
"""Duplicates a recipe and returns the new recipe."""
|
||||
|
||||
@@ -379,3 +407,67 @@ class RecipeService(BaseService):
|
||||
def render_template(self, recipe: Recipe, temp_dir: Path, template: str) -> Path:
|
||||
t_service = TemplateService(temp_dir)
|
||||
return t_service.render(recipe, template)
|
||||
|
||||
|
||||
class OpenAIRecipeService(RecipeServiceBase):
    """Builds ``Recipe`` objects from images by querying the OpenAI service."""

    def _convert_recipe(self, openai_recipe: OpenAIRecipe) -> Recipe:
        """Map an ``OpenAIRecipe`` onto a ``Recipe`` for the current user.

        Ingredients, instructions and notes whose ``text`` is falsy are
        dropped. The slug is derived from the recipe name via ``slugify``.
        """
        return Recipe(
            user_id=self.user.id,
            group_id=self.user.group_id,
            name=openai_recipe.name,
            slug=slugify(openai_recipe.name),
            description=openai_recipe.description,
            recipe_yield=openai_recipe.recipe_yield,
            total_time=openai_recipe.total_time,
            prep_time=openai_recipe.prep_time,
            perform_time=openai_recipe.perform_time,
            recipe_ingredient=[
                RecipeIngredient(title=ingredient.title, note=ingredient.text)
                for ingredient in openai_recipe.ingredients
                if ingredient.text
            ],
            recipe_instructions=[
                RecipeStep(title=instruction.title, text=instruction.text)
                for instruction in openai_recipe.instructions
                if instruction.text
            ],
            notes=[RecipeNote(title=note.title or "", text=note.text) for note in openai_recipe.notes if note.text],
        )

    async def build_recipe_from_images(self, images: list[Path], translate_language: str | None) -> Recipe:
        """Ask OpenAI to extract a single recipe from the given image files.

        Args:
            images: Paths of the local image files; all are sent in one request.
            translate_language: When set, a translation request for this
                language is appended to the user message.

        Returns:
            The recipe converted from the OpenAI response.

        Raises:
            ValueError: If OpenAI image services are disabled, no images are
                given, the response is empty, or the response cannot be
                parsed and converted into a recipe.
        """
        settings = get_app_settings()
        if not (settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_IMAGE_SERVICES):
            raise ValueError("OpenAI image services are not available")

        # The prompt promises the model at least one image; do not send a
        # request that has nothing to extract from.
        if not images:
            raise ValueError("No images provided")

        openai_service = OpenAIService()
        prompt = openai_service.get_prompt(
            "recipes.parse-recipe-image",
            data_injections=[
                OpenAIDataInjection(
                    description=(
                        "This is the JSON response schema. You must respond in valid JSON that follows this schema. "
                        "Your payload should be as compact as possible, eliminating unncessesary whitespace. "
                        "Any fields with default values which you do not populate should not be in the payload."
                    ),
                    value=OpenAIRecipe,
                )
            ],
        )

        openai_images = [OpenAILocalImage(filename=Path(image).name, path=image) for image in images]
        # The trailing space after "provided." keeps the two sentences from
        # being fused into "provided.There should ...".
        message = (
            f"Please extract the recipe from the {'images' if len(openai_images) > 1 else 'image'} provided. "
            "There should be exactly one recipe."
        )

        if translate_language:
            message += f" Please translate the recipe to {translate_language}."

        response = await openai_service.get_response(prompt, message, images=openai_images, force_json_response=True)
        # get_response is typed ``str | None``; report an empty reply with the
        # same error as an unparsable one instead of handing None to the parser.
        if not response:
            raise ValueError("Unable to parse recipe from image")

        try:
            openai_recipe = OpenAIRecipe.parse_openai_response(response)
            recipe = self._convert_recipe(openai_recipe)
        except Exception as e:
            raise ValueError("Unable to parse recipe from image") from e

        return recipe
|
||||
|
||||
Reference in New Issue
Block a user