feat: Import + Translate recipe images with OpenAI (#3974)

Co-authored-by: Johan Lindell <johan@lindell.me>
Co-authored-by: boc-the-git <3479092+boc-the-git@users.noreply.github.com>
This commit is contained in:
Michael Genson
2024-08-17 17:07:01 -05:00
committed by GitHub
parent 3d921cb677
commit 8a15f400e1
23 changed files with 924 additions and 241 deletions

View File

@@ -1,6 +1,8 @@
from .openai import OpenAIDataInjection, OpenAIService
from .openai import OpenAIDataInjection, OpenAIImageExternal, OpenAILocalImage, OpenAIService
__all__ = [
"OpenAIDataInjection",
"OpenAIImageExternal",
"OpenAILocalImage",
"OpenAIService",
]

View File

@@ -1,6 +1,8 @@
import base64
import inspect
import json
import os
from abc import ABC, abstractmethod
from pathlib import Path
from textwrap import dedent
@@ -9,6 +11,7 @@ from openai.resources.chat.completions import ChatCompletion
from pydantic import BaseModel, field_validator
from mealie.core.config import get_app_settings
from mealie.pkgs import img
from .._base_service import BaseService
@@ -39,6 +42,37 @@ class OpenAIDataInjection(BaseModel):
return value
class OpenAIImageBase(BaseModel, ABC):
    """Abstract image that can be rendered as an ``image_url`` message part."""

    @abstractmethod
    def get_image_url(self) -> str:
        """Return the URL string placed under ``image_url.url`` by ``build_message``."""

    def build_message(self) -> dict:
        """Return the ``image_url`` content-part dict for this image."""
        url = self.get_image_url()
        return {"type": "image_url", "image_url": {"url": url}}
class OpenAIImageExternal(OpenAIImageBase):
    """Image identified by a URL string that is used as-is."""

    # URL returned unchanged by get_image_url
    url: str

    def get_image_url(self) -> str:
        """Return the stored ``url`` without modification."""
        return self.url
class OpenAILocalImage(OpenAIImageBase):
    """Image stored on the local filesystem, sent inline as a base64 data URL."""

    # name used to build the "<filename>-min-original.webp" destination file
    filename: str
    # location of the source image on disk
    path: Path

    def get_image_url(self) -> str:
        """Return a ``data:image/webp;base64,...`` URL for the image at ``path``.

        The source file is passed to ``img.PillowMinifier.to_webp`` (presumably a
        WebP conversion -- confirm against ``mealie.pkgs.img``), which is asked to
        write next to the source file; the file it returns is then base64-encoded.
        """
        webp_dest = self.path.parent / f"{self.filename}-min-original.webp"
        webp_file = img.PillowMinifier.to_webp(self.path, dest=webp_dest)

        with open(webp_file, "rb") as handle:
            raw_bytes = handle.read()

        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        return f"data:image/webp;base64,{encoded}"
class OpenAIService(BaseService):
PROMPTS_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "prompts"
@@ -50,6 +84,7 @@ class OpenAIService(BaseService):
self.model = settings.OPENAI_MODEL
self.workers = settings.OPENAI_WORKERS
self.send_db_data = settings.OPENAI_SEND_DATABASE_DATA
self.enable_image_services = settings.OPENAI_ENABLE_IMAGE_SERVICES
self.get_client = lambda: AsyncOpenAI(
base_url=settings.OPENAI_BASE_URL,
@@ -99,7 +134,7 @@ class OpenAIService(BaseService):
return "\n".join(content_parts)
async def _get_raw_response(
self, prompt: str, message: str, temperature=0.2, force_json_response=True
self, prompt: str, content: list[dict], temperature=0.2, force_json_response=True
) -> ChatCompletion:
client = self.get_client()
return await client.chat.completions.create(
@@ -110,7 +145,7 @@ class OpenAIService(BaseService):
},
{
"role": "user",
"content": message,
"content": content,
},
],
model=self.model,
@@ -118,10 +153,26 @@ class OpenAIService(BaseService):
response_format={"type": "json_object"} if force_json_response else NOT_GIVEN,
)
async def get_response(self, prompt: str, message: str, temperature=0.2, force_json_response=True) -> str | None:
async def get_response(
self,
prompt: str,
message: str,
*,
images: list[OpenAIImageBase] | None = None,
temperature=0.2,
force_json_response=True,
) -> str | None:
"""Send data to OpenAI and return the response message content"""
if images and not self.enable_image_services:
self.logger.warning("OpenAI image services are disabled, ignoring images")
images = None
try:
response = await self._get_raw_response(prompt, message, temperature, force_json_response)
user_messages = [{"type": "text", "text": message}]
for image in images or []:
user_messages.append(image.build_message())
response = await self._get_raw_response(prompt, user_messages, temperature, force_json_response)
if not response.choices:
return None
return response.choices[0].message.content

View File

@@ -0,0 +1,11 @@
You are a bot that reads an image, or a set of images, and parses it into recipe JSON. You will receive one or more images from the user and you need to extract the recipe data and return it as valid JSON that follows the schema. The recipe schema will be included at the bottom of this message.
It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unable to extract data due to insufficient input, you may reply with a completely empty JSON object (represented by two curly braces: {}).
Do not under any circumstances insert data not found directly in the image. Ingredients, instructions, and notes should come directly from the image and not be generated or otherwise made up. It is illegal for you to create information not found directly in the image.
Your response must be in valid JSON in the provided Recipe definition below. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema.
The user message that you receive will be one or more images. Assume all images provided belong to a single recipe, not multiple recipes. The recipe may consist of printed text or handwritten text. It may be rotated or not properly cropped. It is your job to figure out which part of the image is the important content and extract it.
The text you receive in the provided image or images may not be in English. The user may provide a language for you to translate the recipe into. If the user doesn't ask for a translation, you should preserve the text as-is without translating or otherwise modifying it. Otherwise, you should translate all text (recipe name, ingredients, instructions, etc.) to the requested language.

View File

@@ -82,7 +82,7 @@ class OpenAIParser(ABCIngredientParser):
# re-combine chunks into one response
responses_json = await asyncio.gather(*tasks)
responses = [
OpenAIIngredients.model_validate_json(response_json) for response_json in responses_json if responses_json
OpenAIIngredients.parse_openai_response(response_json) for response_json in responses_json if responses_json
]
if not responses:
raise Exception("No response from OpenAI")

View File

@@ -1,4 +1,5 @@
import json
import os
import shutil
from datetime import datetime, timezone
from pathlib import Path
@@ -11,24 +12,29 @@ from fastapi import UploadFile
from slugify import slugify
from mealie.core import exceptions
from mealie.core.config import get_app_settings
from mealie.core.dependencies.dependencies import get_temporary_path
from mealie.lang.providers import Translator
from mealie.pkgs import cache
from mealie.repos.repository_factory import AllRepositories
from mealie.repos.repository_generic import RepositoryGeneric
from mealie.schema.openai.recipe import OpenAIRecipe
from mealie.schema.recipe.recipe import CreateRecipe, Recipe
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
from mealie.schema.recipe.recipe_notes import RecipeNote
from mealie.schema.recipe.recipe_settings import RecipeSettings
from mealie.schema.recipe.recipe_step import RecipeStep
from mealie.schema.recipe.recipe_timeline_events import RecipeTimelineEventCreate, TimelineEventType
from mealie.schema.recipe.request_helpers import RecipeDuplicate
from mealie.schema.user.user import GroupInDB, PrivateUser, UserRatingCreate
from mealie.services._base_service import BaseService
from mealie.services.openai import OpenAIDataInjection, OpenAILocalImage, OpenAIService
from mealie.services.recipe.recipe_data_service import RecipeDataService
from .template_service import TemplateService
class RecipeService(BaseService):
class RecipeServiceBase(BaseService):
def __init__(self, repos: AllRepositories, user: PrivateUser, group: GroupInDB, translator: Translator):
self.repos = repos
self.user = user
@@ -39,6 +45,8 @@ class RecipeService(BaseService):
super().__init__()
class RecipeService(RecipeServiceBase):
def _get_recipe(self, data: str | UUID, key: str | None = None) -> Recipe:
recipe = self.repos.recipes.by_group(self.group.id).get_one(data, key)
if recipe is None:
@@ -250,6 +258,26 @@ class RecipeService(BaseService):
return recipe
async def create_from_images(self, images: list[UploadFile], translate_language: str | None = None) -> Recipe:
    """Create a recipe from one or more uploaded images using the OpenAI recipe service.

    The uploads are copied into a temporary directory, handed to
    ``OpenAIRecipeService.build_recipe_from_images``, and the resulting recipe is
    stored with ``create_one``. The bytes of the first upload are then passed to
    ``RecipeDataService.write_image`` for the new recipe.

    Args:
        images: Uploaded image files; all are treated as belonging to one recipe.
        translate_language: Optional language forwarded to the OpenAI recipe service.

    Returns:
        The newly created recipe.

    Raises:
        ValueError: If ``images`` is empty (also raised by the OpenAI recipe
            service when image services are unavailable or parsing fails).
    """
    # Fail before any OpenAI call or database write; otherwise an empty upload
    # would only blow up at ``local_images[0]`` after the recipe was created.
    if not images:
        raise ValueError("At least one image is required to create a recipe")

    openai_recipe_service = OpenAIRecipeService(self.repos, self.user, self.group, self.translator)

    with get_temporary_path() as temp_path:
        local_images: list[Path] = []
        for index, image in enumerate(images):
            # ``filename`` is supplied by the client: keep only its final path
            # component so it cannot point outside the temporary directory, and
            # prefix the upload index so uploads that share a name (or have none)
            # do not overwrite each other.
            original_name = Path(image.filename or "image").name
            local_path = temp_path.joinpath(f"{index}-{original_name}")
            with local_path.open("wb") as buffer:
                shutil.copyfileobj(image.file, buffer)
            local_images.append(local_path)

        recipe_data = await openai_recipe_service.build_recipe_from_images(
            local_images, translate_language=translate_language
        )

        recipe = self.create_one(recipe_data)
        data_service = RecipeDataService(recipe.id)

        # Must run while the temporary directory still exists.
        with open(local_images[0], "rb") as f:
            data_service.write_image(f.read(), "webp")

    return recipe
def duplicate_one(self, old_slug: str, dup_data: RecipeDuplicate) -> Recipe:
"""Duplicates a recipe and returns the new recipe."""
@@ -379,3 +407,67 @@ class RecipeService(BaseService):
def render_template(self, recipe: Recipe, temp_dir: Path, template: str) -> Path:
    """Render ``recipe`` with ``template`` through a ``TemplateService`` built on ``temp_dir``.

    Returns whatever ``TemplateService.render`` returns (annotated as a ``Path``).
    """
    return TemplateService(temp_dir).render(recipe, template)
class OpenAIRecipeService(RecipeServiceBase):
    """Builds ``Recipe`` objects from responses returned by the OpenAI service."""

    def _convert_recipe(self, openai_recipe: OpenAIRecipe) -> Recipe:
        """Map an ``OpenAIRecipe`` onto a ``Recipe`` owned by the current user.

        Ingredients, instructions and notes whose ``text`` is empty are dropped.
        The slug is derived from the recipe name with ``slugify``.
        """
        return Recipe(
            user_id=self.user.id,
            group_id=self.user.group_id,
            name=openai_recipe.name,
            slug=slugify(openai_recipe.name),
            description=openai_recipe.description,
            recipe_yield=openai_recipe.recipe_yield,
            total_time=openai_recipe.total_time,
            prep_time=openai_recipe.prep_time,
            perform_time=openai_recipe.perform_time,
            recipe_ingredient=[
                RecipeIngredient(title=ingredient.title, note=ingredient.text)
                for ingredient in openai_recipe.ingredients
                if ingredient.text
            ],
            recipe_instructions=[
                RecipeStep(title=instruction.title, text=instruction.text)
                for instruction in openai_recipe.instructions
                if instruction.text
            ],
            notes=[RecipeNote(title=note.title or "", text=note.text) for note in openai_recipe.notes if note.text],
        )

    async def build_recipe_from_images(self, images: list[Path], translate_language: str | None) -> Recipe:
        """Send local image files to OpenAI and convert the response into a ``Recipe``.

        Args:
            images: Paths of the image files to send; all are described to the
                model as belonging to a single recipe.
            translate_language: When truthy, a translation request for this
                language is appended to the user message.

        Returns:
            The recipe converted from the OpenAI response.

        Raises:
            ValueError: If OpenAI or its image services are disabled in the app
                settings, or if the response is empty or cannot be parsed and
                converted.
        """
        settings = get_app_settings()
        if not (settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_IMAGE_SERVICES):
            raise ValueError("OpenAI image services are not available")

        openai_service = OpenAIService()
        prompt = openai_service.get_prompt(
            "recipes.parse-recipe-image",
            data_injections=[
                OpenAIDataInjection(
                    description=(
                        "This is the JSON response schema. You must respond in valid JSON that follows this schema. "
                        "Your payload should be as compact as possible, eliminating unnecessary whitespace. "
                        "Any fields with default values which you do not populate should not be in the payload."
                    ),
                    value=OpenAIRecipe,
                )
            ],
        )

        openai_images = [OpenAILocalImage(filename=os.path.basename(image), path=image) for image in images]

        # The two sentences are separate literals; the leading space on the second
        # keeps them from running together as "provided.There".
        image_noun = "images" if len(openai_images) > 1 else "image"
        message = (
            f"Please extract the recipe from the {image_noun} provided."
            " There should be exactly one recipe."
        )
        if translate_language:
            message += f" Please translate the recipe to {translate_language}."

        response = await openai_service.get_response(prompt, message, images=openai_images, force_json_response=True)

        # ``get_response`` is annotated ``str | None``; an empty response cannot hold a recipe.
        if not response:
            raise ValueError("Unable to parse recipe from image")

        try:
            openai_recipe = OpenAIRecipe.parse_openai_response(response)
            recipe = self._convert_recipe(openai_recipe)
        except Exception as e:
            # Deliberately broad: any parsing or conversion failure is surfaced to
            # callers as a single ValueError with the original error chained.
            raise ValueError("Unable to parse recipe from image") from e

        return recipe