mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-02-01 05:23:10 -05:00
feat: Migrate OpenAI implementation to use structured outputs (#6964)
This commit is contained in:
@@ -6,6 +6,7 @@ from fastapi import APIRouter, File, UploadFile
|
||||
from mealie.core.dependencies.dependencies import get_temporary_path
|
||||
from mealie.routes._base import BaseAdminController, controller
|
||||
from mealie.schema.admin.debug import DebugResponse
|
||||
from mealie.schema.openai.general import OpenAIText
|
||||
from mealie.services.openai import OpenAILocalImage, OpenAIService
|
||||
|
||||
router = APIRouter(prefix="/debug")
|
||||
@@ -40,9 +41,13 @@ class AdminDebugController(BaseAdminController):
|
||||
message = f"{message} Here is an image to test with:"
|
||||
|
||||
response = await openai_service.get_response(
|
||||
prompt, message, images=local_images, force_json_response=False
|
||||
prompt, message, response_schema=OpenAIText, images=local_images
|
||||
)
|
||||
return DebugResponse(success=True, response=f'OpenAI is working. Response: "{response}"')
|
||||
|
||||
if not response:
|
||||
raise Exception("No response received from OpenAI")
|
||||
|
||||
return DebugResponse(success=True, response=f'OpenAI is working. Response: "{response.text}"')
|
||||
|
||||
except Exception as e:
|
||||
self.logger.exception(e)
|
||||
|
||||
@@ -12,8 +12,8 @@ logger = get_logger()
|
||||
|
||||
class OpenAIBase(BaseModel):
|
||||
"""
|
||||
This class defines the JSON schema sent to OpenAI. Its schema is
|
||||
injected directly into the OpenAI prompt.
|
||||
Base class for OpenAI structured output schemas. These models are passed
|
||||
to OpenAI's response_format parameter with strict schema validation.
|
||||
"""
|
||||
|
||||
__doc__ = "" # we don't want to include the docstring in the JSON schema
|
||||
|
||||
7
mealie/schema/openai/general.py
Normal file
7
mealie/schema/openai/general.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from pydantic import Field
|
||||
|
||||
from ._base import OpenAIBase
|
||||
|
||||
|
||||
class OpenAIText(OpenAIBase):
|
||||
text: str = Field(..., description="A simple response message")
|
||||
@@ -1,5 +1,3 @@
|
||||
from textwrap import dedent
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from ._base import OpenAIBase
|
||||
@@ -8,61 +6,26 @@ from ._base import OpenAIBase
|
||||
class OpenAIRecipeIngredient(OpenAIBase):
|
||||
title: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The title of the section of the recipe that the ingredient is found in. Recipes may not specify
|
||||
ingredient sections, in which case this should be left blank.
|
||||
Only the first item in the section should have this set,
|
||||
whereas subsuquent items should have their titles left blank (unless they start a new section).
|
||||
"""
|
||||
),
|
||||
description="Ingredient section title (e.g., 'Dry Ingredients'). Only set on the first item in each section.",
|
||||
)
|
||||
|
||||
text: str = Field(
|
||||
...,
|
||||
description=dedent(
|
||||
"""
|
||||
The text of the ingredient. This should represent the entire ingredient, such as "1 cup of flour" or
|
||||
"2 cups of onions, chopped". If the ingredient is completely blank, skip it and do not add the ingredient,
|
||||
since this field is required.
|
||||
|
||||
If the ingredient has no text, but has a title, include the title on the
|
||||
next ingredient instead.
|
||||
"""
|
||||
),
|
||||
description="The complete ingredient text, e.g., '1 cup of flour' or '2 cups of onions, chopped'.",
|
||||
)
|
||||
|
||||
|
||||
class OpenAIRecipeInstruction(OpenAIBase):
|
||||
title: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The title of the section of the recipe that the instruction is found in. Recipes may not specify
|
||||
instruction sections, in which case this should be left blank.
|
||||
Only the first instruction in the section should have this set,
|
||||
whereas subsuquent instructions should have their titles left blank (unless they start a new section).
|
||||
"""
|
||||
),
|
||||
description="Instruction section title. Only set on the first step in each section.",
|
||||
)
|
||||
|
||||
text: str = Field(
|
||||
...,
|
||||
description=dedent(
|
||||
"""
|
||||
The text of the instruction. This represents one step in the recipe, such as "Preheat the oven to 350",
|
||||
or "Sauté the onions for 20 minutes". Sometimes steps can be longer, such as "Bring a large pot of lightly
|
||||
salted water to a boil. Add ditalini pasta and cook for 8 minutes or until al dente; drain.".
|
||||
|
||||
Sometimes, but not always, recipes will include their number in front of the text, such as
|
||||
"1.", "2.", or "Step 1", "Step 2", or "First", "Second". In the case where they are directly numbered
|
||||
("1.", "2.", "Step one", "Step 1", "Step two", "Step 2", etc.), you should not include the number in
|
||||
the text. However, if they use words ("First", "Second", etc.), then those should be included.
|
||||
|
||||
If the instruction is completely blank, skip it and do not add the instruction, since this field is
|
||||
required. If the ingredient has no text, but has a title, include the title on the next
|
||||
instruction instead.
|
||||
"""
|
||||
description=(
|
||||
"One instruction step. Do not include numeric prefixes like '1.' or 'Step 1', "
|
||||
"but do include word-based prefixes like 'First' or 'Second'."
|
||||
),
|
||||
)
|
||||
|
||||
@@ -70,137 +33,57 @@ class OpenAIRecipeInstruction(OpenAIBase):
|
||||
class OpenAIRecipeNotes(OpenAIBase):
|
||||
title: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The title of the note. Notes may not specify a title, and just have a body of text. In this case,
|
||||
title should be left blank, and all content should go in the note text. If the note title is just
|
||||
"note" or "info", you should ignore it and leave the title blank.
|
||||
"""
|
||||
),
|
||||
description="Note title. Ignore generic titles like 'Note' or 'Info' and leave blank.",
|
||||
)
|
||||
|
||||
text: str = Field(
|
||||
...,
|
||||
description=dedent(
|
||||
"""
|
||||
The text of the note. This should represent the entire note, such as "This recipe is great for
|
||||
a summer picnic" or "This recipe is a family favorite". They may also include additional prep
|
||||
instructions such as "to make this recipe gluten free, use gluten free flour", or "you may prepare
|
||||
the dough the night before and refrigerate it until ready to bake".
|
||||
|
||||
If the note is completely blank, skip it and do not add the note, since this field is required.
|
||||
"""
|
||||
),
|
||||
description="The note content, such as tips, variations, or preparation advice.",
|
||||
)
|
||||
|
||||
|
||||
class OpenAIRecipe(OpenAIBase):
|
||||
name: str = Field(
|
||||
...,
|
||||
description=dedent(
|
||||
"""
|
||||
The name or title of the recipe. If you're unable to determine the name of the recipe, you should
|
||||
make your best guess based upon the ingredients and instructions provided.
|
||||
"""
|
||||
),
|
||||
description="Recipe name or title. Make your best guess if not obvious.",
|
||||
)
|
||||
|
||||
description: str | None = Field(
|
||||
...,
|
||||
description=dedent(
|
||||
"""
|
||||
A long description of the recipe. This should be a string that describes the recipe in a few words
|
||||
or sentences. If the recipe doesn't have a description, you should return None.
|
||||
"""
|
||||
),
|
||||
None,
|
||||
description="A brief description of the recipe in a few words or sentences.",
|
||||
)
|
||||
|
||||
recipe_yield: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The yield of the recipe. For instance, if the recipe makes 12 cookies, the yield is "12 cookies".
|
||||
If the recipe makes 2 servings, the yield is "2 servings". Typically yield consists of a number followed
|
||||
by the word "serving" or "servings", but it can be any string that describes the yield. If the yield
|
||||
isn't specified, you should return None.
|
||||
"""
|
||||
),
|
||||
description="Recipe yield, e.g., '12 cookies' or '4 servings'.",
|
||||
)
|
||||
|
||||
total_time: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The total time it takes to make the recipe. This should be a string that describes a duration of time,
|
||||
such as "1 hour and 30 minutes", "90 minutes", or "1.5 hours". If the recipe has multiple times, choose
|
||||
the longest time. If the recipe doesn't specify a total time or duration, or it specifies a prep time or
|
||||
perform time but not a total time, you should return None. Do not duplicate times between total time, prep
|
||||
time and perform time.
|
||||
"""
|
||||
),
|
||||
description="Total time as text (e.g., '1 hour 30 minutes'). Use if only one time is available.",
|
||||
)
|
||||
|
||||
prep_time: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The time it takes to prepare the recipe. This should be a string that describes a duration of time,
|
||||
such as "30 minutes", "1 hour", or "1.5 hours". If the recipe has a total time, the prep time should be
|
||||
less than the total time. If the recipe doesn't specify a prep time, you should return None. If the recipe
|
||||
supplies only one time, it should be the total time. Do not duplicate times between total time, prep
|
||||
time and coperformok time.
|
||||
"""
|
||||
),
|
||||
description="Prep time as text, e.g., '30 minutes'. Do not duplicate total_time.",
|
||||
)
|
||||
|
||||
perform_time: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The time it takes to cook the recipe. This should be a string that describes a duration of time,
|
||||
such as "30 minutes", "1 hour", or "1.5 hours". If the recipe has a total time, the perform time should be
|
||||
less than the total time. If the recipe doesn't specify a perform time, you should return None. If the
|
||||
recipe specifies a cook time, active time, or other time besides total or prep, you should use that
|
||||
time as the perform time. If the recipe supplies only one time, it should be the total time, and not the
|
||||
perform time. Do not duplicate times between total time, prep time and perform time.
|
||||
"""
|
||||
),
|
||||
description="Cook/perform time as text, e.g., '1 hour'. Do not duplicate total_time.",
|
||||
)
|
||||
|
||||
ingredients: list[OpenAIRecipeIngredient] = Field(
|
||||
[],
|
||||
description=dedent(
|
||||
"""
|
||||
A list of ingredients used in the recipe. Ingredients should be inserted in the order they appear in the
|
||||
recipe. If the recipe has no ingredients, you should return an empty list.
|
||||
|
||||
Often times, but not always, ingredients are separated by line breaks. Use these as a guide to
|
||||
separate ingredients.
|
||||
"""
|
||||
),
|
||||
default_factory=list,
|
||||
description="List of ingredients in order.",
|
||||
)
|
||||
|
||||
instructions: list[OpenAIRecipeInstruction] = Field(
|
||||
[],
|
||||
description=dedent(
|
||||
"""
|
||||
A list of instructions for the recipe. Each instruction should represent one step in the recipe,
|
||||
and should be inserted in the order they appear in the recipe. If the recipe has no instructions,
|
||||
you should return an empty list.
|
||||
|
||||
Often times, but not always, instructions are separated by line breaks and/or separated by paragraphs.
|
||||
Use these as a guide to separate instructions. They also may be separated by numbers or words, such as
|
||||
"1.", "2.", "Step 1", "Step 2", "First", "Second", etc.
|
||||
"""
|
||||
),
|
||||
default_factory=list,
|
||||
description="List of instruction steps in order.",
|
||||
)
|
||||
|
||||
notes: list[OpenAIRecipeNotes] = Field(
|
||||
[],
|
||||
description=dedent(
|
||||
"""
|
||||
A list of notes found in the recipe. Notes should be inserted in the order they appear in the recipe.
|
||||
They may appear anywhere on the recipe, though they are typically found under the instructions.
|
||||
"""
|
||||
),
|
||||
default_factory=list,
|
||||
description="List of notes, tips, or variations.",
|
||||
)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from textwrap import dedent
|
||||
from typing import Any
|
||||
|
||||
from pydantic import Field, field_validator
|
||||
@@ -9,43 +8,21 @@ from ._base import OpenAIBase
|
||||
class OpenAIIngredient(OpenAIBase):
|
||||
quantity: float | None = Field(
|
||||
0,
|
||||
description=dedent(
|
||||
"""
|
||||
The numerical representation of how much of this ingredient. For instance, if you receive
|
||||
"3 1/2 grams of minced garlic", the quantity is "3 1/2". Quantity may be represented as a whole number
|
||||
(integer), a float or decimal, or a fraction. You should output quantity in only whole numbers or
|
||||
floats, converting fractions into floats. Floats longer than 10 decimal places should be
|
||||
rounded to 10 decimal places.
|
||||
"""
|
||||
),
|
||||
description="The numerical quantity as a whole number or float. Convert fractions to decimals.",
|
||||
)
|
||||
unit: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The unit of measurement for this ingredient. For instance, if you receive
|
||||
"2 lbs chicken breast", the unit is "lbs" (short for "pounds").
|
||||
"""
|
||||
),
|
||||
description="The unit of measurement, e.g., 'cups', 'lbs', 'teaspoons'.",
|
||||
)
|
||||
food: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The actual physical ingredient used in the recipe. For instance, if you receive
|
||||
"3 cups of onions, chopped", the food is "onions".
|
||||
"""
|
||||
),
|
||||
description="The ingredient itself, e.g., 'onions' or 'chicken breast'.",
|
||||
)
|
||||
note: str | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
The rest of the text that represents more detail on how to prepare the ingredient.
|
||||
Anything that is not one of the above should be the note. For instance, if you receive
|
||||
"one can of butter beans, drained" the note would be "drained". If you receive
|
||||
"3 cloves of garlic peeled and finely chopped", the note would be "peeled and finely chopped".
|
||||
"""
|
||||
description=(
|
||||
"Preparation details, e.g., 'chopped', 'drained', 'peeled and minced'. "
|
||||
"If there are any elements you're not sure about, put them here."
|
||||
),
|
||||
)
|
||||
|
||||
@@ -55,4 +32,7 @@ class OpenAIIngredient(OpenAIBase):
|
||||
|
||||
|
||||
class OpenAIIngredients(OpenAIBase):
|
||||
ingredients: list[OpenAIIngredient] = []
|
||||
ingredients: list[OpenAIIngredient] = Field(
|
||||
default_factory=list,
|
||||
description="List of parsed ingredients.",
|
||||
)
|
||||
|
||||
@@ -5,17 +5,20 @@ import os
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from typing import TypeVar
|
||||
|
||||
from openai import NOT_GIVEN, AsyncOpenAI
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat import ChatCompletion
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from mealie.core import root_logger
|
||||
from mealie.core.config import get_app_settings
|
||||
from mealie.pkgs import img
|
||||
from mealie.schema.openai._base import OpenAIBase
|
||||
|
||||
from .._base_service import BaseService
|
||||
|
||||
T = TypeVar("T", bound=OpenAIBase)
|
||||
logger = root_logger.get_logger(__name__)
|
||||
|
||||
|
||||
@@ -189,9 +192,9 @@ class OpenAIService(BaseService):
|
||||
)
|
||||
return "\n".join(content_parts)
|
||||
|
||||
async def _get_raw_response(self, prompt: str, content: list[dict], force_json_response=True) -> ChatCompletion:
|
||||
async def _get_raw_response(self, prompt: str, content: list[dict], response_schema: type[T]) -> ChatCompletion:
|
||||
client = self.get_client()
|
||||
return await client.chat.completions.create(
|
||||
return await client.chat.completions.parse(
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
@@ -203,7 +206,7 @@ class OpenAIService(BaseService):
|
||||
},
|
||||
],
|
||||
model=self.model,
|
||||
response_format={"type": "json_object"} if force_json_response else NOT_GIVEN,
|
||||
response_format=response_schema,
|
||||
)
|
||||
|
||||
async def get_response(
|
||||
@@ -211,9 +214,9 @@ class OpenAIService(BaseService):
|
||||
prompt: str,
|
||||
message: str,
|
||||
*,
|
||||
response_schema: type[T],
|
||||
images: list[OpenAIImageBase] | None = None,
|
||||
force_json_response=True,
|
||||
) -> str | None:
|
||||
) -> T | None:
|
||||
"""Send data to OpenAI and return the response message content"""
|
||||
if images and not self.enable_image_services:
|
||||
self.logger.warning("OpenAI image services are disabled, ignoring images")
|
||||
@@ -224,9 +227,11 @@ class OpenAIService(BaseService):
|
||||
for image in images or []:
|
||||
user_messages.append(image.build_message())
|
||||
|
||||
response = await self._get_raw_response(prompt, user_messages, force_json_response)
|
||||
response = await self._get_raw_response(prompt, user_messages, response_schema)
|
||||
if not response.choices:
|
||||
return None
|
||||
return response.choices[0].message.content
|
||||
|
||||
response_text = response.choices[0].message.content
|
||||
return response_schema.parse_openai_response(response_text)
|
||||
except Exception as e:
|
||||
raise Exception(f"OpenAI Request Failed. {e.__class__.__name__}: {e}") from e
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
You are a bot that reads an image, or a set of images, and parses it into recipe JSON. You will receive an image from the user and you need to extract the recipe data and return its JSON in valid schema. The recipe schema will be included at the bottom of this message.
|
||||
|
||||
It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unable to extract data due to insufficient input, you may reply with a completely empty JSON object (represented by two brackets: {}).
|
||||
|
||||
Do not under any circumstances insert data not found directly in the image. Ingredients, instructions, and notes should come directly from the image and not be generated or otherwise made up. It is illegal for you to create information not found directly in the image.
|
||||
|
||||
Your response must be in valid JSON in the provided Recipe definition below. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema.
|
||||
You are a bot that reads an image, or a set of images, and parses it into recipe JSON. You will receive an image from the user and you need to extract the recipe data. It is imperative that you do not create any data or otherwise make up any information.
|
||||
|
||||
The user message that you receive will be one or more images. Assume all images provided belong to a single recipe, not multiple recipes. The recipe may consist of printed text or handwritten text. It may be rotated or not properly cropped. It is your job to figure out which part of the image is the important content and extract it.
|
||||
|
||||
The text you receive in the provided image or images may not be in English. The user may provide a language for you to translate the recipe into. If the user doesn't ask for a translation, you should preserve the text as-is without translating or otherwise modifying it. Otherwise, you should translate all text (recipe name, ingredients, instructions, etc.) to the requested language.
|
||||
If the user requests a translation, translate all text (name, ingredients, instructions, etc.) to the requested language. Otherwise, preserve the text as-is.
|
||||
|
||||
@@ -1,15 +1,11 @@
|
||||
You are a bot that parses user input into recipe ingredients. You will receive a list of one or more ingredients, each containing one or more of the following components: quantity, unit, food, and note. Their definitions are stated in the JSON schema below. While parsing the ingredients, there are some things to keep in mind:
|
||||
- If you cannot accurately determine the quantity, unit, food, or note, you should place everything into the note field and leave everything else empty. It's better to err on the side of putting everything in the note field than being wrong
|
||||
- You may receive recipe ingredients from multiple different languages. You should adhere to the grammar rules of the input language when trying to parse the ingredient string
|
||||
- Sometimes foods or units will be in their singular, plural, or other grammatical forms. You must interpret all of them appropriately
|
||||
- Sometimes ingredients will have text in parenthesis (like this). Parenthesis typically indicate something that should appear in the notes. For example: an input of "3 potatoes (roughly chopped)" would parse "roughly chopped" into the notes. Notice that when this occurs, the parenthesis are dropped, and you should use "roughly chopped" instead of "(roughly chopped)" in the note
|
||||
- It's possible for the input to contain typos. For instance, you might see the word "potatos" instead of "potatoes". If it is a common misspelling, you may correct it
|
||||
- Pay close attention to what can be considered a unit of measurement. There are common measurements such as tablespoon, teaspoon, and gram, abbreviations such as tsp, tbsp, and oz, and others such as sprig, can, bundle, bunch, unit, cube, package, and pinch
|
||||
- Sometimes quantities can be given a range, such as "3-5" or "1 to 2" or "three or four". In this instance, choose the lower quantity; do not try to average or otherwise calculate the quantity. For instance, if the input it "2-3 lbs of chicken breast" the quantity should be "2"
|
||||
- Any text that does not appear in the unit or food must appear in the notes. No text should be left off. The only exception for this is if a quantity is converted from text into a number. For instance, if you convert "2 dozen" into the number "24", you should not put the word "dozen" into any other field
|
||||
Parse ingredient strings into components. You will receive a list of one or more ingredients.
|
||||
|
||||
It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unsure, place the entire string into the note section of the response. Do not make things up.
|
||||
|
||||
Below you will receive the JSON schema for your response. Your response must be in valid JSON in the below schema as provided. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema.
|
||||
|
||||
The user message that you receive will be the list of one or more recipe ingredients for you to parse. Your response should have exactly one item for each item provided. For instance, if you receive 12 items to parse, then your response should be an array of 12 parsed items.
|
||||
When parsing:
|
||||
- If uncertain about quantity, unit, or food, put the entire string in the note field
|
||||
- Respect grammar rules for multiple languages
|
||||
- Interpret singular/plural/grammatical variations
|
||||
- Text in parentheses = notes (e.g., "3 potatoes (roughly chopped)" → note: "roughly chopped")
|
||||
- Correct common typos (e.g., "potatos" → "potatoes")
|
||||
- Recognize units: tablespoon, teaspoon, gram, tsp, tbsp, oz, sprig, can, bundle, bunch, unit, cube, package, pinch
|
||||
- For ranges (e.g., "3-5", "1 to 2"), use the lower number
|
||||
- All text must appear somewhere, or otherwise be accounted for; if converting "2 dozen" → "24", don't put "dozen" elsewhere. If you're unsure, put extra text in the notes
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
You are a bot that reads website data and parses it into recipe JSON. You will receive the contents of a webpage (such as its HTML) and you need to extract the recipe data and return its JSON in valid schema. The recipe schema is the standard schema.org schema, which is defined at "https://schema.org/Recipe".
|
||||
Extract recipe data from webpage contents (HTML, text, etc.) and return it in schema.org Recipe format. Reference: https://schema.org/Recipe
|
||||
|
||||
It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unable to extract data due to insufficient input, you may reply with a completely empty JSON object (represented by two brackets: {}).
|
||||
Do not create or make up any information. If insufficient data is found, return an empty object.
|
||||
|
||||
Your response must be in valid JSON in the schema.org Recipe definition. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema.
|
||||
|
||||
The user message that you receive will be the webpage contents, including (but not necessarily limited to) text extracted from the HTML.
|
||||
You will receive the webpage contents, including (but not necessarily limited to) text extracted from the HTML.
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import asyncio
|
||||
import json
|
||||
from collections.abc import Awaitable
|
||||
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
@@ -108,31 +107,21 @@ class OpenAIParser(ABCIngredientParser):
|
||||
return self.find_ingredient_match(parsed_ingredient)
|
||||
|
||||
def _get_prompt(self, service: OpenAIService) -> str:
|
||||
data_injections = [
|
||||
OpenAIDataInjection(
|
||||
description=(
|
||||
"This is the JSON response schema. You must respond in valid JSON that follows this schema. "
|
||||
"Your payload should be as compact as possible, eliminating unncessesary whitespace. Any fields "
|
||||
"with default values which you do not populate should not be in the payload."
|
||||
),
|
||||
value=OpenAIIngredients,
|
||||
),
|
||||
]
|
||||
|
||||
if service.send_db_data and self.data_matcher.units_by_alias:
|
||||
data_injections.extend(
|
||||
[
|
||||
OpenAIDataInjection(
|
||||
description=(
|
||||
"Below is a list of units found in the units database. While parsing, you should "
|
||||
"reference this list when determining which part of the input is the unit. You may "
|
||||
"find a unit in the input that does not exist in this list. This should not prevent "
|
||||
"you from parsing that text as a unit."
|
||||
),
|
||||
value=list(set(self.data_matcher.units_by_alias)),
|
||||
data_injections = [
|
||||
OpenAIDataInjection(
|
||||
description=(
|
||||
"Below is a list of units found in the units database. While parsing, you should "
|
||||
"reference this list when determining which part of the input is the unit. You may "
|
||||
"find a unit in the input that does not exist in this list. This should not prevent "
|
||||
"you from parsing that text as a unit."
|
||||
),
|
||||
]
|
||||
)
|
||||
value=list(set(self.data_matcher.units_by_alias)),
|
||||
),
|
||||
]
|
||||
|
||||
else:
|
||||
data_injections = None
|
||||
|
||||
return service.get_prompt("recipes.parse-recipe-ingredients", data_injections=data_injections)
|
||||
|
||||
@@ -148,26 +137,18 @@ class OpenAIParser(ABCIngredientParser):
|
||||
|
||||
# chunk ingredients and send each chunk to its own worker
|
||||
ingredient_chunks = self._chunk_messages(ingredients, n=service.workers)
|
||||
tasks: list[Awaitable[str | None]] = []
|
||||
for ingredient_chunk in ingredient_chunks:
|
||||
message = json.dumps(ingredient_chunk, separators=(",", ":"))
|
||||
tasks.append(service.get_response(prompt, message, force_json_response=True))
|
||||
tasks = [
|
||||
service.get_response(prompt, json.dumps(chunk, separators=(",", ":")), response_schema=OpenAIIngredients)
|
||||
for chunk in ingredient_chunks
|
||||
]
|
||||
|
||||
# re-combine chunks into one response
|
||||
try:
|
||||
responses_json = await asyncio.gather(*tasks)
|
||||
unfiltered_responses = await asyncio.gather(*tasks)
|
||||
except Exception as e:
|
||||
raise Exception("Failed to call OpenAI services") from e
|
||||
|
||||
try:
|
||||
responses = [
|
||||
OpenAIIngredients.parse_openai_response(response_json)
|
||||
for response_json in responses_json
|
||||
if responses_json
|
||||
]
|
||||
except Exception as e:
|
||||
raise Exception("Failed to parse OpenAI response") from e
|
||||
|
||||
responses = [response for response in unfiltered_responses if response]
|
||||
if not responses:
|
||||
raise Exception("No response from OpenAI")
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ from mealie.schema.recipe.request_helpers import RecipeDuplicate
|
||||
from mealie.schema.user.user import PrivateUser, UserRatingCreate
|
||||
from mealie.services._base_service import BaseService
|
||||
from mealie.services.household_services.household_service import HouseholdService
|
||||
from mealie.services.openai import OpenAIDataInjection, OpenAILocalImage, OpenAIService
|
||||
from mealie.services.openai import OpenAILocalImage, OpenAIService
|
||||
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
||||
from mealie.services.scraper import cleaner
|
||||
|
||||
@@ -595,19 +595,7 @@ class OpenAIRecipeService(RecipeServiceBase):
|
||||
raise ValueError("OpenAI image services are not available")
|
||||
|
||||
openai_service = OpenAIService()
|
||||
prompt = openai_service.get_prompt(
|
||||
"recipes.parse-recipe-image",
|
||||
data_injections=[
|
||||
OpenAIDataInjection(
|
||||
description=(
|
||||
"This is the JSON response schema. You must respond in valid JSON that follows this schema. "
|
||||
"Your payload should be as compact as possible, eliminating unncessesary whitespace. "
|
||||
"Any fields with default values which you do not populate should not be in the payload."
|
||||
),
|
||||
value=OpenAIRecipe,
|
||||
)
|
||||
],
|
||||
)
|
||||
prompt = openai_service.get_prompt("recipes.parse-recipe-image")
|
||||
|
||||
openai_images = [OpenAILocalImage(filename=os.path.basename(image), path=image) for image in images]
|
||||
message = (
|
||||
@@ -620,14 +608,19 @@ class OpenAIRecipeService(RecipeServiceBase):
|
||||
|
||||
try:
|
||||
response = await openai_service.get_response(
|
||||
prompt, message, images=openai_images, force_json_response=True
|
||||
prompt,
|
||||
message,
|
||||
response_schema=OpenAIRecipe,
|
||||
images=openai_images,
|
||||
)
|
||||
if not response:
|
||||
raise ValueError("Received empty response from OpenAI")
|
||||
|
||||
except Exception as e:
|
||||
raise Exception("Failed to call OpenAI services") from e
|
||||
|
||||
try:
|
||||
openai_recipe = OpenAIRecipe.parse_openai_response(response)
|
||||
recipe = self._convert_recipe(openai_recipe)
|
||||
recipe = self._convert_recipe(response)
|
||||
except Exception as e:
|
||||
raise ValueError("Unable to parse recipe from image") from e
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from mealie.core.config import get_app_settings
|
||||
from mealie.core.root_logger import get_logger
|
||||
from mealie.lang.providers import Translator
|
||||
from mealie.pkgs import safehttp
|
||||
from mealie.schema.openai.general import OpenAIText
|
||||
from mealie.schema.recipe.recipe import Recipe, RecipeStep
|
||||
from mealie.services.openai import OpenAIService
|
||||
from mealie.services.scraper.scraped_extras import ScrapedExtras
|
||||
@@ -339,11 +340,11 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
|
||||
service = OpenAIService()
|
||||
prompt = service.get_prompt("recipes.scrape-recipe")
|
||||
|
||||
response_json = await service.get_response(prompt, text, force_json_response=True)
|
||||
if not response_json:
|
||||
response = await service.get_response(prompt, text, response_schema=OpenAIText)
|
||||
if not (response and response.text):
|
||||
raise Exception("OpenAI did not return any data")
|
||||
|
||||
return self.ld_json_to_html(response_json)
|
||||
return self.ld_json_to_html(response.text)
|
||||
except Exception:
|
||||
self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
|
||||
return ""
|
||||
|
||||
@@ -21,7 +21,7 @@ def test_openai_create_recipe_from_image(
|
||||
unique_user: TestUser,
|
||||
test_image_jpg: str,
|
||||
):
|
||||
async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> str | None:
|
||||
async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIRecipe | None:
|
||||
data = OpenAIRecipe(
|
||||
name=random_string(),
|
||||
description=random_string(),
|
||||
@@ -33,7 +33,7 @@ def test_openai_create_recipe_from_image(
|
||||
instructions=[OpenAIRecipeInstruction(text=random_string()) for _ in range(1, random_int(5, 10))],
|
||||
notes=[OpenAIRecipeNotes(text=random_string()) for _ in range(random_int(2, 5))],
|
||||
)
|
||||
return data.model_dump_json()
|
||||
return data
|
||||
|
||||
monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)
|
||||
with open(test_image_jpg, "rb") as f:
|
||||
|
||||
@@ -2,6 +2,7 @@ import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from pydantic import UUID4
|
||||
@@ -459,7 +460,7 @@ def test_openai_parser(
|
||||
):
|
||||
ingredient_count = random_int(10, 20)
|
||||
|
||||
async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> str | None:
|
||||
async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None:
|
||||
inputs = json.loads(message)
|
||||
data = OpenAIIngredients(
|
||||
ingredients=[
|
||||
@@ -469,10 +470,10 @@ def test_openai_parser(
|
||||
food=random_string(),
|
||||
note=random_string(),
|
||||
)
|
||||
for input in inputs
|
||||
for _ in inputs
|
||||
]
|
||||
)
|
||||
return data.model_dump_json()
|
||||
return data
|
||||
|
||||
monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)
|
||||
|
||||
@@ -496,7 +497,8 @@ def test_openai_parser_sanitize_output(
|
||||
parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
):
|
||||
async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> str | None:
|
||||
async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock:
|
||||
# Create data with null character in JSON to test preprocessing
|
||||
data = OpenAIIngredients(
|
||||
ingredients=[
|
||||
OpenAIIngredient(
|
||||
@@ -507,9 +509,15 @@ def test_openai_parser_sanitize_output(
|
||||
)
|
||||
]
|
||||
)
|
||||
return data.model_dump_json()
|
||||
|
||||
monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)
|
||||
# Create a mock raw response which matches the OpenAI chat response format
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [MagicMock()]
|
||||
mock_response.choices[0].message.content = data.model_dump_json()
|
||||
return mock_response
|
||||
|
||||
# Mock the raw response here since we want to make sure our service executes processing before loading the model
|
||||
monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response)
|
||||
|
||||
with session_context() as session:
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
Reference in New Issue
Block a user