feat: document running openai integration tests with ollama
codefromthecrypt committed Sep 8, 2024
1 parent 8239beb commit b1c4070
Showing 7 changed files with 52 additions and 12 deletions.
35 changes: 31 additions & 4 deletions CONTRIBUTING.md
@@ -19,19 +19,46 @@ uv run pytest tests -m "not integration"

or, as a shortcut,

```
```bash
just test
```

Generally, if you are not developing a new provider, you can test most functionality through mocking and the normal test suite.
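For example, a mocked provider test looks roughly like the sketch below. The patch target, the fake response shape, and the `complete()` call are illustrative assumptions rather than the exact API; see `tests/providers/test_openai.py` for the real fixtures.

```python
# Rough sketch of a mocked provider test, modeled on tests/providers/test_openai.py.
# The patch target and the complete() call are assumptions for illustration.
from unittest.mock import patch

from exchange import Message
from exchange.providers.openai import OpenAiProvider


@patch("httpx.Client.post")  # hypothetical patch target; the real tests use their own fixtures
def test_openai_completion_mocked(mock_post):
    # Shape a fake HTTP response for the provider to parse (OpenAI-style payload assumed)
    mock_post.return_value.json.return_value = {
        "choices": [{"message": {"role": "assistant", "content": "Hello!"}}],
        "usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12},
    }
    provider = OpenAiProvider.from_env()  # presumably reads OPENAI_* variables; a placeholder key suffices
    response = provider.complete(
        model="gpt-4o-mini",
        system="You are a helpful assistant.",
        messages=[Message.user("Hello")],
        tools=(),
    )
    # Assert on the parsed assistant message according to the provider's return type.
```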

However to ensure the providers work, we also have integration tests which actually require a credential and connect
However, to ensure the providers work, we also have integration tests that require a credential and connect
to the provider endpoints. Those can be run with

```
```bash
uv run pytest tests -m integration
# or `just integration`
```

### Integration testing OpenAI with Ollama

The OpenAI provider accesses models through the OpenAI API. Many other tools expose an OpenAI-compatible API, and pointing the provider at one of them can
save you time and money during development. One such tool is [Ollama](https://github.com/ollama/ollama).

First, start Ollama and pull the models you want to test:
```bash
ollama serve
# Then in another terminal
ollama pull mistral-nemo:12B
ollama pull llava:7b
```
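Optionally, confirm the models finished downloading before moving on:
```bash
ollama list
```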

Now, export the OpenAI environment variables that point the tests at your local Ollama server:
```bash
export OPENAI_MODEL_TOOL=mistral-nemo
export OPENAI_MODEL_VISION=llava:7b
export OPENAI_HOST=http://localhost:11434
export OPENAI_API_KEY=unused
```

Finally, run the OpenAI integration tests against your Ollama server:
```bash
uv run pytest tests -m integration -k openai
# or `just integration -k openai`
```
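If these variables are not set, the model names fall back to the defaults in `tests/conftest.py` (shown later in this diff), and the provider presumably targets the real OpenAI endpoint. A rough sketch of that fallback, with the host and key handling assumed rather than taken from the source:

```python
# Model defaults mirror tests/conftest.py in this commit; host/key handling is an assumption.
import os

openai_model_tool = os.getenv("OPENAI_MODEL_TOOL", "gpt-4o-mini")
openai_model_vision = os.getenv("OPENAI_MODEL_VISION", "gpt-4o-mini")
openai_host = os.getenv("OPENAI_HOST", "https://api.openai.com/")  # assumed default
openai_api_key = os.getenv("OPENAI_API_KEY")  # required when hitting the real API
```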

## Pull Requests
7 changes: 6 additions & 1 deletion src/exchange/exchange.py
@@ -45,11 +45,16 @@ class Exchange:
provider: Provider
model: str
system: str
moderator: Moderator = field(default=ContextTruncate())
moderator: Moderator = field(default=None)
tools: Tuple[Tool] = field(factory=tuple, converter=tuple)
messages: List[Message] = field(factory=list)
checkpoint_data: CheckpointData = field(factory=CheckpointData)

def __attrs_post_init__(self) -> None:
"""Ensures context truncation uses the same model as the exchange"""
if self.moderator is None:
object.__setattr__(self, "moderator", ContextTruncate(model=self.model))

@property
def _toolmap(self) -> Mapping[str, Tool]:
return {tool.name: tool for tool in self.tools}
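The practical effect: an Exchange built without an explicit moderator now gets a ContextTruncate bound to the exchange's own model instead of a class-level default. A minimal usage sketch, with the provider construction and model name chosen for illustration:

```python
# Minimal sketch; provider construction and model name are illustrative,
# and it assumes ContextTruncate keeps the model it was constructed with.
from exchange.exchange import Exchange
from exchange.providers import get_provider

provider = get_provider("openai").from_env()  # presumably reads OPENAI_* variables
ex = Exchange(
    provider=provider,
    model="mistral-nemo",
    system="You are a helpful assistant.",
)
# __attrs_post_init__ replaced the None default with ContextTruncate(model="mistral-nemo")
assert ex.moderator.model == ex.model
```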
6 changes: 4 additions & 2 deletions src/exchange/message.py
@@ -19,8 +19,10 @@ def validate_role_and_content(instance: "Message", *_: Any) -> None: # noqa: AN
if instance.tool_use:
raise ValueError("User message does not support ToolUse")
elif instance.role == "assistant":
if not (instance.text or instance.tool_use):
raise ValueError("Assistant message must include a Text or ToolUsage")
# Note: At least in llama3.1, there's no instance.text in the response
# when the input was a single system message. We also can't determine
# the input inside a validator. Hence, we can't enforce a condition
# that the assistant message must include a Text or ToolUsage.
if instance.tool_result:
raise ValueError("Assistant message does not support ToolResult")

4 changes: 4 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,4 @@
import os

openai_model_tool = os.getenv("OPENAI_MODEL_TOOL", "gpt-4o-mini")
openai_model_vision = os.getenv("OPENAI_MODEL_VISION", "gpt-4o-mini")
5 changes: 3 additions & 2 deletions tests/providers/test_openai.py
@@ -4,6 +4,7 @@
import pytest
from exchange import Message, Text
from exchange.providers.openai import OpenAiProvider
from conftest import openai_model_tool


@pytest.fixture
@@ -24,7 +25,7 @@ def test_openai_completion(mock_error, mock_warning, mock_sleep, mock_post, open

mock_post.return_value.json.return_value = mock_response

model = "gpt-4"
model = openai_model_tool
system = "You are a helpful assistant."
messages = [Message.user("Hello")]
tools = ()
@@ -48,7 +49,7 @@
@pytest.mark.integration
def test_openai_integration():
provider = OpenAiProvider.from_env()
model = "gpt-4" # specify a valid model
model = openai_model_tool
system = "You are a helpful assistant."
messages = [Message.user("Hello")]

3 changes: 2 additions & 1 deletion tests/test_integration.py
@@ -3,11 +3,12 @@
from exchange.message import Message
from exchange.providers import get_provider
from exchange.tool import Tool
from conftest import openai_model_tool

too_long_chars = "x" * (2**20 + 1)

cases = [
(get_provider("openai"), "gpt-4o-mini"),
(get_provider("openai"), openai_model_tool),
(get_provider("databricks"), "databricks-meta-llama-3-70b-instruct"),
(get_provider("bedrock"), "anthropic.claude-3-5-sonnet-20240620-v1:0"),
]
4 changes: 2 additions & 2 deletions tests/test_vision.py → tests/test_integration_vision.py
@@ -3,10 +3,10 @@
from exchange.exchange import Exchange
from exchange.message import Message
from exchange.providers import get_provider

from conftest import openai_model_vision

cases = [
(get_provider("openai"), "gpt-4o-mini"),
(get_provider("openai"), openai_model_vision),
]

