ChatOpenAI + HF TGI + Llama3.1 tool calling not working #26342

avfranco-br · 2024-09-11T19:08:53Z

avfranco-br
Sep 11, 2024

Checked other resources

I added a very descriptive title to this question.
I searched the LangChain documentation with the integrated search.
I used the GitHub search to find a similar question and didn't find it.

Commit to Help

I commit to help with one of those options 👆

Example Code

from typing import Annotated, List

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable, RunnableConfig
from langgraph.graph.message import AnyMessage, add_messages
from typing_extensions import TypedDict
from langchain_core.messages import ToolMessage
from langchain_core.runnables import RunnableLambda
from langgraph.prebuilt import ToolNode
from IPython.display import Image, display
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph
from langgraph.prebuilt import tools_condition

import uuid

runname = "ea4all-togaf-task-2-tool-rca"
config = {
    "run_name": runname, 
    "tags": [ea4all_stage], 
    "callbacks":[tracer],
    "recursion_limit": 10,
    "configurable": {"thread_id": str(uuid.uuid4())}
}

llm = ChatOpenAI(
    model="Meta-Llama-3.1-70B-Instruct",
    openai_api_key=os.environ['HUGGINGFACEHUB_API_TOKEN'],
    openai_api_base=os.path.join(api_base_url, "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-70B-Instruct/v1/"),

## Define Tools --> OpenAI function specs
#@tool(parse_docstring=True, response_format="content")
def websearch(query: str) -> str:
    """
    Run web search on the question.

    Args:
        query: User question

    Returns:
        documents: List of results found
    """

    # print("---WEB SEARCH---")
    ##Rephrase user question to lead bettern LLM response
    question = query

    ##API Wrapper
    search = BingSearchAPIWrapper()

    ##Bing Search Results
    web_results = BingSearchResults(k=3, api_wrapper=search)
    result = web_results.invoke(
        {"query": question},
    )
    fixed_string = result.replace("'", "\"")
    result_json = json.loads(fixed_string)

    # Create a temporary file
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file:
        # Write the JSON data to the temporary file
        json.dump(result_json, temp_file)
        temp_file.flush()

        # Load the JSON data from the temporary file
        loader = JSONLoader(file_path=temp_file.name, jq_schema=".[]", text_content=False)
        docs = loader.load()
   
    return [
        Document(page_content=d.page_content, metadata={"url": json.loads(d.page_content)['link']})
        for d in docs
    ]

tools = [websearch]

class State(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]

class Assistant:
    def __init__(self, runnable: Runnable):
        """
        Initialize the Assistant with a runnable object.

        Args:
            runnable (Runnable): The runnable instance to invoke.
        """
        self.runnable = runnable

    def __call__(self, state: State, config: RunnableConfig):
        """
        Call method to invoke the LLM and handle its responses.
        Re-prompt the assistant if the response is not a tool call or meaningful text.

        Args:
            state (State): The current state containing messages.
            config (RunnableConfig): The configuration for the runnable.

        Returns:
            dict: The final state containing the updated messages.
        """
        while True:
            result = self.runnable.invoke(state)  # Invoke the LLM
            if not result.tool_calls and (
                not result.content
                or isinstance(result.content, list)
                and not result.content[0].get("text")
            ):
                messages = state["messages"] + [("user", "Respond with a real output.")]
                state = {**state, "messages": messages}
            else:
                break
        return {"messages": result}

# Create the primary assistant prompt template
primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant tasked with answering user questions. "
            "You have access to two tools:  web_search. "
            "For any user questions about Generative AI use the web_search tool to get information from the web. "
            "For all other questions reply with I cannnot find the answer! ",
        ),
        ("placeholder", "{messages}"),
    ]
)

# Prompt our LLM and bind tools
assistant_runnable = primary_assistant_prompt | llm.bind_tools(tools)

def create_tool_node_with_fallback(tools: list) -> dict:
    return ToolNode(tools).with_fallbacks(
        [RunnableLambda(handle_tool_error)], exception_key="error"
    )

def handle_tool_error(state: State) -> dict:
    error = state.get("error")
    tool_calls = state["messages"][-1].tool_calls
    return {
        "messages": [
            ToolMessage(
                content=f"Error: {repr(error)}\n please fix your mistakes.",
                tool_call_id=tc["id"],
            )
            for tc in tool_calls
        ]
    }

# Graph
builder = StateGraph(State)

# Define nodes: these do the work
builder.add_node("assistant", Assistant(assistant_runnable))
builder.add_node("tools", create_tool_node_with_fallback(tools))

# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message (result) from assistant is a tool call -> tools_condition routes to tools
    # If the latest message (result) from assistant is a not a tool call -> tools_condition routes to END
    tools_condition,
)
builder.add_edge("tools", "assistant")

# The checkpointer lets the graph persist its state
memory = MemorySaver()
react_graph = builder.compile(checkpointer=memory)

# Show
#display(Image(react_graph.get_graph(xray=True).draw_mermaid_png()))

def predict_react_agent_answer(example: dict):
    """Use this for answer evaluation"""

    config = {"configurable": {"thread_id": str(uuid.uuid4())}}
    messages = react_graph.invoke({"messages": ("user", example["input"])}, config)
    return {"response": messages["messages"][-1].content, "messages": messages}


example = {"input": "Get me information about AWS Cloud Maturity Assessment."}
response = predict_react_agent_answer(example)

Description

I am trying to run the above code using ChatOpenAI class with Meta-Llama-3.1-70B-Instruct hosted on HF via TGI. Here's the LangSmith run https://smith.langchain.com/public/916f2317-cd69-44f2-955a-a0325ebd2e0c/r

The error happen when calling this code: result = self.runnable.invoke(state) # Invoke the LLM
Error: ```
File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_openai/chat_models/base.py", line 254, in
name=rtc["function"].get("name"),
~~~^^^^^^^^^^^^

TypeError: string indices must be integers, not 'str'


Same code works fine using ChatOpenAI + GPT-4 or Ollama + Llama3.1 local.

Any advise, help will be very appreciated.

Much obliged.
Alexandre

### System Info

Stage: build

System Information
------------------
> OS:  Darwin
> OS Version:  Darwin Kernel Version 23.6.0: Mon Jul 29 21:14:21 PDT 2024; root:xnu-10063.141.2~1/RELEASE_ARM64_T8103
> Python Version:  3.11.3 (main, Jun  5 2024, 16:36:41) [Clang 15.0.0 (clang-1500.3.9.4)]

Package Information
-------------------
> langchain_core: 0.2.38
> langchain: 0.2.12
> langchain_community: 0.2.11
> langsmith: 0.1.96
> langchain_experimental: 0.0.64
> langchain_huggingface: 0.0.3
> langchain_ollama: 0.1.1
> langchain_openai: 0.1.23
> langchain_text_splitters: 0.2.2
> langchainhub: 0.1.20
> langgraph: 0.1.19

Optional packages not installed
-------------------------------
> langserve

Other Dependencies
------------------
> aiohttp: 3.10.0
> async-timeout: Installed. No version info available.
> dataclasses-json: 0.6.7
> huggingface-hub: 0.24.5
> jsonpatch: 1.33
> numpy: 1.26.4
> ollama: 0.3.1
> openai: 1.44.1
> orjson: 3.10.3
> packaging: 24.1
> pydantic: 2.8.2
> PyYAML: 6.0.1
> requests: 2.32.3
> sentence-transformers: 3.0.1
> SQLAlchemy: 2.0.31
> tenacity: 8.5.0
> tiktoken: 0.7.0
> tokenizers: 0.19.1
> transformers: 4.43.3
> types-requests: 2.32.0.20240712
> typing-extensions: 4.12.0
system and langchain packages: None

@avfranco-br · 2024-09-11T19:10:42Z

dosubot[bot]
bot Sep 11, 2024

Hello, @avfranco-br! I'm here to help you with any bugs, questions, or contributions you might have. Let's tackle this together!

The error "TypeError: string indices must be integers, not 'str'" occurs because the code is attempting to access a string using a string index, which is not allowed in Python. This issue arises in the _convert_dict_to_message function when it tries to process the response from the Meta-Llama-3.1-70B-Instruct model hosted on HF via TGI.

The problem likely lies in the structure of the response dictionary from the Meta-Llama-3.1-70B-Instruct model, which might differ from the expected structure. Specifically, the code expects certain keys and values in the response dictionary, and if the structure is different, it can lead to this type of error.

Here is the relevant part of the code that could be causing the issue:

def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
    """Convert a dictionary to a LangChain message.

    Args:
        _dict: The dictionary.

    Returns:
        The LangChain message.
    """
    role = _dict.get("role")
    name = _dict.get("name")
    id_ = _dict.get("id")
    if role == "user":
        return HumanMessage(content=_dict.get("content", ""), id=id_, name=name)
    elif role == "assistant":
        content = _dict.get("content", "") or ""
        additional_kwargs: Dict = {}
        if function_call := _dict.get("function_call"):
            additional_kwargs["function_call"] = dict(function_call)
        tool_calls = []
        invalid_tool_calls = []
        if raw_tool_calls := _dict.get("tool_calls"):
            additional_kwargs["tool_calls"] = raw_tool_calls
            for raw_tool_call in raw_tool_calls:
                try:
                    tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))
                except Exception as e:
                    invalid_tool_calls.append(
                        make_invalid_tool_call(raw_tool_call, str(e))
                    )
        return AIMessage(
            content=content,
            additional_kwargs=additional_kwargs,
            name=name,
            id=id_,
            tool_calls=tool_calls,
            invalid_tool_calls=invalid_tool_calls,
        )
    elif role == "system":
        return SystemMessage(content=_dict.get("content", ""), name=name, id=id_)
    elif role == "function":
        return FunctionMessage(
            content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_
        )
    elif role == "tool":
        additional_kwargs = {}
        if "name" in _dict:
            additional_kwargs["name"] = _dict["name"]
        return ToolMessage(
            content=_dict.get("content", ""),
            tool_call_id=cast(str, _dict.get("tool_call_id")),
            additional_kwargs=additional_kwargs,
            name=name,
            id=id_,
        )
    else:
        return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]

To fix this issue, you need to ensure that the response dictionary from the Meta-Llama-3.1-70B-Instruct model matches the expected structure. You might need to add additional checks or modify the response parsing logic to handle the specific structure of the Meta-Llama-3.1-70B-Instruct model's response ^[1].

The expected structure of the response dictionary from the Meta-Llama-3.1-70B-Instruct model hosted on HF via TGI is as follows:

{
    "id": "chatcmpl_id",
    "object": "chat.completion",
    "created": 1721875529,
    "model": "meta-llama-3.1-70b-instruct-072424",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "To calculate the result of 36939 multiplied by 8922.4, I get:\n\n36939 x 8922.4 = 329,511,111.6",
            },
            "finish_reason": "stop",
            "logprobs": None,
        }
    ],
    "usage": {
        "prompt_tokens": 30,
        "completion_tokens": 36,
        "total_tokens": 66
    }
}

Ensure that the response from the Meta-Llama-3.1-70B-Instruct model adheres to this structure ^[2].

To continue talking to Dosu, mention @dosu.

3 replies

avfranco-br Sep 12, 2024
Author

@dosu, I'm not quite sure that above is the problem. Reason why is looking at the LangSmith trace the error seems to happen before or trying to call the tool.

avfranco-br Sep 12, 2024
Author

@dosu, here is an entire copy of the message error:

TypeError("string indices must be integers, not 'str'")Traceback (most recent call last):


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 624, in generate
    self._generate_with_cache(


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 846, in _generate_with_cache
    result = self._generate(
             ^^^^^^^^^^^^^^^


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_openai/chat_models/base.py", line 642, in _generate
    return generate_from_stream(stream_iter)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 88, in generate_from_stream
    generation = next(stream, None)
                 ^^^^^^^^^^^^^^^^^^


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_openai/chat_models/base.py", line 615, in _stream
    generation_chunk = _convert_chunk_to_generation_chunk(
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_openai/chat_models/base.py", line 313, in _convert_chunk_to_generation_chunk
    message_chunk = _convert_delta_to_message_chunk(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_openai/chat_models/base.py", line 252, in _convert_delta_to_message_chunk
    tool_call_chunks = [
                       ^


  File "/Users/avfranco/.pyenv/versions/llama/lib/python3.11/site-packages/langchain_openai/chat_models/base.py", line 254, in <listcomp>
    name=rtc["function"].get("name"),
         ~~~^^^^^^^^^^^^


TypeError: string indices must be integers, not 'str'

dosubot[bot] bot Sep 12, 2024

The error "TypeError: string indices must be integers, not 'str'" occurs because the code is attempting to access a string using a string index, which is not allowed in Python. This issue arises in the _convert_delta_to_message_chunk function when it tries to process the response from the Meta-Llama-3.1-70B-Instruct model.

The problem likely lies in the structure of the response dictionary from the Meta-Llama-3.1-70B-Instruct model, which might differ from the expected structure. Specifically, the code expects certain keys and values in the response dictionary, and if the structure is different, it can lead to this type of error.

Here's the relevant part of the code that could be causing the issue:

def _convert_delta_to_message_chunk(delta: Mapping[str, Any]) -> MessageChunk:
    """Convert a delta to a message chunk."""
    tool_call_chunks = [
        ToolCallChunk(
            name=rtc["function"].get("name"),
            arguments=rtc["function"].get("arguments"),
        )
        for rtc in delta.get("tool_calls", [])
    ]
    return MessageChunk(
        content=delta.get("content", ""),
        tool_calls=tool_call_chunks,
    )

In this code, rtc["function"] is expected to be a dictionary, but it seems to be a string instead, causing the error.

To fix this issue, you need to ensure that the response dictionary from the Meta-Llama-3.1-70B-Instruct model matches the expected structure. You might need to add additional checks or modify the response parsing logic to handle the specific structure of the Meta-Llama-3.1-70B-Instruct model's response.

Ensure that the response from the Meta-Llama-3.1-70B-Instruct model adheres to the expected structure, particularly the tool_calls part. If rtc["function"] is a string, you need to adjust the code to handle it appropriately.

For example, you can add a check to ensure rtc["function"] is a dictionary before accessing its keys:

def _convert_delta_to_message_chunk(delta: Mapping[str, Any]) -> MessageChunk:
    """Convert a delta to a message chunk."""
    tool_call_chunks = [
        ToolCallChunk(
            name=rtc["function"].get("name") if isinstance(rtc.get("function"), dict) else None,
            arguments=rtc["function"].get("arguments") if isinstance(rtc.get("function"), dict) else None,
        )
        for rtc in delta.get("tool_calls", [])
    ]
    return MessageChunk(
        content=delta.get("content", ""),
        tool_calls=tool_call_chunks,
    )

This adjustment ensures that the code only attempts to access name and arguments if rtc["function"] is a dictionary, preventing the "TypeError: string indices must be integers, not 'str'" error ^[1].

To continue talking to Dosu, mention @dosu.

kteppris · 2024-09-18T21:19:51Z

kteppris
Sep 18, 2024

We encountered the same Issue.

We are hosting as well the Llama 3.1 70B with HF TGI and we found that the Tool Calling is not working.

I tried to actually build the prompt myself and use the LLM directly instead of the ChatModel Version of HF. Here the code for that:

from transformers import AutoTokenizer
from langchain_core.messages import BaseMessage, ChatMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
from typing import Dict, List

def _convert_message_to_chat_message(message: BaseMessage) -> Dict:
    if isinstance(message, ChatMessage):
        return dict(role=message.role, content=message.content)
    elif isinstance(message, HumanMessage):
        return dict(role="user", content=message.content)
    elif isinstance(message, AIMessage):
        tool_calls = None
        if "tool_calls" in message.additional_kwargs:
            tool_calls = [
                {
                    "function": {
                        "name": tc["function"]["name"],
                        "arguments": tc["function"]["arguments"],
                    }
                }
                for tc in message.additional_kwargs["tool_calls"]
            ]
        
        # Only include tool_calls if they are present
        result = {
            "role": "assistant",
            "content": message.content,
        }
        if tool_calls:
            result["tool_calls"] = tool_calls
        return result

    elif isinstance(message, SystemMessage):
        return dict(role="system", content=message.content)
    elif isinstance(message, ToolMessage):
        return {
            "role": "tool",
            "content": message.content,
            "name": message.name,
        }
    else:
        raise ValueError(f"Got unknown type {message}")

# Function to convert list of Langchain messages to TGI's chat message format
def convert_messages_and_apply_chat_template(messages: List[BaseMessage], tools: List[Dict], model_id: str):
    # Step 1: Convert Langchain messages to dictionaries
    converted_messages = [_convert_message_to_chat_message(message) for message in messages]
    
    # Ensure tools is not None
    if tools is None:
        tools = []
    
    # Step 2: Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    
    # Step 3: Apply the chat template with the converted messages and tools
    try:
        inputs = tokenizer.apply_chat_template(
            converted_messages,
            tools=tools,  # Ensure tools are a valid list
            tokenize=False,
            add_generation_prompt=True,
            return_dict=False,
        )
    except Exception as e:
        print(f"Error occurred: {e}")
        return None
    
    return inputs

# Example usage:
langchain_messages = [HumanMessage(content="How is the current weather in Hamburg?")]

# Ensure tools is not None
tools = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Temperature unit"}
            },
            "required": ["location", "format"]
        }
    }
]

# Call the function
tokenized_input = convert_messages_and_apply_chat_template(langchain_messages, tools, model_id)
print(tokenized_input)

This is a little bit hacky but works for now. I guess one would need to write a CustomChatModel to convert the input before, or the TGI gets adapted to parse the output the right way. Happy to hear if someone found a better solution.

1 reply

avfranco-br Sep 19, 2024
Author

Thank you @kteppris for replying and sharing your code! I will try it. I’ve also experimented with the HF ChatHuggingFace class that the tool calling worked but it entered in an infinite loop.

Here’s the code.

# Create the HuggingFaceEndpoint LLM
hf = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
    huggingfacehub_api_token=h,
    temperature=0,
)

llm = ChatHuggingFace(llm=hf, verbose=True)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ChatOpenAI + HF TGI + Llama3.1 tool calling not working #26342

{{title}}

Replies: 2 comments 4 replies

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

Select a reply

ChatOpenAI + HF TGI + Llama3.1 tool calling not working #26342

avfranco-br Sep 11, 2024

Checked other resources

Commit to Help

Example Code

Description

Replies: 2 comments · 4 replies

dosubot[bot] bot Sep 11, 2024

avfranco-br Sep 12, 2024 Author

avfranco-br Sep 12, 2024 Author

dosubot[bot] bot Sep 12, 2024

kteppris Sep 18, 2024

avfranco-br Sep 19, 2024 Author

avfranco-br
Sep 11, 2024

Replies: 2 comments 4 replies

dosubot[bot]
bot Sep 11, 2024

avfranco-br Sep 12, 2024
Author

avfranco-br Sep 12, 2024
Author

kteppris
Sep 18, 2024

avfranco-br Sep 19, 2024
Author