Building Chatbots
Customer support and conversational AI
Building AI Chatbots with HiveOps
Build production-ready AI chatbots with streaming responses, conversation memory, and function calling.
Overview
This guide shows you how to build various types of chatbots using HiveOps:
- Real-time streaming chat interfaces
- Multi-turn conversations with memory
- Function-calling bots (weather, search, database queries)
- Multi-language support
- Context-aware assistants
Quick Start: Basic Chatbot
Python
from openai import OpenAI

# Shared client pointing at the HiveOps OpenAI-compatible endpoint.
client = OpenAI(
    api_key="sk-YOUR-API-KEY",
    base_url="https://ai.hiveops.io"
)

def chat(user_message):
    """Send a single message and return the assistant's reply as a string."""
    completion = client.chat.completions.create(
        model="llama3:8b-instruct-q8_0",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_message},
        ],
    )
    return completion.choices[0].message.content

# Usage
print(chat("What is the capital of France?"))
JavaScript/TypeScript
import OpenAI from "openai";

// Shared client configured for the HiveOps OpenAI-compatible endpoint.
const client = new OpenAI({
  apiKey: "sk-YOUR-API-KEY",
  baseURL: "https://ai.hiveops.io",
});

/** Send one message and resolve with the assistant's reply. */
const chat = async (userMessage) => {
  const completion = await client.chat.completions.create({
    model: "llama3:8b-instruct-q8_0",
    messages: [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user", content: userMessage },
    ],
  });
  return completion.choices[0].message.content;
};

// Usage
console.log(await chat("What is the capital of France?"));
Streaming Chat (Real-Time Responses)
For a better user experience, stream the response token by token as it is generated instead of waiting for the complete reply:
Python
from openai import OpenAI

client = OpenAI(
    api_key="sk-YOUR-API-KEY",
    base_url="https://ai.hiveops.io"
)

def streaming_chat(user_message):
    """Stream a chat completion, printing tokens as they arrive.

    Each content delta is printed immediately (flushed) for a live
    "typing" effect. The full assistant reply is also returned, so
    callers can use the text programmatically instead of only seeing
    it on stdout (the original returned None).
    """
    stream = client.chat.completions.create(
        model="llama3:8b-instruct-q8_0",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_message}
        ],
        stream=True
    )
    parts = []
    for chunk in stream:
        # Guard: some stream events carry no choices or an empty delta,
        # so check before indexing into chunk.choices.
        if chunk.choices and chunk.choices[0].delta.content:
            content = chunk.choices[0].delta.content
            print(content, end="", flush=True)
            parts.append(content)
    print()  # New line at end
    return "".join(parts)

# Usage
streaming_chat("Tell me a story about a robot.")
JavaScript (React)
import { useState } from "react";
import OpenAI from "openai";

// NOTE(review): instantiating the client in the browser ships the API key to
// every visitor (NEXT_PUBLIC_* vars are embedded in the client bundle).
// Acceptable for a demo; proxy requests through a server route in production.
const client = new OpenAI({
  apiKey: process.env.NEXT_PUBLIC_HIVEOPS_API_KEY!,
  baseURL: "https://ai.hiveops.io",
  dangerouslyAllowBrowser: true // For client-side usage
});

/**
 * Minimal streaming chat UI. The transcript is an array of display strings;
 * while a reply streams, the last entry is rewritten with the text so far.
 */
export function StreamingChatbot() {
  const [messages, setMessages] = useState<string[]>([]); // rendered transcript lines
  const [input, setInput] = useState("");                 // current text-box contents
  const [streaming, setStreaming] = useState(false);      // disables input while a reply streams

  const sendMessage = async () => {
    // Ignore empty submissions and re-entrant sends while a reply is streaming.
    if (!input.trim() || streaming) return;
    const userMessage = input;
    setInput("");
    setMessages((prev) => [...prev, `You: ${userMessage}`]);
    setStreaming(true);
    let assistantMessage = "";
    // Placeholder transcript entry; each chunk below overwrites it.
    setMessages((prev) => [...prev, "Assistant: "]);
    try {
      // NOTE(review): only the current message is sent — no history, so the
      // bot has no memory across turns in this example.
      const stream = await client.chat.completions.create({
        model: "llama3:8b-instruct-q8_0",
        messages: [{ role: "user", content: userMessage }],
        stream: true
      });
      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content || "";
        assistantMessage += content;
        // Update last message with streaming content
        setMessages((prev) => {
          const newMessages = [...prev];
          newMessages[newMessages.length - 1] = `Assistant: ${assistantMessage}`;
          return newMessages;
        });
      }
    } catch (error) {
      console.error("Streaming error:", error);
    } finally {
      // Always re-enable input, even if the stream errored mid-way.
      setStreaming(false);
    }
  };

  return (
    <div>
      <div className="messages">
        {messages.map((msg, i) => (
          <div key={i}>{msg}</div>
        ))}
      </div>
      <input
        value={input}
        onChange={(e) => setInput(e.target.value)}
        onKeyDown={(e) => e.key === "Enter" && sendMessage()}
        disabled={streaming}
      />
      <button onClick={sendMessage} disabled={streaming}>
        Send
      </button>
    </div>
  );
}
Conversation Memory
Maintain context across multiple turns:
Python
from openai import OpenAI

client = OpenAI(
    api_key="sk-YOUR-API-KEY",
    base_url="https://ai.hiveops.io"
)

class Chatbot:
    """Multi-turn chatbot that keeps the full conversation in memory.

    Every user/assistant exchange is appended to ``self.messages`` and the
    whole list is sent on each request, so the model sees prior turns.
    """

    def __init__(self, system_prompt="You are a helpful assistant.",
                 model="llama3:8b-instruct-q8_0"):
        # The system prompt always lives at index 0; reset() relies on that.
        self.messages = [{"role": "system", "content": system_prompt}]
        # Model is now configurable per instance (default unchanged).
        self.model = model

    def chat(self, user_message):
        """Send one turn and return the assistant's reply.

        If the API call fails, the pending user message is removed from
        the history so a retry does not duplicate it, then the error is
        re-raised.
        """
        self.messages.append({"role": "user", "content": user_message})
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=self.messages
            )
        except Exception:
            # Roll back the unanswered user turn before surfacing the error.
            self.messages.pop()
            raise
        assistant_message = response.choices[0].message.content
        # Remember the assistant's turn for subsequent calls.
        self.messages.append({"role": "assistant", "content": assistant_message})
        return assistant_message

    def reset(self):
        """Clear conversation history (keep system prompt)"""
        self.messages = [self.messages[0]]

# Usage
bot = Chatbot()
print(bot.chat("My name is Alice"))
# => "Nice to meet you, Alice!"
print(bot.chat("What's my name?"))
# => "Your name is Alice!"
bot.reset()  # Start fresh conversation
JavaScript/TypeScript
import OpenAI from "openai";

type Message = { role: "system" | "user" | "assistant"; content: string };

/** Multi-turn chatbot that replays the whole transcript on every request. */
class Chatbot {
  private messages: Message[];
  private client: OpenAI;

  constructor(systemPrompt = "You are a helpful assistant.") {
    // The system prompt lives at index 0 so reset() can keep it.
    this.messages = [{ role: "system", content: systemPrompt }];
    this.client = new OpenAI({
      apiKey: process.env.HIVEOPS_API_KEY!,
      baseURL: "https://ai.hiveops.io",
    });
  }

  async chat(userMessage: string): Promise<string> {
    // Record the user's turn, then complete over the full history.
    this.messages.push({ role: "user", content: userMessage });
    const completion = await this.client.chat.completions.create({
      model: "llama3:8b-instruct-q8_0",
      messages: this.messages,
    });
    const reply = completion.choices[0].message.content!;
    // Remember the assistant's turn for subsequent calls.
    this.messages.push({ role: "assistant", content: reply });
    return reply;
  }

  reset() {
    // Drop everything except the system prompt.
    this.messages = this.messages.slice(0, 1);
  }
}

// Usage
const bot = new Chatbot();
console.log(await bot.chat("My name is Bob"));
// => "Nice to meet you, Bob!"
console.log(await bot.chat("What's my name?"));
// => "Your name is Bob!"
Function Calling (Tool Use)
Enable your chatbot to call external functions:
Python Example: Weather Bot
import json
from openai import OpenAI

client = OpenAI(
    api_key="sk-YOUR-API-KEY",
    base_url="https://ai.hiveops.io"
)

# Define available functions
# Tool schema advertised to the model (OpenAI function-calling format).
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            # JSON Schema describing the function's arguments.
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g., San Francisco"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit"
                    }
                },
                # "unit" is optional; the implementation defaults to celsius.
                "required": ["location"]
            }
        }
    }
]
# Implement the function
def get_weather(location, unit="celsius"):
    """Mock weather lookup used by the function-calling demo."""
    # In production, call a real weather API
    temperature = 72 if unit == "fahrenheit" else 22
    return {
        "location": location,
        "temperature": temperature,
        "unit": unit,
        "conditions": "Sunny"
    }
def chat_with_functions(user_message):
    """Run one chat turn with tool support.

    If the model requests tool calls, every call is executed, its result
    is fed back as a "tool" message, and a second completion produces the
    final natural-language answer. Fixes the original's NameError: when
    the model requested a tool other than get_weather, function_response
    was never assigned; unknown tools are now reported back to the model.
    """
    messages = [{"role": "user", "content": user_message}]
    # First API call
    response = client.chat.completions.create(
        model="llama3:8b-instruct-q8_0",
        messages=messages,
        tools=tools
    )
    # Check if model wants to call a function
    if not response.choices[0].message.tool_calls:
        return response.choices[0].message.content
    # Keep the assistant's tool-call message in the transcript so the
    # follow-up "tool" messages have a request to answer.
    messages.append(response.choices[0].message)
    available = {"get_weather": get_weather}
    # Handle every requested tool call, not just the first one.
    for tool_call in response.choices[0].message.tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        impl = available.get(function_name)
        if impl is not None:
            function_response = impl(**function_args)
        else:
            # Don't crash on an unexpected name: tell the model the tool is
            # unavailable so it can still produce a sensible reply.
            function_response = {"error": f"Unknown function: {function_name}"}
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(function_response)
        })
    # Get final response with function result
    final_response = client.chat.completions.create(
        model="llama3:8b-instruct-q8_0",
        messages=messages
    )
    return final_response.choices[0].message.content

# Usage
print(chat_with_functions("What's the weather in Tokyo?"))
# => "The weather in Tokyo is currently sunny with a temperature of 22°C."
Use Case Examples
Customer Support Bot
class SupportBot:
    """Single-turn customer-support assistant for TechCorp."""

    def __init__(self):
        self.client = OpenAI(
            api_key="sk-YOUR-API-KEY",
            base_url="https://ai.hiveops.io"
        )
        self.system_prompt = """You are a helpful customer support agent for TechCorp.
Guidelines:
- Be polite and professional
- Ask clarifying questions if needed
- Provide step-by-step solutions
- Escalate to human agent if you can't help
Available products: Laptop Pro, Desktop Max, Tablet Mini
Support topics: Technical issues, Billing, Returns
"""

    def handle_ticket(self, customer_message):
        """Answer one support ticket; no history is kept between tickets."""
        ticket_messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": customer_message},
        ]
        completion = self.client.chat.completions.create(
            model="llama3:8b-instruct-q8_0",
            messages=ticket_messages,
            temperature=0.7  # Balanced creativity
        )
        return completion.choices[0].message.content

bot = SupportBot()
print(bot.handle_ticket("My laptop won't turn on"))
Educational Tutor Bot
class TutorBot:
    """Single-turn tutoring assistant for a configurable subject."""

    def __init__(self, subject="Math"):
        self.client = OpenAI(
            api_key="sk-YOUR-API-KEY",
            base_url="https://ai.hiveops.io"
        )
        self.subject = subject
        # The subject is baked into the prompt at construction time.
        self.system_prompt = f"""You are a patient {subject} tutor.
Guidelines:
- Explain concepts step-by-step
- Use examples and analogies
- Ask if the student understands before moving on
- Encourage critical thinking (don't just give answers)
- Adapt difficulty based on student responses
"""

    def teach(self, student_question):
        """Answer one student question in the configured subject."""
        prompt_messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": student_question},
        ]
        completion = self.client.chat.completions.create(
            model="llama-3-70b-instruct",  # Use 70B for complex reasoning
            messages=prompt_messages,
            temperature=0.8
        )
        return completion.choices[0].message.content

tutor = TutorBot(subject="Physics")
print(tutor.teach("What is Newton's first law?"))
Personal Assistant Bot
from datetime import datetime

class AssistantBot:
    """Personal assistant primed with the user's name and the current time."""

    def __init__(self, user_name):
        self.client = OpenAI(
            api_key="sk-YOUR-API-KEY",
            base_url="https://ai.hiveops.io"
        )
        self.user_name = user_name
        # Captured once at construction; a long-lived bot's clock will drift.
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.system_prompt = f"""You are {user_name}'s personal AI assistant.
Context:
- User: {user_name}
- Current time: {current_time}
Guidelines:
- Be proactive and helpful
- Remember context from conversation
- Suggest next steps
- Keep responses concise unless detail is needed
"""

    def assist(self, request):
        """Handle one request from the user and return the reply text."""
        completion = self.client.chat.completions.create(
            model="llama3:8b-instruct-q8_0",
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": request},
            ]
        )
        return completion.choices[0].message.content

assistant = AssistantBot(user_name="Sarah")
print(assistant.assist("Remind me to call mom tomorrow"))
Best Practices
1. Choose the Right Model
| Use Case | Recommended Model | Why |
|---|---|---|
| Simple Q&A | mistral-7b-instruct-v0.3 | Fastest, cheapest |
| General chatbot | llama3:8b-instruct-q8_0 | Balanced quality/cost |
| Complex reasoning | llama-3-70b-instruct | Best quality |
| High volume | gemma-2-9b-it | Good balance |
2. Optimize System Prompts
# Bad: Vague instructions
system_prompt = "You are helpful."

# Good: Specific guidelines
# Spell out tone, length limits, escalation behavior, and hard "never" rules.
system_prompt = """You are a customer support chatbot for Acme Inc.
Tone: Friendly but professional
Response length: 2-3 sentences max
If unsure: Say "Let me connect you with a human agent."
Never: Make up product information
"""
3. Manage Conversation Length
class Chatbot:
    """Chatbot that caps its history with a sliding window of recent turns."""

    def __init__(self, max_messages=20):
        self.messages = [{"role": "system", "content": "..."}]
        self.max_messages = max_messages

    def chat(self, user_message):
        """Send one turn, trimming the transcript to the window first."""
        self.messages.append({"role": "user", "content": user_message})
        # Keep only recent messages (sliding window): the system prompt plus
        # the last (max_messages - 1) entries, so old turns age out.
        if len(self.messages) > self.max_messages:
            recent = self.messages[-(self.max_messages - 1):]
            self.messages = [self.messages[0], *recent]
        response = client.chat.completions.create(
            model="llama3:8b-instruct-q8_0",
            messages=self.messages
        )
        reply = response.choices[0].message.content
        self.messages.append({"role": "assistant", "content": reply})
        return reply
4. Handle Errors Gracefully
def safe_chat(user_message):
    """Chat wrapper that degrades to a friendly message instead of raising.

    Bug fix: the original caught ``OpenAI.APIError``, but the exception
    classes live on the ``openai`` module, not on the ``OpenAI`` client
    class — so the handler itself raised AttributeError. Catch
    ``openai.APIError`` instead.
    """
    import openai  # exception types (APIError etc.) live at module level

    try:
        response = client.chat.completions.create(
            model="llama3:8b-instruct-q8_0",
            messages=[{"role": "user", "content": user_message}],
            timeout=30  # 30 second timeout
        )
        return response.choices[0].message.content
    except openai.APIError:
        # API-side failure (rate limit, server error): suggest a retry.
        return "I'm having trouble right now. Please try again in a moment."
    except Exception:
        # Anything else (network, parsing): generic fallback.
        return "An error occurred. Please contact support."
5. Add Response Time Feedback
import time

def chat_with_timing(user_message):
    """Chat once, reporting how long the request took on stdout."""
    start = time.time()
    completion = client.chat.completions.create(
        model="llama3:8b-instruct-q8_0",
        messages=[{"role": "user", "content": user_message}]
    )
    duration = time.time() - start
    # Wall-clock latency of the API round-trip, to two decimal places.
    print(f"Response generated in {duration:.2f}s")
    return completion.choices[0].message.content
Production Deployment
Environment Setup
# config.py
import os
from openai import OpenAI
def get_client():
    """Build the HiveOps client from the environment.

    Raises:
        ValueError: if the HIVEOPS_API_KEY environment variable is unset.
    """
    api_key = os.getenv("HIVEOPS_API_KEY")
    if not api_key:
        raise ValueError("HIVEOPS_API_KEY environment variable not set")
    # Key validated — hand back a configured client.
    return OpenAI(api_key=api_key, base_url="https://ai.hiveops.io")
client = get_client()
Rate Limiting
import time
from collections import deque

class RateLimiter:
    """Sliding-window rate limiter: at most N requests per 60 seconds.

    Timestamps of recent requests are kept in a deque; wait_if_needed()
    blocks just long enough for the oldest entry to age out of the window.
    """

    def __init__(self, max_requests_per_minute=60):
        self.max_requests = max_requests_per_minute
        self.requests = deque()

    def wait_if_needed(self):
        """Block until a request is allowed, then record its timestamp."""
        now = time.time()
        # Remove requests older than 1 minute
        while self.requests and self.requests[0] < now - 60:
            self.requests.popleft()
        # If at limit, wait until the oldest request leaves the window.
        if len(self.requests) >= self.max_requests:
            sleep_time = 60 - (now - self.requests[0])
            if sleep_time > 0:
                time.sleep(sleep_time)
            # Bug fix: after sleeping, "now" is stale — re-read the clock and
            # prune the entry that has just aged out; otherwise a pre-sleep
            # timestamp is recorded and the window stays over-full.
            now = time.time()
            while self.requests and self.requests[0] < now - 60:
                self.requests.popleft()
        self.requests.append(now)
# Shared limiter sized to the account's requests-per-minute quota.
limiter = RateLimiter(max_requests_per_minute=60)

def rate_limited_chat(message):
    # Blocks (if necessary) before issuing the request.
    limiter.wait_if_needed()
    # "..." is a placeholder — pass the usual model/messages arguments here.
    return client.chat.completions.create(...)
Logging
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def logged_chat(user_message):
    """Chat once, logging a prompt preview and the token usage.

    Errors are logged with a full traceback and re-raised so callers can
    decide how to recover.
    """
    # Lazy %-style args: the string is only formatted if the level is enabled.
    logger.info("User message: %s...", user_message[:50])
    try:
        response = client.chat.completions.create(
            model="llama3:8b-instruct-q8_0",
            messages=[{"role": "user", "content": user_message}]
        )
    except Exception:
        # logger.exception records the exception and traceback automatically.
        logger.exception("Chat error")
        raise
    assistant_message = response.choices[0].message.content
    tokens = response.usage.total_tokens
    logger.info("Response generated (%s tokens)", tokens)
    return assistant_message
Cost Optimization
Token Usage Tracking
class CostTracker:
    """Accumulates token counts across calls and estimates spend.

    Pricing defaults to the guide's Llama 3 8B rates but can now be
    overridden per instance (values are USD per million tokens), so the
    tracker works for any model's price sheet. Defaults are unchanged.
    """

    def __init__(self, input_price_per_mtok=0.01, output_price_per_mtok=0.02):
        self.total_input_tokens = 0
        self.total_output_tokens = 0
        self.input_price_per_mtok = input_price_per_mtok
        self.output_price_per_mtok = output_price_per_mtok

    def track_chat(self, messages):
        """Send one chat request, recording its token usage, and return the reply."""
        response = client.chat.completions.create(
            model="llama3:8b-instruct-q8_0",
            messages=messages
        )
        self.total_input_tokens += response.usage.prompt_tokens
        self.total_output_tokens += response.usage.completion_tokens
        return response.choices[0].message.content

    def get_cost(self):
        """Return the estimated cost in USD for all tracked calls."""
        input_cost = (self.total_input_tokens / 1_000_000) * self.input_price_per_mtok
        output_cost = (self.total_output_tokens / 1_000_000) * self.output_price_per_mtok
        return input_cost + output_cost

    def report(self):
        """Print a short usage/cost summary to stdout."""
        total = self.total_input_tokens + self.total_output_tokens
        cost = self.get_cost()
        print(f"Total tokens: {total:,}")
        print(f"Estimated cost: ${cost:.6f}")
# Usage: one tracked call, then print the running totals.
tracker = CostTracker()
tracker.track_chat([{"role": "user", "content": "Hello!"}])
tracker.report()
Next Steps
- API Reference - Full endpoint documentation
- Error Handling - Handle errors and retries
- LangChain Integration - Build agents
- SDKs - Language-specific guides
Support
- 💬 Discord Community
- 📧 Email: [email protected]