The app keeps the conversation in `st.session_state` so history persists across reruns. Save the code below as `app.py` in your project root; the comments explain each block.
```python
# app.py - Streamlit Chatbot (history, controls, clear, save)
# -----------------------------------------------------------
import os

import streamlit as st
from dotenv import load_dotenv

# Vertex AI imports (Gemini)
import vertexai
from vertexai.generative_models import GenerativeModel

# ---- Load config ----
load_dotenv()  # optional: reads .env for GCP_PROJECT_ID, GCP_LOCATION
PROJECT_ID = os.getenv("GCP_PROJECT_ID", "YOUR_PROJECT_ID")
LOCATION = os.getenv("GCP_LOCATION", "us-central1")

# ---- Initialize Vertex AI ----
# Uses Application Default Credentials (ADC) set via `gcloud auth application-default login`
vertexai.init(project=PROJECT_ID, location=LOCATION)

# ---- Streamlit page setup ----
st.set_page_config(page_title="Vertex AI Chatbot - Step 4", page_icon="🤖", layout="wide")
st.title("🤖 Vertex AI Chatbot - History & Controls")

# ---- Sidebar: model controls, clear & save ----
with st.sidebar:
    st.header("Model Controls")
    # Only Gemini models are offered: legacy PaLM models such as
    # text-bison@001 do not work with GenerativeModel.
    model_name = st.selectbox("Model", ("gemini-1.5-pro", "gemini-1.5-flash"))
    temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.3, step=0.05,
                            help="Higher values => more creative responses")
    max_tokens = st.slider("Max output tokens", min_value=50, max_value=1024, value=256, step=50)
    history_limit = st.number_input("History limit (turns)", min_value=1, max_value=50, value=8, step=1,
                                    help="How many recent turns (user+assistant) to include in the prompt")
    system_instruction = st.text_area("System instruction (optional)", value="You are a helpful assistant.", height=100)
    st.markdown("---")

    if st.button("Clear chat"):
        # Reset conversation
        st.session_state["messages"] = []
        st.success("Chat history cleared.")

    if st.button("Save log"):
        # Append the conversation to a local file
        log_file = "chat_log.txt"
        with open(log_file, "a", encoding="utf-8") as f:
            for role, text in st.session_state.get("messages", []):
                f.write(f"{role.upper()}: {text}\n")
            f.write("---\n")
        st.success(f"Saved to {log_file}")

    st.markdown("### Download")
    # Allow the user to download the current conversation
    if st.session_state.get("messages"):
        conv_text = "\n".join(f"{r.upper()}: {t}" for r, t in st.session_state["messages"])
        st.download_button("Download conversation (.txt)", data=conv_text, file_name="conversation.txt", mime="text/plain")

# ---- Initialize session state containers ----
if "messages" not in st.session_state:
    st.session_state.messages = []  # list of tuples: (role, content), e.g. ("user", "Hi"), ("assistant", "Hello")

# ---- Helper: build prompt with limited history ----
def build_prompt(user_input, system_instruction, history, history_limit):
    """
    Build a plain-text prompt to send to the LLM.
    We include a system instruction, then the last N turns (history_limit),
    then the new user input, and signal the assistant to reply.
    """
    parts = []
    if system_instruction:
        parts.append(f"System: {system_instruction}")
        parts.append("")  # blank line for readability
    # Include the last N turns (each turn may be user+assistant).
    # history is stored as [(role, content), ...], so we take the last
    # (history_limit * 2) messages to include both sides of each turn.
    n_msgs = history_limit * 2
    selected = history[-n_msgs:] if n_msgs > 0 else history
    for role, text in selected:
        parts.append(f"{role.title()}: {text}")
    # Add the new user message and a cue for the assistant
    parts.append(f"User: {user_input}")
    parts.append("Assistant:")
    # Join into a single string
    prompt = "\n".join(parts)
    return prompt

# ---- Helper: call Vertex AI model ----
def call_model(prompt, model_name, temperature, max_tokens):
    """
    Call the chosen GenerativeModel and return its text output.
    Sampling parameters go inside generation_config; generate_content
    does not accept temperature/max_output_tokens as top-level arguments.
    """
    model = GenerativeModel(model_name)
    # generate_content returns a response object; .text contains the string
    response = model.generate_content(
        prompt,
        generation_config={"temperature": temperature, "max_output_tokens": max_tokens},
    )
    return response.text

# ---- Main UI: display history and accept input ----
# Display chat messages inside a container so Streamlit keeps the layout stable
chat_container = st.container()
with chat_container:
    # Render existing messages
    for role, content in st.session_state.messages:
        # st.chat_message expects "user" or "assistant" (Streamlit >= 1.24)
        with st.chat_message("user" if role == "user" else "assistant"):
            st.markdown(content)

# Input box (chat_input keeps the UX like a chat app)
user_prompt = st.chat_input("Type your message here...")
if user_prompt:
    # 1) Append the user message and display it immediately
    st.session_state.messages.append(("user", user_prompt))
    with st.chat_message("user"):
        st.markdown(user_prompt)
    # 2) Build the prompt from limited history + system instruction.
    #    Exclude the message we just appended ([:-1]) so build_prompt
    #    doesn't add the new user input twice.
    prompt_text = build_prompt(user_prompt, system_instruction,
                               st.session_state.messages[:-1], int(history_limit))
    # 3) Call the LLM and show the assistant reply
    try:
        with st.chat_message("assistant"):
            with st.spinner("Generating response..."):
                answer = call_model(prompt_text, model_name, float(temperature), int(max_tokens))
            # Display the answer
            st.markdown(answer)
        # 4) Save the assistant reply in history
        st.session_state.messages.append(("assistant", answer))
    except Exception as e:
        with st.chat_message("assistant"):
            st.error(f"Model call failed: {e}")
```
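Before running the full app, it can be worth smoke-testing the Vertex AI call from a plain Python script, so that auth or project issues surface outside Streamlit. A minimal sketch, assuming ADC is configured and `GCP_PROJECT_ID`/`GCP_LOCATION` are set (the file name `smoke_test.py` is just a suggestion):

```python
# smoke_test.py - verify ADC auth and the Gemini call outside Streamlit
import os

import vertexai
from vertexai.generative_models import GenerativeModel

vertexai.init(project=os.getenv("GCP_PROJECT_ID"),
              location=os.getenv("GCP_LOCATION", "us-central1"))

model = GenerativeModel("gemini-1.5-flash")
response = model.generate_content(
    "Say hello in one short sentence.",
    generation_config={"temperature": 0.2, "max_output_tokens": 32},
)
print(response.text)
```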
Note: parameter handling varies across versions of the `google-cloud-aiplatform` SDK. Sampling settings such as `temperature` and `max_output_tokens` belong inside `generation_config` rather than as top-level arguments to `generate_content`; if your installed version behaves differently, adjust the call according to your SDK's docs.
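If you prefer a typed configuration object over a plain dict, the SDK also exposes `GenerationConfig`; a minimal sketch (assuming `vertexai.init()` has already been called):

```python
from vertexai.generative_models import GenerationConfig, GenerativeModel

model = GenerativeModel("gemini-1.5-flash")
config = GenerationConfig(temperature=0.3, max_output_tokens=256)
response = model.generate_content("Hello!", generation_config=config)
print(response.text)
```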
Here's a quick map of what each block in the code does:

| Code area | Purpose |
|---|---|
| Session state (`st.session_state.messages`) | Per-session memory on the Streamlit server so the conversation survives reruns. We store a sequence of (role, text) tuples. |
| Sidebar controls | Let the user choose the model, adjust creativity (temperature), set maximum output tokens, and limit how much history is included in the prompt. |
| Clear chat button | Resets `st.session_state.messages` to an empty list so the UI clears. Useful during demos or to reduce prompt size. |
| Prompt builder | Concatenates a system instruction, the recent history (up to `history_limit` turns), the new user message, and finally the assistant cue; see the sketch after this table. |
| Model call | Sends the built prompt to Vertex AI (Gemini) via `GenerativeModel.generate_content` and returns the assistant text. |
| Save / Download | Appends the conversation to `chat_log.txt` and offers it as a downloadable file for audits or sharing. |
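To make the prompt format concrete, here is what `build_prompt` (as defined above) returns for a short, made-up history:

```python
history = [
    ("user", "What is Vertex AI?"),
    ("assistant", "Vertex AI is Google Cloud's managed ML platform."),
]
prompt = build_prompt("Does it host Gemini?", "You are a helpful assistant.", history, 8)
# prompt is now:
# System: You are a helpful assistant.
#
# User: What is Vertex AI?
# Assistant: Vertex AI is Google Cloud's managed ML platform.
# User: Does it host Gemini?
# Assistant:
```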
Install the dependencies, authenticate with Application Default Credentials, and start the app:

```bash
pip install streamlit google-cloud-aiplatform python-dotenv
gcloud auth application-default login
streamlit run app.py
```
Then open http://localhost:8501 (Streamlit will usually open it automatically). Reduce `max_tokens` if you get errors, and keep in mind that higher `max_tokens` values increase response time, so lower them during demos.
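If you keep project settings in a `.env` file (read by `python-dotenv` at startup), a minimal one matching the variable names the app reads looks like this; the values are placeholders:

```
# .env
GCP_PROJECT_ID=your-project-id
GCP_LOCATION=us-central1
```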