We’ll keep the last few turns in session state and prepend them to the prompt so the model has context.
Replace app/main.py with:
import os
from dotenv import load_dotenv
from flask import Flask, render_template, request, session
from google.cloud import aiplatform
from datetime import timedelta
# Module-level setup: read configuration from the environment, initialise the
# Vertex AI SDK, load the text model, and create the Flask app. The order
# matters: load_dotenv() runs before the os.getenv() calls, and
# aiplatform.init() runs before the model is loaded.
load_dotenv()
# None when GCP_PROJECT_ID is unset; no fallback is supplied here.
project_id = os.getenv("GCP_PROJECT_ID")
location = os.getenv("GCP_LOCATION", "us-central1")
# The fallback value is a fixed, publicly visible string, so it is only
# suitable for local development.
secret_key = os.getenv("FLASK_SECRET_KEY", "dev-secret") # set a real secret in prod
aiplatform.init(project=project_id, location=location)
# NOTE(review): TextGenerationModel is normally documented under
# vertexai.language_models -- confirm that google.cloud.aiplatform exposes it
# in the installed SDK version, otherwise this line raises AttributeError.
model = aiplatform.TextGenerationModel.from_pretrained("text-bison")
app = Flask(__name__)
# Key Flask uses to sign the session data.
app.secret_key = secret_key
# Applies only to sessions marked permanent (session.permanent = True).
app.permanent_session_lifetime = timedelta(hours=6)
# Instruction placed on the first line of every prompt sent to the model.
SYSTEM_INSTRUCTION = (
    "You are a helpful assistant. Be concise, clear, and correct. "
    "If the user asks for code, include comments."
)


def format_history(history, max_messages=8):
    """Render the system instruction and recent chat history as prompt text.

    Args:
        history: List of dicts shaped like
            {"role": "user" | "assistant", "text": "..."}, oldest first.
        max_messages: How many of the most recent entries to include. This
            counts individual messages, not user/assistant pairs, so the
            default of 8 covers four full exchanges. Zero or a negative
            value includes no messages.

    Returns:
        A newline-joined string: a "System: ..." line followed by one
        "<Role>: <text>" line per included message, with the role
        title-cased.

    Raises:
        KeyError: If an entry lacks the "role" or "text" key.
    """
    # A non-positive limit must select nothing; a bare history[-0:] slice
    # would return the whole list instead.
    recent = history[-max_messages:] if max_messages > 0 else []
    lines = [f"System: {SYSTEM_INSTRUCTION}"]
    lines.extend(f"{entry['role'].title()}: {entry['text']}" for entry in recent)
    return "\n".join(lines)
# Upper bound on the number of messages kept in the session. Flask's default
# session is stored in a signed browser cookie, and browsers cap cookies at
# roughly 4 KB, so the history must not grow without limit. Eight messages
# matches the window the prompt builder sends to the model. Long replies can
# still approach the cookie limit; use a server-side session store if that
# becomes a problem.
_MAX_STORED_MESSAGES = 8


@app.route("/", methods=["GET", "POST"])
def chat():
    """Serve the chat page and, on POST, run one user/assistant exchange.

    GET renders the stored conversation. POST reads the "user_input" form
    field; when it is non-empty after stripping, the message is appended to
    the history, the model is called with the formatted history, and the
    reply is appended as well. The trimmed history is then written back to
    the session.

    Returns:
        The rendered "chat_mem.html" template with the history, the last
        user input, and the last model response.
    """
    # Opt in to app.permanent_session_lifetime; without this the configured
    # lifetime is never applied to the session cookie.
    session.permanent = True

    # Work on a copy and reassign at the end: assigning a session key marks
    # the session as modified, whereas in-place list mutation does not.
    history = list(session.get("history", []))
    response_text = ""
    user_input = ""

    if request.method == "POST":
        user_input = request.form.get("user_input", "").strip()
        if user_input:
            # Append user message
            history.append({"role": "user", "text": user_input})
            prompt = format_history(history) + "\nAssistant:"

            # Call Vertex AI
            response = model.predict(prompt, max_output_tokens=256, temperature=0.3, top_p=0.9)
            response_text = response.text.strip()

            # Append assistant reply, then drop the oldest messages so the
            # session cookie stays bounded.
            history.append({"role": "assistant", "text": response_text})
            history = history[-_MAX_STORED_MESSAGES:]
            session["history"] = history

    return render_template(
        "chat_mem.html",
        history=history,
        last_input=user_input,
        last_response=response_text,
    )
if __name__ == "__main__":
    # Development entry point. Debug mode enables the reloader and the
    # interactive debugger, which lets anyone who can reach the server run
    # arbitrary code. Set FLASK_DEBUG=0 to turn it off; the default stays on
    # to match the previous hard-coded debug=True.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() not in {"", "0", "false", "no"}
    app.run(debug=debug_enabled, port=5000)
Create the template (app/templates/chat_mem.html):
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/>
  <title>Chatbot with Memory</title>
  <style>
    body{font-family:Segoe UI,Tahoma,Arial,sans-serif;background:#f7f9fb;margin:0}
    .wrap{max-width:900px;margin:0 auto;padding:24px}
    h2{color:#2c3e50;border-bottom:3px solid #e67e22;padding-bottom:8px}
    .chat{background:#fff;border-radius:14px;box-shadow:0 6px 18px rgb(0 0 0 / 8%);padding:20px}
    .bubble{padding:10px;border-radius:12px;margin:8px 0;white-space:pre-wrap}
    .user{background:#eef3ff}
    .bot{background:#fff5e5}
    textarea{width:100%;min-height:100px;padding:10px;border:1px solid #ddd;border-radius:10px}
    button{background:#e67e22;color:#fff;border:none;border-radius:10px;padding:10px 16px;cursor:pointer}
    .tools{display:flex;gap:8px;justify-content:flex-end}
  </style>
</head>
<body>
  <div class="wrap">
    <h2>Chatbot with Memory</h2>
    <div class="chat">
      {# One bubble per stored message. Bubble content stays on a single
         line because .bubble uses white-space:pre-wrap, which would render
         any added indentation or line breaks. #}
      {% for m in history %}
        {% if m.role == 'user' %}
          <div class="bubble user"><b>You:</b> {{ m.text }}</div>
        {% else %}
          <div class="bubble bot"><b>Bot:</b> {{ m.text }}</div>
        {% endif %}
      {% endfor %}
      <form method="POST">
        {# for/id ties the label to the textarea so clicking the label
           focuses the field and assistive technology announces it. #}
        <label for="user_input">Your message:</label><br/>
        <textarea id="user_input" name="user_input" placeholder="Ask anything..."></textarea><br/><br/>
        <div class="tools">
          <button type="submit">Send</button>
        </div>
      </form>
    </div>
  </div>
</body>
</html>
Add simple file logging to app/main.py after each turn:
# ... inside the POST handler after getting response_text
# Append this exchange to chat.log as one record: the user line, the bot
# line, then a "---" separator line.
log_record = f"USER: {user_input}\nBOT: {response_text}\n---\n"
with open("chat.log", mode="a", encoding="utf-8") as log_file:
    log_file.write(log_record)