Project Overview
Difficulty: Intermediate
Goal: Build an intelligent chatbot that understands intent and responds appropriately
Technologies: NLP, TensorFlow, NLTK
Time Required: 3-4 hours
What You'll Learn
- Natural Language Processing (NLP) basics
- Text tokenization and preprocessing
- Intent classification with neural networks
- Building a conversational interface
- Pattern matching and response generation
Step 1: Install Dependencies
# Install required packages (run in a terminal)
pip install tensorflow numpy nltk scikit-learn
# Download NLTK tokenizer and lemmatizer data (run in Python)
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')  # required by the WordNet lemmatizer on some NLTK versions
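If the installs succeeded, the following quick sanity check should run without errors (the exact version number will differ on your machine):
# Quick sanity check: imports work and NLTK tokenizer data is available
import tensorflow as tf
import nltk
print("TensorFlow:", tf.__version__)
print(nltk.word_tokenize("Hello, how are you?"))  # should print a list of tokens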
Step 2: Create Training Data
# intents.json - Chatbot training data
import json
intents = {
    "intents": [
        {
            "tag": "greeting",
            "patterns": ["Hi", "Hello", "Hey", "Good morning", "Good evening"],
            "responses": ["Hello! How can I help you?", "Hi there!", "Greetings!"]
        },
        {
            "tag": "goodbye",
            "patterns": ["Bye", "Goodbye", "See you later", "Talk to you later"],
            "responses": ["Goodbye!", "See you later!", "Have a great day!"]
        },
        {
            "tag": "thanks",
            "patterns": ["Thanks", "Thank you", "Thanks a lot", "I appreciate it"],
            "responses": ["You're welcome!", "Happy to help!", "No problem!"]
        },
        {
            "tag": "about",
            "patterns": ["What can you do?", "Who are you?", "What are you?", "Tell me about yourself"],
            "responses": ["I'm an AI chatbot trained to assist you!", "I can answer questions and help with tasks!"]
        },
        {
            "tag": "weather",
            "patterns": ["What's the weather?", "How's the weather?", "Is it raining?", "Weather forecast"],
            "responses": ["I don't have real-time weather data, but you can check weather.com!",
                          "Try a weather app for accurate forecasts!"]
        },
        {
            "tag": "time",
            "patterns": ["What time is it?", "Current time", "What's the time?"],
            "responses": ["I don't have access to a real-time clock. Check your device!"]
        },
        {
            "tag": "help",
            "patterns": ["Help", "I need help", "Can you help me?", "Support"],
            "responses": ["Of course! What do you need help with?", "I'm here to help! What's your question?"]
        }
    ]
}
# Save to file
with open('intents.json', 'w') as f:
    json.dump(intents, f, indent=4)
print("Training data created!")
Step 3: Preprocess Data
# chatbot_preprocessing.py
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
# Load intents
with open('intents.json') as f:
    intents = json.load(f)
words = []
classes = []
documents = []
ignore_chars = ['?', '!', '.', ',']
# Process each pattern
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize words
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        # Add to documents
        documents.append((word_list, intent['tag']))
        # Add tag to classes
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
# Lemmatize and lowercase words, dropping punctuation
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_chars]
words = sorted(set(words))
classes = sorted(set(classes))
print(f"Documents: {len(documents)}")
print(f"Classes: {len(classes)} - {classes}")
print(f"Unique words: {len(words)}")
# Save preprocessed data
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))
# Create training data
training = []
output_empty = [0] * len(classes)
for document in documents:
    bag = []
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in document[0]]
    # Create bag of words: 1 if the vocabulary word appears in the pattern
    for word in words:
        bag.append(1 if word in word_patterns else 0)
    # Create one-hot output row for the intent tag
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])
# Shuffle and convert to array
np.random.shuffle(training)
training = np.array(training, dtype=object)
# Split features and labels
train_x = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))
# Save the arrays so chatbot_training.py can load them
np.save('train_x.npy', train_x)
np.save('train_y.npy', train_y)
print(f"Training data created: X={train_x.shape}, Y={train_y.shape}")
Step 4: Build and Train Model
# chatbot_training.py
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pickle
# Load preprocessed data
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
# Load the training arrays saved in Step 3
train_x = np.load('train_x.npy')
train_y = np.load('train_y.npy')
# Build neural network
model = keras.Sequential([
    keras.layers.Dense(128, input_shape=(len(train_x[0]),), activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(len(train_y[0]), activation='softmax')
])
# Compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    metrics=['accuracy']
)
# Train model
print("Training model...")
history = model.fit(
    train_x,
    train_y,
    epochs=200,
    batch_size=5,
    verbose=1
)
# Save model
model.save('chatbot_model.h5')
print("Model trained and saved!")
Step 5: Create Chatbot Interface
# chatbot.py - Main chatbot application
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow import keras
import random
# Load files
lemmatizer = WordNetLemmatizer()
intents = json.load(open('intents.json'))
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = keras.models.load_model('chatbot_model.h5')
def clean_up_sentence(sentence):
    """Tokenize and lemmatize input sentence"""
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words
def bag_of_words(sentence):
    """Convert sentence to bag-of-words array"""
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for w in sentence_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    return np.array(bag)
def predict_class(sentence):
    """Predict intent class for input sentence"""
    bow = bag_of_words(sentence)
    res = model.predict(np.array([bow]), verbose=0)[0]  # verbose=0 keeps the chat clean
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
    # Sort by probability, highest first
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({'intent': classes[r[0]], 'probability': str(r[1])})
    return return_list
def get_response(intents_list, intents_json):
    """Get response based on predicted intent"""
    if not intents_list:
        return "I'm not sure I understand. Can you rephrase?"
    tag = intents_list[0]['intent']
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])
    return "I'm not sure I understand. Can you rephrase?"
def chat():
    """Main chat loop"""
    print("Chatbot is ready! (type 'quit' to exit)")
    print("-" * 50)
    while True:
        message = input("You: ")
        if message.lower() == 'quit':
            print("Bot: Goodbye!")
            break
        intents_list = predict_class(message)
        response = get_response(intents_list, intents)
        print(f"Bot: {response}")

# Run chatbot
if __name__ == "__main__":
    chat()
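The 0.25 ERROR_THRESHOLD is a starting point, not a magic number: raising it makes the bot fall back to "I'm not sure" more often, while lowering it makes the bot guess more. Inspecting the classifier's ranked output helps you pick a value; for example, after from chatbot import predict_class:
# Inspect the ranked intents for a sample message
print(predict_class("hello there"))
# e.g. [{'intent': 'greeting', 'probability': '0.97'}] - exact numbers vary per training run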
Step 6: Test Your Chatbot
# Run the chatbot
python chatbot.py
# Example conversation:
# Chatbot is ready! (type 'quit' to exit)
# --------------------------------------------------
# You: Hello
# Bot: Hi there!
# You: What can you do?
# Bot: I'm an AI chatbot trained to assist you!
# You: Thanks
# Bot: You're welcome!
# You: Bye
# Bot: See you later!
Step 7: Add Web Interface (Optional)
# web_chatbot.py - Flask web interface
from flask import Flask, render_template, request, jsonify
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow import keras
import random
app = Flask(__name__)
# Load chatbot components (same as before)
lemmatizer = WordNetLemmatizer()
intents = json.load(open('intents.json'))
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = keras.models.load_model('chatbot_model.h5')
# ... (include all helper functions from Step 5) ...
@app.route('/')
def home():
    return render_template('chat.html')

@app.route('/get_response', methods=['POST'])
def get_bot_response():
    user_message = request.json['message']
    intents_list = predict_class(user_message)
    response = get_response(intents_list, intents)
    return jsonify({'response': response})

if __name__ == "__main__":
    app.run(debug=True)
# Create templates/chat.html (a minimal page; adapt the markup and styling as you like):
#
# <!DOCTYPE html>
# <html>
# <head>
#     <title>AI Chatbot</title>
# </head>
# <body>
#     <h1>AI Chatbot</h1>
#     <div id="chat"></div>
#     <input id="message" type="text" placeholder="Type a message...">
#     <button onclick="send()">Send</button>
#     <script>
#         async function send() {
#             const input = document.getElementById('message');
#             const text = input.value;
#             const res = await fetch('/get_response', {
#                 method: 'POST',
#                 headers: {'Content-Type': 'application/json'},
#                 body: JSON.stringify({message: text})
#             });
#             const data = await res.json();
#             const chat = document.getElementById('chat');
#             chat.innerHTML += '<p>You: ' + text + '</p><p>Bot: ' + data.response + '</p>';
#             input.value = '';
#         }
#     </script>
# </body>
# </html>
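With the server running (python web_chatbot.py), you can also test the endpoint without the browser UI. This sketch assumes Flask's default address, http://127.0.0.1:5000, and one extra dependency (pip install requests):
# test_endpoint.py - POST a message to the running Flask server
import requests
r = requests.post('http://127.0.0.1:5000/get_response', json={'message': 'Hello'})
print(r.json())  # e.g. {'response': 'Hi there!'}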
Complete Project Structure
chatbot_project/
│
├── intents.json # Training data
├── chatbot_preprocessing.py
├── chatbot_training.py
├── chatbot.py # Main bot
├── web_chatbot.py # Web interface (optional)
│
├── words.pkl # Generated files
├── classes.pkl
├── train_x.npy
├── train_y.npy
├── chatbot_model.h5
│
└── templates/
└── chat.html # Web UI
Enhancement Ideas
- 🎯 Add more intents (FAQ, product info, booking)
- 🎯 Implement context awareness (multi-turn conversations; see the sketch after this list)
- 🎯 Add entity extraction (dates, names, locations)
- 🎯 Integrate with APIs (weather, news)
- 🎯 Add sentiment analysis
- 🎯 Use pre-trained models (BERT, GPT)
- 🎯 Add voice input/output
- 🎯 Deploy to cloud (Heroku, AWS)
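For the context-awareness idea above, one minimal starting point is to remember each user's last confident intent and use it when the classifier is unsure. This is a sketch built on the Step 5 helpers, not a full dialogue manager:
# context_sketch.py - minimal per-user context tracking (illustrative only)
from chatbot import predict_class, get_response, intents  # Step 5 helpers

context = {}  # user_id -> last confident intent tag

def respond_with_context(user_id, message):
    intents_list = predict_class(message)
    if intents_list:
        context[user_id] = intents_list[0]['intent']  # remember where the conversation is
    elif context.get(user_id) == 'help':
        # No confident prediction, but the user was mid-help conversation
        return "Could you tell me more about what you need help with?"
    return get_response(intents_list, intents)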
Real-World Applications
Where This Technology Is Used
- Customer Service: Automated support bots
- E-commerce: Product recommendations and queries
- Healthcare: Symptom checkers, appointment booking
- Banking: Account inquiries, transactions
- Education: Learning assistants, tutoring