Project Overview
Difficulty: Intermediate
Goal: Build an intelligent chatbot that understands intent and responds appropriately
Technologies: NLP, TensorFlow, NLTK
Time Required: 3-4 hours
What You'll Learn
- Natural Language Processing (NLP) basics
- Text tokenization and preprocessing
- Intent classification with neural networks
- Building a conversational interface
- Pattern matching and response generation
Step 1: Install Dependencies
# Install required packages (run in a terminal)
pip install tensorflow numpy nltk scikit-learn
# Download NLTK tokenizer and lemmatizer data (run in Python)
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')  # required by the WordNet lemmatizer on some NLTK versions
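If the installs succeeded, the following quick sanity check should run without errors (the exact version number will differ on your machine):
# Quick sanity check: imports work and NLTK tokenizer data is available
import tensorflow as tf
import nltk
print("TensorFlow:", tf.__version__)
print(nltk.word_tokenize("Hello, how are you?"))  # should print a list of tokens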
Step 2: Create Training Data
# intents.json - Chatbot training data
import json
intents = {
    "intents": [
        {
            "tag": "greeting",
            "patterns": ["Hi", "Hello", "Hey", "Good morning", "Good evening"],
            "responses": ["Hello! How can I help you?", "Hi there!", "Greetings!"]
        },
        {
            "tag": "goodbye",
            "patterns": ["Bye", "Goodbye", "See you later", "Talk to you later"],
            "responses": ["Goodbye!", "See you later!", "Have a great day!"]
        },
        {
            "tag": "thanks",
            "patterns": ["Thanks", "Thank you", "Thanks a lot", "I appreciate it"],
            "responses": ["You're welcome!", "Happy to help!", "No problem!"]
        },
        {
            "tag": "about",
            "patterns": ["What can you do?", "Who are you?", "What are you?", "Tell me about yourself"],
            "responses": ["I'm an AI chatbot trained to assist you!", "I can answer questions and help with tasks!"]
        },
        {
            "tag": "weather",
            "patterns": ["What's the weather?", "How's the weather?", "Is it raining?", "Weather forecast"],
            "responses": ["I don't have real-time weather data, but you can check weather.com!",
                          "Try a weather app for accurate forecasts!"]
        },
        {
            "tag": "time",
            "patterns": ["What time is it?", "Current time", "What's the time?"],
            "responses": ["I don't have access to a real-time clock. Check your device!"]
        },
        {
            "tag": "help",
            "patterns": ["Help", "I need help", "Can you help me?", "Support"],
            "responses": ["Of course! What do you need help with?", "I'm here to help! What's your question?"]
        }
    ]
}
# Save to file
with open('intents.json', 'w') as f:
    json.dump(intents, f, indent=4)
print("Training data created!")
Step 3: Preprocess Data
# chatbot_preprocessing.py
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
# Load intents
with open('intents.json') as f:
    intents = json.load(f)
words = []
classes = []
documents = []
ignore_chars = ['?', '!', '.', ',']
# Process each pattern
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize words
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        # Add to documents
        documents.append((word_list, intent['tag']))
        # Add tag to classes
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
# Lemmatize and lowercase words, dropping punctuation
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_chars]
words = sorted(set(words))
classes = sorted(set(classes))
print(f"Documents: {len(documents)}")
print(f"Classes: {len(classes)} - {classes}")
print(f"Unique words: {len(words)}")
# Save preprocessed data
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))
# Create training data
training = []
output_empty = [0] * len(classes)
for document in documents:
    bag = []
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in document[0]]
    # Create bag of words: 1 if the vocabulary word appears in the pattern
    for word in words:
        bag.append(1 if word in word_patterns else 0)
    # Create one-hot output row for the intent tag
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])
# Shuffle and convert to array
np.random.shuffle(training)
training = np.array(training, dtype=object)
# Split features and labels
train_x = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))
# Save the arrays so chatbot_training.py can load them
np.save('train_x.npy', train_x)
np.save('train_y.npy', train_y)
print(f"Training data created: X={train_x.shape}, Y={train_y.shape}")
Step 4: Build and Train Model
# chatbot_training.py
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pickle
# Load preprocessed data
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
# Load the training arrays saved in Step 3
train_x = np.load('train_x.npy')
train_y = np.load('train_y.npy')
# Build neural network
model = keras.Sequential([
    keras.layers.Dense(128, input_shape=(len(train_x[0]),), activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(len(train_y[0]), activation='softmax')
])
# Compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    metrics=['accuracy']
)
# Train model
print("Training model...")
history = model.fit(
    train_x,
    train_y,
    epochs=200,
    batch_size=5,
    verbose=1
)
# Save model
model.save('chatbot_model.h5')
print("Model trained and saved!")
Step 5: Create Chatbot Interface
# chatbot.py - Main chatbot application
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow import keras
import random
# Load files
lemmatizer = WordNetLemmatizer()
intents = json.load(open('intents.json'))
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = keras.models.load_model('chatbot_model.h5')
def clean_up_sentence(sentence):
    """Tokenize and lemmatize input sentence"""
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words
def bag_of_words(sentence):
    """Convert sentence to bag-of-words array"""
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for w in sentence_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    return np.array(bag)
def predict_class(sentence):
    """Predict intent class for input sentence"""
    bow = bag_of_words(sentence)
    res = model.predict(np.array([bow]), verbose=0)[0]  # verbose=0 keeps the chat clean
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
    # Sort by probability, highest first
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({'intent': classes[r[0]], 'probability': str(r[1])})
    return return_list
def get_response(intents_list, intents_json):
    """Get response based on predicted intent"""
    if not intents_list:
        return "I'm not sure I understand. Can you rephrase?"
    tag = intents_list[0]['intent']
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])
    return "I'm not sure I understand. Can you rephrase?"
def chat():
    """Main chat loop"""
    print("Chatbot is ready! (type 'quit' to exit)")
    print("-" * 50)
    while True:
        message = input("You: ")
        if message.lower() == 'quit':
            print("Bot: Goodbye!")
            break
        intents_list = predict_class(message)
        response = get_response(intents_list, intents)
        print(f"Bot: {response}")

# Run chatbot
if __name__ == "__main__":
    chat()
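The 0.25 ERROR_THRESHOLD is a starting point, not a magic number: raising it makes the bot fall back to "I'm not sure" more often, while lowering it makes the bot guess more. Inspecting the classifier's ranked output helps you pick a value; for example, after from chatbot import predict_class:
# Inspect the ranked intents for a sample message
print(predict_class("hello there"))
# e.g. [{'intent': 'greeting', 'probability': '0.97'}] - exact numbers vary per training run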
Step 6: Test Your Chatbot
# Run the chatbot
python chatbot.py
# Example conversation:
# Chatbot is ready! (type 'quit' to exit)
# --------------------------------------------------
# You: Hello
# Bot: Hi there!
# You: What can you do?
# Bot: I'm an AI chatbot trained to assist you!
# You: Thanks
# Bot: You're welcome!
# You: Bye
# Bot: See you later!
Step 7: Add Web Interface (Optional)
# web_chatbot.py - Flask web interface
from flask import Flask, render_template, request, jsonify
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow import keras
import random
app = Flask(__name__)
# Load chatbot components (same as before)
lemmatizer = WordNetLemmatizer()
intents = json.load(open('intents.json'))
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = keras.models.load_model('chatbot_model.h5')
# ... (include all helper functions from Step 5) ...
@app.route('/')
def home():
    return render_template('chat.html')

@app.route('/get_response', methods=['POST'])
def get_bot_response():
    user_message = request.json['message']
    intents_list = predict_class(user_message)
    response = get_response(intents_list, intents)
    return jsonify({'response': response})

if __name__ == "__main__":
    app.run(debug=True)
# Create templates/chat.html (a minimal page; adapt the markup and styling as you like):
#
# <!DOCTYPE html>
# <html>
# <head>
#     <title>AI Chatbot</title>
# </head>
# <body>
#     <h1>AI Chatbot</h1>
#     <div id="chat"></div>
#     <input id="message" type="text" placeholder="Type a message...">
#     <button onclick="send()">Send</button>
#     <script>
#         async function send() {
#             const input = document.getElementById('message');
#             const text = input.value;
#             const res = await fetch('/get_response', {
#                 method: 'POST',
#                 headers: {'Content-Type': 'application/json'},
#                 body: JSON.stringify({message: text})
#             });
#             const data = await res.json();
#             const chat = document.getElementById('chat');
#             chat.innerHTML += '<p>You: ' + text + '</p><p>Bot: ' + data.response + '</p>';
#             input.value = '';
#         }
#     </script>
# </body>
# </html>
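With the server running (python web_chatbot.py), you can also test the endpoint without the browser UI. This sketch assumes Flask's default address, http://127.0.0.1:5000, and one extra dependency (pip install requests):
# test_endpoint.py - POST a message to the running Flask server
import requests
r = requests.post('http://127.0.0.1:5000/get_response', json={'message': 'Hello'})
print(r.json())  # e.g. {'response': 'Hi there!'}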
Complete Project Structure
chatbot_project/
│
├── intents.json # Training data
├── chatbot_preprocessing.py
├── chatbot_training.py
├── chatbot.py # Main bot
├── web_chatbot.py # Web interface (optional)
│
├── words.pkl # Generated files
├── classes.pkl
├── train_x.npy
├── train_y.npy
├── chatbot_model.h5
│
└── templates/
└── chat.html # Web UI
Enhancement Ideas
- 🎯 Add more intents (FAQ, product info, booking)
- 🎯 Implement context awareness (multi-turn conversations; see the sketch after this list)
- 🎯 Add entity extraction (dates, names, locations)
- 🎯 Integrate with APIs (weather, news)
- 🎯 Add sentiment analysis
- 🎯 Use pre-trained models (BERT, GPT)
- 🎯 Add voice input/output
- 🎯 Deploy to cloud (Heroku, AWS)
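For the context-awareness idea above, one minimal starting point is to remember each user's last confident intent and use it when the classifier is unsure. This is a sketch built on the Step 5 helpers, not a full dialogue manager:
# context_sketch.py - minimal per-user context tracking (illustrative only)
from chatbot import predict_class, get_response, intents  # Step 5 helpers

context = {}  # user_id -> last confident intent tag

def respond_with_context(user_id, message):
    intents_list = predict_class(message)
    if intents_list:
        context[user_id] = intents_list[0]['intent']  # remember where the conversation is
    elif context.get(user_id) == 'help':
        # No confident prediction, but the user was mid-help conversation
        return "Could you tell me more about what you need help with?"
    return get_response(intents_list, intents)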
Real-World Applications
Where This Technology Is Used
- Customer Service: Automated support bots
- E-commerce: Product recommendations and queries
- Healthcare: Symptom checkers, appointment booking
- Banking: Account inquiries, transactions
- Education: Learning assistants, tutoring