Why Python for AI?
- ✅ Simple and readable syntax
- ✅ Extensive AI/ML libraries
- ✅ Large community and resources
- ✅ Industry standard for AI development
- ✅ Great for rapid prototyping
Essential Libraries
- NumPy: Numerical computing and arrays
- Pandas: Data manipulation and analysis
- Matplotlib: Data visualization
- Scikit-learn: Machine learning algorithms
- TensorFlow: Deep learning framework
- PyTorch: Deep learning framework
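These libraries are almost always imported under the same conventional aliases, so it's worth learning them up front. A minimal sketch of the import block you'll see at the top of most AI scripts (the aliases np, pd, and plt are community conventions, not requirements):
# Conventional import aliases for the core stack
import numpy as np               # numerical arrays
import pandas as pd              # tabular data
import matplotlib.pyplot as plt  # plotting
# scikit-learn is usually imported piecemeal, e.g.:
from sklearn.ensemble import RandomForestClassifier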
1. NumPy - The Foundation
NumPy provides fast array operations essential for AI.
# NumPy Basics for AI
import numpy as np
# Create arrays (vectors and matrices)
vector = np.array([1, 2, 3, 4, 5])
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])
print("Vector:", vector)
print("Matrix:\n", matrix)
# Array operations (vectorized - FAST!)
squared = vector ** 2
print("Squared:", squared)
# Matrix operations
weights = np.array([[0.5, 0.3], [0.2, 0.8], [0.1, 0.9]])
inputs = np.array([1.0, 2.0])
output = np.dot(weights, inputs)  # Matrix-vector multiplication: (3, 2) @ (2,) -> (3,)
print("Neural network layer output:", output)
# Statistical operations
data = np.random.randn(1000)  # 1000 samples from a standard normal distribution
print(f"Mean: {data.mean():.2f}")
print(f"Std: {data.std():.2f}")
# Array indexing and slicing
print("First 3 elements:", vector[:3])
print("Matrix first row:", matrix[0, :])
print("Matrix first column:", matrix[:, 0])
2. Pandas - Data Manipulation
Pandas makes it easy to work with structured data.
# Pandas for Data Science
import pandas as pd
import numpy as np
# Create a DataFrame (like a spreadsheet)
data = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'age': [25, 30, 35, 28],
    'salary': [50000, 60000, 75000, 55000],
    'department': ['Sales', 'IT', 'IT', 'Sales']
})
print("DataFrame:")
print(data)
# Basic operations
print("\nData info:")
data.info()  # info() prints its summary directly and returns None, so no print() wrapper
print("\nStatistics:")
print(data.describe())
# Filtering
it_employees = data[data['department'] == 'IT']
print("\nIT Department:")
print(it_employees)
# Grouping and aggregation
avg_salary = data.groupby('department')['salary'].mean()
print("\nAverage salary by department:")
print(avg_salary)
# Adding new columns
data['annual_bonus'] = data['salary'] * 0.1
print("\nWith bonus column:")
print(data)
# Reading data (common in AI projects)
# data = pd.read_csv('data.csv')
# data = pd.read_excel('data.xlsx')
# data = pd.read_json('data.json')
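Writing data out is just as common as reading it in. A quick round-trip sketch using the DataFrame built above (the filename employees.csv is arbitrary):
# Save the DataFrame, then read it back
data.to_csv('employees.csv', index=False)  # index=False skips the row-number column
reloaded = pd.read_csv('employees.csv')
print(reloaded.head())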
3. Matplotlib - Visualization
Visualizing data is crucial for understanding and debugging.
# Data Visualization with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Line plot
x = np.linspace(0, 10, 100)
y = np.sin(x)
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(x, y)
plt.title('Sine Wave')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
# Scatter plot with colors
x_scatter = np.random.randn(100)
y_scatter = np.random.randn(100)
colors = np.random.rand(100)
plt.subplot(1, 2, 2)
plt.scatter(x_scatter, y_scatter, c=colors, cmap='viridis')
plt.title('Random Scatter')
plt.xlabel('X')
plt.ylabel('Y')
plt.colorbar(label='Color Value')
plt.tight_layout()
plt.savefig('visualization.png')
plt.show()
# Training history visualization (common in AI)
epochs = range(1, 11)
train_loss = [0.9, 0.7, 0.5, 0.4, 0.3, 0.25, 0.2, 0.18, 0.15, 0.13]
val_loss = [0.95, 0.75, 0.55, 0.45, 0.38, 0.35, 0.33, 0.32, 0.31, 0.30]
plt.figure(figsize=(8, 5))
plt.plot(epochs, train_loss, label='Training Loss', marker='o')
plt.plot(epochs, val_loss, label='Validation Loss', marker='s')
plt.title('Model Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()
4. Complete AI Workflow Example
Putting it all together: a complete classification project.
# Complete AI Project: Iris Classification
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
# 1. Load and explore data
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target
print("Dataset shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nStatistics:")
print(df.describe())
# 2. Visualize data
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.scatter(df['sepal length (cm)'], df['sepal width (cm)'],
            c=df['species'], cmap='viridis')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('Sepal Dimensions')
plt.colorbar(label='Species')
plt.subplot(1, 3, 2)
df['species'].value_counts().plot(kind='bar')
plt.title('Species Distribution')
plt.ylabel('Count')
plt.subplot(1, 3, 3)
df.boxplot(column='petal length (cm)', by='species', ax=plt.gca())  # ax keeps the boxplot inside this subplot
plt.title('Petal Length by Species')
plt.tight_layout()
plt.show()
# 3. Prepare data
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# 4. Scale features (tree models don't require scaling, but it's a good habit for most pipelines)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# 5. Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)
# 6. Evaluate
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%")
# 7. Confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=iris.target_names,
            yticklabels=iris.target_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()
# 8. Feature importance
importance = model.feature_importances_
feature_names = iris.feature_names
plt.figure(figsize=(8, 5))
plt.barh(feature_names, importance)
plt.xlabel('Importance')
plt.title('Feature Importance')
plt.show()
print("\nModel successfully trained and evaluated!")
5. Python Tips for AI
List Comprehensions (Fast and Pythonic)
# Instead of loops, use comprehensions
numbers = [1, 2, 3, 4, 5]
# Old way
squared = []
for n in numbers:
    squared.append(n ** 2)
# Python way
squared = [n ** 2 for n in numbers]
print(squared) # [1, 4, 9, 16, 25]
# With conditions
even_squared = [n ** 2 for n in numbers if n % 2 == 0]
print(even_squared) # [4, 16]
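Comprehensions aren't limited to lists; dictionary comprehensions follow the same pattern (a small illustrative example, reusing the numbers list above):
# Dict comprehension: map each number to its square
square_map = {n: n ** 2 for n in numbers}
print(square_map)  # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}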
Lambda Functions (Quick Functions)
# Short anonymous functions
square = lambda x: x ** 2
print(square(5)) # 25
# With map (apply to all elements)
numbers = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x ** 2, numbers))
print(squared) # [1, 4, 9, 16, 25]
# With filter
evens = list(filter(lambda x: x % 2 == 0, numbers))
print(evens) # [2, 4]
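In AI code, lambdas most often show up as sort keys, for example when ranking model results. A quick sketch (the model names and scores are illustrative):
# Sort (model_name, accuracy) pairs by accuracy, highest first
results = [('model_a', 0.91), ('model_b', 0.87), ('model_c', 0.94)]
results.sort(key=lambda pair: pair[1], reverse=True)
print(results)  # [('model_c', 0.94), ('model_a', 0.91), ('model_b', 0.87)]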
Working with Files
# Reading and writing data
import json
import pickle
# JSON (for configuration, results)
config = {'learning_rate': 0.001, 'epochs': 100}
with open('config.json', 'w') as f:
    json.dump(config, f)
with open('config.json', 'r') as f:
    loaded_config = json.load(f)
# Pickle (for saving models)
from sklearn.linear_model import LinearRegression
model = LinearRegression()
# ... train model ...
# Save model
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)
# Load model
with open('model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
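For scikit-learn models specifically, joblib is often recommended over plain pickle because it handles large NumPy arrays more efficiently. A sketch, assuming joblib is available (it ships as a scikit-learn dependency):
# joblib: an alternative to pickle for scikit-learn models
import joblib

joblib.dump(model, 'model.joblib')          # save
loaded_model = joblib.load('model.joblib')  # load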
Installation Guide
# Install essential packages
pip install numpy pandas matplotlib scikit-learn
# For deep learning (pick one)
pip install tensorflow         # TensorFlow
pip install torch torchvision  # PyTorch
# Additional useful packages
pip install seaborn # Better visualizations
pip install jupyter # Jupyter notebooks
pip install plotly # Interactive plots
# Check installations
python -c "import numpy; print(f'NumPy: {numpy.__version__}')"
python -c "import pandas; print(f'Pandas: {pandas.__version__}')"
python -c "import sklearn; print(f'Scikit-learn: {sklearn.__version__}')"
Practice Exercise
Mini Project: Data Analysis
Create a Python script that:
- Generates random student data (names, scores, grades)
- Calculates statistics (mean, median, std)
- Visualizes the distribution
- Predicts pass/fail based on scores
Use NumPy, Pandas, Matplotlib, and Scikit-learn!
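If you want a nudge to get started, here is one possible starting skeleton. Everything in it is an assumption to adapt freely: the student count, the score distribution, and the 50-point pass threshold are all made up for illustration.
# Starter skeleton for the mini project (all concrete choices are illustrative)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(42)
n_students = 100
students = pd.DataFrame({
    'name': [f'student_{i}' for i in range(n_students)],
    'score': rng.normal(65, 15, n_students).clip(0, 100),
})
students['passed'] = (students['score'] >= 50).astype(int)  # assumed pass threshold
# TODO: derive letter grades from the scores yourself

# Statistics
print(students['score'].agg(['mean', 'median', 'std']))

# Distribution
plt.hist(students['score'], bins=20)
plt.title('Score Distribution')
plt.xlabel('Score')
plt.ylabel('Count')
plt.show()

# Pass/fail prediction from the score alone (deliberately simple)
X = students[['score']].values  # 2-D feature matrix
model = LogisticRegression()
model.fit(X, students['passed'])
print("Predicted pass (1) / fail (0) for score 55:", model.predict([[55]])[0])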