Python for AI Development

Why Python for AI?

Python dominates AI work thanks to its readable syntax, a huge ecosystem of numerical and machine-learning libraries, and a large community. The sections below tour the libraries you will use daily.

Essential Libraries

  NumPy: numerical computing and arrays
  Pandas: data manipulation and analysis
  Matplotlib: data visualization
  Scikit-learn: machine learning algorithms
  TensorFlow: deep learning framework
  PyTorch: deep learning framework
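
TensorFlow and PyTorch get their own deep-dives elsewhere; as a taste, here is a minimal PyTorch sketch (assuming torch is installed) of the automatic differentiation that powers deep learning:

# A taste of PyTorch: automatic differentiation (minimal sketch)
import torch

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = (x ** 2).sum()
y.backward()   # compute dy/dx automatically
print(x.grad)  # dy/dx = 2x -> tensor([2., 4., 6.])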

1. NumPy - The Foundation

NumPy provides fast array operations essential for AI.

# NumPy Basics for AI
import numpy as np

# Create arrays (vectors and matrices)
vector = np.array([1, 2, 3, 4, 5])
matrix = np.array([[1, 2, 3], 
                   [4, 5, 6], 
                   [7, 8, 9]])

print("Vector:", vector)
print("Matrix:\n", matrix)

# Array operations (vectorized - FAST!)
squared = vector ** 2
print("Squared:", squared)

# Matrix operations
weights = np.array([[0.5, 0.3], [0.2, 0.8], [0.1, 0.9]])
inputs = np.array([1.0, 2.0])
output = np.dot(weights, inputs)  # matrix-vector product: (3, 2) @ (2,) -> (3,)
print("Neural network layer output:", output)

# Statistical operations
data = np.random.randn(1000)  # 1,000 samples from a standard normal distribution
print(f"Mean: {data.mean():.2f}")
print(f"Std: {data.std():.2f}")

# Array indexing and slicing
print("First 3 elements:", vector[:3])
print("Matrix first row:", matrix[0, :])
print("Matrix first column:", matrix[:, 0])

2. Pandas - Data Manipulation

Pandas makes it easy to work with structured data.

# Pandas for Data Science
import pandas as pd
import numpy as np

# Create a DataFrame (like a spreadsheet)
data = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'age': [25, 30, 35, 28],
    'salary': [50000, 60000, 75000, 55000],
    'department': ['Sales', 'IT', 'IT', 'Sales']
})

print("DataFrame:")
print(data)

# Basic operations
print("\nData info:")
print(data.info())

print("\nStatistics:")
print(data.describe())

# Filtering
it_employees = data[data['department'] == 'IT']
print("\nIT Department:")
print(it_employees)

# Grouping and aggregation
avg_salary = data.groupby('department')['salary'].mean()
print("\nAverage salary by department:")
print(avg_salary)

# Adding new columns
data['annual_bonus'] = data['salary'] * 0.1
print("\nWith bonus column:")
print(data)

# Reading data (common in AI projects)
# data = pd.read_csv('data.csv')
# data = pd.read_excel('data.xlsx')
# data = pd.read_json('data.json')
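
Most models expect plain NumPy arrays with numeric features, not a raw DataFrame. A minimal sketch of that last step (re-creating a small version of the DataFrame above so it runs standalone):

# From DataFrame to model-ready NumPy arrays
import pandas as pd

data = pd.DataFrame({
    'age': [25, 30, 35, 28],
    'salary': [50000, 60000, 75000, 55000],
    'department': ['Sales', 'IT', 'IT', 'Sales'],
})

X = data[['age', 'salary']].to_numpy()                    # feature matrix
y = (data['department'] == 'IT').astype(int).to_numpy()   # binary target
print(X.shape, y)  # (4, 2) [0 1 1 0]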

3. Matplotlib - Visualization

Visualizing data is crucial for understanding and debugging.

# Data Visualization with Matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Line plot
x = np.linspace(0, 10, 100)
y = np.sin(x)

plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.plot(x, y)
plt.title('Sine Wave')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)

# Scatter plot with colors
x_scatter = np.random.randn(100)
y_scatter = np.random.randn(100)
colors = np.random.rand(100)

plt.subplot(1, 2, 2)
plt.scatter(x_scatter, y_scatter, c=colors, cmap='viridis')
plt.title('Random Scatter')
plt.xlabel('X')
plt.ylabel('Y')
plt.colorbar(label='Color Value')

plt.tight_layout()
plt.savefig('visualization.png')
plt.show()

# Training history visualization (common in AI)
epochs = range(1, 11)
train_loss = [0.9, 0.7, 0.5, 0.4, 0.3, 0.25, 0.2, 0.18, 0.15, 0.13]
val_loss = [0.95, 0.75, 0.55, 0.45, 0.38, 0.35, 0.33, 0.32, 0.31, 0.30]

plt.figure(figsize=(8, 5))
plt.plot(epochs, train_loss, label='Training Loss', marker='o')
plt.plot(epochs, val_loss, label='Validation Loss', marker='s')
plt.title('Model Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()
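
In a plot like this, watch the gap between the two curves: training loss still falling while validation loss flattens or rises is the classic sign of overfitting.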

4. Complete AI Workflow Example

Putting it all together: a complete classification project on the classic Iris dataset.

# Complete AI Project: Iris Classification
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns

# 1. Load and explore data
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target

print("Dataset shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nStatistics:")
print(df.describe())

# 2. Visualize data
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.scatter(df['sepal length (cm)'], df['sepal width (cm)'], 
            c=df['species'], cmap='viridis')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('Sepal Dimensions')
plt.colorbar(label='Species')

plt.subplot(1, 3, 2)
df['species'].value_counts().plot(kind='bar')
plt.title('Species Distribution')
plt.ylabel('Count')

plt.subplot(1, 3, 3)
df.boxplot(column='petal length (cm)', by='species', ax=plt.gca())
plt.suptitle('')  # drop the automatic "grouped by" super-title
plt.title('Petal Length by Species')

plt.tight_layout()
plt.show()

# 3. Prepare data
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Scale features (random forests don't need it, but most other models do)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# 6. Evaluate
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

# 7. Confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=iris.target_names,
            yticklabels=iris.target_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# 8. Feature importance
importance = model.feature_importances_
feature_names = iris.feature_names

plt.figure(figsize=(8, 5))
plt.barh(feature_names, importance)
plt.xlabel('Importance')
plt.title('Feature Importance')
plt.show()

print("\nModel successfully trained and evaluated!")

5. Python Tips for AI

List Comprehensions (Fast and Pythonic)

# Instead of loops, use comprehensions
numbers = [1, 2, 3, 4, 5]

# Old way
squared = []
for n in numbers:
    squared.append(n ** 2)

# Python way
squared = [n ** 2 for n in numbers]
print(squared)  # [1, 4, 9, 16, 25]

# With conditions
even_squared = [n ** 2 for n in numbers if n % 2 == 0]
print(even_squared)  # [4, 16]

Lambda Functions (Quick Functions)

# Short anonymous functions
square = lambda x: x ** 2
print(square(5))  # 25

# With map (apply to all elements)
numbers = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x ** 2, numbers))
print(squared)  # [1, 4, 9, 16, 25]

# With filter
evens = list(filter(lambda x: x % 2 == 0, numbers))
print(evens)  # [2, 4]
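
In modern Python, comprehensions are usually preferred over map and filter with lambdas, but you will see both styles in AI codebases, so it pays to read both.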

Working with Files

# Reading and writing data
import json
import pickle

# JSON (for configuration, results)
config = {'learning_rate': 0.001, 'epochs': 100}
with open('config.json', 'w') as f:
    json.dump(config, f)

with open('config.json', 'r') as f:
    loaded_config = json.load(f)

# Pickle (for saving models)
from sklearn.linear_model import LinearRegression
model = LinearRegression()
# ... train model ...

# Save model
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Load model
with open('model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
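
For scikit-learn models specifically, joblib (installed alongside scikit-learn) is often preferred over pickle because it handles the large NumPy arrays inside fitted models more efficiently. A quick sketch:

# joblib: a pickle alternative tuned for objects holding large NumPy arrays
import joblib
from sklearn.linear_model import LinearRegression

model = LinearRegression()
# ... train model ...

joblib.dump(model, 'model.joblib')
loaded_model = joblib.load('model.joblib')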

Installation Guide

# Install essential packages
pip install numpy pandas matplotlib scikit-learn

# For deep learning
pip install tensorflow  # or
pip install torch torchvision  # PyTorch

# Additional useful packages
pip install seaborn  # Better visualizations
pip install jupyter  # Jupyter notebooks
pip install plotly  # Interactive plots

# Check installations
python -c "import numpy; print(f'NumPy: {numpy.__version__}')"
python -c "import pandas; print(f'Pandas: {pandas.__version__}')"
python -c "import sklearn; print(f'Scikit-learn: {sklearn.__version__}')"

Practice Exercise

Mini Project: Data Analysis

Create a Python script that:

  1. Generates random student data (names, scores, grades)
  2. Calculates statistics (mean, median, std)
  3. Visualizes the distribution
  4. Predicts pass/fail based on scores

Use NumPy, Pandas, Matplotlib, and Scikit-learn!
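
If you get stuck, here is one possible starting point (a sketch, not the only solution; the 100-student size and pass mark of 60 are arbitrary assumptions):

# Starter sketch for the mini project
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# 1. Generate random student data
rng = np.random.default_rng(42)
scores = rng.normal(70, 15, size=100).clip(0, 100)
students = pd.DataFrame({
    'name': [f'student_{i}' for i in range(100)],
    'score': scores,
    'passed': (scores >= 60).astype(int),  # assumed pass mark: 60
})

# 2. Statistics
print(students['score'].agg(['mean', 'median', 'std']))

# 3. Visualize the distribution
students['score'].hist(bins=20)
plt.title('Score Distribution')
plt.xlabel('Score')
plt.ylabel('Count')
plt.show()

# 4. Predict pass/fail from scores
X_train, X_test, y_train, y_test = train_test_split(
    students[['score']], students['passed'], test_size=0.2, random_state=42
)
clf = LogisticRegression().fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))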