Sim-to-Real Transfer¶
Transferring policies learned in simulation to real robots is one of the most challenging problems in robot learning.
The Reality Gap¶
Problem: Simulation ≠ Reality
Sources of discrepancy:
- Physics: friction, contact dynamics, deformable objects
- Perception: lighting, camera noise, occlusions
- Actuation: motor delays, backlash, compliance
- Calibration: joint offsets, kinematic errors
Impact: Policy that achieves 95% success in sim may get 0% on real robot without proper transfer techniques.
# Illustrative numbers only: the gap between simulated and real success rates,
# and what good transfer techniques typically recover.
# Simulation
sim_success_rate = 0.95
# Naive transfer to reality
real_success_rate = 0.05 # 😢
# With proper sim2real techniques
real_success_rate = 0.80 # ✓
Transfer Strategies¶
1. Domain Randomization¶
Randomize simulation parameters to force policy to be robust:
class DomainRandomization:
    """Randomize simulation parameters during training.

    Each call to :meth:`randomize` perturbs the physics, visual, and robot
    dynamics parameters of the wrapped environment so that a policy trained
    on top of it cannot overfit to one specific simulator configuration.
    """

    def __init__(self, env, texture_dataset=None):
        """
        Args:
            env: Simulation environment exposing setters such as
                ``set_friction``, ``set_lighting``, ``set_action_delay``, and
                collections ``objects``, ``robot.joints``, ``camera``.
            texture_dataset: Optional sequence of textures (e.g. from DTD or
                a materials dataset) used by :meth:`sample_random_texture`.
                Fixes the original bug where ``self.texture_dataset`` was
                read but never assigned, causing an AttributeError.
        """
        self.env = env
        self.texture_dataset = list(texture_dataset) if texture_dataset else []

    def randomize(self):
        """Apply one fresh draw of random parameters to the simulation."""
        # Physics randomization
        self.randomize_physics()
        # Visual randomization
        self.randomize_visuals()
        # Dynamics randomization
        self.randomize_dynamics()

    def randomize_physics(self):
        """Randomize physical properties (friction, mass, contacts, damping)."""
        # Friction coefficients
        friction = np.random.uniform(0.5, 1.5)  # ±50%
        self.env.set_friction(friction)
        # Object masses
        for obj in self.env.objects:
            mass = obj.nominal_mass * np.random.uniform(0.8, 1.2)  # ±20%
            obj.set_mass(mass)
        # Contact parameters
        restitution = np.random.uniform(0.0, 0.3)  # Bounciness
        self.env.set_restitution(restitution)
        # Damping
        damping = np.random.uniform(0.5, 2.0)
        self.env.set_damping(damping)

    def randomize_visuals(self):
        """Randomize visual appearance (lighting, colors, textures, camera)."""
        # Lighting
        light_intensity = np.random.uniform(0.5, 1.5)
        light_position = np.random.uniform(-2, 2, size=3)
        self.env.set_lighting(light_intensity, light_position)
        # Object colors/textures
        for obj in self.env.objects:
            color = np.random.uniform(0, 1, size=3)  # RGB
            obj.set_color(color)
            # Apply random texture
            texture = self.sample_random_texture()
            obj.set_texture(texture)
        # Background
        background_color = np.random.uniform(0, 1, size=3)
        self.env.set_background(background_color)
        # Camera parameters (±10% focal length)
        focal_length = np.random.uniform(0.9, 1.1) * self.env.camera.nominal_focal_length
        self.env.camera.set_focal_length(focal_length)

    def randomize_dynamics(self):
        """Randomize robot dynamics (stiffness, action delay, action noise)."""
        # Joint stiffness (±20% of nominal)
        for joint in self.env.robot.joints:
            stiffness = joint.nominal_stiffness * np.random.uniform(0.8, 1.2)
            joint.set_stiffness(stiffness)
        # Action delays
        delay = np.random.randint(0, 3)  # 0-3 timesteps
        self.env.set_action_delay(delay)
        # Action noise
        noise_std = np.random.uniform(0.01, 0.05)
        self.env.set_action_noise(noise_std)

    def sample_random_texture(self):
        """Sample one texture from the configured real-world texture dataset.

        Raises:
            RuntimeError: if no texture dataset was supplied to ``__init__``.
        """
        if not self.texture_dataset:
            raise RuntimeError(
                "No texture dataset configured; pass texture_dataset to __init__"
            )
        texture_id = np.random.randint(0, len(self.texture_dataset))
        return self.texture_dataset[texture_id]
Training with Domain Randomization¶
def train_with_domain_randomization(policy, env, config):
    """Train ``policy`` in ``env``, re-randomizing the simulator every episode.

    Args:
        policy: Policy exposing ``predict(obs) -> action``.
        env: Gym-style simulation environment.
        config: Carries ``num_episodes``.

    Returns:
        The policy (trained in place).
    """
    randomizer = DomainRandomization(env)

    for episode in range(config.num_episodes):
        # Fresh draw of physics/visual/dynamics parameters each episode.
        randomizer.randomize()

        obs = env.reset()
        episode_reward = 0
        done = False
        while not done:
            action = policy.predict(obs)
            obs, reward, done, info = env.step(action)
            episode_reward += reward
            # Store experience and train
            # (omitted for brevity)

        if episode % 100 == 0:
            print(f"Episode {episode}, Reward: {episode_reward}")

        # Sanity-check hardware performance at a coarse cadence so transfer
        # problems are caught early.
        if episode % 1000 == 0:
            real_success_rate = evaluate_on_real_robot(policy)
            print(f"Real robot success rate: {real_success_rate:.2f}")

    return policy
2. System Identification¶
Measure real robot parameters and configure simulation to match:
class SystemIdentification:
    """Identify real-robot parameters so the simulator can be matched to them."""

    def __init__(self, robot):
        """
        Args:
            robot: Hardware interface exposing joint torque/position/velocity/
                acceleration getters and setters used below.
        """
        self.robot = robot

    def identify_friction(self):
        """Estimate friction coefficients for joint 0.

        Applies a sweep of constant torques, waits for an (approximate)
        steady state, then fits the model ``tau = b*v + c*sign(v)``
        (viscous + Coulomb friction).

        Returns:
            dict with keys ``'viscous'`` (b) and ``'coulomb'`` (c).
        """
        # Apply constant torques and measure resulting motion
        torques = np.linspace(-5, 5, 20)
        velocities = []
        for torque in torques:
            self.robot.set_joint_torque(joint_id=0, torque=torque)
            time.sleep(1.0)  # Wait for steady state
            velocity = self.robot.get_joint_velocity(joint_id=0)
            velocities.append(velocity)

        # Fit friction model: tau = b*v + c*sign(v)
        # (viscous + Coulomb friction)
        from scipy.optimize import curve_fit

        def friction_model(v, b, c):
            return b * v + c * np.sign(v)

        params, _ = curve_fit(friction_model, velocities, torques)
        viscous_friction, coulomb_friction = params
        return {
            'viscous': viscous_friction,
            'coulomb': coulomb_friction
        }

    def identify_mass_properties(self):
        """Estimate effective joint inertias from torque pulses.

        Returns:
            dict mapping joint index -> inertia estimate from tau = I * alpha.
        """
        # Apply known torques and measure accelerations
        measurements = []
        for joint in range(self.robot.num_joints):
            # Apply torque pulse
            torque = 10.0
            self.robot.set_joint_torque(joint, torque)
            # Measure angular acceleration
            acc = self.robot.get_joint_acceleration(joint)
            measurements.append({
                'joint': joint,
                'torque': torque,
                'acceleration': acc
            })
            self.robot.set_joint_torque(joint, 0)

        # Estimate from tau = I * alpha (epsilon guards divide-by-zero)
        inertias = {}
        for m in measurements:
            inertias[m['joint']] = m['torque'] / (m['acceleration'] + 1e-6)
        return inertias

    def identify_time_delays(self, timeout=5.0):
        """Measure the average sensor-actuator delay on joint 0.

        Sends a position step command and times how long until motion is
        detected, averaged over 100 trials.

        Args:
            timeout: Maximum seconds to wait per trial. Fixes the original
                unbounded ``while True`` busy-wait, which would hang forever
                if the joint never moved (e.g. E-stop engaged).

        Returns:
            Mean delay in seconds.

        Raises:
            TimeoutError: if motion is not detected within ``timeout``.
        """
        delays = []
        for trial in range(100):
            # Record current joint position
            pos_before = self.robot.get_joint_position(0)
            # Send step command
            t_command = time.time()
            self.robot.set_joint_position(0, pos_before + 0.1)
            # Wait for motion to start (bounded by timeout)
            while True:
                pos_now = self.robot.get_joint_position(0)
                if abs(pos_now - pos_before) > 0.001:  # Motion detected
                    t_response = time.time()
                    break
                if time.time() - t_command > timeout:
                    raise TimeoutError(
                        f"No joint motion detected within {timeout}s "
                        f"(trial {trial})"
                    )
            delay = t_response - t_command
            delays.append(delay)
        avg_delay = np.mean(delays)
        return avg_delay

    def configure_simulation(self, sim_env):
        """Run all identification routines and push the results into ``sim_env``."""
        print("Identifying real robot parameters...")
        # Friction
        friction_params = self.identify_friction()
        sim_env.set_friction(
            viscous=friction_params['viscous'],
            coulomb=friction_params['coulomb']
        )
        # Inertias
        inertias = self.identify_mass_properties()
        for joint, inertia in inertias.items():
            sim_env.set_joint_inertia(joint, inertia)
        # Time delays
        delay = self.identify_time_delays()
        sim_env.set_action_delay(delay)
        print(f"Configured simulation with identified parameters:")
        print(f"  Friction: {friction_params}")
        print(f"  Time delay: {delay*1000:.1f}ms")
3. Perception-Aware Training¶
Train directly on real images instead of sim-rendered ones:
class RealImageOverlay:
    """Pair real camera frames with simulator state in each observation.

    Lets a policy train against the real visual distribution while the
    underlying dynamics still come from simulation.
    """

    def __init__(self, sim_env, real_camera):
        self.sim_env = sim_env
        self.real_camera = real_camera

    def get_observation(self):
        """Return ``{'image': <real frame>, 'state': <sim state>}``."""
        # State comes from the simulator, the frame from real hardware.
        state_from_sim = self.sim_env.get_state()
        frame = self.real_camera.get_image()
        return {
            'image': frame,          # Real!
            'state': state_from_sim  # Sim
        }
# Training loop
# NOTE(review): `sim_env`, `real_camera`, and `num_episodes` are assumed to be
# defined earlier in the surrounding script — confirm before running.
env = RealImageOverlay(sim_env, real_camera)
for episode in range(num_episodes):
    obs = env.get_observation() # Real image, sim state
    # Train policy on real images
    # ...
Benefit: Policy sees real visual distribution during training.
4. Progressive Real-World Adaptation¶
Gradually transition from sim to real:
class ProgressiveSimToReal:
    """Stage-by-stage adaptation of a policy from simulation to hardware."""

    def __init__(self, policy, sim_env, real_env):
        self.policy = policy
        self.sim_env = sim_env
        self.real_env = real_env

    def adapt(self, num_stages=5):
        """Run the multi-stage sim-to-real curriculum and return the policy."""
        # Stage 1: pure simulation, bulk of the training budget.
        print("Stage 1: Training in simulation...")
        self.train_in_sim(num_episodes=10000)

        # Stage 2: keep sim dynamics, swap in real-world textures.
        print("Stage 2: Sim with real textures...")
        self.apply_real_textures_to_sim()
        self.train_in_sim(num_episodes=2000)

        # Stage 3: sim dynamics with live real camera frames (AR-style).
        print("Stage 3: Sim with real camera images...")
        self.enable_real_image_overlay()
        self.train_in_sim(num_episodes=1000)

        # Stage 4: interleave sim and real rollouts.
        print("Stage 4: Mixed sim and real...")
        self.train_mixed(sim_ratio=0.8, num_episodes=500)

        # Stage 5: short fine-tune entirely on hardware.
        print("Stage 5: Fine-tuning on real robot...")
        self.train_on_real_robot(num_episodes=100)

        # Final evaluation
        real_success_rate = self.evaluate_on_real(num_episodes=50)
        print(f"Final real-world success rate: {real_success_rate:.2%}")
        return self.policy

    def train_mixed(self, sim_ratio, num_episodes):
        """Train on a random mix of sim and real episodes.

        Args:
            sim_ratio: Probability in [0, 1] that an episode uses the sim env.
            num_episodes: Total episodes to run.
        """
        for _ in range(num_episodes):
            # Coin-flip per episode between simulation and hardware.
            env = self.sim_env if np.random.rand() < sim_ratio else self.real_env
            obs = env.reset()
            # ... training loop
Evaluation Metrics¶
Track sim-to-real gap throughout training:
class SimToRealEvaluator:
    """Quantify the sim-to-real gap by evaluating one policy in both worlds."""

    def __init__(self, policy, sim_env, real_env):
        self.policy = policy
        self.sim_env = sim_env
        self.real_env = real_env

    def evaluate(self, num_episodes=20):
        """Evaluate in sim and on hardware; return metrics plus the gap.

        Returns:
            dict with keys 'sim', 'real' (per-domain metrics) and
            'transfer_gap' (sim minus real differences).
        """
        report = {}

        # 1. Sim performance
        print("Evaluating in simulation...")
        sim_metrics = self.run_episodes(self.sim_env, num_episodes)
        report['sim'] = sim_metrics

        # 2. Real performance
        print("Evaluating on real robot...")
        real_metrics = self.run_episodes(self.real_env, num_episodes)
        report['real'] = real_metrics

        # 3. Transfer metrics: positive gap means sim outperforms real.
        report['transfer_gap'] = {
            'success_rate_gap': sim_metrics['success_rate'] - real_metrics['success_rate'],
            'return_gap': sim_metrics['avg_return'] - real_metrics['avg_return'],
        }

        self.print_report(report)
        return report

    def run_episodes(self, env, num_episodes):
        """Roll out the policy for ``num_episodes``; return aggregate metrics."""
        n_success = 0
        returns = []
        lengths = []

        for _ in range(num_episodes):
            obs = env.reset()
            total_reward = 0
            n_steps = 0
            finished = False
            # Cap episodes at 500 steps to bound evaluation time.
            while not finished and n_steps < 500:
                act = self.policy.predict(obs)
                obs, reward, finished, info = env.step(act)
                total_reward += reward
                n_steps += 1
            # Success flag comes from the final step's info dict.
            n_success += int(info.get('success', False))
            returns.append(total_reward)
            lengths.append(n_steps)

        return {
            'success_rate': n_success / num_episodes,
            'avg_return': np.mean(returns),
            'avg_length': np.mean(lengths)
        }

    def print_report(self, results):
        """Pretty-print the sim vs. real comparison with a verdict."""
        bar = "=" * 60
        print(bar)
        print("SIM-TO-REAL EVALUATION")
        print(bar)
        print(f"\nSimulation Performance:")
        print(f"  Success Rate: {results['sim']['success_rate']:.2%}")
        print(f"  Avg Return: {results['sim']['avg_return']:.2f}")
        print(f"\nReal Robot Performance:")
        print(f"  Success Rate: {results['real']['success_rate']:.2%}")
        print(f"  Avg Return: {results['real']['avg_return']:.2f}")
        print(f"\nTransfer Gap:")
        print(f"  Success Rate Gap: {results['transfer_gap']['success_rate_gap']:.2%}")
        print(f"  Return Gap: {results['transfer_gap']['return_gap']:.2f}")

        gap = results['transfer_gap']['success_rate_gap']
        if gap < 0.1:
            print("\n✓ Good transfer! Gap < 10%")
        elif gap < 0.3:
            print("\n⚠️ Moderate transfer. Gap = 10-30%")
        else:
            print("\n✗Poor transfer. Gap > 30%. Consider more domain randomization.")
        print(bar)
print("="*60)
Advanced: Domain Adaptation¶
Use domain adaptation techniques when randomization isn't enough:
class VisualDomainAdapter(nn.Module):
    """Adversarial adapter pushing sim and real visual features together.

    Implements the DANN (Domain-Adversarial Neural Network) objective: a
    small MLP discriminator learns to tell sim features from real ones,
    while the upstream feature extractor is trained on the flipped-label
    loss so its features become domain-invariant.
    """

    def __init__(self, feature_dim):
        super().__init__()
        # Binary domain classifier over feature vectors (logit output).
        self.discriminator = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 1)  # Binary: sim vs real
        )

    def domain_adversarial_loss(self, sim_features, real_features):
        """Compute both halves of the adversarial objective.

        Args:
            sim_features: Feature batch from simulated observations.
            real_features: Feature batch from real observations.

        Returns:
            (disc_loss, conf_loss): the discriminator minimizes disc_loss
            (true labels: sim=0, real=1); the feature extractor minimizes
            conf_loss (labels flipped, i.e. "confuse the discriminator").
        """
        logits_sim = self.discriminator(sim_features)
        logits_real = self.discriminator(real_features)

        label_sim = torch.zeros_like(logits_sim)
        label_real = torch.ones_like(logits_real)

        # Discriminator objective: classify each domain with its true label.
        disc_loss = (
            F.binary_cross_entropy_with_logits(logits_sim, label_sim)
            + F.binary_cross_entropy_with_logits(logits_real, label_real)
        )

        # Confusion objective: swap the labels so minimizing this makes sim
        # features look real (and vice versa) to the discriminator.
        conf_loss = (
            F.binary_cross_entropy_with_logits(logits_sim, label_real)
            + F.binary_cross_entropy_with_logits(logits_real, label_sim)
        )
        return disc_loss, conf_loss
# Training loop
def train_with_domain_adaptation(policy, sim_env, real_env):
"""Train with domain adaptation"""
adapter = VisualDomainAdapter(feature_dim=512)
for iteration in range(10000):
# Collect sim experience
sim_obs = sim_env.reset()
sim_features = policy.encode_observation(sim_obs)
# Collect real experience (no actions needed, just observations)
real_obs = real_env.get_observation()
real_features = policy.encode_observation(real_obs)
# Domain adaptation loss
disc_loss, conf_loss = adapter.domain_adversarial_loss(
sim_features, real_features
)
# Task loss (from sim only, since we have ground truth actions)
task_loss = policy.compute_loss(sim_obs, sim_actions)
# Combined loss
total_loss = task_loss + 0.1 * conf_loss
# Update policy (feature extractor)
policy_optimizer.zero_grad()
total_loss.backward()
policy_optimizer.step()
# Update discriminator
adapter_optimizer.zero_grad()
disc_loss.backward()
adapter_optimizer.step()
Best Practices¶
DO:¶
- ✓ Use extensive domain randomization (physics + visuals)
- ✓ Measure real robot parameters when possible
- ✓ Start with high sim success rate (>95%) before transferring
- ✓ Gradually increase realism (progressive transfer)
- ✓ Test frequently on real robot to catch issues early
- ✓ Use real images during sim training if possible
- ✓ Collect diverse real-world data for fine-tuning
DON'T:¶
- ✗ Assume perfect transfer without testing
- ✗ Over-optimize for specific sim parameters
- ✗ Neglect the visual domain gap
- ✗ Skip safety checks when deploying to the real robot
- ✗ Give up after the first failure — iterate!
Safety Considerations¶
Always test safely on real robot:
class SafeRealRobotTester:
    """Run policy rollouts on hardware with a human in the loop.

    Every action is vetted by a safety checker before reaching the motors,
    and the operator confirms each episode start with the E-stop at hand.
    """

    def __init__(self, robot, safety_checker):
        self.robot = robot
        self.safety_checker = safety_checker

    def test_policy(self, policy, num_episodes=10):
        """Roll out ``policy`` for ``num_episodes`` with safety gating."""
        for episode in range(num_episodes):
            obs = self.robot.reset()

            # Human supervision: wait for explicit operator go-ahead.
            print(f"Episode {episode+1}/{num_episodes}")
            input("Press Enter to start (E-stop ready!)...")

            steps = 0
            done = False
            while not done and steps < 500:
                action = policy.predict(obs)

                # Gate every action through the safety checker first.
                if not self.safety_checker.is_safe(obs, action):
                    print("⚠️ UNSAFE ACTION DETECTED! Stopping.")
                    self.robot.emergency_stop()
                    break

                # First few episodes run at half speed as an extra margin.
                speed_limit = 0.5 if episode < 3 else 1.0
                safe_action = np.clip(action, -speed_limit, speed_limit)

                obs, reward, done, info = self.robot.step(safe_action)
                steps += 1

            print(f"Episode completed: {steps} steps, Success: {info.get('success', False)}")
Resources & Papers¶
Key Papers:
- Tobin et al., "Domain Randomization for Transferring Deep Neural Networks from Simulation to the Real World", IROS 2017
- Peng et al., "Sim-to-Real Transfer of Robotic Control with Dynamics Randomization", ICRA 2018
- Sadeghi & Levine, "CAD2RL: Real Single-Image Flight without a Single Real Image", RSS 2017
Next Steps¶
- Domain Randomization in IsaacLab
- Benchmarking - Evaluate sim-to-real gap
- Safety - Safe deployment practices
- Deployment - Production deployment