Stable Diffusion Integration for Image Generation
Stable Diffusion is an open-source image generation model with maximum flexibility: self-hosted deployment, custom checkpoints, LoRA, ControlNet, inpainting. SDXL (Stable Diffusion XL) generates 1024×1024 images with significantly higher quality than SD 1.5.
diffusers — Python Integration
from diffusers import (
StableDiffusionXLPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
DPMSolverMultistepScheduler
)
import torch
from PIL import Image
import io
class StableDiffusionService:
    """Text-to-image service wrapping an SDXL base pipeline.

    Loads fp16 SDXL weights once at construction, swaps in a DPM++ multistep
    scheduler, and exposes `generate()` which returns PNG-encoded bytes.
    """

    def __init__(self, model_path: str = "stabilityai/stable-diffusion-xl-base-1.0"):
        self.pipe = StableDiffusionXLPipeline.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16",
        )
        # DPM++ multistep with Karras sigmas — good quality at ~30 steps.
        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            self.pipe.scheduler.config,
            use_karras_sigmas=True,
        )
        # VRAM optimizations. NOTE: enable_model_cpu_offload() manages device
        # placement itself (sub-models are moved to the GPU on demand), so the
        # pipeline must NOT be moved with .to("cuda") first — combining the two
        # defeats the offload and can error on recent diffusers versions.
        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()

    def generate(
        self,
        prompt: str,
        negative_prompt: str = "nsfw, low quality, blurry, watermark, text",
        width: int = 1024,
        height: int = 1024,
        steps: int = 30,
        guidance_scale: float = 7.5,
        seed: int | None = None,
    ) -> bytes:
        """Generate one image and return it as PNG bytes.

        Args:
            prompt: Positive text prompt.
            negative_prompt: Concepts to steer away from.
            width, height: Output resolution (SDXL is tuned for 1024x1024).
            steps: Number of denoising steps.
            guidance_scale: Classifier-free guidance strength.
            seed: Optional seed for reproducible output. `seed=0` is a valid
                seed, hence the explicit `is not None` check below.
        """
        generator = (
            torch.Generator("cuda").manual_seed(seed) if seed is not None else None
        )
        image = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            generator=generator,
        ).images[0]
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        return buf.getvalue()
SDXL Refiner for Final Polish
# NOTE(review): StableDiffusionXLImg2ImgPipeline is already imported in the
# snippet at the top of this document — this re-import is redundant but harmless.
from diffusers import StableDiffusionXLImg2ImgPipeline

# Load the SDXL refiner in fp16; it shares the VAE/text-encoder architecture
# with the base model and is used img2img-style to finish partially-denoised
# latents produced by the base pipeline.
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16"
)
refiner.to("cuda")
def generate_with_refiner(prompt: str, steps: int = 40) -> bytes:
    """Two-stage SDXL generation: base denoises 80% of the schedule, the
    refiner completes the remaining 20%, and the result is returned as PNG bytes.

    NOTE(review): assumes a module-level `base_pipe` (SDXL base pipeline) and
    `refiner` are defined elsewhere in the file.
    """
    # Stage 1: the base pipeline stops at 80% of the noise schedule and hands
    # off raw latents instead of a decoded image.
    latents = base_pipe(
        prompt=prompt,
        num_inference_steps=steps,
        denoising_end=0.8,
        output_type="latent",
    ).images

    # Stage 2: the refiner resumes from the same point and adds fine detail.
    refined = refiner(
        prompt=prompt,
        num_inference_steps=steps,
        denoising_start=0.8,
        image=latents,
    ).images[0]

    out = io.BytesIO()
    refined.save(out, format="PNG")
    return out.getvalue()
LoRA Weights for Specific Styles
# Load LoRA for specific style
pipe.load_lora_weights("./loras/anime_style_v2.safetensors")
# Merge the LoRA into the base weights at 80% strength (faster inference,
# but the LoRA can no longer be toggled per-request without unfusing).
pipe.fuse_lora(lora_scale=0.8)
# Multiple LoRA simultaneously
# NOTE(review): fuse_lora() above bakes the first LoRA into the weights;
# loading additional adapters afterwards stacks on top of the fused model.
# Confirm this ordering is intended (an unfuse_lora() in between may be needed).
pipe.load_lora_weights("lora1.safetensors", adapter_name="style1")
pipe.load_lora_weights("lora2.safetensors", adapter_name="style2")
# Weighted blend of the two adapters: style1 at 0.7, style2 at 0.3.
pipe.set_adapters(["style1", "style2"], adapter_weights=[0.7, 0.3])
Performance by GPU
| GPU | VRAM | Generation Time 1024×1024 (30 steps) |
|---|---|---|
| RTX 3060 | 12 GB | ~18 sec |
| RTX 3090 | 24 GB | ~7 sec |
| RTX 4090 | 24 GB | ~4 sec |
| A100 40G | 40 GB | ~3 sec |
xFormers or Flash Attention 2 accelerates generation by roughly 20–30% with the same VRAM usage.
REST API Wrapper
import json
import uuid

from fastapi import FastAPI, BackgroundTasks
from pydantic import BaseModel
app = FastAPI()
# Single shared service instance: model weights are loaded once at import time.
# Presumably consumed by process_generation (defined elsewhere) — verify.
sd_service = StableDiffusionService()
class GenerateRequest(BaseModel):
    """Request body for POST /generate."""

    prompt: str
    negative_prompt: str = ""
    width: int = 1024
    height: int = 1024
    steps: int = 30
    # Optional reproducibility seed. The original `seed: int = None` is an
    # invalid annotation: pydantic v2 rejects a None default on a plain `int`
    # field — the type must be declared optional.
    seed: int | None = None
@app.post("/generate")
async def generate(req: GenerateRequest, background_tasks: BackgroundTasks):
    """Enqueue an image-generation job and return its id immediately.

    The actual work happens in `process_generation` (defined elsewhere),
    scheduled via FastAPI's background tasks.
    """
    job_id = str(uuid.uuid4())
    payload = req.dict()
    background_tasks.add_task(process_generation, job_id, payload)
    return {"job_id": job_id}
@app.get("/result/{job_id}")
async def get_result(job_id: str):
    """Fetch a job's status/result from Redis; report 'not_found' if absent.

    NOTE(review): assumes a module-level `redis_client` is configured elsewhere
    in the file, and that job payloads are stored as JSON under "job:{id}".
    """
    raw = redis_client.get(f"job:{job_id}")
    if not raw:
        return {"status": "not_found"}
    return json.loads(raw)
Timeline: API wrapper over SDXL — 3–5 days. Self-hosted service with queue and storage — 1–2 weeks.







