Development of AI Inpainting for Image Area Filling
Inpainting replaces a mask-defined area of an image with newly generated content, seamlessly blending it into the surrounding context. It is used for object removal, background replacement, photo restoration, and design-element changes.
diffusers Inpainting
from diffusers import StableDiffusionXLInpaintPipeline
from PIL import Image, ImageDraw
import torch
import io
import numpy as np
class InpaintingService:
    """Fill a mask-defined region of an image with generated content (SDXL inpainting).

    Mask convention: white (255) pixels are replaced, black (0) pixels are preserved.
    """

    def __init__(self):
        # fp16 weights on GPU; safetensors avoids pickle-based checkpoint loading.
        self.pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
            "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16",
        ).to("cuda")

    def inpaint(
        self,
        image_bytes: bytes,
        mask_bytes: bytes,  # white = replace, black = preserve
        prompt: str,
        negative_prompt: str = "low quality, blurry, artifacts",
        strength: float = 0.99,
        steps: int = 30,
        guidance_scale: float = 8.0,
    ) -> bytes:
        """Inpaint the masked region and return the result as PNG-encoded bytes.

        Raises:
            PIL.UnidentifiedImageError: if image_bytes or mask_bytes cannot be
                decoded as images.
        """
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        mask = Image.open(io.BytesIO(mask_bytes)).convert("L")
        # The VAE requires dimensions divisible by 8. Round down, but floor at 8
        # so very small inputs cannot collapse to a zero-sized canvas.
        w, h = image.size
        w, h = max(8, (w // 8) * 8), max(8, (h // 8) * 8)
        if (w, h) != image.size:
            image = image.resize((w, h), Image.LANCZOS)
        if (w, h) != mask.size:
            # NEAREST keeps the mask hard-edged. The default resample (bicubic)
            # would introduce gray pixels that partially blend preserved areas.
            mask = mask.resize((w, h), Image.NEAREST)
        result = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=image,
            mask_image=mask,
            height=h,
            width=w,
            strength=strength,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
        ).images[0]
        buf = io.BytesIO()
        result.save(buf, format="PNG")
        return buf.getvalue()
Automatic Mask Generation
from transformers import pipeline
import numpy as np
class AutoMaskGenerator:
    """Generate inpainting masks automatically (white = region to replace)."""

    def __init__(self):
        # SAM (Segment Anything) for precise segmentation
        self.sam = pipeline("mask-generation", model="facebook/sam-vit-huge", device="cuda")
        # CLIPSeg is loaded lazily on first text query and cached; the original
        # re-instantiated (re-loaded) both processor and model on EVERY call.
        self._clipseg = None

    def _get_clipseg(self):
        """Load and cache the CLIPSeg processor/model pair (one-time cost)."""
        if self._clipseg is None:
            from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
            self._clipseg = (
                CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined"),
                CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined"),
            )
        return self._clipseg

    def mask_by_text(self, image: Image.Image, text_query: str) -> Image.Image:
        """Create mask via CLIPSeg from a text description of the target region."""
        processor, seg_model = self._get_clipseg()
        inputs = processor(
            text=[text_query],
            images=[image],
            return_tensors="pt",
        )
        # Inference only: no_grad avoids building an autograd graph and
        # holding activation memory we never use.
        with torch.no_grad():
            outputs = seg_model(**inputs)
        mask = outputs.logits.squeeze().sigmoid().numpy()
        # Binarize at probability 0.5
        mask_binary = (mask > 0.5).astype(np.uint8) * 255
        # NEAREST keeps the binarized mask hard-edged when scaling up to the
        # original image size (default resample would blur the boundary).
        return Image.fromarray(mask_binary).resize(image.size, Image.NEAREST)

    def mask_by_coords(self, image: Image.Image, bbox: tuple) -> Image.Image:
        """Mask from a bounding box (x1, y1, x2, y2): box interior becomes white."""
        x1, y1, x2, y2 = bbox
        mask = Image.new("L", image.size, 0)
        draw = ImageDraw.Draw(mask)
        draw.rectangle([x1, y1, x2, y2], fill=255)
        return mask
Typical Scenarios
class InpaintingUseCases:
    """Prompt presets for common inpainting scenarios.

    Bug fixed: the original referenced ``self.pipe`` without ever assigning it,
    so every method raised AttributeError. The inpainting service is now
    injected through the constructor (default None keeps no-arg construction
    working as before).
    """

    def __init__(self, pipe=None):
        # Any object exposing inpaint(image_bytes, mask_bytes, prompt, **kwargs)
        # -> bytes, e.g. an InpaintingService instance.
        self.pipe = pipe

    async def remove_object(self, image: bytes, object_mask: bytes) -> bytes:
        """Remove the masked object and fill with plausible background."""
        return self.pipe.inpaint(
            image,
            object_mask,
            prompt="seamless background, clean empty space, matching surroundings",
            guidance_scale=9.0,
        )

    async def replace_background(self, image: bytes, subject_mask_inverted: bytes, new_background: str) -> bytes:
        """Change the background while preserving the (unmasked) subject."""
        return self.pipe.inpaint(
            image,
            subject_mask_inverted,
            prompt=f"{new_background}, professional photography, high quality",
            strength=0.95,
        )

    async def change_product_color(self, product_image: bytes, product_mask: bytes, color: str) -> bytes:
        """Recolor a product for a catalog shot."""
        return self.pipe.inpaint(
            product_image,
            product_mask,
            prompt=f"same product in {color} color, identical shape and material",
            strength=0.7,  # low strength preserves shape
            guidance_scale=10.0,
        )
API Endpoint
from fastapi import FastAPI, File, UploadFile, Form
# Bug fixed: Response was used below but never imported -> NameError at request time.
from fastapi.responses import Response

app = FastAPI()
inpainting = InpaintingService()


@app.post("/inpaint")
async def inpaint_image(
    image: UploadFile = File(...),
    mask: UploadFile = File(...),
    prompt: str = Form(...),
    strength: float = Form(0.99),
):
    """Inpaint the masked region of the uploaded image and return a PNG body.

    Mask convention follows InpaintingService: white = replace, black = preserve.
    """
    image_bytes = await image.read()
    mask_bytes = await mask.read()
    result = inpainting.inpaint(image_bytes, mask_bytes, prompt, strength=strength)
    return Response(content=result, media_type="image/png")
Timeline: basic inpainting API — 2–3 days. Service with auto-segmentation by click/text and web interface — 2–3 weeks.







