Kandinsky (Sber) integration for image generation
Kandinsky is a Russian image generation model from Sber. Its main advantage is native support for Russian-language prompts without translation; the CLIP model is trained on Russian texts. It is a candidate for projects with localization and data sovereignty requirements.
Kandinsky 3 via API
import asyncio
import base64
import json

import httpx
class KandinskyClient:
    """Async client for the Kandinsky (FusionBrain) text-to-image REST API.

    Authenticates with an API key/secret pair and exposes a single
    ``generate`` coroutine that submits a job and polls until the images
    are ready.
    """

    def __init__(self, api_key: str, secret_key: str):
        self.api_key = api_key
        self.secret_key = secret_key
        self.base_url = "https://api-key.fusionbrain.ai/key/api/v1"

    def _auth_headers(self) -> dict[str, str]:
        # FusionBrain expects both the key and the secret on every request.
        return {
            "X-Key": f"Key {self.api_key}",
            "X-Secret": f"Secret {self.secret_key}",
        }

    async def generate(
        self,
        prompt: str,
        width: int = 1024,
        height: int = 1024,
        num_images: int = 1,
        style: str = "DEFAULT",  # DEFAULT, KANDINSKY, UHD, ANIME, DIGITAL_ART
    ) -> list[bytes]:
        """Submit a text-to-image job and wait for the resulting images.

        Args:
            prompt: Text prompt (Russian prompts are supported natively).
            width: Output width in pixels.
            height: Output height in pixels.
            num_images: Number of images to request.
            style: Named style preset understood by the API.

        Returns:
            List of decoded image byte strings.

        Raises:
            RuntimeError: if the service reports the job failed.
            TimeoutError: if the job does not finish within the polling budget.
        """
        async with httpx.AsyncClient() as client:
            # Fetch the list of available models; the first entry is used.
            models_resp = await client.get(
                f"{self.base_url}/models", headers=self._auth_headers()
            )
            model_id = models_resp.json()[0]["id"]

            # Kick off the generation job. The API takes the parameters as a
            # JSON string inside a form field, not as a JSON request body.
            params = {
                "type": "GENERATE",
                "numImages": num_images,
                "width": width,
                "height": height,
                "generateParams": {"query": prompt},
                "style": style,
            }
            gen_resp = await client.post(
                f"{self.base_url}/text2image/run",
                headers=self._auth_headers(),
                data={"model_id": str(model_id), "params": json.dumps(params)},
            )
            uuid = gen_resp.json()["uuid"]

            # Block until the job completes (or fails / times out).
            return await self.poll_result(client, uuid)

    async def poll_result(
        self,
        client,
        uuid: str,
        max_attempts: int = 30,
        poll_interval: float = 2.0,
    ) -> list[bytes]:
        """Poll the job status until it is DONE and return the decoded images.

        Args:
            client: An open ``httpx.AsyncClient`` (or compatible object).
            uuid: Job identifier returned by the run endpoint.
            max_attempts: Maximum number of status checks before giving up.
            poll_interval: Seconds to sleep before each status check.

        Raises:
            RuntimeError: if the API reports status ``FAIL``.
            TimeoutError: if the job is still pending after all attempts.
        """
        headers = self._auth_headers()
        for _ in range(max_attempts):
            await asyncio.sleep(poll_interval)
            resp = await client.get(
                f"{self.base_url}/text2image/status/{uuid}", headers=headers
            )
            data = resp.json()
            status = data["status"]
            if status == "DONE":
                # Images arrive base64-encoded; decode to raw bytes.
                return [base64.b64decode(img) for img in data["images"]]
            if status == "FAIL":
                # Bug fix: the original kept polling a failed job until the
                # timeout; surface the failure immediately instead.
                raise RuntimeError(
                    f"Generation failed: {data.get('errorDescription', 'unknown error')}"
                )
        raise TimeoutError("Generation timeout")
Self-hosted via Hugging Face (Kandinsky 2.2 with diffusers)
# Self-hosted Kandinsky 2.2 is a two-stage pipeline: a prior model that maps
# text to image embeddings, and a decoder model that renders the image.
from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline
import torch

# Prior: text prompt -> image embeddings.
# NOTE(review): fp16 weights + .to("cuda") assume an NVIDIA GPU with enough
# VRAM is available — confirm for the target deployment.
prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior",
    torch_dtype=torch.float16
).to("cuda")

# Decoder: embeddings -> image.
pipeline = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder",
    torch_dtype=torch.float16
).to("cuda")
def generate_kandinsky(
    prompt: str,
    negative_prompt: str = "",
    height: int = 768,
    width: int = 768,
    num_inference_steps: int = 25,
) -> bytes:
    """Generate one image with the two-stage Kandinsky 2.2 pipeline.

    Uses the module-level ``prior`` and ``pipeline`` models. The resolution
    and step count, previously hard-coded, are now keyword parameters with
    the same defaults, so existing callers are unaffected.

    Args:
        prompt: Text prompt (Russian or English).
        negative_prompt: Concepts the image should avoid.
        height: Output height in pixels.
        width: Output width in pixels.
        num_inference_steps: Decoder denoising steps (more = slower, finer).

    Returns:
        PNG-encoded image bytes.
    """
    import io  # local import kept function-scoped, hoisted from mid-body

    # Stage 1 (prior): text -> image embeddings.
    image_embeds, negative_image_embeds = prior(
        prompt, negative_prompt=negative_prompt
    ).to_tuple()

    # Stage 2 (decoder): embeddings -> PIL image.
    image = pipeline(
        image_embeds=image_embeds,
        negative_image_embeds=negative_image_embeds,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
    ).images[0]

    # Serialize the PIL image to PNG bytes in memory.
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    return buf.getvalue()
Kandinsky 3 (used in the API example above; the self-hosted snippet uses Kandinsky 2.2) processes Russian prompts directly, improving accuracy for Russian-specific concepts (cultural references, Russian toponymy, folklore). For international projects, FLUX or SDXL are preferable. API integration takes 1–2 days.







