The text-to-image task involves generating a visual representation (an image) from a textual description.
The process begins with a text input that describes an image. This can range from a simple description like "a two-story blue house" to more complex and abstract concepts.
The model processes the text to understand its contents and then generates an image that matches the description. This involves understanding the semantics of the text, visualizing the described elements, and assembling them into a coherent image.
The following example generates an image with the Stable Diffusion XL (SDXL) base model using the diffusers library:

from diffusers import DiffusionPipeline
import torch

# Load the SDXL base pipeline on the CPU in single precision
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float32, use_safetensors=True)
pipe.to("cpu")

prompt = "An astronaut riding a green horse"
image = pipe(prompt=prompt).images[0]
image
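Running SDXL on the CPU works but is slow. If a CUDA GPU is available, a common variant (a minimal sketch, assuming a GPU with enough memory for the half-precision weights) is to load the fp16 weights and move the pipeline to the device:

from diffusers import DiffusionPipeline
import torch

# Assumes a CUDA device; fp16 roughly halves memory use and speeds up inference
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
pipe.to("cuda")

image = pipe(prompt="An astronaut riding a green horse").images[0]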
The next example uses SDXL-Lightning, a distilled version of SDXL that can generate images in very few inference steps:

import torch
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "ByteDance/SDXL-Lightning"
ckpt = "sdxl_lightning_4step_unet.safetensors"  # Use the correct ckpt for your step setting!

# Load the distilled UNet weights on the CPU in single precision
unet = UNet2DConditionModel.from_config(base, subfolder="unet").to("cpu")
unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cpu"))

# Initialize the pipeline on the CPU in single precision
pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet, torch_dtype=torch.float32).to("cpu")

# Ensure the sampler uses "trailing" timesteps
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")

# Generate an image with the specified inference steps and CFG scale
result = pipe("A cat skating", num_inference_steps=4, guidance_scale=0)
image = result.images[0]
image.save("output.png")
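SDXL-Lightning publishes separate distilled checkpoints for different step counts, so num_inference_steps must match the checkpoint you load. As a sketch (assuming a 2-step UNet checkpoint exists in the same repository), switching to two steps would look like this:

ckpt = "sdxl_lightning_2step_unet.safetensors"  # assumed 2-step variant of the distilled UNet
unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cpu"))  # updates pipe.unet in place
result = pipe("A cat skating", num_inference_steps=2, guidance_scale=0)
image = result.images[0]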
The AbsoluteReality checkpoint, a community fine-tune of Stable Diffusion, can be loaded the same way:

from diffusers import DiffusionPipeline

pipeline = DiffusionPipeline.from_pretrained("digiplay/AbsoluteReality_v1.8.1")
pipeline.to("cpu")

# If using torch < 2.0
# pipeline.enable_xformers_memory_efficient_attention()

prompt = "An astronaut riding a green horse"
image = pipeline(prompt=prompt).images[0]
image
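Diffusion pipelines are stochastic, so repeated runs produce different images. For reproducible output you can pass a seeded generator (a minimal sketch; the Generator device should match the pipeline's device, and the seed and filename here are arbitrary):

import torch

generator = torch.Generator(device="cpu").manual_seed(42)  # fixed seed for repeatable results
image = pipeline(prompt=prompt, generator=generator).images[0]
image.save("astronaut.png")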
Finally, LCM-LoRA attaches a latent-consistency adapter to an existing Stable Diffusion model so it can generate images in as few as four steps:

import torch
from diffusers import LCMScheduler, AutoPipelineForText2Image

model_id = "Lykon/dreamshaper-7"
adapter_id = "latent-consistency/lcm-lora-sdv1-5"

# Initialize the pipeline in single precision
pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float32)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.to("cpu")

# Load and fuse the LCM LoRA
pipe.load_lora_weights(adapter_id)
pipe.fuse_lora()

prompt = "An astronaut riding a green horse"

# Disable classifier-free guidance by passing guidance_scale=0
result = pipe(prompt=prompt, num_inference_steps=4, guidance_scale=0)
image = result.images[0]
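fuse_lora() bakes the adapter weights into the base model for faster inference. If you later need the original DreamShaper weights back, diffusers provides an inverse operation (a sketch, assuming no other adapters were fused in the meantime):

pipe.unfuse_lora()  # restore the base model weights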
Read More
Sources
https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
