---
library_name: diffusers
license: other
license_name: flux-1-dev-non-commercial-license
license_link: LICENSE.md
---

> [!NOTE]
> Contains the NF4 checkpoints (`transformer` and `text_encoder_2`) of [`black-forest-labs/FLUX.1-Depth-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev). Please adhere to the original model licensing!


<details>
<summary>Code</summary>

```py
# !pip install git+https://github.com/asomoza/image_gen_aux.git
import fire
import torch
from diffusers import DiffusionPipeline, FluxControlPipeline, FluxTransformer2DModel
from diffusers.utils import load_image
from image_gen_aux import DepthPreprocessor
from transformers import T5EncoderModel
| |
| |
def load_pipeline(four_bit=False):
    """Build a depth-conditioned FluxControlPipeline.

    Starts from the base FLUX.1-dev pipeline and swaps in either the NF4
    (4-bit) depth transformer + text encoder or the full-precision depth
    transformer. Model CPU offload is enabled to reduce VRAM pressure.
    """
    dtype = torch.bfloat16
    base = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype)

    if four_bit:
        print("Using four bit.")
        nf4_repo = "sayakpaul/FLUX.1-Depth-dev-nf4"
        control_pipe = FluxControlPipeline.from_pipe(
            base,
            transformer=FluxTransformer2DModel.from_pretrained(
                nf4_repo, subfolder="transformer", torch_dtype=dtype
            ),
            text_encoder_2=T5EncoderModel.from_pretrained(
                nf4_repo, subfolder="text_encoder_2", torch_dtype=dtype
            ),
            torch_dtype=dtype,
        )
    else:
        # Full-precision depth transformer; refs/pr/1 carries the diffusers-format weights.
        depth_transformer = FluxTransformer2DModel.from_pretrained(
            "black-forest-labs/FLUX.1-Depth-dev",
            subfolder="transformer",
            revision="refs/pr/1",
            torch_dtype=dtype,
        )
        control_pipe = FluxControlPipeline.from_pipe(base, transformer=depth_transformer, torch_dtype=dtype)

    control_pipe.enable_model_cpu_offload()
    return control_pipe
| |
@torch.no_grad()
def get_depth(control_image):
    """Return an RGB depth map of *control_image* using Depth Anything."""
    depth_estimator = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
    return depth_estimator(control_image)[0].convert("RGB")
| |
def load_conditions():
    """Return the (prompt, depth control image) pair used for the demo run."""
    prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png"
    control_image = get_depth(load_image(image_url))
    return prompt, control_image
| |
| |
def main(four_bit: bool = False):
    """Generate one depth-controlled image and save it as a PNG.

    Args:
        four_bit: if True, load the NF4-quantized transformer and text
            encoder instead of the full-precision checkpoints.
    """
    ckpt_id = "sayakpaul/FLUX.1-Depth-dev-nf4"
    pipe = load_pipeline(four_bit=four_bit)
    prompt, control_image = load_conditions()
    image = pipe(
        prompt=prompt,
        control_image=control_image,
        height=1024,
        width=1024,
        num_inference_steps=30,
        guidance_scale=10.0,
        max_sequence_length=512,
        generator=torch.Generator("cpu").manual_seed(0),
    ).images[0]
    filename = "output_" + ckpt_id.split("/")[-1].replace(".", "_")
    filename += "_4bit" if four_bit else ""
    # Fix: `filename` was computed but unused — the image was previously
    # saved under a literal placeholder name instead of the derived one.
    image.save(f"{filename}.png")
| |
| |
| if __name__ == "__main__": |
| fire.Fire(main) |
```

</details>

## Outputs

<table>
<thead>
<tr>
<th>Original</th>
<th>NF4</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<img src="./assets/output_FLUX_1-Depth-dev.png" alt="Original">
</td>
<td>
<img src="./assets/output_FLUX_1-Depth-dev_4bit.png" alt="NF4">
</td>
</tr>
</tbody>
</table>