Spaces:

zerogpu-aoti
/

Flux-Compiled-Graph

Running on Zero

File size: 1,370 Bytes

937a94e
 
 
 
 
 
 
 
 
f9f24d7
937a94e
f9f24d7
 
937a94e
f9f24d7
 
937a94e
 
 
 
 
 
 
 
 
 
 
f5a3617
c5db835
 
 
 
 
f9f24d7
c5db835
 
 
f9f24d7
 
c5db835
f9f24d7
c5db835
 
968b96f

from typing import Any
from typing import Callable
from typing import ParamSpec
import spaces
import torch
from torch.utils._pytree import tree_map

# Parameter specification used by `compile_transformer` to annotate its
# *args / **kwargs with the call signature of the pipeline it receives.
P = ParamSpec('P')

# Symbolic export dimension named 'hidden', allowed to range from 4096 to 8212.
# NOTE(review): despite the name, this presumably tracks the number of image
# tokens (which varies with output resolution) rather than a feature width --
# confirm against the blog post linked below.
TRANSFORMER_HIDDEN_DIM = torch.export.Dim('hidden', min=4096, max=8212)

# Dynamic-shape overrides for the transformer's keyword arguments; specific
# to Flux. Axis 1 of `hidden_states` and axis 0 of `img_ids` reference the
# same Dim object, so both are declared with one shared symbolic size.
# Keyword arguments not listed here are marked static by `compile_transformer`.
# Background: https://huggingface.co/blog/zerogpu-aoti
TRANSFORMER_DYNAMIC_SHAPES = {
    'hidden_states': {1: TRANSFORMER_HIDDEN_DIM},
    'img_ids': {0: TRANSFORMER_HIDDEN_DIM},
}

# Options passed to `spaces.aoti_compile` alongside the exported program.
# NOTE(review): the keys look like torch._inductor.config option names
# (autotuning, coordinate-descent tuning, CUDA graphs) -- confirm how
# `aoti_compile` applies them before changing any value.
INDUCTOR_CONFIGS = {
    'conv_1x1_as_mm': True,
    'epilogue_fusion': False,
    'coordinate_descent_tuning': True,
    'coordinate_descent_check_all_directions': True,
    'max_autotune': True,
    'triton.cudagraphs': True,
}

def compile_transformer(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    """Export ``pipeline.transformer`` and compile it ahead of time.

    The work runs inside a ``spaces.GPU`` task (``duration=1500``):

    1. The pipeline is invoked once with ``*args`` / ``**kwargs`` under
       ``spaces.aoti_capture`` so that the positional and keyword inputs of
       the transformer call are available as example inputs.
    2. A dynamic-shape spec is built that marks every captured keyword
       argument as static, then overlays ``TRANSFORMER_DYNAMIC_SHAPES``.
    3. The transformer is exported with ``torch.export.export`` and the
       result is handed to ``spaces.aoti_compile`` with ``INDUCTOR_CONFIGS``.

    Args:
        pipeline: Callable pipeline object exposing a ``transformer`` attribute.
        *args: Positional arguments forwarded to the pipeline call.
        **kwargs: Keyword arguments forwarded to the pipeline call.

    Returns:
        Whatever ``spaces.aoti_compile`` returns for the exported transformer.
    """

    @spaces.GPU(duration=1500)
    def _capture_export_compile():
        # One real pipeline run supplies the example inputs for export.
        with spaces.aoti_capture(pipeline.transformer) as captured:
            pipeline(*args, **kwargs)

        # Start from an all-static spec mirroring the captured kwargs, then
        # let the Flux-specific dynamic axes take precedence.
        shape_spec = tree_map(lambda _leaf: None, captured.kwargs)
        shape_spec.update(TRANSFORMER_DYNAMIC_SHAPES)

        exported_program = torch.export.export(
            mod=pipeline.transformer,
            args=captured.args,
            kwargs=captured.kwargs,
            dynamic_shapes=shape_spec,
        )
        return spaces.aoti_compile(exported_program, INDUCTOR_CONFIGS)

    return _capture_export_compile()