#include #include "registration.h" #include "torch_binding.h" TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { ops.def("residual_rms(Tensor input, Tensor residual, Tensor weight, Tensor scale_tensor, float epsilon, Tensor! output, Tensor next_buffer, int num_threads, bool force_scalar) -> ()"); ops.impl("residual_rms", torch::kCUDA, &residual_rms); } REGISTER_EXTENSION(TORCH_EXTENSION_NAME)