Coverage for /home/jenkins/.local/lib/python3.10/site-packages/hyper_parallel/core/shard/ops/parallel_mhc_pre

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ============================================================================

15"""Distributed implementation for npu_mhc_pre_sinkhorn operator."""

16from typing import Tuple, Dict, Any

18from hyper_parallel.core.dtensor.layout import Layout

19from hyper_parallel.platform import get_platform

20from hyper_parallel.platform.platform import PlatformType

21from .parallel_ops import DistributedOp

23platform = get_platform()

# Default scalar hyper-parameters used by the mHC pre-processing operators.
_HC_MULT_DEFAULT = 4
_NUM_ITERS_DEFAULT = 20
_HC_EPS_DEFAULT = 1e-6
_NORM_EPS_DEFAULT = 1e-6

# Canonical positional order of the npu_mhc_pre_clamp_sinkhorn arguments.
_MHC_PRE_CLAMP_ARG_NAMES = (
    "x",
    "phi",
    "alpha",
    "bias",
    "hc_mult",
    "num_iters",
    "hc_eps",
    "norm_eps",
    "out_flag",
    "clamp_min",
    "clamp_max",
)

# Fallback values for the optional clamp-variant arguments. The four leading
# arguments (x, phi, alpha, bias) have no default and are absent on purpose.
_MHC_PRE_CLAMP_DEFAULTS = dict(
    hc_mult=_HC_MULT_DEFAULT,
    num_iters=_NUM_ITERS_DEFAULT,
    hc_eps=_HC_EPS_DEFAULT,
    norm_eps=_NORM_EPS_DEFAULT,
    out_flag=True,
    clamp_min=0.0,
    clamp_max=0.0,
)

def _normalize_mhc_pre_sinkhorn_args(
        x,
        phi,
        alpha,
        bias,
        hc_mult=_HC_MULT_DEFAULT,
        num_iters=_NUM_ITERS_DEFAULT,
        hc_eps=_HC_EPS_DEFAULT,
        norm_eps=_NORM_EPS_DEFAULT,
        out_flag=True):
    """Bind npu_mhc_pre_sinkhorn arguments and return them in canonical order.

    Python's own argument binding resolves any mix of positional and keyword
    arguments; the bound values are then handed back as one positional tuple.

    Args:
        x: Input tensor [B,S,N,C] or [T,N,C].
        phi: mHC parameter matrix [N*N+2*N, N*C].
        alpha: mHC scaling parameters [3].
        bias: mHC bias parameters [N*N+2*N].
        hc_mult: HC dimension size (currently only 4 supported).
        num_iters: Sinkhorn iteration count.
        hc_eps: H_pre sigmoid eps parameter.
        norm_eps: RmsNorm eps parameter.
        out_flag: Whether to output intermediate gradients.

    Returns:
        tuple: ``(positional_args, kwargs)`` where ``positional_args`` holds
        the nine values in signature order and ``kwargs`` is always empty.
    """
    canonical_args = (
        x, phi, alpha, bias,
        hc_mult, num_iters, hc_eps, norm_eps, out_flag,
    )
    return canonical_args, {}

def _normalize_mhc_pre_clamp_sinkhorn_args(*args, **kwargs):
    """Bind npu_mhc_pre_clamp_sinkhorn arguments into canonical positional order.

    Args:
        *args: Positional arguments, matched against
            ``_MHC_PRE_CLAMP_ARG_NAMES`` from left to right.
        **kwargs: Keyword arguments; every key must be one of
            ``_MHC_PRE_CLAMP_ARG_NAMES`` and must not repeat a positional one.

    Returns:
        tuple: ``(positional_args, kwargs)`` where ``positional_args`` lists a
        value for every name in ``_MHC_PRE_CLAMP_ARG_NAMES`` (defaults filled
        in from ``_MHC_PRE_CLAMP_DEFAULTS``) and ``kwargs`` is always empty.

    Raises:
        TypeError: On too many positional arguments, an unknown keyword, a
            value supplied both positionally and by keyword, or a missing
            required argument (one of the first four names).
    """
    arg_names = _MHC_PRE_CLAMP_ARG_NAMES
    bound = dict(_MHC_PRE_CLAMP_DEFAULTS)

    if len(args) > len(arg_names):
        raise TypeError(
            f"npu_mhc_pre_clamp_sinkhorn expected at most {len(arg_names)} arguments"
        )

    # Positional values override the defaults, left to right.
    bound.update(zip(arg_names, args))
    taken_positionally = arg_names[:len(args)]

    # Keywords are checked one at a time so the first offending key wins.
    for key, item in kwargs.items():
        if key not in arg_names:
            raise TypeError(f"npu_mhc_pre_clamp_sinkhorn got an unexpected keyword argument '{key}'")
        if key in taken_positionally:
            raise TypeError(f"npu_mhc_pre_clamp_sinkhorn got multiple values for argument '{key}'")
        bound[key] = item

    # The first four names carry no default, so they must have been supplied.
    absent = [required for required in arg_names[:4] if required not in bound]
    if absent:
        raise TypeError(f"npu_mhc_pre_clamp_sinkhorn missing required arguments: {absent}")

    return tuple(bound[key] for key in arg_names), {}

# Layout validation rules for npu_mhc_pre_sinkhorn, keyed by the length of x's
# tensor_map (4 or 3). Each "*_forbidden_dims" entry maps a dimension index
# that must stay replicated to the name used for it in error messages. The two
# formats differ only in which axis of x holds N (index 2 vs. index 1).
_MHC_PRE_SINKHORN_VALIDATION_RULES: Dict[int, Dict[str, Any]] = {
    tensor_map_len: {
        "op_name": "npu_mhc_pre_sinkhorn",
        "forbidden_dims": {n_axis: "N"},
        "phi_forbidden_dims": {0: "dim0", 1: "dim1"},
        "alpha_forbidden_dims": {0: "dim0"},
        "bias_forbidden_dims": {0: "dim0"},
    }
    for tensor_map_len, n_axis in ((4, 2), (3, 1))
}

116

117

def _validate_tensor_map_dims(
        tensor_map: tuple,
        op_name: str,
        forbidden_dims: Dict[int, str],
        tensor_name: str = "x",
) -> None:
    """Check that the listed dimensions of a tensor map are replicated.

    A tensor_map entry of ``-1`` is accepted as replicated; any other value is
    treated as sharded and rejected.

    Args:
        tensor_map: The tensor_map to check.
        op_name: Operator name used in the error message.
        forbidden_dims: Dict mapping a dimension index that must not be
            sharded to the name of that dimension.
        tensor_name: Name of the tensor that owns ``tensor_map``, used in the
            error message. Defaults to ``"x"`` so existing callers keep the
            message they had before.

    Raises:
        ValueError: If any forbidden dimension is sharded.
    """
    for dim_idx, dim_name in forbidden_dims.items():
        dim_value = tensor_map[dim_idx]
        if dim_value != -1:
            raise ValueError(
                f"For {op_name}, {dim_name} dimension (dim {dim_idx}) of {tensor_name} "
                f"should be replicated, but got {dim_value}"
            )


def _validate_input_layouts_mhc_pre_sinkhorn(
        x_layout: "Layout",
        phi_layout: "Layout",
        alpha_layout: "Layout",
        bias_layout: "Layout",
) -> None:
    """Validate input layouts for the npu_mhc_pre_sinkhorn operator.

    The rule set is chosen from ``_MHC_PRE_SINKHORN_VALIDATION_RULES`` by the
    length of x's tensor_map, then applied to x, phi, alpha and bias in turn.

    Args:
        x_layout: Layout of the input tensor x.
        phi_layout: Layout of phi.
        alpha_layout: Layout of alpha.
        bias_layout: Layout of bias.

    Raises:
        ValueError: If x's tensor_map length has no rule set (not 4 or 3), or
            if a dimension that must stay replicated is sharded on any input.
    """
    x_tm = x_layout.tensor_map
    x_tm_len = len(x_tm)

    rules = _MHC_PRE_SINKHORN_VALIDATION_RULES.get(x_tm_len)
    if rules is None:
        raise ValueError(
            f"For npu_mhc_pre_sinkhorn, tensor_map length should be 4 or 3, but got {x_tm_len}"
        )

    op_name = rules["op_name"]
    # Pass the real tensor name so a sharded phi/alpha/bias is not reported as
    # a problem with x.
    _validate_tensor_map_dims(x_tm, op_name, rules["forbidden_dims"], "x")
    _validate_tensor_map_dims(phi_layout.tensor_map, op_name, rules["phi_forbidden_dims"], "phi")
    _validate_tensor_map_dims(alpha_layout.tensor_map, op_name, rules["alpha_forbidden_dims"], "alpha")
    _validate_tensor_map_dims(bias_layout.tensor_map, op_name, rules["bias_forbidden_dims"], "bias")

162

163

class NpuMhcPreSinkhornDistributedOp(DistributedOp):
    """DistributedOp for the npu_mhc_pre_sinkhorn operator.

    Implements layout inference for the MHC pre-processing with Sinkhorn
    operation. Eight output layouts are produced, for hin, h_post, h_res,
    h_pre, hc_before_norm, inv_rms, sum_out and norm_out.
    """

    def preprocess(self, args: tuple, kwargs: dict) -> tuple:
        """Convert the distributed inputs into local call arguments.

        Args:
            args: Positional arguments of the operator call.
            kwargs: Keyword arguments of the operator call.

        Returns:
            tuple: ``(local_args, local_kwargs, cache_values)``. On the
            MindSpore platform all nine values are passed positionally; on any
            other platform the four local tensors are positional and the five
            scalars go by keyword. ``cache_values`` lists the layouts of x,
            phi, alpha and bias, in that order.
        """
        norm_args, _ = _normalize_mhc_pre_sinkhorn_args(*args, **kwargs)
        dtensors = norm_args[:4]
        hc_mult, num_iters, hc_eps, norm_eps, out_flag = norm_args[4:]

        positional_scalars = platform.platform_type == PlatformType.MINDSPORE
        local_tensors = tuple(dtensor.to_local() for dtensor in dtensors)

        if positional_scalars:
            local_args = local_tensors + (hc_mult, num_iters, hc_eps, norm_eps, out_flag)
            local_kwargs = {}
        else:
            local_args = local_tensors
            local_kwargs = {
                'hc_mult': hc_mult,
                'num_iters': num_iters,
                'hc_eps': hc_eps,
                'norm_eps': norm_eps,
                'out_flag': out_flag,
            }

        cache_values = [dtensor.layout for dtensor in dtensors]
        return local_args, local_kwargs, cache_values

    def infer_layout(self, layouts: list, extra_args=None) -> Tuple[tuple, None]:
        """Validate the four input layouts and derive the output layouts.

        Args:
            layouts: Exactly four layouts, for x, phi, alpha and bias.
            extra_args: Unused.

        Returns:
            tuple: ``(output_layouts, None)`` with eight output layouts.

        Raises:
            ValueError: If the input layouts fail
                ``_validate_input_layouts_mhc_pre_sinkhorn``.
        """
        del extra_args
        x_layout, phi_layout, alpha_layout, bias_layout = layouts
        input_layouts = [x_layout, phi_layout, alpha_layout, bias_layout]

        self._check_partial_inputs(input_layouts)
        _validate_input_layouts_mhc_pre_sinkhorn(*input_layouts)

        return self._infer_output_layouts(x_layout), None

    @staticmethod
    def _infer_output_layouts(
            x_layout: Layout,
    ) -> Tuple[Layout, Layout, Layout, Layout, Layout, Layout, Layout, Layout]:
        """Build the output layouts from x's mesh and tensor map.

        A single Layout object is created and returned in all eight slots.

        Args:
            x_layout: Layout of the input tensor x.

        Returns:
            tuple: Eight references to the same Layout.
        """
        # NOTE(review): every output reuses x's tensor map - confirm that all
        # eight outputs really have the same rank as x.
        shared_layout = Layout.from_device_mesh(x_layout.mesh)
        shared_layout.set_tensor_map(x_layout.tensor_map)
        shared_layout.tensor_map_to_placement()
        return (shared_layout,) * 8

239

240

class NpuMhcPreClampSinkhornDistributedOp(DistributedOp):
    """DistributedOp for the npu_mhc_pre_clamp_sinkhorn operator.

    The clamp variant follows the same input layout rules as
    npu_mhc_pre_sinkhorn and emits one additional h_res_logits output, for
    nine output layouts in total.
    """

    def preprocess(self, args: tuple, kwargs: dict) -> tuple:
        """Convert the distributed inputs into local call arguments.

        Args:
            args: Positional arguments of the operator call.
            kwargs: Keyword arguments of the operator call.

        Returns:
            tuple: ``(local_args, local_kwargs, cache_values)``. On the
            MindSpore platform all eleven values are passed positionally; on
            any other platform the four local tensors are positional and the
            seven scalars go by keyword. ``cache_values`` lists the layouts of
            x, phi, alpha and bias, in that order.

        Raises:
            TypeError: If the arguments cannot be bound by
                ``_normalize_mhc_pre_clamp_sinkhorn_args``.
        """
        norm_args, _ = _normalize_mhc_pre_clamp_sinkhorn_args(*args, **kwargs)
        dtensors = norm_args[:4]
        (hc_mult, num_iters, hc_eps, norm_eps,
         out_flag, clamp_min, clamp_max) = norm_args[4:]

        positional_scalars = platform.platform_type == PlatformType.MINDSPORE
        local_tensors = tuple(dtensor.to_local() for dtensor in dtensors)

        if positional_scalars:
            local_args = local_tensors + (
                hc_mult, num_iters, hc_eps, norm_eps,
                out_flag, clamp_min, clamp_max,
            )
            local_kwargs = {}
        else:
            local_args = local_tensors
            local_kwargs = {
                'hc_mult': hc_mult,
                'num_iters': num_iters,
                'hc_eps': hc_eps,
                'norm_eps': norm_eps,
                'out_flag': out_flag,
                'clamp_min': clamp_min,
                'clamp_max': clamp_max,
            }

        cache_values = [dtensor.layout for dtensor in dtensors]
        return local_args, local_kwargs, cache_values

    def infer_layout(self, layouts: list, extra_args=None) -> Tuple[tuple, None]:
        """Validate the four input layouts and derive the output layouts.

        Args:
            layouts: Exactly four layouts, for x, phi, alpha and bias.
            extra_args: Unused.

        Returns:
            tuple: ``(output_layouts, None)`` where ``output_layouts`` holds
            nine references to one Layout built from x's mesh and tensor map.

        Raises:
            ValueError: If the input layouts fail
                ``_validate_input_layouts_mhc_pre_sinkhorn``.
        """
        del extra_args
        x_layout, phi_layout, alpha_layout, bias_layout = layouts
        input_layouts = [x_layout, phi_layout, alpha_layout, bias_layout]

        self._check_partial_inputs(input_layouts)
        _validate_input_layouts_mhc_pre_sinkhorn(*input_layouts)

        # NOTE(review): every output reuses x's tensor map - confirm that all
        # nine outputs really have the same rank as x.
        shared_layout = Layout.from_device_mesh(x_layout.mesh)
        shared_layout.set_tensor_map(x_layout.tensor_map)
        shared_layout.tensor_map_to_placement()

        output_layouts = (shared_layout,) * 9
        return output_layouts, None

Coverage for /home/jenkins/.local/lib/python3.10/site-packages/hyper_parallel/core/shard/ops/parallel_mhc_pre_sinkhorn.py: 62%

96 statements