Diff Coverage

Source File	Diff Coverage (%)	Missing Lines
hyper_parallel/auto_parallel/sapp_nd/memory_estimation/_backbone.py	30.0%	676-677,679-683
hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/body.py	100%
hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/comm.py	100%
hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/layer_block.py	100%
hyper_parallel/auto_parallel/sapp_nd/nd/common/_cost_model_variables.py	100%
hyper_parallel/auto_parallel/sapp_nd/nd/common/cost_model_preprocess.py	100%
hyper_parallel/auto_parallel/sapp_nd/nd/common/cp_types.py	100%
hyper_parallel/auto_parallel/sapp_nd/nd/common/framework_parsers/_cost_model_parser.py	100%
hyper_parallel/auto_parallel/sapp_nd/nd/common/hardware.py	100%
hyper_parallel/auto_parallel/sapp_nd/nd/dimensions.py	95.7%	232,262,389,478
hyper_parallel/auto_parallel/sapp_nd/nd/parallelize.py	100%
hyper_parallel/auto_parallel/sapp_nd/perf_estimation/comm_time.py	82.5%	63,131,495-496,504,559-563

hyper_parallel/auto_parallel/sapp_nd/memory_estimation/_backbone.py

            ins["AllGather Comm"] = self.mb(stage_accu[MemType.AG_COMM])
            ins["All2All Comm"] = self.mb(stage_accu[MemType.A2A_COMM])

            if self._ccfg.cp > 1:
                cp_memory = EvalBody.act_cp_layer(self._ccfg, self._ctx)
                cp_comm_buffer = EvalLayerComm.cp_comm_buffer(self._ccfg, self._ctx)

                ins["CP KV Cache"] = self.mb(cp_memory.kv_cache_memory)
                ins["CP Attn Scores"] = self.mb(cp_memory.attention_scores_memory)
                ins["CP Softmax"] = self.mb(cp_memory.softmax_outputs_memory)
                ins["CP Comm Buffer"] = self.mb(cp_comm_buffer)
                ins["CP Reduction"] = self.mb(cp_memory.total_reduction)

            ins["Node Log"] = sm["logs"][stage_id].node_compute_log
            # VERBOSE
            if verbose and spec_stage_id in (-1, stage_id):

hyper_parallel/auto_parallel/sapp_nd/nd/dimensions.py

    @staticmethod
    def _check_mbn_pp(dims_val, all_dims):
        """Return True if MBN/PP combination is valid."""
        if MBN not in dims_val or PP not in all_dims:
            return True
        valid = dims_val[MBN] >= dims_val[PP]
        valid = valid and not (dims_val[PP] == 1 and dims_val[MBN] > 1)
        if not valid:
            logger.warning("PP and MBN were deemed not suitable")

            if self.dims_val[SP] and self.dims_val[CP] > 1:
                logger.warning("SP & CP cannot coexist")
                return False
        if OP in self.all_dims and not self._check_power_of_two(OP, self.dims_val[OP]):
            return False
        return True

    def val(self, dim):
        """Get Dimension value"""

        )

    attn_upper = p.attention_type_str.upper()
    if attn_upper == "MLA":
        recommended_cp_max = 16
    elif attn_upper == "GQA":
        recommended_cp_max = 8
    else:
        recommended_cp_max = 4

        cp_algo, attention_heads, sp_enabled, num_kv_heads,
    )

    if p.cp_degree <= 1:
        return _cp_ok_result()

    if p.sp_enabled:
        return _cp_ok_result(
            is_valid=False,

hyper_parallel/auto_parallel/sapp_nd/perf_estimation/comm_time.py

    intra_ranks = min(int(cp), int(device_per_node))
    if cp <= device_per_node:
        return "intra-node", bw_intra
    if intra_ranks == 1:
        return "cross-node", bw_inter
    intra_fraction = (intra_ranks - 1) / (cp - 1)
    cross_fraction = 1.0 - intra_fraction
    bw = intra_fraction * bw_intra + cross_fraction * bw_inter
    return "mixed", bw

    cp = ccfg.cp
    t = max(1, ccfg.t)

    if ccfg.a <= 0:
        raise ValueError(f"Number of attention heads must be positive, got {ccfg.a}")

    kv_dim = compute_kv_dim(ccfg)
    attention_type = detect_attention_type(ccfg)
    cp_algo = _resolve_cp_algo(ccfg)

                comm[Dim.EP] += EvalLayerComm.ep_comm_layer(
                    param["cfg"], param["ctx"], 1
                )  # * param["cfg"].ep
                if param["cfg"].cp > 1:
                    cp_comm_details = cp_comm_layer_detailed(param["cfg"], param["ctx"])
                    comm[Dim.CP] += cp_comm_details.comm_volume
                # min(device_type.level_bound_number[0], param["cfg"].ep)
                # comm_cp += EvalLayerComm.cp_comm_layer
                # (param["cfg"], param["ctx"])




        if param["ccfg"].ttype == PerformanceType.TIME:
            for dim, ov in zip([Dim.DP, Dim.TP, Dim.CP], [0.0, 0, 0.0]):
                comm[dim] = estimate_comm_score(
                    param["cfg"],
                    comm[dim],
                    dim,

        param["debugger"].info[PerfParts.EP_COMM] = comms[Dim.EP]
        param["debugger"].info[PerfParts.CP_COMM] = comms[Dim.CP]

        if param["cfg"].cp > 1:
            cp_comm_details = cp_comm_layer_detailed(param["cfg"], param["ctx"])
            param["debugger"].info["CP_KV_VOLUME"] = cp_comm_details.total_kv_volume
            param["debugger"].info["CP_EXPOSED_TIME"] = cp_comm_details.exposed_comm_time
            param["debugger"].info["CP_TOPOLOGY"] = cp_comm_details.topology
            param["debugger"].info["CP_BANDWIDTH"] = cp_comm_details.effective_bandwidth

    res = []
    for i, c in enumerate(comms[Dim.TP]):
        res += [c + comms[Dim.DP][i] + comms[Dim.EP][i] + comms[Dim.CP][i]]