drugv2.rar
plugin.rar
retrosynthesis.rar

ms-swift-main.zip
deepseek_python_20260402_aaf888.rar

import re

from typing import List

from swift.rewards import ORM, orms

class DummyReward(ORM):
    """Minimal reward function for testing purposes.

    Scores each completion 1.0 when it contains at least one non-whitespace
    character and 0.0 otherwise.
    """

    def __call__(self, completions: List[str], **kwargs) -> List[float]:
        """Score each completion by whether it has any content.

        Args:
            completions: List of model-generated strings.
            **kwargs: Extra keyword arguments; accepted and ignored.

        Returns:
            One float per completion, in input order: 1.0 if the completion
            has non-whitespace content, else 0.0.
        """
        # The truthiness check runs first so an empty (or None) entry scores
        # 0.0 without ever calling .strip() on it.
        return [
            1.0 if completion and completion.strip() else 0.0
            for completion in completions
        ]

# Register the dummy reward function so it can be used via
# `--reward_funcs dummy_reward`.
orms['dummy_reward'] = DummyReward

最后修改:2026 年 04 月 02 日
如果觉得我的文章对你有用,请随意赞赏