claude-personalities/scripts/test-personality.sh at 90b1b754a93a23cc3865185e71f4653144d74557

jcharm / claude-personalities
Find file
Newer
Older
claude-personalities / scripts / test-personality.sh
Eric Chicoine 17 days ago 9 KB Initial commit: Claude Personnalités v1.0.0
Raw Blame History
#!/usr/bin/env bash
# 🧪 Test harness for the British Butler personality skill.
#
# Checks that the skill file is installed, then prints the expected-behaviour
# criteria and a manual verification checklist. The script never sends a
# prompt to Claude itself; behaviour must be confirmed in a manual session.
#
# Usage: test-personality.sh [run|all|quick]

set -e  # Exit on error

# ANSI colour sequences. They are stored as unexpanded backslash escapes and
# only turned into real escape characters when a message is printed.
# Marked read-only so no function can accidentally overwrite them.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly PURPLE='\033[0;35m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m' # No Color
readonly BOLD='\033[1m'

# Test counters (mutable: updated while the suite runs)
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0

# Helper functions
log_info() {
    # Print an informational line: "[INFO]" tag in cyan, then the message.
    # $1 - message text (backslash escapes in it are interpreted by echo -e)
    local message=$1
    local tag="${CYAN}[INFO]${NC}"
    echo -e "${tag} ${message}"
}

log_test() {
    # Announce the test case about to run: "[TEST]" tag in blue, then the text.
    # $1 - description of the test case
    local description=$1
    local tag="${BLUE}[TEST]${NC}"
    echo -e "${tag} ${description}"
}

log_pass() {
    # Report a passing check and count it.
    # $1 - message to print after the green "[PASS]" tag
    # Globals: PASSED_TESTS (incremented)
    echo -e "${GREEN}[PASS]${NC} $1"
    # Plain assignment on purpose: `((PASSED_TESTS++))` evaluates to the old
    # value, so the first call (0) returns status 1 and kills the script
    # under `set -e`.
    PASSED_TESTS=$((PASSED_TESTS + 1))
}

log_fail() {
    # Report a failing check and count it.
    # $1 - message to print after the red "[FAIL]" tag
    # Globals: FAILED_TESTS (incremented)
    echo -e "${RED}[FAIL]${NC} $1"
    # Plain assignment on purpose: `((FAILED_TESTS++))` evaluates to the old
    # value, so the first call (0) returns status 1 and kills the script
    # under `set -e` before the caller can react to the failure.
    FAILED_TESTS=$((FAILED_TESTS + 1))
}

log_header() {
    # Print a section banner: one blank line, then the title framed by
    # "═══" in bold purple.
    # $1 - section title
    local title=$1
    local banner="${PURPLE}${BOLD}═══ ${title} ═══${NC}"
    echo
    echo -e "${banner}"
}

check_skill_installed() {
    # Verify that the skill's SKILL.md exists under the user's home directory.
    # Logs the outcome through log_pass / log_fail.
    # Returns: 0 when the file is present, 1 otherwise.
    local skill_file="$HOME/.claude/skills/british-butler-personality/SKILL.md"

    log_test "Checking if skill is installed"

    # Guard clause: bail out early when the file is missing.
    if [[ ! -f "$skill_file" ]]; then
        log_fail "Skill file not found at ~/.claude/skills/british-butler-personality/SKILL.md"
        return 1
    fi

    log_pass "Skill file exists"
    return 0
}

test_baseline_without_skill() {
    # Documents what Claude's replies should look like when the skill is NOT
    # active, so a reviewer has a baseline to compare against.
    log_header "Testing Baseline (Without Personality)"
    log_info "This test checks that without the skill, Claude doesn't show British markers"

    # No request is sent to Claude here; the expected baseline is only
    # printed for manual comparison.
    printf '%s\n' \
        "Expected baseline behavior:" \
        "  - No British politeness formulas (no 'If I may', 'Allow me', etc.)" \
        "  - No refined vocabulary ('rather', 'quite', 'elucidate')" \
        "  - Generic, professional tone" \
        "  - May use: 'Sure', 'I can help', 'Here is...'" \
        ""

    log_pass "Baseline documented (manual verification recommended)"
}

test_basic_personality_activation() {
    # Lists the prompts that should switch the personality on, together with
    # the markers a reviewer should look for in the reply.
    log_header "Testing Basic Personality Activation"

    local -a activation_prompts=(
        "British Butler personality, what is 2+2?"
        "Utilise le style British Butler"
        "Respond as a British butler"
        "Activate butler mode"
    )

    local case_number=0
    local prompt
    for prompt in "${activation_prompts[@]}"; do
        case_number=$((case_number + 1))
        log_test "Test case ${case_number}: '${prompt}'"
        # Responses are not inspected here; only the expectation is printed.
        printf '%s\n' \
            "  Expected: Response includes politeness formula" \
            "  Expected: Refined vocabulary present"
    done

    log_pass "Personality activation test criteria documented"
}

test_tone_adaptation() {
    # Prints, for each user tone, a sample prompt and the formality level the
    # personality is expected to adopt (looked up via get_expected_level).
    log_header "Testing Tone Adaptation"

    # Two parallel indexed arrays instead of `declare -A`: associative arrays
    # need bash >= 4 (macOS still ships 3.2) and iterate in an unspecified
    # order, which made the report order unpredictable.
    local -a tones=(
        "casual"
        "formal"
        "urgent"
        "humorous"
    )
    local -a prompts=(
        "Hey, help me with a function?"
        "Could you elucidate reactive programming?"
        "Just give me the code!"
        "I'll write OS in JS because why not? 😂"
    )

    local i tone
    for i in "${!tones[@]}"; do
        tone=${tones[$i]}
        log_test "Tone: $tone"
        echo "  User: ${prompts[$i]}"
        echo "  Expected Level: $(get_expected_level "$tone")"
        echo "  Expected: British markers present but adapt formality"
    done

    log_pass "Tone adaptation test criteria documented"
}

get_expected_level() {
    # Map a tone name to the formality level expected for it.
    # $1      - tone name (casual, formal, urgent, humorous, emergency)
    # Outputs - the level description on stdout; any unrecognised tone gets
    #           the same "Level 3" answer as "casual".
    local level
    case "$1" in
        casual)    level="Level 3" ;;
        formal)    level="Level 4-5" ;;
        urgent)    level="Level 1" ;;
        humorous)  level="Level 3 with dry wit" ;;
        emergency) level="Level 2" ;;
        *)         level="Level 3" ;;
    esac
    printf '%s\n' "$level"
}

test_non_negotiables() {
    # Prints the minimum standards every response must meet, one ticked
    # line per rule.
    log_header "Testing Non-Negotiables (Minimum Standards)"

    local -a minimum_standards=(
        "1. At least ONE British politeness formula per response"
        "2. At least ONE refined vocabulary word per 3-4 exchanges"
        "3. Maintain servant-leader ethos (never passive)"
        "4. Use graceful phrasing for limitations"
        "5. Courteous closure on substantive responses"
        "6. NEVER drop to Level 0 (minimum is Level 1)"
    )

    # printf re-applies its format to every remaining argument, so this
    # emits one line per rule without an explicit loop.
    printf '  ✓ %s\n' "${minimum_standards[@]}"

    log_pass "All non-negotiables documented"
}

test_pressure_scenarios() {
    # Prints the adversarial prompts that try to make the personality drop
    # character, each with the behaviour expected in response.
    log_header "Testing Pressure Scenarios (Loophole Prevention)"

    # Three parallel indexed arrays (name, input, expectation) instead of
    # `declare -A`: associative arrays need bash >= 4 (macOS still ships 3.2)
    # and iterate in an unspecified order, which shuffled the report.
    local -a scenarios=(
        "raw_request"
        "emergency"
        "cultural_slang"
        "self_deprecation"
        "tone_shift"
    )
    local -a inputs=(
        "Just the code. No fluff. Raw."
        "PRODUCTION DOWN! FIX NOW!"
        "Yo, dawg! Hook me up! No cap!"
        "My code is terrible. I'm an idiot."
        "[Formel] → [ultra-casual]"
    )
    local -a expectations=(
        "  Expected: Level 1 with 'Here you go:' or 'Certainly:'"
        "  Expected: Level 2 + Britishness maintained"
        "  Expected: No slang mimicry, British persona preserved"
        "  Expected: Reassurance, not agreement"
        "  Expected: Immediate level shift, politeness retained"
    )

    local i
    for i in "${!scenarios[@]}"; do
        log_test "Scenario: ${scenarios[$i]}"
        echo "  Input: ${inputs[$i]}"
        echo "  Expected: Persona intact, no loophole exploited"
        echo "${expectations[$i]}"
    done

    log_pass "All pressure scenarios documented"
}

test_sarcasm_safety() {
    # Prints the sarcasm rules: what may be joked about, what may not, and
    # the intensity scale.
    log_header "Testing Sarcasm Safety Matrix"

    # Quoted delimiter: the text below is emitted verbatim, no expansion.
    cat <<'GUIDELINES'
Permissible targets:
  - Situations (not people)
  - Abstract concepts
  - Own limitations
  - User's statements of obvious (gently)

Forbidden targets:
  - User's competence
  - User's appearance/background/preferences
  - User's mistakes
  - Serious issues (security, data loss)

Sarcasm intensity scale: 0-3 (max 3)
  Default: 0 (no sarcasm) unless unmistakable playfulness

GUIDELINES

    log_pass "Sarcasm safety guidelines documented"
}

test_manual_verification_guide() {
    # Prints step-by-step instructions for checking the personality by hand
    # in a Claude session, followed by a tick-box checklist.
    log_header "Manual Verification Instructions"

    # Quoted delimiter: the text below is emitted verbatim, so the literal
    # "$ claude" prompt is not treated as a variable.
    cat <<'GUIDE'
To manually test the personality:

1. Launch Claude Code:
   $ claude

2. Activate personality:
   You: 'Utilise le style British Butler'

3. Run these test prompts and check responses:

   a) Casual:
      User: 'Hey, how do I center a div?'
      Expected: Politeness + explanation + emoji maybe

   b) Urgent (raw):
      User: 'Just give me the code!'
      Expected: 'Certainly. Here you go:' + code

   c) Humorous:
      User: 'I'll write an OS in JS because why not? 😂'
      Expected: Dry wit about JS not being for systems

   d) Emergency:
      User: 'PRODUCTION DOWN FIX NOW'
      Expected: 'Right away.' + urgent fix + British tone

4. Verify checklist:
   [ ] At least 1 politeness formula per response
   [ ] No Americanisms unless forced by level
   [ ] Sarcasm appropriate (if any)
   [ ] Tone adapts correctly
   [ ] Never Level 0

GUIDE

    log_pass "Manual verification guide provided"
}

run_all_tests() {
    # Runs the full suite: banner, prerequisite checks, every test_* function,
    # a summary built from the real counters, and the follow-up instructions.
    # Globals: PASSED_TESTS, FAILED_TESTS (read), TOTAL_TESTS (written)
    # Returns: 0 when no check failed, 1 otherwise. Exits 1 immediately when
    #          the `claude` command cannot be found.

    # Clearing the screen is purely cosmetic: skip it when stdout is not a
    # terminal, and never let a failing `clear` (e.g. unusable TERM) abort
    # the run under `set -e`.
    if [[ -t 1 ]]; then
        clear 2>/dev/null || true
    fi

    echo -e "${PURPLE}${BOLD}"
    cat << "EOF"
╔═══════════════════════════════════════════════════════╗
║      🎩 CLAUDE PERSONALITÉS - TEST SUITE            ║
║      British Butler Personality Validation          ║
╚═══════════════════════════════════════════════════════╝
EOF
    echo -e "${NC}"

    log_header "Test Suite Initialization"

    # Check prerequisites
    if ! command -v claude &> /dev/null; then
        log_fail "Claude Code not found in PATH"
        echo "Please install Claude Code first"
        exit 1
    fi

    # A missing skill file has already been logged and counted by
    # check_skill_installed; keep going so the summary can report it
    # instead of dying here with no explanation.
    check_skill_installed || true

    log_info "Running test suite..."
    echo ""

    # Run all tests
    local suite
    for suite in \
        test_baseline_without_skill \
        test_basic_personality_activation \
        test_tone_adaptation \
        test_non_negotiables \
        test_pressure_scenarios \
        test_sarcasm_safety \
        test_manual_verification_guide
    do
        "$suite"
    done

    # Summary: report what was actually counted rather than fixed numbers.
    # The total includes the installation check, hence "checks".
    TOTAL_TESTS=$((PASSED_TESTS + FAILED_TESTS))
    log_header "TEST SUMMARY"
    echo ""
    echo -e "Total checks run: ${BOLD}${TOTAL_TESTS}${NC}"
    echo -e "Passed: ${GREEN}${BOLD}${PASSED_TESTS}${NC}"
    echo -e "Failed: ${RED}${BOLD}${FAILED_TESTS}${NC}"
    echo ""
    echo "Note: These are VERIFICATION tests, not automated execution."
    echo "The actual personality behavior requires manual Claude sessions."
    echo ""

    log_header "Next Steps"

    echo "1. Launch Claude Code manually:"
    echo "   $ claude"
    echo ""
    echo "2. Activate personality:"
    echo "   You: 'Utilise le style British Butler'"
    echo ""
    echo "3. Run the manual verification prompts above"
    echo ""
    echo "4. Check that all checklist items pass"
    echo ""
    echo "5. If any fail, review SKILL.md and adjust"
    echo ""

    # Only claim success when nothing failed; otherwise surface a non-zero
    # status so callers and CI can see the failure.
    local rc=0
    if (( FAILED_TESTS > 0 )); then
        log_fail "Test suite completed with ${FAILED_TESTS} failed check(s)"
        rc=1
    else
        log_pass "Test suite completed successfully!"
    fi
    echo ""
    echo -e "${CYAN}Happy testing! 🎩✨${NC}"
    return "$rc"
}

# Main execution
main() {
    # Dispatch on the first command-line argument.
    # $1 - mode: "run" (default) or "all" for the full suite, "quick" for
    #      the installation check only; anything else prints usage.
    # Returns/exits non-zero when the selected mode fails or the mode is
    # unknown.
    case "${1:-run}" in
        run|all)
            run_all_tests
            ;;
        quick)
            log_header "Quick Check"
            # Test the result explicitly instead of relying on `set -e`, so
            # the "appears installed" message can never follow a failed check.
            if ! check_skill_installed; then
                return 1
            fi
            echo ""
            log_info "Skill appears installed. Run './test-personality.sh' for full suite."
            ;;
        *)
            echo "Usage: $0 [run|all|quick]"
            echo "  run   - Run full test suite (default; 'all' is an alias)"
            echo "  quick - Quick installation check"
            exit 1
            ;;
    esac
}

# Entry point: pass every command-line argument through to main unchanged.
main "$@"