package ai.topandrey15.reinforcemc.core;

import ai.topandrey15.reinforcemc.ReinforceMC;
import ai.topandrey15.reinforcemc.action.ActionExecutor;
import ai.topandrey15.reinforcemc.input.PlayerStatsProvider;
import ai.topandrey15.reinforcemc.input.PlayerTracker;
import ai.topandrey15.reinforcemc.input.ScreenCaptureManager;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Random;

/* loaded from: input_file:ai/topandrey15/reinforcemc/core/ActionSelectionManager.class */
/**
 * Chooses which action index to execute on each step and keeps the bookkeeping
 * that drives exploration: a linearly decaying epsilon, a ring buffer of the
 * most recently executed actions (used to reward under-used actions and to
 * compute a diversity score), and running reward statistics.
 *
 * <p>The mutable state can be snapshotted with {@link #exportData()} and
 * restored with {@link #loadFromData(ActionManagerData)}.
 */
public class ActionSelectionManager {
    private static final float INITIAL_EPSILON = 1.0f;
    private static final float FINAL_EPSILON = 0.1f;
    private static final int EPSILON_DECAY_STEPS = 50000;
    private static final int WARMUP_STEPS = 1000;
    private static final float DIVERSITY_BONUS_STRENGTH = 0.15f;
    private static final int DIVERSITY_HISTORY_SIZE = 500;

    /** A progress line is logged every this many training steps. */
    private static final int PROGRESS_LOG_INTERVAL = 1000;
    /** Lower bound applied to every adjusted action weight so no action is ever impossible. */
    private static final float MIN_ACTION_WEIGHT = 0.001f;
    /** Marker stored in history slots that have not been written yet. */
    private static final int EMPTY_HISTORY_SLOT = -1;
    /** Smallest zero vector returned by {@link #combineInputs} when combining fails. */
    private static final int FALLBACK_MIN_INPUT_SIZE = 1000;
    /** Zero-vector size used by {@link #combineInputs} when the real size never became known. */
    private static final int FALLBACK_DEFAULT_INPUT_SIZE = 16000;

    private final ActionExecutor actionExecutor;
    private final Random random = new Random();
    /** Ring buffer of executed action indices; the first {@code historySize} slots are populated. */
    private final int[] recentActionHistory = new int[DIVERSITY_HISTORY_SIZE];

    private float currentEpsilon = INITIAL_EPSILON;
    private int historyIndex = 0;
    private int historySize = 0;
    private int trainingSteps = 0;
    private float averageReward = 0.0f;
    private int successfulEpisodes = 0;
    private float maxRewardAchieved = Float.NEGATIVE_INFINITY;

    /* loaded from: input_file:ai/topandrey15/reinforcemc/core/ActionSelectionManager$ActionManagerData.class */
    /**
     * Serializable snapshot of the manager's mutable state, produced by
     * {@link ActionSelectionManager#exportData()} and consumed by
     * {@link ActionSelectionManager#loadFromData(ActionManagerData)}.
     */
    public static class ActionManagerData implements Serializable {
        private static final long serialVersionUID = 1;
        public float currentEpsilon;
        public int trainingSteps;
        public float averageReward;
        public int successfulEpisodes;
        public float maxRewardAchieved;
        public int historyIndex;
        public int historySize;
        public int[] recentActionHistory;
    }

    /**
     * Creates a manager with an empty action history.
     *
     * @param actionExecutor supplies the action count and the availability enforcer
     */
    public ActionSelectionManager(ActionExecutor actionExecutor) {
        this.actionExecutor = actionExecutor;
        Arrays.fill(this.recentActionHistory, EMPTY_HISTORY_SLOT);
        ReinforceMC.LOGGER.info("ActionSelectionManager initialized with action diversity and exploration strategies");
    }

    /** Resets every statistic, epsilon and the action history to their initial values. */
    public void startNewTraining() {
        this.trainingSteps = 0;
        this.averageReward = 0.0f;
        this.currentEpsilon = INITIAL_EPSILON;
        this.successfulEpisodes = 0;
        this.maxRewardAchieved = Float.NEGATIVE_INFINITY;
        clearActionHistory();
        ReinforceMC.LOGGER.info("Started new training session with exploration and diversity tracking");
    }

    /**
     * Picks the action to execute for the current step and records it in the history.
     *
     * <p>During the first {@code WARMUP_STEPS} training steps the pick is uniformly
     * random; afterwards it follows the epsilon-driven strategy of
     * {@link #selectActionPostWarmup(float[])}. The pick is always passed through the
     * executor's availability enforcer, whose result is what gets recorded and returned.
     * If anything throws, the enforcer is asked again with {@code -1} as the proposal.
     *
     * @param fArr one value per action index; used as sampling weights and for picking
     *             the best action (presumably the model's output - confirm with caller)
     * @return the action index returned by the availability enforcer
     */
    public int selectAction(float[] fArr) {
        try {
            int proposed = this.trainingSteps < WARMUP_STEPS
                    ? this.random.nextInt(fArr.length)
                    : selectActionPostWarmup(fArr);
            int enforced = this.actionExecutor.getAvailabilityEnforcer().enforceActionAvailability(proposed, fArr);
            addToActionHistory(enforced);
            if (enforced != proposed) {
                ReinforceMC.LOGGER.debug("Action {} enforced to {} for guaranteed execution (step {})", proposed, enforced, this.trainingSteps);
            }
            return enforced;
        } catch (Exception e) {
            ReinforceMC.LOGGER.warn("Action selection failed at step {}, using emergency fallback: {}", this.trainingSteps, e.getMessage());
            int fallback = this.actionExecutor.getAvailabilityEnforcer().enforceActionAvailability(-1, fArr);
            addToActionHistory(fallback);
            return fallback;
        }
    }

    /**
     * Epsilon-driven selection used once warm-up is over. With {@code r} drawn uniformly
     * from [0, 1):
     * <ul>
     *   <li>{@code r < 0.4 * epsilon}: uniformly random action;</li>
     *   <li>{@code r < 0.7 * epsilon}: sample from the diversity-adjusted weights;</li>
     *   <li>{@code r < epsilon}: sample from the raw input values;</li>
     *   <li>otherwise: 20% sample from the adjusted weights, 80% take their maximum.</li>
     * </ul>
     * An out-of-range result or any exception falls back to a uniformly random action.
     */
    private int selectActionPostWarmup(float[] scores) {
        try {
            float[] adjusted = buildAdjustedDistribution(scores);
            float roll = this.random.nextFloat();
            int chosen;
            if (roll < this.currentEpsilon * 0.4f) {
                chosen = this.random.nextInt(scores.length);
            } else if (roll < this.currentEpsilon * 0.7f) {
                chosen = sampleFromDistributionSafe(adjusted);
            } else if (roll < this.currentEpsilon) {
                chosen = sampleFromDistributionSafe(scores);
            } else if (this.random.nextFloat() < 0.2f) {
                chosen = sampleFromDistributionSafe(adjusted);
            } else {
                chosen = getBestActionSafe(adjusted);
            }
            if (chosen < 0 || chosen >= scores.length) {
                ReinforceMC.LOGGER.warn("Invalid action {} selected at step {}, using random fallback", chosen, this.trainingSteps);
                chosen = this.random.nextInt(scores.length);
            }
            return chosen;
        } catch (Exception e) {
            ReinforceMC.LOGGER.warn("Post-warmup selection failed at step {}, using random: {}", this.trainingSteps, e.getMessage());
            return this.random.nextInt(scores.length);
        }
    }

    /**
     * Adds the diversity bonus to each input value, floors the result at
     * {@code MIN_ACTION_WEIGHT}, then normalises by the sum (re-flooring each entry at
     * {@code MIN_ACTION_WEIGHT / n}). Falls back to a uniform vector when the sum is
     * not above {@code MIN_ACTION_WEIGHT}.
     */
    private float[] buildAdjustedDistribution(float[] scores) {
        float[] bonus = calculateDiversityBonus();
        float[] adjusted = new float[scores.length];
        float total = 0.0f;
        for (int action = 0; action < scores.length; action++) {
            // The bonus array is sized by the executor's action count, which may differ from scores.length.
            float actionBonus = (bonus != null && action < bonus.length) ? bonus[action] : 0.0f;
            adjusted[action] = Math.max(MIN_ACTION_WEIGHT, scores[action] + actionBonus);
            total += adjusted[action];
        }
        if (total > MIN_ACTION_WEIGHT) {
            float floor = MIN_ACTION_WEIGHT / scores.length;
            for (int action = 0; action < adjusted.length; action++) {
                adjusted[action] = Math.max(floor, adjusted[action] / total);
            }
        } else {
            Arrays.fill(adjusted, 1.0f / adjusted.length);
        }
        return adjusted;
    }

    /**
     * Computes a per-action additive bonus from how often each action appears in the
     * recent history relative to an even split: under-used actions get a positive bonus,
     * over-used ones a small penalty.
     *
     * @return one bonus per action index (empty when the executor reports no actions)
     */
    private float[] calculateDiversityBonus() {
        int actionCount = this.actionExecutor.getActionCount();
        if (actionCount <= 0) {
            return new float[0];
        }
        float[] bonus = new float[actionCount];
        if (this.historySize == 0) {
            Arrays.fill(bonus, DIVERSITY_BONUS_STRENGTH);
            return bonus;
        }
        int[] counts = countRecentActions(actionCount);
        // Float division on purpose: integer division truncated the expected count
        // (to 0 whenever the history was shorter than the action count).
        float expectedCount = (float) this.historySize / actionCount;
        for (int action = 0; action < actionCount; action++) {
            float usageRatio = counts[action] / expectedCount;
            if (usageRatio < 0.5f) {
                bonus[action] = DIVERSITY_BONUS_STRENGTH * 1.5f;
            } else if (usageRatio < 0.8f) {
                bonus[action] = DIVERSITY_BONUS_STRENGTH;
            } else if (usageRatio > 2.0f) {
                bonus[action] = -DIVERSITY_BONUS_STRENGTH * 0.5f;
            } else if (usageRatio > 1.5f) {
                bonus[action] = -DIVERSITY_BONUS_STRENGTH * 0.25f;
            } else {
                bonus[action] = 0.0f;
            }
        }
        return bonus;
    }

    /** Counts occurrences of each valid action index among the populated history slots. */
    private int[] countRecentActions(int actionCount) {
        int[] counts = new int[actionCount];
        for (int slot = 0; slot < this.historySize; slot++) {
            int action = this.recentActionHistory[slot];
            if (action >= 0 && action < actionCount) {
                counts[action]++;
            }
        }
        return counts;
    }

    /**
     * Samples an index by walking the cumulative sum of {@code weights} until it reaches
     * a uniform draw from [0, 1); returns the last index if the sum never reaches it.
     */
    private int sampleFromDistribution(float[] weights) {
        float threshold = this.random.nextFloat();
        float cumulative = 0.0f;
        for (int index = 0; index < weights.length; index++) {
            cumulative += weights[index];
            if (threshold <= cumulative) {
                return index;
            }
        }
        return weights.length - 1;
    }

    /** Like {@link #sampleFromDistribution(float[])} but returns 0 for a null or empty array. */
    private int sampleFromDistributionSafe(float[] weights) {
        if (weights == null || weights.length == 0) {
            return 0;
        }
        return sampleFromDistribution(weights);
    }

    /** Returns the index of the largest value (first one wins on ties). */
    private int getBestAction(float[] values) {
        int bestIndex = 0;
        float bestValue = values[0];
        for (int index = 1; index < values.length; index++) {
            if (values[index] > bestValue) {
                bestValue = values[index];
                bestIndex = index;
            }
        }
        return bestIndex;
    }

    /** Like {@link #getBestAction(float[])} but returns 0 for a null or empty array. */
    private int getBestActionSafe(float[] values) {
        if (values == null || values.length == 0) {
            return 0;
        }
        return getBestAction(values);
    }

    /** Writes {@code action} at the ring-buffer cursor and advances it, growing the size up to capacity. */
    private void addToActionHistory(int action) {
        this.recentActionHistory[this.historyIndex] = action;
        this.historyIndex = (this.historyIndex + 1) % DIVERSITY_HISTORY_SIZE;
        if (this.historySize < DIVERSITY_HISTORY_SIZE) {
            this.historySize++;
        }
    }

    /** Marks every history slot as empty and rewinds the ring-buffer cursor. */
    private void clearActionHistory() {
        Arrays.fill(this.recentActionHistory, EMPTY_HISTORY_SLOT);
        this.historyIndex = 0;
        this.historySize = 0;
    }

    /**
     * Recomputes epsilon from the current step count: a linear ramp from
     * {@code INITIAL_EPSILON} down to {@code FINAL_EPSILON} over
     * {@code EPSILON_DECAY_STEPS} steps, constant afterwards.
     */
    public void updateEpsilon() {
        if (this.trainingSteps < EPSILON_DECAY_STEPS) {
            float progress = this.trainingSteps / (float) EPSILON_DECAY_STEPS;
            this.currentEpsilon = INITIAL_EPSILON - ((INITIAL_EPSILON - FINAL_EPSILON) * progress);
        } else {
            this.currentEpsilon = FINAL_EPSILON;
        }
    }

    /**
     * Registers one training step: bumps the step counter, folds the reward into the
     * exponential moving average (weight 0.01), tracks the maximum reward, counts a
     * successful episode when {@code z} is set and the reward is positive, refreshes
     * epsilon and periodically logs progress.
     *
     * @param f the reward obtained on this step
     * @param z flag that, together with a positive reward, counts the step as a
     *          successful episode (presumably "episode finished" - confirm with caller)
     */
    public void processTrainingStep(float f, boolean z) {
        this.trainingSteps++;
        this.averageReward = (this.averageReward * 0.99f) + (f * 0.01f);
        if (z && f > 0.0f) {
            this.successfulEpisodes++;
        }
        if (f > this.maxRewardAchieved) {
            this.maxRewardAchieved = f;
        }
        updateEpsilon();
        if (this.trainingSteps % PROGRESS_LOG_INTERVAL == 0) {
            // Parameterized logging only understands "{}", so the precision is applied up front.
            ReinforceMC.LOGGER.info("Training step {}: avgReward={}, epsilon={}, diversity={}",
                    this.trainingSteps,
                    formatDecimal(this.averageReward, 3),
                    formatDecimal(this.currentEpsilon, 3),
                    formatDecimal(calculateDiversityScore(), 2));
        }
    }

    /** Formats {@code value} with a fixed number of fraction digits, independent of the default locale. */
    private static String formatDecimal(float value, int fractionDigits) {
        return String.format(java.util.Locale.ROOT, "%." + fractionDigits + "f", value);
    }

    /**
     * Measures how evenly the recent history is spread over the actions as the Shannon
     * entropy of the observed action frequencies divided by {@code ln(actionCount)}.
     *
     * @return 1 when the history is empty; 0 when {@code ln(actionCount)} is not positive;
     *         otherwise the normalised entropy
     */
    public float calculateDiversityScore() {
        if (this.historySize == 0) {
            return 1.0f;
        }
        int actionCount = this.actionExecutor.getActionCount();
        float entropy = 0.0f;
        for (int count : countRecentActions(actionCount)) {
            if (count > 0) {
                // Float division on purpose: integer division made the proportion 0,
                // and 0 * ln(0) evaluates to NaN.
                float proportion = (float) count / this.historySize;
                entropy = (float) (entropy - (proportion * Math.log(proportion)));
            }
        }
        float maxEntropy = (float) Math.log(actionCount);
        if (maxEntropy > 0.0f) {
            return entropy / maxEntropy;
        }
        return 0.0f;
    }

    /**
     * Concatenates the flattened screenshot, the player features and the player stats
     * features, in that order, into a single vector.
     *
     * @param screenCaptureManager source of the screenshot and its flattening
     * @param playerTracker source of the player features
     * @param playerStatsProvider source of the stats features
     * @return the concatenated vector, or an all-zero vector if any step throws
     * @deprecated the warning logged by this method directs callers to
     *             {@code ModelManagerFixed.combineInputs()} instead
     */
    @Deprecated
    public float[] combineInputs(ScreenCaptureManager screenCaptureManager, PlayerTracker playerTracker, PlayerStatsProvider playerStatsProvider) {
        ReinforceMC.LOGGER.warn("DEPRECATED: ActionSelectionManager.combineInputs() called - use ModelManagerFixed.combineInputs() instead");
        int combinedLength = 0;
        try {
            float[] image = screenCaptureManager.flattenImage(screenCaptureManager.captureScreenshot());
            float[] playerFeatures = playerTracker.getPlayerFeatures();
            float[] statFeatures = playerStatsProvider.getFeatures();
            combinedLength = image.length + playerFeatures.length + statFeatures.length;
            float[] combined = new float[combinedLength];
            System.arraycopy(image, 0, combined, 0, image.length);
            System.arraycopy(playerFeatures, 0, combined, image.length, playerFeatures.length);
            System.arraycopy(statFeatures, 0, combined, image.length + playerFeatures.length, statFeatures.length);
            return combined;
        } catch (Exception e) {
            ReinforceMC.LOGGER.error("Error combining inputs: ", e);
            int fallbackLength = combinedLength > 0 ? combinedLength : FALLBACK_DEFAULT_INPUT_SIZE;
            // A freshly allocated float[] is already zero-filled.
            return new float[Math.max(FALLBACK_MIN_INPUT_SIZE, fallbackLength)];
        }
    }

    /**
     * Captures the current state in a new snapshot object.
     *
     * @return a snapshot holding a copy of the action history, so later selections do not alter it
     */
    public ActionManagerData exportData() {
        ActionManagerData snapshot = new ActionManagerData();
        snapshot.currentEpsilon = this.currentEpsilon;
        snapshot.trainingSteps = this.trainingSteps;
        snapshot.averageReward = this.averageReward;
        snapshot.successfulEpisodes = this.successfulEpisodes;
        snapshot.maxRewardAchieved = this.maxRewardAchieved;
        snapshot.historyIndex = this.historyIndex;
        snapshot.historySize = this.historySize;
        snapshot.recentActionHistory = this.recentActionHistory.clone();
        return snapshot;
    }

    /**
     * Restores state from a snapshot. A {@code null} snapshot is ignored with a warning.
     * The action history is restored only if the saved array has the expected length and
     * the saved cursor and size are within bounds; otherwise the history starts empty.
     *
     * @param actionManagerData the snapshot to restore from, may be {@code null}
     */
    public void loadFromData(ActionManagerData actionManagerData) {
        if (actionManagerData == null) {
            ReinforceMC.LOGGER.warn("Cannot load action manager data: data is null");
            return;
        }
        this.currentEpsilon = actionManagerData.currentEpsilon;
        this.trainingSteps = actionManagerData.trainingSteps;
        this.averageReward = actionManagerData.averageReward;
        this.successfulEpisodes = actionManagerData.successfulEpisodes;
        this.maxRewardAchieved = actionManagerData.maxRewardAchieved;

        int[] savedHistory = actionManagerData.recentActionHistory;
        // Out-of-range cursor values would later index outside the ring buffer.
        boolean historyUsable = savedHistory != null
                && savedHistory.length == DIVERSITY_HISTORY_SIZE
                && actionManagerData.historyIndex >= 0
                && actionManagerData.historyIndex < DIVERSITY_HISTORY_SIZE
                && actionManagerData.historySize >= 0
                && actionManagerData.historySize <= DIVERSITY_HISTORY_SIZE;
        if (historyUsable) {
            System.arraycopy(savedHistory, 0, this.recentActionHistory, 0, DIVERSITY_HISTORY_SIZE);
            this.historyIndex = actionManagerData.historyIndex;
            this.historySize = actionManagerData.historySize;
        } else {
            clearActionHistory();
        }
        ReinforceMC.LOGGER.info("Action selection manager state loaded from saved data");
    }

    /** @return the current exploration rate */
    public float getCurrentEpsilon() {
        return this.currentEpsilon;
    }

    /** @return the number of training steps processed so far */
    public int getTrainingSteps() {
        return this.trainingSteps;
    }

    /** @return the exponential moving average of the rewards */
    public float getAverageReward() {
        return this.averageReward;
    }

    /** @return how many steps were counted as successful episodes */
    public int getSuccessfulEpisodes() {
        return this.successfulEpisodes;
    }

    /** @return the largest reward seen, or negative infinity if none was processed */
    public float getMaxRewardAchieved() {
        return this.maxRewardAchieved;
    }

    /** @return the action count reported by the executor */
    public int getActionCount() {
        return this.actionExecutor.getActionCount();
    }
}
