mirror of
				https://github.com/yuzu-emu/yuzu-android
				synced 2025-10-24 19:00:29 -07:00 
			
		
		
		
	maxwell_3d: Restructure macro upload to use a single macro code memory.
- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
This commit is contained in:
		| @@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | ||||
|     // Reset the current macro. | ||||
|     executing_macro = 0; | ||||
|  | ||||
|     // The requested macro must have been uploaded already. | ||||
|     auto macro_code = uploaded_macros.find(method); | ||||
|     if (macro_code == uploaded_macros.end()) { | ||||
|         LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); | ||||
|     // Lookup the macro offset | ||||
|     const u32 entry{(method - MacroRegistersStart) >> 1}; | ||||
|     const auto& search{macro_offsets.find(entry)}; | ||||
|     if (search == macro_offsets.end()) { | ||||
|         LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); | ||||
|         UNREACHABLE(); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Execute the current macro. | ||||
|     macro_interpreter.Execute(macro_code->second, std::move(parameters)); | ||||
|     macro_interpreter.Execute(search->second, std::move(parameters)); | ||||
| } | ||||
|  | ||||
| void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | ||||
| @@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | ||||
|         ProcessMacroUpload(value); | ||||
|         break; | ||||
|     } | ||||
|     case MAXWELL3D_REG_INDEX(macros.bind): { | ||||
|         ProcessMacroBind(value); | ||||
|         break; | ||||
|     } | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): | ||||
| @@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | ||||
| } | ||||
|  | ||||
| void Maxwell3D::ProcessMacroUpload(u32 data) { | ||||
|     // Store the uploaded macro code to interpret them when they're called. | ||||
|     auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; | ||||
|     macro.push_back(data); | ||||
|     ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), | ||||
|                "upload_address exceeded macro_memory size!"); | ||||
|     macro_memory[regs.macros.upload_address++] = data; | ||||
| } | ||||
|  | ||||
| void Maxwell3D::ProcessMacroBind(u32 data) { | ||||
|     macro_offsets[regs.macros.entry] = data; | ||||
| } | ||||
|  | ||||
| void Maxwell3D::ProcessQueryGet() { | ||||
|   | ||||
| @@ -475,12 +475,13 @@ public: | ||||
|                 INSERT_PADDING_WORDS(0x45); | ||||
|  | ||||
|                 struct { | ||||
|                     INSERT_PADDING_WORDS(1); | ||||
|                     u32 upload_address; | ||||
|                     u32 data; | ||||
|                     u32 entry; | ||||
|                     u32 bind; | ||||
|                 } macros; | ||||
|  | ||||
|                 INSERT_PADDING_WORDS(0x189); | ||||
|                 INSERT_PADDING_WORDS(0x188); | ||||
|  | ||||
|                 u32 tfb_enabled; | ||||
|  | ||||
| @@ -994,12 +995,25 @@ public: | ||||
|     /// Returns the texture information for a specific texture in a specific shader stage. | ||||
|     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | ||||
|  | ||||
|     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | ||||
|     /// we've seen used. | ||||
|     using MacroMemory = std::array<u32, 0x40000>; | ||||
|  | ||||
|     /// Gets a reference to macro memory. | ||||
|     const MacroMemory& GetMacroMemory() const { | ||||
|         return macro_memory; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     void InitializeRegisterDefaults(); | ||||
|  | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|  | ||||
|     std::unordered_map<u32, std::vector<u32>> uploaded_macros; | ||||
|     /// Start offsets of each macro in macro_memory | ||||
|     std::unordered_map<u32, u32> macro_offsets; | ||||
|  | ||||
|     /// Memory for macro code | ||||
|     MacroMemory macro_memory; | ||||
|  | ||||
|     /// Macro method that is currently being executed / being fed parameters. | ||||
|     u32 executing_macro = 0; | ||||
| @@ -1022,9 +1036,12 @@ private: | ||||
|      */ | ||||
|     void CallMacroMethod(u32 method, std::vector<u32> parameters); | ||||
|  | ||||
|     /// Handles writes to the macro uploading registers. | ||||
|     /// Handles writes to the macro uploading register. | ||||
|     void ProcessMacroUpload(u32 data); | ||||
|  | ||||
|     /// Handles writes to the macro bind register. | ||||
|     void ProcessMacroBind(u32 data); | ||||
|  | ||||
|     /// Handles a write to the CLEAR_BUFFERS register. | ||||
|     void ProcessClearBuffers(); | ||||
|  | ||||
|   | ||||
| @@ -11,7 +11,7 @@ namespace Tegra { | ||||
|  | ||||
| MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | ||||
|  | ||||
| void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { | ||||
| void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | ||||
|     Reset(); | ||||
|     registers[1] = parameters[0]; | ||||
|     this->parameters = std::move(parameters); | ||||
| @@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa | ||||
|     // Execute the code until we hit an exit condition. | ||||
|     bool keep_executing = true; | ||||
|     while (keep_executing) { | ||||
|         keep_executing = Step(code, false); | ||||
|         keep_executing = Step(offset, false); | ||||
|     } | ||||
|  | ||||
|     // Assert that the macro used all the input parameters | ||||
| @@ -37,10 +37,10 @@ void MacroInterpreter::Reset() { | ||||
|     next_parameter_index = 1; | ||||
| } | ||||
|  | ||||
| bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | ||||
| bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | ||||
|     u32 base_address = pc; | ||||
|  | ||||
|     Opcode opcode = GetOpcode(code); | ||||
|     Opcode opcode = GetOpcode(offset); | ||||
|     pc += 4; | ||||
|  | ||||
|     // Update the program counter if we were delayed | ||||
| @@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | ||||
|  | ||||
|             delayed_pc = base_address + opcode.GetBranchTarget(); | ||||
|             // Execute one more instruction due to the delay slot. | ||||
|             return Step(code, true); | ||||
|             return Step(offset, true); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
| @@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | ||||
|         // Exit has a delay slot, execute the next instruction | ||||
|         // Note: Executing an exit during a branch delay slot will cause the instruction at the | ||||
|         // branch target to be executed before exiting. | ||||
|         Step(code, true); | ||||
|         Step(offset, true); | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { | ||||
| MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { | ||||
|     const auto& macro_memory{maxwell3d.GetMacroMemory()}; | ||||
|     ASSERT((pc % sizeof(u32)) == 0); | ||||
|     ASSERT(pc < code.size() * sizeof(u32)); | ||||
|     return {code[pc / sizeof(u32)]}; | ||||
|     ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); | ||||
|     return {macro_memory[offset + pc / sizeof(u32)]}; | ||||
| } | ||||
|  | ||||
| u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { | ||||
|   | ||||
| @@ -22,10 +22,10 @@ public: | ||||
|  | ||||
|     /** | ||||
|      * Executes the macro code with the specified input parameters. | ||||
|      * @param code The macro byte code to execute | ||||
|      * @param parameters The parameters of the macro | ||||
|      * @param offset Offset to start execution at. | ||||
|      * @param parameters The parameters of the macro. | ||||
|      */ | ||||
|     void Execute(const std::vector<u32>& code, std::vector<u32> parameters); | ||||
|     void Execute(u32 offset, std::vector<u32> parameters); | ||||
|  | ||||
| private: | ||||
|     enum class Operation : u32 { | ||||
| @@ -110,11 +110,11 @@ private: | ||||
|     /** | ||||
|      * Executes a single macro instruction located at the current program counter. Returns whether | ||||
|      * the interpreter should keep running. | ||||
|      * @param code The macro code to execute. | ||||
|      * @param offset Offset to start execution at. | ||||
|      * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | ||||
|      * previous instruction. | ||||
|      */ | ||||
|     bool Step(const std::vector<u32>& code, bool is_delay_slot); | ||||
|     bool Step(u32 offset, bool is_delay_slot); | ||||
|  | ||||
|     /// Calculates the result of an ALU operation. src_a OP src_b; | ||||
|     u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; | ||||
| @@ -127,7 +127,7 @@ private: | ||||
|     bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; | ||||
|  | ||||
|     /// Reads an opcode at the current program counter location. | ||||
|     Opcode GetOpcode(const std::vector<u32>& code) const; | ||||
|     Opcode GetOpcode(u32 offset) const; | ||||
|  | ||||
|     /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | ||||
|     u32 GetRegister(u32 register_id) const; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user