Comments (11)
I don't see a reason why there should be a difference between STM32 and RP2040 in this regard.
You need two things:
- a linker script
- some code which copies the code to RAM
For the linker script, instead of replacing link.x
from cortex-m-rt
, you can use a memory.x
like this:
/* Memory regions available to the linker. */
MEMORY {
/* First 0x100 bytes of flash, used only by the .boot2 section below. */
BOOT2 : ORIGIN = 0x10000000, LENGTH = 0x100
/* Remainder of the 2048K flash after the BOOT2 region. */
FLASH : ORIGIN = 0x10000100, LENGTH = 2048K - 0x100
RAM : ORIGIN = 0x20000000, LENGTH = 256K
/* Two separate 4k regions located directly after the 256K RAM region. */
SRAM4 : ORIGIN = 0x20040000, LENGTH = 4k
SRAM5 : ORIGIN = 0x20041000, LENGTH = 4k
}
/* Reference BOOT2_FIRMWARE so the object defining it is pulled into the link. */
EXTERN(BOOT2_FIRMWARE)
SECTIONS {
/* ### Boot loader */
.boot2 ORIGIN(BOOT2) :
{
KEEP(*(.boot2));
} > BOOT2
} INSERT BEFORE .text;
/* Input sections named .sram4 get their run-time address in SRAM4 but are
   stored in FLASH (AT>FLASH). Startup code has to copy them into SRAM4
   before any function placed there is called. */
SECTIONS {
.sram4 :
{
KEEP(*(.sram4));
} > SRAM4 AT>FLASH
} INSERT AFTER .text;
You'd need to add code to actually initialize RAM4 contents before jumping to functions located there, and that code probably needs some linker symbols so it can find the data to be copied in flash.
I don't have a ready-made example, and the details depend on what you are actually trying to achieve. But if you have working code for STM32, you can probably copy most of what you need from there.
from rp-hal.
You can also just put functions in .data
, if you don't mind where in SRAM they go:
from rp-hal.
@jannic with your linker file the resulting elf still reports that region at address 0, which will not work:
Sections:
Idx Name Size VMA LMA File off Algn
2 .text 00006e04 100001c0 100001c0 000002f4 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
3 .sram4and5 0000004c 00000000 00000000 000086e0 2**2
CONTENTS, READONLY
@thejpster I do care where it is, the idea is that the second core will only run within sram4+5 to improve performance.
from rp-hal.
You should benchmark carefully to ensure it actually does improve performance, as SRAM0-3 are striped so each successive word comes from a different bank.
jannic suggested a section and a memory region called sram4
but your ELF contains a section called sram4and5
. Can we see the source code? If not (e.g. because this is a proprietary project) you may require professional support under NDA.
from rp-hal.
sorry, I did just put the two together:
/* Memory regions available to the linker. */
MEMORY {
/* First 0x100 bytes of flash, used only by the .boot2 section below. */
BOOT2 : ORIGIN = 0x10000000, LENGTH = 0x100
/* Remainder of the 2048K flash after the BOOT2 region. */
FLASH : ORIGIN = 0x10000100, LENGTH = 2048K - 0x100
RAM : ORIGIN = 0x20000000, LENGTH = 256K
/* One combined 8k region located directly after the 256K RAM region. */
SRAM4AND5 : ORIGIN = 0x20040000, LENGTH = 8k
}
/* Reference BOOT2_FIRMWARE so the object defining it is pulled into the link. */
EXTERN(BOOT2_FIRMWARE)
SECTIONS {
/* ### Boot loader */
.boot2 ORIGIN(BOOT2) :
{
KEEP(*(.boot2));
} > BOOT2
} INSERT BEFORE .text;
/* Input sections named .sram4and5 get their run-time address in SRAM4AND5
   but are stored in FLASH (AT>FLASH); they must be copied to RAM before use. */
SECTIONS {
.sram4and5 :
{
KEEP(*(.sram4and5));
} > SRAM4AND5 AT>FLASH
} INSERT AFTER .text;
it's currently a very basic routine just to test that all works as expected
.syntax unified
.cpu cortex-m0plus
.thumb
// Bare-metal LED blink routine `basic`.
// It clears a reset bit, polls the matching bit in the reset-done register,
// selects function 5 (SIO) for GPIO25, enables the pin as an output and then
// toggles it forever with a busy-wait delay. The routine never returns.
.global basic
// NOTE(review): no section flags are given here. The objdump output quoted in
// this thread shows the section without ALLOC and at VMA 0 in that case; a
// later reply recommends `.section .sram4and5,"ax",%progbits` instead.
.section .sram4and5
.p2align 2
.type basic,%function
basic:
.fnstart
//releases the peripheral reset for iobank_0
ldr r0, =rst_clr // atomic register for clearing reset controller (0x4000c000+0x3000)
movs r1, #32 // load a 1 into bit 5
str r1, [r0, #0] // store the bitmask into the atomic register to clear register
// check if reset is done
rst:
ldr r0, =rst_base // base address for reset controller
ldr r1, [r0, #8] // offset to get to the reset_done register
movs r2, #32 // load 1 in bit 5 of register 2 (...0000000000100000)
ands r1, r1, r2 // isolate bit 5
beq rst // if bit five is 0 then check again, if not, reset is done
// set the control
ldr r0, =ctrl // control register for GPIO25
movs r1, #5 // Function 5, select SIO for GPIO25 2.19.2
str r1, [r0] // Store function_5 in GPIO25 control register
//shifts over "1" the number of bits of GPIO pin
movs r1, #1 // load a 1 into register 1
lsls r1, r1, #25 // move the bit over to align with GPIO25
ldr r0, =sio_base // SIO base
str r1, [r0, #36] // 0x24 GPIO output enable
// From here on r0 holds the SIO base and r1 the GPIO25 bit mask; the delay
// subroutine only modifies r3, so both stay valid across the loop.
led_loop:
str r1, [r0, #20] // 0x14 GPIO output value set
ldr r3, =big_num // load countdown number
bl delay // branch to subroutine delay
str r1, [r0, #24] // 0x18 GPIO output value clear
ldr r3, =big_num // load countdown number
bl delay // branch to subroutine delay
b led_loop // do the loop again
// Busy-wait: counts r3 down to zero, then returns to the caller via lr.
delay:
subs r3, #1 // subtract 1 from register 3
bne delay // loop back to delay if not zero
bx lr // return from subroutine
.fnend
.p2align 2
// Symbolic constants used by the `ldr rX, =name` loads above.
data:
.equ rst_clr, 0x4000f000 // atomic register for clearing reset controller 2.1.2
.equ rst_base, 0x4000c000 // reset controller base 2.14.3
.equ ctrl, 0x400140cc // GPIO25_CTRL 2.19.6.1
.equ sio_base, 0xd0000000 // SIO base 2.3.1.7
.equ big_num, 0x00f00000 // large number for the delay loop
build.rs:
use std::io::Error;
// Build script entry point: assembles c64/basic.S through the `cc` crate.
fn main() -> Result<(), Error> {
// Ask Cargo to re-run this build script whenever the assembly source changes.
println!("cargo:rerun-if-changed=c64/basic.S");
// Warnings are enabled and turned into errors, and debug info is requested;
// the result is compiled under the name "c64".
cc::Build::new()
.file("c64/basic.S")
.warnings(true)
.warnings_into_errors(true)
.debug(true)
.compile("c64");
Ok(())
}
from rp-hal.
.section .sram4and5
Please try .section .sram4and5,"ax",%progbits
here.
I can't claim I fully understand why, but it seems to be important. I guess it's the a
flag (SHF_ALLOC, "This section occupies memory during process execution.") that makes a difference.
References:
https://developer.arm.com/documentation/101754/0618/armclang-Reference/armclang-Integrated-Assembler/Section-directives
https://www.man7.org/linux/man-pages/man5/elf.5.html
from rp-hal.
That looks way better.
3 .sram4and5 0000004c 20040000 10006fc4 00007118 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
but it does not run, probably I have to copy the code? I'll continue this evening but wanted you to know now.
from rp-hal.
Yes, that's what I meant when I wrote "some code which copies the code to RAM".
You could for example use #[pre_init]
to copy the code from flash to RAM. But make sure to read the warnings in https://docs.rs/cortex-m-rt/0.7.3/cortex_m_rt/attr.pre_init.html, there are some pitfalls. Perhaps consider writing it in assembly.
from rp-hal.
I did get it to work with:
/* Output section for code that runs from the SRAM4AND5 region but is stored
   in FLASH. The three symbols below are what the copy code needs. */
SECTIONS {
. = ALIGN(4);
.sram4and5 :
{
. = ALIGN(4);
/* Run-time start address of the section (4-byte aligned). */
_srelocate = .;
KEEP(*(.sram4and5));
. = ALIGN(4);
/* Run-time end address of the section (4-byte aligned). */
_erelocate = .;
} > SRAM4AND5 AT>FLASH
} INSERT AFTER .text;
/* Load address of the section, i.e. where its contents are stored in FLASH. */
_xrelocate = LOADADDR(.sram4and5);
// Copy the .sram4and5 contents from their load address to their run-time
// address, so that code placed in that section can be executed afterwards.
unsafe {
extern "C" {
// Symbols defined by the linker script: start and end of the section at
// its run-time address, and its load address (LOADADDR(.sram4and5)).
static mut _srelocate: u8;
static mut _erelocate: u8;
static mut _xrelocate: u8;
}
// Only the addresses of the symbols are used; their values are never read.
let srelocate: *mut u8 = addr_of_mut!(_srelocate);
let erelocate: *const u8 = addr_of!(_erelocate);
let xrelocate: *const u8 = addr_of!(_xrelocate);
// Section length in bytes (end address minus start address).
let size = erelocate.offset_from(srelocate) as usize;
// Destination is the run-time range, source is the stored copy.
from_raw_parts_mut(srelocate, size).copy_from_slice(from_raw_parts(xrelocate, size));
}
// Start `basic` through core1.spawn. The first argument is a 100-element
// usize slice that begins 4 * 100 bytes below 0x20040000 + 8 KiB, i.e. at the
// top of the 8k SRAM4AND5 region. Presumably this is the stack for the
// spawned closure; verify against the spawn API.
let _test = core1.spawn(
unsafe {
&mut *(slice_from_raw_parts_mut((0x20040000 + 8 * 1024 - 4 * 100) as *mut usize, 100))
},
move || unsafe { basic() },
);
probably not the nicest but it's working. the first core uses the "normal" way and the second only this.
btw. the RP2040 datasheet says that if one of the SRAM banks 0-5 is accessed by more than one CPU or other bus master at the same time, they have to wait. Since SRAM4+5 is used exclusively by the second core, no waiting should happen.
Thanks for all your help!
from rp-hal.
Glad that it finally worked!
Regarding the possible wait states on concurrent access: I'd love to see some benchmark showing how big the difference is. I guess it's rather small, but actual measurements would be interesting.
from rp-hal.
I won't measure it, but I'll just trust the datasheet that there are no wait states if the bank is not used elsewhere.
And as already said: Thanks!
from rp-hal.
Related Issues (20)
- ADC read blocks HOT 3
- embedded_io::serial::Reader implementations drop data on error
- rp2040_hal::halt() makes rp2040 un-debuggable even though comment in function says debugging will stay possible
- Does rp2040_hal::spi::Spi implement the embedded-hal 1.0 SpiDevice trait? HOT 3
- Cycling a buffer with DMA HOT 5
- Support for #[thread_local]? HOT 3
- Multicore Lockout HOT 11
- Readme seems to use wrong cargo run command HOT 1
- I2C example produces weird non-working results HOT 3
- Embassy-RP and RP2040-hal Compatibility??? - Embedded_Hal::PWM:SetDutyCycle Assistance HOT 4
- Faulty UART baudrate divisor formula HOT 2
- Support the ReadReady and WriteReady traits on UART HOT 1
- Strange on-target-test results on rust beta HOT 4
- I2C contract violation HOT 4
- Add new RP2350 microcontroller support HOT 2
- `embedded_io::ReadReady/WriteReady` not implemented for uart peripheral HOT 1
- rp235x-hal: Add nice wrapper for `get_sys_info`
- Our implementation of embedded_io::Write is wrong HOT 1
- Be consistent with I2S and SPI terminology
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from rp-hal.