From e418c26b14e840304aa6e00ef847ec41ac0b995e Mon Sep 17 00:00:00 2001
From: Mike Engel <Mike.Engel@digi.com>
Date: Fri, 23 Feb 2024 12:28:42 +0100
Subject: [PATCH 04/10] Documentation: add NXP RT support

Upstream-Status: Inappropriate [DEY specific]

Signed-off-by: Mike Engel <Mike.Engel@digi.com>
---
 .../mailbox/generic-software-mbox.yaml        |  65 +++
 .../devicetree/bindings/net/fsl,fec.yaml      |  24 ++
 .../devicetree/bindings/tty/rpmsg_tty.yaml    |  67 ++++
 Documentation/printk-ringbuffer.txt           | 377 ++++++++++++++++++
 4 files changed, 533 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mailbox/generic-software-mbox.yaml
 create mode 100644 Documentation/devicetree/bindings/tty/rpmsg_tty.yaml
 create mode 100644 Documentation/printk-ringbuffer.txt

diff --git a/Documentation/devicetree/bindings/mailbox/generic-software-mbox.yaml b/Documentation/devicetree/bindings/mailbox/generic-software-mbox.yaml
new file mode 100644
index 000000000000..57c91ab2c80a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/generic-software-mbox.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mailbox/generic-software-mbox.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic Software Mailbox
+
+maintainers:
+  - Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
+
+description: |
+  The Generic Software Mailbox is a virtual device, which uses unused
+  interrupt line to notify remote side and shared memory to emulate
+  device MMIO registers.
+
+properties:
+  compatible:
+    oneOf:
+      - const: fsl,generic-software-mbox
+  reg:
+    maxItems: 1
+
+  "#mbox-cells":
+    description: |
+      <&phandle type channel ack>
+      phandle   : Label name of controller
+      type      : Channel type
+      channel   : Channel index
+      ack       : 0: Receiver doesn't trigger an ACK interrupt to sender after receive message
+                  1: Receiver triggers an ACK interrupt to sender after receive message
+
+      This mailbox support 3 type of unidirectional channels, each type
+      has 32 channels. Following types are supported:
+      0 - TX channel with 32bit transmit register
+      1 - RX channel with 32bit receive register and IRQ support
+      2 - RX doorbell channel.
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - "#mbox-cells"
+  - interrupts
+  - interrupt-names
+
+additionalProperties: false
+
+examples:
+  - |
+    reserved-memory {
+        gen-sw-mbox@b8500000 {
+            no-map;
+            reg = <0 0xb8500000 0 0x1000>;
+        };
+    };
+
+    generic-software-mailbox@b8500000 {
+        compatible = "fsl,generic-software-mbox";
+        reg = <0 0xb8500000 0 0x1000>;
+        #mbox-cells = <3>;
+        interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "irq", "remote_irq";
+    };
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
index 18f04d110d6e..f53b6ba17d5d 100644
--- a/Documentation/devicetree/bindings/net/fsl,fec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -176,6 +176,30 @@ properties:
     description:
       Register bits of stop mode control, the format is <&gpr req_gpr req_bit>.
 
+  fsl,rx-phy-delay-100-ns:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      If present, should specify the delay (MAC-PHY) compensation
+      in ns to be applied on packets timestamps on receive for 100 Mbps link speed
+
+  fsl,tx-phy-delay-100-ns:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      If present, should specify the delay (MAC-PHY) compensation
+      in ns to be applied on packets timestamps on transmit for 100 Mbps link speed
+
+  fsl,rx-phy-delay-1000-ns:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      If present, should specify the delay (MAC-PHY) compensation
+      in ns to be applied on packets timestamps on receive for 1 Gbps link speed
+
+  fsl,tx-phy-delay-1000-ns:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      If present, should specify the delay (MAC-PHY) compensation
+      in ns to be applied on packets timestamps on transmit for 1 Gbps link speed
+
   mii-exclusive:
     $ref: /schemas/types.yaml#/definitions/flag
     description:
diff --git a/Documentation/devicetree/bindings/tty/rpmsg_tty.yaml b/Documentation/devicetree/bindings/tty/rpmsg_tty.yaml
new file mode 100644
index 000000000000..9e11fadd16eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/tty/rpmsg_tty.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/tty/rpmsg_tty.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: rpmsg tty driver
+
+maintainers:
+  - Biwen Li <biwen.li@nxp.com>
+
+properties:
+  compatible:
+    const: fsl,uart-rpbus
+
+  bus_id:
+    items:
+      - description: Used for imx srtm uart application protocol. Specify which real uart(LPUARTx/UARTx, x is instance number) will be used by a virtual tty(/dev/ttyRPMSGx), bus_id = 0xff when bus_id is not specified in dts and specified the flag(IMX_SRTM_UART_SUPPORT_MULTI_UART_MSG_FLAG).
+    maxItems: 1
+
+  flags:
+    items:
+      - description: used for imx srtm uart application protocol(IMX_SRTM_UART_SUPPORT_MULTI_UART_MSG_FLAG: support multi uart message protocol; IMX_SRTM_UART_SPECIFY_PORT_NUM_MASK: whether specify destination uart id; IMX_SRTM_UART_PORT_NUM_MASK: which destination uart id will be used)
+    maxItems: 1
+
+  status:
+    maxItems: 1
+
+examples:
+  - |
+    #include <dt-bindings/rpmsg/imx_srtm.h>
+
+    uart-rpbus-0 {
+        compatible = "fsl,uart-rpbus";
+        bus_id = <3>; /* use uart3 */
+        flags=<IMX_SRTM_UART_SUPPORT_MULTI_UART_MSG_FLAG>;
+        status = "okay";
+    }; /* /dev/ttyRPMSG0 on linux(running on acore) <--SRTM PROTOCOL--> srtm uart channel 0(endpoint on mcore) <---MULTI UART MSG PROTOCOL---> UART3(mcore is the owner) */
+
+    uart-rpbus-1 {
+        compatible = "fsl,uart-rpbus";
+        bus_id = <3>; /* use uart3 */
+        flags=<IMX_SRTM_UART_SUPPORT_MULTI_UART_MSG_FLAG>;
+        status = "okay";
+    }; /* /dev/ttyRPMSG1 on linux(running on acore) <--SRTM PROTOCOL--> srtm uart channel 1(endpoint on mcore) <---MULTI UART MSG PROTOCOL---> UART3(mcore is the owner) */
+
+    uart-rpbus-2 {
+        compatible = "fsl,uart-rpbus";
+        bus_id = <2>; /* use uart2 */
+        flags=<IMX_SRTM_UART_SUPPORT_MULTI_UART_MSG_FLAG>;
+        status = "okay";
+    }; /* /dev/ttyRPMSG2 on linux(running on acore) <--SRTM PROTOCOL--> srtm uart channel 2(endpoint on mcore) <---MULTI UART MSG PROTOCOL---> UART2(mcore is the owner) */
+
+    uart-rpbus-3 {
+        compatible = "fsl,uart-rpbus";
+        bus_id = <4>; /* use uart4 */
+        flags=<IMX_SRTM_UART_SUPPORT_MULTI_UART_MSG_FLAG | IMX_SRTM_UART_SPECIFY_PORT_NUM_MASK | IMX_SRTM_UART_PORT_NUM_MASK(0x8)>;
+        status = "okay";
+    }; /* [/dev/ttyRPMSG3 on linux(running on acore) <--SRTM PROTOCOL--> srtm uart channel 3(endpoint on mcore) <---MULTI UART MSG PROTOCOL---> UART4(mcore is the owner), first board] <---MULTI UART MSG PROTOCOL--> [UARTx(mcore is the owner) <---MULTI UART MSG PROTOCOL---> srtm uart channel 8 <---SRTM PROTOCOL--> /dev/ttyRPMSG8 on linux(running on acore), second board ] */
+
+    uart-rpbus-4 {
+        compatible = "fsl,uart-rpbus";
+        bus_id = <5>; /* use uart5 */
+        status = "okay";
+    }; /* /dev/ttyRPMSG4 on linux(running on acore) <--SRTM PROTOCOL--> srtm uart channel 4(endpoint on mcore) <-----> UART5(mcore is the owner) <----> GPS device(The device have exclusive use of the UART5, other virtual tty cannot use UART5) */
+
+    /* Note: when nothing is specified(include compatible, bus_id, flags, status), it's a normal rpmsg tty. /dev/ttyRPMSG7(running on acore) <--RPMSG--> rpmsg endpoint(running on mcore) */
diff --git a/Documentation/printk-ringbuffer.txt b/Documentation/printk-ringbuffer.txt
new file mode 100644
index 000000000000..6bde5dbd8545
--- /dev/null
+++ b/Documentation/printk-ringbuffer.txt
@@ -0,0 +1,377 @@
+struct printk_ringbuffer
+------------------------
+John Ogness <john.ogness@linutronix.de>
+
+Overview
+~~~~~~~~
+As the name suggests, this ring buffer was implemented specifically to serve
+the needs of the printk() infrastructure. The ring buffer itself is not
+specific to printk and could be used for other purposes. _However_, the
+requirements and semantics of printk are rather unique. If you intend to use
+this ring buffer for anything other than printk, you need to be very clear on
+its features, behavior, and pitfalls.
+
+Features
+^^^^^^^^
+The printk ring buffer has the following features:
+
+- single global buffer
+- resides in initialized data section (available at early boot)
+- lockless readers
+- supports multiple writers
+- supports multiple non-consuming readers
+- safe from any context (including NMI)
+- groups bytes into variable length blocks (referenced by entries)
+- entries tagged with sequence numbers
+
+Behavior
+^^^^^^^^
+Since the printk ring buffer readers are lockless, there exists no
+synchronization between readers and writers. Basically writers are the tasks
+in control and may overwrite any and all committed data at any time and from
+any context. For this reason readers can miss entries if they are overwritten
+before the reader was able to access the data. The reader API implementation
+is such that reader access to entries is atomic, so there is no risk of
+readers having to deal with partial or corrupt data. Also, entries are
+tagged with sequence numbers so readers can recognize if entries were missed.
+
+Writing to the ring buffer consists of 2 steps. First a writer must reserve
+an entry of desired size. After this step the writer has exclusive access
+to the memory region. Once the data has been written to memory, it needs to
+be committed to the ring buffer. After this step the entry has been inserted
+into the ring buffer and assigned an appropriate sequence number.
+
+Once committed, a writer must no longer access the data directly. This is
+because the data may have been overwritten and no longer exists. If a
+writer must access the data, it should either keep a private copy before
+committing the entry or use the reader API to gain access to the data.
+
+Because of how the data backend is implemented, entries that have been
+reserved but not yet committed act as barriers, preventing future writers
+from filling the ring buffer beyond the location of the reserved but not
+yet committed entry region. For this reason it is *important* that writers
+perform both reserve and commit as quickly as possible. Also, be aware that
+preemption and local interrupts are disabled and writing to the ring buffer
+is processor-reentrant locked during the reserve/commit window. Writers in
+NMI contexts can still preempt any other writers, but as long as these
+writers do not write a large amount of data with respect to the ring buffer
+size, this should not become an issue.
+
+API
+~~~
+
+Declaration
+^^^^^^^^^^^
+The printk ring buffer can be instantiated as a static structure:
+
+ /* declare a static struct printk_ringbuffer */
+ #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr)
+
+The value of szbits specifies the size of the ring buffer in bits. The
+cpulockptr field is a pointer to a prb_cpulock struct that is used to
+perform processor-reentrant spin locking for the writers. It is specified
+externally because it may be used for multiple ring buffers (or other
+code) to synchronize writers without risk of deadlock.
+
+Here is an example of a declaration of a printk ring buffer specifying a
+32KB (2^15) ring buffer:
+
+....
+DECLARE_STATIC_PRINTKRB_CPULOCK(rb_cpulock);
+DECLARE_STATIC_PRINTKRB(rb, 15, &rb_cpulock);
+....
+
+If writers will be using multiple ring buffers and the ordering of that usage
+is not clear, the same prb_cpulock should be used for both ring buffers.
+
+Writer API
+^^^^^^^^^^
+The writer API consists of 2 functions. The first is to reserve an entry in
+the ring buffer, the second is to commit that data to the ring buffer. The
+reserved entry information is stored within a provided `struct prb_handle`.
+
+ /* reserve an entry */
+ char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
+                   unsigned int size);
+
+ /* commit a reserved entry to the ring buffer */
+ void prb_commit(struct prb_handle *h);
+
+Here is an example of a function to write data to a ring buffer:
+
+....
+int write_data(struct printk_ringbuffer *rb, char *data, int size)
+{
+    struct prb_handle h;
+    char *buf;
+
+    buf = prb_reserve(&h, rb, size);
+    if (!buf)
+        return -1;
+    memcpy(buf, data, size);
+    prb_commit(&h);
+
+    return 0;
+}
+....
+
+Pitfalls
+++++++++
+Be aware that prb_reserve() can fail. A retry might be successful, but it
+depends entirely on whether or not the next part of the ring buffer to
+overwrite belongs to reserved but not yet committed entries of other writers.
+Writers can use the prb_inc_lost() function to allow readers to notice that a
+message was lost.
+
+Reader API
+^^^^^^^^^^
+The reader API utilizes a `struct prb_iterator` to track the reader's
+position in the ring buffer.
+
+ /* declare a pre-initialized static iterator for a ring buffer */
+ #define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr)
+
+ /* initialize iterator for a ring buffer (if static macro NOT used) */
+ void prb_iter_init(struct prb_iterator *iter,
+                    struct printk_ringbuffer *rb, u64 *seq);
+
+ /* make a deep copy of an iterator */
+ void prb_iter_copy(struct prb_iterator *dest,
+                    struct prb_iterator *src);
+
+ /* non-blocking, advance to next entry (and read the data) */
+ int prb_iter_next(struct prb_iterator *iter, char *buf,
+                   int size, u64 *seq);
+
+ /* blocking, advance to next entry (and read the data) */
+ int prb_iter_wait_next(struct prb_iterator *iter, char *buf,
+                        int size, u64 *seq);
+
+ /* position iterator at the entry seq */
+ int prb_iter_seek(struct prb_iterator *iter, u64 seq);
+
+ /* read data at current position */
+ int prb_iter_data(struct prb_iterator *iter, char *buf,
+                   int size, u64 *seq);
+
+Typically prb_iter_data() is not needed because the data can be retrieved
+directly with prb_iter_next().
+
+Here is an example of a non-blocking function that will read all the data in
+a ring buffer:
+
+....
+void read_all_data(struct printk_ringbuffer *rb, char *buf, int size)
+{
+    struct prb_iterator iter;
+    u64 prev_seq = 0;
+    u64 seq;
+    int ret;
+
+    prb_iter_init(&iter, rb, NULL);
+
+    for (;;) {
+        ret = prb_iter_next(&iter, buf, size, &seq);
+        if (ret > 0) {
+            if (seq != ++prev_seq) {
+                /* "seq - prev_seq" entries missed */
+                prev_seq = seq;
+            }
+            /* process buf here */
+        } else if (ret == 0) {
+            /* hit the end, done */
+            break;
+        } else if (ret < 0) {
+            /*
+             * iterator is invalid, a writer overtook us, reset the
+             * iterator and keep going, entries were missed
+             */
+            prb_iter_init(&iter, rb, NULL);
+        }
+    }
+}
+....
+
+Pitfalls
+++++++++
+The reader's iterator can become invalid at any time because the reader was
+overtaken by a writer. Typically the reader should reset the iterator back
+to the current oldest entry (which will be newer than the entry the reader
+was at) and continue, noting the number of entries that were missed.
+
+Utility API
+^^^^^^^^^^^
+Several functions are available as convenience for external code.
+
+ /* query the size of the data buffer */
+ int prb_buffer_size(struct printk_ringbuffer *rb);
+
+ /* skip a seq number to signify a lost record */
+ void prb_inc_lost(struct printk_ringbuffer *rb);
+
+ /* processor-reentrant spin lock */
+ void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
+
+ /* processor-reentrant spin unlock */
+ void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
+
+Pitfalls
+++++++++
+Although the value returned by prb_buffer_size() does represent an absolute
+upper bound, the amount of data that can be stored within the ring buffer
+is actually less because of the additional storage space of a header for each
+entry.
+
+The prb_lock() and prb_unlock() functions can be used to synchronize between
+ring buffer writers and other external activities. The function of a
+processor-reentrant spin lock is to disable preemption and local interrupts
+and synchronize against other processors. It does *not* protect against
+multiple contexts of a single processor, i.e NMI.
+
+Implementation
+~~~~~~~~~~~~~~
+This section describes several of the implementation concepts and details to
+help developers better understand the code.
+
+Entries
+^^^^^^^
+All ring buffer data is stored within a single static byte array. The reason
+for this is to ensure that any pointers to the data (past and present) will
+always point to valid memory. This is important because the lockless readers
+may be preempted for long periods of time and when they resume may be working
+with expired pointers.
+
+Entries are identified by start index and size. (The start index plus size
+is the start index of the next entry.) The start index is not simply an
+offset into the byte array, but rather a logical position (lpos) that maps
+directly to byte array offsets.
+
+For example, for a byte array of 1000, an entry may have have a start index
+of 100. Another entry may have a start index of 1100. And yet another 2100.
+All of these entry are pointing to the same memory region, but only the most
+recent entry is valid. The other entries are pointing to valid memory, but
+represent entries that have been overwritten.
+
+Note that due to overflowing, the most recent entry is not necessarily the one
+with the highest lpos value. Indeed, the printk ring buffer initializes its
+data such that an overflow happens relatively quickly in order to validate the
+handling of this situation. The implementation assumes that an lpos (unsigned
+long) will never completely wrap while a reader is preempted. If this were to
+become an issue, the seq number (which never wraps) could be used to increase
+the robustness of handling this situation.
+
+Buffer Wrapping
+^^^^^^^^^^^^^^^
+If an entry starts near the end of the byte array but would extend beyond it,
+a special terminating entry (size = -1) is inserted into the byte array and
+the real entry is placed at the beginning of the byte array. This can waste
+space at the end of the byte array, but simplifies the implementation by
+allowing writers to always work with contiguous buffers.
+
+Note that the size field is the first 4 bytes of the entry header. Also note
+that calc_next() always ensures that there are at least 4 bytes left at the
+end of the byte array to allow room for a terminating entry.
+
+Ring Buffer Pointers
+^^^^^^^^^^^^^^^^^^^^
+Three pointers (lpos values) are used to manage the ring buffer:
+
+ - _tail_: points to the oldest entry
+ - _head_: points to where the next new committed entry will be
+ - _reserve_: points to where the next new reserved entry will be
+
+These pointers always maintain a logical ordering:
+
+ tail <= head <= reserve
+
+The reserve pointer moves forward when a writer reserves a new entry. The
+head pointer moves forward when a writer commits a new entry.
+
+The reserve pointer cannot overwrite the tail pointer in a wrap situation. In
+such a situation, the tail pointer must be "pushed forward", thus
+invalidating that oldest entry. Readers identify if they are accessing a
+valid entry by ensuring their entry pointer is `>= tail && < head`.
+
+If the tail pointer is equal to the head pointer, it cannot be pushed and any
+reserve operation will fail. The only resolution is for writers to commit
+their reserved entries.
+
+Processor-Reentrant Locking
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The purpose of the processor-reentrant locking is to limit the interruption
+scenarios of writers to 2 contexts. This allows for a simplified
+implementation where:
+
+- The reserve/commit window only exists on 1 processor at a time. A reserve
+  can never fail due to uncommitted entries of other processors.
+
+- When committing entries, it is trivial to handle the situation when
+  subsequent entries have already been committed, i.e. managing the head
+  pointer.
+
+Performance
+~~~~~~~~~~~
+Some basic tests were performed on a quad Intel(R) Xeon(R) CPU E5-2697 v4 at
+2.30GHz (36 cores / 72 threads). All tests involved writing a total of
+32,000,000 records at an average of 33 bytes each. Each writer was pinned to
+its own CPU and would write as fast as it could until a total of 32,000,000
+records were written. All tests involved 2 readers that were both pinned
+together to another CPU. Each reader would read as fast as it could and track
+how many of the 32,000,000 records it could read. All tests used a ring buffer
+of 16KB in size, which holds around 350 records (header + data for each
+entry).
+
+The only difference between the tests is the number of writers (and thus also
+the number of records per writer). As more writers are added, the time to
+write a record increases. This is because data pointers, modified via cmpxchg,
+and global data access in general become more contended.
+
+1 writer
+^^^^^^^^
+ runtime: 0m 18s
+ reader1: 16219900/32000000 (50%) records
+ reader2: 16141582/32000000 (50%) records
+
+2 writers
+^^^^^^^^^
+ runtime: 0m 32s
+ reader1: 16327957/32000000 (51%) records
+ reader2: 16313988/32000000 (50%) records
+
+4 writers
+^^^^^^^^^
+ runtime: 0m 42s
+ reader1: 16421642/32000000 (51%) records
+ reader2: 16417224/32000000 (51%) records
+
+8 writers
+^^^^^^^^^
+ runtime: 0m 43s
+ reader1: 16418300/32000000 (51%) records
+ reader2: 16432222/32000000 (51%) records
+
+16 writers
+^^^^^^^^^^
+ runtime: 0m 54s
+ reader1: 16539189/32000000 (51%) records
+ reader2: 16542711/32000000 (51%) records
+
+32 writers
+^^^^^^^^^^
+ runtime: 1m 13s
+ reader1: 16731808/32000000 (52%) records
+ reader2: 16735119/32000000 (52%) records
+
+Comments
+^^^^^^^^
+It is particularly interesting to compare/contrast the 1-writer and 32-writer
+tests. Despite the writing of the 32,000,000 records taking over 4 times
+longer, the readers (which perform no cmpxchg) were still unable to keep up.
+This shows that the memory contention between the increasing number of CPUs
+also has a dramatic effect on readers.
+
+It should also be noted that in all cases each reader was able to read >=50%
+of the records. This means that a single reader would have been able to keep
+up with the writer(s) in all cases, becoming slightly easier as more writers
+are added. This was the purpose of pinning 2 readers to 1 CPU: to observe how
+maximum reader performance changes.
-- 
2.34.1