From: Andre Ramnitz Date: Mon, 1 Jun 2026 18:48:16 +0000 (+0200) Subject: Bump X-Git-Url: https://git.ramnitz.eu/?a=commitdiff_plain;p=etc_portage_patches.git Bump --- diff --git a/dev-qt/qtbase/0001-Wayland-Ignore-null-wl_output-in-QWaylandScreen-surf.patch b/dev-qt/qtbase/0001-Wayland-Ignore-null-wl_output-in-QWaylandScreen-surf.patch deleted file mode 100644 index a4bcc0a..0000000 --- a/dev-qt/qtbase/0001-Wayland-Ignore-null-wl_output-in-QWaylandScreen-surf.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 0285fce0ce0db7b9446389870fe6c076310eb28e Mon Sep 17 00:00:00 2001 -From: Igor Khanin -Date: Thu, 26 Feb 2026 10:50:58 +0200 -Subject: Wayland: Ignore null wl_output in QWaylandScreen surface enter/leave - -Misbehaving compositors (as observed with some Smithay based -compositors) may send wl_surface.enter and wl_surface.leave messages -referring to an output which was already removed from the registry, and -therefore its' proxy object was already destroyed. This manifests as -the listener method being invoked with a null wl_output pointer, which -Qt then dereferences - leading to a crash. - -To avoid crashing, simply just ignore such events. 
- -Pick-to: 6.11 6.10 -Change-Id: Ib217366b5aff1b39dcc6f42e52165b94ea7a1018 -Reviewed-by: David Edmundson ---- - src/plugins/platforms/wayland/qwaylandsurface.cpp | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/src/plugins/platforms/wayland/qwaylandsurface.cpp b/src/plugins/platforms/wayland/qwaylandsurface.cpp -index bd7c358e42a..ffccbefe61c 100644 ---- a/src/plugins/platforms/wayland/qwaylandsurface.cpp -+++ b/src/plugins/platforms/wayland/qwaylandsurface.cpp -@@ -56,8 +56,10 @@ void QWaylandSurface::handleScreenRemoved(QScreen *qScreen) - - void QWaylandSurface::surface_enter(wl_output *output) - { -- auto addedScreen = QWaylandScreen::fromWlOutput(output); -+ if (!output) -+ return; - -+ auto addedScreen = QWaylandScreen::fromWlOutput(output); - if (!addedScreen) - return; - -@@ -76,8 +78,10 @@ void QWaylandSurface::surface_enter(wl_output *output) - - void QWaylandSurface::surface_leave(wl_output *output) - { -- auto *removedScreen = QWaylandScreen::fromWlOutput(output); -+ if (!output) -+ return; - -+ auto *removedScreen = QWaylandScreen::fromWlOutput(output); - if (!removedScreen) - return; - --- -2.52.0 - diff --git a/gui-apps/noctalia-shell/mod_nightlight.patch b/gui-apps/noctalia-shell/mod_nightlight.patch new file mode 100644 index 0000000..dab0d3e --- /dev/null +++ b/gui-apps/noctalia-shell/mod_nightlight.patch @@ -0,0 +1,33 @@ +--- noctalia-release/Services/Control/IPCService.file 2026-04-29 02:20:16.298144022 +0200 ++++ noctalia-release/Services/Control/IPCService.qml 2026-04-29 02:18:13.515149554 +0200 +@@ -482,6 +482,30 @@ + } + } + } ++ function enable() { ++ if (!ProgramCheckerService.wlsunsetAvailable) { ++ Logger.w("IPC", "wlsunset not available, cannot toggle night light"); ++ return; ++ } ++ Settings.data.nightLight.forced = false; ++ Settings.data.nightLight.enabled = true; ++ } ++ function force() { ++ if (!ProgramCheckerService.wlsunsetAvailable) { ++ Logger.w("IPC", "wlsunset not available, cannot toggle 
night light"); ++ return; ++ } ++ Settings.data.nightLight.forced = true; ++ Settings.data.nightLight.enabled = true; ++ } ++ function disable() { ++ if (!ProgramCheckerService.wlsunsetAvailable) { ++ Logger.w("IPC", "wlsunset not available, cannot toggle night light"); ++ return; ++ } ++ Settings.data.nightLight.forced = false; ++ Settings.data.nightLight.enabled = false; ++ } + } + + IpcHandler { diff --git a/gui-apps/noctalia-shell/mod_theme_foot.patch b/gui-apps/noctalia-shell/mod_theme_foot.patch index 810e396..330b8d6 100644 --- a/gui-apps/noctalia-shell/mod_theme_foot.patch +++ b/gui-apps/noctalia-shell/mod_theme_foot.patch @@ -1,4 +1,4 @@ ---- noctalia-release/Assets/Templates/foot_predefined.backup 2026-04-23 08:13:04.228820613 +0200 +--- noctalia-release/Assets/Templates/terminal/foot_predefined.backup 2026-04-23 08:13:04.228820613 +0200 +++ noctalia-release/Assets/Templates/terminal/foot-predefined 2026-04-23 08:17:51.671131130 +0200 @@ -20,3 +20,26 @@ selection-foreground={{colors.terminal_selection_fg.default.hex_stripped}} @@ -26,4 +26,4 @@ +bright7={{colors.terminal_bright_white.default.hex_stripped}} +selection-foreground={{colors.terminal_selection_bg.default.hex_stripped}} +selection-background={{colors.terminal_selection_fg.default.hex_stripped}} -+cursor={{colors.terminal_cursor_text.default.hex_stripped}} {{colors.terminal_cursor.default.hex_stripped}} ++cursor={{colors.terminal_cursor.default.hex_stripped}} {{colors.terminal_cursor_text.default.hex_stripped}} diff --git a/gui-apps/noctalia-shell/theme_qutebrowser_01.patch b/gui-apps/noctalia-shell/theme_qutebrowser_01.patch new file mode 100644 index 0000000..d9f1596 --- /dev/null +++ b/gui-apps/noctalia-shell/theme_qutebrowser_01.patch @@ -0,0 +1,22 @@ +--- noctalia-release/Services/Theming/TemplateRegistry.file 2026-05-11 18:49:28.945664584 +0200 ++++ noctalia-release/Services/Theming/TemplateRegistry.qml 2026-05-11 18:49:44.455868839 +0200 +@@ -171,6 +171,19 @@ + } // CONSOLIDATED DISCORD 
CLIENTS + , + { ++ "id": "qutebrowser", ++ "name": "Qutebrowser", ++ "category": "browser", ++ "input": "qutebrowser.py", ++ "outputs": [ ++ { ++ "path": "~/.config/qutebrowser/noctalia-theme.py" ++ } ++ ], ++ "postProcess": () => `pgrep -x qutebrowser && qutebrowser ':config-source'` ++ } ++ , ++ { + "id": "discord", + "name": "Discord", + "category": "misc", diff --git a/gui-apps/noctalia-shell/theme_qutebrowser_02.patch b/gui-apps/noctalia-shell/theme_qutebrowser_02.patch new file mode 100644 index 0000000..517c6b9 --- /dev/null +++ b/gui-apps/noctalia-shell/theme_qutebrowser_02.patch @@ -0,0 +1,284 @@ +--- noctalia-release/Assets/Templates/qutebrowser.file 1970-01-01 01:00:00.000000000 +0100 ++++ noctalia-release/Assets/Templates/qutebrowser.py 1970-01-01 01:00:00.000000000 +0100 +@@ -0,0 +1,281 @@ ++# Noctalia template for qutebrowser by freerunner ++ ++ ++# Colors should be in 6 or 2 character hexadecimal or single spaced rgb decimal: "#RRGGBB", "#BW" or "0_255 0_255 0_255" ++# example for white: "#ffffff", "#ff" or "255 255 255". ++ ++ ++# Text color of the completion widget. May be a single color to use for all columns or a list of three colors, one for each column. ++c.colors.completion.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Background color of the completion widget for odd rows. ++c.colors.completion.odd.bg = "{{colors.surface.dark.hex}}" ++ ++# Background color of the completion widget for even rows. ++c.colors.completion.even.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color of completion widget category headers. ++c.colors.completion.category.fg = "{{colors.on_primary.default.hex}}" ++ ++# Background color of the completion widget category headers. ++c.colors.completion.category.bg = "{{colors.primary.dark.hex}}" ++ ++# Top border color of the completion widget category headers. ++c.colors.completion.category.border.top = "{{colors.outline.dark.hex}}" ++ ++# Bottom border color of the completion widget category headers. 
++c.colors.completion.category.border.bottom = "{{colors.outline.dark.hex}}" ++ ++# Foreground color of the selected completion item. ++c.colors.completion.item.selected.fg = "{{colors.background.default.hex}}" ++ ++# Background color of the selected completion item. ++c.colors.completion.item.selected.bg = "{{colors.secondary.default.hex}}" ++ ++# Top border color of the selected completion item. ++c.colors.completion.item.selected.border.top = "{{colors.outline.default.hex}}" ++ ++# Bottom border color of the selected completion item. ++c.colors.completion.item.selected.border.bottom = "{{colors.outline.default.hex}}" ++ ++# Foreground color of the matched text in the selected completion item. ++c.colors.completion.item.selected.match.fg = "{{colors.tertiary.default.hex}}" ++ ++# Foreground color of the matched text in the completion. ++c.colors.completion.match.fg = "{{colors.tertiary.default.hex}}" ++ ++# Color of the scrollbar handle in the completion view. ++c.colors.completion.scrollbar.fg = "{{colors.primary.dark.hex}}" ++ ++# Color of the scrollbar in the completion view. ++c.colors.completion.scrollbar.bg = "{{colors.primary_container.dark.hex}}" ++ ++# Background color of disabled items in the context menu. ++c.colors.contextmenu.disabled.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color of disabled items in the context menu. ++c.colors.contextmenu.disabled.fg = "{{colors.on_secondary.dark.hex}}" ++ ++# Background color of the context menu. If set to null, the Qt dark is used. ++c.colors.contextmenu.menu.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color of the context menu. If set to null, the Qt dark is used. ++c.colors.contextmenu.menu.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Background color of the context menu’s selected item. If set to null, the Qt dark is used. ++c.colors.contextmenu.selected.bg = "{{colors.primary.dark.hex}}" ++ ++#Foreground color of the context menu’s selected item. If set to null, the Qt dark is used. 
++c.colors.contextmenu.selected.fg = "{{colors.on_primary.dark.hex}}" ++ ++# Background color for the download bar. ++c.colors.downloads.bar.bg = "{{colors.scrim.dark.hex}}" ++ ++# Color gradient start for download text. ++c.colors.downloads.start.fg = "{{colors.on_tertiary.dark.hex}}" ++ ++# Color gradient start for download "Yellow". ++c.colors.downloads.start.bg = "{{colors.tertiary.default.hex}}" ++ ++# Color gradient end for download text. ++c.colors.downloads.stop.fg = "{{colors.on_primary.dark.hex}}" ++ ++# Color gradient stop for download "Green". ++c.colors.downloads.stop.bg = "{{colors.primary.default.hex}}" ++ ++# Foreground color for downloads with errors "Red". ++c.colors.downloads.error.fg = "{{colors.error.default.hex}}" ++ ++# Font color for hints. ++c.colors.hints.fg = "{{colors.on_secondary_container.dark.hex}}" ++ ++# Background color for hints. Note that you can use a `rgba(...)` value for transparency. ++c.colors.hints.bg = "{{colors.secondary_container.dark.hex}}" ++ ++# Font color for the matched part of hints. ++c.colors.hints.match.fg = "{{colors.inverse_primary.dark.hex}}" ++ ++# Text color for the keyhint widget. ++c.colors.keyhint.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Highlight color for keys to complete the current keychain. ++c.colors.keyhint.suffix.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Background color of the keyhint widget. ++c.colors.keyhint.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color of an error message. ++c.colors.messages.error.fg = "{{colors.on_error.dark.hex}}" ++ ++# Background color of an error message. ++c.colors.messages.error.bg = "{{colors.error.dark.hex}}" ++ ++# Border color of an error message. ++c.colors.messages.error.border = "{{colors.outline_variant.dark.hex}}" ++ ++# Foreground color of a warning message. ++c.colors.messages.warning.fg = "{{colors.on_error.dark.hex}}" ++ ++# Background color of a warning message. 
++c.colors.messages.warning.bg = "{{colors.scrim.dark.hex}}" ++ ++# Border color of a warning message. ++c.colors.messages.warning.border = "{{colors.outline_variant.dark.hex}}" ++ ++# Foreground color of an info message. ++c.colors.messages.info.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Background color of an info message. ++c.colors.messages.info.bg = "{{colors.surface.dark.hex}}" ++ ++# Border color of an info message. ++c.colors.messages.info.border = "{{colors.outline_variant.dark.hex}}" ++ ++# Foreground color for prompts. ++c.colors.prompts.fg = "{{colors.on_primary.dark.hex}}" ++ ++# Border used around UI elements in prompts. ++c.colors.prompts.border = "{{colors.outline.dark.hex}}" ++ ++# Background color for prompts. ++c.colors.prompts.bg = "{{colors.primary.dark.hex}}" ++ ++# Background color for the selected item in filename prompts. ++c.colors.prompts.selected.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color for the selected item in filename prompts. ++c.colors.prompts.selected.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Foreground color of the statusbar. ++c.colors.statusbar.normal.fg = "{{colors.on_background.dark.hex}}" ++ ++# Background color of the statusbar. ++c.colors.statusbar.normal.bg = "{{colors.background.dark.hex}}" ++ ++# Foreground color of the statusbar in insert mode. ++c.colors.statusbar.insert.fg = "{{colors.on_primary.dark.hex}}" ++ ++# Background color of the statusbar in insert mode. ++c.colors.statusbar.insert.bg = "{{colors.primary.dark.hex}}" ++ ++# Foreground color of the statusbar in passthrough mode. ++c.colors.statusbar.passthrough.fg = "{{colors.on_secondary.dark.hex}}" ++ ++# Background color of the statusbar in passthrough mode. ++c.colors.statusbar.passthrough.bg = "{{colors.secondary.dark.hex}}" ++ ++# Foreground color of the statusbar in private browsing mode. ++c.colors.statusbar.private.fg = "{{colors.on_tertiary.dark.hex}}" ++ ++# Background color of the statusbar in private browsing mode. 
++c.colors.statusbar.private.bg = "{{colors.tertiary.dark.hex}}" ++ ++# Foreground color of the statusbar in command mode. ++c.colors.statusbar.command.fg = "{{colors.on_surface_variant.dark.hex}}" ++ ++# Background color of the statusbar in command mode. ++c.colors.statusbar.command.bg = "{{colors.surface_variant.dark.hex}}" ++ ++# Foreground color of the statusbar in private browsing + command mode. ++c.colors.statusbar.command.private.fg = "{{colors.on_surface_variant.dark.hex}}" ++ ++# Background color of the statusbar in private browsing + command mode. ++c.colors.statusbar.command.private.bg = "{{colors.surface_variant.dark.hex}}" ++ ++# Foreground color of the statusbar in caret mode. ++c.colors.statusbar.caret.fg = "{{colors.primary_container.dark.hex}}" ++ ++# Background color of the statusbar in caret mode. ++c.colors.statusbar.caret.bg = "{{colors.on_primary_container.dark.hex}}" ++ ++# Foreground color of the statusbar in caret mode with a selection. ++c.colors.statusbar.caret.selection.fg = "{{colors.on_tertiary.dark.hex}}" ++ ++# Background color of the statusbar in caret mode with a selection. ++c.colors.statusbar.caret.selection.bg = "{{colors.on_tertiary_container.dark.hex}}" ++ ++# Background color of the progress bar. ++c.colors.statusbar.progress.bg = "{{colors.primary.dark.hex}}" ++ ++# Default color of the URL in the statusbar." ++c.colors.statusbar.url.fg = "{{colors.secondary_container.default.hex}}" ++ ++# Foreground color of the URL in the statusbar on error. ++c.colors.statusbar.url.error.fg = "{{colors.error.dark.hex}}" ++ ++# Foreground color of the URL in the statusbar for hovered links. ++c.colors.statusbar.url.hover.fg = "{{colors.primary.dark.hex}}" ++ ++# Foreground color of the URL in the statusbar on successful load ++# (http). ++c.colors.statusbar.url.success.http.fg = "{{colors.secondary.dark.hex}}" ++ ++# Foreground color of the URL in the statusbar on successful load ++# (https). 
++c.colors.statusbar.url.success.https.fg = "{{colors.secondary.dark.hex}}" ++ ++# Foreground color of the URL in the statusbar when there's a warning. ++c.colors.statusbar.url.warn.fg = "{{colors.error.dark.hex}}" ++ ++# Background color of the tab bar. ++c.colors.tabs.bar.bg = "{{colors.background.dark.hex}}" ++ ++# Color gradient start for the tab indicator. ++c.colors.tabs.indicator.start = "{{colors.secondary_container.dark.hex}}" ++ ++# Color gradient end for the tab indicator. ++c.colors.tabs.indicator.stop = "{{colors.primary_container.dark.hex}}" ++ ++# Color for the tab indicator on errors. ++c.colors.tabs.indicator.error = "{{colors.error.dark.hex}}" ++ ++# Foreground color of unselected odd tabs. ++c.colors.tabs.odd.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Background color of unselected odd tabs. ++c.colors.tabs.odd.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color of unselected even tabs. ++c.colors.tabs.even.fg = "{{colors.on_surface.dark.hex}}" ++ ++# Background color of unselected even tabs. ++c.colors.tabs.even.bg = "{{colors.surface_variant.dark.hex}}" ++ ++# Foreground color of selected odd tabs. ++c.colors.tabs.selected.odd.fg = "{{colors.on_secondary.dark.hex}}" ++ ++# Background color of selected odd tabs. ++c.colors.tabs.selected.odd.bg = "{{colors.secondary.dark.hex}}" ++ ++# Foreground color of selected even tabs. ++c.colors.tabs.selected.even.fg = "{{colors.on_secondary.dark.hex}}" ++ ++# Background color of selected even tabs. ++c.colors.tabs.selected.even.bg = "{{colors.secondary.dark.hex}}" ++ ++# Background color of pinned unselected even tabs. ++c.colors.tabs.pinned.even.bg = "{{colors.surface_variant.dark.hex}}" ++ ++# Foreground color of pinned unselected even tabs. ++c.colors.tabs.pinned.even.fg = "{{colors.secondary.dark.hex}}" ++ ++# Background color of pinned unselected odd tabs. ++c.colors.tabs.pinned.odd.bg = "{{colors.surface.dark.hex}}" ++ ++# Foreground color of pinned unselected odd tabs. 
++c.colors.tabs.pinned.odd.fg = "{{colors.secondary.dark.hex}}" ++ ++# Background color of pinned selected even tabs. ++c.colors.tabs.pinned.selected.even.bg = "{{colors.secondary.dark.hex}}" ++ ++# Foreground color of pinned selected even tabs. ++c.colors.tabs.pinned.selected.even.fg = "{{colors.on_secondary.dark.hex}}" ++ ++# Background color of pinned selected odd tabs. ++c.colors.tabs.pinned.selected.odd.bg = "{{colors.secondary.dark.hex}}" ++ ++# Foreground color of pinned selected odd tabs. ++c.colors.tabs.pinned.selected.odd.fg = "{{colors.on_secondary.dark.hex}}" ++ ++# Background color for webpages if unset (or empty to use the theme's color). ++c.colors.webpage.bg = '' diff --git a/sys-kernel/gentoo-sources-7.0/0001-bore.patch b/sys-kernel/gentoo-sources-7.0/0001-bore.patch deleted file mode 100644 index 51617f0..0000000 --- a/sys-kernel/gentoo-sources-7.0/0001-bore.patch +++ /dev/null @@ -1,1217 +0,0 @@ -From 187d3236f77a721f684e3211dc50585973b04ab4 Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Fri, 10 Apr 2026 08:27:29 +0200 -Subject: [PATCH] bore - -Signed-off-by: Piotr Gorski ---- - include/linux/sched.h | 34 +++ - include/linux/sched/bore.h | 41 ++++ - init/Kconfig | 17 ++ - kernel/Kconfig.hz | 17 ++ - kernel/exit.c | 4 + - kernel/fork.c | 13 ++ - kernel/futex/waitwake.c | 11 + - kernel/sched/Makefile | 1 + - kernel/sched/bore.c | 434 +++++++++++++++++++++++++++++++++++++ - kernel/sched/core.c | 12 + - kernel/sched/debug.c | 61 ++++++ - kernel/sched/fair.c | 126 ++++++++++- - kernel/sched/sched.h | 9 + - 13 files changed, 769 insertions(+), 11 deletions(-) - create mode 100644 include/linux/sched/bore.h - create mode 100644 kernel/sched/bore.c - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 5a5d3dbc9..b2b2d8c66 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -817,6 +817,37 @@ struct kmap_ctrl { - #endif - }; - -+#ifdef CONFIG_SCHED_BORE -+#define BORE_BC_TIMESTAMP_SHIFT 16 -+ -+struct bore_bc { -+ union 
{ -+ struct { -+ u64 timestamp: 48; -+ u64 penalty: 16; -+ }; -+ u64 value; -+ }; -+}; -+ -+struct bore_ctx { -+ u64 burst_time; -+ u16 prev_penalty; -+ u16 curr_penalty; -+ union { -+ u16 penalty; -+ struct { -+ u8 _; -+ u8 score; -+ }; -+ }; -+ bool stop_update; -+ bool futex_waiting; -+ struct bore_bc subtree; -+ struct bore_bc group; -+}; -+#endif /* CONFIG_SCHED_BORE */ -+ - struct task_struct { - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* -@@ -875,6 +906,9 @@ struct task_struct { - #ifdef CONFIG_SCHED_CLASS_EXT - struct sched_ext_entity scx; - #endif -+#ifdef CONFIG_SCHED_BORE -+ struct bore_ctx bore; -+#endif /* CONFIG_SCHED_BORE */ - const struct sched_class *sched_class; - - #ifdef CONFIG_SCHED_CORE -diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h -new file mode 100644 -index 000000000..9215c13a9 ---- /dev/null -+++ b/include/linux/sched/bore.h -@@ -0,0 +1,41 @@ -+#ifndef _KERNEL_SCHED_BORE_H -+#define _KERNEL_SCHED_BORE_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define SCHED_BORE_AUTHOR "Masahito Suzuki" -+#define SCHED_BORE_PROGNAME "BORE CPU Scheduler modification" -+ -+#define SCHED_BORE_VERSION "6.6.3" -+ -+extern u8 __read_mostly sched_bore; -+DECLARE_STATIC_KEY_TRUE(sched_bore_key); -+extern u8 __read_mostly sched_burst_inherit_type; -+extern u8 __read_mostly sched_burst_smoothness; -+extern u8 __read_mostly sched_burst_penalty_offset; -+extern uint __read_mostly sched_burst_penalty_scale; -+extern uint __read_mostly sched_burst_cache_lifetime; -+ -+extern u8 effective_prio_bore(struct task_struct *p); -+extern void update_curr_bore(struct task_struct *p, u64 delta_exec); -+extern void restart_burst_bore(struct task_struct *p); -+extern void restart_burst_rescale_deadline_bore(struct task_struct *p); -+extern void task_fork_bore(struct task_struct *p, struct task_struct *parent, -+ u64 clone_flags, u64 now); -+extern void sched_init_bore(void); -+extern void reset_task_bore(struct task_struct *p); 
-+ -+extern int sched_bore_update_handler(const struct ctl_table *table, -+ int write, void __user *buffer, size_t *lenp, loff_t *ppos); -+extern int sched_burst_inherit_type_update_handler(const struct ctl_table *table, -+ int write, void __user *buffer, size_t *lenp, loff_t *ppos); -+ -+extern void reweight_entity( -+ struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight); -+ -+#endif /* _KERNEL_SCHED_BORE_H */ -diff --git a/init/Kconfig b/init/Kconfig -index 7484cd703..4cf628106 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1446,6 +1446,23 @@ config CHECKPOINT_RESTORE - - If unsure, say N here. - -+config SCHED_BORE -+ bool "Burst-Oriented Response Enhancer" -+ default y -+ help -+ In Desktop and Mobile computing, one might prefer interactive -+ tasks to keep responsive no matter what they run in the background. -+ -+ Enabling this kernel feature modifies the scheduler to discriminate -+ tasks by their burst time (runtime since it last went sleeping or -+ yielding state) and prioritize those that run less bursty. -+ Such tasks usually include window compositor, widgets backend, -+ terminal emulator, video playback, games and so on. -+ With a little impact to scheduling fairness, it may improve -+ responsiveness especially under heavy background workload. -+ -+ If unsure, say Y here. 
-+ - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select CGROUPS -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index ce1435cb0..9eee2005e 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -57,3 +57,20 @@ config HZ - - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -+ -+config MIN_BASE_SLICE_NS -+ int "Default value for min_base_slice_ns" -+ default 2000000 -+ help -+ The BORE Scheduler automatically calculates the optimal base -+ slice for the configured HZ using the following equation: -+ -+ base_slice_ns = -+ 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) -+ -+ This option sets the default lower bound limit of the base slice -+ to prevent the loss of task throughput due to overscheduling. -+ -+ Setting this value too high can cause the system to boot with -+ an unnecessarily large base slice, resulting in high scheduling -+ latency and poor system responsiveness. -diff --git a/kernel/exit.c b/kernel/exit.c -index ede3117fa..3f3af470d 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -147,7 +147,11 @@ static void __unhash_process(struct release_task_post *post, struct task_struct - detach_pid(post->pids, p, PIDTYPE_SID); - - list_del_rcu(&p->tasks); -+#ifdef CONFIG_SCHED_BORE -+ list_del_rcu(&p->sibling); -+#else /* !CONFIG_SCHED_BORE */ - list_del_init(&p->sibling); -+#endif /* CONFIG_SCHED_BORE */ - __this_cpu_dec(process_counts); - } - list_del_rcu(&p->thread_node); -diff --git a/kernel/fork.c b/kernel/fork.c -index bc2bf58b9..207276c30 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -117,6 +117,10 @@ - /* For dup_mmap(). 
*/ - #include "../mm/internal.h" - -+#ifdef CONFIG_SCHED_BORE -+#include -+#endif /* CONFIG_SCHED_BORE */ -+ - #include - - #define CREATE_TRACE_POINTS -@@ -2362,6 +2366,11 @@ __latent_entropy struct task_struct *copy_process( - p->start_time = ktime_get_ns(); - p->start_boottime = ktime_get_boottime_ns(); - -+#ifdef CONFIG_SCHED_BORE -+ if (likely(p->pid)) -+ task_fork_bore(p, current, clone_flags, p->start_time); -+#endif /* CONFIG_SCHED_BORE */ -+ - /* - * Make it visible to the rest of the system, but dont wake it up yet. - * Need tasklist lock for parent etc handling! -@@ -2435,7 +2444,11 @@ __latent_entropy struct task_struct *copy_process( - */ - p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || - p->real_parent->signal->is_child_subreaper; -+#ifdef CONFIG_SCHED_BORE -+ list_add_tail_rcu(&p->sibling, &p->real_parent->children); -+#else /* !CONFIG_SCHED_BORE */ - list_add_tail(&p->sibling, &p->real_parent->children); -+#endif /* CONFIG_SCHED_BORE */ - list_add_tail_rcu(&p->tasks, &init_task.tasks); - attach_pid(p, PIDTYPE_TGID); - attach_pid(p, PIDTYPE_PGID); -diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c -index 1c2dd03f1..de57e2d54 100644 ---- a/kernel/futex/waitwake.c -+++ b/kernel/futex/waitwake.c -@@ -4,6 +4,9 @@ - #include - #include - #include -+#ifdef CONFIG_SCHED_BORE -+#include -+#endif /* CONFIG_SCHED_BORE */ - - #include "futex.h" - -@@ -355,7 +358,15 @@ void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout) - * is no timeout, or if it has yet to expire. 
- */ - if (!timeout || timeout->task) -+#ifdef CONFIG_SCHED_BORE -+ { -+ current->bore.futex_waiting = true; -+#endif /* CONFIG_SCHED_BORE */ - schedule(); -+#ifdef CONFIG_SCHED_BORE -+ current->bore.futex_waiting = false; -+ } -+#endif /* CONFIG_SCHED_BORE */ - } - __set_current_state(TASK_RUNNING); - } -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index b1f1a3670..f95a7b3d5 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -40,3 +40,4 @@ obj-y += core.o - obj-y += fair.o - obj-y += build_policy.o - obj-y += build_utility.o -+obj-$(CONFIG_SCHED_BORE) += bore.o -diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c -new file mode 100644 -index 000000000..c27a22cd6 ---- /dev/null -+++ b/kernel/sched/bore.c -@@ -0,0 +1,434 @@ -+/* -+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler -+ * Copyright (C) 2021-2025 Masahito Suzuki -+ */ -+#include -+#include -+#include -+#include "sched.h" -+ -+#ifdef CONFIG_SCHED_BORE -+DEFINE_STATIC_KEY_TRUE(sched_bore_key); -+u8 __read_mostly sched_bore = 1; -+u8 __read_mostly sched_burst_inherit_type = 2; -+u8 __read_mostly sched_burst_smoothness = 1; -+u8 __read_mostly sched_burst_penalty_offset = 24; -+uint __read_mostly sched_burst_penalty_scale = 1536; -+uint __read_mostly sched_burst_cache_lifetime = 75000000; -+static int __maybe_unused maxval_prio = 39; -+static int __maybe_unused maxval_6_bits = 63; -+static int __maybe_unused maxval_8_bits = 255; -+static int __maybe_unused maxval_12_bits = 4095; -+ -+#define MAX_BURST_PENALTY ((40U << 8) - 1) -+#define BURST_CACHE_SAMPLE_LIMIT 63 -+#define BURST_CACHE_SCAN_LIMIT (BURST_CACHE_SAMPLE_LIMIT * 2) -+ -+static u32 bore_reciprocal_lut[BURST_CACHE_SAMPLE_LIMIT + 1]; -+ -+DEFINE_STATIC_KEY_TRUE(sched_burst_inherit_key); -+DEFINE_STATIC_KEY_TRUE(sched_burst_ancestor_key); -+ -+static inline u32 log2p1_u64_u32fp(u64 v, u8 fp) { -+ if (unlikely(!v)) return 0; -+ int clz = __builtin_clzll(v); -+ int exponent = 64 - clz; -+ u32 mantissa = (u32)((v 
<< clz) << 1 >> (64 - fp)); -+ return exponent << fp | mantissa; -+} -+ -+static inline u32 calc_burst_penalty(u64 burst_time) { -+ u32 greed = log2p1_u64_u32fp(burst_time, 8), -+ tolerance = sched_burst_penalty_offset << 8; -+ s32 diff = (s32)(greed - tolerance); -+ u32 penalty = diff & ~(diff >> 31); -+ u32 scaled_penalty = penalty * sched_burst_penalty_scale >> 10; -+ s32 overflow = scaled_penalty - MAX_BURST_PENALTY; -+ return scaled_penalty - (overflow & ~(overflow >> 31)); -+} -+ -+static inline u64 rescale_slice(u64 delta, u8 old_prio, u8 new_prio) { -+ u64 unscaled, rescaled; -+ unscaled = mul_u64_u32_shr(delta , sched_prio_to_weight[old_prio], 10); -+ rescaled = mul_u64_u32_shr(unscaled, sched_prio_to_wmult [new_prio], 22); -+ return rescaled; -+} -+ -+static inline u32 binary_smooth(u32 new, u32 old) { -+ u32 is_growing = (new > old); -+ u32 increment = (new - old) * is_growing; -+ u32 shift = sched_burst_smoothness; -+ u32 smoothed = old + ((increment + (1U << shift) - 1) >> shift); -+ return (new & ~(-is_growing)) | (smoothed & (-is_growing)); -+} -+ -+static void reweight_task_by_prio(struct task_struct *p, int prio) { -+ if (task_has_idle_policy(p)) return; -+ -+ struct sched_entity *se = &p->se; -+ unsigned long weight = scale_load(sched_prio_to_weight[prio]); -+ -+ if (se->on_rq) { -+ p->bore.stop_update = true; -+ reweight_entity(cfs_rq_of(se), se, weight); -+ p->bore.stop_update = false; -+ } else -+ se->load.weight = weight; -+ se->load.inv_weight = sched_prio_to_wmult[prio]; -+} -+ -+u8 effective_prio_bore(struct task_struct *p) { -+ int prio = p->static_prio - MAX_RT_PRIO; -+ if (static_branch_likely(&sched_bore_key)) -+ prio += p->bore.score; -+ prio &= ~(prio >> 31); -+ s32 diff = prio - maxval_prio; -+ prio -= (diff & ~(diff >> 31)); -+ return (u8)prio; -+} -+ -+static void update_penalty(struct task_struct *p) { -+ struct bore_ctx *ctx = &p->bore; -+ -+ u8 prev_prio = effective_prio_bore(p); -+ -+ s32 diff = (s32)ctx->curr_penalty - 
(s32)ctx->prev_penalty; -+ u16 max_val = ctx->curr_penalty - (diff & (diff >> 31)); -+ u32 is_kthread = !!(p->flags & PF_KTHREAD); -+ ctx->penalty = max_val & -(s32)(!is_kthread); -+ -+ u8 new_prio = effective_prio_bore(p); -+ if (new_prio != prev_prio) -+ reweight_task_by_prio(p, new_prio); -+} -+ -+void update_curr_bore(struct task_struct *p, u64 delta_exec) { -+ struct bore_ctx *ctx = &p->bore; -+ if (ctx->stop_update) return; -+ -+ ctx->burst_time += delta_exec; -+ u32 curr_penalty = ctx->curr_penalty = calc_burst_penalty(ctx->burst_time); -+ -+ if (curr_penalty <= ctx->prev_penalty) return; -+ update_penalty(p); -+} -+ -+void restart_burst_bore(struct task_struct *p) { -+ struct bore_ctx *ctx = &p->bore; -+ u32 new_penalty = binary_smooth(ctx->curr_penalty, ctx->prev_penalty); -+ ctx->prev_penalty = new_penalty; -+ ctx->curr_penalty = 0; -+ ctx->burst_time = 0; -+ update_penalty(p); -+} -+ -+void restart_burst_rescale_deadline_bore(struct task_struct *p) { -+ struct sched_entity *se = &p->se; -+ s64 vscaled, vremain = se->deadline - se->vruntime; -+ -+ u8 old_prio = effective_prio_bore(p); -+ restart_burst_bore(p); -+ u8 new_prio = effective_prio_bore(p); -+ -+ if (old_prio > new_prio) { -+ vscaled = rescale_slice(abs(vremain), old_prio, new_prio); -+ if (unlikely(vremain < 0)) -+ vscaled = -vscaled; -+ se->deadline = se->vruntime + vscaled; -+ } -+} -+ -+static inline bool task_is_bore_eligible(struct task_struct *p) -+{return p && p->sched_class == &fair_sched_class && !p->exit_state;} -+ -+#ifndef for_each_child_task -+#define for_each_child_task(p, t) \ -+ list_for_each_entry_rcu(t, &(p)->children, sibling) -+#endif -+ -+static inline u32 count_children_upto2(struct task_struct *p) { -+ struct list_head *head = &p->children; -+ struct list_head *first = READ_ONCE(head->next); -+ struct list_head *second = READ_ONCE(first->next); -+ return (first != head) + (second != head); -+} -+ -+static inline bool burst_cache_expired(struct bore_bc *bc, u64 now) { -+ 
struct bore_bc bc_val = { .value = READ_ONCE(bc->value) }; -+ u64 timestamp = (u64)bc_val.timestamp << BORE_BC_TIMESTAMP_SHIFT; -+ return now - timestamp > (u64)sched_burst_cache_lifetime; -+} -+ -+static void update_burst_cache(struct bore_bc *bc, -+ struct task_struct *p, u32 count, u32 total, u64 now) { -+ u32 average = (count == 1) ? total : -+ (u32)(((u64)total * bore_reciprocal_lut[count]) >> 32); -+ -+ struct bore_bc new_bc = { -+ .penalty = max(average, p->bore.penalty), -+ .timestamp = now >> BORE_BC_TIMESTAMP_SHIFT -+ }; -+ WRITE_ONCE(bc->value, new_bc.value); -+} -+ -+static u32 inherit_from_parent(struct task_struct *parent, -+ u64 clone_flags, u64 now) { -+ struct bore_bc bc_val; -+ -+ if (clone_flags & CLONE_PARENT) -+ parent = rcu_dereference(parent->real_parent); -+ -+ struct bore_bc *bc = &parent->bore.subtree; -+ -+ if (burst_cache_expired(bc, now)) { -+ struct task_struct *child; -+ u32 count = 0, total = 0, scan_count = 0; -+ for_each_child_task(parent, child) { -+ if (count >= BURST_CACHE_SAMPLE_LIMIT) break; -+ if (scan_count++ >= BURST_CACHE_SCAN_LIMIT) break; -+ -+ if (!task_is_bore_eligible(child)) continue; -+ count++; -+ total += child->bore.penalty; -+ } -+ -+ update_burst_cache(bc, parent, count, total, now); -+ } -+ -+ bc_val.value = READ_ONCE(bc->value); -+ return (u32)bc_val.penalty; -+} -+ -+static u32 inherit_from_ancestor_hub(struct task_struct *parent, -+ u64 clone_flags, u64 now) { -+ struct bore_bc bc_val; -+ struct task_struct *ancestor = parent; -+ u32 sole_child_count = 0; -+ -+ if (clone_flags & CLONE_PARENT) { -+ ancestor = rcu_dereference(ancestor->real_parent); -+ sole_child_count = 1; -+ } -+ -+ for (struct task_struct *next; -+ (next = rcu_dereference(ancestor->real_parent)) != ancestor && -+ count_children_upto2(ancestor) <= sole_child_count; -+ ancestor = next, sole_child_count = 1) {} -+ -+ struct bore_bc *bc = &ancestor->bore.subtree; -+ -+ if (burst_cache_expired(bc, now)) { -+ struct task_struct *direct_child; -+ 
u32 count = 0, total = 0, scan_count = 0; -+ for_each_child_task(ancestor, direct_child) { -+ if (count >= BURST_CACHE_SAMPLE_LIMIT) break; -+ if (scan_count++ >= BURST_CACHE_SCAN_LIMIT) break; -+ -+ struct task_struct *descendant = direct_child; -+ while (count_children_upto2(descendant) == 1) { -+ struct task_struct *next_descendant = -+ list_first_or_null_rcu(&descendant->children, -+ struct task_struct, sibling); -+ if (!next_descendant) break; -+ descendant = next_descendant; -+ } -+ -+ if (!task_is_bore_eligible(descendant)) continue; -+ count++; -+ total += descendant->bore.penalty; -+ } -+ -+ update_burst_cache(bc, ancestor, count, total, now); -+ } -+ -+ bc_val.value = READ_ONCE(bc->value); -+ return (u32)bc_val.penalty; -+} -+ -+static u32 inherit_from_thread_group(struct task_struct *p, u64 now) { -+ struct bore_bc bc_val; -+ struct task_struct *leader = p->group_leader; -+ struct bore_bc *bc = &leader->bore.group; -+ -+ if (burst_cache_expired(bc, now)) { -+ struct task_struct *sibling; -+ u32 count = 0, total = 0, scan_count = 0; -+ -+ for_each_thread(leader, sibling) { -+ if (count >= BURST_CACHE_SAMPLE_LIMIT) break; -+ if (scan_count++ >= BURST_CACHE_SCAN_LIMIT) break; -+ -+ if (!task_is_bore_eligible(sibling)) continue; -+ count++; -+ total += sibling->bore.penalty; -+ } -+ -+ update_burst_cache(bc, leader, count, total, now); -+ } -+ -+ bc_val.value = READ_ONCE(bc->value); -+ return (u32)bc_val.penalty; -+} -+ -+void task_fork_bore(struct task_struct *p, -+ struct task_struct *parent, u64 clone_flags, u64 now) { -+ if (!static_branch_likely(&sched_bore_key) || !task_is_bore_eligible(p)) return; -+ -+ rcu_read_lock(); -+ struct bore_ctx *ctx = &p->bore; -+ u32 inherited_penalty; -+ if (clone_flags & CLONE_THREAD) -+ inherited_penalty = inherit_from_thread_group(parent, now); -+ else if (static_branch_likely(&sched_burst_inherit_key)) -+ inherited_penalty = static_branch_likely(&sched_burst_ancestor_key)? 
-+ inherit_from_ancestor_hub(parent, clone_flags, now): -+ inherit_from_parent(parent, clone_flags, now); -+ else -+ inherited_penalty = 0; -+ -+ if (ctx->prev_penalty < inherited_penalty) -+ ctx->prev_penalty = inherited_penalty; -+ ctx->curr_penalty = 0; -+ ctx->burst_time = 0; -+ ctx->stop_update = false; -+ ctx->futex_waiting = false; -+ update_penalty(p); -+ rcu_read_unlock(); -+} -+ -+void reset_task_bore(struct task_struct *p) -+{ memset(&p->bore, 0, sizeof(struct bore_ctx)); } -+ -+static void update_inherit_type(void) { -+ switch(sched_burst_inherit_type) { -+ case 1: -+ static_branch_enable(&sched_burst_inherit_key); -+ static_branch_disable(&sched_burst_ancestor_key); -+ break; -+ case 2: -+ static_branch_enable(&sched_burst_inherit_key); -+ static_branch_enable(&sched_burst_ancestor_key); -+ break; -+ default: -+ static_branch_disable(&sched_burst_inherit_key); -+ break; -+ } -+} -+ -+void __init sched_init_bore(void) { -+ printk(KERN_INFO "%s %s by %s\n", -+ SCHED_BORE_PROGNAME, SCHED_BORE_VERSION, SCHED_BORE_AUTHOR); -+ -+ for (int i = 1; i <= BURST_CACHE_SAMPLE_LIMIT; i++) -+ bore_reciprocal_lut[i] = (u32)div64_u64(0xffffffffULL + i, i); -+ -+ reset_task_bore(&init_task); -+ update_inherit_type(); -+} -+ -+static void readjust_all_task_weights(void) { -+ struct task_struct *task; -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ scoped_guard(write_lock_irq, &tasklist_lock) -+ for_each_process(task) { -+ if (!task_is_bore_eligible(task)) continue; -+ rq = task_rq_lock(task, &rf); -+ update_rq_clock(rq); -+ reweight_task_by_prio(task, effective_prio_bore(task)); -+ task_rq_unlock(rq, task, &rf); -+ } -+} -+ -+int sched_bore_update_handler(const struct ctl_table *table, -+ int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ if (sched_bore) -+ static_branch_enable(&sched_bore_key); -+ else -+ static_branch_disable(&sched_bore_key); -+ -+ 
readjust_all_task_weights(); -+ -+ return 0; -+} -+ -+int sched_burst_inherit_type_update_handler(const struct ctl_table *table, -+ int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ update_inherit_type(); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SYSCTL -+static struct ctl_table sched_bore_sysctls[] = { -+ { -+ .procname = "sched_bore", -+ .data = &sched_bore, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = sched_bore_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_inherit_type", -+ .data = &sched_burst_inherit_type, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = sched_burst_inherit_type_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_TWO, -+ }, -+ { -+ .procname = "sched_burst_smoothness", -+ .data = &sched_burst_smoothness, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_THREE, -+ }, -+ { -+ .procname = "sched_burst_penalty_offset", -+ .data = &sched_burst_penalty_offset, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_6_bits, -+ }, -+ { -+ .procname = "sched_burst_penalty_scale", -+ .data = &sched_burst_penalty_scale, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_12_bits, -+ }, -+ { -+ .procname = "sched_burst_cache_lifetime", -+ .data = &sched_burst_cache_lifetime, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+}; -+ -+static int __init sched_bore_sysctl_init(void) { -+ register_sysctl_init("kernel", sched_bore_sysctls); -+ return 0; -+} -+late_initcall(sched_bore_sysctl_init); -+ -+#endif // CONFIG_SYSCTL -+#endif /* CONFIG_SCHED_BORE */ -diff --git 
a/kernel/sched/core.c b/kernel/sched/core.c -index 496dff740..2bc2b943a 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -100,6 +100,10 @@ - #include "../smpboot.h" - #include "../locking/mutex.h" - -+#ifdef CONFIG_SCHED_BORE -+#include -+#endif /* CONFIG_SCHED_BORE */ -+ - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); - EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); - -@@ -1446,7 +1450,11 @@ int tg_nop(struct task_group *tg, void *data) - - void set_load_weight(struct task_struct *p, bool update_load) - { -+#ifdef CONFIG_SCHED_BORE -+ int prio = effective_prio_bore(p); -+#else /* !CONFIG_SCHED_BORE */ - int prio = p->static_prio - MAX_RT_PRIO; -+#endif /* CONFIG_SCHED_BORE */ - struct load_weight lw; - - if (task_has_idle_policy(p)) { -@@ -8611,6 +8619,10 @@ void __init sched_init(void) - BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class)); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ sched_init_bore(); -+#endif /* CONFIG_SCHED_BORE */ -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 15bf45b6f..282007725 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -169,6 +169,53 @@ static const struct file_operations sched_feat_fops = { - .release = single_release, - }; - -+#ifdef CONFIG_SCHED_BORE -+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \ -+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \ -+{ \ -+ char buf[16]; \ -+ unsigned int value; \ -+\ -+ if (cnt > 15) \ -+ cnt = 15; \ -+\ -+ if (copy_from_user(&buf, ubuf, cnt)) \ -+ return -EFAULT; \ -+ buf[cnt] = '\0'; \ -+\ -+ if (kstrtouint(buf, 10, &value)) \ -+ return -EINVAL; \ -+\ -+ sysctl_sched_##name = value; \ -+ sched_update_##update_func(); \ -+\ -+ *ppos += cnt; \ -+ return cnt; \ -+} \ -+\ -+static int sched_##name##_show(struct seq_file *m, void *v) \ -+{ \ -+ seq_printf(m, "%d\n", sysctl_sched_##name); \ -+ return 0; \ -+} \ -+\ 
-+static int sched_##name##_open(struct inode *inode, struct file *filp) \ -+{ \ -+ return single_open(filp, sched_##name##_show, NULL); \ -+} \ -+\ -+static const struct file_operations sched_##name##_fops = { \ -+ .open = sched_##name##_open, \ -+ .write = sched_##name##_write, \ -+ .read = seq_read, \ -+ .llseek = seq_lseek, \ -+ .release = single_release, \ -+}; -+ -+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice) -+ -+#undef DEFINE_SYSCTL_SCHED_FUNC -+#else /* !CONFIG_SCHED_BORE */ - static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) - { -@@ -208,6 +255,7 @@ static const struct file_operations sched_scaling_fops = { - .llseek = seq_lseek, - .release = single_release, - }; -+#endif /* CONFIG_SCHED_BORE */ - - #ifdef CONFIG_PREEMPT_DYNAMIC - -@@ -602,12 +650,19 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops); -+ debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice); -+#else /* !CONFIG_SCHED_BORE */ - debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); -+#endif /* CONFIG_SCHED_BORE */ - - debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); - debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - -+#if !defined(CONFIG_SCHED_BORE) - debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); -+#endif /* CONFIG_SCHED_BORE */ - debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); - debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - -@@ -852,6 +907,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - 
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); - -+#ifdef CONFIG_SCHED_BORE -+ SEQ_printf(m, " %2d", p->bore.score); -+#endif /* CONFIG_SCHED_BORE */ - #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); - #endif -@@ -1331,6 +1389,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - __PS("nr_involuntary_switches", p->nivcsw); - - P(se.load.weight); -+#ifdef CONFIG_SCHED_BORE -+ P(bore.score); -+#endif /* CONFIG_SCHED_BORE */ - P(se.avg.load_sum); - P(se.avg.runnable_sum); - P(se.avg.util_sum); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index ab4114712..630896fc0 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -58,6 +58,10 @@ - #include "stats.h" - #include "autogroup.h" - -+#ifdef CONFIG_SCHED_BORE -+#include -+#endif /* CONFIG_SCHED_BORE */ -+ - /* - * The initial- and re-scaling of tunables is configurable - * -@@ -67,17 +71,30 @@ - * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) - * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus - * -- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) -+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant -+ * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) - */ -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -+#else /* !CONFIG_SCHED_BORE */ - unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; -+#endif /* CONFIG_SCHED_BORE */ - - /* - * Minimal preemption granularity for CPU-bound tasks: - * -- * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds) -+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice -+ * (default min_base_slice = 2000000 constant, units: nanoseconds) -+ * EEVDF: default 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds - */ -+#ifdef CONFIG_SCHED_BORE -+static const unsigned int 
nsecs_per_tick = 1000000000ULL / HZ; -+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; -+__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick; -+#else /* !CONFIG_SCHED_BORE */ - unsigned int sysctl_sched_base_slice = 700000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; -+#endif /* CONFIG_SCHED_BORE */ - - __read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; - -@@ -189,6 +206,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) - * - * This idea comes from the SD scheduler of Con Kolivas: - */ -+#ifdef CONFIG_SCHED_BORE -+static void update_sysctl(void) { -+ sysctl_sched_base_slice = nsecs_per_tick * -+ max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)); -+} -+void sched_update_min_base_slice(void) { update_sysctl(); } -+#else /* !CONFIG_SCHED_BORE */ - static unsigned int get_update_sysctl_factor(void) - { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -219,6 +243,7 @@ static void update_sysctl(void) - SET_SYSCTL(sched_base_slice); - #undef SET_SYSCTL - } -+#endif /* CONFIG_SCHED_BORE */ - - void __init sched_init_granularity(void) - { -@@ -957,7 +982,11 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) - */ - static inline void set_protect_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) - { -+#ifdef CONFIG_SCHED_BORE -+ u64 slice = sysctl_sched_base_slice; -+#else /* CONFIG_SCHED_BORE */ - u64 slice = normalized_sysctl_sched_base_slice; -+#endif /* CONFIG_SCHED_BORE */ - u64 vprot = se->deadline; - - if (sched_feat(RUN_TO_PARITY)) -@@ -1035,6 +1064,11 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect) - curr = NULL; - - if (curr && protect && protect_slice(curr)) -+#ifdef CONFIG_SCHED_BORE -+ if (!static_branch_likely(&sched_bore_key) || -+ !entity_is_task(curr) || -+ !task_of(curr)->bore.futex_waiting) -+#endif /* CONFIG_SCHED_BORE */ - return curr; - - /* Pick the leftmost 
entity if it's eligible */ -@@ -1096,6 +1130,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - /************************************************************** - * Scheduling class statistics methods: - */ -+#if !defined(CONFIG_SCHED_BORE) - int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); -@@ -1107,6 +1142,7 @@ int sched_update_scaling(void) - - return 0; - } -+#endif /* CONFIG_SCHED_BORE */ - - static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se); - -@@ -1307,6 +1343,11 @@ static void update_curr(struct cfs_rq *cfs_rq) - resched = update_deadline(cfs_rq, curr); - - if (entity_is_task(curr)) { -+#ifdef CONFIG_SCHED_BORE -+ struct task_struct *p = task_of(curr); -+ update_curr_bore(p, delta_exec); -+#endif /* CONFIG_SCHED_BORE */ -+ - /* - * If the fair_server is active, we need to account for the - * fair_server time whether or not the task is running on -@@ -3843,17 +3884,23 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) - - static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); - --static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, -+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { - bool curr = cfs_rq->curr == se; - bool rel_vprot = false; - u64 vprot; -+#ifdef CONFIG_SCHED_BORE -+ s64 vlag_unscaled = 0; -+#endif /* !CONFIG_SCHED_BORE */ - - if (se->on_rq) { - /* commit outstanding execution time */ - update_curr(cfs_rq); - update_entity_lag(cfs_rq, se); -+#ifdef CONFIG_SCHED_BORE -+ vlag_unscaled = se->vlag; -+#endif /* !CONFIG_SCHED_BORE */ - se->deadline -= se->vruntime; - se->rel_deadline = 1; - if (curr && protect_slice(se)) { -@@ -3889,6 +3936,16 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - - enqueue_load_avg(cfs_rq, se); - if (se->on_rq) { -+#ifdef CONFIG_SCHED_BORE -+ if (curr) { -+ se->vruntime += vlag_unscaled - 
se->vlag; -+ if (se->rel_deadline) { -+ se->deadline += se->vruntime; -+ se->rel_deadline = 0; -+ } -+ } -+ else -+#endif /* !CONFIG_SCHED_BORE */ - place_entity(cfs_rq, se, 0); - if (rel_vprot) - se->vprot = se->vruntime + vprot; -@@ -5164,12 +5221,11 @@ void __setparam_fair(struct task_struct *p, const struct sched_attr *attr) - static void - place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - { -- u64 vslice, vruntime = avg_vruntime(cfs_rq); -+ u64 vslice = 0, vruntime = avg_vruntime(cfs_rq); - s64 lag = 0; - - if (!se->custom_slice) - se->slice = sysctl_sched_base_slice; -- vslice = calc_delta_fair(se->slice, se); - - /* - * Due to how V is constructed as the weighted average of entities, -@@ -5254,7 +5310,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - se->rel_deadline = 0; - return; - } -- -+#ifdef CONFIG_SCHED_BORE -+ if (static_branch_likely(&sched_bore_key) && -+ entity_is_task(se) && -+ task_of(se)->bore.futex_waiting) -+ goto vslice_found; -+#endif /* !CONFIG_SCHED_BORE */ -+ vslice = calc_delta_fair(se->slice, se); -+#ifdef CONFIG_SCHED_BORE -+ if (static_branch_likely(&sched_bore_key)) -+ vslice >>= !!(flags & (ENQUEUE_INITIAL | ENQUEUE_WAKEUP)); -+ else -+#endif /* CONFIG_SCHED_BORE */ - /* - * When joining the competition; the existing tasks will be, - * on average, halfway through their slice, as such start tasks -@@ -5263,6 +5330,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL)) - vslice /= 2; - -+#ifdef CONFIG_SCHED_BORE -+vslice_found: -+#endif /* CONFIG_SCHED_BORE */ - /* - * EEVDF: vd_i = ve_i + r_i/w_i - */ -@@ -5273,7 +5343,7 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq); - static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq); - - static void --requeue_delayed_entity(struct sched_entity *se); -+requeue_delayed_entity(struct sched_entity *se, int flags); - - static void - 
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) -@@ -5431,6 +5501,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - if (sched_feat(DELAY_DEQUEUE) && delay && - !entity_eligible(cfs_rq, se)) { - update_load_avg(cfs_rq, se, 0); -+#ifdef CONFIG_SCHED_BORE -+ if (static_branch_likely(&sched_bore_key) && sched_feat(DELAY_ZERO)) -+ update_entity_lag(cfs_rq, se); -+#endif /* CONFIG_SCHED_BORE */ - set_delayed(se); - return false; - } -@@ -6902,7 +6976,7 @@ static int sched_idle_cpu(int cpu) - } - - static void --requeue_delayed_entity(struct sched_entity *se) -+requeue_delayed_entity(struct sched_entity *se, int flags) - { - struct cfs_rq *cfs_rq = cfs_rq_of(se); - -@@ -6915,13 +6989,22 @@ requeue_delayed_entity(struct sched_entity *se) - WARN_ON_ONCE(!se->on_rq); - - if (sched_feat(DELAY_ZERO)) { -+#ifdef CONFIG_SCHED_BORE -+ if (static_branch_likely(&sched_bore_key)) -+ flags |= ENQUEUE_WAKEUP; -+ else { -+#endif /* CONFIG_SCHED_BORE */ -+ flags = 0; - update_entity_lag(cfs_rq, se); -+#ifdef CONFIG_SCHED_BORE -+ } -+#endif /* CONFIG_SCHED_BORE */ - if (se->vlag > 0) { - cfs_rq->nr_queued--; - if (se != cfs_rq->curr) - __dequeue_entity(cfs_rq, se); - se->vlag = 0; -- place_entity(cfs_rq, se, 0); -+ place_entity(cfs_rq, se, flags); - if (se != cfs_rq->curr) - __enqueue_entity(cfs_rq, se); - cfs_rq->nr_queued++; -@@ -6961,7 +7044,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) - util_est_enqueue(&rq->cfs, p); - - if (flags & ENQUEUE_DELAYED) { -- requeue_delayed_entity(se); -+ requeue_delayed_entity(se, flags); - return; - } - -@@ -6979,7 +7062,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) - for_each_sched_entity(se) { - if (se->on_rq) { - if (se->sched_delayed) -- requeue_delayed_entity(se); -+ requeue_delayed_entity(se, flags); - break; - } - cfs_rq = cfs_rq_of(se); -@@ -7186,6 +7269,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int 
flags) - util_est_dequeue(&rq->cfs, p); - - util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); -+#ifdef CONFIG_SCHED_BORE -+ struct cfs_rq *cfs_rq = cfs_rq_of(&p->se); -+ struct sched_entity *se = &p->se; -+ if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) { -+ if (cfs_rq->curr == se) -+ update_curr(cfs_rq); -+ restart_burst_bore(p); -+ } -+#endif /* CONFIG_SCHED_BORE */ - if (dequeue_entities(rq, &p->se, flags) < 0) - return false; - -@@ -9097,16 +9189,25 @@ static void yield_task_fair(struct rq *rq) - /* - * Are we the only task in the tree? - */ -+#if !defined(CONFIG_SCHED_BORE) - if (unlikely(rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); -+#endif /* CONFIG_SCHED_BORE */ - - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. - */ - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ restart_burst_rescale_deadline_bore(curr); -+ if (unlikely(rq->nr_running == 1)) -+ return; -+ -+ clear_buddies(cfs_rq, se); -+#endif /* CONFIG_SCHED_BORE */ - /* - * Tell update_rq_clock() that we've just updated, - * so we don't do microscopic update in schedule() -@@ -13586,6 +13687,9 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p) - WARN_ON_ONCE(p->se.sched_delayed); - - attach_task_cfs_rq(p); -+#ifdef CONFIG_SCHED_BORE -+ reset_task_bore(p); -+#endif /* CONFIG_SCHED_BORE */ - - set_task_max_allowed_capacity(p); - -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 1ef9ba480..4b5bbf708 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2219,7 +2219,11 @@ extern int group_balance_cpu(struct sched_group *sg); - extern void update_sched_domain_debugfs(void); - extern void dirty_sched_domain_sysctl(int cpu); - -+#ifdef CONFIG_SCHED_BORE -+extern void sched_update_min_base_slice(void); -+#else /* !CONFIG_SCHED_BORE */ - extern int sched_update_scaling(void); -+#endif /* CONFIG_SCHED_BORE */ - - static inline const struct cpumask *task_user_cpus(struct task_struct *p) - { -@@ -3013,7 
+3017,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - extern __read_mostly unsigned int sysctl_sched_nr_migrate; - extern __read_mostly unsigned int sysctl_sched_migration_cost; - -+#ifdef CONFIG_SCHED_BORE -+extern unsigned int sysctl_sched_min_base_slice; -+extern __read_mostly uint sysctl_sched_base_slice; -+#else /* !CONFIG_SCHED_BORE */ - extern unsigned int sysctl_sched_base_slice; -+#endif /* CONFIG_SCHED_BORE */ - - extern int sysctl_resched_latency_warn_ms; - extern int sysctl_resched_latency_warn_once; --- -2.53.0 - diff --git a/sys-kernel/git-sources/0000-Gentoo-Kconfig.patch b/sys-kernel/git-sources/0000-Gentoo-Kconfig.patch deleted file mode 100644 index 8e5b831..0000000 --- a/sys-kernel/git-sources/0000-Gentoo-Kconfig.patch +++ /dev/null @@ -1,330 +0,0 @@ ---- a/Kconfig 2022-08-25 10:11:47.220973785 -0400 -+++ b/Kconfig 2022-08-25 10:11:56.997682513 -0400 -@@ -30,3 +30,5 @@ source "lib/Kconfig" - source "lib/Kconfig.debug" - - source "Documentation/Kconfig" -+ -+source "distro/Kconfig" ---- /dev/null 2022-08-25 07:13:06.694086407 -0400 -+++ b/distro/Kconfig 2022-08-25 13:21:55.150660724 -0400 -@@ -0,0 +1,291 @@ -+menu "Gentoo Linux" -+ -+config GENTOO_LINUX -+ bool "Gentoo Linux support" -+ -+ default y -+ -+ select CPU_FREQ_DEFAULT_GOV_SCHEDUTIL -+ -+ help -+ In order to boot Gentoo Linux a minimal set of config settings needs to -+ be enabled in the kernel; to avoid the users from having to enable them -+ manually as part of a Gentoo Linux installation or a new clean config, -+ we enable these config settings by default for convenience. -+ -+ See the settings that become available for more details and fine-tuning. 
-+ -+config GENTOO_LINUX_UDEV -+ bool "Linux dynamic and persistent device naming (userspace devfs) support" -+ -+ depends on GENTOO_LINUX -+ default y if GENTOO_LINUX -+ -+ select DEVTMPFS -+ select TMPFS -+ select UNIX -+ -+ select MMU -+ select SHMEM -+ -+ help -+ In order to boot Gentoo Linux a minimal set of config settings needs to -+ be enabled in the kernel; to avoid the users from having to enable them -+ manually as part of a Gentoo Linux installation or a new clean config, -+ we enable these config settings by default for convenience. -+ -+ Currently this only selects TMPFS, DEVTMPFS and their dependencies. -+ TMPFS is enabled to maintain a tmpfs file system at /dev/shm, /run and -+ /sys/fs/cgroup; DEVTMPFS to maintain a devtmpfs file system at /dev. -+ -+ Some of these are critical files that need to be available early in the -+ boot process; if not available, it causes sysfs and udev to malfunction. -+ -+ To ensure Gentoo Linux boots, it is best to leave this setting enabled; -+ if you run a custom setup, you could consider whether to disable this. -+ -+config GENTOO_LINUX_PORTAGE -+ bool "Select options required by Portage features" -+ -+ depends on GENTOO_LINUX -+ default y if GENTOO_LINUX -+ -+ select CGROUPS -+ select NAMESPACES -+ select IPC_NS -+ select NET_NS -+ select PID_NS -+ select SYSVIPC -+ select USER_NS -+ select UTS_NS -+ -+ help -+ This enables options required by various Portage FEATURES. -+ Currently this selects: -+ -+ CGROUPS (required for FEATURES=cgroup) -+ IPC_NS (required for FEATURES=ipc-sandbox) -+ NET_NS (required for FEATURES=network-sandbox) -+ PID_NS (required for FEATURES=pid-sandbox) -+ SYSVIPC (required by IPC_NS) -+ -+ -+ It is highly recommended that you leave this enabled as these FEATURES -+ are, or will soon be, enabled by default. 
-+ -+menu "Support for init systems, system and service managers" -+ visible if GENTOO_LINUX -+ -+config GENTOO_LINUX_INIT_SCRIPT -+ bool "OpenRC, runit and other script based systems and managers" -+ -+ default y if GENTOO_LINUX -+ -+ depends on GENTOO_LINUX -+ -+ select BINFMT_SCRIPT -+ select CGROUPS -+ select EPOLL -+ select FILE_LOCKING -+ select INOTIFY_USER -+ select SIGNALFD -+ select TIMERFD -+ -+ help -+ The init system is the first thing that loads after the kernel booted. -+ -+ These config settings allow you to select which init systems to support; -+ instead of having to select all the individual settings all over the -+ place, these settings allows you to select all the settings at once. -+ -+ This particular setting enables all the known requirements for OpenRC, -+ runit and similar script based systems and managers. -+ -+ If you are unsure about this, it is best to leave this setting enabled. -+ -+config GENTOO_LINUX_INIT_SYSTEMD -+ bool "systemd" -+ -+ default n -+ -+ depends on GENTOO_LINUX && GENTOO_LINUX_UDEV -+ -+ select AUTOFS_FS -+ select BLK_DEV_BSG -+ select BPF_SYSCALL -+ select CGROUP_BPF -+ select CGROUPS -+ select CRYPTO_HMAC -+ select CRYPTO_SHA256 -+ select CRYPTO_USER_API_HASH -+ select DEVPTS_MULTIPLE_INSTANCES -+ select DMIID if X86_32 || X86_64 || X86 -+ select EPOLL -+ select FANOTIFY -+ select FHANDLE -+ select FILE_LOCKING -+ select INOTIFY_USER -+ select IPV6 -+ select KCMP -+ select NET -+ select NET_NS -+ select PROC_FS -+ select SECCOMP if HAVE_ARCH_SECCOMP -+ select SECCOMP_FILTER if HAVE_ARCH_SECCOMP_FILTER -+ select SIGNALFD -+ select SYSFS -+ select TIMERFD -+ select TMPFS_POSIX_ACL -+ select TMPFS_XATTR -+ -+ select ANON_INODES -+ select BLOCK -+ select EVENTFD -+ select FSNOTIFY -+ select INET -+ select NLATTR -+ -+ help -+ The init system is the first thing that loads after the kernel booted. 
-+ -+ These config settings allow you to select which init systems to support; -+ instead of having to select all the individual settings all over the -+ place, these settings allows you to select all the settings at once. -+ -+ This particular setting enables all the known requirements for systemd; -+ it also enables suggested optional settings, as the package suggests to. -+ -+endmenu -+ -+menuconfig GENTOO_KERNEL_SELF_PROTECTION -+ bool "Kernel Self Protection Project" -+ depends on GENTOO_LINUX -+ help -+ Recommended Kernel settings based on the suggestions from the Kernel Self Protection Project -+ See: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings -+ Note, there may be additional settings for which the CONFIG_ setting is invisible in menuconfig due -+ to unmet dependencies. Search for GENTOO_KERNEL_SELF_PROTECTION_COMMON and search for -+ GENTOO_KERNEL_SELF_PROTECTION_{X86_64, ARM64, X86_32, ARM} for dependency information on your -+ specific architecture. -+ Note 2: Please see the URL above for numeric settings, e.g. 
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -+ for X86_64 -+ -+if GENTOO_KERNEL_SELF_PROTECTION -+config GENTOO_KERNEL_SELF_PROTECTION_COMMON -+ bool "Enable Kernel Self Protection Project Recommendations" -+ -+ depends on GENTOO_LINUX && !ACPI_CUSTOM_METHOD && !COMPAT_BRK && !PROC_KCORE && !COMPAT_VDSO && !KEXEC && !HIBERNATION && !LEGACY_PTYS && !X86_X32_ABI && !MODIFY_LDT_SYSCALL && GCC_PLUGINS && !IOMMU_DEFAULT_DMA_LAZY && !IOMMU_DEFAULT_PASSTHROUGH && IOMMU_DEFAULT_DMA_STRICT && SECURITY && !ARCH_EPHEMERAL_INODES && RANDSTRUCT_PERFORMANCE -+ -+ select BUG -+ select STRICT_KERNEL_RWX -+ select DEBUG_WX -+ select STACKPROTECTOR -+ select STACKPROTECTOR_STRONG -+ select STRICT_DEVMEM if DEVMEM=y -+ select IO_STRICT_DEVMEM if DEVMEM=y -+ select SYN_COOKIES -+ select DEBUG_CREDENTIALS -+ select DEBUG_NOTIFIERS -+ select DEBUG_LIST -+ select DEBUG_SG -+ select HARDENED_USERCOPY if HAVE_HARDENED_USERCOPY_ALLOCATOR=y -+ select KFENCE if HAVE_ARCH_KFENCE && (!SLAB || SLUB) -+ select RANDOMIZE_KSTACK_OFFSET_DEFAULT if HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET && (INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION>=140000) -+ select SECURITY_LANDLOCK -+ select SCHED_CORE if SCHED_SMT -+ select BUG_ON_DATA_CORRUPTION -+ select SCHED_STACK_END_CHECK -+ select SECCOMP if HAVE_ARCH_SECCOMP -+ select SECCOMP_FILTER if HAVE_ARCH_SECCOMP_FILTER -+ select SECURITY_YAMA -+ select SLAB_FREELIST_RANDOM -+ select SLAB_FREELIST_HARDENED -+ select SHUFFLE_PAGE_ALLOCATOR -+ select SLUB_DEBUG -+ select PAGE_POISONING -+ select PAGE_POISONING_NO_SANITY -+ select PAGE_POISONING_ZERO -+ select INIT_ON_ALLOC_DEFAULT_ON -+ select INIT_ON_FREE_DEFAULT_ON -+ select REFCOUNT_FULL -+ select FORTIFY_SOURCE -+ select SECURITY_DMESG_RESTRICT -+ select PANIC_ON_OOPS -+ select GCC_PLUGIN_LATENT_ENTROPY -+ select GCC_PLUGIN_STRUCTLEAK -+ select GCC_PLUGIN_STRUCTLEAK_BYREF_ALL -+ select GCC_PLUGIN_RANDSTRUCT -+ select GCC_PLUGIN_RANDSTRUCT_PERFORMANCE -+ select ZERO_CALL_USED_REGS if CC_HAS_ZERO_CALL_USED_REGS -+ -+ help 
-+ Search for GENTOO_KERNEL_SELF_PROTECTION_{X86_64, ARM64, X86_32, ARM} for dependency -+ information on your specific architecture. Note 2: Please see the URL above for -+ numeric settings, e.g. CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 for X86_64 -+ -+config GENTOO_KERNEL_SELF_PROTECTION_X86_64 -+ bool "X86_64 KSPP Settings" if GENTOO_KERNEL_SELF_PROTECTION_COMMON -+ -+ depends on !X86_MSR && X86_64 && GENTOO_KERNEL_SELF_PROTECTION -+ default n -+ -+ select GCC_PLUGIN_STACKLEAK -+ select LEGACY_VSYSCALL_NONE -+ select PAGE_TABLE_ISOLATION -+ select RANDOMIZE_BASE -+ select RANDOMIZE_MEMORY -+ select RELOCATABLE -+ select VMAP_STACK -+ -+ -+config GENTOO_KERNEL_SELF_PROTECTION_ARM64 -+ bool "ARM64 KSPP Settings" -+ -+ depends on ARM64 -+ default n -+ -+ select RANDOMIZE_BASE -+ select RELOCATABLE -+ select ARM64_SW_TTBR0_PAN -+ select CONFIG_UNMAP_KERNEL_AT_EL0 -+ select GCC_PLUGIN_STACKLEAK -+ select VMAP_STACK -+ -+config GENTOO_KERNEL_SELF_PROTECTION_X86_32 -+ bool "X86_32 KSPP Settings" -+ -+ depends on !X86_MSR && !MODIFY_LDT_SYSCALL && !M486 && X86_32 -+ default n -+ -+ select HIGHMEM64G -+ select X86_PAE -+ select RANDOMIZE_BASE -+ select RELOCATABLE -+ select PAGE_TABLE_ISOLATION -+ -+config GENTOO_KERNEL_SELF_PROTECTION_ARM -+ bool "ARM KSPP Settings" -+ -+ depends on !OABI_COMPAT && ARM -+ default n -+ -+ select VMSPLIT_3G -+ select STRICT_MEMORY_RWX -+ select CPU_SW_DOMAIN_PAN -+ -+endif -+ -+config GENTOO_PRINT_FIRMWARE_INFO -+ bool "Print firmware information that the kernel attempts to load" -+ -+ depends on GENTOO_LINUX -+ default y -+ -+ help -+ Enable this option to print information about firmware that the kernel -+ is attempting to load. This information can be accessible via the -+ dmesg command-line utility -+ -+ See the settings that become available for more details and fine-tuning. 
-+ -+endmenu --- -2.31.1 - -From bd3ff0b16792c18c0614c2b95e148943209f460a Mon Sep 17 00:00:00 2001 -From: Georgy Yakovlev -Date: Tue, 8 Jun 2021 13:59:57 -0700 -Subject: [PATCH 2/2] set DEFAULT_MMAP_MIN_ADDR by default - ---- - mm/Kconfig | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/mm/Kconfig b/mm/Kconfig -index 24c045b24..e13fc740c 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -738,6 +738,8 @@ config KSM - config DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" - depends on MMU -+ default 65536 if ( X86_64 || X86_32 || PPC64 || IA64 ) && GENTOO_KERNEL_SELF_PROTECTION -+ default 32768 if ( ARM64 || ARM ) && GENTOO_KERNEL_SELF_PROTECTION - default 4096 - help - This is the portion of low virtual memory which should be protected --- -2.31.1 -``` diff --git a/sys-kernel/git-sources/0000-Gentoo-support-printing-firmware-info.patch b/sys-kernel/git-sources/0000-Gentoo-support-printing-firmware-info.patch deleted file mode 100644 index a630cfb..0000000 --- a/sys-kernel/git-sources/0000-Gentoo-support-printing-firmware-info.patch +++ /dev/null @@ -1,14 +0,0 @@ ---- a/drivers/base/firmware_loader/main.c 2021-08-24 15:42:07.025482085 -0400 -+++ b/drivers/base/firmware_loader/main.c 2021-08-24 15:44:40.782975313 -0400 -@@ -809,6 +809,11 @@ _request_firmware(const struct firmware - - ret = _request_firmware_prepare(&fw, name, device, buf, size, - offset, opt_flags); -+ -+#ifdef CONFIG_GENTOO_PRINT_FIRMWARE_INFO -+ printk(KERN_NOTICE "Loading firmware: %s\n", name); -+#endif -+ - if (ret <= 0) /* error or already assigned */ - goto out; - diff --git a/sys-kernel/git-sources/0000_distro-Gentoo-Kconfig.patch b/sys-kernel/git-sources/0000_distro-Gentoo-Kconfig.patch new file mode 100644 index 0000000..5543daa --- /dev/null +++ b/sys-kernel/git-sources/0000_distro-Gentoo-Kconfig.patch @@ -0,0 +1,363 @@ +From 15772d7f271de72be9e954a37de4a820ceeeae92 Mon Sep 17 00:00:00 2001 +From: Arisu Tachibana +Date: Fri, 14 Nov 2025 17:09:23 +0900 
+Subject: [PATCH] 4567_distro-Gentoo-Kconfig.patch + +Drop structleak gcc plugin +Ref: https://github.com/torvalds/linux/commit/8530ea3c9b9747faba46ed3a59ad103b894f1189 + +Drop ARCH_EPHEMERAL_INODES +Ref: https://github.com/torvalds/linux/commit/74ce793bcbde5cef0f82d6ccb3c47cb651295a9a + +Switch to RANDSTRUCT_FULL + +Signed-off-by: Arisu Tachibana +--- + Kconfig | 2 + + distro/Kconfig | 310 +++++++++++++++++++++++++++++++++++++++++++++++++ + mm/Kconfig | 2 + + 3 files changed, 314 insertions(+) + create mode 100644 distro/Kconfig + +diff --git a/Kconfig b/Kconfig +index 307e58114..91aa4be3d 100644 +--- a/Kconfig ++++ b/Kconfig +@@ -32,3 +32,5 @@ source "lib/Kconfig.debug" + source "Documentation/Kconfig" + + source "io_uring/Kconfig" ++ ++source "distro/Kconfig" +diff --git a/distro/Kconfig b/distro/Kconfig +new file mode 100644 +index 000000000..41797d8f8 +--- /dev/null ++++ b/distro/Kconfig +@@ -0,0 +1,310 @@ ++menu "Gentoo Linux" ++ ++config GENTOO_LINUX ++ bool "Gentoo Linux support" ++ ++ default y ++ ++ select CPU_FREQ_DEFAULT_GOV_SCHEDUTIL ++ ++ help ++ In order to boot Gentoo Linux a minimal set of config settings needs to ++ be enabled in the kernel; to avoid the users from having to enable them ++ manually as part of a Gentoo Linux installation or a new clean config, ++ we enable these config settings by default for convenience. ++ ++ See the settings that become available for more details and fine-tuning. ++ ++config GENTOO_LINUX_UDEV ++ bool "Linux dynamic and persistent device naming (userspace devfs) support" ++ ++ depends on GENTOO_LINUX ++ default y if GENTOO_LINUX ++ ++ select DEVTMPFS ++ select TMPFS ++ select UNIX ++ ++ select MMU ++ select SHMEM ++ ++ help ++ In order to boot Gentoo Linux a minimal set of config settings needs to ++ be enabled in the kernel; to avoid the users from having to enable them ++ manually as part of a Gentoo Linux installation or a new clean config, ++ we enable these config settings by default for convenience. 
++ ++ Currently this only selects TMPFS, DEVTMPFS and their dependencies. ++ TMPFS is enabled to maintain a tmpfs file system at /dev/shm, /run and ++ /sys/fs/cgroup; DEVTMPFS to maintain a devtmpfs file system at /dev. ++ ++ Some of these are critical files that need to be available early in the ++ boot process; if not available, it causes sysfs and udev to malfunction. ++ ++ To ensure Gentoo Linux boots, it is best to leave this setting enabled; ++ if you run a custom setup, you could consider whether to disable this. ++ ++config GENTOO_LINUX_PORTAGE ++ bool "Select options required by Portage features" ++ ++ depends on GENTOO_LINUX ++ default y if GENTOO_LINUX ++ ++ select CGROUPS ++ select NAMESPACES ++ select IPC_NS ++ select NET_NS ++ select PID_NS ++ select SYSVIPC ++ select USER_NS ++ select UTS_NS ++ ++ help ++ This enables options required by various Portage FEATURES. ++ Currently this selects: ++ ++ CGROUPS (required for FEATURES=cgroup) ++ IPC_NS (required for FEATURES=ipc-sandbox) ++ NET_NS (required for FEATURES=network-sandbox) ++ PID_NS (required for FEATURES=pid-sandbox) ++ SYSVIPC (required by IPC_NS) ++ ++ ++ It is highly recommended that you leave this enabled as these FEATURES ++ are, or will soon be, enabled by default. ++ ++menu "Support for init systems, system and service managers" ++ visible if GENTOO_LINUX ++ ++config GENTOO_LINUX_INIT_SCRIPT ++ bool "OpenRC, runit and other script based systems and managers" ++ ++ default y if GENTOO_LINUX ++ ++ depends on GENTOO_LINUX ++ ++ select BINFMT_SCRIPT ++ select CGROUPS ++ select EPOLL ++ select FILE_LOCKING ++ select INOTIFY_USER ++ select SIGNALFD ++ select TIMERFD ++ ++ help ++ The init system is the first thing that loads after the kernel booted. ++ ++ These config settings allow you to select which init systems to support; ++ instead of having to select all the individual settings all over the ++ place, these settings allows you to select all the settings at once. 
++ ++ This particular setting enables all the known requirements for OpenRC, ++ runit and similar script based systems and managers. ++ ++ If you are unsure about this, it is best to leave this setting enabled. ++ ++config GENTOO_LINUX_INIT_SYSTEMD ++ bool "systemd" ++ ++ default n ++ ++ depends on GENTOO_LINUX && GENTOO_LINUX_UDEV ++ ++ select AUTOFS_FS ++ select BLK_DEV_BSG if SCSI ++ select BPF_SYSCALL ++ select CGROUP_BPF ++ select CGROUPS ++ select CRYPTO_HMAC ++ select CRYPTO_SHA256 ++ select CRYPTO_USER_API_HASH ++ select DEVPTS_MULTIPLE_INSTANCES ++ select DMIID if X86_32 || X86_64 || X86 ++ select EPOLL ++ select FANOTIFY ++ select FHANDLE ++ select FILE_LOCKING ++ select INOTIFY_USER ++ select IPV6 ++ select KCMP ++ select NET ++ select NET_NS ++ select PROC_FS ++ select SECCOMP if HAVE_ARCH_SECCOMP ++ select SECCOMP_FILTER if HAVE_ARCH_SECCOMP_FILTER ++ select SIGNALFD ++ select SYSFS ++ select TIMERFD ++ select TMPFS_POSIX_ACL ++ select TMPFS_XATTR ++ ++ select ANON_INODES ++ select BLOCK ++ select EVENTFD ++ select FSNOTIFY ++ select INET ++ select NLATTR ++ ++ help ++ The init system is the first thing that loads after the kernel booted. ++ ++ These config settings allow you to select which init systems to support; ++ instead of having to select all the individual settings all over the ++ place, these settings allows you to select all the settings at once. ++ ++ This particular setting enables all the known requirements for systemd; ++ it also enables suggested optional settings, as the package suggests to. 
++ ++endmenu ++ ++menuconfig GENTOO_KERNEL_SELF_PROTECTION ++ bool "Kernel Self Protection Project" ++ depends on GENTOO_LINUX ++ help ++ Recommended Kernel settings based on the suggestions from the Kernel Self Protection Project ++ See: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings ++ Note, there may be additional settings for which the CONFIG_ setting is invisible in menuconfig due ++ to unmet dependencies. Search for GENTOO_KERNEL_SELF_PROTECTION_COMMON and search for ++ GENTOO_KERNEL_SELF_PROTECTION_{X86_64, ARM64, X86_32, ARM} for dependency information on your ++ specific architecture. ++ Note 2: Please see the URL above for numeric settings, e.g. CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 ++ for X86_64 ++ ++if GENTOO_KERNEL_SELF_PROTECTION ++config GENTOO_KERNEL_SELF_PROTECTION_COMMON ++ bool "Enable Kernel Self Protection Project Recommendations" ++ ++ depends on GENTOO_LINUX && !SLAB_MERGE_DEFAULT && !SLUB_TINY && !COMPAT_BRK && !PROC_KCORE && !COMPAT_VDSO && !KEXEC && !HIBERNATION && !LEGACY_PTYS && !X86_X32_ABI && !MODIFY_LDT_SYSCALL && GCC_PLUGINS && !IOMMU_DEFAULT_DMA_LAZY && !IOMMU_DEFAULT_PASSTHROUGH && IOMMU_DEFAULT_DMA_STRICT && SECURITY ++ ++ select BUG ++ select STRICT_KERNEL_RWX ++ select DEBUG_VIRTUAL ++ select DEBUG_WX ++ select STACKPROTECTOR ++ select STACKPROTECTOR_STRONG ++ select STRICT_DEVMEM if DEVMEM=y ++ select IO_STRICT_DEVMEM if DEVMEM=y ++ select SYN_COOKIES ++ select DEBUG_CREDENTIALS ++ select DEBUG_NOTIFIERS ++ select DEBUG_LIST ++ select DEBUG_SG ++ select HARDENED_USERCOPY ++ select KFENCE if HAVE_ARCH_KFENCE && (!SLAB || SLUB) ++ select PAGE_TABLE_CHECK if ARCH_SUPPORTS_PAGE_TABLE_CHECK=y && EXCLUSIVE_SYSTEM_RAM=y ++ select PAGE_TABLE_CHECK_ENFORCED if PAGE_TABLE_CHECK=y ++ select RANDOMIZE_KSTACK_OFFSET_DEFAULT if HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET && (INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION>=140000) ++ select SECURITY_LANDLOCK ++ select SCHED_CORE if SCHED_SMT ++ select 
BUG_ON_DATA_CORRUPTION ++ select RANDOM_KMALLOC_CACHES if SLUB_TINY=n ++ select SCHED_STACK_END_CHECK ++ select SECCOMP if HAVE_ARCH_SECCOMP ++ select SECCOMP_FILTER if HAVE_ARCH_SECCOMP_FILTER ++ select SECURITY_YAMA ++ select SLAB_FREELIST_RANDOM ++ select SLAB_FREELIST_HARDENED ++ select SLAB_BUCKETS ++ select SHUFFLE_PAGE_ALLOCATOR ++ select SLUB_DEBUG ++ select UBSAN ++ select CC_HAS_UBSAN_BOUNDS_STRICT if !CC_HAS_UBSAN_ARRAY_BOUNDS ++ select UBSAN_BOUNDS ++ select UBSAN_SHIFT ++ select PAGE_POISONING ++ select PAGE_POISONING_NO_SANITY ++ select PAGE_POISONING_ZERO ++ select INIT_ON_ALLOC_DEFAULT_ON ++ select INIT_ON_FREE_DEFAULT_ON ++ select REFCOUNT_FULL ++ select FORTIFY_SOURCE ++ select SECURITY_DMESG_RESTRICT ++ select PANIC_ON_OOPS ++ select GCC_PLUGIN_LATENT_ENTROPY ++ select GCC_PLUGIN_RANDSTRUCT ++ select RANDSTRUCT_FULL ++ select ZERO_CALL_USED_REGS if CC_HAS_ZERO_CALL_USED_REGS ++ ++ help ++ Search for GENTOO_KERNEL_SELF_PROTECTION_{X86_64, ARM64, X86_32, ARM} for dependency ++ information on your specific architecture. Note 2: Please see the URL above for ++ numeric settings, e.g. 
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 for X86_64 ++ ++config GENTOO_KERNEL_SELF_PROTECTION_X86_64 ++ bool "X86_64 KSPP Settings" if GENTOO_KERNEL_SELF_PROTECTION_COMMON ++ ++ depends on !X86_MSR && X86_64 && GENTOO_KERNEL_SELF_PROTECTION ++ default n ++ ++ select KSTACK_ERASE if HAVE_ARCH_KSTACK_ERASE ++ select X86_KERNEL_IBT if CC_HAS_IBT=y && HAVE_OBJTOOL=y && (LD_IS_LLD=n || LLD_VERSION>=140000) ++ select LEGACY_VSYSCALL_NONE ++ select PAGE_TABLE_ISOLATION ++ select RANDOMIZE_BASE ++ select RANDOMIZE_MEMORY ++ select RELOCATABLE ++ select X86_USER_SHADOW_STACK if AS_WRUSS=y ++ select VMAP_STACK ++ ++ ++config GENTOO_KERNEL_SELF_PROTECTION_ARM64 ++ bool "ARM64 KSPP Settings" ++ ++ depends on ARM64 ++ default n ++ ++ select ARM64_BTI ++ select ARM64_E0PD ++ select ARM64_EPAN if ARM64_PAN=y ++ select ARM64_MTE if (ARM64_AS_HAS_MTE=y && ARM64_TAGGED_ADDR_ABI=y ) && ( AS_HAS_ARMV8_5=y ) && ( AS_HAS_LSE_ATOMICS=y ) ++ select ARM64_PTR_AUTH ++ select ARM64_PTR_AUTH_KERNEL if ( ARM64_PTR_AUTH=y ) && (( CC_HAS_SIGN_RETURN_ADDRESS=y || CC_HAS_BRANCH_PROT_PAC_RET=y ) && AS_HAS_ARMV8_3=y ) && ( LD_IS_LLD=y || LD_VERSION >= 23301 || ( CC_IS_GCC=y && GCC_VERSION < 90100 )) && (CC_IS_CLANG=n || AS_HAS_CFI_NEGATE_RA_STATE=y ) && ((FUNCTION_GRAPH_TRACER=n || DYNAMIC_FTRACE_WITH_ARGS=y )) ++ select ARM64_BTI_KERNEL if ( ARM64_BTI=y ) && ( ARM64_PTR_AUTH_KERNEL=y ) && ( CC_HAS_BRANCH_PROT_PAC_RET_BTI=y ) && (CC_IS_GCC=n || GCC_VERSION >= 100100 ) && (CC_IS_GCC=n ) && ((FUNCTION_GRAPH_TRACER=n || DYNAMIC_FTRACE_WITH_ARGS=y )) ++ select ARM64_SW_TTBR0_PAN ++ select UNMAP_KERNEL_AT_EL0 ++ select KSTACK_ERASE if HAVE_ARCH_KSTACK_ERASE ++ select KASAN_HW_TAGS if HAVE_ARCH_KASAN_HW_TAGS=y ++ select RANDOMIZE_BASE ++ select RELOCATABLE ++ select SHADOW_CALL_STACK if ARCH_SUPPORTS_SHADOW_CALL_STACK=y && (DYNAMIC_FTRACE_WITH_ARGS=y || DYNAMIC_FTRACE_WITH_REGS=y || FUNCTION_GRAPH_TRACER=n) && MMU=y ++ select UNWIND_PATCH_PAC_INTO_SCS if (CC_IS_CLANG=y && CLANG_VERSION >= 150000 )
&& ( ARM64_PTR_AUTH_KERNEL=y && CC_HAS_BRANCH_PROT_PAC_RET=y ) && ( SHADOW_CALL_STACK=y ) ++ select VMAP_STACK ++ ++config GENTOO_KERNEL_SELF_PROTECTION_X86_32 ++ bool "X86_32 KSPP Settings" ++ ++ depends on !X86_MSR && !MODIFY_LDT_SYSCALL && !M486 && X86_32 ++ default n ++ ++ select HIGHMEM64G ++ select X86_PAE ++ select RANDOMIZE_BASE ++ select RELOCATABLE ++ select PAGE_TABLE_ISOLATION ++ ++config GENTOO_KERNEL_SELF_PROTECTION_ARM ++ bool "ARM KSPP Settings" ++ ++ depends on !OABI_COMPAT && ARM ++ default n ++ ++ select VMSPLIT_3G ++ select STRICT_MEMORY_RWX ++ select CPU_SW_DOMAIN_PAN ++ ++endif ++ ++config GENTOO_PRINT_FIRMWARE_INFO ++ bool "Print firmware information that the kernel attempts to load" ++ ++ depends on GENTOO_LINUX ++ default y ++ ++ help ++ Enable this option to print information about firmware that the kernel ++ is attempting to load. This information can be accessible via the ++ dmesg command-line utility ++ ++ See the settings that become available for more details and fine-tuning. 
++ ++endmenu +diff --git a/mm/Kconfig b/mm/Kconfig +index e443fe8cd..cefe9f0cf 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -753,6 +753,8 @@ config KSM + config DEFAULT_MMAP_MIN_ADDR + int "Low address space to protect from user allocation" + depends on MMU ++ default 65536 if ( X86_64 || X86_32 || PPC64 || IA64 ) && GENTOO_KERNEL_SELF_PROTECTION ++ default 32768 if ( ARM64 || ARM ) && GENTOO_KERNEL_SELF_PROTECTION + default 4096 + help + This is the portion of low virtual memory which should be protected +-- +2.51.0 + diff --git a/sys-kernel/git-sources/0001-asus.patch b/sys-kernel/git-sources/0001-asus.patch deleted file mode 100644 index 75ef225..0000000 --- a/sys-kernel/git-sources/0001-asus.patch +++ /dev/null @@ -1,6038 +0,0 @@ -From b5b4f8345dc0d81e7922485af45f5384008db8bf Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Mon, 1 Sep 2025 09:38:53 +0800 -Subject: [PATCH 1/4] asus - -Signed-off-by: Eric Naim ---- - .../ABI/testing/sysfs-platform-asus-wmi | 17 + - drivers/hid/Kconfig | 9 + - drivers/hid/Makefile | 1 + - drivers/hid/hid-asus-ally.c | 2197 +++++++++++++++++ - drivers/hid/hid-asus-ally.h | 398 +++ - drivers/hid/hid-asus.c | 29 +- - drivers/hid/hid-asus.h | 13 + - drivers/hid/hid-ids.h | 1 + - drivers/platform/x86/Kconfig | 23 + - drivers/platform/x86/Makefile | 1 + - drivers/platform/x86/asus-armoury.c | 1174 +++++++++ - drivers/platform/x86/asus-armoury.h | 1278 ++++++++++ - drivers/platform/x86/asus-wmi.c | 171 +- - include/linux/platform_data/x86/asus-wmi.h | 22 + - 14 files changed, 5293 insertions(+), 41 deletions(-) - create mode 100644 drivers/hid/hid-asus-ally.c - create mode 100644 drivers/hid/hid-asus-ally.h - create mode 100644 drivers/hid/hid-asus.h - create mode 100644 drivers/platform/x86/asus-armoury.c - create mode 100644 drivers/platform/x86/asus-armoury.h - -diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi -index 28144371a0f1..765d50b0d9df 100644 ---- 
a/Documentation/ABI/testing/sysfs-platform-asus-wmi -+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi -@@ -63,6 +63,7 @@ Date: Aug 2022 - KernelVersion: 6.1 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Switch the GPU hardware MUX mode. Laptops with this feature can - can be toggled to boot with only the dGPU (discrete mode) or in - standard Optimus/Hybrid mode. On switch a reboot is required: -@@ -75,6 +76,7 @@ Date: Aug 2022 - KernelVersion: 5.17 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Disable discrete GPU: - * 0 - Enable dGPU, - * 1 - Disable dGPU -@@ -84,6 +86,7 @@ Date: Aug 2022 - KernelVersion: 5.17 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Enable the external GPU paired with ROG X-Flow laptops. - Toggling this setting will also trigger ACPI to disable the dGPU: - -@@ -95,6 +98,7 @@ Date: Aug 2022 - KernelVersion: 5.17 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Enable an LCD response-time boost to reduce or remove ghosting: - * 0 - Disable, - * 1 - Enable -@@ -104,6 +108,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Get the current charging mode being used: - * 1 - Barrel connected charger, - * 2 - USB-C charging -@@ -114,6 +119,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Show if the egpu (XG Mobile) is correctly connected: - * 0 - False, - * 1 - True -@@ -123,6 +129,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Change the mini-LED mode: - * 0 - Single-zone, - * 1 - Multi-zone -@@ -133,6 +140,7 @@ Date: Apr 2024 - KernelVersion: 6.10 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - List the available mini-led modes. 
- - What: /sys/devices/platform//ppt_pl1_spl -@@ -140,6 +148,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the Package Power Target total of CPU: PL1 on Intel, SPL on AMD. - Shown on Intel+Nvidia or AMD+Nvidia based systems: - -@@ -150,6 +159,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the Slow Package Power Tracking Limit of CPU: PL2 on Intel, SPPT, - on AMD. Shown on Intel+Nvidia or AMD+Nvidia based systems: - -@@ -160,6 +170,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the Fast Package Power Tracking Limit of CPU. AMD+Nvidia only: - * min=5, max=250 - -@@ -168,6 +179,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the APU SPPT limit. Shown on full AMD systems only: - * min=5, max=130 - -@@ -176,6 +188,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the platform SPPT limit. Shown on full AMD systems only: - * min=5, max=130 - -@@ -184,6 +197,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the dynamic boost limit of the Nvidia dGPU: - * min=5, max=25 - -@@ -192,6 +206,7 @@ Date: Jun 2023 - KernelVersion: 6.5 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set the target temperature limit of the Nvidia dGPU: - * min=75, max=87 - -@@ -200,6 +215,7 @@ Date: Apr 2024 - KernelVersion: 6.10 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set if the BIOS POST sound is played on boot. 
- * 0 - False, - * 1 - True -@@ -209,6 +225,7 @@ Date: Apr 2024 - KernelVersion: 6.10 - Contact: "Luke Jones" - Description: -+ DEPRECATED, WILL BE REMOVED SOON - Set if the MCU can go in to low-power mode on system sleep - * 0 - False, - * 1 - True -diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index 79997553d8f9..d3147e48a8f1 100644 ---- a/drivers/hid/Kconfig -+++ b/drivers/hid/Kconfig -@@ -191,6 +191,15 @@ config HID_ASUS - - GL553V series - - GL753V series - -+config HID_ASUS_ALLY -+ tristate "Asus Ally gamepad configuration support" -+ depends on USB_HID -+ depends on LEDS_CLASS -+ depends on LEDS_CLASS_MULTICOLOR -+ select POWER_SUPPLY -+ help -+ Support for configuring the Asus ROG Ally gamepad using attributes. -+ - config HID_AUREAL - tristate "Aureal" - help -diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 10ae5dedbd84..958f67193c85 100644 ---- a/drivers/hid/Makefile -+++ b/drivers/hid/Makefile -@@ -33,6 +33,7 @@ obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o - obj-$(CONFIG_HID_APPLETB_KBD) += hid-appletb-kbd.o - obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o - obj-$(CONFIG_HID_ASUS) += hid-asus.o -+obj-$(CONFIG_HID_ASUS_ALLY) += hid-asus-ally.o - obj-$(CONFIG_HID_AUREAL) += hid-aureal.o - obj-$(CONFIG_HID_BELKIN) += hid-belkin.o - obj-$(CONFIG_HID_BETOP_FF) += hid-betopff.o -diff --git a/drivers/hid/hid-asus-ally.c b/drivers/hid/hid-asus-ally.c -new file mode 100644 -index 000000000000..e78625f70c44 ---- /dev/null -+++ b/drivers/hid/hid-asus-ally.c -@@ -0,0 +1,2197 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * HID driver for Asus ROG laptops and Ally -+ * -+ * Copyright (c) 2023 Luke Jones -+ */ -+ -+#include "linux/compiler_attributes.h" -+#include "linux/device.h" -+#include -+#include -+#include "linux/pm.h" -+#include "linux/printk.h" -+#include "linux/slab.h" -+#include -+#include -+#include -+#include -+#include -+ -+#include "hid-ids.h" -+#include "hid-asus.h" -+#include "hid-asus-ally.h" -+ 
-+#define DEBUG -+ -+#define READY_MAX_TRIES 3 -+#define FEATURE_REPORT_ID 0x0d -+#define FEATURE_ROG_ALLY_REPORT_ID 0x5a -+#define FEATURE_ROG_ALLY_CODE_PAGE 0xD1 -+#define FEATURE_ROG_ALLY_REPORT_SIZE 64 -+#define ALLY_X_INPUT_REPORT_USB 0x0B -+#define ALLY_X_INPUT_REPORT_USB_SIZE 16 -+ -+#define ROG_ALLY_REPORT_SIZE 64 -+#define ROG_ALLY_X_MIN_MCU 313 -+#define ROG_ALLY_MIN_MCU 319 -+ -+#define FEATURE_KBD_LED_REPORT_ID1 0x5d -+#define FEATURE_KBD_LED_REPORT_ID2 0x5e -+ -+#define BTN_DATA_LEN 11; -+#define BTN_CODE_BYTES_LEN 8 -+ -+static const u8 EC_INIT_STRING[] = { 0x5A, 'A', 'S', 'U', 'S', ' ', 'T', 'e','c', 'h', '.', 'I', 'n', 'c', '.', '\0' }; -+static const u8 EC_MODE_LED_APPLY[] = { 0x5A, 0xB4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -+static const u8 EC_MODE_LED_SET[] = { 0x5A, 0xB5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -+static const u8 FORCE_FEEDBACK_OFF[] = { 0x0D, 0x0F, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0xEB }; -+ -+static const struct hid_device_id rog_ally_devices[] = { -+ { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY) }, -+ { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X) }, -+ {} -+}; -+ -+struct btn_code_map { -+ u64 code; -+ const char *name; -+}; -+ -+static const struct btn_code_map ally_btn_codes[] = { -+ { 0, "NONE" }, -+ /* Gamepad button codes */ -+ { BTN_PAD_A, "PAD_A" }, -+ { BTN_PAD_B, "PAD_B" }, -+ { BTN_PAD_X, "PAD_X" }, -+ { BTN_PAD_Y, "PAD_Y" }, -+ { BTN_PAD_LB, "PAD_LB" }, -+ { BTN_PAD_RB, "PAD_RB" }, -+ { BTN_PAD_LS, "PAD_LS" }, -+ { BTN_PAD_RS, "PAD_RS" }, -+ { BTN_PAD_DPAD_UP, "PAD_DPAD_UP" }, -+ { BTN_PAD_DPAD_DOWN, "PAD_DPAD_DOWN" }, -+ { BTN_PAD_DPAD_LEFT, "PAD_DPAD_LEFT" }, -+ { BTN_PAD_DPAD_RIGHT, "PAD_DPAD_RIGHT" }, -+ { BTN_PAD_VIEW, "PAD_VIEW" }, -+ { BTN_PAD_MENU, "PAD_MENU" }, -+ { BTN_PAD_XBOX, "PAD_XBOX" }, -+ -+ /* Triggers mapped to keyboard codes */ -+ { BTN_KB_M2, "KB_M2" }, -+ { BTN_KB_M1, "KB_M1" }, -+ { BTN_KB_ESC, "KB_ESC" }, -+ 
{ BTN_KB_F1, "KB_F1" }, -+ { BTN_KB_F2, "KB_F2" }, -+ { BTN_KB_F3, "KB_F3" }, -+ { BTN_KB_F4, "KB_F4" }, -+ { BTN_KB_F5, "KB_F5" }, -+ { BTN_KB_F6, "KB_F6" }, -+ { BTN_KB_F7, "KB_F7" }, -+ { BTN_KB_F8, "KB_F8" }, -+ { BTN_KB_F9, "KB_F9" }, -+ { BTN_KB_F10, "KB_F10" }, -+ { BTN_KB_F11, "KB_F11" }, -+ { BTN_KB_F12, "KB_F12" }, -+ { BTN_KB_F14, "KB_F14" }, -+ { BTN_KB_F15, "KB_F15" }, -+ { BTN_KB_BACKTICK, "KB_BACKTICK" }, -+ { BTN_KB_1, "KB_1" }, -+ { BTN_KB_2, "KB_2" }, -+ { BTN_KB_3, "KB_3" }, -+ { BTN_KB_4, "KB_4" }, -+ { BTN_KB_5, "KB_5" }, -+ { BTN_KB_6, "KB_6" }, -+ { BTN_KB_7, "KB_7" }, -+ { BTN_KB_8, "KB_8" }, -+ { BTN_KB_9, "KB_9" }, -+ { BTN_KB_0, "KB_0" }, -+ { BTN_KB_HYPHEN, "KB_HYPHEN" }, -+ { BTN_KB_EQUALS, "KB_EQUALS" }, -+ { BTN_KB_BACKSPACE, "KB_BACKSPACE" }, -+ { BTN_KB_TAB, "KB_TAB" }, -+ { BTN_KB_Q, "KB_Q" }, -+ { BTN_KB_W, "KB_W" }, -+ { BTN_KB_E, "KB_E" }, -+ { BTN_KB_R, "KB_R" }, -+ { BTN_KB_T, "KB_T" }, -+ { BTN_KB_Y, "KB_Y" }, -+ { BTN_KB_U, "KB_U" }, -+ { BTN_KB_O, "KB_O" }, -+ { BTN_KB_P, "KB_P" }, -+ { BTN_KB_LBRACKET, "KB_LBRACKET" }, -+ { BTN_KB_RBRACKET, "KB_RBRACKET" }, -+ { BTN_KB_BACKSLASH, "KB_BACKSLASH" }, -+ { BTN_KB_CAPS, "KB_CAPS" }, -+ { BTN_KB_A, "KB_A" }, -+ { BTN_KB_S, "KB_S" }, -+ { BTN_KB_D, "KB_D" }, -+ { BTN_KB_F, "KB_F" }, -+ { BTN_KB_G, "KB_G" }, -+ { BTN_KB_H, "KB_H" }, -+ { BTN_KB_J, "KB_J" }, -+ { BTN_KB_K, "KB_K" }, -+ { BTN_KB_L, "KB_L" }, -+ { BTN_KB_SEMI, "KB_SEMI" }, -+ { BTN_KB_QUOTE, "KB_QUOTE" }, -+ { BTN_KB_RET, "KB_RET" }, -+ { BTN_KB_LSHIFT, "KB_LSHIFT" }, -+ { BTN_KB_Z, "KB_Z" }, -+ { BTN_KB_X, "KB_X" }, -+ { BTN_KB_C, "KB_C" }, -+ { BTN_KB_V, "KB_V" }, -+ { BTN_KB_B, "KB_B" }, -+ { BTN_KB_N, "KB_N" }, -+ { BTN_KB_M, "KB_M" }, -+ { BTN_KB_COMMA, "KB_COMMA" }, -+ { BTN_KB_PERIOD, "KB_PERIOD" }, -+ { BTN_KB_RSHIFT, "KB_RSHIFT" }, -+ { BTN_KB_LCTL, "KB_LCTL" }, -+ { BTN_KB_META, "KB_META" }, -+ { BTN_KB_LALT, "KB_LALT" }, -+ { BTN_KB_SPACE, "KB_SPACE" }, -+ { BTN_KB_RALT, "KB_RALT" }, -+ { BTN_KB_MENU, 
"KB_MENU" }, -+ { BTN_KB_RCTL, "KB_RCTL" }, -+ { BTN_KB_PRNTSCN, "KB_PRNTSCN" }, -+ { BTN_KB_SCRLCK, "KB_SCRLCK" }, -+ { BTN_KB_PAUSE, "KB_PAUSE" }, -+ { BTN_KB_INS, "KB_INS" }, -+ { BTN_KB_HOME, "KB_HOME" }, -+ { BTN_KB_PGUP, "KB_PGUP" }, -+ { BTN_KB_DEL, "KB_DEL" }, -+ { BTN_KB_END, "KB_END" }, -+ { BTN_KB_PGDWN, "KB_PGDWN" }, -+ { BTN_KB_UP_ARROW, "KB_UP_ARROW" }, -+ { BTN_KB_DOWN_ARROW, "KB_DOWN_ARROW" }, -+ { BTN_KB_LEFT_ARROW, "KB_LEFT_ARROW" }, -+ { BTN_KB_RIGHT_ARROW, "KB_RIGHT_ARROW" }, -+ -+ /* Numpad mappings */ -+ { BTN_NUMPAD_LOCK, "NUMPAD_LOCK" }, -+ { BTN_NUMPAD_FWDSLASH, "NUMPAD_FWDSLASH" }, -+ { BTN_NUMPAD_ASTERISK, "NUMPAD_ASTERISK" }, -+ { BTN_NUMPAD_HYPHEN, "NUMPAD_HYPHEN" }, -+ { BTN_NUMPAD_0, "NUMPAD_0" }, -+ { BTN_NUMPAD_1, "NUMPAD_1" }, -+ { BTN_NUMPAD_2, "NUMPAD_2" }, -+ { BTN_NUMPAD_3, "NUMPAD_3" }, -+ { BTN_NUMPAD_4, "NUMPAD_4" }, -+ { BTN_NUMPAD_5, "NUMPAD_5" }, -+ { BTN_NUMPAD_6, "NUMPAD_6" }, -+ { BTN_NUMPAD_7, "NUMPAD_7" }, -+ { BTN_NUMPAD_8, "NUMPAD_8" }, -+ { BTN_NUMPAD_9, "NUMPAD_9" }, -+ { BTN_NUMPAD_PLUS, "NUMPAD_PLUS" }, -+ { BTN_NUMPAD_ENTER, "NUMPAD_ENTER" }, -+ { BTN_NUMPAD_PERIOD, "NUMPAD_PERIOD" }, -+ -+ /* Mouse mappings */ -+ { BTN_MOUSE_LCLICK, "MOUSE_LCLICK" }, -+ { BTN_MOUSE_RCLICK, "MOUSE_RCLICK" }, -+ { BTN_MOUSE_MCLICK, "MOUSE_MCLICK" }, -+ { BTN_MOUSE_WHEEL_UP, "MOUSE_WHEEL_UP" }, -+ { BTN_MOUSE_WHEEL_DOWN, "MOUSE_WHEEL_DOWN" }, -+ -+ /* Media mappings */ -+ { BTN_MEDIA_SCREENSHOT, "MEDIA_SCREENSHOT" }, -+ { BTN_MEDIA_SHOW_KEYBOARD, "MEDIA_SHOW_KEYBOARD" }, -+ { BTN_MEDIA_SHOW_DESKTOP, "MEDIA_SHOW_DESKTOP" }, -+ { BTN_MEDIA_START_RECORDING, "MEDIA_START_RECORDING" }, -+ { BTN_MEDIA_MIC_OFF, "MEDIA_MIC_OFF" }, -+ { BTN_MEDIA_VOL_DOWN, "MEDIA_VOL_DOWN" }, -+ { BTN_MEDIA_VOL_UP, "MEDIA_VOL_UP" }, -+}; -+static const size_t keymap_len = ARRAY_SIZE(ally_btn_codes); -+ -+/* byte_array must be >= 8 in length */ -+static void btn_code_to_byte_array(u64 keycode, u8 *byte_array) -+{ -+ /* Convert the u64 to bytes[8] */ -+ 
for (int i = 0; i < 8; ++i) { -+ byte_array[i] = (keycode >> (56 - 8 * i)) & 0xFF; -+ } -+} -+ -+static u64 name_to_btn(const char *name) -+{ -+ int len = strcspn(name, "\n"); -+ for (size_t i = 0; i < keymap_len; ++i) { -+ if (strncmp(ally_btn_codes[i].name, name, len) == 0) { -+ return ally_btn_codes[i].code; -+ } -+ } -+ return -EINVAL; -+} -+ -+static const char* btn_to_name(u64 key) -+{ -+ for (size_t i = 0; i < keymap_len; ++i) { -+ if (ally_btn_codes[i].code == key) { -+ return ally_btn_codes[i].name; -+ } -+ } -+ return NULL; -+} -+ -+struct btn_data { -+ u64 button; -+ u64 macro; -+ bool turbo; -+}; -+ -+struct btn_mapping { -+ struct btn_data btn_a; -+ struct btn_data btn_b; -+ struct btn_data btn_x; -+ struct btn_data btn_y; -+ struct btn_data btn_lb; -+ struct btn_data btn_rb; -+ struct btn_data btn_ls; -+ struct btn_data btn_rs; -+ struct btn_data btn_lt; -+ struct btn_data btn_rt; -+ struct btn_data dpad_up; -+ struct btn_data dpad_down; -+ struct btn_data dpad_left; -+ struct btn_data dpad_right; -+ struct btn_data btn_view; -+ struct btn_data btn_menu; -+ struct btn_data btn_m1; -+ struct btn_data btn_m2; -+}; -+ -+struct deadzone { -+ u8 inner; -+ u8 outer; -+}; -+ -+struct response_curve { -+ uint8_t move_pct_1; -+ uint8_t response_pct_1; -+ uint8_t move_pct_2; -+ uint8_t response_pct_2; -+ uint8_t move_pct_3; -+ uint8_t response_pct_3; -+ uint8_t move_pct_4; -+ uint8_t response_pct_4; -+} __packed; -+ -+struct js_axis_calibrations { -+ uint16_t left_y_stable; -+ uint16_t left_y_min; -+ uint16_t left_y_max; -+ uint16_t left_x_stable; -+ uint16_t left_x_min; -+ uint16_t left_x_max; -+ uint16_t right_y_stable; -+ uint16_t right_y_min; -+ uint16_t right_y_max; -+ uint16_t right_x_stable; -+ uint16_t right_x_min; -+ uint16_t right_x_max; -+} __packed; -+ -+struct tr_axis_calibrations { -+ uint16_t left_stable; -+ uint16_t left_max; -+ uint16_t right_stable; -+ uint16_t right_max; -+} __packed; -+ -+/* ROG Ally has many settings related to the gamepad, 
all using the same n-key endpoint */ -+struct ally_gamepad_cfg { -+ struct hid_device *hdev; -+ struct input_dev *input; -+ -+ enum xpad_mode mode; -+ /* -+ * index: [mode] -+ */ -+ struct btn_mapping key_mapping[xpad_mode_mouse]; -+ /* -+ * index: left, right -+ * max: 64 -+ */ -+ u8 vibration_intensity[2]; -+ -+ /* deadzones */ -+ struct deadzone ls_dz; // left stick -+ struct deadzone rs_dz; // right stick -+ struct deadzone lt_dz; // left trigger -+ struct deadzone rt_dz; // right trigger -+ /* anti-deadzones */ -+ u8 ls_adz; // left stick -+ u8 rs_adz; // right stick -+ /* joystick response curves */ -+ struct response_curve ls_rc; -+ struct response_curve rs_rc; -+ -+ struct js_axis_calibrations js_cal; -+ struct tr_axis_calibrations tr_cal; -+}; -+ -+/* The hatswitch outputs integers, we use them to index this X|Y pair */ -+static const int hat_values[][2] = { -+ { 0, 0 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, -+ { 0, 1 }, { -1, 1 }, { -1, 0 }, { -1, -1 }, -+}; -+ -+/* rumble packet structure */ -+struct ff_data { -+ u8 enable; -+ u8 magnitude_left; -+ u8 magnitude_right; -+ u8 magnitude_strong; -+ u8 magnitude_weak; -+ u8 pulse_sustain_10ms; -+ u8 pulse_release_10ms; -+ u8 loop_count; -+} __packed; -+ -+struct ff_report { -+ u8 report_id; -+ struct ff_data ff; -+} __packed; -+ -+struct ally_x_input_report { -+ uint16_t x, y; -+ uint16_t rx, ry; -+ uint16_t z, rz; -+ uint8_t buttons[4]; -+} __packed; -+ -+struct ally_x_device { -+ struct input_dev *input; -+ struct hid_device *hdev; -+ spinlock_t lock; -+ -+ struct ff_report *ff_packet; -+ struct work_struct output_worker; -+ bool output_worker_initialized; -+ /* Prevent multiple queued event due to the enforced delay in worker */ -+ bool update_qam_btn; -+ /* Set if the QAM and AC buttons emit Xbox and Xbox+A */ -+ bool qam_btns_steam_mode; -+ bool update_ff; -+}; -+ -+struct ally_rgb_dev { -+ struct hid_device *hdev; -+ struct led_classdev_mc led_rgb_dev; -+ struct work_struct work; -+ bool 
output_worker_initialized; -+ spinlock_t lock; -+ -+ bool removed; -+ bool update_rgb; -+ uint8_t red[4]; -+ uint8_t green[4]; -+ uint8_t blue[4]; -+}; -+ -+struct ally_rgb_data { -+ uint8_t brightness; -+ uint8_t red[4]; -+ uint8_t green[4]; -+ uint8_t blue[4]; -+ bool initialized; -+}; -+ -+static struct ally_drvdata { -+ struct hid_device *hdev; -+ struct ally_x_device *ally_x; -+ struct ally_gamepad_cfg *gamepad_cfg; -+ struct ally_rgb_dev *led_rgb_dev; -+ struct ally_rgb_data led_rgb_data; -+ uint mcu_version; -+} drvdata; -+ -+static void reverse_bytes_in_pairs(u8 *buf, size_t size) { -+ uint16_t *word_ptr; -+ size_t i; -+ -+ for (i = 0; i < size; i += 2) { -+ if (i + 1 < size) { -+ word_ptr = (uint16_t *)&buf[i]; -+ *word_ptr = cpu_to_be16(*word_ptr); -+ } -+ } -+} -+ -+/** -+ * asus_dev_set_report - send set report request to device. -+ * -+ * @hdev: hid device -+ * @buf: in/out data to transfer -+ * @len: length of buf -+ * -+ * Return: count of data transferred, negative if error -+ * -+ * Same behavior as hid_hw_raw_request. Note that the input buffer is duplicated. -+ */ -+static int asus_dev_set_report(struct hid_device *hdev, const u8 *buf, size_t len) -+{ -+ unsigned char *dmabuf; -+ int ret; -+ -+ dmabuf = kmemdup(buf, len, GFP_KERNEL); -+ if (!dmabuf) -+ return -ENOMEM; -+ -+ ret = hid_hw_raw_request(hdev, buf[0], dmabuf, len, HID_FEATURE_REPORT, -+ HID_REQ_SET_REPORT); -+ kfree(dmabuf); -+ -+ return ret; -+} -+ -+/** -+ * asus_dev_get_report - send get report request to device. -+ * -+ * @hdev: hid device -+ * @out: buffer to write output data in to -+ * @len: length the output buffer provided -+ * -+ * Return: count of data transferred, negative if error -+ * -+ * Same behavior as hid_hw_raw_request. 
-+ */ -+static int asus_dev_get_report(struct hid_device *hdev, u8 *out, size_t len) -+{ -+ return hid_hw_raw_request(hdev, FEATURE_REPORT_ID, out, len, -+ HID_FEATURE_REPORT, HID_REQ_GET_REPORT); -+} -+ -+static u8 get_endpoint_address(struct hid_device *hdev) -+{ -+ struct usb_interface *intf; -+ struct usb_host_endpoint *ep; -+ -+ intf = to_usb_interface(hdev->dev.parent); -+ -+ if (intf) { -+ ep = intf->cur_altsetting->endpoint; -+ if (ep) { -+ return ep->desc.bEndpointAddress; -+ } -+ } -+ -+ return -ENODEV; -+} -+ -+/**************************************************************************************************/ -+/* ROG Ally gamepad configuration */ -+/**************************************************************************************************/ -+ -+/* This should be called before any attempts to set device functions */ -+static int ally_gamepad_check_ready(struct hid_device *hdev) -+{ -+ int ret, count; -+ u8 *hidbuf; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ ret = 0; -+ for (count = 0; count < READY_MAX_TRIES; count++) { -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ hidbuf[2] = xpad_cmd_check_ready; -+ hidbuf[3] = 01; -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ hid_dbg(hdev, "ROG Ally check failed set report: %d\n", ret); -+ -+ hidbuf[0] = hidbuf[1] = hidbuf[2] = hidbuf[3] = 0; -+ ret = asus_dev_get_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ hid_dbg(hdev, "ROG Ally check failed get report: %d\n", ret); -+ -+ ret = hidbuf[2] == xpad_cmd_check_ready; -+ if (ret) -+ break; -+ usleep_range( -+ 1000, -+ 2000); /* don't spam the entire loop in less than USB response time */ -+ } -+ -+ if (count == READY_MAX_TRIES) -+ hid_warn(hdev, "ROG Ally never responded with a ready\n"); -+ -+ kfree(hidbuf); -+ return ret; -+} -+ -+/* VIBRATION INTENSITY 
****************************************************************************/ -+static ssize_t gamepad_vibration_intensity_index_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return sysfs_emit(buf, "left right\n"); -+} -+ -+ALLY_DEVICE_ATTR_RO(gamepad_vibration_intensity_index, vibration_intensity_index); -+ -+static ssize_t _gamepad_apply_intensity(struct hid_device *hdev, -+ struct ally_gamepad_cfg *ally_cfg) -+{ -+ u8 *hidbuf; -+ int ret; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ hidbuf[2] = xpad_cmd_set_vibe_intensity; -+ hidbuf[3] = xpad_cmd_len_vibe_intensity; -+ hidbuf[4] = ally_cfg->vibration_intensity[0]; -+ hidbuf[5] = ally_cfg->vibration_intensity[1]; -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ goto report_fail; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ goto report_fail; -+ -+report_fail: -+ kfree(hidbuf); -+ return ret; -+} -+ -+static ssize_t gamepad_vibration_intensity_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ return sysfs_emit( -+ buf, "%d %d\n", -+ ally_cfg->vibration_intensity[0], -+ ally_cfg->vibration_intensity[1]); -+} -+ -+static ssize_t gamepad_vibration_intensity_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, -+ size_t count) -+{ -+ struct hid_device *hdev = to_hid_device(dev); -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ u32 left, right; -+ int ret; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ if (sscanf(buf, "%d %d", &left, &right) != 2) -+ return -EINVAL; -+ -+ if (left > 64 || right > 64) -+ return -EINVAL; -+ -+ ally_cfg->vibration_intensity[0] = left; -+ 
ally_cfg->vibration_intensity[1] = right; -+ -+ ret = _gamepad_apply_intensity(hdev, ally_cfg); -+ if (ret < 0) -+ return ret; -+ -+ return count; -+} -+ -+ALLY_DEVICE_ATTR_RW(gamepad_vibration_intensity, vibration_intensity); -+ -+/* ANALOGUE DEADZONES *****************************************************************************/ -+static ssize_t _gamepad_apply_deadzones(struct hid_device *hdev, -+ struct ally_gamepad_cfg *ally_cfg) -+{ -+ u8 *hidbuf; -+ int ret; -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ return ret; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ hidbuf[2] = xpad_cmd_set_js_dz; -+ hidbuf[3] = xpad_cmd_len_deadzone; -+ hidbuf[4] = ally_cfg->ls_dz.inner; -+ hidbuf[5] = ally_cfg->ls_dz.outer; -+ hidbuf[6] = ally_cfg->rs_dz.inner; -+ hidbuf[7] = ally_cfg->rs_dz.outer; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ goto end; -+ -+ hidbuf[2] = xpad_cmd_set_tr_dz; -+ hidbuf[4] = ally_cfg->lt_dz.inner; -+ hidbuf[5] = ally_cfg->lt_dz.outer; -+ hidbuf[6] = ally_cfg->rt_dz.inner; -+ hidbuf[7] = ally_cfg->rt_dz.outer; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ goto end; -+ -+end: -+ kfree(hidbuf); -+ return ret; -+} -+ -+static void _gamepad_set_deadzones_default(struct ally_gamepad_cfg *ally_cfg) -+{ -+ ally_cfg->ls_dz.inner = 0x00; -+ ally_cfg->ls_dz.outer = 0x64; -+ ally_cfg->rs_dz.inner = 0x00; -+ ally_cfg->rs_dz.outer = 0x64; -+ ally_cfg->lt_dz.inner = 0x00; -+ ally_cfg->lt_dz.outer = 0x64; -+ ally_cfg->rt_dz.inner = 0x00; -+ ally_cfg->rt_dz.outer = 0x64; -+} -+ -+static ssize_t axis_xyz_deadzone_index_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return sysfs_emit(buf, "inner outer\n"); -+} -+ -+ALLY_DEVICE_ATTR_RO(axis_xyz_deadzone_index, deadzone_index); -+ 
-+ALLY_DEADZONES(axis_xy_left, ls_dz); -+ALLY_DEADZONES(axis_xy_right, rs_dz); -+ALLY_DEADZONES(axis_z_left, lt_dz); -+ALLY_DEADZONES(axis_z_right, rt_dz); -+ -+/* ANTI-DEADZONES *********************************************************************************/ -+static ssize_t _gamepad_apply_js_ADZ(struct hid_device *hdev, -+ struct ally_gamepad_cfg *ally_cfg) -+{ -+ u8 *hidbuf; -+ int ret; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ hidbuf[2] = xpad_cmd_set_adz; -+ hidbuf[3] = xpad_cmd_len_adz; -+ hidbuf[4] = ally_cfg->ls_adz; -+ hidbuf[5] = ally_cfg->rs_adz; -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ goto report_fail; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ goto report_fail; -+ -+report_fail: -+ kfree(hidbuf); -+ return ret; -+} -+ -+static void _gamepad_set_anti_deadzones_default(struct ally_gamepad_cfg *ally_cfg) -+{ -+ ally_cfg->ls_adz = 0x00; -+ ally_cfg->rs_adz = 0x00; -+} -+ -+static ssize_t _gamepad_js_ADZ_store(struct device *dev, const char *buf, u8 *adz) -+{ -+ int ret, val; -+ -+ ret = kstrtoint(buf, 0, &val); -+ if (ret) -+ return ret; -+ -+ if (val < 0 || val > 32) -+ return -EINVAL; -+ -+ *adz = val; -+ -+ return ret; -+} -+ -+static ssize_t axis_xy_left_anti_deadzone_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ -+ return sysfs_emit(buf, "%d\n", ally_cfg->ls_adz); -+} -+ -+static ssize_t axis_xy_left_anti_deadzone_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ int ret; -+ -+ ret = _gamepad_js_ADZ_store(dev, buf, &ally_cfg->ls_adz); -+ if (ret) -+ return ret; -+ -+ return count; -+} 
-+ALLY_DEVICE_ATTR_RW(axis_xy_left_anti_deadzone, anti_deadzone); -+ -+static ssize_t axis_xy_right_anti_deadzone_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ -+ return sysfs_emit(buf, "%d\n", ally_cfg->rs_adz); -+} -+ -+static ssize_t axis_xy_right_anti_deadzone_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ int ret; -+ -+ ret = _gamepad_js_ADZ_store(dev, buf, &ally_cfg->rs_adz); -+ if (ret) -+ return ret; -+ -+ return count; -+} -+ALLY_DEVICE_ATTR_RW(axis_xy_right_anti_deadzone, anti_deadzone); -+ -+/* JS RESPONSE CURVES *****************************************************************************/ -+static void _gamepad_set_js_response_curves_default(struct ally_gamepad_cfg *ally_cfg) -+{ -+ struct response_curve *js1_rc = &ally_cfg->ls_rc; -+ struct response_curve *js2_rc = &ally_cfg->rs_rc; -+ js1_rc->move_pct_1 = js2_rc->move_pct_1 = 0x16; // 25% -+ js1_rc->move_pct_2 = js2_rc->move_pct_2 = 0x32; // 50% -+ js1_rc->move_pct_3 = js2_rc->move_pct_3 = 0x48; // 75% -+ js1_rc->move_pct_4 = js2_rc->move_pct_4 = 0x64; // 100% -+ js1_rc->response_pct_1 = js2_rc->response_pct_1 = 0x16; -+ js1_rc->response_pct_2 = js2_rc->response_pct_2 = 0x32; -+ js1_rc->response_pct_3 = js2_rc->response_pct_3 = 0x48; -+ js1_rc->response_pct_4 = js2_rc->response_pct_4 = 0x64; -+} -+ -+static ssize_t _gamepad_apply_response_curves(struct hid_device *hdev, -+ struct ally_gamepad_cfg *ally_cfg) -+{ -+ u8 *hidbuf; -+ int ret; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ memcpy(&hidbuf[2], &ally_cfg->ls_rc, sizeof(ally_cfg->ls_rc)); -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ goto report_fail; -+ -+ hidbuf[4] = 0x02; -+ 
memcpy(&hidbuf[5], &ally_cfg->rs_rc, sizeof(ally_cfg->rs_rc)); -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ goto report_fail; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ goto report_fail; -+ -+report_fail: -+ kfree(hidbuf); -+ return ret; -+} -+ -+ALLY_JS_RC_POINT(axis_xy_left, move, 1); -+ALLY_JS_RC_POINT(axis_xy_left, move, 2); -+ALLY_JS_RC_POINT(axis_xy_left, move, 3); -+ALLY_JS_RC_POINT(axis_xy_left, move, 4); -+ALLY_JS_RC_POINT(axis_xy_left, response, 1); -+ALLY_JS_RC_POINT(axis_xy_left, response, 2); -+ALLY_JS_RC_POINT(axis_xy_left, response, 3); -+ALLY_JS_RC_POINT(axis_xy_left, response, 4); -+ -+ALLY_JS_RC_POINT(axis_xy_right, move, 1); -+ALLY_JS_RC_POINT(axis_xy_right, move, 2); -+ALLY_JS_RC_POINT(axis_xy_right, move, 3); -+ALLY_JS_RC_POINT(axis_xy_right, move, 4); -+ALLY_JS_RC_POINT(axis_xy_right, response, 1); -+ALLY_JS_RC_POINT(axis_xy_right, response, 2); -+ALLY_JS_RC_POINT(axis_xy_right, response, 3); -+ALLY_JS_RC_POINT(axis_xy_right, response, 4); -+ -+/* CALIBRATIONS ***********************************************************************************/ -+static int gamepad_get_calibration(struct hid_device *hdev) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ u8 *hidbuf; -+ int ret, i; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ for (i = 0; i < 2; i++) { -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = 0xD0; -+ hidbuf[2] = 0x03; -+ hidbuf[3] = i + 1; // 0x01 JS, 0x02 TR -+ hidbuf[4] = 0x20; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) { -+ hid_warn(hdev, "ROG Ally check failed set report: %d\n", ret); -+ goto cleanup; -+ } -+ -+ memset(hidbuf, 0, FEATURE_ROG_ALLY_REPORT_SIZE); -+ ret = asus_dev_get_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0 || hidbuf[5] != 1) { -+ 
hid_warn(hdev, "ROG Ally check failed get report: %d\n", ret); -+ goto cleanup; -+ } -+ -+ if (i == 0) { -+ /* Joystick calibration */ -+ reverse_bytes_in_pairs(&hidbuf[6], sizeof(struct js_axis_calibrations)); -+ ally_cfg->js_cal = *(struct js_axis_calibrations *)&hidbuf[6]; -+ print_hex_dump(KERN_INFO, "HID Buffer JS: ", DUMP_PREFIX_OFFSET, 16, 1, hidbuf, 32, true); -+ struct js_axis_calibrations *cal = &drvdata.gamepad_cfg->js_cal; -+ pr_err("LS_CAL: X: %d, Min: %d, Max: %d", cal->left_x_stable, cal->left_x_min, cal->left_x_max); -+ pr_err("LS_CAL: Y: %d, Min: %d, Max: %d", cal->left_y_stable, cal->left_y_min, cal->left_y_max); -+ pr_err("RS_CAL: X: %d, Min: %d, Max: %d", cal->right_x_stable, cal->right_x_min, cal->right_x_max); -+ pr_err("RS_CAL: Y: %d, Min: %d, Max: %d", cal->right_y_stable, cal->right_y_min, cal->right_y_max); -+ } else { -+ /* Trigger calibration */ -+ reverse_bytes_in_pairs(&hidbuf[6], sizeof(struct tr_axis_calibrations)); -+ ally_cfg->tr_cal = *(struct tr_axis_calibrations *)&hidbuf[6]; -+ print_hex_dump(KERN_INFO, "HID Buffer TR: ", DUMP_PREFIX_OFFSET, 16, 1, hidbuf, 32, true); -+ } -+ } -+ -+cleanup: -+ kfree(hidbuf); -+ return ret; -+} -+ -+static struct attribute *axis_xy_left_attrs[] = { -+ &dev_attr_axis_xy_left_anti_deadzone.attr, -+ &dev_attr_axis_xy_left_deadzone.attr, -+ &dev_attr_axis_xyz_deadzone_index.attr, -+ &dev_attr_axis_xy_left_move_1.attr, -+ &dev_attr_axis_xy_left_move_2.attr, -+ &dev_attr_axis_xy_left_move_3.attr, -+ &dev_attr_axis_xy_left_move_4.attr, -+ &dev_attr_axis_xy_left_response_1.attr, -+ &dev_attr_axis_xy_left_response_2.attr, -+ &dev_attr_axis_xy_left_response_3.attr, -+ &dev_attr_axis_xy_left_response_4.attr, -+ NULL -+}; -+static const struct attribute_group axis_xy_left_attr_group = { -+ .name = "axis_xy_left", -+ .attrs = axis_xy_left_attrs, -+}; -+ -+static struct attribute *axis_xy_right_attrs[] = { -+ &dev_attr_axis_xy_right_anti_deadzone.attr, -+ &dev_attr_axis_xy_right_deadzone.attr, -+ 
&dev_attr_axis_xyz_deadzone_index.attr, -+ &dev_attr_axis_xy_right_move_1.attr, -+ &dev_attr_axis_xy_right_move_2.attr, -+ &dev_attr_axis_xy_right_move_3.attr, -+ &dev_attr_axis_xy_right_move_4.attr, -+ &dev_attr_axis_xy_right_response_1.attr, -+ &dev_attr_axis_xy_right_response_2.attr, -+ &dev_attr_axis_xy_right_response_3.attr, -+ &dev_attr_axis_xy_right_response_4.attr, -+ NULL -+}; -+static const struct attribute_group axis_xy_right_attr_group = { -+ .name = "axis_xy_right", -+ .attrs = axis_xy_right_attrs, -+}; -+ -+static struct attribute *axis_z_left_attrs[] = { -+ &dev_attr_axis_z_left_deadzone.attr, -+ &dev_attr_axis_xyz_deadzone_index.attr, -+ NULL, -+}; -+static const struct attribute_group axis_z_left_attr_group = { -+ .name = "axis_z_left", -+ .attrs = axis_z_left_attrs, -+}; -+ -+static struct attribute *axis_z_right_attrs[] = { -+ &dev_attr_axis_z_right_deadzone.attr, -+ &dev_attr_axis_xyz_deadzone_index.attr, -+ NULL, -+}; -+static const struct attribute_group axis_z_right_attr_group = { -+ .name = "axis_z_right", -+ .attrs = axis_z_right_attrs, -+}; -+ -+/* A HID packet conatins mappings for two buttons: btn1, btn1_macro, btn2, btn2_macro */ -+static void _btn_pair_to_hid_pkt(struct ally_gamepad_cfg *ally_cfg, -+ enum btn_pair_index pair, -+ struct btn_data *btn1, struct btn_data *btn2, -+ u8 *out, int out_len) -+{ -+ int start = 5; -+ -+ out[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ out[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ out[2] = xpad_cmd_set_mapping; -+ out[3] = pair; -+ out[4] = xpad_cmd_len_mapping; -+ -+ btn_code_to_byte_array(btn1->button, &out[start]); -+ start += BTN_DATA_LEN; -+ btn_code_to_byte_array(btn1->macro, &out[start]); -+ start += BTN_DATA_LEN; -+ btn_code_to_byte_array(btn2->button, &out[start]); -+ start += BTN_DATA_LEN; -+ btn_code_to_byte_array(btn2->macro, &out[start]); -+ //print_hex_dump(KERN_DEBUG, "byte_array: ", DUMP_PREFIX_OFFSET, 64, 1, out, 64, false); -+} -+ -+/* Apply the mapping pair to the device */ -+static int 
_gamepad_apply_btn_pair(struct hid_device *hdev, struct ally_gamepad_cfg *ally_cfg, -+ enum btn_pair_index btn_pair) -+{ -+ u8 mode = ally_cfg->mode - 1; -+ struct btn_data *btn1, *btn2; -+ u8 *hidbuf; -+ int ret; -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ return ret; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ switch (btn_pair) { -+ case btn_pair_dpad_u_d: -+ btn1 = &ally_cfg->key_mapping[mode].dpad_up; -+ btn2 = &ally_cfg->key_mapping[mode].dpad_down; -+ break; -+ case btn_pair_dpad_l_r: -+ btn1 = &ally_cfg->key_mapping[mode].dpad_left; -+ btn2 = &ally_cfg->key_mapping[mode].dpad_right; -+ break; -+ case btn_pair_ls_rs: -+ btn1 = &ally_cfg->key_mapping[mode].btn_ls; -+ btn2 = &ally_cfg->key_mapping[mode].btn_rs; -+ break; -+ case btn_pair_lb_rb: -+ btn1 = &ally_cfg->key_mapping[mode].btn_lb; -+ btn2 = &ally_cfg->key_mapping[mode].btn_rb; -+ break; -+ case btn_pair_lt_rt: -+ btn1 = &ally_cfg->key_mapping[mode].btn_lt; -+ btn2 = &ally_cfg->key_mapping[mode].btn_rt; -+ break; -+ case btn_pair_a_b: -+ btn1 = &ally_cfg->key_mapping[mode].btn_a; -+ btn2 = &ally_cfg->key_mapping[mode].btn_b; -+ break; -+ case btn_pair_x_y: -+ btn1 = &ally_cfg->key_mapping[mode].btn_x; -+ btn2 = &ally_cfg->key_mapping[mode].btn_y; -+ break; -+ case btn_pair_view_menu: -+ btn1 = &ally_cfg->key_mapping[mode].btn_view; -+ btn2 = &ally_cfg->key_mapping[mode].btn_menu; -+ break; -+ case btn_pair_m1_m2: -+ btn1 = &ally_cfg->key_mapping[mode].btn_m1; -+ btn2 = &ally_cfg->key_mapping[mode].btn_m2; -+ break; -+ default: -+ break; -+ } -+ -+ _btn_pair_to_hid_pkt(ally_cfg, btn_pair, btn1, btn2, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ -+ kfree(hidbuf); -+ -+ return ret; -+} -+ -+static int _gamepad_apply_turbo(struct hid_device *hdev, struct ally_gamepad_cfg *ally_cfg) -+{ -+ struct btn_mapping *map = &ally_cfg->key_mapping[ally_cfg->mode - 
1]; -+ u8 *hidbuf; -+ int ret; -+ -+ /* set turbo */ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ hidbuf[2] = xpad_cmd_set_turbo; -+ hidbuf[3] = xpad_cmd_len_turbo; -+ -+ hidbuf[4] = map->dpad_up.turbo; -+ hidbuf[6] = map->dpad_down.turbo; -+ hidbuf[8] = map->dpad_left.turbo; -+ hidbuf[10] = map->dpad_right.turbo; -+ -+ hidbuf[12] = map->btn_ls.turbo; -+ hidbuf[14] = map->btn_rs.turbo; -+ hidbuf[16] = map->btn_lb.turbo; -+ hidbuf[18] = map->btn_rb.turbo; -+ -+ hidbuf[20] = map->btn_a.turbo; -+ hidbuf[22] = map->btn_b.turbo; -+ hidbuf[24] = map->btn_x.turbo; -+ hidbuf[26] = map->btn_y.turbo; -+ -+ hidbuf[28] = map->btn_lt.turbo; -+ hidbuf[30] = map->btn_rt.turbo; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ -+ kfree(hidbuf); -+ -+ return ret; -+} -+ -+static ssize_t _gamepad_apply_all(struct hid_device *hdev, struct ally_gamepad_cfg *ally_cfg) -+{ -+ int ret; -+ -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_dpad_u_d); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_dpad_l_r); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_ls_rs); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_lb_rb); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_a_b); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_x_y); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_view_menu); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_m1_m2); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_lt_rt); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_turbo(hdev, ally_cfg); -+ if (ret < 0) -+ 
return ret; -+ ret = _gamepad_apply_deadzones(hdev, ally_cfg); -+ if (ret < 0) -+ return ret; -+ ret = _gamepad_apply_js_ADZ(hdev, ally_cfg); -+ if (ret < 0) -+ return ret; -+ ret =_gamepad_apply_response_curves(hdev, ally_cfg); -+ if (ret < 0) -+ return ret; -+ -+ return 0; -+} -+ -+static ssize_t gamepad_apply_all_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ struct hid_device *hdev = to_hid_device(dev); -+ int ret; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ ret = _gamepad_apply_all(hdev, ally_cfg); -+ if (ret < 0) -+ return ret; -+ -+ return count; -+} -+ALLY_DEVICE_ATTR_WO(gamepad_apply_all, apply_all); -+ -+/* button map attributes, regular and macro*/ -+ALLY_BTN_MAPPING(m1, btn_m1); -+ALLY_BTN_MAPPING(m2, btn_m2); -+ALLY_BTN_MAPPING(view, btn_view); -+ALLY_BTN_MAPPING(menu, btn_menu); -+ALLY_TURBO_BTN_MAPPING(a, btn_a); -+ALLY_TURBO_BTN_MAPPING(b, btn_b); -+ALLY_TURBO_BTN_MAPPING(x, btn_x); -+ALLY_TURBO_BTN_MAPPING(y, btn_y); -+ALLY_TURBO_BTN_MAPPING(lb, btn_lb); -+ALLY_TURBO_BTN_MAPPING(rb, btn_rb); -+ALLY_TURBO_BTN_MAPPING(ls, btn_ls); -+ALLY_TURBO_BTN_MAPPING(rs, btn_rs); -+ALLY_TURBO_BTN_MAPPING(lt, btn_lt); -+ALLY_TURBO_BTN_MAPPING(rt, btn_rt); -+ALLY_TURBO_BTN_MAPPING(dpad_u, dpad_up); -+ALLY_TURBO_BTN_MAPPING(dpad_d, dpad_down); -+ALLY_TURBO_BTN_MAPPING(dpad_l, dpad_left); -+ALLY_TURBO_BTN_MAPPING(dpad_r, dpad_right); -+ -+static void _gamepad_set_xpad_default(struct ally_gamepad_cfg *ally_cfg) -+{ -+ struct btn_mapping *map = &ally_cfg->key_mapping[ally_cfg->mode - 1]; -+ map->btn_m1.button = BTN_KB_M1; -+ map->btn_m2.button = BTN_KB_M2; -+ map->btn_a.button = BTN_PAD_A; -+ map->btn_b.button = BTN_PAD_B; -+ map->btn_x.button = BTN_PAD_X; -+ map->btn_y.button = BTN_PAD_Y; -+ map->btn_lb.button = BTN_PAD_LB; -+ map->btn_rb.button = BTN_PAD_RB; -+ map->btn_lt.button = BTN_PAD_LT; -+ map->btn_rt.button = BTN_PAD_RT; -+ 
map->btn_ls.button = BTN_PAD_LS; -+ map->btn_rs.button = BTN_PAD_RS; -+ map->dpad_up.button = BTN_PAD_DPAD_UP; -+ map->dpad_down.button = BTN_PAD_DPAD_DOWN; -+ map->dpad_left.button = BTN_PAD_DPAD_LEFT; -+ map->dpad_right.button = BTN_PAD_DPAD_RIGHT; -+ map->btn_view.button = BTN_PAD_VIEW; -+ map->btn_menu.button = BTN_PAD_MENU; -+} -+ -+static ssize_t btn_mapping_reset_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ switch (ally_cfg->mode) { -+ case xpad_mode_game: -+ _gamepad_set_xpad_default(ally_cfg); -+ break; -+ default: -+ _gamepad_set_xpad_default(ally_cfg); -+ break; -+ } -+ -+ return count; -+} -+ALLY_DEVICE_ATTR_WO(btn_mapping_reset, reset_btn_mapping); -+ -+/* GAMEPAD MODE */ -+static ssize_t _gamepad_set_mode(struct hid_device *hdev, struct ally_gamepad_cfg *ally_cfg, -+ int val) -+{ -+ u8 *hidbuf; -+ int ret; -+ -+ hidbuf = kzalloc(FEATURE_ROG_ALLY_REPORT_SIZE, GFP_KERNEL); -+ if (!hidbuf) -+ return -ENOMEM; -+ -+ hidbuf[0] = FEATURE_ROG_ALLY_REPORT_ID; -+ hidbuf[1] = FEATURE_ROG_ALLY_CODE_PAGE; -+ hidbuf[2] = xpad_cmd_set_mode; -+ hidbuf[3] = xpad_cmd_len_mode; -+ hidbuf[4] = val; -+ -+ ret = ally_gamepad_check_ready(hdev); -+ if (ret < 0) -+ goto report_fail; -+ -+ ret = asus_dev_set_report(hdev, hidbuf, FEATURE_ROG_ALLY_REPORT_SIZE); -+ if (ret < 0) -+ goto report_fail; -+ -+ ret = _gamepad_apply_all(hdev, ally_cfg); -+ if (ret < 0) -+ goto report_fail; -+ -+report_fail: -+ kfree(hidbuf); -+ return ret; -+} -+ -+static ssize_t gamepad_mode_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ return sysfs_emit(buf, "%d\n", ally_cfg->mode); -+} -+ -+static ssize_t gamepad_mode_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, 
size_t count) -+{ -+ struct hid_device *hdev = to_hid_device(dev); -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ int ret, val; -+ -+ if (!drvdata.gamepad_cfg) -+ return -ENODEV; -+ -+ ret = kstrtoint(buf, 0, &val); -+ if (ret) -+ return ret; -+ -+ if (val < xpad_mode_game || val > xpad_mode_mouse) -+ return -EINVAL; -+ -+ ally_cfg->mode = val; -+ -+ ret = _gamepad_set_mode(hdev, ally_cfg, val); -+ if (ret < 0) -+ return ret; -+ -+ return count; -+} -+ -+DEVICE_ATTR_RW(gamepad_mode); -+ -+static ssize_t mcu_version_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ return sysfs_emit(buf, "%d\n", drvdata.mcu_version); -+} -+ -+DEVICE_ATTR_RO(mcu_version); -+ -+/* ROOT LEVEL ATTRS *******************************************************************************/ -+static struct attribute *gamepad_device_attrs[] = { -+ &dev_attr_btn_mapping_reset.attr, -+ &dev_attr_gamepad_mode.attr, -+ &dev_attr_gamepad_apply_all.attr, -+ &dev_attr_gamepad_vibration_intensity.attr, -+ &dev_attr_gamepad_vibration_intensity_index.attr, -+ &dev_attr_mcu_version.attr, -+ NULL -+}; -+ -+static const struct attribute_group ally_controller_attr_group = { -+ .attrs = gamepad_device_attrs, -+}; -+ -+static const struct attribute_group *gamepad_device_attr_groups[] = { -+ &ally_controller_attr_group, -+ &axis_xy_left_attr_group, -+ &axis_xy_right_attr_group, -+ &axis_z_left_attr_group, -+ &axis_z_right_attr_group, -+ &btn_mapping_m1_attr_group, -+ &btn_mapping_m2_attr_group, -+ &btn_mapping_a_attr_group, -+ &btn_mapping_b_attr_group, -+ &btn_mapping_x_attr_group, -+ &btn_mapping_y_attr_group, -+ &btn_mapping_lb_attr_group, -+ &btn_mapping_rb_attr_group, -+ &btn_mapping_ls_attr_group, -+ &btn_mapping_rs_attr_group, -+ &btn_mapping_lt_attr_group, -+ &btn_mapping_rt_attr_group, -+ &btn_mapping_dpad_u_attr_group, -+ &btn_mapping_dpad_d_attr_group, -+ &btn_mapping_dpad_l_attr_group, -+ &btn_mapping_dpad_r_attr_group, -+ &btn_mapping_view_attr_group, -+ 
&btn_mapping_menu_attr_group, -+ NULL, -+}; -+ -+static struct ally_gamepad_cfg *ally_gamepad_cfg_create(struct hid_device *hdev) -+{ -+ struct ally_gamepad_cfg *ally_cfg; -+ struct input_dev *input_dev; -+ int err; -+ -+ ally_cfg = devm_kzalloc(&hdev->dev, sizeof(*ally_cfg), GFP_KERNEL); -+ if (!ally_cfg) -+ return ERR_PTR(-ENOMEM); -+ ally_cfg->hdev = hdev; -+ // Allocate memory for each mode's `btn_mapping` -+ ally_cfg->mode = xpad_mode_game; -+ -+ input_dev = devm_input_allocate_device(&hdev->dev); -+ if (!input_dev) { -+ err = -ENOMEM; -+ goto free_ally_cfg; -+ } -+ -+ input_dev->id.bustype = hdev->bus; -+ input_dev->id.vendor = hdev->vendor; -+ input_dev->id.product = hdev->product; -+ input_dev->id.version = hdev->version; -+ input_dev->uniq = hdev->uniq; -+ input_dev->name = "ASUS ROG Ally Config"; -+ input_set_capability(input_dev, EV_KEY, KEY_PROG1); -+ input_set_capability(input_dev, EV_KEY, KEY_F16); -+ input_set_capability(input_dev, EV_KEY, KEY_F17); -+ input_set_capability(input_dev, EV_KEY, KEY_F18); -+ input_set_drvdata(input_dev, hdev); -+ -+ err = input_register_device(input_dev); -+ if (err) -+ goto free_input_dev; -+ ally_cfg->input = input_dev; -+ -+ /* ignore all errors for this as they are related to USB HID I/O */ -+ _gamepad_set_xpad_default(ally_cfg); -+ ally_cfg->key_mapping[ally_cfg->mode - 1].btn_m1.button = BTN_KB_M1; -+ ally_cfg->key_mapping[ally_cfg->mode - 1].btn_m2.button = BTN_KB_M2; -+ _gamepad_apply_btn_pair(hdev, ally_cfg, btn_pair_m1_m2); -+ gamepad_get_calibration(hdev); -+ -+ ally_cfg->vibration_intensity[0] = 0x64; -+ ally_cfg->vibration_intensity[1] = 0x64; -+ _gamepad_set_deadzones_default(ally_cfg); -+ _gamepad_set_anti_deadzones_default(ally_cfg); -+ _gamepad_set_js_response_curves_default(ally_cfg); -+ -+ drvdata.gamepad_cfg = ally_cfg; // Must asign before attr group setup -+ if (sysfs_create_groups(&hdev->dev.kobj, gamepad_device_attr_groups)) { -+ err = -ENODEV; -+ goto unregister_input_dev; -+ } -+ -+ return 
ally_cfg; -+ -+unregister_input_dev: -+ input_unregister_device(input_dev); -+ ally_cfg->input = NULL; // Prevent double free when kfree(ally_cfg) happens -+ -+free_input_dev: -+ devm_kfree(&hdev->dev, input_dev); -+ -+free_ally_cfg: -+ devm_kfree(&hdev->dev, ally_cfg); -+ return ERR_PTR(err); -+} -+ -+static void ally_cfg_remove(struct hid_device *hdev) -+{ -+ // __gamepad_set_mode(hdev, drvdata.gamepad_cfg, xpad_mode_mouse); -+ sysfs_remove_groups(&hdev->dev.kobj, gamepad_device_attr_groups); -+} -+ -+/**************************************************************************************************/ -+/* ROG Ally gamepad i/o and force-feedback */ -+/**************************************************************************************************/ -+static int ally_x_raw_event(struct ally_x_device *ally_x, struct hid_report *report, u8 *data, -+ int size) -+{ -+ struct ally_x_input_report *in_report; -+ unsigned long flags; -+ u8 byte; -+ -+ if (data[0] == 0x0B) { -+ in_report = (struct ally_x_input_report *)&data[1]; -+ -+ input_report_abs(ally_x->input, ABS_X, in_report->x); -+ input_report_abs(ally_x->input, ABS_Y, in_report->y); -+ input_report_abs(ally_x->input, ABS_RX, in_report->rx); -+ input_report_abs(ally_x->input, ABS_RY, in_report->ry); -+ input_report_abs(ally_x->input, ABS_Z, in_report->z); -+ input_report_abs(ally_x->input, ABS_RZ, in_report->rz); -+ -+ byte = in_report->buttons[0]; -+ input_report_key(ally_x->input, BTN_A, byte & BIT(0)); -+ input_report_key(ally_x->input, BTN_B, byte & BIT(1)); -+ input_report_key(ally_x->input, BTN_X, byte & BIT(2)); -+ input_report_key(ally_x->input, BTN_Y, byte & BIT(3)); -+ input_report_key(ally_x->input, BTN_TL, byte & BIT(4)); -+ input_report_key(ally_x->input, BTN_TR, byte & BIT(5)); -+ input_report_key(ally_x->input, BTN_SELECT, byte & BIT(6)); -+ input_report_key(ally_x->input, BTN_START, byte & BIT(7)); -+ -+ byte = in_report->buttons[1]; -+ input_report_key(ally_x->input, BTN_THUMBL, byte & BIT(0)); 
-+ input_report_key(ally_x->input, BTN_THUMBR, byte & BIT(1)); -+ input_report_key(ally_x->input, BTN_MODE, byte & BIT(2)); -+ -+ byte = in_report->buttons[2]; -+ input_report_abs(ally_x->input, ABS_HAT0X, hat_values[byte][0]); -+ input_report_abs(ally_x->input, ABS_HAT0Y, hat_values[byte][1]); -+ } -+ /* -+ * The MCU used on Ally provides many devices: gamepad, keyboord, mouse, other. -+ * The AC and QAM buttons route through another interface making it difficult to -+ * use the events unless we grab those and use them here. Only works for Ally X. -+ */ -+ else if (data[0] == 0x5A) { -+ if (ally_x->qam_btns_steam_mode) { -+ spin_lock_irqsave(&ally_x->lock, flags); -+ if (data[1] == 0x38 && !ally_x->update_qam_btn) { -+ ally_x->update_qam_btn = true; -+ if (ally_x->output_worker_initialized) -+ schedule_work(&ally_x->output_worker); -+ } -+ spin_unlock_irqrestore(&ally_x->lock, flags); -+ /* Left/XBox button. Long press does ctrl+alt+del which we can't catch */ -+ input_report_key(ally_x->input, BTN_MODE, data[1] == 0xA6); -+ } else { -+ input_report_key(ally_x->input, KEY_F16, data[1] == 0xA6); -+ input_report_key(ally_x->input, KEY_PROG1, data[1] == 0x38); -+ } -+ /* QAM long press */ -+ input_report_key(ally_x->input, KEY_F17, data[1] == 0xA7); -+ /* QAM long press released */ -+ input_report_key(ally_x->input, KEY_F18, data[1] == 0xA8); -+ } -+ -+ input_sync(ally_x->input); -+ -+ return 0; -+} -+ -+static struct input_dev *ally_x_alloc_input_dev(struct hid_device *hdev, -+ const char *name_suffix) -+{ -+ struct input_dev *input_dev; -+ -+ input_dev = devm_input_allocate_device(&hdev->dev); -+ if (!input_dev) -+ return ERR_PTR(-ENOMEM); -+ -+ input_dev->id.bustype = hdev->bus; -+ input_dev->id.vendor = hdev->vendor; -+ input_dev->id.product = hdev->product; -+ input_dev->id.version = hdev->version; -+ input_dev->uniq = hdev->uniq; -+ input_dev->name = "ASUS ROG Ally X Gamepad"; -+ -+ input_set_drvdata(input_dev, hdev); -+ -+ return input_dev; -+} -+ -+static int 
ally_x_play_effect(struct input_dev *idev, void *data, struct ff_effect *effect) -+{ -+ struct ally_x_device *ally_x = drvdata.ally_x; -+ unsigned long flags; -+ -+ if (effect->type != FF_RUMBLE) -+ return 0; -+ -+ spin_lock_irqsave(&ally_x->lock, flags); -+ ally_x->ff_packet->ff.magnitude_strong = effect->u.rumble.strong_magnitude / 512; -+ ally_x->ff_packet->ff.magnitude_weak = effect->u.rumble.weak_magnitude / 512; -+ ally_x->update_ff = true; -+ spin_unlock_irqrestore(&ally_x->lock, flags); -+ -+ if (ally_x->output_worker_initialized) -+ schedule_work(&ally_x->output_worker); -+ -+ return 0; -+} -+ -+static void ally_x_work(struct work_struct *work) -+{ -+ struct ally_x_device *ally_x = container_of(work, struct ally_x_device, output_worker); -+ struct ff_report *ff_report = NULL; -+ bool update_qam = false; -+ bool update_ff = false; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&ally_x->lock, flags); -+ update_ff = ally_x->update_ff; -+ if (ally_x->update_ff) { -+ ff_report = kmemdup(ally_x->ff_packet, sizeof(*ally_x->ff_packet), GFP_KERNEL); -+ ally_x->update_ff = false; -+ } -+ update_qam = ally_x->update_qam_btn; -+ spin_unlock_irqrestore(&ally_x->lock, flags); -+ -+ if (update_ff && ff_report) { -+ ff_report->ff.magnitude_left = ff_report->ff.magnitude_strong; -+ ff_report->ff.magnitude_right = ff_report->ff.magnitude_weak; -+ asus_dev_set_report(ally_x->hdev, (u8 *)ff_report, sizeof(*ff_report)); -+ } -+ kfree(ff_report); -+ -+ if (update_qam) { -+ /* -+ * The sleeps here are required to allow steam to register the button combo. 
-+ */ -+ usleep_range(1000, 2000); -+ input_report_key(ally_x->input, BTN_MODE, 1); -+ input_sync(ally_x->input); -+ -+ msleep(80); -+ input_report_key(ally_x->input, BTN_A, 1); -+ input_sync(ally_x->input); -+ -+ msleep(80); -+ input_report_key(ally_x->input, BTN_A, 0); -+ input_sync(ally_x->input); -+ -+ msleep(80); -+ input_report_key(ally_x->input, BTN_MODE, 0); -+ input_sync(ally_x->input); -+ -+ spin_lock_irqsave(&ally_x->lock, flags); -+ ally_x->update_qam_btn = false; -+ spin_unlock_irqrestore(&ally_x->lock, flags); -+ } -+} -+ -+static struct input_dev *ally_x_setup_input(struct hid_device *hdev) -+{ -+ int ret, abs_min = 0, js_abs_max = 65535, tr_abs_max = 1023; -+ struct input_dev *input; -+ -+ input = ally_x_alloc_input_dev(hdev, NULL); -+ if (IS_ERR(input)) -+ return ERR_CAST(input); -+ -+ input_set_abs_params(input, ABS_X, abs_min, js_abs_max, 0, 0); -+ input_set_abs_params(input, ABS_Y, abs_min, js_abs_max, 0, 0); -+ input_set_abs_params(input, ABS_RX, abs_min, js_abs_max, 0, 0); -+ input_set_abs_params(input, ABS_RY, abs_min, js_abs_max, 0, 0); -+ input_set_abs_params(input, ABS_Z, abs_min, tr_abs_max, 0, 0); -+ input_set_abs_params(input, ABS_RZ, abs_min, tr_abs_max, 0, 0); -+ input_set_abs_params(input, ABS_HAT0X, -1, 1, 0, 0); -+ input_set_abs_params(input, ABS_HAT0Y, -1, 1, 0, 0); -+ input_set_capability(input, EV_KEY, BTN_A); -+ input_set_capability(input, EV_KEY, BTN_B); -+ input_set_capability(input, EV_KEY, BTN_X); -+ input_set_capability(input, EV_KEY, BTN_Y); -+ input_set_capability(input, EV_KEY, BTN_TL); -+ input_set_capability(input, EV_KEY, BTN_TR); -+ input_set_capability(input, EV_KEY, BTN_SELECT); -+ input_set_capability(input, EV_KEY, BTN_START); -+ input_set_capability(input, EV_KEY, BTN_MODE); -+ input_set_capability(input, EV_KEY, BTN_THUMBL); -+ input_set_capability(input, EV_KEY, BTN_THUMBR); -+ -+ input_set_capability(input, EV_KEY, KEY_PROG1); -+ input_set_capability(input, EV_KEY, KEY_F16); -+ input_set_capability(input, 
EV_KEY, KEY_F17); -+ input_set_capability(input, EV_KEY, KEY_F18); -+ -+ input_set_capability(input, EV_FF, FF_RUMBLE); -+ input_ff_create_memless(input, NULL, ally_x_play_effect); -+ -+ ret = input_register_device(input); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ return input; -+} -+ -+static ssize_t ally_x_qam_mode_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ struct ally_x_device *ally_x = drvdata.ally_x; -+ -+ return sysfs_emit(buf, "%d\n", ally_x->qam_btns_steam_mode); -+} -+ -+static ssize_t ally_x_qam_mode_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct ally_x_device *ally_x = drvdata.ally_x; -+ bool val; -+ int ret; -+ -+ ret = kstrtobool(buf, &val); -+ if (ret < 0) -+ return ret; -+ -+ ally_x->qam_btns_steam_mode = val; -+ -+ return count; -+} -+ALLY_DEVICE_ATTR_RW(ally_x_qam_mode, qam_mode); -+ -+static struct ally_x_device *ally_x_create(struct hid_device *hdev) -+{ -+ uint8_t max_output_report_size; -+ struct ally_x_device *ally_x; -+ struct ff_report *report; -+ int ret; -+ -+ ally_x = devm_kzalloc(&hdev->dev, sizeof(*ally_x), GFP_KERNEL); -+ if (!ally_x) -+ return ERR_PTR(-ENOMEM); -+ -+ ally_x->hdev = hdev; -+ INIT_WORK(&ally_x->output_worker, ally_x_work); -+ spin_lock_init(&ally_x->lock); -+ ally_x->output_worker_initialized = true; -+ ally_x->qam_btns_steam_mode = -+ true; /* Always default to steam mode, it can be changed by userspace attr */ -+ -+ max_output_report_size = sizeof(struct ally_x_input_report); -+ report = devm_kzalloc(&hdev->dev, sizeof(*report), GFP_KERNEL); -+ if (!report) { -+ ret = -ENOMEM; -+ goto free_ally_x; -+ } -+ -+ /* None of these bytes will change for the FF command for now */ -+ report->report_id = 0x0D; -+ report->ff.enable = 0x0F; /* Enable all by default */ -+ report->ff.pulse_sustain_10ms = 0xFF; /* Duration */ -+ report->ff.pulse_release_10ms = 0x00; /* Start Delay */ -+ report->ff.loop_count = 0xEB; /* Loop Count */ -+ 
ally_x->ff_packet = report; -+ -+ ally_x->input = ally_x_setup_input(hdev); -+ if (IS_ERR(ally_x->input)) { -+ ret = PTR_ERR(ally_x->input); -+ goto free_ff_packet; -+ } -+ -+ if (sysfs_create_file(&hdev->dev.kobj, &dev_attr_ally_x_qam_mode.attr)) { -+ ret = -ENODEV; -+ goto unregister_input; -+ } -+ -+ ally_x->update_ff = true; -+ if (ally_x->output_worker_initialized) -+ schedule_work(&ally_x->output_worker); -+ -+ hid_info(hdev, "Registered Ally X controller using %s\n", -+ dev_name(&ally_x->input->dev)); -+ return ally_x; -+ -+unregister_input: -+ input_unregister_device(ally_x->input); -+free_ff_packet: -+ kfree(ally_x->ff_packet); -+free_ally_x: -+ kfree(ally_x); -+ return ERR_PTR(ret); -+} -+ -+static void ally_x_remove(struct hid_device *hdev) -+{ -+ struct ally_x_device *ally_x = drvdata.ally_x; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&ally_x->lock, flags); -+ ally_x->output_worker_initialized = false; -+ spin_unlock_irqrestore(&ally_x->lock, flags); -+ cancel_work_sync(&ally_x->output_worker); -+ sysfs_remove_file(&hdev->dev.kobj, &dev_attr_ally_x_qam_mode.attr); -+} -+ -+/**************************************************************************************************/ -+/* ROG Ally LED control */ -+/**************************************************************************************************/ -+static void ally_rgb_schedule_work(struct ally_rgb_dev *led) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&led->lock, flags); -+ if (!led->removed) -+ schedule_work(&led->work); -+ spin_unlock_irqrestore(&led->lock, flags); -+} -+ -+/* -+ * The RGB still has the basic 0-3 level brightness. 
Since the multicolour -+ * brightness is being used in place, set this to max -+ */ -+static int ally_rgb_set_bright_base_max(struct hid_device *hdev) -+{ -+ u8 buf[] = { FEATURE_KBD_LED_REPORT_ID1, 0xba, 0xc5, 0xc4, 0x02 }; -+ -+ return asus_dev_set_report(hdev, buf, sizeof(buf)); -+} -+ -+static void ally_rgb_do_work(struct work_struct *work) -+{ -+ struct ally_rgb_dev *led = container_of(work, struct ally_rgb_dev, work); -+ int ret; -+ unsigned long flags; -+ -+ u8 buf[16] = { [0] = FEATURE_ROG_ALLY_REPORT_ID, -+ [1] = FEATURE_ROG_ALLY_CODE_PAGE, -+ [2] = xpad_cmd_set_leds, -+ [3] = xpad_cmd_len_leds }; -+ -+ spin_lock_irqsave(&led->lock, flags); -+ if (!led->update_rgb) { -+ spin_unlock_irqrestore(&led->lock, flags); -+ return; -+ } -+ -+ for (int i = 0; i < 4; i++) { -+ buf[5 + i * 3] = drvdata.led_rgb_dev->green[i]; -+ buf[6 + i * 3] = drvdata.led_rgb_dev->blue[i]; -+ buf[4 + i * 3] = drvdata.led_rgb_dev->red[i]; -+ } -+ led->update_rgb = false; -+ -+ spin_unlock_irqrestore(&led->lock, flags); -+ -+ ret = asus_dev_set_report(led->hdev, buf, sizeof(buf)); -+ if (ret < 0) -+ hid_err(led->hdev, "Ally failed to set gamepad backlight: %d\n", ret); -+} -+ -+static void ally_rgb_set(struct led_classdev *cdev, enum led_brightness brightness) -+{ -+ struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(cdev); -+ struct ally_rgb_dev *led = container_of(mc_cdev, struct ally_rgb_dev, led_rgb_dev); -+ int intensity, bright; -+ unsigned long flags; -+ -+ led_mc_calc_color_components(mc_cdev, brightness); -+ spin_lock_irqsave(&led->lock, flags); -+ led->update_rgb = true; -+ bright = mc_cdev->led_cdev.brightness; -+ for (int i = 0; i < 4; i++) { -+ intensity = mc_cdev->subled_info[i].intensity; -+ drvdata.led_rgb_dev->red[i] = (((intensity >> 16) & 0xFF) * bright) / 255; -+ drvdata.led_rgb_dev->green[i] = (((intensity >> 8) & 0xFF) * bright) / 255; -+ drvdata.led_rgb_dev->blue[i] = ((intensity & 0xFF) * bright) / 255; -+ } -+ spin_unlock_irqrestore(&led->lock, flags); -+ 
drvdata.led_rgb_data.initialized = true; -+ -+ ally_rgb_schedule_work(led); -+} -+ -+static int ally_rgb_set_static_from_multi(struct hid_device *hdev) -+{ -+ u8 buf[17] = {FEATURE_KBD_LED_REPORT_ID1, 0xb3}; -+ int ret; -+ -+ /* -+ * Set single zone single colour based on the first LED of EC software mode. -+ * buf[2] = zone, buf[3] = mode -+ */ -+ buf[4] = drvdata.led_rgb_data.red[0]; -+ buf[5] = drvdata.led_rgb_data.green[0]; -+ buf[6] = drvdata.led_rgb_data.blue[0]; -+ -+ ret = asus_dev_set_report(hdev, buf, sizeof(buf)); -+ if (ret < 0) -+ return ret; -+ -+ ret = asus_dev_set_report(hdev, EC_MODE_LED_APPLY, sizeof(EC_MODE_LED_APPLY)); -+ if (ret < 0) -+ return ret; -+ -+ return asus_dev_set_report(hdev, EC_MODE_LED_SET, sizeof(EC_MODE_LED_SET)); -+} -+ -+/* -+ * Store the RGB values for restoring on resume, and set the static mode to the first LED colour -+*/ -+static void ally_rgb_store_settings(void) -+{ -+ int arr_size = sizeof(drvdata.led_rgb_data.red); -+ -+ struct ally_rgb_dev *led_rgb = drvdata.led_rgb_dev; -+ -+ drvdata.led_rgb_data.brightness = led_rgb->led_rgb_dev.led_cdev.brightness; -+ -+ memcpy(drvdata.led_rgb_data.red, led_rgb->red, arr_size); -+ memcpy(drvdata.led_rgb_data.green, led_rgb->green, arr_size); -+ memcpy(drvdata.led_rgb_data.blue, led_rgb->blue, arr_size); -+ -+ ally_rgb_set_static_from_multi(led_rgb->hdev); -+} -+ -+static void ally_rgb_restore_settings(struct ally_rgb_dev *led_rgb, struct led_classdev *led_cdev, -+ struct mc_subled *mc_led_info) -+{ -+ int arr_size = sizeof(drvdata.led_rgb_data.red); -+ -+ memcpy(led_rgb->red, drvdata.led_rgb_data.red, arr_size); -+ memcpy(led_rgb->green, drvdata.led_rgb_data.green, arr_size); -+ memcpy(led_rgb->blue, drvdata.led_rgb_data.blue, arr_size); -+ for (int i = 0; i < 4; i++) { -+ mc_led_info[i].intensity = (drvdata.led_rgb_data.red[i] << 16) | -+ (drvdata.led_rgb_data.green[i] << 8) | -+ drvdata.led_rgb_data.blue[i]; -+ } -+ led_cdev->brightness = drvdata.led_rgb_data.brightness; -+} -+ 
-+/* Set LEDs. Call after any setup. */ -+static void ally_rgb_resume(void) -+{ -+ struct ally_rgb_dev *led_rgb = drvdata.led_rgb_dev; -+ struct led_classdev *led_cdev; -+ struct mc_subled *mc_led_info; -+ -+ if (!led_rgb) -+ return; -+ -+ led_cdev = &led_rgb->led_rgb_dev.led_cdev; -+ mc_led_info = led_rgb->led_rgb_dev.subled_info; -+ -+ if (drvdata.led_rgb_data.initialized) { -+ ally_rgb_restore_settings(led_rgb, led_cdev, mc_led_info); -+ led_rgb->update_rgb = true; -+ ally_rgb_schedule_work(led_rgb); -+ ally_rgb_set_bright_base_max(led_rgb->hdev); -+ } -+} -+ -+static int ally_rgb_register(struct hid_device *hdev, struct ally_rgb_dev *led_rgb) -+{ -+ struct mc_subled *mc_led_info; -+ struct led_classdev *led_cdev; -+ -+ mc_led_info = -+ devm_kmalloc_array(&hdev->dev, 12, sizeof(*mc_led_info), GFP_KERNEL | __GFP_ZERO); -+ if (!mc_led_info) -+ return -ENOMEM; -+ -+ mc_led_info[0].color_index = LED_COLOR_ID_RGB; -+ mc_led_info[1].color_index = LED_COLOR_ID_RGB; -+ mc_led_info[2].color_index = LED_COLOR_ID_RGB; -+ mc_led_info[3].color_index = LED_COLOR_ID_RGB; -+ -+ led_rgb->led_rgb_dev.subled_info = mc_led_info; -+ led_rgb->led_rgb_dev.num_colors = 4; -+ -+ led_cdev = &led_rgb->led_rgb_dev.led_cdev; -+ led_cdev->brightness = 128; -+ led_cdev->name = "ally:rgb:joystick_rings"; -+ led_cdev->max_brightness = 255; -+ led_cdev->brightness_set = ally_rgb_set; -+ -+ if (drvdata.led_rgb_data.initialized) { -+ ally_rgb_restore_settings(led_rgb, led_cdev, mc_led_info); -+ } -+ -+ return devm_led_classdev_multicolor_register(&hdev->dev, &led_rgb->led_rgb_dev); -+} -+ -+static struct ally_rgb_dev *ally_rgb_create(struct hid_device *hdev) -+{ -+ struct ally_rgb_dev *led_rgb; -+ int ret; -+ -+ led_rgb = devm_kzalloc(&hdev->dev, sizeof(struct ally_rgb_dev), GFP_KERNEL); -+ if (!led_rgb) -+ return ERR_PTR(-ENOMEM); -+ -+ ret = ally_rgb_register(hdev, led_rgb); -+ if (ret < 0) { -+ cancel_work_sync(&led_rgb->work); -+ devm_kfree(&hdev->dev, led_rgb); -+ return ERR_PTR(ret); -+ } -+ 
-+ led_rgb->hdev = hdev; -+ led_rgb->removed = false; -+ -+ INIT_WORK(&led_rgb->work, ally_rgb_do_work); -+ led_rgb->output_worker_initialized = true; -+ spin_lock_init(&led_rgb->lock); -+ -+ ally_rgb_set_bright_base_max(hdev); -+ -+ /* Not marked as initialized unless ally_rgb_set() is called */ -+ if (drvdata.led_rgb_data.initialized) { -+ msleep(1500); -+ led_rgb->update_rgb = true; -+ ally_rgb_schedule_work(led_rgb); -+ } -+ -+ return led_rgb; -+} -+ -+static void ally_rgb_remove(struct hid_device *hdev) -+{ -+ struct ally_rgb_dev *led_rgb = drvdata.led_rgb_dev; -+ unsigned long flags; -+ int ep; -+ -+ ep = get_endpoint_address(hdev); -+ if (ep != ROG_ALLY_CFG_INTF_IN) -+ return; -+ -+ if (!drvdata.led_rgb_dev || led_rgb->removed) -+ return; -+ -+ spin_lock_irqsave(&led_rgb->lock, flags); -+ led_rgb->removed = true; -+ led_rgb->output_worker_initialized = false; -+ spin_unlock_irqrestore(&led_rgb->lock, flags); -+ cancel_work_sync(&led_rgb->work); -+ devm_led_classdev_multicolor_unregister(&hdev->dev, &led_rgb->led_rgb_dev); -+ -+ hid_info(hdev, "Removed Ally RGB interface"); -+} -+ -+/**************************************************************************************************/ -+/* ROG Ally driver init */ -+/**************************************************************************************************/ -+ -+static int ally_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, -+ int size) -+{ -+ struct ally_gamepad_cfg *cfg = drvdata.gamepad_cfg; -+ struct ally_x_device *ally_x = drvdata.ally_x; -+ -+ if (ally_x) { -+ if ((hdev->bus == BUS_USB && report->id == ALLY_X_INPUT_REPORT_USB && -+ size == ALLY_X_INPUT_REPORT_USB_SIZE) || -+ (data[0] == 0x5A)) { -+ ally_x_raw_event(ally_x, report, data, size); -+ } else { -+ return -1; -+ } -+ } -+ -+ if (cfg && !ally_x) { -+ input_report_key(cfg->input, KEY_PROG1, data[1] == 0x38); -+ input_report_key(cfg->input, KEY_F16, data[1] == 0xA6); -+ input_report_key(cfg->input, KEY_F17, data[1] == 
0xA7); -+ input_report_key(cfg->input, KEY_F18, data[1] == 0xA8); -+ input_sync(cfg->input); -+ } -+ -+ return 0; -+} -+ -+static int ally_hid_init(struct hid_device *hdev) -+{ -+ int ret; -+ -+ ret = asus_dev_set_report(hdev, EC_INIT_STRING, sizeof(EC_INIT_STRING)); -+ if (ret < 0) { -+ hid_err(hdev, "Ally failed to send init command: %d\n", ret); -+ return ret; -+ } -+ -+ ret = asus_dev_set_report(hdev, FORCE_FEEDBACK_OFF, sizeof(FORCE_FEEDBACK_OFF)); -+ if (ret < 0) -+ hid_err(hdev, "Ally failed to send init command: %d\n", ret); -+ -+ return ret; -+} -+ -+static int ally_hid_probe(struct hid_device *hdev, const struct hid_device_id *_id) -+{ -+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent); -+ struct usb_device *udev = interface_to_usbdev(intf); -+ u16 idProduct = le16_to_cpu(udev->descriptor.idProduct); -+ int ret, ep; -+ -+ ep = get_endpoint_address(hdev); -+ if (ep < 0) -+ return ep; -+ -+ if (ep != ROG_ALLY_CFG_INTF_IN && -+ ep != ROG_ALLY_X_INTF_IN) -+ return -ENODEV; -+ -+ ret = hid_parse(hdev); -+ if (ret) { -+ hid_err(hdev, "Parse failed\n"); -+ return ret; -+ } -+ -+ ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); -+ if (ret) { -+ hid_err(hdev, "Failed to start HID device\n"); -+ return ret; -+ } -+ -+ ret = hid_hw_open(hdev); -+ if (ret) { -+ hid_err(hdev, "Failed to open HID device\n"); -+ goto err_stop; -+ } -+ -+ /* Initialize MCU even before alloc */ -+ ret = ally_hid_init(hdev); -+ if (ret < 0) -+ return ret; -+ -+ drvdata.hdev = hdev; -+ hid_set_drvdata(hdev, &drvdata); -+ -+ /* This should almost always exist */ -+ if (ep == ROG_ALLY_CFG_INTF_IN) { -+ validate_mcu_fw_version(hdev, idProduct); -+ -+ drvdata.led_rgb_dev = ally_rgb_create(hdev); -+ if (IS_ERR(drvdata.led_rgb_dev)) -+ hid_err(hdev, "Failed to create Ally gamepad LEDs.\n"); -+ else -+ hid_info(hdev, "Created Ally RGB LED controls.\n"); -+ -+ drvdata.gamepad_cfg = ally_gamepad_cfg_create(hdev); -+ if (IS_ERR(drvdata.gamepad_cfg)) -+ hid_err(hdev, "Failed to create 
Ally gamepad attributes.\n"); -+ else -+ hid_info(hdev, "Created Ally gamepad attributes.\n"); -+ -+ if (IS_ERR(drvdata.led_rgb_dev) && IS_ERR(drvdata.gamepad_cfg)) -+ goto err_close; -+ } -+ -+ /* May or may not exist */ -+ if (ep == ROG_ALLY_X_INTF_IN) { -+ drvdata.ally_x = ally_x_create(hdev); -+ if (IS_ERR(drvdata.ally_x)) { -+ hid_err(hdev, "Failed to create Ally X gamepad.\n"); -+ drvdata.ally_x = NULL; -+ goto err_close; -+ } -+ hid_info(hdev, "Created Ally X controller.\n"); -+ -+ // Not required since we send this inputs ep through the gamepad input dev -+ if (drvdata.gamepad_cfg && drvdata.gamepad_cfg->input) { -+ input_unregister_device(drvdata.gamepad_cfg->input); -+ hid_info(hdev, "Ally X removed unrequired input dev.\n"); -+ } -+ } -+ -+ return 0; -+ -+err_close: -+ hid_hw_close(hdev); -+err_stop: -+ hid_hw_stop(hdev); -+ return ret; -+} -+ -+static void ally_hid_remove(struct hid_device *hdev) -+{ -+ if (drvdata.led_rgb_dev) -+ ally_rgb_remove(hdev); -+ -+ if (drvdata.ally_x) -+ ally_x_remove(hdev); -+ -+ if (drvdata.gamepad_cfg) -+ ally_cfg_remove(hdev); -+ -+ hid_hw_close(hdev); -+ hid_hw_stop(hdev); -+} -+ -+static int ally_hid_resume(struct hid_device *hdev) -+{ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; -+ int err; -+ -+ if (!ally_cfg) -+ return 0; -+ -+ err = _gamepad_apply_all(hdev, ally_cfg); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+static int ally_hid_reset_resume(struct hid_device *hdev) -+{ -+ int ep = get_endpoint_address(hdev); -+ if (ep != ROG_ALLY_CFG_INTF_IN) -+ return 0; -+ -+ ally_hid_init(hdev); -+ ally_rgb_resume(); -+ -+ return ally_hid_resume(hdev); -+} -+ -+static int ally_pm_thaw(struct device *dev) -+{ -+ struct hid_device *hdev = to_hid_device(dev); -+ -+ return ally_hid_reset_resume(hdev); -+} -+ -+static int ally_pm_suspend(struct device *dev) -+{ -+ if (drvdata.led_rgb_dev) { -+ ally_rgb_store_settings(); -+ } -+ -+ return 0; -+} -+ -+static const struct dev_pm_ops ally_pm_ops = { -+ .thaw = 
ally_pm_thaw, -+ .suspend = ally_pm_suspend, -+ .poweroff = ally_pm_suspend, -+}; -+ -+MODULE_DEVICE_TABLE(hid, rog_ally_devices); -+ -+static struct hid_driver rog_ally_cfg = { .name = "asus_rog_ally", -+ .id_table = rog_ally_devices, -+ .probe = ally_hid_probe, -+ .remove = ally_hid_remove, -+ .raw_event = ally_raw_event, -+ /* HID is the better place for resume functions, not pm_ops */ -+ .resume = ally_hid_resume, -+ /* ALLy 1 requires this to reset device state correctly */ -+ .reset_resume = ally_hid_reset_resume, -+ .driver = { -+ .pm = &ally_pm_ops, -+ } -+}; -+ -+static int __init rog_ally_init(void) -+{ -+ return hid_register_driver(&rog_ally_cfg); -+} -+ -+static void __exit rog_ally_exit(void) -+{ -+ hid_unregister_driver(&rog_ally_cfg); -+} -+ -+module_init(rog_ally_init); -+module_exit(rog_ally_exit); -+ -+MODULE_IMPORT_NS("ASUS_WMI"); -+MODULE_IMPORT_NS("HID_ASUS"); -+MODULE_AUTHOR("Luke D. Jones"); -+MODULE_DESCRIPTION("HID Driver for ASUS ROG Ally gamepad configuration."); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/hid/hid-asus-ally.h b/drivers/hid/hid-asus-ally.h -new file mode 100644 -index 000000000000..c83817589082 ---- /dev/null -+++ b/drivers/hid/hid-asus-ally.h -@@ -0,0 +1,398 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later -+ * -+ * HID driver for Asus ROG laptops and Ally -+ * -+ * Copyright (c) 2023 Luke Jones -+ */ -+ -+#include -+#include -+ -+/* -+ * the xpad_mode is used inside the mode setting packet and is used -+ * for indexing (xpad_mode - 1) -+ */ -+enum xpad_mode { -+ xpad_mode_game = 0x01, -+ xpad_mode_wasd = 0x02, -+ xpad_mode_mouse = 0x03, -+}; -+ -+/* the xpad_cmd determines which feature is set or queried */ -+enum xpad_cmd { -+ xpad_cmd_set_mode = 0x01, -+ xpad_cmd_set_mapping = 0x02, -+ xpad_cmd_set_js_dz = 0x04, /* deadzones */ -+ xpad_cmd_set_tr_dz = 0x05, /* deadzones */ -+ xpad_cmd_set_vibe_intensity = 0x06, -+ xpad_cmd_set_leds = 0x08, -+ xpad_cmd_check_ready = 0x0A, -+ xpad_cmd_set_turbo = 0x0F, -+ 
xpad_cmd_set_response_curve = 0x13, -+ xpad_cmd_set_adz = 0x18, -+}; -+ -+/* the xpad_cmd determines which feature is set or queried */ -+enum xpad_cmd_len { -+ xpad_cmd_len_mode = 0x01, -+ xpad_cmd_len_mapping = 0x2c, -+ xpad_cmd_len_deadzone = 0x04, -+ xpad_cmd_len_vibe_intensity = 0x02, -+ xpad_cmd_len_leds = 0x0C, -+ xpad_cmd_len_turbo = 0x20, -+ xpad_cmd_len_response_curve = 0x09, -+ xpad_cmd_len_adz = 0x02, -+}; -+ -+/* Values correspond to the actual HID byte value required */ -+enum btn_pair_index { -+ btn_pair_dpad_u_d = 0x01, -+ btn_pair_dpad_l_r = 0x02, -+ btn_pair_ls_rs = 0x03, -+ btn_pair_lb_rb = 0x04, -+ btn_pair_a_b = 0x05, -+ btn_pair_x_y = 0x06, -+ btn_pair_view_menu = 0x07, -+ btn_pair_m1_m2 = 0x08, -+ btn_pair_lt_rt = 0x09, -+}; -+ -+#define BTN_PAD_A 0x0101000000000000 -+#define BTN_PAD_B 0x0102000000000000 -+#define BTN_PAD_X 0x0103000000000000 -+#define BTN_PAD_Y 0x0104000000000000 -+#define BTN_PAD_LB 0x0105000000000000 -+#define BTN_PAD_RB 0x0106000000000000 -+#define BTN_PAD_LS 0x0107000000000000 -+#define BTN_PAD_RS 0x0108000000000000 -+#define BTN_PAD_DPAD_UP 0x0109000000000000 -+#define BTN_PAD_DPAD_DOWN 0x010A000000000000 -+#define BTN_PAD_DPAD_LEFT 0x010B000000000000 -+#define BTN_PAD_DPAD_RIGHT 0x010C000000000000 -+#define BTN_PAD_LT 0x010D000000000000 -+#define BTN_PAD_RT 0x010E000000000000 -+#define BTN_PAD_VIEW 0x0111000000000000 -+#define BTN_PAD_MENU 0x0112000000000000 -+#define BTN_PAD_XBOX 0x0113000000000000 -+ -+#define BTN_KB_M2 0x02008E0000000000 -+#define BTN_KB_M1 0x02008F0000000000 -+#define BTN_KB_ESC 0x0200760000000000 -+#define BTN_KB_F1 0x0200500000000000 -+#define BTN_KB_F2 0x0200600000000000 -+#define BTN_KB_F3 0x0200400000000000 -+#define BTN_KB_F4 0x02000C0000000000 -+#define BTN_KB_F5 0x0200030000000000 -+#define BTN_KB_F6 0x02000B0000000000 -+#define BTN_KB_F7 0x0200800000000000 -+#define BTN_KB_F8 0x02000A0000000000 -+#define BTN_KB_F9 0x0200010000000000 -+#define BTN_KB_F10 0x0200090000000000 -+#define 
BTN_KB_F11 0x0200780000000000 -+#define BTN_KB_F12 0x0200070000000000 -+#define BTN_KB_F14 0x0200180000000000 -+#define BTN_KB_F15 0x0200100000000000 -+#define BTN_KB_BACKTICK 0x02000E0000000000 -+#define BTN_KB_1 0x0200160000000000 -+#define BTN_KB_2 0x02001E0000000000 -+#define BTN_KB_3 0x0200260000000000 -+#define BTN_KB_4 0x0200250000000000 -+#define BTN_KB_5 0x02002E0000000000 -+#define BTN_KB_6 0x0200360000000000 -+#define BTN_KB_7 0x02003D0000000000 -+#define BTN_KB_8 0x02003E0000000000 -+#define BTN_KB_9 0x0200460000000000 -+#define BTN_KB_0 0x0200450000000000 -+#define BTN_KB_HYPHEN 0x02004E0000000000 -+#define BTN_KB_EQUALS 0x0200550000000000 -+#define BTN_KB_BACKSPACE 0x0200660000000000 -+#define BTN_KB_TAB 0x02000D0000000000 -+#define BTN_KB_Q 0x0200150000000000 -+#define BTN_KB_W 0x02001D0000000000 -+#define BTN_KB_E 0x0200240000000000 -+#define BTN_KB_R 0x02002D0000000000 -+#define BTN_KB_T 0x02002C0000000000 -+#define BTN_KB_Y 0x0200350000000000 -+#define BTN_KB_U 0x02003C0000000000 -+#define BTN_KB_O 0x0200440000000000 -+#define BTN_KB_P 0x02004D0000000000 -+#define BTN_KB_LBRACKET 0x0200540000000000 -+#define BTN_KB_RBRACKET 0x02005B0000000000 -+#define BTN_KB_BACKSLASH 0x02005D0000000000 -+#define BTN_KB_CAPS 0x0200580000000000 -+#define BTN_KB_A 0x02001C0000000000 -+#define BTN_KB_S 0x02001B0000000000 -+#define BTN_KB_D 0x0200230000000000 -+#define BTN_KB_F 0x02002B0000000000 -+#define BTN_KB_G 0x0200340000000000 -+#define BTN_KB_H 0x0200330000000000 -+#define BTN_KB_J 0x02003B0000000000 -+#define BTN_KB_K 0x0200420000000000 -+#define BTN_KB_L 0x02004B0000000000 -+#define BTN_KB_SEMI 0x02004C0000000000 -+#define BTN_KB_QUOTE 0x0200520000000000 -+#define BTN_KB_RET 0x02005A0000000000 -+#define BTN_KB_LSHIFT 0x0200880000000000 -+#define BTN_KB_Z 0x02001A0000000000 -+#define BTN_KB_X 0x0200220000000000 -+#define BTN_KB_C 0x0200210000000000 -+#define BTN_KB_V 0x02002A0000000000 -+#define BTN_KB_B 0x0200320000000000 -+#define BTN_KB_N 
0x0200310000000000 -+#define BTN_KB_M 0x02003A0000000000 -+#define BTN_KB_COMMA 0x0200410000000000 -+#define BTN_KB_PERIOD 0x0200490000000000 -+#define BTN_KB_RSHIFT 0x0200890000000000 -+#define BTN_KB_LCTL 0x02008C0000000000 -+#define BTN_KB_META 0x0200820000000000 -+#define BTN_KB_LALT 0x02008A0000000000 -+#define BTN_KB_SPACE 0x0200290000000000 -+#define BTN_KB_RALT 0x02008B0000000000 -+#define BTN_KB_MENU 0x0200840000000000 -+#define BTN_KB_RCTL 0x02008D0000000000 -+#define BTN_KB_PRNTSCN 0x0200C30000000000 -+#define BTN_KB_SCRLCK 0x02007E0000000000 -+#define BTN_KB_PAUSE 0x0200910000000000 -+#define BTN_KB_INS 0x0200C20000000000 -+#define BTN_KB_HOME 0x0200940000000000 -+#define BTN_KB_PGUP 0x0200960000000000 -+#define BTN_KB_DEL 0x0200C00000000000 -+#define BTN_KB_END 0x0200950000000000 -+#define BTN_KB_PGDWN 0x0200970000000000 -+#define BTN_KB_UP_ARROW 0x0200980000000000 -+#define BTN_KB_DOWN_ARROW 0x0200990000000000 -+#define BTN_KB_LEFT_ARROW 0x0200910000000000 -+#define BTN_KB_RIGHT_ARROW 0x02009B0000000000 -+ -+#define BTN_NUMPAD_LOCK 0x0200770000000000 -+#define BTN_NUMPAD_FWDSLASH 0x0200900000000000 -+#define BTN_NUMPAD_ASTERISK 0x02007C0000000000 -+#define BTN_NUMPAD_HYPHEN 0x02007B0000000000 -+#define BTN_NUMPAD_0 0x0200700000000000 -+#define BTN_NUMPAD_1 0x0200690000000000 -+#define BTN_NUMPAD_2 0x0200720000000000 -+#define BTN_NUMPAD_3 0x02007A0000000000 -+#define BTN_NUMPAD_4 0x02006B0000000000 -+#define BTN_NUMPAD_5 0x0200730000000000 -+#define BTN_NUMPAD_6 0x0200740000000000 -+#define BTN_NUMPAD_7 0x02006C0000000000 -+#define BTN_NUMPAD_8 0x0200750000000000 -+#define BTN_NUMPAD_9 0x02007D0000000000 -+#define BTN_NUMPAD_PLUS 0x0200790000000000 -+#define BTN_NUMPAD_ENTER 0x0200810000000000 -+#define BTN_NUMPAD_PERIOD 0x0200710000000000 -+ -+#define BTN_MOUSE_LCLICK 0x0300000001000000 -+#define BTN_MOUSE_RCLICK 0x0300000002000000 -+#define BTN_MOUSE_MCLICK 0x0300000003000000 -+#define BTN_MOUSE_WHEEL_UP 0x0300000004000000 -+#define 
BTN_MOUSE_WHEEL_DOWN 0x0300000005000000 -+ -+#define BTN_MEDIA_SCREENSHOT 0x0500001600000000 -+#define BTN_MEDIA_SHOW_KEYBOARD 0x0500001900000000 -+#define BTN_MEDIA_SHOW_DESKTOP 0x0500001C00000000 -+#define BTN_MEDIA_START_RECORDING 0x0500001E00000000 -+#define BTN_MEDIA_MIC_OFF 0x0500000100000000 -+#define BTN_MEDIA_VOL_DOWN 0x0500000200000000 -+#define BTN_MEDIA_VOL_UP 0x0500000300000000 -+ -+#define ALLY_DEVICE_ATTR_WO(_name, _sysfs_name) \ -+ struct device_attribute dev_attr_##_name = \ -+ __ATTR(_sysfs_name, 0200, NULL, _name##_store) -+ -+/* required so we can have nested attributes with same name but different functions */ -+#define ALLY_DEVICE_ATTR_RW(_name, _sysfs_name) \ -+ struct device_attribute dev_attr_##_name = \ -+ __ATTR(_sysfs_name, 0644, _name##_show, _name##_store) -+ -+#define ALLY_DEVICE_ATTR_RO(_name, _sysfs_name) \ -+ struct device_attribute dev_attr_##_name = \ -+ __ATTR(_sysfs_name, 0444, _name##_show, NULL) -+ -+/* button specific macros */ -+#define ALLY_BTN_SHOW(_fname, _btn_name, _secondary) \ -+ static ssize_t _fname##_show(struct device *dev, \ -+ struct device_attribute *attr, char *buf) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ struct btn_data *btn; \ -+ const char* name; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ btn = &ally_cfg->key_mapping[ally_cfg->mode - 1]._btn_name; \ -+ name = btn_to_name(_secondary ? 
btn->macro : btn->button); \ -+ return sysfs_emit(buf, "%s\n", name); \ -+ } -+ -+#define ALLY_BTN_STORE(_fname, _btn_name, _secondary) \ -+ static ssize_t _fname##_store(struct device *dev, \ -+ struct device_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ struct btn_data *btn; \ -+ u64 code; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ btn = &ally_cfg->key_mapping[ally_cfg->mode - 1]._btn_name; \ -+ code = name_to_btn(buf); \ -+ if (_secondary) \ -+ btn->macro = code; \ -+ else \ -+ btn->button = code; \ -+ return count; \ -+ } -+ -+#define ALLY_TURBO_SHOW(_fname, _btn_name) \ -+ static ssize_t _fname##_show(struct device *dev, \ -+ struct device_attribute *attr, char *buf) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ struct btn_data *btn; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ btn = &ally_cfg->key_mapping[ally_cfg->mode - 1]._btn_name; \ -+ return sysfs_emit(buf, "%d\n", btn->turbo); \ -+ } -+ -+#define ALLY_TURBO_STORE(_fname, _btn_name) \ -+ static ssize_t _fname##_store(struct device *dev, \ -+ struct device_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ struct btn_data *btn; \ -+ bool turbo; \ -+ int ret; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ btn = &ally_cfg->key_mapping[ally_cfg->mode - 1]._btn_name; \ -+ ret = kstrtobool(buf, &turbo); \ -+ if (ret) \ -+ return ret; \ -+ btn->turbo = turbo; \ -+ return count; \ -+ } -+ -+#define ALLY_DEADZONE_SHOW(_fname, _axis_name) \ -+ static ssize_t _fname##_show(struct device *dev, \ -+ struct device_attribute *attr, char *buf) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ struct deadzone *dz; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ dz = &ally_cfg->_axis_name; \ -+ return sysfs_emit(buf, "%d %d\n", dz->inner, dz->outer); \ -+ } -+ -+#define 
ALLY_DEADZONE_STORE(_fname, _axis_name) \ -+ static ssize_t _fname##_store(struct device *dev, \ -+ struct device_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ struct hid_device *hdev = to_hid_device(dev); \ -+ u32 inner, outer; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ if (sscanf(buf, "%d %d", &inner, &outer) != 2) \ -+ return -EINVAL; \ -+ if (inner > 64 || outer > 64 || inner > outer) \ -+ return -EINVAL; \ -+ ally_cfg->_axis_name.inner = inner; \ -+ ally_cfg->_axis_name.outer = outer; \ -+ _gamepad_apply_deadzones(hdev, ally_cfg); \ -+ return count; \ -+ } -+ -+#define ALLY_DEADZONES(_fname, _mname) \ -+ ALLY_DEADZONE_SHOW(_fname##_deadzone, _mname); \ -+ ALLY_DEADZONE_STORE(_fname##_deadzone, _mname); \ -+ ALLY_DEVICE_ATTR_RW(_fname##_deadzone, deadzone) -+ -+/* response curve macros */ -+#define ALLY_RESP_CURVE_SHOW(_fname, _mname) \ -+static ssize_t _fname##_show(struct device *dev, \ -+ struct device_attribute *attr, \ -+ char *buf) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ return sysfs_emit(buf, "%d\n", ally_cfg->ls_rc._mname); \ -+ } -+ -+#define ALLY_RESP_CURVE_STORE(_fname, _mname) \ -+static ssize_t _fname##_store(struct device *dev, \ -+ struct device_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ struct ally_gamepad_cfg *ally_cfg = drvdata.gamepad_cfg; \ -+ int ret, val; \ -+ if (!drvdata.gamepad_cfg) \ -+ return -ENODEV; \ -+ ret = kstrtoint(buf, 0, &val); \ -+ if (ret) \ -+ return ret; \ -+ if (val < 0 || val > 100) \ -+ return -EINVAL; \ -+ ally_cfg->ls_rc._mname = val; \ -+ return count; \ -+ } -+ -+/* _point_n must start at 1 */ -+#define ALLY_JS_RC_POINT(_fname, _mname, _num) \ -+ ALLY_RESP_CURVE_SHOW(_fname##_##_mname##_##_num, _mname##_pct_##_num); \ -+ ALLY_RESP_CURVE_STORE(_fname##_##_mname##_##_num, _mname##_pct_##_num); \ -+ 
ALLY_DEVICE_ATTR_RW(_fname##_##_mname##_##_num, curve_##_mname##_pct_##_num) -+ -+#define ALLY_BTN_ATTRS_GROUP(_name, _fname) \ -+ static struct attribute *_fname##_attrs[] = { \ -+ &dev_attr_##_fname.attr, \ -+ &dev_attr_##_fname##_macro.attr, \ -+ }; \ -+ static const struct attribute_group _fname##_attr_group = { \ -+ .name = __stringify(_name), \ -+ .attrs = _fname##_attrs, \ -+ } -+ -+#define _ALLY_BTN_REMAP(_fname, _btn_name) \ -+ ALLY_BTN_SHOW(btn_mapping_##_fname##_remap, _btn_name, false); \ -+ ALLY_BTN_STORE(btn_mapping_##_fname##_remap, _btn_name, false); \ -+ ALLY_DEVICE_ATTR_RW(btn_mapping_##_fname##_remap, remap); -+ -+#define _ALLY_BTN_MACRO(_fname, _btn_name) \ -+ ALLY_BTN_SHOW(btn_mapping_##_fname##_macro, _btn_name, true); \ -+ ALLY_BTN_STORE(btn_mapping_##_fname##_macro, _btn_name, true); \ -+ ALLY_DEVICE_ATTR_RW(btn_mapping_##_fname##_macro, macro_remap); -+ -+#define ALLY_BTN_MAPPING(_fname, _btn_name) \ -+ _ALLY_BTN_REMAP(_fname, _btn_name) \ -+ _ALLY_BTN_MACRO(_fname, _btn_name) \ -+ static struct attribute *_fname##_attrs[] = { \ -+ &dev_attr_btn_mapping_##_fname##_remap.attr, \ -+ &dev_attr_btn_mapping_##_fname##_macro.attr, \ -+ NULL, \ -+ }; \ -+ static const struct attribute_group btn_mapping_##_fname##_attr_group = { \ -+ .name = __stringify(btn_##_fname), \ -+ .attrs = _fname##_attrs, \ -+ } -+ -+#define ALLY_TURBO_BTN_MAPPING(_fname, _btn_name) \ -+ _ALLY_BTN_REMAP(_fname, _btn_name) \ -+ _ALLY_BTN_MACRO(_fname, _btn_name) \ -+ ALLY_TURBO_SHOW(btn_mapping_##_fname##_turbo, _btn_name); \ -+ ALLY_TURBO_STORE(btn_mapping_##_fname##_turbo, _btn_name); \ -+ ALLY_DEVICE_ATTR_RW(btn_mapping_##_fname##_turbo, turbo); \ -+ static struct attribute *_fname##_turbo_attrs[] = { \ -+ &dev_attr_btn_mapping_##_fname##_remap.attr, \ -+ &dev_attr_btn_mapping_##_fname##_macro.attr, \ -+ &dev_attr_btn_mapping_##_fname##_turbo.attr, \ -+ NULL, \ -+ }; \ -+ static const struct attribute_group btn_mapping_##_fname##_attr_group = { \ -+ .name = 
__stringify(btn_##_fname), \ -+ .attrs = _fname##_turbo_attrs, \ -+ } -diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c -index d27dcfb2b9e4..188eff9d3573 100644 ---- a/drivers/hid/hid-asus.c -+++ b/drivers/hid/hid-asus.c -@@ -23,6 +23,7 @@ - /* - */ - -+#include "linux/export.h" - #include - #include - #include -@@ -33,6 +34,7 @@ - #include - - #include "hid-ids.h" -+#include "hid-asus.h" - - MODULE_AUTHOR("Yusuke Fujimaki "); - MODULE_AUTHOR("Brendan McGrath "); -@@ -601,7 +603,7 @@ static int mcu_request_version(struct hid_device *hdev) - return ret; - } - --static void validate_mcu_fw_version(struct hid_device *hdev, int idProduct) -+void validate_mcu_fw_version(struct hid_device *hdev, int idProduct) - { - int min_version, version; - -@@ -629,12 +631,11 @@ static void validate_mcu_fw_version(struct hid_device *hdev, int idProduct) - set_ally_mcu_powersave(true); - } - } -+EXPORT_SYMBOL_NS(validate_mcu_fw_version, "HID_ASUS"); - - static int asus_kbd_register_leds(struct hid_device *hdev) - { - struct asus_drvdata *drvdata = hid_get_drvdata(hdev); -- struct usb_interface *intf; -- struct usb_device *udev; - unsigned char kbd_func; - int ret; - -@@ -659,12 +660,14 @@ static int asus_kbd_register_leds(struct hid_device *hdev) - return ret; - } - -+ #if !IS_REACHABLE(CONFIG_HID_ASUS_ALLY) - if (drvdata->quirks & QUIRK_ROG_ALLY_XPAD) { -- intf = to_usb_interface(hdev->dev.parent); -- udev = interface_to_usbdev(intf); -+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent); -+ struct usb_device *udev = interface_to_usbdev(intf); - validate_mcu_fw_version(hdev, - le16_to_cpu(udev->descriptor.idProduct)); - } -+ #endif /* !IS_REACHABLE(CONFIG_HID_ASUS_ALLY) */ - - } else { - /* Initialize keyboard */ -@@ -1122,8 +1125,10 @@ static int __maybe_unused asus_reset_resume(struct hid_device *hdev) - - static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) - { -- int ret; - struct asus_drvdata *drvdata; -+ struct 
usb_host_endpoint *ep; -+ struct usb_interface *intf; -+ int ret; - - drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); - if (drvdata == NULL) { -@@ -1135,6 +1140,18 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) - - drvdata->quirks = id->driver_data; - -+ /* Ignore these endpoints as they are used by hid-asus-ally */ -+ #if IS_REACHABLE(CONFIG_HID_ASUS_ALLY) -+ if (drvdata->quirks & QUIRK_ROG_ALLY_XPAD) { -+ intf = to_usb_interface(hdev->dev.parent); -+ ep = intf->cur_altsetting->endpoint; -+ if (ep->desc.bEndpointAddress == ROG_ALLY_X_INTF_IN || -+ ep->desc.bEndpointAddress == ROG_ALLY_CFG_INTF_IN || -+ ep->desc.bEndpointAddress == ROG_ALLY_CFG_INTF_OUT) -+ return -ENODEV; -+ } -+ #endif /* IS_REACHABLE(CONFIG_HID_ASUS_ALLY) */ -+ - /* - * T90CHI's keyboard dock returns same ID values as T100CHI's dock. - * Thus, identify T90CHI dock with product name string. -diff --git a/drivers/hid/hid-asus.h b/drivers/hid/hid-asus.h -new file mode 100644 -index 000000000000..f67dd5a3a1bc ---- /dev/null -+++ b/drivers/hid/hid-asus.h -@@ -0,0 +1,13 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef __HID_ASUS_H -+#define __HID_ASUS_H -+ -+#include -+ -+#define ROG_ALLY_CFG_INTF_IN 0x83 -+#define ROG_ALLY_CFG_INTF_OUT 0x04 -+#define ROG_ALLY_X_INTF_IN 0x87 -+ -+void validate_mcu_fw_version(struct hid_device *hdev, int idProduct); -+ -+#endif /* __HID_ASUS_H */ -diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h -index 149798754570..a94b734266be 100644 ---- a/drivers/hid/hid-ids.h -+++ b/drivers/hid/hid-ids.h -@@ -225,6 +225,7 @@ - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2 0x19b6 - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 - #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 -+#define USB_DEVICE_ID_ASUSTEK_ROG_RAIKIRI_PAD 0x1abb - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c - #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 
0x196b -diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig -index 6d238e120dce..fc45a7c8c201 100644 ---- a/drivers/platform/x86/Kconfig -+++ b/drivers/platform/x86/Kconfig -@@ -250,6 +250,18 @@ config ASUS_WIRELESS - If you choose to compile this driver as a module the module will be - called asus-wireless. - -+config ASUS_ARMOURY -+ tristate "ASUS Armoury driver" -+ depends on ASUS_WMI -+ select FW_ATTR_CLASS -+ help -+ Say Y here if you have a WMI aware Asus machine and would like to use the -+ firmware_attributes API to control various settings typically exposed in -+ the ASUS Armoury Crate application available on Windows. -+ -+ To compile this driver as a module, choose M here: the module will -+ be called asus-armoury. -+ - config ASUS_WMI - tristate "ASUS WMI Driver" - depends on ACPI_WMI -@@ -272,6 +284,17 @@ config ASUS_WMI - To compile this driver as a module, choose M here: the module will - be called asus-wmi. - -+config ASUS_WMI_DEPRECATED_ATTRS -+ bool "BIOS option support in WMI platform (DEPRECATED)" -+ depends on ASUS_WMI -+ default y -+ help -+ Say Y to expose the configurable BIOS options through the asus-wmi -+ driver. -+ -+ This can be used with or without the asus-armoury driver which -+ has the same attributes, but more, and better features. 
-+ - config ASUS_NB_WMI - tristate "Asus Notebook WMI Driver" - depends on ASUS_WMI -diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile -index a0c5848513e3..4279f5443f30 100644 ---- a/drivers/platform/x86/Makefile -+++ b/drivers/platform/x86/Makefile -@@ -32,6 +32,7 @@ obj-$(CONFIG_APPLE_GMUX) += apple-gmux.o - # ASUS - obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o - obj-$(CONFIG_ASUS_WIRELESS) += asus-wireless.o -+obj-$(CONFIG_ASUS_ARMOURY) += asus-armoury.o - obj-$(CONFIG_ASUS_WMI) += asus-wmi.o - obj-$(CONFIG_ASUS_NB_WMI) += asus-nb-wmi.o - obj-$(CONFIG_ASUS_TF103C_DOCK) += asus-tf103c-dock.o -diff --git a/drivers/platform/x86/asus-armoury.c b/drivers/platform/x86/asus-armoury.c -new file mode 100644 -index 000000000000..a461be936294 ---- /dev/null -+++ b/drivers/platform/x86/asus-armoury.c -@@ -0,0 +1,1174 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Asus Armoury (WMI) attributes driver. -+ * -+ * This driver uses the fw_attributes class to expose various WMI functions -+ * that are present in many gaming and some non-gaming ASUS laptops. -+ * -+ * These typically don't fit anywhere else in the sysfs such as under LED class, -+ * hwmon or others, and are set in Windows using the ASUS Armoury Crate tool. 
-+ * -+ * Copyright(C) 2024 Luke Jones -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "asus-armoury.h" -+#include "firmware_attributes_class.h" -+ -+#define ASUS_NB_WMI_EVENT_GUID "0B3CBB35-E3C2-45ED-91C2-4C5A6D195D1C" -+ -+#define ASUS_MINI_LED_MODE_MASK 0x03 -+/* Standard modes for devices with only on/off */ -+#define ASUS_MINI_LED_OFF 0x00 -+#define ASUS_MINI_LED_ON 0x01 -+/* Like "on" but the effect is more vibrant or brighter */ -+#define ASUS_MINI_LED_STRONG_MODE 0x02 -+/* New modes for devices with 3 mini-led mode types */ -+#define ASUS_MINI_LED_2024_WEAK 0x00 -+#define ASUS_MINI_LED_2024_STRONG 0x01 -+#define ASUS_MINI_LED_2024_OFF 0x02 -+ -+/* Power tunable attribute name defines */ -+#define ATTR_PPT_PL1_SPL "ppt_pl1_spl" -+#define ATTR_PPT_PL2_SPPT "ppt_pl2_sppt" -+#define ATTR_PPT_PL3_FPPT "ppt_pl3_fppt" -+#define ATTR_PPT_APU_SPPT "ppt_apu_sppt" -+#define ATTR_PPT_PLATFORM_SPPT "ppt_platform_sppt" -+#define ATTR_NV_DYNAMIC_BOOST "nv_dynamic_boost" -+#define ATTR_NV_TEMP_TARGET "nv_temp_target" -+#define ATTR_NV_BASE_TGP "nv_base_tgp" -+#define ATTR_NV_TGP "nv_tgp" -+ -+#define ASUS_POWER_CORE_MASK GENMASK(15, 8) -+#define ASUS_PERF_CORE_MASK GENMASK(7, 0) -+ -+enum cpu_core_type { -+ CPU_CORE_PERF = 0, -+ CPU_CORE_POWER, -+}; -+ -+enum cpu_core_value { -+ CPU_CORE_DEFAULT = 0, -+ CPU_CORE_MIN, -+ CPU_CORE_MAX, -+ CPU_CORE_CURRENT, -+}; -+ -+#define CPU_PERF_CORE_COUNT_MIN 4 -+#define CPU_POWR_CORE_COUNT_MIN 0 -+ -+/* Tunables provided by ASUS for gaming laptops */ -+struct cpu_cores { -+ u32 cur_perf_cores; -+ u32 min_perf_cores; -+ u32 max_perf_cores; -+ u32 cur_power_cores; -+ u32 min_power_cores; -+ u32 max_power_cores; -+}; -+ -+struct rog_tunables { -+ const struct power_limits *power_limits; -+ u32 ppt_pl1_spl; // cpu -+ u32 ppt_pl2_sppt; // cpu 
-+ u32 ppt_pl3_fppt; // cpu -+ u32 ppt_apu_sppt; // plat -+ u32 ppt_platform_sppt; // plat -+ -+ u32 nv_dynamic_boost; -+ u32 nv_temp_target; -+ u32 nv_tgp; -+}; -+ -+static struct asus_armoury_priv { -+ struct device *fw_attr_dev; -+ struct kset *fw_attr_kset; -+ -+ struct cpu_cores *cpu_cores; -+ /* Index 0 for DC, 1 for AC */ -+ struct rog_tunables *rog_tunables[2]; -+ u32 mini_led_dev_id; -+ u32 gpu_mux_dev_id; -+ /* -+ * Mutex to prevent big/little core count changes writing to same -+ * endpoint at the same time. Must lock during attr store. -+ */ -+ struct mutex cpu_core_mutex; -+} asus_armoury = { -+ .cpu_core_mutex = __MUTEX_INITIALIZER(asus_armoury.cpu_core_mutex) -+}; -+ -+struct fw_attrs_group { -+ bool pending_reboot; -+}; -+ -+static struct fw_attrs_group fw_attrs = { -+ .pending_reboot = false, -+}; -+ -+struct asus_attr_group { -+ const struct attribute_group *attr_group; -+ u32 wmi_devid; -+}; -+ -+static bool asus_wmi_is_present(u32 dev_id) -+{ -+ u32 retval; -+ int status; -+ -+ status = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS, dev_id, 0, &retval); -+ pr_debug("%s called (0x%08x), retval: 0x%08x\n", __func__, dev_id, retval); -+ -+ return status == 0 && (retval & ASUS_WMI_DSTS_PRESENCE_BIT); -+} -+ -+static void asus_set_reboot_and_signal_event(void) -+{ -+ fw_attrs.pending_reboot = true; -+ kobject_uevent(&asus_armoury.fw_attr_dev->kobj, KOBJ_CHANGE); -+} -+ -+static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+{ -+ return sysfs_emit(buf, "%d\n", fw_attrs.pending_reboot); -+} -+ -+static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot); -+ -+static bool asus_bios_requires_reboot(struct kobj_attribute *attr) -+{ -+ return !strcmp(attr->attr.name, "gpu_mux_mode") || -+ !strcmp(attr->attr.name, "cores_performance") || -+ !strcmp(attr->attr.name, "cores_efficiency") || -+ !strcmp(attr->attr.name, "panel_hd_mode"); -+} -+ -+static int armoury_wmi_set_devstate(struct kobj_attribute 
*attr, u32 value, u32 wmi_dev) -+{ -+ u32 result; -+ int err; -+ -+ err = asus_wmi_set_devstate(wmi_dev, value, &result); -+ if (err) { -+ pr_err("Failed to set %s: %d\n", attr->attr.name, err); -+ return err; -+ } -+ /* -+ * !1 is usually considered a fail by ASUS, but some WMI methods do use > 1 -+ * to return a status code or similar. -+ */ -+ if (result < 1) { -+ pr_err("Failed to set %s: (result): 0x%x\n", attr->attr.name, result); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+/** -+ * attr_uint_store() - Send an uint to wmi method, checks if within min/max exclusive. -+ * @kobj: Pointer to the driver object. -+ * @attr: Pointer to the attribute calling this function. -+ * @buf: The buffer to read from, this is parsed to `uint` type. -+ * @count: Required by sysfs attribute macros, pass in from the callee attr. -+ * @min: Minimum accepted value. Below this returns -EINVAL. -+ * @max: Maximum accepted value. Above this returns -EINVAL. -+ * @store_value: Pointer to where the parsed value should be stored. -+ * @wmi_dev: The WMI function ID to use. -+ * -+ * This function is intended to be generic so it can be called from any "_store" -+ * attribute which works only with integers. The integer to be sent to the WMI method -+ * is range checked and an error returned if out of range. -+ * -+ * If the value is valid and WMI is success, then the sysfs attribute is notified -+ * and if asus_bios_requires_reboot() is true then reboot attribute is also notified. -+ * -+ * Returns: Either count, or an error. 
-+ */ -+static ssize_t attr_uint_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, -+ size_t count, u32 min, u32 max, u32 *store_value, u32 wmi_dev) -+{ -+ u32 value; -+ int err; -+ -+ err = kstrtouint(buf, 10, &value); -+ if (err) -+ return err; -+ -+ if (value < min || value > max) -+ return -EINVAL; -+ -+ err = armoury_wmi_set_devstate(attr, value, wmi_dev); -+ if (err) -+ return err; -+ -+ if (store_value != NULL) -+ *store_value = value; -+ sysfs_notify(kobj, NULL, attr->attr.name); -+ -+ if (asus_bios_requires_reboot(attr)) -+ asus_set_reboot_and_signal_event(); -+ -+ return count; -+} -+ -+static ssize_t enum_type_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ return sysfs_emit(buf, "enumeration\n"); -+} -+ -+static ssize_t int_type_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ return sysfs_emit(buf, "integer\n"); -+} -+ -+/* Mini-LED mode **************************************************************/ -+static ssize_t mini_led_mode_current_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ u32 value; -+ int err; -+ -+ err = asus_wmi_get_devstate_dsts(asus_armoury.mini_led_dev_id, &value); -+ if (err) -+ return err; -+ -+ value &= ASUS_MINI_LED_MODE_MASK; -+ -+ /* -+ * Remap the mode values to match previous generation mini-LED. The last gen -+ * WMI 0 == off, while on this version WMI 2 == off (flipped). 
-+ */ -+ if (asus_armoury.mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) { -+ switch (value) { -+ case ASUS_MINI_LED_2024_WEAK: -+ value = ASUS_MINI_LED_ON; -+ break; -+ case ASUS_MINI_LED_2024_STRONG: -+ value = ASUS_MINI_LED_STRONG_MODE; -+ break; -+ case ASUS_MINI_LED_2024_OFF: -+ value = ASUS_MINI_LED_OFF; -+ break; -+ } -+ } -+ -+ return sysfs_emit(buf, "%u\n", value); -+} -+ -+static ssize_t mini_led_mode_current_value_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ u32 mode; -+ int err; -+ -+ err = kstrtou32(buf, 10, &mode); -+ if (err) -+ return err; -+ -+ if (asus_armoury.mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE && -+ mode > ASUS_MINI_LED_ON) -+ return -EINVAL; -+ if (asus_armoury.mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2 && -+ mode > ASUS_MINI_LED_STRONG_MODE) -+ return -EINVAL; -+ -+ /* -+ * Remap the mode values so expected behaviour is the same as the last -+ * generation of mini-LED with 0 == off, 1 == on. 
-+ */ -+ if (asus_armoury.mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) { -+ switch (mode) { -+ case ASUS_MINI_LED_OFF: -+ mode = ASUS_MINI_LED_2024_OFF; -+ break; -+ case ASUS_MINI_LED_ON: -+ mode = ASUS_MINI_LED_2024_WEAK; -+ break; -+ case ASUS_MINI_LED_STRONG_MODE: -+ mode = ASUS_MINI_LED_2024_STRONG; -+ break; -+ } -+ } -+ -+ err = armoury_wmi_set_devstate(attr, mode, asus_armoury.mini_led_dev_id); -+ if (err) -+ return err; -+ -+ sysfs_notify(kobj, NULL, attr->attr.name); -+ -+ return count; -+} -+ -+static ssize_t mini_led_mode_possible_values_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ switch (asus_armoury.mini_led_dev_id) { -+ case ASUS_WMI_DEVID_MINI_LED_MODE: -+ return sysfs_emit(buf, "0;1\n"); -+ case ASUS_WMI_DEVID_MINI_LED_MODE2: -+ return sysfs_emit(buf, "0;1;2\n"); -+ default: -+ return -ENODEV; -+ } -+} -+ -+ATTR_GROUP_ENUM_CUSTOM(mini_led_mode, "mini_led_mode", "Set the mini-LED backlight mode"); -+ -+static ssize_t gpu_mux_mode_current_value_store(struct kobject *kobj, -+ struct kobj_attribute *attr, const char *buf, -+ size_t count) -+{ -+ int result, err; -+ u32 optimus; -+ -+ err = kstrtou32(buf, 10, &optimus); -+ if (err) -+ return err; -+ -+ if (optimus > 1) -+ return -EINVAL; -+ -+ if (asus_wmi_is_present(ASUS_WMI_DEVID_DGPU)) { -+ err = asus_wmi_get_devstate_dsts(ASUS_WMI_DEVID_DGPU, &result); -+ if (err) -+ return err; -+ if (result && !optimus) { -+ pr_warn("Can not switch MUX to dGPU mode when dGPU is disabled: %02X %02X\n", -+ result, optimus); -+ return -ENODEV; -+ } -+ } -+ -+ if (asus_wmi_is_present(ASUS_WMI_DEVID_EGPU)) { -+ err = asus_wmi_get_devstate_dsts(ASUS_WMI_DEVID_EGPU, &result); -+ if (err) -+ return err; -+ if (result && !optimus) { -+ pr_warn("Can not switch MUX to dGPU mode when eGPU is enabled\n"); -+ return -EBUSY; -+ } -+ } -+ -+ err = armoury_wmi_set_devstate(attr, optimus, asus_armoury.gpu_mux_dev_id); -+ if (err) -+ return err; -+ -+ sysfs_notify(kobj, NULL, attr->attr.name); 
-+ asus_set_reboot_and_signal_event(); -+ -+ return count; -+} -+WMI_SHOW_INT(gpu_mux_mode_current_value, "%u\n", asus_armoury.gpu_mux_dev_id); -+ATTR_GROUP_BOOL_CUSTOM(gpu_mux_mode, "gpu_mux_mode", "Set the GPU display MUX mode"); -+ -+/* -+ * A user may be required to store the value twice, typical store first, then -+ * rescan PCI bus to activate power, then store a second time to save correctly. -+ */ -+static ssize_t dgpu_disable_current_value_store(struct kobject *kobj, -+ struct kobj_attribute *attr, const char *buf, -+ size_t count) -+{ -+ int result, err; -+ u32 disable; -+ -+ err = kstrtou32(buf, 10, &disable); -+ if (err) -+ return err; -+ -+ if (disable > 1) -+ return -EINVAL; -+ -+ if (asus_armoury.gpu_mux_dev_id) { -+ err = asus_wmi_get_devstate_dsts(asus_armoury.gpu_mux_dev_id, &result); -+ if (err) -+ return err; -+ if (!result && disable) { -+ pr_warn("Can not disable dGPU when the MUX is in dGPU mode\n"); -+ return -EBUSY; -+ } -+ } -+ -+ err = armoury_wmi_set_devstate(attr, disable, ASUS_WMI_DEVID_DGPU); -+ if (err) -+ return err; -+ -+ sysfs_notify(kobj, NULL, attr->attr.name); -+ -+ return count; -+} -+WMI_SHOW_INT(dgpu_disable_current_value, "%d\n", ASUS_WMI_DEVID_DGPU); -+ATTR_GROUP_BOOL_CUSTOM(dgpu_disable, "dgpu_disable", "Disable the dGPU"); -+ -+/* The ACPI call to enable the eGPU also disables the internal dGPU */ -+static ssize_t egpu_enable_current_value_store(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int result, err; -+ u32 enable; -+ -+ err = kstrtou32(buf, 10, &enable); -+ if (err) -+ return err; -+ -+ if (enable > 1) -+ return -EINVAL; -+ -+ err = asus_wmi_get_devstate_dsts(ASUS_WMI_DEVID_EGPU_CONNECTED, &result); -+ if (err) { -+ pr_warn("Failed to get eGPU connection status: %d\n", err); -+ return err; -+ } -+ -+ if (asus_armoury.gpu_mux_dev_id) { -+ err = asus_wmi_get_devstate_dsts(asus_armoury.gpu_mux_dev_id, &result); -+ if (err) { -+ pr_warn("Failed to get GPU MUX status: 
%d\n", result); -+ return err; -+ } -+ if (!result && enable) { -+ pr_warn("Can not enable eGPU when the MUX is in dGPU mode\n"); -+ return -ENODEV; -+ } -+ } -+ -+ err = armoury_wmi_set_devstate(attr, enable, ASUS_WMI_DEVID_EGPU); -+ if (err) -+ return err; -+ -+ sysfs_notify(kobj, NULL, attr->attr.name); -+ -+ return count; -+} -+WMI_SHOW_INT(egpu_enable_current_value, "%d\n", ASUS_WMI_DEVID_EGPU); -+ATTR_GROUP_BOOL_CUSTOM(egpu_enable, "egpu_enable", "Enable the eGPU (also disables dGPU)"); -+ -+/* Device memory available to APU */ -+ -+/* Values map for APU memory: some looks out of order but are actually correct */ -+static u32 apu_mem_map[] = { -+ [0] = 0x000, /* called "AUTO" on the BIOS, is the minimum available */ -+ [1] = 0x102, -+ [2] = 0x103, -+ [3] = 0x104, -+ [4] = 0x105, -+ [5] = 0x107, -+ [6] = 0x108, -+ [7] = 0x109, -+ [8] = 0x106, -+}; -+ -+static ssize_t apu_mem_current_value_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ int err; -+ u32 mem; -+ -+ err = asus_wmi_get_devstate_dsts(ASUS_WMI_DEVID_APU_MEM, &mem); -+ if (err) -+ return err; -+ -+ if ((mem & ASUS_WMI_DSTS_PRESENCE_BIT) == 0) -+ return -ENODEV; -+ -+ mem &= ~ASUS_WMI_DSTS_PRESENCE_BIT; -+ -+ /* After 0x000 is set, a read will return 0x100 */ -+ if (mem == 0x100) -+ return sysfs_emit(buf, "0\n"); -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE(apu_mem_map); i++) { -+ if (apu_mem_map[i] == mem) -+ return sysfs_emit(buf, "%u\n", i); -+ } -+ -+ pr_warn("Unrecognised value for APU mem 0x%08x\n", mem); -+ return sysfs_emit(buf, "%u\n", mem); -+} -+ -+static ssize_t apu_mem_current_value_store(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int result, err; -+ u32 requested, mem; -+ -+ result = kstrtou32(buf, 10, &requested); -+ if (result) -+ return result; -+ -+ if (requested > ARRAY_SIZE(apu_mem_map)) -+ return -EINVAL; -+ -+ mem = apu_mem_map[requested]; -+ -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_APU_MEM, mem, 
&result); -+ if (err) { -+ pr_warn("Failed to set apu_mem: %d\n", err); -+ return err; -+ } -+ -+ pr_info("APU memory changed to %uGB, reboot required\n", requested+1); -+ sysfs_notify(kobj, NULL, attr->attr.name); -+ -+ asus_set_reboot_and_signal_event(); -+ -+ return count; -+} -+ -+static ssize_t apu_mem_possible_values_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ BUILD_BUG_ON(ARRAY_SIZE(apu_mem_map) != 9); -+ return sysfs_emit(buf, "0;1;2;3;4;5;6;7;8\n"); -+} -+ATTR_GROUP_ENUM_CUSTOM(apu_mem, "apu_mem", "Set available system RAM (in GB) for the APU to use"); -+ -+static int init_max_cpu_cores(void) -+{ -+ u32 cores; -+ int err; -+ -+ asus_armoury.cpu_cores = kzalloc(sizeof(struct cpu_cores), GFP_KERNEL); -+ if (!asus_armoury.cpu_cores) -+ return -ENOMEM; -+ -+ err = asus_wmi_get_devstate_dsts(ASUS_WMI_DEVID_CORES_MAX, &cores); -+ if (err) -+ return err; -+ -+ if ((cores & ASUS_WMI_DSTS_PRESENCE_BIT) == 0) { -+ pr_err("ACPI does not support CPU core count control\n"); -+ err = -ENODEV; -+ goto init_max_cpu_cores_err; -+ } -+ -+ asus_armoury.cpu_cores->max_power_cores = FIELD_GET(ASUS_POWER_CORE_MASK, cores); -+ asus_armoury.cpu_cores->max_perf_cores = FIELD_GET(ASUS_PERF_CORE_MASK, cores); -+ -+ err = asus_wmi_get_devstate_dsts(ASUS_WMI_DEVID_CORES, &cores); -+ if (err) { -+ pr_err("Could not get CPU core count: error %d\n", err); -+ goto init_max_cpu_cores_err; -+ } -+ -+ asus_armoury.cpu_cores->cur_perf_cores = FIELD_GET(ASUS_PERF_CORE_MASK, cores); -+ asus_armoury.cpu_cores->cur_power_cores = FIELD_GET(ASUS_POWER_CORE_MASK, cores); -+ -+ asus_armoury.cpu_cores->min_perf_cores = CPU_PERF_CORE_COUNT_MIN; -+ asus_armoury.cpu_cores->min_power_cores = CPU_POWR_CORE_COUNT_MIN; -+ -+ return 0; -+ -+init_max_cpu_cores_err: -+ kfree(asus_armoury.cpu_cores); -+ return err; -+} -+ -+static ssize_t cores_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, -+ enum cpu_core_type core_type, enum cpu_core_value core_value) 
-+{ -+ u32 cores; -+ -+ switch (core_value) { -+ case CPU_CORE_DEFAULT: -+ case CPU_CORE_MAX: -+ if (core_type == CPU_CORE_PERF) -+ return sysfs_emit(buf, "%u\n", -+ asus_armoury.cpu_cores->max_perf_cores); -+ else -+ return sysfs_emit(buf, "%u\n", -+ asus_armoury.cpu_cores->max_power_cores); -+ case CPU_CORE_MIN: -+ if (core_type == CPU_CORE_PERF) -+ return sysfs_emit(buf, "%u\n", -+ asus_armoury.cpu_cores->min_perf_cores); -+ else -+ return sysfs_emit(buf, "%u\n", -+ asus_armoury.cpu_cores->min_power_cores); -+ default: -+ break; -+ } -+ -+ if (core_type == CPU_CORE_PERF) -+ cores = asus_armoury.cpu_cores->cur_perf_cores; -+ else -+ cores = asus_armoury.cpu_cores->cur_power_cores; -+ -+ return sysfs_emit(buf, "%u\n", cores); -+} -+ -+static ssize_t cores_current_value_store(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, enum cpu_core_type core_type) -+{ -+ u32 new_cores, perf_cores, power_cores, out_val, min, max; -+ int result, err; -+ -+ result = kstrtou32(buf, 10, &new_cores); -+ if (result) -+ return result; -+ -+ scoped_guard(mutex, &asus_armoury.cpu_core_mutex) { -+ if (core_type == CPU_CORE_PERF) { -+ perf_cores = new_cores; -+ power_cores = asus_armoury.cpu_cores->cur_power_cores; -+ min = asus_armoury.cpu_cores->min_perf_cores; -+ max = asus_armoury.cpu_cores->max_perf_cores; -+ } else { -+ perf_cores = asus_armoury.cpu_cores->cur_perf_cores; -+ power_cores = new_cores; -+ min = asus_armoury.cpu_cores->min_power_cores; -+ max = asus_armoury.cpu_cores->max_power_cores; -+ } -+ -+ if (new_cores < min || new_cores > max) -+ return -EINVAL; -+ -+ out_val = FIELD_PREP(ASUS_PERF_CORE_MASK, perf_cores) | -+ FIELD_PREP(ASUS_POWER_CORE_MASK, power_cores); -+ -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_CORES, out_val, &result); -+ if (err) { -+ pr_warn("Failed to set CPU core count: %d\n", err); -+ return err; -+ } -+ -+ if (result > 1) { -+ pr_warn("Failed to set CPU core count (result): 0x%x\n", result); -+ return -EIO; -+ } -+ } -+ -+ 
pr_info("CPU core count changed, reboot required\n"); -+ -+ sysfs_notify(kobj, NULL, attr->attr.name); -+ asus_set_reboot_and_signal_event(); -+ -+ return 0; -+} -+ -+static ssize_t cores_performance_min_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_PERF, CPU_CORE_MIN); -+} -+ -+static ssize_t cores_performance_max_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_PERF, CPU_CORE_MAX); -+} -+ -+static ssize_t cores_performance_default_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_PERF, CPU_CORE_DEFAULT); -+} -+ -+static ssize_t cores_performance_current_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_PERF, CPU_CORE_CURRENT); -+} -+ -+static ssize_t cores_performance_current_value_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int err; -+ -+ err = cores_current_value_store(kobj, attr, buf, CPU_CORE_PERF); -+ if (err) -+ return err; -+ -+ return count; -+} -+ATTR_GROUP_CORES_RW(cores_performance, "cores_performance", -+ "Set the max available performance cores"); -+ -+static ssize_t cores_efficiency_min_value_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_POWER, CPU_CORE_MIN); -+} -+ -+static ssize_t cores_efficiency_max_value_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_POWER, CPU_CORE_MAX); -+} -+ -+static ssize_t cores_efficiency_default_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_POWER, CPU_CORE_DEFAULT); -+} -+ -+static ssize_t 
cores_efficiency_current_value_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return cores_value_show(kobj, attr, buf, CPU_CORE_POWER, CPU_CORE_CURRENT); -+} -+ -+static ssize_t cores_efficiency_current_value_store(struct kobject *kobj, -+ struct kobj_attribute *attr, const char *buf, -+ size_t count) -+{ -+ int err; -+ -+ err = cores_current_value_store(kobj, attr, buf, CPU_CORE_POWER); -+ if (err) -+ return err; -+ -+ return count; -+} -+ATTR_GROUP_CORES_RW(cores_efficiency, "cores_efficiency", -+ "Set the max available efficiency cores"); -+ -+/* Define helper to access the current power mode tunable values */ -+static inline struct rog_tunables *get_current_tunables(void) -+{ -+ return asus_armoury -+ .rog_tunables[power_supply_is_system_supplied() ? 1 : 0]; -+} -+ -+/* Simple attribute creation */ -+ATTR_GROUP_ROG_TUNABLE(ppt_pl1_spl, ATTR_PPT_PL1_SPL, ASUS_WMI_DEVID_PPT_PL1_SPL, -+ "Set the CPU slow package limit"); -+ATTR_GROUP_ROG_TUNABLE(ppt_pl2_sppt, ATTR_PPT_PL2_SPPT, ASUS_WMI_DEVID_PPT_PL2_SPPT, -+ "Set the CPU fast package limit"); -+ATTR_GROUP_ROG_TUNABLE(ppt_pl3_fppt, ATTR_PPT_PL3_FPPT, ASUS_WMI_DEVID_PPT_FPPT, -+ "Set the CPU fastest package limit"); -+ATTR_GROUP_ROG_TUNABLE(ppt_apu_sppt, ATTR_PPT_APU_SPPT, ASUS_WMI_DEVID_PPT_APU_SPPT, -+ "Set the APU package limit"); -+ATTR_GROUP_ROG_TUNABLE(ppt_platform_sppt, ATTR_PPT_PLATFORM_SPPT, ASUS_WMI_DEVID_PPT_PLAT_SPPT, -+ "Set the platform package limit"); -+ATTR_GROUP_ROG_TUNABLE(nv_dynamic_boost, ATTR_NV_DYNAMIC_BOOST, ASUS_WMI_DEVID_NV_DYN_BOOST, -+ "Set the Nvidia dynamic boost limit"); -+ATTR_GROUP_ROG_TUNABLE(nv_temp_target, ATTR_NV_TEMP_TARGET, ASUS_WMI_DEVID_NV_THERM_TARGET, -+ "Set the Nvidia max thermal limit"); -+ATTR_GROUP_ROG_TUNABLE(nv_tgp, "nv_tgp", ASUS_WMI_DEVID_DGPU_SET_TGP, -+ "Set the additional TGP on top of the base TGP"); -+ATTR_GROUP_INT_VALUE_ONLY_RO(nv_base_tgp, ATTR_NV_BASE_TGP, ASUS_WMI_DEVID_DGPU_BASE_TGP, -+ "Read the base TGP value"); -+ -+ 
-+ATTR_GROUP_ENUM_INT_RO(charge_mode, "charge_mode", ASUS_WMI_DEVID_CHARGE_MODE, "0;1;2", -+ "Show the current mode of charging"); -+ -+ATTR_GROUP_BOOL_RW(boot_sound, "boot_sound", ASUS_WMI_DEVID_BOOT_SOUND, -+ "Set the boot POST sound"); -+ATTR_GROUP_BOOL_RW(mcu_powersave, "mcu_powersave", ASUS_WMI_DEVID_MCU_POWERSAVE, -+ "Set MCU powersaving mode"); -+ATTR_GROUP_BOOL_RW(panel_od, "panel_overdrive", ASUS_WMI_DEVID_PANEL_OD, -+ "Set the panel refresh overdrive"); -+ATTR_GROUP_BOOL_RW(panel_hd_mode, "panel_hd_mode", ASUS_WMI_DEVID_PANEL_HD, -+ "Set the panel HD mode to UHD<0> or FHD<1>"); -+ATTR_GROUP_BOOL_RW(screen_auto_brightness, "screen_auto_brightness", -+ ASUS_WMI_DEVID_SCREEN_AUTO_BRIGHTNESS, -+ "Set the panel brightness to Off<0> or On<1>"); -+ATTR_GROUP_BOOL_RO(egpu_connected, "egpu_connected", ASUS_WMI_DEVID_EGPU_CONNECTED, -+ "Show the eGPU connection status"); -+ -+/* If an attribute does not require any special case handling add it here */ -+static const struct asus_attr_group armoury_attr_groups[] = { -+ { &egpu_connected_attr_group, ASUS_WMI_DEVID_EGPU_CONNECTED }, -+ { &egpu_enable_attr_group, ASUS_WMI_DEVID_EGPU }, -+ { &dgpu_disable_attr_group, ASUS_WMI_DEVID_DGPU }, -+ { &apu_mem_attr_group, ASUS_WMI_DEVID_APU_MEM }, -+ { &cores_efficiency_attr_group, ASUS_WMI_DEVID_CORES_MAX }, -+ { &cores_performance_attr_group, ASUS_WMI_DEVID_CORES_MAX }, -+ -+ { &ppt_pl1_spl_attr_group, ASUS_WMI_DEVID_PPT_PL1_SPL }, -+ { &ppt_pl2_sppt_attr_group, ASUS_WMI_DEVID_PPT_PL2_SPPT }, -+ { &ppt_pl3_fppt_attr_group, ASUS_WMI_DEVID_PPT_FPPT }, -+ { &ppt_apu_sppt_attr_group, ASUS_WMI_DEVID_PPT_APU_SPPT }, -+ { &ppt_platform_sppt_attr_group, ASUS_WMI_DEVID_PPT_PLAT_SPPT }, -+ { &nv_dynamic_boost_attr_group, ASUS_WMI_DEVID_NV_DYN_BOOST }, -+ { &nv_temp_target_attr_group, ASUS_WMI_DEVID_NV_THERM_TARGET }, -+ { &nv_base_tgp_attr_group, ASUS_WMI_DEVID_DGPU_BASE_TGP }, -+ { &nv_tgp_attr_group, ASUS_WMI_DEVID_DGPU_SET_TGP }, -+ -+ { &charge_mode_attr_group, 
ASUS_WMI_DEVID_CHARGE_MODE }, -+ { &boot_sound_attr_group, ASUS_WMI_DEVID_BOOT_SOUND }, -+ { &mcu_powersave_attr_group, ASUS_WMI_DEVID_MCU_POWERSAVE }, -+ { &panel_od_attr_group, ASUS_WMI_DEVID_PANEL_OD }, -+ { &panel_hd_mode_attr_group, ASUS_WMI_DEVID_PANEL_HD }, -+ { &screen_auto_brightness_attr_group, ASUS_WMI_DEVID_SCREEN_AUTO_BRIGHTNESS }, -+}; -+ -+/** -+ * is_power_tunable_attr - Determines if an attribute is a power-related tunable -+ * @name: The name of the attribute to check -+ * -+ * This function checks if the given attribute name is related to power tuning. -+ * -+ * Return: true if the attribute is a power-related tunable, false otherwise -+ */ -+static bool is_power_tunable_attr(const char *name) -+{ -+ static const char * const power_tunable_attrs[] = { -+ ATTR_PPT_PL1_SPL, ATTR_PPT_PL2_SPPT, -+ ATTR_PPT_PL3_FPPT, ATTR_PPT_APU_SPPT, -+ ATTR_PPT_PLATFORM_SPPT, ATTR_NV_DYNAMIC_BOOST, -+ ATTR_NV_TEMP_TARGET, ATTR_NV_BASE_TGP, -+ ATTR_NV_TGP -+ }; -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE(power_tunable_attrs); i++) { -+ if (!strcmp(name, power_tunable_attrs[i])) -+ return true; -+ } -+ -+ return false; -+} -+ -+/** -+ * has_valid_limit - Checks if a power-related attribute has a valid limit value -+ * @name: The name of the attribute to check -+ * @limits: Pointer to the power_limits structure containing limit values -+ * -+ * This function checks if a power-related attribute has a valid limit value. -+ * It returns false if limits is NULL or if the corresponding limit value is zero. 
-+ * -+ * Return: true if the attribute has a valid limit value, false otherwise -+ */ -+static bool has_valid_limit(const char *name, const struct power_limits *limits) -+{ -+ u32 limit_value = 0; -+ -+ if (!limits) -+ return false; -+ -+ if (!strcmp(name, ATTR_PPT_PL1_SPL)) -+ limit_value = limits->ppt_pl1_spl_max; -+ else if (!strcmp(name, ATTR_PPT_PL2_SPPT)) -+ limit_value = limits->ppt_pl2_sppt_max; -+ else if (!strcmp(name, ATTR_PPT_PL3_FPPT)) -+ limit_value = limits->ppt_pl3_fppt_max; -+ else if (!strcmp(name, ATTR_PPT_APU_SPPT)) -+ limit_value = limits->ppt_apu_sppt_max; -+ else if (!strcmp(name, ATTR_PPT_PLATFORM_SPPT)) -+ limit_value = limits->ppt_platform_sppt_max; -+ else if (!strcmp(name, ATTR_NV_DYNAMIC_BOOST)) -+ limit_value = limits->nv_dynamic_boost_max; -+ else if (!strcmp(name, ATTR_NV_TEMP_TARGET)) -+ limit_value = limits->nv_temp_target_max; -+ else if (!strcmp(name, ATTR_NV_BASE_TGP) || -+ !strcmp(name, ATTR_NV_TGP)) -+ limit_value = limits->nv_tgp_max; -+ -+ return limit_value > 0; -+} -+ -+static int asus_fw_attr_add(void) -+{ -+ const struct power_limits *limits; -+ bool should_create; -+ const char *name; -+ int err, i; -+ -+ asus_armoury.fw_attr_dev = device_create(&firmware_attributes_class, NULL, MKDEV(0, 0), -+ NULL, "%s", DRIVER_NAME); -+ if (IS_ERR(asus_armoury.fw_attr_dev)) { -+ err = PTR_ERR(asus_armoury.fw_attr_dev); -+ goto fail_class_get; -+ } -+ -+ asus_armoury.fw_attr_kset = kset_create_and_add("attributes", NULL, -+ &asus_armoury.fw_attr_dev->kobj); -+ if (!asus_armoury.fw_attr_kset) { -+ err = -ENOMEM; -+ goto err_destroy_classdev; -+ } -+ -+ err = sysfs_create_file(&asus_armoury.fw_attr_kset->kobj, &pending_reboot.attr); -+ if (err) { -+ pr_err("Failed to create sysfs level attributes\n"); -+ goto err_destroy_kset; -+ } -+ -+ asus_armoury.mini_led_dev_id = 0; -+ if (asus_wmi_is_present(ASUS_WMI_DEVID_MINI_LED_MODE)) -+ asus_armoury.mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; -+ else if 
(asus_wmi_is_present(ASUS_WMI_DEVID_MINI_LED_MODE2)) -+ asus_armoury.mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE2; -+ -+ if (asus_armoury.mini_led_dev_id) { -+ err = sysfs_create_group(&asus_armoury.fw_attr_kset->kobj, -+ &mini_led_mode_attr_group); -+ if (err) { -+ pr_err("Failed to create sysfs-group for mini_led\n"); -+ goto err_remove_file; -+ } -+ } -+ -+ asus_armoury.gpu_mux_dev_id = 0; -+ if (asus_wmi_is_present(ASUS_WMI_DEVID_GPU_MUX)) -+ asus_armoury.gpu_mux_dev_id = ASUS_WMI_DEVID_GPU_MUX; -+ else if (asus_wmi_is_present(ASUS_WMI_DEVID_GPU_MUX_VIVO)) -+ asus_armoury.gpu_mux_dev_id = ASUS_WMI_DEVID_GPU_MUX_VIVO; -+ -+ if (asus_armoury.gpu_mux_dev_id) { -+ err = sysfs_create_group(&asus_armoury.fw_attr_kset->kobj, -+ &gpu_mux_mode_attr_group); -+ if (err) { -+ pr_err("Failed to create sysfs-group for gpu_mux\n"); -+ goto err_remove_mini_led_group; -+ } -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(armoury_attr_groups); i++) { -+ if (!asus_wmi_is_present(armoury_attr_groups[i].wmi_devid)) -+ continue; -+ -+ /* Always create by default, unless PPT is not present */ -+ should_create = true; -+ name = armoury_attr_groups[i].attr_group->name; -+ -+ /* Check if this is a power-related tunable requiring limits */ -+ if (asus_armoury.rog_tunables[1] && asus_armoury.rog_tunables[1]->power_limits && -+ is_power_tunable_attr(name)) { -+ limits = asus_armoury.rog_tunables[1]->power_limits; -+ /* Check only AC, if DC is not present then AC won't be either */ -+ should_create = has_valid_limit(name, limits); -+ if (!should_create) { -+ pr_debug("Missing max value on %s for tunable: %s\n", -+ dmi_get_system_info(DMI_BOARD_NAME), name); -+ } -+ } -+ -+ if (should_create) { -+ err = sysfs_create_group(&asus_armoury.fw_attr_kset->kobj, -+ armoury_attr_groups[i].attr_group); -+ if (err) { -+ pr_err("Failed to create sysfs-group for %s\n", -+ armoury_attr_groups[i].attr_group->name); -+ goto err_remove_groups; -+ } -+ } -+ } -+ -+ return 0; -+ -+err_remove_groups: -+ while (i--) { 
-+ if (!asus_wmi_is_present(armoury_attr_groups[i].wmi_devid)) -+ continue; -+ -+ /* -+ * Mirror the creation loop: a power tunable without a valid AC -+ * limit was never created, and sysfs_remove_group() warns when -+ * asked to remove a group that does not exist. -+ */ -+ name = armoury_attr_groups[i].attr_group->name; -+ if (asus_armoury.rog_tunables[1] && asus_armoury.rog_tunables[1]->power_limits && -+ is_power_tunable_attr(name) && -+ !has_valid_limit(name, asus_armoury.rog_tunables[1]->power_limits)) -+ continue; -+ -+ sysfs_remove_group(&asus_armoury.fw_attr_kset->kobj, -+ armoury_attr_groups[i].attr_group); -+ } -+ if (asus_armoury.gpu_mux_dev_id) -+ sysfs_remove_group(&asus_armoury.fw_attr_kset->kobj, &gpu_mux_mode_attr_group); -+err_remove_mini_led_group: -+ if (asus_armoury.mini_led_dev_id) -+ sysfs_remove_group(&asus_armoury.fw_attr_kset->kobj, &mini_led_mode_attr_group); -+err_remove_file: -+ sysfs_remove_file(&asus_armoury.fw_attr_kset->kobj, &pending_reboot.attr); -+err_destroy_kset: -+ kset_unregister(asus_armoury.fw_attr_kset); -+err_destroy_classdev: -+fail_class_get: -+ device_destroy(&firmware_attributes_class, MKDEV(0, 0)); -+ return err; -+} -+ -+/* Init / exit ****************************************************************/ -+ -+/* Set up the min/max and defaults for ROG tunables */ -+static void init_rog_tunables(void) -+{ -+ const struct power_limits *ac_limits, *dc_limits; -+ const struct power_data *power_data; -+ const struct dmi_system_id *dmi_id; -+ bool ac_initialized = false, dc_initialized = false; -+ -+ /* Match the system against the power_limits table */ -+ dmi_id = dmi_first_match(power_limits); -+ if (!dmi_id) { -+ pr_warn("No matching power limits found for this system\n"); -+ return; -+ } -+ -+ /* Get the power data for this system */ -+ power_data = dmi_id->driver_data; -+ if (!power_data) { -+ pr_info("No power data available for this system\n"); -+ return; -+ } -+ -+ /* Initialize AC power tunables */ -+ ac_limits = power_data->ac_data; -+ if (ac_limits) { -+ asus_armoury.rog_tunables[1] = -+ kzalloc(sizeof(*asus_armoury.rog_tunables[1]), GFP_KERNEL); -+ if (!asus_armoury.rog_tunables[1]) -+ goto err_nomem; -+ -+ asus_armoury.rog_tunables[1]->power_limits = ac_limits; -+ -+ /* Set initial AC values */ -+ asus_armoury.rog_tunables[1]->ppt_pl1_spl = -+ ac_limits->ppt_pl1_spl_def ?
-+ ac_limits->ppt_pl1_spl_def : -+ ac_limits->ppt_pl1_spl_max; -+ -+ asus_armoury.rog_tunables[1]->ppt_pl2_sppt = -+ ac_limits->ppt_pl2_sppt_def ? -+ ac_limits->ppt_pl2_sppt_def : -+ ac_limits->ppt_pl2_sppt_max; -+ -+ asus_armoury.rog_tunables[1]->ppt_pl3_fppt = -+ ac_limits->ppt_pl3_fppt_def ? -+ ac_limits->ppt_pl3_fppt_def : -+ ac_limits->ppt_pl3_fppt_max; -+ -+ asus_armoury.rog_tunables[1]->ppt_apu_sppt = -+ ac_limits->ppt_apu_sppt_def ? -+ ac_limits->ppt_apu_sppt_def : -+ ac_limits->ppt_apu_sppt_max; -+ -+ asus_armoury.rog_tunables[1]->ppt_platform_sppt = -+ ac_limits->ppt_platform_sppt_def ? -+ ac_limits->ppt_platform_sppt_def : -+ ac_limits->ppt_platform_sppt_max; -+ -+ asus_armoury.rog_tunables[1]->nv_dynamic_boost = -+ ac_limits->nv_dynamic_boost_max; -+ asus_armoury.rog_tunables[1]->nv_temp_target = -+ ac_limits->nv_temp_target_max; -+ asus_armoury.rog_tunables[1]->nv_tgp = ac_limits->nv_tgp_max; -+ -+ ac_initialized = true; -+ pr_debug("AC power limits initialized for %s\n", dmi_id->matches[0].substr); -+ } -+ -+ /* Initialize DC power tunables */ -+ dc_limits = power_data->dc_data; -+ if (dc_limits) { -+ asus_armoury.rog_tunables[0] = -+ kzalloc(sizeof(*asus_armoury.rog_tunables[0]), GFP_KERNEL); -+ if (!asus_armoury.rog_tunables[0]) { -+ if (ac_initialized) { -+ /* -+ * Clear the slot after freeing it: this function returns -+ * void, so asus_fw_init() still runs asus_fw_attr_add(), -+ * which dereferences rog_tunables[1], and asus_fw_exit() -+ * frees both slots again. -+ */ -+ kfree(asus_armoury.rog_tunables[1]); -+ asus_armoury.rog_tunables[1] = NULL; -+ } -+ goto err_nomem; -+ } -+ -+ asus_armoury.rog_tunables[0]->power_limits = dc_limits; -+ -+ /* Set initial DC values */ -+ asus_armoury.rog_tunables[0]->ppt_pl1_spl = -+ dc_limits->ppt_pl1_spl_def ? -+ dc_limits->ppt_pl1_spl_def : -+ dc_limits->ppt_pl1_spl_max; -+ -+ asus_armoury.rog_tunables[0]->ppt_pl2_sppt = -+ dc_limits->ppt_pl2_sppt_def ? -+ dc_limits->ppt_pl2_sppt_def : -+ dc_limits->ppt_pl2_sppt_max; -+ -+ asus_armoury.rog_tunables[0]->ppt_pl3_fppt = -+ dc_limits->ppt_pl3_fppt_def ? -+ dc_limits->ppt_pl3_fppt_def : -+ dc_limits->ppt_pl3_fppt_max; -+ -+ asus_armoury.rog_tunables[0]->ppt_apu_sppt = -+ dc_limits->ppt_apu_sppt_def ?
-+ dc_limits->ppt_apu_sppt_def : -+ dc_limits->ppt_apu_sppt_max; -+ -+ asus_armoury.rog_tunables[0]->ppt_platform_sppt = -+ dc_limits->ppt_platform_sppt_def ? -+ dc_limits->ppt_platform_sppt_def : -+ dc_limits->ppt_platform_sppt_max; -+ -+ asus_armoury.rog_tunables[0]->nv_dynamic_boost = -+ dc_limits->nv_dynamic_boost_max; -+ asus_armoury.rog_tunables[0]->nv_temp_target = -+ dc_limits->nv_temp_target_max; -+ asus_armoury.rog_tunables[0]->nv_tgp = dc_limits->nv_tgp_max; -+ -+ dc_initialized = true; -+ pr_debug("DC power limits initialized for %s\n", dmi_id->matches[0].substr); -+ } -+ -+ if (!ac_initialized) -+ pr_debug("No AC PPT limits defined\n"); -+ -+ if (!dc_initialized) -+ pr_debug("No DC PPT limits defined\n"); -+ -+ return; -+ -+err_nomem: -+ pr_err("Failed to allocate memory for tunables\n"); -+} -+ -+static int __init asus_fw_init(void) -+{ -+ char *wmi_uid; -+ int err; -+ -+ wmi_uid = wmi_get_acpi_device_uid(ASUS_WMI_MGMT_GUID); -+ if (!wmi_uid) -+ return -ENODEV; -+ -+ /* -+ * if equal to "ASUSWMI" then it's DCTS that can't be used for this -+ * driver, DSTS is required. 
-+ */ -+ if (!strcmp(wmi_uid, ASUS_ACPI_UID_ASUSWMI)) -+ return -ENODEV; -+ -+ if (asus_wmi_is_present(ASUS_WMI_DEVID_CORES_MAX)) { -+ err = init_max_cpu_cores(); -+ if (err) { -+ pr_err("Could not initialise CPU core control %d\n", err); -+ return err; -+ } -+ } -+ -+ init_rog_tunables(); -+ -+ /* Must always be last step to ensure data is available */ -+ err = asus_fw_attr_add(); -+ if (err) { -+ /* -+ * Module init is about to fail, so asus_fw_exit() will never run: -+ * release the tunables allocated by init_rog_tunables() here. -+ * NOTE(review): assumes each slot is either NULL or a live -+ * allocation at this point - confirm against init_rog_tunables(). -+ */ -+ kfree(asus_armoury.rog_tunables[0]); -+ asus_armoury.rog_tunables[0] = NULL; -+ kfree(asus_armoury.rog_tunables[1]); -+ asus_armoury.rog_tunables[1] = NULL; -+ } -+ -+ return err; -+} -+ -+/* Module exit: removes the pending_reboot file, unregisters the attributes kset, destroys the class device and frees both tunables slots */ -+static void __exit asus_fw_exit(void) -+{ -+ sysfs_remove_file(&asus_armoury.fw_attr_kset->kobj, &pending_reboot.attr); -+ kset_unregister(asus_armoury.fw_attr_kset); -+ device_destroy(&firmware_attributes_class, MKDEV(0, 0)); -+ -+ kfree(asus_armoury.rog_tunables[0]); -+ kfree(asus_armoury.rog_tunables[1]); -+} -+ -+module_init(asus_fw_init); -+module_exit(asus_fw_exit); -+ -+MODULE_IMPORT_NS("ASUS_WMI"); -+MODULE_AUTHOR("Luke Jones "); -+MODULE_DESCRIPTION("ASUS BIOS Configuration Driver"); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS("wmi:" ASUS_NB_WMI_EVENT_GUID); -diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h -new file mode 100644 -index 000000000000..438768ea14cc ---- /dev/null -+++ b/drivers/platform/x86/asus-armoury.h -@@ -0,0 +1,1278 @@ -+/* SPDX-License-Identifier: GPL-2.0 -+ * -+ * Definitions for kernel modules using asus-armoury driver -+ * -+ * Copyright (c) 2024 Luke Jones -+ */ -+ -+#ifndef _ASUS_ARMOURY_H_ -+#define _ASUS_ARMOURY_H_ -+ -+#include -+#include -+#include -+ -+#define DRIVER_NAME "asus-armoury" -+ -+#define __ASUS_ATTR_RO(_func, _name) \ -+ { \ -+ .attr = { .name = __stringify(_name), .mode = 0444 }, \ -+ .show = _func##_##_name##_show, \ -+ } -+ -+#define __ASUS_ATTR_RO_AS(_name, _show) \ -+ { \ -+ .attr = { .name = __stringify(_name), .mode = 0444 }, \ -+ .show = _show, \ -+ } -+ -+#define __ASUS_ATTR_RW(_func, _name) \ -+ __ATTR(_name, 0644, _func##_##_name##_show, _func##_##_name##_store) -+ -+#define __WMI_STORE_INT(_attr, _min, _max, _wmi) \ -+ static ssize_t _attr##_store(struct kobject *kobj, \ -+ struct
kobj_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ return attr_uint_store(kobj, attr, buf, count, _min, \ -+ _max, NULL, _wmi); \ -+ } -+ -+#define WMI_SHOW_INT(_attr, _fmt, _wmi) \ -+ static ssize_t _attr##_show(struct kobject *kobj, \ -+ struct kobj_attribute *attr, char *buf) \ -+ { \ -+ u32 result; \ -+ int err; \ -+ \ -+ err = asus_wmi_get_devstate_dsts(_wmi, &result); \ -+ if (err) \ -+ return err; \ -+ return sysfs_emit(buf, _fmt, \ -+ result & ~ASUS_WMI_DSTS_PRESENCE_BIT); \ -+ } -+ -+/* Create functions and attributes for use in other macros or on their own */ -+ -+/* Shows a formatted static variable */ -+#define __ATTR_SHOW_FMT(_prop, _attrname, _fmt, _val) \ -+ static ssize_t _attrname##_##_prop##_show( \ -+ struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ -+ { \ -+ return sysfs_emit(buf, _fmt, _val); \ -+ } \ -+ static struct kobj_attribute attr_##_attrname##_##_prop = \ -+ __ASUS_ATTR_RO(_attrname, _prop) -+ -+#define __ATTR_RO_INT_GROUP_ENUM(_attrname, _wmi, _fsname, _possible, _dispname)\ -+ WMI_SHOW_INT(_attrname##_current_value, "%d\n", _wmi); \ -+ static struct kobj_attribute attr_##_attrname##_current_value = \ -+ __ASUS_ATTR_RO(_attrname, current_value); \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ __ATTR_SHOW_FMT(possible_values, _attrname, "%s\n", _possible); \ -+ static struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, enum_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_possible_values.attr, \ -+ &attr_##_attrname##_type.attr, \ -+ NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+#define __ATTR_RW_INT_GROUP_ENUM(_attrname, _minv, _maxv, _wmi, _fsname,\ -+ _possible, _dispname) \ -+ __WMI_STORE_INT(_attrname##_current_value, _minv, 
_maxv, _wmi); \ -+ WMI_SHOW_INT(_attrname##_current_value, "%d\n", _wmi); \ -+ static struct kobj_attribute attr_##_attrname##_current_value = \ -+ __ASUS_ATTR_RW(_attrname, current_value); \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ __ATTR_SHOW_FMT(possible_values, _attrname, "%s\n", _possible); \ -+ static struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, enum_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_possible_values.attr, \ -+ &attr_##_attrname##_type.attr, \ -+ NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+/* Boolean style enumeration, base macro. Requires adding show/store */ -+#define __ATTR_GROUP_ENUM(_attrname, _fsname, _possible, _dispname) \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ __ATTR_SHOW_FMT(possible_values, _attrname, "%s\n", _possible); \ -+ static struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, enum_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_possible_values.attr, \ -+ &attr_##_attrname##_type.attr, \ -+ NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+#define ATTR_GROUP_BOOL_RO(_attrname, _fsname, _wmi, _dispname) \ -+ __ATTR_RO_INT_GROUP_ENUM(_attrname, _wmi, _fsname, "0;1", _dispname) -+ -+ -+#define ATTR_GROUP_BOOL_RW(_attrname, _fsname, _wmi, _dispname) \ -+ __ATTR_RW_INT_GROUP_ENUM(_attrname, 0, 1, _wmi, _fsname, "0;1", _dispname) -+ -+#define ATTR_GROUP_ENUM_INT_RO(_attrname, _fsname, _wmi, _possible, _dispname) \ -+ __ATTR_RO_INT_GROUP_ENUM(_attrname, _wmi, _fsname, 
_possible, _dispname) -+ -+/* -+ * Requires _current_value_show(), _current_value_store() -+ */ -+#define ATTR_GROUP_BOOL_CUSTOM(_attrname, _fsname, _dispname) \ -+ static struct kobj_attribute attr_##_attrname##_current_value = \ -+ __ASUS_ATTR_RW(_attrname, current_value); \ -+ __ATTR_GROUP_ENUM(_attrname, _fsname, "0;1", _dispname) -+ -+/* -+ * Requires _current_value_show(), _current_value_store() -+ * and _possible_values_show() -+ */ -+#define ATTR_GROUP_ENUM_CUSTOM(_attrname, _fsname, _dispname) \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ static struct kobj_attribute attr_##_attrname##_current_value = \ -+ __ASUS_ATTR_RW(_attrname, current_value); \ -+ static struct kobj_attribute attr_##_attrname##_possible_values = \ -+ __ASUS_ATTR_RO(_attrname, possible_values); \ -+ static struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, enum_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_possible_values.attr, \ -+ &attr_##_attrname##_type.attr, \ -+ NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+/* CPU core attributes are set up a little differently */ -+#define ATTR_GROUP_CORES_RW(_attrname, _fsname, _dispname) \ -+ __ATTR_SHOW_FMT(scalar_increment, _attrname, "%d\n", 1); \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ static struct kobj_attribute attr_##_attrname##_current_value = \ -+ __ASUS_ATTR_RW(_attrname, current_value); \ -+ static struct kobj_attribute attr_##_attrname##_default_value = \ -+ __ASUS_ATTR_RO(_attrname, default_value); \ -+ static struct kobj_attribute attr_##_attrname##_min_value = \ -+ __ASUS_ATTR_RO(_attrname, min_value); \ -+ static struct kobj_attribute attr_##_attrname##_max_value = \ -+ __ASUS_ATTR_RO(_attrname, max_value); \ -+ static
struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, int_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_default_value.attr, \ -+ &attr_##_attrname##_min_value.attr, \ -+ &attr_##_attrname##_max_value.attr, \ -+ &attr_##_attrname##_scalar_increment.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_type.attr, \ -+ NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+#define ATTR_GROUP_INT_VALUE_ONLY_RO(_attrname, _fsname, _wmi, _dispname) \ -+ WMI_SHOW_INT(_attrname##_current_value, "%d\n", _wmi); \ -+ static struct kobj_attribute attr_##_attrname##_current_value = \ -+ __ASUS_ATTR_RO(_attrname, current_value); \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ static struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, int_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_type.attr, NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+/* -+ * ROG PPT attributes need a little different in setup as they -+ * require rog_tunables members. 
-+ */ -+ -+#define __ROG_TUNABLE_SHOW(_prop, _attrname, _val) \ -+ static ssize_t _attrname##_##_prop##_show( \ -+ struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ -+ { \ -+ struct rog_tunables *tunables = get_current_tunables(); \ -+ \ -+ if (!tunables || !tunables->power_limits) \ -+ return -ENODEV; \ -+ \ -+ return sysfs_emit(buf, "%d\n", tunables->power_limits->_val); \ -+ } \ -+ static struct kobj_attribute attr_##_attrname##_##_prop = \ -+ __ASUS_ATTR_RO(_attrname, _prop) -+ -+#define __ROG_TUNABLE_SHOW_DEFAULT(_attrname) \ -+ static ssize_t _attrname##_default_value_show( \ -+ struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ -+ { \ -+ struct rog_tunables *tunables = get_current_tunables(); \ -+ \ -+ if (!tunables || !tunables->power_limits) \ -+ return -ENODEV; \ -+ \ -+ return sysfs_emit( \ -+ buf, "%d\n", \ -+ tunables->power_limits->_attrname##_def ? \ -+ tunables->power_limits->_attrname##_def : \ -+ tunables->power_limits->_attrname##_max); \ -+ } \ -+ static struct kobj_attribute attr_##_attrname##_default_value = \ -+ __ASUS_ATTR_RO(_attrname, default_value) -+ -+#define __ROG_TUNABLE_RW(_attr, _wmi) \ -+ static ssize_t _attr##_current_value_store( \ -+ struct kobject *kobj, struct kobj_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ struct rog_tunables *tunables = get_current_tunables(); \ -+ \ -+ if (!tunables || !tunables->power_limits) \ -+ return -ENODEV; \ -+ \ -+ return attr_uint_store(kobj, attr, buf, count, \ -+ tunables->power_limits->_attr##_min, \ -+ tunables->power_limits->_attr##_max, \ -+ &tunables->_attr, _wmi); \ -+ } \ -+ static ssize_t _attr##_current_value_show( \ -+ struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ -+ { \ -+ struct rog_tunables *tunables = get_current_tunables(); \ -+ \ -+ if (!tunables) \ -+ return -ENODEV; \ -+ \ -+ return sysfs_emit(buf, "%u\n", tunables->_attr); \ -+ } \ -+ static struct kobj_attribute attr_##_attr##_current_value = \ -+ 
__ASUS_ATTR_RW(_attr, current_value) -+ -+#define ATTR_GROUP_ROG_TUNABLE(_attrname, _fsname, _wmi, _dispname) \ -+ __ROG_TUNABLE_RW(_attrname, _wmi); \ -+ __ROG_TUNABLE_SHOW_DEFAULT(_attrname); \ -+ __ROG_TUNABLE_SHOW(min_value, _attrname, _attrname##_min); \ -+ __ROG_TUNABLE_SHOW(max_value, _attrname, _attrname##_max); \ -+ __ATTR_SHOW_FMT(scalar_increment, _attrname, "%d\n", 1); \ -+ __ATTR_SHOW_FMT(display_name, _attrname, "%s\n", _dispname); \ -+ static struct kobj_attribute attr_##_attrname##_type = \ -+ __ASUS_ATTR_RO_AS(type, int_type_show); \ -+ static struct attribute *_attrname##_attrs[] = { \ -+ &attr_##_attrname##_current_value.attr, \ -+ &attr_##_attrname##_default_value.attr, \ -+ &attr_##_attrname##_min_value.attr, \ -+ &attr_##_attrname##_max_value.attr, \ -+ &attr_##_attrname##_scalar_increment.attr, \ -+ &attr_##_attrname##_display_name.attr, \ -+ &attr_##_attrname##_type.attr, \ -+ NULL \ -+ }; \ -+ static const struct attribute_group _attrname##_attr_group = { \ -+ .name = _fsname, .attrs = _attrname##_attrs \ -+ } -+ -+/* Default is always the maximum value unless *_def is specified */ -+struct power_limits { -+ u8 ppt_pl1_spl_min; -+ u8 ppt_pl1_spl_def; -+ u8 ppt_pl1_spl_max; -+ u8 ppt_pl2_sppt_min; -+ u8 ppt_pl2_sppt_def; -+ u8 ppt_pl2_sppt_max; -+ u8 ppt_pl3_fppt_min; -+ u8 ppt_pl3_fppt_def; -+ u8 ppt_pl3_fppt_max; -+ u8 ppt_apu_sppt_min; -+ u8 ppt_apu_sppt_def; -+ u8 ppt_apu_sppt_max; -+ u8 ppt_platform_sppt_min; -+ u8 ppt_platform_sppt_def; -+ u8 ppt_platform_sppt_max; -+ /* Nvidia GPU specific, default is always max */ -+ u8 nv_dynamic_boost_def; // unused. exists for macro -+ u8 nv_dynamic_boost_min; -+ u8 nv_dynamic_boost_max; -+ u8 nv_temp_target_def; // unused. exists for macro -+ u8 nv_temp_target_min; -+ u8 nv_temp_target_max; -+ u8 nv_tgp_def; // unused. 
exists for macro -+ u8 nv_tgp_min; -+ u8 nv_tgp_max; -+}; -+ -+struct power_data { -+ const struct power_limits *ac_data; -+ const struct power_limits *dc_data; -+ bool requires_fan_curve; -+}; -+ -+/* -+ * For each avilable attribute there must be a min and a max. -+ * _def is not required and will be assumed to be default == max if missing. -+ */ -+static const struct dmi_system_id power_limits[] = { -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA401W"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 75, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 30, -+ .ppt_pl2_sppt_min = 31, -+ .ppt_pl2_sppt_max = 44, -+ .ppt_pl3_fppt_min = 45, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA507N"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 45, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 54, -+ .ppt_pl2_sppt_max = 65, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA507R"), -+ }, -+ .driver_data = 
&(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80 -+ }, -+ .dc_data = NULL -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA507X"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 85, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 45, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 54, -+ .ppt_pl2_sppt_max = 65, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA507Z"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 105, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 15, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 85, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 45, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 60, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA607P"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 30, -+ .ppt_pl1_spl_def = 100, -+ .ppt_pl1_spl_max = 135, -+ .ppt_pl2_sppt_min = 30, -+ .ppt_pl2_sppt_def = 115, -+ 
.ppt_pl2_sppt_max = 135, -+ .ppt_pl3_fppt_min = 30, -+ .ppt_pl3_fppt_max = 135, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 115, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_def = 45, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_def = 60, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 25, -+ .ppt_pl3_fppt_max = 80, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA617NS"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 15, -+ .ppt_apu_sppt_max = 80, -+ .ppt_platform_sppt_min = 30, -+ .ppt_platform_sppt_max = 120 -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 25, -+ .ppt_apu_sppt_max = 35, -+ .ppt_platform_sppt_min = 45, -+ .ppt_platform_sppt_max = 100 -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA617NT"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 15, -+ .ppt_apu_sppt_max = 80, -+ .ppt_platform_sppt_min = 30, -+ .ppt_platform_sppt_max = 115 -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 15, -+ .ppt_apu_sppt_max = 45, -+ .ppt_platform_sppt_min = 30, -+ .ppt_platform_sppt_max = 50 -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FA617XS"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 15, -+ .ppt_apu_sppt_max = 80, -+ .ppt_platform_sppt_min = 30, -+ .ppt_platform_sppt_max = 120, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 25, -+ .ppt_apu_sppt_max = 35, -+ .ppt_platform_sppt_min = 45, -+ .ppt_platform_sppt_max = 100, -+ .nv_temp_target_min = 75, -+ 
.nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "FX507Z"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 90, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 135, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 15, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 45, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 60, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GA401Q"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_max = 80, -+ }, -+ .dc_data = NULL -+ }, -+ }, -+ { -+ .matches = { -+ // This model is full AMD. No Nvidia dGPU. -+ DMI_MATCH(DMI_BOARD_NAME, "GA402R"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 15, -+ .ppt_apu_sppt_max = 80, -+ .ppt_platform_sppt_min = 30, -+ .ppt_platform_sppt_max = 115, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_apu_sppt_min = 25, -+ .ppt_apu_sppt_def = 30, -+ .ppt_apu_sppt_max = 45, -+ .ppt_platform_sppt_min = 40, -+ .ppt_platform_sppt_max = 60, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GA402X"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 35, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_def = 65, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 35, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65, -+ 
.nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GA403U"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 65, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 35, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GA503R"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 35, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 65, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 25, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 54, -+ .ppt_pl2_sppt_max = 60, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65 -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GA605W"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max 
= 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 85, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 31, -+ .ppt_pl2_sppt_max = 44, -+ .ppt_pl3_fppt_min = 45, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GU603Z"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 60, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 135, -+ /* Only allowed in AC mode */ -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 40, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 40, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GU604V"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 65, -+ .ppt_pl1_spl_max = 120, -+ .ppt_pl2_sppt_min = 65, -+ .ppt_pl2_sppt_max = 150, -+ /* Only allowed in AC mode */ -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 40, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 40, -+ .ppt_pl2_sppt_max = 60, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GU605M"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 90, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 135, -+ .nv_dynamic_boost_min = 5, -+ 
.nv_dynamic_boost_max = 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 38, -+ .ppt_pl2_sppt_max = 53, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GV301Q"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 45, -+ .ppt_pl2_sppt_min = 65, -+ .ppt_pl2_sppt_max = 80, -+ }, -+ .dc_data = NULL -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GV301R"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 45, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 54, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 35, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GV601R"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 35, -+ .ppt_pl1_spl_max = 90, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 54, -+ .ppt_pl2_sppt_max = 100, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_def = 80, -+ .ppt_pl3_fppt_max = 125, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 28, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 54, -+ .ppt_pl2_sppt_def = 40, -+ 
.ppt_pl2_sppt_max = 60, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_def = 80, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GV601V"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_def = 100, -+ .ppt_pl1_spl_max = 110, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 135, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 40, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 40, -+ .ppt_pl2_sppt_max = 60, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "GX650P"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 110, -+ .ppt_pl1_spl_max = 130, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 125, -+ .ppt_pl2_sppt_max = 130, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_def = 125, -+ .ppt_pl3_fppt_max = 135, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_def = 25, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_def = 35, -+ .ppt_pl2_sppt_max = 65, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_def = 42, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G513I"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ /* Yes this laptop is very limited */ -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_max = 80, 
-+ }, -+ .dc_data = NULL, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G513QM"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ /* Yes this laptop is very limited */ -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 100, -+ .ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_max = 190, -+ }, -+ .dc_data = NULL, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G513R"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 35, -+ .ppt_pl1_spl_max = 90, -+ .ppt_pl2_sppt_min = 54, -+ .ppt_pl2_sppt_max = 100, -+ .ppt_pl3_fppt_min = 54, -+ .ppt_pl3_fppt_max = 125, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 50, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 50, -+ .ppt_pl3_fppt_min = 28, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G614J"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 140, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 175, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 55, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 70, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G634J"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 
140, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 175, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 55, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 70, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G733C"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 170, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 175, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 35, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G733P"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 30, -+ .ppt_pl1_spl_def = 100, -+ .ppt_pl1_spl_max = 130, -+ .ppt_pl2_sppt_min = 65, -+ .ppt_pl2_sppt_def = 125, -+ .ppt_pl2_sppt_max = 130, -+ .ppt_pl3_fppt_min = 65, -+ .ppt_pl3_fppt_def = 125, -+ .ppt_pl3_fppt_max = 130, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 65, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 65, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 75, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G814J"), -+ }, -+ .driver_data = &(struct power_data) { 
-+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 140, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 140, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 55, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 70, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "G834J"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 28, -+ .ppt_pl1_spl_max = 140, -+ .ppt_pl2_sppt_min = 28, -+ .ppt_pl2_sppt_max = 175, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 25, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 55, -+ .ppt_pl2_sppt_min = 25, -+ .ppt_pl2_sppt_max = 70, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ }, -+ .requires_fan_curve = true, -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "H7606W"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 15, -+ .ppt_pl1_spl_max = 80, -+ .ppt_pl2_sppt_min = 35, -+ .ppt_pl2_sppt_max = 80, -+ .ppt_pl3_fppt_min = 35, -+ .ppt_pl3_fppt_max = 80, -+ .nv_dynamic_boost_min = 5, -+ .nv_dynamic_boost_max = 20, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ .nv_tgp_min = 55, -+ .nv_tgp_max = 85, -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 25, -+ .ppt_pl1_spl_max = 35, -+ .ppt_pl2_sppt_min = 31, -+ .ppt_pl2_sppt_max = 44, -+ .ppt_pl3_fppt_min = 45, -+ .ppt_pl3_fppt_max = 65, -+ .nv_temp_target_min = 75, -+ .nv_temp_target_max = 87, -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "RC71"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 7, -+ .ppt_pl1_spl_max = 30, -+ 
.ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_max = 43, -+ .ppt_pl3_fppt_min = 15, -+ .ppt_pl3_fppt_max = 53 -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 7, -+ .ppt_pl1_spl_def = 15, -+ .ppt_pl1_spl_max = 25, -+ .ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_def = 20, -+ .ppt_pl2_sppt_max = 30, -+ .ppt_pl3_fppt_min = 15, -+ .ppt_pl3_fppt_def = 25, -+ .ppt_pl3_fppt_max = 35 -+ } -+ }, -+ }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BOARD_NAME, "RC72"), -+ }, -+ .driver_data = &(struct power_data) { -+ .ac_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 7, -+ .ppt_pl1_spl_max = 30, -+ .ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_max = 43, -+ .ppt_pl3_fppt_min = 15, -+ .ppt_pl3_fppt_max = 53 -+ }, -+ .dc_data = &(struct power_limits) { -+ .ppt_pl1_spl_min = 7, -+ .ppt_pl1_spl_def = 17, -+ .ppt_pl1_spl_max = 25, -+ .ppt_pl2_sppt_min = 15, -+ .ppt_pl2_sppt_def = 24, -+ .ppt_pl2_sppt_max = 30, -+ .ppt_pl3_fppt_min = 15, -+ .ppt_pl3_fppt_def = 30, -+ .ppt_pl3_fppt_max = 35 -+ } -+ }, -+ }, -+ {} -+}; -+ -+#endif /* _ASUS_ARMOURY_H_ */ -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index f7191fdded14..a6d6efdb50b7 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -55,8 +55,6 @@ module_param(fnlock_default, bool, 0444); - #define to_asus_wmi_driver(pdrv) \ - (container_of((pdrv), struct asus_wmi_driver, platform_driver)) - --#define ASUS_WMI_MGMT_GUID "97845ED0-4E6D-11DE-8A39-0800200C9A66" -- - #define NOTIFY_BRNUP_MIN 0x11 - #define NOTIFY_BRNUP_MAX 0x1f - #define NOTIFY_BRNDOWN_MIN 0x20 -@@ -105,8 +103,6 @@ module_param(fnlock_default, bool, 0444); - #define USB_INTEL_XUSB2PR 0xD0 - #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 - --#define ASUS_ACPI_UID_ASUSWMI "ASUSWMI" -- - #define WMI_EVENT_MASK 0xFFFF - - #define FAN_CURVE_POINTS 8 -@@ -340,6 +336,13 @@ struct asus_wmi { - /* Global to allow setting externally without requiring driver data */ - static enum asus_ally_mcu_hack 
use_ally_mcu_hack = ASUS_WMI_ALLY_MCU_HACK_INIT; - -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) -+static void asus_wmi_show_deprecated(void) -+{ -+ pr_notice_once("Accessing attributes through /sys/bus/platform/asus_wmi is deprecated and will be removed in a future release. Please switch over to /sys/class/firmware_attributes.\n"); -+} -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ -+ - /* WMI ************************************************************************/ - - static int asus_wmi_evaluate_method3(u32 method_id, -@@ -390,7 +393,7 @@ int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval) - { - return asus_wmi_evaluate_method3(method_id, arg0, arg1, 0, retval); - } --EXPORT_SYMBOL_GPL(asus_wmi_evaluate_method); -+EXPORT_SYMBOL_NS_GPL(asus_wmi_evaluate_method, "ASUS_WMI"); - - static int asus_wmi_evaluate_method5(u32 method_id, - u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 *retval) -@@ -554,12 +557,46 @@ static int asus_wmi_get_devstate(struct asus_wmi *asus, u32 dev_id, u32 *retval) - return 0; - } - --int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, -- u32 *retval) -+/** -+ * asus_wmi_get_devstate_dsts() - Get the WMI function state. -+ * @dev_id: The WMI method ID to call. -+ * @retval: A pointer to where to store the value returned from WMI. -+ * @return: 0 on success and retval is filled. -+ * @return: -ENODEV if the method ID is unsupported. -+ * @return: everything else is an error from WMI call. -+ */ -+int asus_wmi_get_devstate_dsts(u32 dev_id, u32 *retval) -+{ -+ int err; -+ -+ err = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS, dev_id, 0, retval); -+ if (err) -+ return err; -+ -+ if (*retval == ASUS_WMI_UNSUPPORTED_METHOD) -+ return -ENODEV; -+ -+ return 0; -+} -+EXPORT_SYMBOL_NS_GPL(asus_wmi_get_devstate_dsts, "ASUS_WMI"); -+ -+/** -+ * asus_wmi_set_devstate() - Set the WMI function state. -+ * @dev_id: The WMI function to call. 
-+ * @ctrl_param: The argument to be used for this WMI function. -+ * @retval: A pointer to where to store the value returned from WMI. -+ * @return: 0 on success and retval is filled. -+ * @return: everything else is an error from WMI call. -+ * -+ * A asus_wmi_set_devstate() call must be paired with a -+ * asus_wmi_get_devstate_dsts() to check if the WMI function is supported. -+ */ -+int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval) - { - return asus_wmi_evaluate_method(ASUS_WMI_METHODID_DEVS, dev_id, - ctrl_param, retval); - } -+EXPORT_SYMBOL_NS_GPL(asus_wmi_set_devstate, "ASUS_WMI"); - - /* Helper for special devices with magic return codes */ - static int asus_wmi_get_devstate_bits(struct asus_wmi *asus, -@@ -692,6 +729,7 @@ static void asus_wmi_tablet_mode_get_state(struct asus_wmi *asus) - } - - /* Charging mode, 1=Barrel, 2=USB ******************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t charge_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -702,12 +740,16 @@ static ssize_t charge_mode_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", value & 0xff); - } - - static DEVICE_ATTR_RO(charge_mode); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* dGPU ********************************************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t dgpu_disable_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -718,6 +760,8 @@ static ssize_t dgpu_disable_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - -@@ -771,8 +815,10 @@ static ssize_t dgpu_disable_store(struct device *dev, - return count; - } - static DEVICE_ATTR_RW(dgpu_disable); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* eGPU 
********************************************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t egpu_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -783,6 +829,8 @@ static ssize_t egpu_enable_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - -@@ -839,8 +887,10 @@ static ssize_t egpu_enable_store(struct device *dev, - return count; - } - static DEVICE_ATTR_RW(egpu_enable); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Is eGPU connected? *********************************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t egpu_connected_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -851,12 +901,16 @@ static ssize_t egpu_connected_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - - static DEVICE_ATTR_RO(egpu_connected); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* gpu mux switch *************************************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t gpu_mux_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -867,6 +921,8 @@ static ssize_t gpu_mux_mode_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - -@@ -925,6 +981,7 @@ static ssize_t gpu_mux_mode_store(struct device *dev, - return count; - } - static DEVICE_ATTR_RW(gpu_mux_mode); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* TUF Laptop Keyboard RGB Modes **********************************************/ - static ssize_t kbd_rgb_mode_store(struct device *dev, -@@ -1048,6 +1105,7 @@ static const struct attribute_group *kbd_rgb_mode_groups[] = { 
- }; - - /* Tunable: PPT: Intel=PL1, AMD=SPPT *****************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t ppt_pl2_sppt_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -@@ -1086,6 +1144,8 @@ static ssize_t ppt_pl2_sppt_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->ppt_pl2_sppt); - } - static DEVICE_ATTR_RW(ppt_pl2_sppt); -@@ -1128,6 +1188,8 @@ static ssize_t ppt_pl1_spl_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->ppt_pl1_spl); - } - static DEVICE_ATTR_RW(ppt_pl1_spl); -@@ -1171,6 +1233,8 @@ static ssize_t ppt_fppt_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->ppt_fppt); - } - static DEVICE_ATTR_RW(ppt_fppt); -@@ -1214,6 +1278,8 @@ static ssize_t ppt_apu_sppt_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->ppt_apu_sppt); - } - static DEVICE_ATTR_RW(ppt_apu_sppt); -@@ -1257,6 +1323,8 @@ static ssize_t ppt_platform_sppt_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->ppt_platform_sppt); - } - static DEVICE_ATTR_RW(ppt_platform_sppt); -@@ -1300,6 +1368,8 @@ static ssize_t nv_dynamic_boost_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->nv_dynamic_boost); - } - static DEVICE_ATTR_RW(nv_dynamic_boost); -@@ -1343,9 +1413,12 @@ static ssize_t nv_temp_target_show(struct device *dev, - { - struct asus_wmi *asus = dev_get_drvdata(dev); - -+ 
asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%u\n", asus->nv_temp_target); - } - static DEVICE_ATTR_RW(nv_temp_target); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Ally MCU Powersave ********************************************************/ - -@@ -1386,6 +1459,7 @@ void set_ally_mcu_powersave(bool enabled) - } - EXPORT_SYMBOL_NS_GPL(set_ally_mcu_powersave, "ASUS_WMI"); - -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t mcu_powersave_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -1396,6 +1470,8 @@ static ssize_t mcu_powersave_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - -@@ -1431,6 +1507,7 @@ static ssize_t mcu_powersave_store(struct device *dev, - return count; - } - static DEVICE_ATTR_RW(mcu_powersave); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Battery ********************************************************************/ - -@@ -2304,6 +2381,7 @@ static int asus_wmi_rfkill_init(struct asus_wmi *asus) - } - - /* Panel Overdrive ************************************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t panel_od_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -2314,6 +2392,8 @@ static ssize_t panel_od_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - -@@ -2350,9 +2430,10 @@ static ssize_t panel_od_store(struct device *dev, - return count; - } - static DEVICE_ATTR_RW(panel_od); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Bootup sound ***************************************************************/ -- -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t boot_sound_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -2363,6 
+2444,8 @@ static ssize_t boot_sound_show(struct device *dev, - if (result < 0) - return result; - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", result); - } - -@@ -2398,8 +2481,10 @@ static ssize_t boot_sound_store(struct device *dev, - return count; - } - static DEVICE_ATTR_RW(boot_sound); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Mini-LED mode **************************************************************/ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t mini_led_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -2430,6 +2515,8 @@ static ssize_t mini_led_mode_show(struct device *dev, - } - } - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "%d\n", value); - } - -@@ -2500,10 +2587,13 @@ static ssize_t available_mini_led_mode_show(struct device *dev, - return sysfs_emit(buf, "0 1 2\n"); - } - -+ asus_wmi_show_deprecated(); -+ - return sysfs_emit(buf, "0\n"); - } - - static DEVICE_ATTR_RO(available_mini_led_mode); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Quirks *********************************************************************/ - -@@ -3791,6 +3881,7 @@ static int throttle_thermal_policy_set_default(struct asus_wmi *asus) - return throttle_thermal_policy_write(asus); - } - -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - static ssize_t throttle_thermal_policy_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -3834,6 +3925,7 @@ static ssize_t throttle_thermal_policy_store(struct device *dev, - * Throttle thermal policy: 0 - default, 1 - overboost, 2 - silent - */ - static DEVICE_ATTR_RW(throttle_thermal_policy); -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - /* Platform profile ***********************************************************/ - static int asus_wmi_platform_profile_get(struct device *dev, -@@ -3853,7 +3945,7 @@ static int asus_wmi_platform_profile_get(struct device *dev, - 
*profile = PLATFORM_PROFILE_PERFORMANCE; - break; - case ASUS_THROTTLE_THERMAL_POLICY_SILENT: -- *profile = PLATFORM_PROFILE_QUIET; -+ *profile = PLATFORM_PROFILE_LOW_POWER; - break; - default: - return -EINVAL; -@@ -3877,7 +3969,7 @@ static int asus_wmi_platform_profile_set(struct device *dev, - case PLATFORM_PROFILE_BALANCED: - tp = ASUS_THROTTLE_THERMAL_POLICY_DEFAULT; - break; -- case PLATFORM_PROFILE_QUIET: -+ case PLATFORM_PROFILE_LOW_POWER: - tp = ASUS_THROTTLE_THERMAL_POLICY_SILENT; - break; - default: -@@ -3890,7 +3982,7 @@ static int asus_wmi_platform_profile_set(struct device *dev, - - static int asus_wmi_platform_profile_probe(void *drvdata, unsigned long *choices) - { -- set_bit(PLATFORM_PROFILE_QUIET, choices); -+ set_bit(PLATFORM_PROFILE_LOW_POWER, choices); - set_bit(PLATFORM_PROFILE_BALANCED, choices); - set_bit(PLATFORM_PROFILE_PERFORMANCE, choices); - -@@ -4435,27 +4527,29 @@ static struct attribute *platform_attributes[] = { - &dev_attr_camera.attr, - &dev_attr_cardr.attr, - &dev_attr_touchpad.attr, -- &dev_attr_charge_mode.attr, -- &dev_attr_egpu_enable.attr, -- &dev_attr_egpu_connected.attr, -- &dev_attr_dgpu_disable.attr, -- &dev_attr_gpu_mux_mode.attr, - &dev_attr_lid_resume.attr, - &dev_attr_als_enable.attr, - &dev_attr_fan_boost_mode.attr, -- &dev_attr_throttle_thermal_policy.attr, -- &dev_attr_ppt_pl2_sppt.attr, -- &dev_attr_ppt_pl1_spl.attr, -- &dev_attr_ppt_fppt.attr, -- &dev_attr_ppt_apu_sppt.attr, -- &dev_attr_ppt_platform_sppt.attr, -- &dev_attr_nv_dynamic_boost.attr, -- &dev_attr_nv_temp_target.attr, -- &dev_attr_mcu_powersave.attr, -- &dev_attr_boot_sound.attr, -- &dev_attr_panel_od.attr, -- &dev_attr_mini_led_mode.attr, -- &dev_attr_available_mini_led_mode.attr, -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) -+ &dev_attr_charge_mode.attr, -+ &dev_attr_egpu_enable.attr, -+ &dev_attr_egpu_connected.attr, -+ &dev_attr_dgpu_disable.attr, -+ &dev_attr_gpu_mux_mode.attr, -+ &dev_attr_ppt_pl2_sppt.attr, -+ &dev_attr_ppt_pl1_spl.attr, 
-+ &dev_attr_ppt_fppt.attr, -+ &dev_attr_ppt_apu_sppt.attr, -+ &dev_attr_ppt_platform_sppt.attr, -+ &dev_attr_nv_dynamic_boost.attr, -+ &dev_attr_nv_temp_target.attr, -+ &dev_attr_mcu_powersave.attr, -+ &dev_attr_boot_sound.attr, -+ &dev_attr_panel_od.attr, -+ &dev_attr_mini_led_mode.attr, -+ &dev_attr_available_mini_led_mode.attr, -+ &dev_attr_throttle_thermal_policy.attr, -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - NULL - }; - -@@ -4477,7 +4571,11 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj, - devid = ASUS_WMI_DEVID_LID_RESUME; - else if (attr == &dev_attr_als_enable.attr) - devid = ASUS_WMI_DEVID_ALS_ENABLE; -- else if (attr == &dev_attr_charge_mode.attr) -+ else if (attr == &dev_attr_fan_boost_mode.attr) -+ ok = asus->fan_boost_mode_available; -+ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) -+ if (attr == &dev_attr_charge_mode.attr) - devid = ASUS_WMI_DEVID_CHARGE_MODE; - else if (attr == &dev_attr_egpu_enable.attr) - ok = asus->egpu_enable_available; -@@ -4515,6 +4613,7 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj, - ok = asus->mini_led_dev_id != 0; - else if (attr == &dev_attr_available_mini_led_mode.attr) - ok = asus->mini_led_dev_id != 0; -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - if (devid != -1) { - ok = !(asus_wmi_get_devstate_simple(asus, devid) < 0); -@@ -4770,6 +4869,7 @@ static int asus_wmi_add(struct platform_device *pdev) - } - - /* ensure defaults for tunables */ -+#if IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) - asus->ppt_pl2_sppt = 5; - asus->ppt_pl1_spl = 5; - asus->ppt_apu_sppt = 5; -@@ -4792,17 +4892,18 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX; - else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX_VIVO)) - asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX_VIVO; -- -- if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE)) -- asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE; -- else if 
(asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE2)) -- asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE2; -+#endif /* IS_ENABLED(CONFIG_ASUS_WMI_DEPRECATED_ATTRS) */ - - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY)) - asus->throttle_thermal_policy_dev = ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY; - else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY_VIVO)) - asus->throttle_thermal_policy_dev = ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY_VIVO; - -+ if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE)) -+ asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE; -+ else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE2)) -+ asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE2; -+ - err = fan_boost_mode_check_present(asus); - if (err) - goto fail_fan_boost_mode; -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index 8a515179113d..86279da06ea2 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -6,6 +6,9 @@ - #include - #include - -+#define ASUS_WMI_MGMT_GUID "97845ED0-4E6D-11DE-8A39-0800200C9A66" -+#define ASUS_ACPI_UID_ASUSWMI "ASUSWMI" -+ - /* WMI Methods */ - #define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */ - #define ASUS_WMI_METHODID_SFBD 0x44424653 /* Set First Boot Device */ -@@ -73,12 +76,14 @@ - #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY_VIVO 0x00110019 - - /* Misc */ -+#define ASUS_WMI_DEVID_PANEL_HD 0x0005001C - #define ASUS_WMI_DEVID_PANEL_OD 0x00050019 - #define ASUS_WMI_DEVID_CAMERA 0x00060013 - #define ASUS_WMI_DEVID_LID_FLIP 0x00060062 - #define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077 - #define ASUS_WMI_DEVID_MINI_LED_MODE 0x0005001E - #define ASUS_WMI_DEVID_MINI_LED_MODE2 0x0005002E -+#define ASUS_WMI_DEVID_SCREEN_AUTO_BRIGHTNESS 0x0005002A - - /* Storage */ - #define ASUS_WMI_DEVID_CARDREADER 0x00080013 -@@ -133,6 +138,16 @@ - /* dgpu on/off */ - #define 
ASUS_WMI_DEVID_DGPU 0x00090020 - -+/* Intel E-core and P-core configuration in a format 0x0[E]0[P] */ -+#define ASUS_WMI_DEVID_CORES 0x001200D2 -+ /* Maximum Intel E-core and P-core availability */ -+#define ASUS_WMI_DEVID_CORES_MAX 0x001200D3 -+ -+#define ASUS_WMI_DEVID_APU_MEM 0x000600C1 -+ -+#define ASUS_WMI_DEVID_DGPU_BASE_TGP 0x00120099 -+#define ASUS_WMI_DEVID_DGPU_SET_TGP 0x00120098 -+ - /* gpu mux switch, 0 = dGPU, 1 = Optimus */ - #define ASUS_WMI_DEVID_GPU_MUX 0x00090016 - #define ASUS_WMI_DEVID_GPU_MUX_VIVO 0x00090026 -@@ -166,6 +181,7 @@ enum asus_ally_mcu_hack { - #if IS_REACHABLE(CONFIG_ASUS_WMI) - void set_ally_mcu_hack(enum asus_ally_mcu_hack status); - void set_ally_mcu_powersave(bool enabled); -+int asus_wmi_get_devstate_dsts(u32 dev_id, u32 *retval); - int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval); - int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval); - #else -@@ -179,6 +195,10 @@ static inline int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval) - { - return -ENODEV; - } -+static inline int asus_wmi_get_devstate_dsts(u32 dev_id, u32 *retval) -+{ -+ return -ENODEV; -+} - static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, - u32 *retval) - { -@@ -187,6 +207,7 @@ static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, - #endif - - /* To be used by both hid-asus and asus-wmi to determine which controls kbd_brightness */ -+#if IS_REACHABLE(CONFIG_ASUS_WMI) || IS_REACHABLE(CONFIG_HID_ASUS) - static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { - { - .matches = { -@@ -225,5 +246,6 @@ static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { - }, - { }, - }; -+#endif - - #endif /* __PLATFORM_DATA_X86_ASUS_WMI_H */ --- -2.51.0 - diff --git a/sys-kernel/git-sources/0001_Support-printing-firmware-info.patch b/sys-kernel/git-sources/0001_Support-printing-firmware-info.patch new file mode 100644 index 0000000..a630cfb --- /dev/null 
+++ b/sys-kernel/git-sources/0001_Support-printing-firmware-info.patch @@ -0,0 +1,14 @@ +--- a/drivers/base/firmware_loader/main.c 2021-08-24 15:42:07.025482085 -0400 ++++ b/drivers/base/firmware_loader/main.c 2021-08-24 15:44:40.782975313 -0400 +@@ -809,6 +809,11 @@ _request_firmware(const struct firmware + + ret = _request_firmware_prepare(&fw, name, device, buf, size, + offset, opt_flags); ++ ++#ifdef CONFIG_GENTOO_PRINT_FIRMWARE_INFO ++ printk(KERN_NOTICE "Loading firmware: %s\n", name); ++#endif ++ + if (ret <= 0) /* error or already assigned */ + goto out; + diff --git a/sys-kernel/git-sources/0002-bbr3.patch b/sys-kernel/git-sources/0002-bbr3.patch deleted file mode 100644 index dcc5932..0000000 --- a/sys-kernel/git-sources/0002-bbr3.patch +++ /dev/null @@ -1,3404 +0,0 @@ -From 3205f6b619a4a9a62d914442d0925738f05854ac Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Mon, 1 Sep 2025 09:38:54 +0800 -Subject: [PATCH 2/4] bbr3 - -Signed-off-by: Eric Naim ---- - include/linux/tcp.h | 6 +- - include/net/inet_connection_sock.h | 4 +- - include/net/tcp.h | 73 +- - include/uapi/linux/inet_diag.h | 23 + - include/uapi/linux/rtnetlink.h | 4 +- - include/uapi/linux/tcp.h | 1 + - net/ipv4/Kconfig | 21 +- - net/ipv4/bpf_tcp_ca.c | 4 +- - net/ipv4/tcp.c | 3 + - net/ipv4/tcp_bbr.c | 2232 +++++++++++++++++++++------- - net/ipv4/tcp_cong.c | 1 + - net/ipv4/tcp_input.c | 40 +- - net/ipv4/tcp_minisocks.c | 2 + - net/ipv4/tcp_output.c | 48 +- - net/ipv4/tcp_rate.c | 30 +- - net/ipv4/tcp_timer.c | 4 +- - 16 files changed, 1941 insertions(+), 555 deletions(-) - -diff --git a/include/linux/tcp.h b/include/linux/tcp.h -index 57e478bfaef2..0ea92792629c 100644 ---- a/include/linux/tcp.h -+++ b/include/linux/tcp.h -@@ -247,7 +247,8 @@ struct tcp_sock { - void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); - #endif - u32 snd_ssthresh; /* Slow start size threshold */ -- u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ -+ u32 recvmsg_inq : 1,/* Indicate # of bytes 
in queue upon recvmsg */ -+ fast_ack_mode:1;/* ack ASAP if >1 rcv_mss received? */ - __cacheline_group_end(tcp_sock_read_rx); - - /* TX read-write hotpath cache lines */ -@@ -304,7 +305,8 @@ struct tcp_sock { - */ - struct tcp_options_received rx_opt; - u8 nonagle : 4,/* Disable Nagle algorithm? */ -- rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ -+ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ -+ tlp_orig_data_app_limited:1; /* app-limited before TLP rtx? */ - __cacheline_group_end(tcp_sock_write_txrx); - - /* RX read-write hotpath cache lines */ -diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h -index 1735db332aab..2c4a94af7093 100644 ---- a/include/net/inet_connection_sock.h -+++ b/include/net/inet_connection_sock.h -@@ -132,8 +132,8 @@ struct inet_connection_sock { - u32 icsk_probes_tstamp; - u32 icsk_user_timeout; - -- u64 icsk_ca_priv[104 / sizeof(u64)]; --#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) -+#define ICSK_CA_PRIV_SIZE (144) -+ u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)]; - }; - - #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -diff --git a/include/net/tcp.h b/include/net/tcp.h -index 526a26e7a150..564084c537c7 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -378,11 +378,14 @@ static inline void tcp_dec_quickack_mode(struct sock *sk) - #define TCP_ECN_DEMAND_CWR BIT(2) - #define TCP_ECN_SEEN BIT(3) - #define TCP_ECN_MODE_ACCECN BIT(4) -+#define TCP_ECN_LOW BIT(5) -+#define TCP_ECN_ECT_PERMANENT BIT(6) - - #define TCP_ECN_DISABLED 0 - #define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) - #define TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) - -+ - static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp) - { - return tp->ecn_flags & TCP_ECN_MODE_ANY; -@@ -840,6 +843,15 @@ static inline void tcp_fast_path_check(struct sock *sk) - - u32 tcp_delack_max(const struct sock *sk); - 
-+static inline void tcp_set_ecn_low_from_dst(struct sock *sk, -+ const struct dst_entry *dst) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW)) -+ tp->ecn_flags |= TCP_ECN_LOW; -+} -+ - /* Compute the actual rto_min value */ - static inline u32 tcp_rto_min(const struct sock *sk) - { -@@ -945,6 +957,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) - return max_t(s64, t1 - t0, 0); - } - -+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) -+{ -+ return max_t(s32, t1 - t0, 0); -+} -+ - /* provide the departure time in us unit */ - static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) - { -@@ -1043,9 +1060,14 @@ struct tcp_skb_cb { - /* pkts S/ACKed so far upon tx of skb, incl retrans: */ - __u32 delivered; - /* start of send pipeline phase */ -- u64 first_tx_mstamp; -+ u32 first_tx_mstamp; - /* when we reached the "delivered" count */ -- u64 delivered_mstamp; -+ u32 delivered_mstamp; -+#define TCPCB_IN_FLIGHT_BITS 20 -+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) -+ u32 in_flight:20, /* packets in flight at transmit */ -+ unused2:12; -+ u32 lost; /* packets lost so far upon tx of skb */ - } tx; /* only used for outgoing skbs */ - union { - struct inet_skb_parm h4; -@@ -1158,6 +1180,7 @@ enum tcp_ca_event { - CA_EVENT_LOSS, /* loss timeout */ - CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ - CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ -+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */ - }; - - /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ -@@ -1180,7 +1203,11 @@ enum tcp_ca_ack_event_flags { - #define TCP_CONG_NON_RESTRICTED BIT(0) - /* Requires ECN/ECT set on all packets */ - #define TCP_CONG_NEEDS_ECN BIT(1) --#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) -+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). 
*/ -+#define TCP_CONG_WANTS_CE_EVENTS BIT(2) -+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ -+ TCP_CONG_NEEDS_ECN | \ -+ TCP_CONG_WANTS_CE_EVENTS) - - union tcp_cc_info; - -@@ -1200,10 +1227,13 @@ struct ack_sample { - */ - struct rate_sample { - u64 prior_mstamp; /* starting timestamp for interval */ -+ u32 prior_lost; /* tp->lost at "prior_mstamp" */ - u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ - u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ -+ u32 tx_in_flight; /* packets in flight at starting timestamp */ -+ s32 lost; /* number of packets lost over interval */ - s32 delivered; /* number of packets delivered over interval */ -- s32 delivered_ce; /* number of packets delivered w/ CE marks*/ -+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */ - long interval_us; /* time for tp->delivered to incr "delivered" */ - u32 snd_interval_us; /* snd interval for delivered packets */ - u32 rcv_interval_us; /* rcv interval for delivered packets */ -@@ -1214,7 +1244,9 @@ struct rate_sample { - u32 last_end_seq; /* end_seq of most recently ACKed packet */ - bool is_app_limited; /* is sample from packet with bubble in pipe? */ - bool is_retrans; /* is sample from retransmission? */ -+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */ - bool is_ack_delayed; /* is this (likely) a delayed ACK? */ -+ bool is_ece; /* did this ACK have ECN marked? 
*/ - }; - - struct tcp_congestion_ops { -@@ -1238,8 +1270,11 @@ struct tcp_congestion_ops { - /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - -- /* override sysctl_tcp_min_tso_segs */ -- u32 (*min_tso_segs)(struct sock *sk); -+ /* pick target number of segments per TSO/GSO skb (optional): */ -+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); -+ -+ /* react to a specific lost skb (optional) */ -+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); - - /* call when packets are delivered to update cwnd and pacing rate, - * after all the ca_state processing. (optional) -@@ -1305,6 +1340,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) - } - #endif - -+static inline bool tcp_ca_wants_ce_events(const struct sock *sk) -+{ -+ const struct inet_connection_sock *icsk = inet_csk(sk); -+ -+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | -+ TCP_CONG_WANTS_CE_EVENTS); -+} -+ - static inline bool tcp_ca_needs_ecn(const struct sock *sk) - { - const struct inet_connection_sock *icsk = inet_csk(sk); -@@ -1324,6 +1367,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) - void tcp_set_ca_state(struct sock *sk, const u8 ca_state); - - /* From tcp_rate.c */ -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - struct rate_sample *rs); -@@ -1336,6 +1380,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) - return t1 > t2 || (t1 == t2 && after(seq1, seq2)); - } - -+/* If a retransmit failed due to local qdisc congestion or other local issues, -+ * then we may have called tcp_set_skb_tso_segs() to increase the number of -+ * segments in the skb without increasing the tx.in_flight. 
In all other cases, -+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We -+ * do not have the state to know whether a retransmit failed due to local qdisc -+ * congestion or other local issues, so to avoid spurious warnings we consider -+ * that any skb marked lost may have suffered that fate. -+ */ -+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount, -+ u32 skb_sacked_flags, -+ u32 tx_in_flight) -+{ -+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST); -+} -+ - /* These functions determine how the current flow behaves in respect of SACK - * handling. SACK is negotiated with the peer, and therefore it can vary - * between different flows. -@@ -2483,7 +2542,7 @@ struct tcp_plb_state { - u8 consec_cong_rounds:5, /* consecutive congested rounds */ - unused:3; - u32 pause_until; /* jiffies32 when PLB can resume rerouting */ --}; -+} __attribute__ ((__packed__)); - - static inline void tcp_plb_init(const struct sock *sk, - struct tcp_plb_state *plb) -diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h -index 86bb2e8b17c9..9d9a3eb2ce9b 100644 ---- a/include/uapi/linux/inet_diag.h -+++ b/include/uapi/linux/inet_diag.h -@@ -229,6 +229,29 @@ struct tcp_bbr_info { - __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ - __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ - __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ -+ __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ -+ __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ -+ __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ -+ __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ -+ __u8 bbr_mode; /* current bbr_mode in state machine */ -+ __u8 bbr_phase; /* current state machine phase */ -+ __u8 unused1; /* alignment padding; not used yet */ -+ __u8 bbr_version; /* BBR algorithm version */ -+ __u32 bbr_inflight_lo; /* lower short-term data volume bound */ -+ __u32 bbr_inflight_hi; /* higher long-term data volume bound 
*/ -+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ -+}; -+ -+/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */ -+enum tcp_bbr_phase { -+ BBR_PHASE_INVALID = 0, -+ BBR_PHASE_STARTUP = 1, -+ BBR_PHASE_DRAIN = 2, -+ BBR_PHASE_PROBE_RTT = 3, -+ BBR_PHASE_PROBE_BW_UP = 4, -+ BBR_PHASE_PROBE_BW_DOWN = 5, -+ BBR_PHASE_PROBE_BW_CRUISE = 6, -+ BBR_PHASE_PROBE_BW_REFILL = 7, - }; - - union tcp_cc_info { -diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h -index dab9493c791b..cce4975fdcfe 100644 ---- a/include/uapi/linux/rtnetlink.h -+++ b/include/uapi/linux/rtnetlink.h -@@ -517,12 +517,14 @@ enum { - #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ - #define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ - #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) -+#define RTAX_FEATURE_ECN_LOW (1 << 5) - - #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ - RTAX_FEATURE_SACK | \ - RTAX_FEATURE_TIMESTAMP | \ - RTAX_FEATURE_ALLFRAG | \ -- RTAX_FEATURE_TCP_USEC_TS) -+ RTAX_FEATURE_TCP_USEC_TS | \ -+ RTAX_FEATURE_ECN_LOW) - - struct rta_session { - __u8 proto; -diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h -index bdac8c42fa82..362644a272ba 100644 ---- a/include/uapi/linux/tcp.h -+++ b/include/uapi/linux/tcp.h -@@ -185,6 +185,7 @@ enum tcp_fastopen_client_fail { - #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ - #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ - #define TCPI_OPT_TFO_CHILD 128 /* child from a Fast Open option on SYN */ -+#define TCPI_OPT_ECN_LOW 256 /* Low-latency ECN configured at init */ - - /* - * Sender's congestion state indicating normal or abnormal situations -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 12850a277251..3b8b96692fb4 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -669,15 +669,18 @@ config TCP_CONG_BBR - default n - help - -- BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to -- maximize network utilization 
and minimize queues. It builds an explicit -- model of the bottleneck delivery rate and path round-trip propagation -- delay. It tolerates packet loss and delay unrelated to congestion. It -- can operate over LAN, WAN, cellular, wifi, or cable modem links. It can -- coexist with flows that use loss-based congestion control, and can -- operate with shallow buffers, deep buffers, bufferbloat, policers, or -- AQM schemes that do not provide a delay signal. It requires the fq -- ("Fair Queue") pacing packet scheduler. -+ BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a -+ model-based congestion control algorithm that aims to maximize -+ network utilization, keep queues and retransmit rates low, and to be -+ able to coexist with Reno/CUBIC in common scenarios. It builds an -+ explicit model of the network path. It tolerates a targeted degree -+ of random packet loss and delay. It can operate over LAN, WAN, -+ cellular, wifi, or cable modem links, and can use shallow-threshold -+ ECN signals. It can coexist to some degree with flows that use -+ loss-based congestion control, and can operate with shallow buffers, -+ deep buffers, bufferbloat, policers, or AQM schemes that do not -+ provide a delay signal. It requires pacing, using either TCP internal -+ pacing or the fq ("Fair Queue") pacing packet scheduler. 
- - choice - prompt "Default TCP congestion control" -diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c -index e01492234b0b..27893b774e08 100644 ---- a/net/ipv4/bpf_tcp_ca.c -+++ b/net/ipv4/bpf_tcp_ca.c -@@ -280,7 +280,7 @@ static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *samp - { - } - --static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) -+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now) - { - return 0; - } -@@ -315,7 +315,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { - .cwnd_event = bpf_tcp_ca_cwnd_event, - .in_ack_event = bpf_tcp_ca_in_ack_event, - .pkts_acked = bpf_tcp_ca_pkts_acked, -- .min_tso_segs = bpf_tcp_ca_min_tso_segs, -+ .tso_segs = bpf_tcp_ca_tso_segs, - .cong_control = bpf_tcp_ca_cong_control, - .undo_cwnd = bpf_tcp_ca_undo_cwnd, - .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 71a956fbfc55..f9866bd97ac4 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3439,6 +3439,7 @@ int tcp_disconnect(struct sock *sk, int flags) - tp->rx_opt.dsack = 0; - tp->rx_opt.num_sacks = 0; - tp->rcv_ooopack = 0; -+ tp->fast_ack_mode = 0; - - - /* Clean up fastopen related fields */ -@@ -4191,6 +4192,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) - info->tcpi_options |= TCPI_OPT_ECN; - if (tp->ecn_flags & TCP_ECN_SEEN) - info->tcpi_options |= TCPI_OPT_ECN_SEEN; -+ if (tp->ecn_flags & TCP_ECN_LOW) -+ info->tcpi_options |= TCPI_OPT_ECN_LOW; - if (tp->syn_data_acked) - info->tcpi_options |= TCPI_OPT_SYN_DATA; - if (tp->tcp_usec_ts) -diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c -index 760941e55153..066da5e5747c 100644 ---- a/net/ipv4/tcp_bbr.c -+++ b/net/ipv4/tcp_bbr.c -@@ -1,18 +1,19 @@ --/* Bottleneck Bandwidth and RTT (BBR) congestion control -+/* BBR (Bottleneck Bandwidth and RTT) congestion control - * -- * BBR congestion control computes the sending rate based on the delivery -- * rate (throughput) estimated 
from ACKs. In a nutshell: -+ * BBR is a model-based congestion control algorithm that aims for low queues, -+ * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the -+ * network path, it uses measurements of bandwidth and RTT, as well as (if they -+ * occur) packet loss and/or shallow-threshold ECN signals. Note that although -+ * it can use ECN or loss signals explicitly, it does not require either; it -+ * can bound its in-flight data based on its estimate of the BDP. - * -- * On each ACK, update our model of the network path: -- * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) -- * min_rtt = windowed_min(rtt, 10 seconds) -- * pacing_rate = pacing_gain * bottleneck_bandwidth -- * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) -- * -- * The core algorithm does not react directly to packet losses or delays, -- * although BBR may adjust the size of next send per ACK when loss is -- * observed, or adjust the sending rate if it estimates there is a -- * traffic policer, in order to keep the drop rate reasonable. -+ * The model has both higher and lower bounds for the operating range: -+ * lo: bw_lo, inflight_lo: conservative short-term lower bound -+ * hi: bw_hi, inflight_hi: robust long-term upper bound -+ * The bandwidth-probing time scale is (a) extended dynamically based on -+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by -+ * an interactive wall-clock time-scale to be more scalable and responsive -+ * than Reno and CUBIC. - * - * Here is a state transition diagram for BBR: - * -@@ -65,6 +66,13 @@ - #include - #include - -+#include -+#include "tcp_dctcp.h" -+ -+#define BBR_VERSION 3 -+ -+#define bbr_param(sk,name) (bbr_ ## name) -+ - /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth - * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. - * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. 
-@@ -85,36 +93,41 @@ enum bbr_mode { - BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ - }; - -+/* How does the incoming ACK stream relate to our bandwidth probing? */ -+enum bbr_ack_phase { -+ BBR_ACKS_INIT, /* not probing; not getting probe feedback */ -+ BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ -+ BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ -+ BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ -+ BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ -+}; -+ - /* BBR congestion control block */ - struct bbr { - u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ - u32 min_rtt_stamp; /* timestamp of min_rtt_us */ - u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ -- struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ -- u32 rtt_cnt; /* count of packet-timed rounds elapsed */ -+ u32 probe_rtt_min_us; /* min RTT in probe_rtt_win_ms win */ -+ u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ - u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ - u64 cycle_mstamp; /* time of this cycle phase start */ -- u32 mode:3, /* current bbr_mode in state machine */ -+ u32 mode:2, /* current bbr_mode in state machine */ - prev_ca_state:3, /* CA state on previous ACK */ -- packet_conservation:1, /* use packet conservation? */ - round_start:1, /* start of packet-timed tx->ack round? */ -+ ce_state:1, /* If most recent data has CE bit set */ -+ bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ -+ try_fast_path:1, /* can we take fast path? */ - idle_restart:1, /* restarting after idle? */ - probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ -- unused:13, -- lt_is_sampling:1, /* taking long-term ("LT") samples now? */ -- lt_rtt_cnt:7, /* round trips in long-term interval */ -- lt_use_bw:1; /* use lt_bw as our bw estimate? 
*/ -- u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ -- u32 lt_last_delivered; /* LT intvl start: tp->delivered */ -- u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ -- u32 lt_last_lost; /* LT intvl start: tp->lost */ -+ init_cwnd:7, /* initial cwnd */ -+ unused_1:10; - u32 pacing_gain:10, /* current gain for setting pacing rate */ - cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_reached:1, /* reached full bw in Startup? */ - full_bw_cnt:2, /* number of rounds without large bw gains */ -- cycle_idx:3, /* current index in pacing_gain cycle array */ -+ cycle_idx:2, /* current index in pacing_gain cycle array */ - has_seen_rtt:1, /* have we seen an RTT sample yet? */ -- unused_b:5; -+ unused_2:6; - u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ - u32 full_bw; /* recent bw, to estimate if pipe is full */ - -@@ -124,19 +137,67 @@ struct bbr { - u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ - extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ - extra_acked_win_idx:1, /* current index in extra_acked array */ -- unused_c:6; -+ /* BBR v3 state: */ -+ full_bw_now:1, /* recently reached full bw plateau? */ -+ startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ -+ loss_in_cycle:1, /* packet loss in this cycle? */ -+ ecn_in_cycle:1, /* ECN in this cycle? 
*/ -+ unused_3:1; -+ u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ -+ u32 undo_bw_lo; /* bw_lo before latest losses */ -+ u32 undo_inflight_lo; /* inflight_lo before latest losses */ -+ u32 undo_inflight_hi; /* inflight_hi before latest losses */ -+ u32 bw_latest; /* max delivered bw in last round trip */ -+ u32 bw_lo; /* lower bound on sending bandwidth */ -+ u32 bw_hi[2]; /* max recent measured bw sample */ -+ u32 inflight_latest; /* max delivered data in last round trip */ -+ u32 inflight_lo; /* lower bound of inflight data range */ -+ u32 inflight_hi; /* upper bound of inflight data range */ -+ u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ -+ u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ -+ u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ -+ u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ -+ u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ -+ ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ -+ bw_probe_samples:1, /* rate samples reflect bw probing? */ -+ prev_probe_too_high:1, /* did last PROBE_UP go too high? */ -+ stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ -+ rounds_since_probe:8, /* packet-timed rounds since probed bw */ -+ loss_round_start:1, /* loss_round_delivered round trip? */ -+ loss_in_round:1, /* loss marked in this round trip? */ -+ ecn_in_round:1, /* ECN marked in this round trip? */ -+ ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ -+ loss_events_in_round:4,/* losses in STARTUP round */ -+ initialized:1; /* has bbr_init() been called? 
*/ -+ u32 alpha_last_delivered; /* tp->delivered at alpha update */ -+ u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ -+ -+ u8 unused_4; /* to preserve alignment */ -+ struct tcp_plb_state plb; - }; - --#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ -+struct bbr_context { -+ u32 sample_bw; -+}; - --/* Window length of bw filter (in rounds): */ --static const int bbr_bw_rtts = CYCLE_LEN + 2; - /* Window length of min_rtt filter (in sec): */ - static const u32 bbr_min_rtt_win_sec = 10; - /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ - static const u32 bbr_probe_rtt_mode_ms = 200; --/* Skip TSO below the following bandwidth (bits/sec): */ --static const int bbr_min_tso_rate = 1200000; -+/* Window length of probe_rtt_min_us filter (in ms), and consequently the -+ * typical interval between PROBE_RTT mode entries. The default is 5000ms. -+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC -+ */ -+static const u32 bbr_probe_rtt_win_ms = 5000; -+/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */ -+static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; -+ -+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting -+ * in bigger TSO bursts. We cut the RTT-based allowance in half -+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance -+ * is below 1500 bytes after 6 * ~500 usec = 3ms. -+ */ -+static const u32 bbr_tso_rtt_shift = 9; - - /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. 
- * In order to help drive the network toward lower queues and low latency while -@@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000; - */ - static const int bbr_pacing_margin_percent = 1; - --/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain -+/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value - * that will allow a smoothly increasing pacing rate that will double each RTT - * and send the same number of packets per RTT that an un-paced, slow-starting - * Reno or CUBIC flow would: - */ --static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; --/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain -+static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1; -+/* The gain for deriving startup cwnd: */ -+static const int bbr_startup_cwnd_gain = BBR_UNIT * 2; -+/* The pacing gain in BBR_DRAIN is calculated to typically drain - * the queue created in BBR_STARTUP in a single round: - */ - static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; -@@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; - static const int bbr_cwnd_gain = BBR_UNIT * 2; - /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ - static const int bbr_pacing_gain[] = { -- BBR_UNIT * 5 / 4, /* probe for more available bw */ -- BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ -- BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ -- BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... 
*/ -+ BBR_UNIT * 5 / 4, /* UP: probe for more available bw */ -+ BBR_UNIT * 91 / 100, /* DOWN: drain queue and/or yield bw */ -+ BBR_UNIT, /* CRUISE: try to use pipe w/ some headroom */ -+ BBR_UNIT, /* REFILL: refill pipe to estimated 100% */ -+}; -+enum bbr_pacing_gain_phase { -+ BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ -+ BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ -+ BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ -+ BBR_BW_PROBE_REFILL = 3, /* refill the pipe again to 100% */ - }; --/* Randomize the starting gain cycling phase over N phases: */ --static const u32 bbr_cycle_rand = 7; - - /* Try to keep at least this many packets in flight, if things go smoothly. For - * smooth functioning, a sliding window protocol ACKing every other packet -@@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7; - */ - static const u32 bbr_cwnd_min_target = 4; - --/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ -+/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */ - /* If bw has increased significantly (1.25x), there may be more bw available: */ - static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; - /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ - static const u32 bbr_full_bw_cnt = 3; - --/* "long-term" ("LT") bandwidth estimator parameters... 
*/ --/* The minimum number of rounds in an LT bw sampling interval: */ --static const u32 bbr_lt_intvl_min_rtts = 4; --/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ --static const u32 bbr_lt_loss_thresh = 50; --/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ --static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; --/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ --static const u32 bbr_lt_bw_diff = 4000 / 8; --/* If we estimate we're policed, use lt_bw for this many round trips: */ --static const u32 bbr_lt_bw_max_rtts = 48; -- - /* Gain factor for adding extra_acked to target cwnd: */ - static const int bbr_extra_acked_gain = BBR_UNIT; - /* Window length of extra_acked window. */ -@@ -201,8 +256,122 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; - /* Time period for clamping cwnd increment due to ack aggregation */ - static const u32 bbr_extra_acked_max_us = 100 * 1000; - -+/* Flags to control BBR ECN-related behavior... */ -+ -+/* Ensure ACKs only ACK packets with consistent ECN CE status? */ -+static const bool bbr_precise_ece_ack = true; -+ -+/* Max RTT (in usec) at which to use sender-side ECN logic. -+ * Disabled when 0 (ECN allowed at any RTT). -+ */ -+static const u32 bbr_ecn_max_rtt_us = 5000; -+ -+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. -+ * No loss response when 0. -+ */ -+static const u32 bbr_beta = BBR_UNIT * 30 / 100; -+ -+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */ -+static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; -+ -+/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly -+ * to congestion if the bottleneck is congested when the flow starts up. -+ */ -+static const u32 bbr_ecn_alpha_init = BBR_UNIT; -+ -+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. -+ * No ECN based bounding when 0. 
-+ */ -+static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ -+ -+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. -+ * Scaled by BBR_SCALE. Disabled when 0. -+ */ -+static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ -+ -+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN -+ * clears then make the first round's increment to inflight_hi the following -+ * fraction of inflight_hi. -+ */ -+static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2; -+ -+/* Estimate bw probing has gone too far if loss rate exceeds this level. */ -+static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ -+ -+/* Slow down for a packet loss recovered by TLP? */ -+static const bool bbr_loss_probe_recovery = true; -+ -+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, -+ * and loss rate is higher than bbr_loss_thresh. -+ * Disabled if 0. -+ */ -+static const u32 bbr_full_loss_cnt = 6; -+ -+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh -+ * meets this count. -+ */ -+static const u32 bbr_full_ecn_cnt = 2; -+ -+/* Fraction of unutilized headroom to try to leave in path upon high loss. */ -+static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; -+ -+/* How much do we increase cwnd_gain when probing for bandwidth in -+ * BBR_BW_PROBE_UP? This specifies the increment in units of -+ * BBR_UNIT/4. The default is 1, meaning 0.25. -+ * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75). -+ */ -+static const u32 bbr_bw_probe_cwnd_gain = 1; -+ -+/* Max number of packet-timed rounds to wait before probing for bandwidth. If -+ * we want to tolerate 1% random loss per round, and not have this cut our -+ * inflight too much, we must probe for bw periodically on roughly this scale. -+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. 
-+ * We aim to be fair with Reno/CUBIC up to a BDP of at least: -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ */ -+static const u32 bbr_bw_probe_max_rounds = 63; -+ -+/* Max amount of randomness to inject in round counting for Reno-coexistence. -+ */ -+static const u32 bbr_bw_probe_rand_rounds = 2; -+ -+/* Use BBR-native probe time scale starting at this many usec. -+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: -+ * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs -+ */ -+static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ -+ -+/* Use BBR-native probes spread over this many usec: */ -+static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ -+ -+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ -+static const bool bbr_fast_path = true; -+ -+/* Use fast ack mode? */ -+static const bool bbr_fast_ack_mode = true; -+ -+static u32 bbr_max_bw(const struct sock *sk); -+static u32 bbr_bw(const struct sock *sk); -+static void bbr_exit_probe_rtt(struct sock *sk); -+static void bbr_reset_congestion_signals(struct sock *sk); -+static void bbr_run_loss_probe_recovery(struct sock *sk); -+ - static void bbr_check_probe_rtt_done(struct sock *sk); - -+/* This connection can use ECN if both endpoints have signaled ECN support in -+ * the handshake and the per-route settings indicated this is a -+ * shallow-threshold ECN environment, meaning both: -+ * (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and -+ * (b) TCP endpoints provide precise ACKs that only ACK data segments -+ * with consistent ECN CE status -+ */ -+static bool bbr_can_use_ecn(const struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ -+ return (tcp_ecn_mode_any(tp)) && (tp->ecn_flags & TCP_ECN_LOW); -+} -+ - /* Do we estimate that STARTUP filled the pipe? 
*/ - static bool bbr_full_bw_reached(const struct sock *sk) - { -@@ -214,17 +383,17 @@ static bool bbr_full_bw_reached(const struct sock *sk) - /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ - static u32 bbr_max_bw(const struct sock *sk) - { -- struct bbr *bbr = inet_csk_ca(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); - -- return minmax_get(&bbr->bw); -+ return max(bbr->bw_hi[0], bbr->bw_hi[1]); - } - - /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ - static u32 bbr_bw(const struct sock *sk) - { -- struct bbr *bbr = inet_csk_ca(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); - -- return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); -+ return min(bbr_max_bw(sk), bbr->bw_lo); - } - - /* Return maximum extra acked in past k-2k round trips, -@@ -241,15 +410,23 @@ static u16 bbr_extra_acked(const struct sock *sk) - * The order here is chosen carefully to avoid overflow of u64. This should - * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. - */ --static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) -+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, -+ int margin) - { - unsigned int mss = tcp_sk(sk)->mss_cache; - - rate *= mss; - rate *= gain; - rate >>= BBR_SCALE; -- rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); -- return rate >> BW_SCALE; -+ rate *= USEC_PER_SEC / 100 * (100 - margin); -+ rate >>= BW_SCALE; -+ rate = max(rate, 1ULL); -+ return rate; -+} -+ -+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) -+{ -+ return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); - } - - /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. 
*/ -@@ -257,12 +434,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) - { - u64 rate = bw; - -- rate = bbr_rate_bytes_per_sec(sk, rate, gain); -+ rate = bbr_rate_bytes_per_sec(sk, rate, gain, -+ bbr_pacing_margin_percent); - rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); - return rate; - } - --/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ -+/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */ - static void bbr_init_pacing_rate_from_rtt(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -279,7 +457,8 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) - bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; - do_div(bw, rtt_us); - WRITE_ONCE(sk->sk_pacing_rate, -- bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); -+ bbr_bw_to_pacing_rate(sk, bw, -+ bbr_param(sk, startup_pacing_gain))); - } - - /* Pace using current bw estimate and a gain factor. */ -@@ -295,26 +474,48 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) - WRITE_ONCE(sk->sk_pacing_rate, rate); - } - --/* override sysctl_tcp_min_tso_segs */ --__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) -+/* Return the number of segments BBR would like in a TSO/GSO skb, given a -+ * particular max gso size as a constraint. TODO: make this simpler and more -+ * consistent by switching bbr to just call tcp_tso_autosize(). -+ */ -+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, -+ u32 gso_max_size) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 segs, r; -+ u64 bytes; -+ -+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ -+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); -+ -+ /* Budget a TSO/GSO burst size allowance based on min_rtt. For every -+ * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. 
-+ * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) -+ */ -+ if (bbr_param(sk, tso_rtt_shift)) { -+ r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift); -+ if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ -+ bytes += GSO_LEGACY_MAX_SIZE >> r; -+ } -+ -+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); -+ segs = max_t(u32, bytes / mss_now, -+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); -+ return segs; -+} -+ -+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ -+__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) - { -- return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; -+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); - } - -+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ - static u32 bbr_tso_segs_goal(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -- u32 segs, bytes; -- -- /* Sort of tcp_tso_autosize() but ignoring -- * driver provided sk_gso_max_size. 
-- */ -- bytes = min_t(unsigned long, -- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), -- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); -- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - -- return min(segs, 0x7FU); -+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); - } - - /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -@@ -334,7 +535,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - -- if (event == CA_EVENT_TX_START && tp->app_limited) { -+ if (event == CA_EVENT_TX_START) { -+ if (!tp->app_limited) -+ return; - bbr->idle_restart = 1; - bbr->ack_epoch_mstamp = tp->tcp_mstamp; - bbr->ack_epoch_acked = 0; -@@ -345,6 +548,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) - bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); - else if (bbr->mode == BBR_PROBE_RTT) - bbr_check_probe_rtt_done(sk); -+ } else if ((event == CA_EVENT_ECN_IS_CE || -+ event == CA_EVENT_ECN_NO_CE) && -+ bbr_can_use_ecn(sk) && -+ bbr_param(sk, precise_ece_ack)) { -+ u32 state = bbr->ce_state; -+ dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); -+ bbr->ce_state = state; -+ } else if (event == CA_EVENT_TLP_RECOVERY && -+ bbr_param(sk, loss_probe_recovery)) { -+ bbr_run_loss_probe_recovery(sk); - } - } - -@@ -367,10 +580,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) - * default. This should only happen when the connection is not using TCP - * timestamps and has retransmitted all of the SYN/SYNACK/data packets - * ACKed so far. In this case, an RTO can cut cwnd to 1, in which -- * case we need to slow-start up toward something safe: TCP_INIT_CWND. -+ * case we need to slow-start up toward something safe: initial cwnd. - */ - if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? 
*/ -- return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ -+ return bbr->init_cwnd; /* be safe: cap at initial cwnd */ - - w = (u64)bw * bbr->min_rtt_us; - -@@ -387,23 +600,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) - * - one skb in sending host Qdisc, - * - one skb in sending host TSO/GSO engine - * - one skb being received by receiver host LRO/GRO/delayed-ACK engine -- * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because -- * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, -+ * Don't worry, at low rates this won't bloat cwnd because -+ * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets, - * which allows 2 outstanding 2-packet sequences, to try to keep pipe - * full even with ACK-every-other-packet delayed ACKs. - */ - static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) - { - struct bbr *bbr = inet_csk_ca(sk); -+ u32 tso_segs_goal; - -- /* Allow enough full-sized skbs in flight to utilize end systems. */ -- cwnd += 3 * bbr_tso_segs_goal(sk); -- -- /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ -- cwnd = (cwnd + 1) & ~1U; -+ tso_segs_goal = 3 * bbr_tso_segs_goal(sk); - -+ /* Allow enough full-sized skbs in flight to utilize end systems. */ -+ cwnd = max_t(u32, cwnd, tso_segs_goal); -+ cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); - /* Ensure gain cycling gets inflight above BDP even for small BDPs. 
*/ -- if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) - cwnd += 2; - - return cwnd; -@@ -458,10 +671,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) - { - u32 max_aggr_cwnd, aggr_cwnd = 0; - -- if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { -+ if (bbr_param(sk, extra_acked_gain)) { - max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) - / BW_UNIT; -- aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) -+ aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk)) - >> BBR_SCALE; - aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); - } -@@ -469,66 +682,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) - return aggr_cwnd; - } - --/* An optimization in BBR to reduce losses: On the first round of recovery, we -- * follow the packet conservation principle: send P packets per P packets acked. -- * After that, we slow-start and send at most 2*P packets per P packets acked. -- * After recovery finishes, or upon undo, we restore the cwnd we had when -- * recovery started (capped by the target cwnd based on estimated BDP). -- * -- * TODO(ycheng/ncardwell): implement a rate-based approach. -- */ --static bool bbr_set_cwnd_to_recover_or_restore( -- struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) -+/* Returns the cwnd for PROBE_RTT mode. */ -+static u32 bbr_probe_rtt_cwnd(struct sock *sk) - { -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; -- u32 cwnd = tcp_snd_cwnd(tp); -- -- /* An ACK for P pkts should release at most 2*P packets. We do this -- * in two steps. First, here we deduct the number of lost packets. -- * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. 
-- */ -- if (rs->losses > 0) -- cwnd = max_t(s32, cwnd - rs->losses, 1); -- -- if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { -- /* Starting 1st round of Recovery, so do packet conservation. */ -- bbr->packet_conservation = 1; -- bbr->next_rtt_delivered = tp->delivered; /* start round now */ -- /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ -- cwnd = tcp_packets_in_flight(tp) + acked; -- } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { -- /* Exiting loss recovery; restore cwnd saved before recovery. */ -- cwnd = max(cwnd, bbr->prior_cwnd); -- bbr->packet_conservation = 0; -- } -- bbr->prev_ca_state = state; -- -- if (bbr->packet_conservation) { -- *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); -- return true; /* yes, using packet conservation */ -- } -- *new_cwnd = cwnd; -- return false; -+ return max_t(u32, bbr_param(sk, cwnd_min_target), -+ bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain))); - } - - /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss - * has drawn us down below target), or snap down to target if we're above it. 
- */ - static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, -- u32 acked, u32 bw, int gain) -+ u32 acked, u32 bw, int gain, u32 cwnd, -+ struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; -+ u32 target_cwnd = 0; - - if (!acked) - goto done; /* no packet fully ACKed; just apply caps */ - -- if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) -- goto done; -- - target_cwnd = bbr_bdp(sk, bw, gain); - - /* Increment the cwnd to account for excess ACKed data that seems -@@ -537,74 +711,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, - target_cwnd += bbr_ack_aggregation_cwnd(sk); - target_cwnd = bbr_quantization_budget(sk, target_cwnd); - -- /* If we're below target cwnd, slow start cwnd toward target cwnd. */ -- if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ -- cwnd = min(cwnd + acked, target_cwnd); -- else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) -- cwnd = cwnd + acked; -- cwnd = max(cwnd, bbr_cwnd_min_target); -+ /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ -+ bbr->try_fast_path = 0; -+ if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ -+ cwnd += acked; -+ if (cwnd >= target_cwnd) { -+ cwnd = target_cwnd; -+ bbr->try_fast_path = 1; -+ } -+ } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { -+ cwnd += acked; -+ } else { -+ bbr->try_fast_path = 1; -+ } - -+ cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); - done: -- tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ -+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* global cap */ - if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ -- tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); --} -- --/* End cycle phase if it's time and/or we hit the phase's in-flight target. 
*/ --static bool bbr_is_next_cycle_phase(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- bool is_full_length = -- tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > -- bbr->min_rtt_us; -- u32 inflight, bw; -- -- /* The pacing_gain of 1.0 paces at the estimated bw to try to fully -- * use the pipe without increasing the queue. -- */ -- if (bbr->pacing_gain == BBR_UNIT) -- return is_full_length; /* just use wall clock time */ -- -- inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -- bw = bbr_max_bw(sk); -- -- /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at -- * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is -- * small (e.g. on a LAN). We do not persist if packets are lost, since -- * a path with small buffers may not hold that much. -- */ -- if (bbr->pacing_gain > BBR_UNIT) -- return is_full_length && -- (rs->losses || /* perhaps pacing_gain*BDP won't fit */ -- inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); -- -- /* A pacing_gain < 1.0 tries to drain extra queue we added if bw -- * probing didn't find more bw. If inflight falls to match BDP then we -- * estimate queue is drained; persisting would underutilize the pipe. -- */ -- return is_full_length || -- inflight <= bbr_inflight(sk, bw, BBR_UNIT); --} -- --static void bbr_advance_cycle_phase(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); -- bbr->cycle_mstamp = tp->delivered_mstamp; --} -- --/* Gain cycling: cycle pacing gain to converge to fair share of available bw. 
*/ --static void bbr_update_cycle_phase(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) -- bbr_advance_cycle_phase(sk); -+ tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp), -+ bbr_probe_rtt_cwnd(sk))); - } - - static void bbr_reset_startup_mode(struct sock *sk) -@@ -614,191 +740,49 @@ static void bbr_reset_startup_mode(struct sock *sk) - bbr->mode = BBR_STARTUP; - } - --static void bbr_reset_probe_bw_mode(struct sock *sk) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->mode = BBR_PROBE_BW; -- bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); -- bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ --} -- --static void bbr_reset_mode(struct sock *sk) --{ -- if (!bbr_full_bw_reached(sk)) -- bbr_reset_startup_mode(sk); -- else -- bbr_reset_probe_bw_mode(sk); --} -- --/* Start a new long-term sampling interval. */ --static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); -- bbr->lt_last_delivered = tp->delivered; -- bbr->lt_last_lost = tp->lost; -- bbr->lt_rtt_cnt = 0; --} -- --/* Completely reset long-term bandwidth sampling. */ --static void bbr_reset_lt_bw_sampling(struct sock *sk) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->lt_bw = 0; -- bbr->lt_use_bw = 0; -- bbr->lt_is_sampling = false; -- bbr_reset_lt_bw_sampling_interval(sk); --} -- --/* Long-term bw sampling interval is done. Estimate whether we're policed. */ --static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- u32 diff; -- -- if (bbr->lt_bw) { /* do we have bw from a previous interval? */ -- /* Is new bw close to the lt_bw from the previous interval? 
*/ -- diff = abs(bw - bbr->lt_bw); -- if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || -- (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= -- bbr_lt_bw_diff)) { -- /* All criteria are met; estimate we're policed. */ -- bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ -- bbr->lt_use_bw = 1; -- bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ -- bbr->lt_rtt_cnt = 0; -- return; -- } -- } -- bbr->lt_bw = bw; -- bbr_reset_lt_bw_sampling_interval(sk); --} -- --/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of -- * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and -- * explicitly models their policed rate, to reduce unnecessary losses. We -- * estimate that we're policed if we see 2 consecutive sampling intervals with -- * consistent throughput and high packet loss. If we think we're being policed, -- * set lt_bw to the "long-term" average delivery rate from those 2 intervals. -+/* See if we have reached next round trip. Upon start of the new round, -+ * returns packets delivered since previous round start plus this ACK. - */ --static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- u32 lost, delivered; -- u64 bw; -- u32 t; -- -- if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ -- if (bbr->mode == BBR_PROBE_BW && bbr->round_start && -- ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { -- bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ -- bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ -- } -- return; -- } -- -- /* Wait for the first loss before sampling, to let the policer exhaust -- * its tokens and estimate the steady-state rate allowed by the policer. -- * Starting samples earlier includes bursts that over-estimate the bw. 
-- */ -- if (!bbr->lt_is_sampling) { -- if (!rs->losses) -- return; -- bbr_reset_lt_bw_sampling_interval(sk); -- bbr->lt_is_sampling = true; -- } -- -- /* To avoid underestimates, reset sampling if we run out of data. */ -- if (rs->is_app_limited) { -- bbr_reset_lt_bw_sampling(sk); -- return; -- } -- -- if (bbr->round_start) -- bbr->lt_rtt_cnt++; /* count round trips in this interval */ -- if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) -- return; /* sampling interval needs to be longer */ -- if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { -- bbr_reset_lt_bw_sampling(sk); /* interval is too long */ -- return; -- } -- -- /* End sampling interval when a packet is lost, so we estimate the -- * policer tokens were exhausted. Stopping the sampling before the -- * tokens are exhausted under-estimates the policed rate. -- */ -- if (!rs->losses) -- return; -- -- /* Calculate packets lost and delivered in sampling interval. */ -- lost = tp->lost - bbr->lt_last_lost; -- delivered = tp->delivered - bbr->lt_last_delivered; -- /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ -- if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) -- return; -- -- /* Find average delivery rate in this sampling interval. 
*/ -- t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; -- if ((s32)t < 1) -- return; /* interval is less than one ms, so wait */ -- /* Check if can multiply without overflow */ -- if (t >= ~0U / USEC_PER_MSEC) { -- bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ -- return; -- } -- t *= USEC_PER_MSEC; -- bw = (u64)delivered * BW_UNIT; -- do_div(bw, t); -- bbr_lt_bw_interval_done(sk, bw); --} -- --/* Estimate the bandwidth based on how fast packets are delivered */ --static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) -+static u32 bbr_update_round_start(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u64 bw; -+ u32 round_delivered = 0; - - bbr->round_start = 0; -- if (rs->delivered < 0 || rs->interval_us <= 0) -- return; /* Not a valid observation */ - - /* See if we've reached the next RTT */ -- if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ if (rs->interval_us > 0 && -+ !before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ round_delivered = tp->delivered - bbr->next_rtt_delivered; - bbr->next_rtt_delivered = tp->delivered; -- bbr->rtt_cnt++; - bbr->round_start = 1; -- bbr->packet_conservation = 0; - } -+ return round_delivered; -+} - -- bbr_lt_bw_sampling(sk, rs); -+/* Calculate the bandwidth based on how fast packets are delivered */ -+static void bbr_calculate_bw_sample(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ u64 bw = 0; - - /* Divide delivered by the interval to find a (lower bound) bottleneck - * bandwidth sample. Delivered is in packets and interval_us in uS and - * ratio will be <<1 for most connections. So delivered is first scaled. -+ * Round up to allow growth at low rates, even with integer division. 
- */ -- bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); -- -- /* If this sample is application-limited, it is likely to have a very -- * low delivered count that represents application behavior rather than -- * the available network rate. Such a sample could drag down estimated -- * bw, causing needless slow-down. Thus, to continue to send at the -- * last measured network rate, we filter out app-limited samples unless -- * they describe the path bw at least as well as our bw model. -- * -- * So the goal during app-limited phase is to proceed with the best -- * network rate no matter how long. We automatically leave this -- * phase when app writes faster than the network can deliver :) -- */ -- if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { -- /* Incorporate new sample into our max bw filter. */ -- minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); -+ if (rs->interval_us > 0) { -+ if (WARN_ONCE(rs->delivered < 0, -+ "negative delivered: %d interval_us: %ld\n", -+ rs->delivered, rs->interval_us)) -+ return; -+ -+ bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); - } -+ -+ ctx->sample_bw = bw; - } - - /* Estimates the windowed max degree of ack aggregation. -@@ -812,7 +796,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) - * - * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). - * Max filter is an approximate sliding window of 5-10 (packet timed) round -- * trips. -+ * trips for non-startup phase, and 1-2 round trips for startup. 
- */ - static void bbr_update_ack_aggregation(struct sock *sk, - const struct rate_sample *rs) -@@ -820,15 +804,19 @@ static void bbr_update_ack_aggregation(struct sock *sk, - u32 epoch_us, expected_acked, extra_acked; - struct bbr *bbr = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); -+ u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts); - -- if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || -+ if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 || - rs->delivered < 0 || rs->interval_us <= 0) - return; - - if (bbr->round_start) { - bbr->extra_acked_win_rtts = min(0x1F, - bbr->extra_acked_win_rtts + 1); -- if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { -+ if (!bbr_full_bw_reached(sk)) -+ extra_acked_win_rtts_thresh = 1; -+ if (bbr->extra_acked_win_rtts >= -+ extra_acked_win_rtts_thresh) { - bbr->extra_acked_win_rtts = 0; - bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? - 0 : 1; -@@ -862,49 +850,6 @@ static void bbr_update_ack_aggregation(struct sock *sk, - bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; - } - --/* Estimate when the pipe is full, using the change in delivery rate: BBR -- * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by -- * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -- * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -- * higher rwin, 3: we get higher delivery rate samples. Or transient -- * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -- * design goal, but uses delay and inter-ACK spacing instead of bandwidth. 
-- */ --static void bbr_check_full_bw_reached(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- u32 bw_thresh; -- -- if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) -- return; -- -- bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; -- if (bbr_max_bw(sk) >= bw_thresh) { -- bbr->full_bw = bbr_max_bw(sk); -- bbr->full_bw_cnt = 0; -- return; -- } -- ++bbr->full_bw_cnt; -- bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; --} -- --/* If pipe is probably full, drain the queue and then enter steady-state. */ --static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { -- bbr->mode = BBR_DRAIN; /* drain queue we created */ -- tcp_sk(sk)->snd_ssthresh = -- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -- } /* fall through to check if in-flight is already small: */ -- if (bbr->mode == BBR_DRAIN && -- bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= -- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) -- bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ --} -- - static void bbr_check_probe_rtt_done(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -914,9 +859,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk) - after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) - return; - -- bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ - tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); -- bbr_reset_mode(sk); -+ bbr_exit_probe_rtt(sk); - } - - /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and -@@ -942,23 +887,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- bool filter_expired; -+ bool 
probe_rtt_expired, min_rtt_expired; -+ u32 expire; - -- /* Track min RTT seen in the min_rtt_win_sec filter window: */ -- filter_expired = after(tcp_jiffies32, -- bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); -+ /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ -+ expire = bbr->probe_rtt_min_stamp + -+ msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms)); -+ probe_rtt_expired = after(tcp_jiffies32, expire); - if (rs->rtt_us >= 0 && -- (rs->rtt_us < bbr->min_rtt_us || -- (filter_expired && !rs->is_ack_delayed))) { -- bbr->min_rtt_us = rs->rtt_us; -- bbr->min_rtt_stamp = tcp_jiffies32; -+ (rs->rtt_us < bbr->probe_rtt_min_us || -+ (probe_rtt_expired && !rs->is_ack_delayed))) { -+ bbr->probe_rtt_min_us = rs->rtt_us; -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ } -+ /* Track min RTT seen in the min_rtt_win_sec filter window: */ -+ expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ; -+ min_rtt_expired = after(tcp_jiffies32, expire); -+ if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || -+ min_rtt_expired) { -+ bbr->min_rtt_us = bbr->probe_rtt_min_us; -+ bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; - } - -- if (bbr_probe_rtt_mode_ms > 0 && filter_expired && -+ if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired && - !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { - bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ - bbr_save_cwnd(sk); /* note cwnd so we can restore it */ - bbr->probe_rtt_done_stamp = 0; -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; - } - - if (bbr->mode == BBR_PROBE_RTT) { -@@ -967,9 +924,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) - (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; - /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ - if (!bbr->probe_rtt_done_stamp && -- tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { -+ tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { - bbr->probe_rtt_done_stamp = tcp_jiffies32 + -- msecs_to_jiffies(bbr_probe_rtt_mode_ms); -+ msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms)); - bbr->probe_rtt_round_done = 0; - bbr->next_rtt_delivered = tp->delivered; - } else if (bbr->probe_rtt_done_stamp) { -@@ -990,18 +947,20 @@ static void bbr_update_gains(struct sock *sk) - - switch (bbr->mode) { - case BBR_STARTUP: -- bbr->pacing_gain = bbr_high_gain; -- bbr->cwnd_gain = bbr_high_gain; -+ bbr->pacing_gain = bbr_param(sk, startup_pacing_gain); -+ bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); - break; - case BBR_DRAIN: -- bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ -- bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ -+ bbr->pacing_gain = bbr_param(sk, drain_gain); /* slow, to drain */ -+ bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); /* keep cwnd */ - break; - case BBR_PROBE_BW: -- bbr->pacing_gain = (bbr->lt_use_bw ? -- BBR_UNIT : -- bbr_pacing_gain[bbr->cycle_idx]); -- bbr->cwnd_gain = bbr_cwnd_gain; -+ bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; -+ bbr->cwnd_gain = bbr_param(sk, cwnd_gain); -+ if (bbr_param(sk, bw_probe_cwnd_gain) && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr->cwnd_gain += -+ BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4; - break; - case BBR_PROBE_RTT: - bbr->pacing_gain = BBR_UNIT; -@@ -1013,144 +972,1387 @@ static void bbr_update_gains(struct sock *sk) - } - } - --static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) -+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) - { -- bbr_update_bw(sk, rs); -- bbr_update_ack_aggregation(sk, rs); -- bbr_update_cycle_phase(sk, rs); -- bbr_check_full_bw_reached(sk, rs); -- bbr_check_drain(sk, rs); -- bbr_update_min_rtt(sk, rs); -- bbr_update_gains(sk); -+ /* Provision 3 * cwnd since BBR may slow-start even during recovery. 
*/ -+ return 3; - } - --__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) -+/* Incorporate a new bw sample into the current window of our max filter. */ -+static void bbr_take_max_bw_sample(struct sock *sk, u32 bw) - { - struct bbr *bbr = inet_csk_ca(sk); -- u32 bw; -- -- bbr_update_model(sk, rs); - -- bw = bbr_bw(sk); -- bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); -- bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); -+ bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); - } - --__bpf_kfunc static void bbr_init(struct sock *sk) -+/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ -+static void bbr_advance_max_bw_filter(struct sock *sk) - { -- struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - -- bbr->prior_cwnd = 0; -- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -- bbr->rtt_cnt = 0; -- bbr->next_rtt_delivered = tp->delivered; -- bbr->prev_ca_state = TCP_CA_Open; -- bbr->packet_conservation = 0; -- -- bbr->probe_rtt_done_stamp = 0; -- bbr->probe_rtt_round_done = 0; -- bbr->min_rtt_us = tcp_min_rtt(tp); -- bbr->min_rtt_stamp = tcp_jiffies32; -- -- minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ -+ if (!bbr->bw_hi[1]) -+ return; /* no samples in this window; remember old window */ -+ bbr->bw_hi[0] = bbr->bw_hi[1]; -+ bbr->bw_hi[1] = 0; -+} - -- bbr->has_seen_rtt = 0; -- bbr_init_pacing_rate_from_rtt(sk); -+/* Reset the estimator for reaching full bandwidth based on bw plateau. 
*/ -+static void bbr_reset_full_bw(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); - -- bbr->round_start = 0; -- bbr->idle_restart = 0; -- bbr->full_bw_reached = 0; - bbr->full_bw = 0; - bbr->full_bw_cnt = 0; -- bbr->cycle_mstamp = 0; -- bbr->cycle_idx = 0; -- bbr_reset_lt_bw_sampling(sk); -- bbr_reset_startup_mode(sk); -+ bbr->full_bw_now = 0; -+} - -- bbr->ack_epoch_mstamp = tp->tcp_mstamp; -- bbr->ack_epoch_acked = 0; -- bbr->extra_acked_win_rtts = 0; -- bbr->extra_acked_win_idx = 0; -- bbr->extra_acked[0] = 0; -- bbr->extra_acked[1] = 0; -+/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ -+static u32 bbr_target_inflight(struct sock *sk) -+{ -+ u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); - -- cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); -+ return min(bdp, tcp_sk(sk)->snd_cwnd); - } - --__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) -+static bool bbr_is_probing_bandwidth(struct sock *sk) - { -- /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ -- return 3; -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return (bbr->mode == BBR_STARTUP) || -+ (bbr->mode == BBR_PROBE_BW && -+ (bbr->cycle_idx == BBR_BW_PROBE_REFILL || -+ bbr->cycle_idx == BBR_BW_PROBE_UP)); -+} -+ -+/* Has the given amount of time elapsed since we marked the phase start? 
*/ -+static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); -+ -+ return tcp_stamp_us_delta(tp->tcp_mstamp, -+ bbr->cycle_mstamp + interval_us) > 0; -+} -+ -+static void bbr_handle_queue_too_high_in_startup(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bdp; /* estimated BDP in packets, with quantization budget */ -+ -+ bbr->full_bw_reached = 1; -+ -+ bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+ bbr->inflight_hi = max(bdp, bbr->inflight_latest); -+} -+ -+/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ -+static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || -+ !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh)) -+ return; -+ -+ if (ce_ratio >= bbr_param(sk, ecn_thresh)) -+ bbr->startup_ecn_rounds++; -+ else -+ bbr->startup_ecn_rounds = 0; -+ -+ if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) { -+ bbr_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+} -+ -+/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */ -+static int bbr_update_ecn_alpha(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct net *net = sock_net(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ s32 delivered, delivered_ce; -+ u64 alpha, ce_ratio; -+ u32 gain; -+ bool want_ecn_alpha; -+ -+ /* See if we should use ECN sender logic for this connection. */ -+ if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) && -+ !!bbr_param(sk, ecn_factor) && -+ (bbr->min_rtt_us <= bbr_ecn_max_rtt_us || -+ !bbr_ecn_max_rtt_us)) -+ bbr->ecn_eligible = 1; -+ -+ /* Skip updating alpha only if not ECN-eligible and PLB is disabled. 
*/ -+ want_ecn_alpha = (bbr->ecn_eligible || -+ (bbr_can_use_ecn(sk) && -+ READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))); -+ if (!want_ecn_alpha) -+ return -1; -+ -+ delivered = tp->delivered - bbr->alpha_last_delivered; -+ delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; -+ -+ if (delivered == 0 || /* avoid divide by zero */ -+ WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ -+ return -1; -+ -+ BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE); -+ ce_ratio = (u64)delivered_ce << BBR_SCALE; -+ do_div(ce_ratio, delivered); -+ -+ gain = bbr_param(sk, ecn_alpha_gain); -+ alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; -+ alpha += (gain * ce_ratio) >> BBR_SCALE; -+ bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); -+ -+ bbr->alpha_last_delivered = tp->delivered; -+ bbr->alpha_last_delivered_ce = tp->delivered_ce; -+ -+ bbr_check_ecn_too_high_in_startup(sk, ce_ratio); -+ return (int)ce_ratio; - } - --/* In theory BBR does not need to undo the cwnd since it does not -- * always reduce cwnd on losses (see bbr_main()). Keep it for now. -+/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6 -+ * flow label) if it encounters sustained congestion in the form of ECN marks. - */ --__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) -+static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->round_start && ce_ratio >= 0) -+ tcp_plb_update_state(sk, &bbr->plb, ce_ratio); -+ -+ tcp_plb_check_rehash(sk, &bbr->plb); -+} -+ -+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ -+static void bbr_raise_inflight_hi_slope(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 growth_this_round, cnt; -+ -+ /* Calculate "slope": packets S/Acked per inflight_hi increment. 
*/ -+ growth_this_round = 1 << bbr->bw_probe_up_rounds; -+ bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); -+ cnt = tcp_snd_cwnd(tp) / growth_this_round; -+ cnt = max(cnt, 1U); -+ bbr->bw_probe_up_cnt = cnt; -+} -+ -+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ -+static void bbr_probe_inflight_hi_upward(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 delta; -+ -+ if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi) -+ return; /* not fully using inflight_hi, so don't grow it */ -+ -+ /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ -+ bbr->bw_probe_up_acks += rs->acked_sacked; -+ if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { -+ delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; -+ bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; -+ bbr->inflight_hi += delta; -+ bbr->try_fast_path = 0; /* Need to update cwnd */ -+ } -+ -+ if (bbr->round_start) -+ bbr_raise_inflight_hi_slope(sk); -+} -+ -+/* Does loss/ECN rate for this sample say inflight is "too high"? -+ * This is used by both the bbr_check_loss_too_high_in_startup() function, -+ * and in PROBE_UP. -+ */ -+static bool bbr_is_inflight_too_high(const struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ const struct bbr *bbr = inet_csk_ca(sk); -+ u32 loss_thresh, ecn_thresh; -+ -+ if (rs->lost > 0 && rs->tx_in_flight) { -+ loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >> -+ BBR_SCALE; -+ if (rs->lost > loss_thresh) { -+ return true; -+ } -+ } -+ -+ if (rs->delivered_ce > 0 && rs->delivered > 0 && -+ bbr->ecn_eligible && !!bbr_param(sk, ecn_thresh)) { -+ ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >> -+ BBR_SCALE; -+ if (rs->delivered_ce > ecn_thresh) { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+/* Calculate the tx_in_flight level that corresponded to excessive loss. 
-+ * We find "lost_prefix" segs of the skb where loss rate went too high, -+ * by solving for "lost_prefix" in the following equation: -+ * lost / inflight >= loss_thresh -+ * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh -+ * Then we take that equation, convert it to fixed point, and -+ * round up to the nearest packet. -+ */ -+static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk, -+ const struct rate_sample *rs, -+ const struct sk_buff *skb) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ u32 loss_thresh = bbr_param(sk, loss_thresh); -+ u32 pcount, divisor, inflight_hi; -+ s32 inflight_prev, lost_prev; -+ u64 loss_budget, lost_prefix; -+ -+ pcount = tcp_skb_pcount(skb); -+ -+ /* How much data was in flight before this skb? */ -+ inflight_prev = rs->tx_in_flight - pcount; -+ if (inflight_prev < 0) { -+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( -+ pcount, -+ TCP_SKB_CB(skb)->sacked, -+ rs->tx_in_flight), -+ "tx_in_flight: %u pcount: %u reneg: %u", -+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg); -+ return ~0U; -+ } -+ -+ /* How much inflight data was marked lost before this skb? */ -+ lost_prev = rs->lost - pcount; -+ if (WARN_ONCE(lost_prev < 0, -+ "cwnd: %u ca: %d out: %u lost: %u pif: %u " -+ "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d " -+ "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u", -+ tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state, -+ tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp), -+ rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost, -+ rs->lost, lost_prev, pcount, -+ TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, -+ tp->is_sack_reneg)) -+ return ~0U; -+ -+ /* At what prefix of this lost skb did losss rate exceed loss_thresh? 
*/ -+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1; -+ loss_budget >>= BBR_SCALE; -+ if (lost_prev >= loss_budget) { -+ lost_prefix = 0; /* previous losses crossed loss_thresh */ -+ } else { -+ lost_prefix = loss_budget - lost_prev; -+ lost_prefix <<= BBR_SCALE; -+ divisor = BBR_UNIT - loss_thresh; -+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */ -+ return ~0U; -+ do_div(lost_prefix, divisor); -+ } -+ -+ inflight_hi = inflight_prev + lost_prefix; -+ return inflight_hi; -+} -+ -+/* If loss/ECN rates during probing indicated we may have overfilled a -+ * buffer, return an operating point that tries to leave unutilized headroom in -+ * the path for other flows, for fairness convergence and lower RTTs and loss. -+ */ -+static u32 bbr_inflight_with_headroom(const struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 headroom, headroom_fraction; -+ -+ if (bbr->inflight_hi == ~0U) -+ return ~0U; -+ -+ headroom_fraction = bbr_param(sk, inflight_headroom); -+ headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE; -+ headroom = max(headroom, 1U); -+ return max_t(s32, bbr->inflight_hi - headroom, -+ bbr_param(sk, cwnd_min_target)); -+} -+ -+/* Bound cwnd to a sensible level, based on our current probing state -+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi). -+ */ -+static void bbr_bound_cwnd_for_inflight_model(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 cap; -+ -+ /* tcp_rcv_synsent_state_process() currently calls tcp_ack() -+ * and thus cong_control() without first initializing us(!). -+ */ -+ if (!bbr->initialized) -+ return; -+ -+ cap = ~0U; -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { -+ /* Probe to see if more packets fit in the path. 
*/ -+ cap = bbr->inflight_hi; -+ } else { -+ if (bbr->mode == BBR_PROBE_RTT || -+ (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) -+ cap = bbr_inflight_with_headroom(sk); -+ } -+ /* Adapt to any loss/ECN since our last bw probe. */ -+ cap = min(cap, bbr->inflight_lo); -+ -+ cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target)); -+ tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp))); -+} -+ -+/* How should we multiplicatively cut bw or inflight limits based on ECN? */ -+static u32 bbr_ecn_cut(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return BBR_UNIT - -+ ((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE); -+} -+ -+/* Init lower bounds if have not inited yet. */ -+static void bbr_init_lower_bounds(struct sock *sk, bool init_bw) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (init_bw && bbr->bw_lo == ~0U) -+ bbr->bw_lo = bbr_max_bw(sk); -+ if (bbr->inflight_lo == ~0U) -+ bbr->inflight_lo = tcp_snd_cwnd(tp); -+} -+ -+/* Reduce bw and inflight to (1 - beta). */ -+static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight) -+{ -+ struct bbr* bbr = inet_csk_ca(sk); -+ u32 loss_cut = BBR_UNIT - bbr_param(sk, beta); -+ -+ *bw = max_t(u32, bbr->bw_latest, -+ (u64)bbr->bw_lo * loss_cut >> BBR_SCALE); -+ *inflight = max_t(u32, bbr->inflight_latest, -+ (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE); -+} -+ -+/* Reduce inflight to (1 - alpha*ecn_factor). */ -+static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 ecn_cut = bbr_ecn_cut(sk); -+ -+ *inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; -+} -+ -+/* Estimate a short-term lower bound on the capacity available now, based -+ * on measurements of the current delivery process and recent history. 
When we -+ * are seeing loss/ECN at times when we are not probing bw, then conservatively -+ * move toward flow balance by multiplicatively cutting our short-term -+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a -+ * multiplicative decrease in order to converge to a lower capacity in time -+ * logarithmic in the magnitude of the decrease. -+ * -+ * However, we do not cut our short-term estimates lower than the current rate -+ * and volume of delivered data from this round trip, since from the current -+ * delivery process we can estimate the measured capacity available now. -+ * -+ * Anything faster than that approach would knowingly risk high loss, which can -+ * cause low bw for Reno/CUBIC and high loss recovery latency for -+ * request/response flows using any congestion control. -+ */ -+static void bbr_adapt_lower_bounds(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 ecn_inflight_lo = ~0U; -+ -+ /* We only use lower-bound estimates when not probing bw. -+ * When probing we need to push inflight higher to probe bw. -+ */ -+ if (bbr_is_probing_bandwidth(sk)) -+ return; -+ -+ /* ECN response. */ -+ if (bbr->ecn_in_round && !!bbr_param(sk, ecn_factor)) { -+ bbr_init_lower_bounds(sk, false); -+ bbr_ecn_lower_bounds(sk, &ecn_inflight_lo); -+ } -+ -+ /* Loss response. */ -+ if (bbr->loss_in_round) { -+ bbr_init_lower_bounds(sk, true); -+ bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo); -+ } -+ -+ /* Adjust to the lower of the levels implied by loss/ECN. */ -+ bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); -+ bbr->bw_lo = max(1U, bbr->bw_lo); -+} -+ -+/* Reset any short-term lower-bound adaptation to congestion, so that we can -+ * push our inflight up. 
-+ */ -+static void bbr_reset_lower_bounds(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->bw_lo = ~0U; -+ bbr->inflight_lo = ~0U; -+} -+ -+/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state -+ * machine phase where we adapt our lower bound based on congestion signals. -+ */ -+static void bbr_reset_congestion_signals(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+ bbr->loss_in_cycle = 0; -+ bbr->ecn_in_cycle = 0; -+ bbr->bw_latest = 0; -+ bbr->inflight_latest = 0; -+} -+ -+static void bbr_exit_loss_recovery(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); -+ bbr->try_fast_path = 0; /* bound cwnd using latest model */ -+} -+ -+/* Update rate and volume of delivered data from latest round trip. */ -+static void bbr_update_latest_delivery_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->loss_round_start = 0; -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ -+ bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); -+ bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); -+ -+ if (!before(rs->prior_delivered, bbr->loss_round_delivered)) { -+ bbr->loss_round_delivered = tp->delivered; -+ bbr->loss_round_start = 1; /* mark start of new round trip */ -+ } -+} -+ -+/* Once per round, reset filter for latest rate and volume of delivered data. */ -+static void bbr_advance_latest_delivery_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* If ACK matches a TLP retransmit, persist the filter. 
If we detect -+ * that a TLP retransmit plugged a tail loss, we'll want to remember -+ * how much data the path delivered before the tail loss. -+ */ -+ if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) { -+ bbr->bw_latest = ctx->sample_bw; -+ bbr->inflight_latest = rs->delivered; -+ } -+} -+ -+/* Update (most of) our congestion signals: track the recent rate and volume of -+ * delivered data, presence of loss, and EWMA degree of ECN marking. -+ */ -+static void bbr_update_congestion_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw; -+ -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ bw = ctx->sample_bw; -+ -+ if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) -+ bbr_take_max_bw_sample(sk, bw); -+ -+ bbr->loss_in_round |= (rs->losses > 0); -+ -+ if (!bbr->loss_round_start) -+ return; /* skip the per-round-trip updates */ -+ /* Now do per-round-trip updates. */ -+ bbr_adapt_lower_bounds(sk, rs); -+ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+} -+ -+/* Bandwidth probing can cause loss. To help coexistence with loss-based -+ * congestion control we spread out our probing in a Reno-conscious way. Due to -+ * the shape of the Reno sawtooth, the time required between loss epochs for an -+ * idealized Reno flow is a number of round trips that is the BDP of that -+ * flow. We count packet-timed round trips directly, since measured RTT can -+ * vary widely, and Reno is driven by packet-timed round trips. -+ */ -+static bool bbr_is_reno_coexistence_probe_time(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 rounds; -+ -+ /* Random loss can shave some small percentage off of our inflight -+ * in each round. To survive this, flows need robust periodic probes. 
-+ */ -+ rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk)); -+ return bbr->rounds_since_probe >= rounds; -+} -+ -+/* How long do we want to wait before probing for bandwidth (and risking -+ * loss)? We randomize the wait, for better mixing and fairness convergence. -+ * -+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. -+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, -+ * (eg 4K video to a broadband user): -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ * -+ * We bound the BBR-native inter-bw-probe wall clock time to be: -+ * (a) higher than 2 sec: to try to avoid causing loss for a long enough time -+ * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must -+ * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs -+ * (b) lower than 3 sec: to ensure flows can start probing in a reasonable -+ * amount of time to discover unutilized bw on human-scale interactive -+ * time-scales (e.g. perhaps traffic from a web page download that we -+ * were competing with is now complete). -+ */ -+static void bbr_pick_probe_wait(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Decide the random round-trip bound for wait until probe: */ -+ bbr->rounds_since_probe = -+ get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds)); -+ /* Decide the random wall clock bound for wait until probe: */ -+ bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) + -+ get_random_u32_below(bbr_param(sk, bw_probe_rand_us)); -+} -+ -+static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->cycle_idx = cycle_idx; -+ /* New phase, so need to update cwnd and pacing rate. */ -+ bbr->try_fast_path = 0; -+} -+ -+/* Send at estimated bw to fill the pipe, but not queue. 
We need this phase -+ * before PROBE_UP, because as soon as we send faster than the available bw -+ * we will start building a queue, and if the buffer is shallow we can cause -+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and -+ * inflight_hi estimates will underestimate. -+ */ -+static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_lower_bounds(sk); -+ bbr->bw_probe_up_rounds = bw_probe_up_rounds; -+ bbr->bw_probe_up_acks = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_REFILLING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); -+} -+ -+/* Now probe max deliverable data rate and volume. */ -+static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->ack_phase = BBR_ACKS_PROBE_STARTING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr->cycle_mstamp = tp->tcp_mstamp; -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP); -+ bbr_raise_inflight_hi_slope(sk); -+} -+ -+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall -+ * clock time at which to probe beyond an inflight that we think to be -+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to -+ * keep packet loss rates low. Also start a round-trip counter, to probe faster -+ * if we estimate a Reno flow at our BDP would probe faster. 
-+ */ -+static void bbr_start_bw_probe_down(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_congestion_signals(sk); -+ bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ -+ bbr_pick_probe_wait(sk); -+ bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); -+} -+ -+/* Cruise: maintain what we estimate to be a neutral, conservative -+ * operating point, without attempting to probe up for bandwidth or down for -+ * RTT, and only reducing inflight in response to loss/ECN signals. -+ */ -+static void bbr_start_bw_probe_cruise(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->inflight_lo != ~0U) -+ bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); -+ -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); -+} -+ -+/* Loss and/or ECN rate is too high while probing. -+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. -+ */ -+static void bbr_handle_inflight_too_high(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ const u32 beta = bbr_param(sk, beta); -+ -+ bbr->prev_probe_too_high = 1; -+ bbr->bw_probe_samples = 0; /* only react once per probe */ -+ /* If we are app-limited then we are not robustly -+ * probing the max volume of inflight data we think -+ * might be safe (analogous to how app-limited bw -+ * samples are not known to be robustly probing bw). -+ */ -+ if (!rs->is_app_limited) { -+ bbr->inflight_hi = max_t(u32, rs->tx_in_flight, -+ (u64)bbr_target_inflight(sk) * -+ (BBR_UNIT - beta) >> BBR_SCALE); -+ } -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr_start_bw_probe_down(sk); -+} -+ -+/* If we're seeing bw and loss samples reflecting our bw probing, adapt -+ * using the signals we see. 
If loss or ECN mark rate gets too high, then adapt -+ * inflight_hi downward. If we're able to push inflight higher without such -+ * signals, push higher: adapt inflight_hi upward. -+ */ -+static bool bbr_adapt_upper_bounds(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Track when we'll see bw/loss samples resulting from our bw probes. */ -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) -+ bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { -+ /* End of samples from bw probing phase. */ -+ bbr->bw_probe_samples = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ /* At this point in the cycle, our current bw sample is also -+ * our best recent chance at finding the highest available bw -+ * for this flow. So now is the best time to forget the bw -+ * samples from the previous cycle, by advancing the window. -+ */ -+ if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) -+ bbr_advance_max_bw_filter(sk); -+ /* If we had an inflight_hi, then probed and pushed inflight all -+ * the way up to hit that inflight_hi without seeing any -+ * high loss/ECN in all the resulting ACKs from that probing, -+ * then probe up again, this time letting inflight persist at -+ * inflight_hi for a round trip, then accelerating beyond. -+ */ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { -+ bbr_start_bw_probe_refill(sk, 0); -+ return true; /* yes, decided state transition */ -+ } -+ } -+ if (bbr_is_inflight_too_high(sk, rs)) { -+ if (bbr->bw_probe_samples) /* sample is from bw probing? */ -+ bbr_handle_inflight_too_high(sk, rs); -+ } else { -+ /* Loss/ECN rate is declared safe. Adjust upper bound upward. 
*/ -+ -+ if (bbr->inflight_hi == ~0U) -+ return false; /* no excess queue signals yet */ -+ -+ /* To be resilient to random loss, we must raise bw/inflight_hi -+ * if we observe in any phase that a higher level is safe. -+ */ -+ if (rs->tx_in_flight > bbr->inflight_hi) { -+ bbr->inflight_hi = rs->tx_in_flight; -+ } -+ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr_probe_inflight_hi_upward(sk, rs); -+ } -+ -+ return false; -+} -+ -+/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ -+static bool bbr_check_time_to_probe_bw(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 n; -+ -+ /* If we seem to be at an operating point where we are not seeing loss -+ * but we are seeing ECN marks, then when the ECN marks cease we reprobe -+ * quickly (in case cross-traffic has ceased and freed up bw). -+ */ -+ if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible && -+ bbr->ecn_in_cycle && !bbr->loss_in_cycle && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { -+ /* Calculate n so that when bbr_raise_inflight_hi_slope() -+ * computes growth_this_round as 2^n it will be roughly the -+ * desired volume of data (inflight_hi*ecn_reprobe_gain). -+ */ -+ n = ilog2((((u64)bbr->inflight_hi * -+ bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE)); -+ bbr_start_bw_probe_refill(sk, n); -+ return true; -+ } -+ -+ if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) || -+ bbr_is_reno_coexistence_probe_time(sk)) { -+ bbr_start_bw_probe_refill(sk, 0); -+ return true; -+ } -+ return false; -+} -+ -+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ -+static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) -+{ -+ /* Always need to pull inflight down to leave headroom in queue. 
*/ -+ if (inflight > bbr_inflight_with_headroom(sk)) -+ return false; -+ -+ return inflight <= bbr_inflight(sk, bw, BBR_UNIT); -+} -+ -+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ -+static void bbr_update_cycle_phase(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) - { -+ struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -+ bool is_bw_probe_done = false; -+ u32 inflight, bw; -+ -+ if (!bbr_full_bw_reached(sk)) -+ return; -+ -+ /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ -+ if (bbr_adapt_upper_bounds(sk, rs, ctx)) -+ return; /* already decided state transition */ -+ -+ if (bbr->mode != BBR_PROBE_BW) -+ return; -+ -+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -+ bw = bbr_max_bw(sk); -+ -+ switch (bbr->cycle_idx) { -+ /* First we spend most of our time cruising with a pacing_gain of 1.0, -+ * which paces at the estimated bw, to try to fully use the pipe -+ * without building queue. If we encounter loss/ECN marks, we adapt -+ * by slowing down. -+ */ -+ case BBR_BW_PROBE_CRUISE: -+ if (bbr_check_time_to_probe_bw(sk, rs)) -+ return; /* already decided state transition */ -+ break; -+ -+ /* After cruising, when it's time to probe, we first "refill": we send -+ * at the estimated bw to fill the pipe, before probing higher and -+ * knowingly risking overflowing the bottleneck buffer (causing loss). -+ */ -+ case BBR_BW_PROBE_REFILL: -+ if (bbr->round_start) { -+ /* After one full round trip of sending in REFILL, we -+ * start to see bw samples reflecting our REFILL, which -+ * may be putting too much data in flight. -+ */ -+ bbr->bw_probe_samples = 1; -+ bbr_start_bw_probe_up(sk, ctx); -+ } -+ break; - -- bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ -+ /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to -+ * probe for bw. 
If we have not seen loss/ECN, we try to raise inflight -+ * to at least pacing_gain*BDP; note that this may take more than -+ * min_rtt if min_rtt is small (e.g. on a LAN). -+ * -+ * We terminate PROBE_UP bandwidth probing upon any of the following: -+ * -+ * (1) We've pushed inflight up to hit the inflight_hi target set in the -+ * most recent previous bw probe phase. Thus we want to start -+ * draining the queue immediately because it's very likely the most -+ * recently sent packets will fill the queue and cause drops. -+ * (2) If inflight_hi has not limited bandwidth growth recently, and -+ * yet delivered bandwidth has not increased much recently -+ * (bbr->full_bw_now). -+ * (3) Loss filter says loss rate is "too high". -+ * (4) ECN filter says ECN mark rate is "too high". -+ * -+ * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high() -+ */ -+ case BBR_BW_PROBE_UP: -+ if (bbr->prev_probe_too_high && -+ inflight >= bbr->inflight_hi) { -+ bbr->stopped_risky_probe = 1; -+ is_bw_probe_done = true; -+ } else { -+ if (tp->is_cwnd_limited && -+ tcp_snd_cwnd(tp) >= bbr->inflight_hi) { -+ /* inflight_hi is limiting bw growth */ -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ } else if (bbr->full_bw_now) { -+ /* Plateau in estimated bw. Pipe looks full. */ -+ is_bw_probe_done = true; -+ } -+ } -+ if (is_bw_probe_done) { -+ bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ -+ bbr_start_bw_probe_down(sk); /* restart w/ down */ -+ } -+ break; -+ -+ /* After probing in PROBE_UP, we have usually accumulated some data in -+ * the bottleneck buffer (if bw probing didn't find more bw). We next -+ * enter PROBE_DOWN to try to drain any excess data from the queue. To -+ * do this, we use a pacing_gain < 1.0. We hold this pacing gain until -+ * our inflight is less then that target cruising point, which is the -+ * minimum of (a) the amount needed to leave headroom, and (b) the -+ * estimated BDP. 
Once inflight falls to match the target, we estimate -+ * the queue is drained; persisting would underutilize the pipe. -+ */ -+ case BBR_BW_PROBE_DOWN: -+ if (bbr_check_time_to_probe_bw(sk, rs)) -+ return; /* already decided state transition */ -+ if (bbr_check_time_to_cruise(sk, inflight, bw)) -+ bbr_start_bw_probe_cruise(sk); -+ break; -+ -+ default: -+ WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); -+ } -+} -+ -+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */ -+static void bbr_exit_probe_rtt(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_lower_bounds(sk); -+ if (bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_PROBE_BW; -+ /* Raising inflight after PROBE_RTT may cause loss, so reset -+ * the PROBE_BW clock and schedule the next bandwidth probe for -+ * a friendly and randomized future point in time. -+ */ -+ bbr_start_bw_probe_down(sk); -+ /* Since we are exiting PROBE_RTT, we know inflight is -+ * below our estimated BDP, so it is reasonable to cruise. -+ */ -+ bbr_start_bw_probe_cruise(sk); -+ } else { -+ bbr->mode = BBR_STARTUP; -+ } -+} -+ -+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until -+ * the end of the round in recovery to get a good estimate of how many packets -+ * have been lost, and how many we need to drain with a low pacing rate. -+ */ -+static void bbr_check_loss_too_high_in_startup(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk)) -+ return; -+ -+ /* For STARTUP exit, check the loss rate at the end of each round trip -+ * of Recovery episodes in STARTUP. We check the loss rate at the end -+ * of the round trip to filter out noisy/low loss and have a better -+ * sense of inflight (extent of loss), so we can drain more accurately. 
-+ */ -+ if (rs->losses && bbr->loss_events_in_round < 0xf) -+ bbr->loss_events_in_round++; /* update saturating counter */ -+ if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && -+ bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) && -+ bbr_is_inflight_too_high(sk, rs)) { -+ bbr_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+ if (bbr->loss_round_start) -+ bbr->loss_events_in_round = 0; -+} -+ -+/* Estimate when the pipe is full, using the change in delivery rate: BBR -+ * estimates bw probing filled the pipe if the estimated bw hasn't changed by -+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -+ * higher rwin, 3: we get higher delivery rate samples. Or transient -+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth. -+ */ -+static void bbr_check_full_bw_reached(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bw_thresh, full_cnt, thresh; -+ -+ if (bbr->full_bw_now || rs->is_app_limited) -+ return; -+ -+ thresh = bbr_param(sk, full_bw_thresh); -+ full_cnt = bbr_param(sk, full_bw_cnt); -+ bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE; -+ if (ctx->sample_bw >= bw_thresh) { -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ return; -+ } -+ if (!bbr->round_start) -+ return; -+ ++bbr->full_bw_cnt; -+ bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt; -+ bbr->full_bw_reached |= bbr->full_bw_now; -+} -+ -+/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ -+static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_DRAIN; /* drain queue we created */ -+ /* Set ssthresh to export purely for monitoring, to signal -+ * completion of initial STARTUP by setting to a non- -+ * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR). -+ */ -+ tcp_sk(sk)->snd_ssthresh = -+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+ bbr_reset_congestion_signals(sk); -+ } /* fall through to check if in-flight is already small: */ -+ if (bbr->mode == BBR_DRAIN && -+ bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= -+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) { -+ bbr->mode = BBR_PROBE_BW; -+ bbr_start_bw_probe_down(sk); -+ } -+} -+ -+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ bbr_update_congestion_signals(sk, rs, ctx); -+ bbr_update_ack_aggregation(sk, rs); -+ bbr_check_loss_too_high_in_startup(sk, rs); -+ bbr_check_full_bw_reached(sk, rs, ctx); -+ bbr_check_drain(sk, rs, ctx); -+ bbr_update_cycle_phase(sk, rs, ctx); -+ bbr_update_min_rtt(sk, rs); -+} -+ -+/* Fast path for app-limited case. -+ * -+ * On each ack, we execute bbr state machine, which primarily consists of: -+ * 1) update model based on new rate sample, and -+ * 2) update control based on updated model or state change. -+ * -+ * There are certain workload/scenarios, e.g. app-limited case, where -+ * either we can skip updating model or we can skip update of both model -+ * as well as control. This provides signifcant softirq cpu savings for -+ * processing incoming acks. 
-+ * -+ * In case of app-limited, if there is no congestion (loss/ecn) and -+ * if observed bw sample is less than current estimated bw, then we can -+ * skip some of the computation in bbr state processing: -+ * -+ * - if there is no rtt/mode/phase change: In this case, since all the -+ * parameters of the network model are constant, we can skip model -+ * as well control update. -+ * -+ * - else we can skip rest of the model update. But we still need to -+ * update the control to account for the new rtt/mode/phase. -+ * -+ * Returns whether we can take fast path or not. -+ */ -+static bool bbr_run_fast_path(struct sock *sk, bool *update_model, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 prev_min_rtt_us, prev_mode; -+ -+ if (bbr_param(sk, fast_path) && bbr->try_fast_path && -+ rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && -+ !bbr->loss_in_round && !bbr->ecn_in_round ) { -+ prev_mode = bbr->mode; -+ prev_min_rtt_us = bbr->min_rtt_us; -+ bbr_check_drain(sk, rs, ctx); -+ bbr_update_cycle_phase(sk, rs, ctx); -+ bbr_update_min_rtt(sk, rs); -+ -+ if (bbr->mode == prev_mode && -+ bbr->min_rtt_us == prev_min_rtt_us && -+ bbr->try_fast_path) { -+ return true; -+ } -+ -+ /* Skip model update, but control still needs to be updated */ -+ *update_model = false; -+ } -+ return false; -+} -+ -+__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, -+ const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct bbr_context ctx = { 0 }; -+ bool update_model = true; -+ u32 bw, round_delivered; -+ int ce_ratio = -1; -+ -+ round_delivered = bbr_update_round_start(sk, rs, &ctx); -+ if (bbr->round_start) { -+ bbr->rounds_since_probe = -+ min_t(s32, bbr->rounds_since_probe + 1, 0xFF); -+ ce_ratio = bbr_update_ecn_alpha(sk); -+ } -+ bbr_plb(sk, rs, ce_ratio); -+ -+ bbr->ecn_in_round |= (bbr->ecn_eligible && rs->is_ece); -+ 
bbr_calculate_bw_sample(sk, rs, &ctx); -+ bbr_update_latest_delivery_signals(sk, rs, &ctx); -+ -+ if (bbr_run_fast_path(sk, &update_model, rs, &ctx)) -+ goto out; -+ -+ if (update_model) -+ bbr_update_model(sk, rs, &ctx); -+ -+ bbr_update_gains(sk); -+ bw = bbr_bw(sk); -+ bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); -+ bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, -+ tcp_snd_cwnd(tp), &ctx); -+ bbr_bound_cwnd_for_inflight_model(sk); -+ -+out: -+ bbr_advance_latest_delivery_signals(sk, rs, &ctx); -+ bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; -+ bbr->loss_in_cycle |= rs->lost > 0; -+ bbr->ecn_in_cycle |= rs->delivered_ce > 0; -+} -+ -+__bpf_kfunc static void bbr_init(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->initialized = 1; -+ -+ bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp)); -+ bbr->prior_cwnd = tp->prior_cwnd; -+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr->prev_ca_state = TCP_CA_Open; -+ -+ bbr->probe_rtt_done_stamp = 0; -+ bbr->probe_rtt_round_done = 0; -+ bbr->probe_rtt_min_us = tcp_min_rtt(tp); -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ bbr->min_rtt_us = tcp_min_rtt(tp); -+ bbr->min_rtt_stamp = tcp_jiffies32; -+ -+ bbr->has_seen_rtt = 0; -+ bbr_init_pacing_rate_from_rtt(sk); -+ -+ bbr->round_start = 0; -+ bbr->idle_restart = 0; -+ bbr->full_bw_reached = 0; -+ bbr->full_bw = 0; - bbr->full_bw_cnt = 0; -- bbr_reset_lt_bw_sampling(sk); -- return tcp_snd_cwnd(tcp_sk(sk)); -+ bbr->cycle_mstamp = 0; -+ bbr->cycle_idx = 0; -+ -+ bbr_reset_startup_mode(sk); -+ -+ bbr->ack_epoch_mstamp = tp->tcp_mstamp; -+ bbr->ack_epoch_acked = 0; -+ bbr->extra_acked_win_rtts = 0; -+ bbr->extra_acked_win_idx = 0; -+ bbr->extra_acked[0] = 0; -+ bbr->extra_acked[1] = 0; -+ -+ bbr->ce_state = 0; -+ bbr->prior_rcv_nxt = tp->rcv_nxt; -+ bbr->try_fast_path = 0; -+ -+ cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); -+ -+ /* Start 
sampling ECN mark rate after first full flight is ACKed: */ -+ bbr->loss_round_delivered = tp->delivered + 1; -+ bbr->loss_round_start = 0; -+ bbr->undo_bw_lo = 0; -+ bbr->undo_inflight_lo = 0; -+ bbr->undo_inflight_hi = 0; -+ bbr->loss_events_in_round = 0; -+ bbr->startup_ecn_rounds = 0; -+ bbr_reset_congestion_signals(sk); -+ bbr->bw_lo = ~0U; -+ bbr->bw_hi[0] = 0; -+ bbr->bw_hi[1] = 0; -+ bbr->inflight_lo = ~0U; -+ bbr->inflight_hi = ~0U; -+ bbr_reset_full_bw(sk); -+ bbr->bw_probe_up_cnt = ~0U; -+ bbr->bw_probe_up_acks = 0; -+ bbr->bw_probe_up_rounds = 0; -+ bbr->probe_wait_us = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ bbr->rounds_since_probe = 0; -+ bbr->bw_probe_samples = 0; -+ bbr->prev_probe_too_high = 0; -+ bbr->ecn_eligible = 0; -+ bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init); -+ bbr->alpha_last_delivered = 0; -+ bbr->alpha_last_delivered_ce = 0; -+ bbr->plb.pause_until = 0; -+ -+ tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; -+ -+ if (bbr_can_use_ecn(sk)) -+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; -+} -+ -+/* BBR marks the current round trip as a loss round. */ -+static void bbr_note_loss(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Capture "current" data over the full round trip of loss, to -+ * have a better chance of observing the full capacity of the path. -+ */ -+ if (!bbr->loss_in_round) /* first loss in this round trip? */ -+ bbr->loss_round_delivered = tp->delivered; /* set round trip */ -+ bbr->loss_in_round = 1; -+ bbr->loss_in_cycle = 1; - } - --/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ -+/* Core TCP stack informs us that the given skb was just marked lost. 
*/ -+__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk, -+ const struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb); -+ struct rate_sample rs = {}; -+ -+ bbr_note_loss(sk); -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not an skb sent while probing for bandwidth */ -+ if (unlikely(!scb->tx.delivered_mstamp)) -+ return; /* skb was SACKed, reneged, marked lost; ignore it */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this lost skb, -+ * then see if the loss rate went too high, and if so at which packet. -+ */ -+ rs.tx_in_flight = scb->tx.in_flight; -+ rs.lost = tp->lost - scb->tx.lost; -+ rs.is_app_limited = scb->tx.is_app_limited; -+ if (bbr_is_inflight_too_high(sk, &rs)) { -+ rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb); -+ bbr_handle_inflight_too_high(sk, &rs); -+ } -+} -+ -+static void bbr_run_loss_probe_recovery(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct rate_sample rs = {0}; -+ -+ bbr_note_loss(sk); -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not sent while probing for bandwidth */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this -+ * loss, then see if the loss rate went too high. -+ */ -+ rs.lost = 1; /* TLP probe repaired loss of a single segment */ -+ rs.tx_in_flight = bbr->inflight_latest + rs.lost; -+ rs.is_app_limited = tp->tlp_orig_data_app_limited; -+ if (bbr_is_inflight_too_high(sk, &rs)) -+ bbr_handle_inflight_too_high(sk, &rs); -+} -+ -+/* Revert short-term model if current loss recovery event was spurious. 
*/ -+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */ -+ bbr->loss_in_round = 0; -+ -+ /* Revert to cwnd and other state saved before loss episode. */ -+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); -+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); -+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); -+ bbr->try_fast_path = 0; /* take slow path to set proper cwnd, pacing */ -+ return bbr->prior_cwnd; -+} -+ -+/* Entering loss recovery, so save state for when we undo recovery. */ - __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk) - { -+ struct bbr *bbr = inet_csk_ca(sk); -+ - bbr_save_cwnd(sk); -+ /* For undo, save state that adapts based on loss signal. */ -+ bbr->undo_bw_lo = bbr->bw_lo; -+ bbr->undo_inflight_lo = bbr->inflight_lo; -+ bbr->undo_inflight_hi = bbr->inflight_hi; - return tcp_sk(sk)->snd_ssthresh; - } - -+static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr) -+{ -+ switch (bbr->mode) { -+ case BBR_STARTUP: -+ return BBR_PHASE_STARTUP; -+ case BBR_DRAIN: -+ return BBR_PHASE_DRAIN; -+ case BBR_PROBE_BW: -+ break; -+ case BBR_PROBE_RTT: -+ return BBR_PHASE_PROBE_RTT; -+ default: -+ return BBR_PHASE_INVALID; -+ } -+ switch (bbr->cycle_idx) { -+ case BBR_BW_PROBE_UP: -+ return BBR_PHASE_PROBE_BW_UP; -+ case BBR_BW_PROBE_DOWN: -+ return BBR_PHASE_PROBE_BW_DOWN; -+ case BBR_BW_PROBE_CRUISE: -+ return BBR_PHASE_PROBE_BW_CRUISE; -+ case BBR_BW_PROBE_REFILL: -+ return BBR_PHASE_PROBE_BW_REFILL; -+ default: -+ return BBR_PHASE_INVALID; -+ } -+} -+ - static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, -- union tcp_cc_info *info) -+ union tcp_cc_info *info) - { - if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || - ext & (1 << (INET_DIAG_VEGASINFO - 1))) { -- struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u64 bw = bbr_bw(sk); -- -- bw = bw * tp->mss_cache * 
USEC_PER_SEC >> BW_SCALE; -- memset(&info->bbr, 0, sizeof(info->bbr)); -- info->bbr.bbr_bw_lo = (u32)bw; -- info->bbr.bbr_bw_hi = (u32)(bw >> 32); -- info->bbr.bbr_min_rtt = bbr->min_rtt_us; -- info->bbr.bbr_pacing_gain = bbr->pacing_gain; -- info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; -+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); -+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); -+ u64 bw_lo = bbr->bw_lo == ~0U ? -+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); -+ struct tcp_bbr_info *bbr_info = &info->bbr; -+ -+ memset(bbr_info, 0, sizeof(*bbr_info)); -+ bbr_info->bbr_bw_lo = (u32)bw; -+ bbr_info->bbr_bw_hi = (u32)(bw >> 32); -+ bbr_info->bbr_min_rtt = bbr->min_rtt_us; -+ bbr_info->bbr_pacing_gain = bbr->pacing_gain; -+ bbr_info->bbr_cwnd_gain = bbr->cwnd_gain; -+ bbr_info->bbr_bw_hi_lsb = (u32)bw_hi; -+ bbr_info->bbr_bw_hi_msb = (u32)(bw_hi >> 32); -+ bbr_info->bbr_bw_lo_lsb = (u32)bw_lo; -+ bbr_info->bbr_bw_lo_msb = (u32)(bw_lo >> 32); -+ bbr_info->bbr_mode = bbr->mode; -+ bbr_info->bbr_phase = (__u8)bbr_get_phase(bbr); -+ bbr_info->bbr_version = (__u8)BBR_VERSION; -+ bbr_info->bbr_inflight_lo = bbr->inflight_lo; -+ bbr_info->bbr_inflight_hi = bbr->inflight_hi; -+ bbr_info->bbr_extra_acked = bbr_extra_acked(sk); - *attr = INET_DIAG_BBRINFO; -- return sizeof(info->bbr); -+ return sizeof(*bbr_info); - } - return 0; - } - - __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) - { -+ struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - if (new_state == TCP_CA_Loss) { -- struct rate_sample rs = { .losses = 1 }; - - bbr->prev_ca_state = TCP_CA_Loss; -- bbr->full_bw = 0; -- bbr->round_start = 1; /* treat RTO like end of a round */ -- bbr_lt_bw_sampling(sk, &rs); -+ tcp_plb_update_state_upon_rto(sk, &bbr->plb); -+ /* The tcp_write_timeout() call to sk_rethink_txhash() likely -+ * repathed this flow, so re-learn the min network RTT on the -+ * new path: -+ */ -+ bbr_reset_full_bw(sk); -+ if (!bbr_is_probing_bandwidth(sk) && 
bbr->inflight_lo == ~0U) { -+ /* bbr_adapt_lower_bounds() needs cwnd before -+ * we suffered an RTO, to update inflight_lo: -+ */ -+ bbr->inflight_lo = -+ max(tcp_snd_cwnd(tp), bbr->prior_cwnd); -+ } -+ } else if (bbr->prev_ca_state == TCP_CA_Loss && -+ new_state != TCP_CA_Loss) { -+ bbr_exit_loss_recovery(sk); - } - } - -+ - static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { -- .flags = TCP_CONG_NON_RESTRICTED, -+ .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, - .name = "bbr", - .owner = THIS_MODULE, - .init = bbr_init, - .cong_control = bbr_main, - .sndbuf_expand = bbr_sndbuf_expand, -+ .skb_marked_lost = bbr_skb_marked_lost, - .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = bbr_cwnd_event, - .ssthresh = bbr_ssthresh, -- .min_tso_segs = bbr_min_tso_segs, -+ .tso_segs = bbr_tso_segs, - .get_info = bbr_get_info, - .set_state = bbr_set_state, - }; -@@ -1159,10 +2361,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) - BTF_ID_FLAGS(func, bbr_init) - BTF_ID_FLAGS(func, bbr_main) - BTF_ID_FLAGS(func, bbr_sndbuf_expand) -+BTF_ID_FLAGS(func, bbr_skb_marked_lost) - BTF_ID_FLAGS(func, bbr_undo_cwnd) - BTF_ID_FLAGS(func, bbr_cwnd_event) - BTF_ID_FLAGS(func, bbr_ssthresh) --BTF_ID_FLAGS(func, bbr_min_tso_segs) -+BTF_ID_FLAGS(func, bbr_tso_segs) - BTF_ID_FLAGS(func, bbr_set_state) - BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) - -@@ -1195,5 +2398,12 @@ MODULE_AUTHOR("Van Jacobson "); - MODULE_AUTHOR("Neal Cardwell "); - MODULE_AUTHOR("Yuchung Cheng "); - MODULE_AUTHOR("Soheil Hassas Yeganeh "); -+MODULE_AUTHOR("Priyaranjan Jha "); -+MODULE_AUTHOR("Yousuk Seung "); -+MODULE_AUTHOR("Kevin Yang "); -+MODULE_AUTHOR("Arjun Roy "); -+MODULE_AUTHOR("David Morley "); -+ - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); -+MODULE_VERSION(__stringify(BBR_VERSION)); -diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c -index df758adbb445..e98e5dbc050e 100644 ---- a/net/ipv4/tcp_cong.c -+++ b/net/ipv4/tcp_cong.c -@@ 
-237,6 +237,7 @@ void tcp_init_congestion_control(struct sock *sk) - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_sk(sk)->prior_ssthresh = 0; -+ tcp_sk(sk)->fast_ack_mode = 0; - if (icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - if (tcp_ca_needs_ecn(sk)) -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 71b76e98371a..d7bdfbae1a1e 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -381,7 +381,7 @@ static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) - tcp_enter_quickack_mode(sk, 2); - break; - case INET_ECN_CE: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); - - if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { -@@ -392,7 +392,7 @@ static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) - tp->ecn_flags |= TCP_ECN_SEEN; - break; - default: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); - tp->ecn_flags |= TCP_ECN_SEEN; - break; -@@ -1134,7 +1134,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) - */ - static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) - { -+ struct sock *sk = (struct sock *)tp; -+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -+ - tp->lost += tcp_skb_pcount(skb); -+ if (ca_ops->skb_marked_lost) -+ ca_ops->skb_marked_lost(sk, skb); - } - - void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) -@@ -1498,6 +1503,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, - WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); - tcp_skb_pcount_add(skb, -pcount); - -+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. 
*/ -+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, -+ "prev in_flight: %u skb in_flight: %u pcount: %u", -+ TCP_SKB_CB(prev)->tx.in_flight, -+ TCP_SKB_CB(skb)->tx.in_flight, -+ pcount)) -+ TCP_SKB_CB(skb)->tx.in_flight = 0; -+ else -+ TCP_SKB_CB(skb)->tx.in_flight -= pcount; -+ TCP_SKB_CB(prev)->tx.in_flight += pcount; -+ - /* When we're adding to gso_segs == 1, gso_size will be zero, - * in theory this shouldn't be necessary but as long as DSACK - * code can come after this skb later on it's better to keep -@@ -3716,7 +3732,8 @@ static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) - /* This routine deals with acks during a TLP episode and ends an episode by - * resetting tlp_high_seq. Ref: TLP algorithm in RFC8985 - */ --static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) -+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag, -+ struct rate_sample *rs) - { - struct tcp_sock *tp = tcp_sk(sk); - -@@ -3733,6 +3750,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) - /* ACK advances: there was a loss, so reduce cwnd. Reset - * tlp_high_seq in tcp_init_cwnd_reduction() - */ -+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY); - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); -@@ -3743,6 +3761,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) - FLAG_NOT_DUP | FLAG_DATA_SACKED))) { - /* Pure dupack: original and TLP probe arrived; no loss */ - tp->tlp_high_seq = 0; -+ } else { -+ /* This ACK matches a TLP retransmit. We cannot yet tell if -+ * this ACK is for the original or the TLP retransmit. -+ */ -+ rs->is_acking_tlp_retrans_seq = 1; - } - } - -@@ -3862,6 +3885,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - - prior_fack = tcp_is_sack(tp) ? 
tcp_highest_sack_seq(tp) : tp->snd_una; - rs.prior_in_flight = tcp_packets_in_flight(tp); -+ tcp_rate_check_app_limited(sk); - - /* ts_recent update must be made after we are sure that the packet - * is in window. -@@ -3927,7 +3951,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - tcp_in_ack_event(sk, flag); - - if (tp->tlp_high_seq) -- tcp_process_tlp_ack(sk, ack, flag); -+ tcp_process_tlp_ack(sk, ack, flag, &rs); - - if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | -@@ -3951,6 +3975,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - delivered = tcp_newly_delivered(sk, delivered, flag); - lost = tp->lost - lost; /* freshly marked lost */ - rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); -+ rs.is_ece = !!(flag & FLAG_ECE); - tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); - tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); - tcp_xmit_recovery(sk, rexmit); -@@ -3971,7 +3996,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - tcp_ack_probe(sk); - - if (tp->tlp_high_seq) -- tcp_process_tlp_ack(sk, ack, flag); -+ tcp_process_tlp_ack(sk, ack, flag, &rs); - return 1; - - old_ack: -@@ -5677,13 +5702,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) - - /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && -+ (tp->fast_ack_mode == 1 || - /* ... and right edge of window advances far enough. - * (tcp_recvmsg() will send ACK otherwise). - * If application uses SO_RCVLOWAT, we want send ack now if - * we have not received enough bytes to satisfy the condition. - */ -- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -- __tcp_select_window(sk) >= tp->rcv_wnd)) || -+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -+ __tcp_select_window(sk) >= tp->rcv_wnd))) || - /* We ACK each frame or... 
*/ - tcp_in_quickack_mode(sk) || - /* Protocol state mandates a one-time immediate ACK */ -diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c -index 2994c9222c9c..a53af9d32e09 100644 ---- a/net/ipv4/tcp_minisocks.c -+++ b/net/ipv4/tcp_minisocks.c -@@ -475,6 +475,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) - u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); - bool ca_got_dst = false; - -+ tcp_set_ecn_low_from_dst(sk, dst); -+ - if (ca_key != TCP_CA_UNSPEC) { - const struct tcp_congestion_ops *ca; - -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index caf11920a878..61e45fbd3e5f 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) - bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || - tcp_ca_needs_ecn(sk) || bpf_needs_ecn; -+ const struct dst_entry *dst = __sk_dst_get(sk); - - if (!use_ecn) { -- const struct dst_entry *dst = __sk_dst_get(sk); -- - if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) - use_ecn = true; - } -@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) - tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); - if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) - INET_ECN_xmit(sk); -+ -+ if (dst) -+ tcp_set_ecn_low_from_dst(sk, dst); - } - } - -@@ -391,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, - th->cwr = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - } -- } else if (!tcp_ca_needs_ecn(sk)) { -+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && -+ !tcp_ca_needs_ecn(sk)) { - /* ACK or retransmitted segment: clear ECT|CE */ - INET_ECN_dontxmit(sk); - } -@@ -1609,7 +1612,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - { - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *buff; -- int old_factor; -+ int old_factor, inflight_prev; - long limit; - u16 flags; - int 
nlen; -@@ -1684,6 +1687,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - - if (diff) - tcp_adjust_pcount(sk, skb, diff); -+ -+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; -+ if (inflight_prev < 0) { -+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( -+ old_factor, -+ TCP_SKB_CB(skb)->sacked, -+ TCP_SKB_CB(skb)->tx.in_flight), -+ "inconsistent: tx.in_flight: %u " -+ "old_factor: %d mss: %u sacked: %u " -+ "1st pcount: %d 2nd pcount: %d " -+ "1st len: %u 2nd len: %u ", -+ TCP_SKB_CB(skb)->tx.in_flight, old_factor, -+ mss_now, TCP_SKB_CB(skb)->sacked, -+ tcp_skb_pcount(skb), tcp_skb_pcount(buff), -+ skb->len, buff->len); -+ inflight_prev = 0; -+ } -+ /* Set 1st tx.in_flight as if 1st were sent by itself: */ -+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev + -+ tcp_skb_pcount(skb); -+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */ -+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + -+ tcp_skb_pcount(skb) + -+ tcp_skb_pcount(buff); - } - - /* Link BUFF into the send queue. */ -@@ -2040,13 +2067,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) - { - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -- u32 min_tso, tso_segs; -- -- min_tso = ca_ops->min_tso_segs ? -- ca_ops->min_tso_segs(sk) : -- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); -+ u32 tso_segs; - -- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); -+ tso_segs = ca_ops->tso_segs ? 
-+ ca_ops->tso_segs(sk, mss_now) : -+ tcp_tso_autosize(sk, mss_now, -+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); - return min_t(u32, tso_segs, sk->sk_gso_max_segs); - } - -@@ -2771,6 +2797,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); - list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); - tcp_init_tso_segs(skb, mss_now); -+ tcp_set_tx_in_flight(sk, skb); - goto repair; /* Skip network transmission */ - } - -@@ -2983,6 +3010,7 @@ void tcp_send_loss_probe(struct sock *sk) - if (WARN_ON(!skb || !tcp_skb_pcount(skb))) - goto rearm_timer; - -+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited; - if (__tcp_retransmit_skb(sk, skb, 1)) - goto rearm_timer; - -diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c -index a8f6d9d06f2e..8737f2134648 100644 ---- a/net/ipv4/tcp_rate.c -+++ b/net/ipv4/tcp_rate.c -@@ -34,6 +34,24 @@ - * ready to send in the write queue. - */ - -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ u32 in_flight; -+ -+ /* Check, sanitize, and record packets in flight after skb was sent. */ -+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); -+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, -+ "insane in_flight %u cc %s mss %u " -+ "cwnd %u pif %u %u %u %u\n", -+ in_flight, inet_csk(sk)->icsk_ca_ops->name, -+ tp->mss_cache, tp->snd_cwnd, -+ tp->packets_out, tp->retrans_out, -+ tp->sacked_out, tp->lost_out)) -+ in_flight = TCPCB_IN_FLIGHT_MAX; -+ TCP_SKB_CB(skb)->tx.in_flight = in_flight; -+} -+ - /* Snapshot the current delivery information in the skb, to generate - * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). 
- */ -@@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) - TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; - TCP_SKB_CB(skb)->tx.delivered = tp->delivered; - TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; -+ TCP_SKB_CB(skb)->tx.lost = tp->lost; - TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; -+ tcp_set_tx_in_flight(sk, skb); - } - - /* When an skb is sacked or acked, we fill in the rate sample with the (prior) -@@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - if (!rs->prior_delivered || - tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, - scb->end_seq, rs->last_end_seq)) { -+ rs->prior_lost = scb->tx.lost; - rs->prior_delivered_ce = scb->tx.delivered_ce; - rs->prior_delivered = scb->tx.delivered; - rs->prior_mstamp = scb->tx.delivered_mstamp; - rs->is_app_limited = scb->tx.is_app_limited; - rs->is_retrans = scb->sacked & TCPCB_RETRANS; -+ rs->tx_in_flight = scb->tx.in_flight; - rs->last_end_seq = scb->end_seq; - - /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tx_tstamp; - /* Find the duration of the "send phase" of this window: */ -- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, -- scb->tx.first_tx_mstamp); -+ rs->interval_us = tcp_stamp32_us_delta( -+ tp->first_tx_mstamp, -+ scb->tx.first_tx_mstamp); - - } - /* Mark off the skb delivered once it's sacked to avoid being -@@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - return; - } - rs->delivered = tp->delivered - rs->prior_delivered; -+ rs->lost = tp->lost - rs->prior_lost; - - rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; - /* delivered_ce occupies less than 32 bits in the skb control block */ -@@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - * longer phase. 
- */ - snd_us = rs->interval_us; /* send phase */ -- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, -+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, - rs->prior_mstamp); /* ack phase */ - rs->interval_us = max(snd_us, ack_us); - -diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index a207877270fb..0e67c7281410 100644 ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -565,7 +565,7 @@ void tcp_retransmit_timer(struct sock *sk) - struct inet_sock *inet = inet_sk(sk); - u32 rtx_delta; - -- rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: -+ rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: - tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); - if (tp->tcp_usec_ts) - rtx_delta /= USEC_PER_MSEC; -@@ -702,6 +702,8 @@ void tcp_write_timer_handler(struct sock *sk) - icsk_timeout(icsk)); - return; - } -+ -+ tcp_rate_check_app_limited(sk); - tcp_mstamp_refresh(tcp_sk(sk)); - event = icsk->icsk_pending; - --- -2.51.0 - diff --git a/sys-kernel/git-sources/0003-cachy.patch b/sys-kernel/git-sources/0003-cachy.patch deleted file mode 100644 index 0a55a31..0000000 --- a/sys-kernel/git-sources/0003-cachy.patch +++ /dev/null @@ -1,9540 +0,0 @@ -From 657b2f3ce3beb8717754f7b0c4ab900f8f3fe0a6 Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Mon, 1 Sep 2025 09:38:54 +0800 -Subject: [PATCH 3/4] cachy - -Signed-off-by: Eric Naim ---- - .gitignore | 2 + - .../admin-guide/kernel-parameters.txt | 12 + - Documentation/admin-guide/sysctl/vm.rst | 72 + - Makefile | 33 +- - arch/Kconfig | 19 + - arch/x86/Kconfig.cpu | 46 + - arch/x86/Makefile | 16 +- - arch/x86/include/asm/pci.h | 6 + - arch/x86/pci/common.c | 7 +- - block/Kconfig.iosched | 14 + - block/Makefile | 8 + - block/adios.c | 1881 ++++++++++ - block/elevator.c | 26 +- - drivers/Makefile | 13 +- - drivers/ata/ahci.c | 23 +- - drivers/cpufreq/Kconfig.x86 | 2 - - drivers/cpufreq/intel_pstate.c | 2 + - drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + - 
drivers/gpu/drm/amd/display/Kconfig | 6 + - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- - .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- - .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- - .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- - drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + - drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- - drivers/input/evdev.c | 19 +- - drivers/md/dm-crypt.c | 5 + - drivers/media/v4l2-core/Kconfig | 5 + - drivers/media/v4l2-core/Makefile | 2 + - drivers/media/v4l2-core/v4l2loopback.c | 3316 +++++++++++++++++ - drivers/media/v4l2-core/v4l2loopback.h | 108 + - .../media/v4l2-core/v4l2loopback_formats.h | 445 +++ - drivers/pci/controller/Makefile | 6 + - drivers/pci/controller/intel-nvme-remap.c | 462 +++ - drivers/pci/quirks.c | 101 + - drivers/scsi/Kconfig | 2 + - drivers/scsi/Makefile | 1 + - drivers/scsi/vhba/Kconfig | 9 + - drivers/scsi/vhba/Makefile | 4 + - drivers/scsi/vhba/vhba.c | 1132 ++++++ - include/linux/mm.h | 8 + - include/linux/pagemap.h | 2 +- - include/linux/user_namespace.h | 4 + - init/Kconfig | 26 + - kernel/Kconfig.hz | 24 + - kernel/Kconfig.preempt | 2 +- - kernel/fork.c | 14 + - kernel/locking/rwsem.c | 4 +- - kernel/sched/fair.c | 13 + - kernel/sched/sched.h | 2 +- - kernel/sysctl.c | 13 + - kernel/user_namespace.c | 7 + - mm/Kconfig | 65 +- - mm/compaction.c | 4 + - mm/huge_memory.c | 4 + - mm/mm_init.c | 1 + - mm/page-writeback.c | 8 + - mm/page_alloc.c | 4 + - mm/swap.c | 5 + - mm/util.c | 34 + - mm/vmpressure.c | 4 + - mm/vmscan.c | 157 +- - scripts/Makefile.thinlto | 38 + - scripts/Makefile.vmlinux_a | 83 + - scripts/mod/modpost.c | 15 +- - 66 files changed, 8314 insertions(+), 76 deletions(-) - create mode 100644 block/adios.c - create mode 100644 drivers/media/v4l2-core/v4l2loopback.c - create mode 100644 drivers/media/v4l2-core/v4l2loopback.h - create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h - create mode 100644 drivers/pci/controller/intel-nvme-remap.c - create mode 100644 
drivers/scsi/vhba/Kconfig - create mode 100644 drivers/scsi/vhba/Makefile - create mode 100644 drivers/scsi/vhba/vhba.c - create mode 100644 scripts/Makefile.thinlto - create mode 100644 scripts/Makefile.vmlinux_a - -diff --git a/.gitignore b/.gitignore -index 929054df5212..e4b492cc3993 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -55,6 +55,7 @@ - *.zst - Module.symvers - dtbs-list -+builtin.order - modules.order - - # -@@ -66,6 +67,7 @@ modules.order - /vmlinux.32 - /vmlinux.map - /vmlinux.symvers -+/vmlinux.thinlto-index - /vmlinux.unstripped - /vmlinux-gdb.py - /vmlinuz -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 747a55abf494..71751ccf0755 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2384,6 +2384,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - active - Use intel_pstate driver to bypass the scaling - governors layer of cpufreq and provides it own -@@ -4799,6 +4802,15 @@ - nomsi [MSI] If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multfunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specfic device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. 
-diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst -index 4d71211fdad8..57af938f1969 100644 ---- a/Documentation/admin-guide/sysctl/vm.rst -+++ b/Documentation/admin-guide/sysctl/vm.rst -@@ -25,6 +25,9 @@ files can be found in mm/swap.c. - Currently, these files are in /proc/sys/vm: - - - admin_reserve_kbytes -+- anon_min_ratio -+- clean_low_ratio -+- clean_min_ratio - - compact_memory - - compaction_proactiveness - - compact_unevictable_allowed -@@ -110,6 +113,67 @@ On x86_64 this is about 128MB. - Changing this takes effect whenever an application requests memory. - - -+anon_min_ratio -+============== -+ -+This knob provides *hard* protection of anonymous pages. The anonymous pages -+on the current node won't be reclaimed under any conditions when their amount -+is below vm.anon_min_ratio. -+ -+This knob may be used to prevent excessive swap thrashing when anonymous -+memory is low (for example, when memory is going to be overfilled by -+compressed data of zram module). -+ -+Setting this value too high (close to 100) can result in inability to -+swap and can lead to early OOM under memory pressure. -+ -+The unit of measurement is the percentage of the total memory of the node. -+ -+The default value is 1. -+ -+ -+clean_low_ratio -+================ -+ -+This knob provides *best-effort* protection of clean file pages. The file pages -+on the current node won't be reclaimed under memory pressure when the amount of -+clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. -+ -+Protection of clean file pages using this knob may be used when swapping is -+still possible to -+ - prevent disk I/O thrashing under memory pressure; -+ - improve performance in disk cache-bound tasks under memory pressure. -+ -+Setting it to a high value may result in a early eviction of anonymous pages -+into the swap space by attempting to hold the protected amount of clean file -+pages in memory. 
-+ -+The unit of measurement is the percentage of the total memory of the node. -+ -+The default value is 15. -+ -+ -+clean_min_ratio -+================ -+ -+This knob provides *hard* protection of clean file pages. The file pages on the -+current node won't be reclaimed under memory pressure when the amount of clean -+file pages is below vm.clean_min_ratio. -+ -+Hard protection of clean file pages using this knob may be used to -+ - prevent disk I/O thrashing under memory pressure even with no free swap space; -+ - improve performance in disk cache-bound tasks under memory pressure; -+ - avoid high latency and prevent livelock in near-OOM conditions. -+ -+Setting it to a high value may result in a early out-of-memory condition due to -+the inability to reclaim the protected amount of clean file pages when other -+types of pages cannot be reclaimed. -+ -+The unit of measurement is the percentage of the total memory of the node. -+ -+The default value is 4. -+ -+ - compact_memory - ============== - -@@ -980,6 +1044,14 @@ be 133 (x + 2x = 200, 2x = 133.33). - At 0, the kernel will not initiate swap until the amount of free and - file-backed pages is less than the high watermark in a zone. - -+This knob has no effect if the amount of clean file pages on the current -+node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, -+only anonymous pages can be reclaimed. -+ -+If the number of anonymous pages on the current node is below -+vm.anon_min_ratio, then only file pages can be reclaimed with -+any vm.swappiness value. 
-+ - - unprivileged_userfaultfd - ======================== -diff --git a/Makefile b/Makefile -index b9c661913250..8fc00895b0ba 100644 ---- a/Makefile -+++ b/Makefile -@@ -869,11 +869,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks - ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE - KBUILD_CFLAGS += -O2 - KBUILD_RUSTFLAGS += -Copt-level=2 -+else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 -+KBUILD_CFLAGS += -O3 -+KBUILD_RUSTFLAGS += -Copt-level=3 - else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE - KBUILD_CFLAGS += -Os - KBUILD_RUSTFLAGS += -Copt-level=s - endif - -+# Perform swing modulo scheduling immediately before the first scheduling pass. -+# This pass looks at innermost loops and reorders their instructions by -+# overlapping different iterations. -+KBUILD_CFLAGS += $(call cc-option,-fmodulo-sched -fmodulo-sched-allow-regmoves -fivopts -fmodulo-sched) -+ - # Always set `debug-assertions` and `overflow-checks` because their default - # depends on `opt-level` and `debug-assertions`, respectively. 
- KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) -@@ -1003,10 +1011,10 @@ export CC_FLAGS_SCS - endif - - ifdef CONFIG_LTO_CLANG --ifdef CONFIG_LTO_CLANG_THIN --CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit --else -+ifdef CONFIG_LTO_CLANG_FULL - CC_FLAGS_LTO := -flto -+else -+CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit - endif - CC_FLAGS_LTO += -fvisibility=hidden - -@@ -1200,7 +1208,7 @@ export ARCH_DRIVERS := $(drivers-y) $(drivers-m) - KBUILD_VMLINUX_OBJS := built-in.a $(patsubst %/, %/lib.a, $(filter %/, $(libs-y))) - KBUILD_VMLINUX_LIBS := $(filter-out %/, $(libs-y)) - --export KBUILD_VMLINUX_LIBS -+export KBUILD_VMLINUX_OBJS KBUILD_VMLINUX_LIBS - export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds - - ifdef CONFIG_TRIM_UNUSED_KSYMS -@@ -1209,16 +1217,12 @@ ifdef CONFIG_TRIM_UNUSED_KSYMS - KBUILD_MODULES := y - endif - --# '$(AR) mPi' needs 'T' to workaround the bug of llvm-ar <= 14 --quiet_cmd_ar_vmlinux.a = AR $@ -- cmd_ar_vmlinux.a = \ -- rm -f $@; \ -- $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ -- $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) -+PHONY += vmlinux_a -+vmlinux_a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_a - --targets += vmlinux.a --vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE -- $(call if_changed,ar_vmlinux.a) -+vmlinux.a: vmlinux_a -+ @: - - PHONY += vmlinux_o - vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) -@@ -1578,6 +1582,7 @@ endif # CONFIG_MODULES - CLEAN_FILES += vmlinux.symvers modules-only.symvers \ - modules.builtin modules.builtin.modinfo modules.nsdeps \ - modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ -+ vmlinux.thinlto-index builtin.order \ - compile_commands.json rust/test \ - rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S -@@ -2019,7 +2024,7 @@ clean: $(clean-dirs) - $(call cmd,rmfiles) - 
@find . $(RCS_FIND_IGNORE) \ - \( -name '*.[aios]' -o -name '*.rsi' -o -name '*.ko' -o -name '.*.cmd' \ -- -o -name '*.ko.*' \ -+ -o -name '*.ko.*' -o -name '*.o.thinlto.bc' \ - -o -name '*.dtb' -o -name '*.dtbo' \ - -o -name '*.dtb.S' -o -name '*.dtbo.S' \ - -o -name '*.dt.yaml' -o -name 'dtbs-list' \ -diff --git a/arch/Kconfig b/arch/Kconfig -index d1b4ffd6e085..9ea0ac45923e 100644 ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -826,6 +826,25 @@ config LTO_CLANG_THIN - https://clang.llvm.org/docs/ThinLTO.html - - If unsure, say Y. -+ -+config LTO_CLANG_THIN_DIST -+ bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" -+ depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN -+ select LTO_CLANG -+ help -+ This option enables Clang's ThinLTO in distributed build mode. -+ In this mode, the linker performs the thin-link, generating -+ ThinLTO index files. Subsequently, the build system explicitly -+ invokes ThinLTO backend compilation using these index files -+ and pre-linked IR objects. The resulting native object files -+ are with the .thinlto-native.o suffix. -+ -+ This build mode offers improved visibility into the ThinLTO -+ process through explicit subcommand exposure. It also makes -+ final native object files directly available, benefiting -+ tools like objtool and kpatch. Additionally, it provides -+ crucial granular control over back-end options, enabling -+ module-specific compiler options, and simplifies debugging. - endchoice - - config ARCH_SUPPORTS_AUTOFDO_CLANG -diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index f928cf6e3252..d4ce964d9713 100644 ---- a/arch/x86/Kconfig.cpu -+++ b/arch/x86/Kconfig.cpu -@@ -255,6 +255,11 @@ config CC_HAS_MARCH_NATIVE - # usage warnings that only appear wth '-march=native'. 
- depends on CC_IS_GCC || CLANG_VERSION >= 190100 - -+ -+choice -+ prompt "x86_64 Compiler Build Optimization" -+ default GENERIC_CPU -+ - config X86_NATIVE_CPU - bool "Build and optimize for local/native CPU" - depends on X86_64 -@@ -269,6 +274,47 @@ config X86_NATIVE_CPU - - If unsure, say N. - -+config GENERIC_CPU -+ bool "Generic-x86-64" -+ depends on X86_64 -+ help -+ Generic x86-64 CPU. -+ Runs equally well on all x86-64 CPUs. -+ -+config MZEN4 -+ bool "AMD Ryzen 4" -+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000) -+ help -+ Select this for AMD Family 19h Zen 4 processors. -+ -+ Enables -march=znver4 -+ -+endchoice -+ -+config X86_64_VERSION -+ int "x86-64 compiler ISA level" -+ range 1 4 -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 && GENERIC_CPU -+ help -+ Specify a specific x86-64 compiler ISA level. -+ -+ There are three x86-64 ISA levels that work on top of -+ the x86-64 baseline, namely: x86-64-v2 and x86-64-v3. -+ -+ x86-64-v2 brings support for vector instructions up to Streaming SIMD -+ Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 -+ (SSSE3), the POPCNT instruction, and CMPXCHG16B. -+ -+ x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional -+ bit-manipulation instructions. 
-+ -+ x86-64-v4 is not included since the kernel does not use AVX512 instructions -+ -+ You can find the best version for your CPU by running one of the following: -+ /lib/ld-linux-x86-64.so.2 --help | grep supported -+ /lib64/ld-linux-x86-64.so.2 --help | grep supported -+ - config X86_GENERIC - bool "Generic x86 support" - depends on X86_32 -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index 1913d342969b..82358ed864bb 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -176,10 +176,22 @@ else - ifdef CONFIG_X86_NATIVE_CPU - KBUILD_CFLAGS += -march=native - KBUILD_RUSTFLAGS += -Ctarget-cpu=native --else -+endif -+ -+ifdef CONFIG_MZEN4 -+ KBUILD_CFLAGS += -march=znver4 -+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4 -+endif -+ -+ifdef CONFIG_GENERIC_CPU -+ifeq ($(CONFIG_X86_64_VERSION),1) - KBUILD_CFLAGS += -march=x86-64 -mtune=generic - KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic --endif -+else -+ KBUILD_CFLAGS +=-march=x86-64-v$(CONFIG_X86_64_VERSION) -+ KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) -+endif # CONFIG_X86_64_VERSION -+endif # CONFIG_GENERIC_CPU - - KBUILD_CFLAGS += -mno-red-zone - KBUILD_CFLAGS += -mcmodel=kernel -diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h -index b3ab80a03365..5e883b397ff3 100644 ---- a/arch/x86/include/asm/pci.h -+++ b/arch/x86/include/asm/pci.h -@@ -26,6 +26,7 @@ struct pci_sysdata { - #if IS_ENABLED(CONFIG_VMD) - struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ - #endif -+ struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */ - }; - - extern int pci_routeirq; -@@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus) - #define is_vmd(bus) false - #endif /* CONFIG_VMD */ - -+static inline bool is_nvme_remap(struct pci_bus *bus) -+{ -+ return to_pci_sysdata(bus)->nvme_remap_dev != NULL; -+} -+ - /* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy 
BIOSes - or architectures with incomplete PCI setup by the loader */ -diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c -index ddb798603201..7c20387d8202 100644 ---- a/arch/x86/pci/common.c -+++ b/arch/x86/pci/common.c -@@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) - return 0; - } - --#if IS_ENABLED(CONFIG_VMD) - struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) - { -+#if IS_ENABLED(CONFIG_VMD) - if (is_vmd(dev->bus)) - return to_pci_sysdata(dev->bus)->vmd_dev; -+#endif -+ -+ if (is_nvme_remap(dev->bus)) -+ return to_pci_sysdata(dev->bus)->nvme_remap_dev; - - return dev; - } --#endif -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched -index 27f11320b8d1..e98585dd83e0 100644 ---- a/block/Kconfig.iosched -+++ b/block/Kconfig.iosched -@@ -16,6 +16,20 @@ config MQ_IOSCHED_KYBER - synchronous writes, it will self-tune queue depths to achieve that - goal. - -+config MQ_IOSCHED_ADIOS -+ tristate "Adaptive Deadline I/O scheduler" -+ default m -+ help -+ The Adaptive Deadline I/O Scheduler (ADIOS) is a multi-queue I/O -+ scheduler with learning-based adaptive latency control. -+ -+config MQ_IOSCHED_DEFAULT_ADIOS -+ bool "Enable ADIOS I/O scheduler as default MQ I/O scheduler" -+ depends on MQ_IOSCHED_ADIOS=y -+ default n -+ help -+ Enable the ADIOS I/O scheduler as the default scheduler for MQ I/O. 
-+ - config IOSCHED_BFQ - tristate "BFQ I/O scheduler" - select BLK_ICQ -diff --git a/block/Makefile b/block/Makefile -index c65f4da93702..105b12fd86b8 100644 ---- a/block/Makefile -+++ b/block/Makefile -@@ -22,6 +22,7 @@ obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o - obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o - obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o - obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o -+obj-$(CONFIG_MQ_IOSCHED_ADIOS) += adios.o - bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o - obj-$(CONFIG_IOSCHED_BFQ) += bfq.o - -@@ -36,3 +37,10 @@ obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \ - blk-crypto-sysfs.o - obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o - obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o -+ -+all: -+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules -+ -+clean: -+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean -+ -diff --git a/block/adios.c b/block/adios.c -new file mode 100644 -index 000000000000..bcc90564b9ce ---- /dev/null -+++ b/block/adios.c -@@ -0,0 +1,1881 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Adaptive Deadline I/O Scheduler (ADIOS) -+ * Copyright (C) 2025 Masahito Suzuki -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "elevator.h" -+#include "blk.h" -+#include "blk-mq.h" -+#include "blk-mq-sched.h" -+ -+#define ADIOS_VERSION "3.0.1" -+ -+/* Request Types: -+ * -+ * Tier 0 (Highest Priority): Emergency & System Integrity Requests -+ * ----------------------------------------------------------------- -+ * - Target: Requests with the BLK_MQ_INSERT_AT_HEAD flag. -+ * - Purpose: For critical, non-negotiable operations such as device error -+ * recovery or flush sequences that must bypass all other scheduling logic. 
-+ * - Implementation: Placed in a dedicated, high-priority FIFO queue -+ * (`prio_queue[0]`) for immediate dispatch. -+ * -+ * Tier 1 (High Priority): Data Persistence & Ordering Guarantees -+ * --------------------------------------------------------------- -+ * - Target: Requests with integrity-sensitive flags like REQ_FUA or -+ * REQ_PREFLUSH, typically originating from O_DIRECT I/O. -+ * - Purpose: To ensure strict ordering and data persistence guarantees, -+ * preventing data corruption in applications like databases. -+ * - Implementation: Handled in a separate, secondary FIFO queue -+ * (`prio_queue[1]`) to ensure they are processed in submission order and -+ * before any lower-priority requests. -+ * -+ * Tier 2 (Medium Priority): Application Responsiveness -+ * ---------------------------------------------------- -+ * - Target: Normal synchronous requests (e.g., from standard file reads). -+ * - Purpose: To ensure correct application behavior for operations that -+ * depend on sequential I/O completion (e.g., file system mounts) and to -+ * provide low latency for interactive applications. -+ * - Implementation: The deadline for these requests is set to their start -+ * time (`rq->start_time_ns`). This effectively enforces FIFO-like behavior -+ * within the deadline-sorted red-black tree, preventing out-of-order -+ * execution of dependent synchronous operations. -+ * -+ * Tier 3 (Normal Priority): Background Throughput -+ * ----------------------------------------------- -+ * - Target: Asynchronous requests. -+ * - Purpose: To maximize disk throughput for background tasks where latency -+ * is not critical. -+ * - Implementation: These are the only requests where ADIOS's adaptive -+ * latency prediction model is used. A dynamic deadline is calculated based -+ * on the predicted I/O latency, allowing for aggressive reordering to -+ * optimize I/O efficiency. 
-+ * -+ * Dispatch Logic: -+ * The scheduler always dispatches requests in strict priority order: -+ * 1. prio_queue[0] (Tier 0) -+ * 2. prio_queue[1] (Tier 1) -+ * 3. The deadline-sorted batch queue (which naturally prioritizes Tier 2 -+ * over Tier 3 due to their calculated deadlines). -+ */ -+ -+// Global variable to control the latency -+static u64 default_global_latency_window = 16000000ULL; -+static u64 default_global_latency_window_rotational = 22000000ULL; -+// Ratio below which batch queues should be refilled -+static u8 default_bq_refill_below_ratio = 20; -+// Maximum latency sample to input -+static u64 default_lat_model_latency_limit = 500000000ULL; -+// Batch ordering strategy -+static u64 default_batch_order = 0; -+// Flags to control compliance with block layer constraints -+static u64 default_compliance_flags = 0x7; -+ -+/* Compliance Flags: -+ * 0x1: REQ_FUA requests will be handled as Tier-1, strictly prioritized -+ * 0x2: REQ_PREFLUSH requests will be handled as Tier-1, strictly prioritized -+ * 0x4: Async requests will not be reordered based on the predicted latency -+ */ -+enum adios_compliance_flags { -+ ADIOS_CF_PRIO_FUA = 1U << 0, -+ ADIOS_CF_PRIO_PF = 1U << 1, -+ ADIOS_CF_FIXORDER = 1U << 2, -+}; -+ -+// Dynamic thresholds for shrinkage -+static u32 default_lm_shrink_at_kreqs = 5000; -+static u32 default_lm_shrink_at_gbytes = 50; -+static u32 default_lm_shrink_resist = 2; -+ -+enum adios_optype { -+ ADIOS_READ = 0, -+ ADIOS_WRITE = 1, -+ ADIOS_DISCARD = 2, -+ ADIOS_OTHER = 3, -+ ADIOS_OPTYPES = 4, -+}; -+ -+// Latency targets for each operation type -+static u64 default_latency_target[ADIOS_OPTYPES] = { -+ [ADIOS_READ] = 2ULL * NSEC_PER_MSEC, -+ [ADIOS_WRITE] = 2000ULL * NSEC_PER_MSEC, -+ [ADIOS_DISCARD] = 8000ULL * NSEC_PER_MSEC, -+ [ADIOS_OTHER] = 0ULL * NSEC_PER_MSEC, -+}; -+ -+// Maximum batch size limits for each operation type -+static u32 default_batch_limit[ADIOS_OPTYPES] = { -+ [ADIOS_READ] = 36, -+ [ADIOS_WRITE] = 72, -+ 
[ADIOS_DISCARD] = 1, -+ [ADIOS_OTHER] = 1, -+}; -+ -+enum adios_batch_order { -+ ADIOS_BO_OPTYPE = 0, -+ ADIOS_BO_ELEVATOR = 1, -+}; -+ -+// Thresholds for latency model control -+#define LM_BLOCK_SIZE_THRESHOLD 4096 -+#define LM_SAMPLES_THRESHOLD 1024 -+#define LM_INTERVAL_THRESHOLD 1500 -+#define LM_OUTLIER_PERCENTILE 99 -+#define LM_LAT_BUCKET_COUNT 64 -+ -+#define ADIOS_PQ_LEVELS 2 -+#define ADIOS_DL_TYPES 2 -+#define ADIOS_BQ_PAGES 2 -+ -+static u32 default_dl_prio[ADIOS_DL_TYPES] = {8, 0}; -+ -+// Bit flags for the atomic state variable, indicating which queues have requests. -+enum adios_state_flags { -+ ADIOS_STATE_PQ_0 = 1U << 0, -+ ADIOS_STATE_PQ_1 = 1U << 1, -+ ADIOS_STATE_DL_0 = 1U << 2, -+ ADIOS_STATE_DL_1 = 1U << 3, -+ ADIOS_STATE_BQ_PAGE_0 = 1U << 4, -+ ADIOS_STATE_BQ_PAGE_1 = 1U << 5, -+}; -+#define ADIOS_STATE_PQ 0 -+#define ADIOS_STATE_DL 2 -+#define ADIOS_STATE_BQ 4 -+ -+// Temporal granularity of the deadline tree node (dl_group) -+#define ADIOS_QUANTUM_SHIFT 20 -+ -+#define ADIOS_MAX_INSERTS_PER_LOCK 72 -+#define ADIOS_MAX_DELETES_PER_LOCK 24 -+ -+// Structure to hold latency bucket data for small requests -+struct latency_bucket_small { -+ u64 weighted_sum_latency; -+ u64 sum_of_weights; -+}; -+ -+// Structure to hold latency bucket data for large requests -+struct latency_bucket_large { -+ u64 weighted_sum_latency; -+ u64 weighted_sum_block_size; -+ u64 sum_of_weights; -+}; -+ -+// Structure to hold per-cpu buckets, improving data locality and code clarity. 
-+struct lm_buckets { -+ struct latency_bucket_small small_bucket[LM_LAT_BUCKET_COUNT]; -+ struct latency_bucket_large large_bucket[LM_LAT_BUCKET_COUNT]; -+}; -+ -+// Structure to hold RCU-protected latency model parameters -+struct latency_model_params { -+ u64 base; -+ u64 slope; -+ u64 small_sum_delay; -+ u64 small_count; -+ u64 large_sum_delay; -+ u64 large_sum_bsize; -+ u64 last_update_jiffies; -+ struct rcu_head rcu; -+}; -+ -+// Structure to hold the latency model context data -+struct latency_model { -+ spinlock_t update_lock; -+ struct latency_model_params __rcu *params; -+ -+ // Per-CPU buckets to avoid lock contention on the completion path -+ struct lm_buckets __percpu *pcpu_buckets; -+ -+ u32 lm_shrink_at_kreqs; -+ u32 lm_shrink_at_gbytes; -+ u8 lm_shrink_resist; -+}; -+ -+// Adios scheduler data -+struct adios_data { -+ spinlock_t pq_lock; -+ struct list_head prio_queue[2]; -+ -+ struct rb_root_cached dl_tree[2]; -+ spinlock_t lock; -+ s64 dl_bias; -+ s32 dl_prio[2]; -+ -+ atomic_t state; -+ u8 bq_state[ADIOS_BQ_PAGES]; -+ -+ void (*insert_request_fn)(struct blk_mq_hw_ctx *, struct request *, -+ blk_insert_t, struct list_head *); -+ -+ u64 global_latency_window; -+ u64 compliance_flags; -+ u64 latency_target[ADIOS_OPTYPES]; -+ u32 batch_limit[ADIOS_OPTYPES]; -+ u32 batch_actual_max_size[ADIOS_OPTYPES]; -+ u32 batch_actual_max_total; -+ u32 async_depth; -+ u32 lat_model_latency_limit; -+ u8 bq_refill_below_ratio; -+ u8 is_rotational; -+ u8 batch_order; -+ u8 elv_direction; -+ sector_t head_pos; -+ sector_t last_completed_pos; -+ -+ bool bq_page; -+ struct list_head batch_queue[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; -+ u32 batch_count[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; -+ u8 bq_batch_order[ADIOS_BQ_PAGES]; -+ spinlock_t bq_lock; -+ -+ struct lm_buckets *aggr_buckets; -+ -+ struct latency_model latency_model[ADIOS_OPTYPES]; -+ struct timer_list update_timer; -+ -+ atomic64_t total_pred_lat; -+ u64 last_completed_time; -+ -+ struct kmem_cache *rq_data_pool; -+ 
struct kmem_cache *dl_group_pool; -+ -+ struct request_queue *queue; -+}; -+ -+// List of requests with the same deadline in the deadline-sorted tree -+struct dl_group { -+ struct rb_node node; -+ struct list_head rqs; -+ u64 deadline; -+} __attribute__((aligned(64))); -+ -+// Structure to hold scheduler-specific data for each request -+struct adios_rq_data { -+ struct list_head *dl_group; -+ struct list_head dl_node; -+ -+ struct request *rq; -+ u64 deadline; -+ u64 pred_lat; -+ u32 block_size; -+} __attribute__((aligned(64))); -+ -+static const int adios_prio_to_wmult[40] = { -+ /* -20 */ 88761, 71755, 56483, 46273, 36291, -+ /* -15 */ 29154, 23254, 18705, 14949, 11916, -+ /* -10 */ 9548, 7620, 6100, 4904, 3906, -+ /* -5 */ 3121, 2501, 1991, 1586, 1277, -+ /* 0 */ 1024, 820, 655, 526, 423, -+ /* 5 */ 335, 272, 215, 172, 137, -+ /* 10 */ 110, 87, 70, 56, 45, -+ /* 15 */ 36, 29, 23, 18, 15, -+}; -+ -+static inline bool compliant(struct adios_data *ad, u32 flag) { -+ return ad->compliance_flags & flag; -+} -+ -+// Count the number of entries in aggregated small buckets -+static u64 lm_count_small_entries(struct latency_bucket_small *buckets) { -+ u64 total_weight = 0; -+ for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) -+ total_weight += buckets[i].sum_of_weights; -+ return total_weight; -+} -+ -+// Update the small buckets in the latency model from aggregated data -+static bool lm_update_small_buckets(struct latency_model *model, -+ struct latency_model_params *params, -+ struct latency_bucket_small *buckets, -+ u64 total_weight, bool count_all) { -+ u64 sum_latency = 0; -+ u64 sum_weight = 0; -+ u64 cumulative_weight = 0, threshold_weight = 0; -+ u8 outlier_threshold_bucket = 0; -+ u8 outlier_percentile = LM_OUTLIER_PERCENTILE; -+ u8 reduction; -+ -+ if (count_all) -+ outlier_percentile = 100; -+ -+ // Calculate the threshold weight for outlier detection -+ threshold_weight = (total_weight * outlier_percentile) / 100; -+ -+ // Identify the bucket that corresponds to 
the outlier threshold -+ for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { -+ cumulative_weight += buckets[i].sum_of_weights; -+ if (cumulative_weight >= threshold_weight) { -+ outlier_threshold_bucket = i; -+ break; -+ } -+ } -+ -+ // Calculate the average latency, excluding outliers -+ for (u8 i = 0; i <= outlier_threshold_bucket; i++) { -+ struct latency_bucket_small *bucket = &buckets[i]; -+ if (i < outlier_threshold_bucket) { -+ sum_latency += bucket->weighted_sum_latency; -+ sum_weight += bucket->sum_of_weights; -+ } else { -+ // The threshold bucket's contribution is proportional -+ u64 remaining_weight = -+ threshold_weight - (cumulative_weight - bucket->sum_of_weights); -+ if (bucket->sum_of_weights > 0) { -+ sum_latency += div_u64(bucket->weighted_sum_latency * -+ remaining_weight, bucket->sum_of_weights); -+ sum_weight += remaining_weight; -+ } -+ } -+ } -+ -+ // Shrink the model if it reaches at the readjustment threshold -+ if (params->small_count >= 1000ULL * model->lm_shrink_at_kreqs) { -+ reduction = model->lm_shrink_resist; -+ if (params->small_count >> reduction) { -+ params->small_sum_delay -= params->small_sum_delay >> reduction; -+ params->small_count -= params->small_count >> reduction; -+ } -+ } -+ -+ if (!sum_weight) -+ return false; -+ -+ // Accumulate the average latency into the statistics -+ params->small_sum_delay += sum_latency; -+ params->small_count += sum_weight; -+ -+ return true; -+} -+ -+// Count the number of entries in aggregated large buckets -+static u64 lm_count_large_entries(struct latency_bucket_large *buckets) { -+ u64 total_weight = 0; -+ for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) -+ total_weight += buckets[i].sum_of_weights; -+ return total_weight; -+} -+ -+// Update the large buckets in the latency model from aggregated data -+static bool lm_update_large_buckets(struct latency_model *model, -+ struct latency_model_params *params, -+ struct latency_bucket_large *buckets, -+ u64 total_weight, bool count_all) { -+ s64 
sum_latency = 0; -+ u64 sum_block_size = 0, intercept; -+ u64 cumulative_weight = 0, threshold_weight = 0; -+ u64 sum_weight = 0; -+ u8 outlier_threshold_bucket = 0; -+ u8 outlier_percentile = LM_OUTLIER_PERCENTILE; -+ u8 reduction; -+ -+ if (count_all) -+ outlier_percentile = 100; -+ -+ // Calculate the threshold weight for outlier detection -+ threshold_weight = (total_weight * outlier_percentile) / 100; -+ -+ // Identify the bucket that corresponds to the outlier threshold -+ for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { -+ cumulative_weight += buckets[i].sum_of_weights; -+ if (cumulative_weight >= threshold_weight) { -+ outlier_threshold_bucket = i; -+ break; -+ } -+ } -+ -+ // Calculate the average latency and block size, excluding outliers -+ for (u8 i = 0; i <= outlier_threshold_bucket; i++) { -+ struct latency_bucket_large *bucket = &buckets[i]; -+ if (i < outlier_threshold_bucket) { -+ sum_latency += bucket->weighted_sum_latency; -+ sum_block_size += bucket->weighted_sum_block_size; -+ sum_weight += bucket->sum_of_weights; -+ } else { -+ // The threshold bucket's contribution is proportional -+ u64 remaining_weight = -+ threshold_weight - (cumulative_weight - bucket->sum_of_weights); -+ if (bucket->sum_of_weights > 0) { -+ sum_latency += div_u64(bucket->weighted_sum_latency * -+ remaining_weight, bucket->sum_of_weights); -+ sum_block_size += div_u64(bucket->weighted_sum_block_size * -+ remaining_weight, bucket->sum_of_weights); -+ sum_weight += remaining_weight; -+ } -+ } -+ } -+ -+ if (!sum_weight) -+ return false; -+ -+ // Shrink the model if it reaches at the readjustment threshold -+ if (params->large_sum_bsize >= 0x40000000ULL * model->lm_shrink_at_gbytes) { -+ reduction = model->lm_shrink_resist; -+ if (params->large_sum_bsize >> reduction) { -+ params->large_sum_delay -= params->large_sum_delay >> reduction; -+ params->large_sum_bsize -= params->large_sum_bsize >> reduction; -+ } -+ } -+ -+ // Accumulate the average delay into the statistics -+ 
intercept = params->base; -+ if (sum_latency > intercept) -+ sum_latency -= intercept; -+ -+ params->large_sum_delay += sum_latency; -+ params->large_sum_bsize += sum_block_size; -+ -+ return true; -+} -+ -+static void reset_buckets(struct lm_buckets *buckets) -+{ memset(buckets, 0, sizeof(*buckets)); } -+ -+static void lm_reset_pcpu_buckets(struct latency_model *model) { -+ int cpu; -+ for_each_possible_cpu(cpu) -+ reset_buckets(per_cpu_ptr(model->pcpu_buckets, cpu)); -+} -+ -+// Update the latency model parameters and statistics -+static void latency_model_update( -+ struct adios_data *ad, struct latency_model *model) { -+ u64 now; -+ u64 small_weight, large_weight; -+ bool time_elapsed; -+ bool small_processed = false, large_processed = false; -+ struct lm_buckets *aggr = ad->aggr_buckets; -+ struct latency_bucket_small *asb; -+ struct latency_bucket_large *alb; -+ struct lm_buckets *pcpu_b; -+ unsigned long flags; -+ int cpu; -+ struct latency_model_params *old_params, *new_params; -+ -+ spin_lock_irqsave(&model->update_lock, flags); -+ -+ old_params = rcu_dereference_protected(model->params, -+ lockdep_is_held(&model->update_lock)); -+ new_params = kmemdup(old_params, sizeof(*new_params), GFP_ATOMIC); -+ if (!new_params) { -+ spin_unlock_irqrestore(&model->update_lock, flags); -+ return; -+ } -+ -+ // Aggregate data from all CPUs and reset per-cpu buckets. 
-+ for_each_possible_cpu(cpu) { -+ pcpu_b = per_cpu_ptr(model->pcpu_buckets, cpu); -+ -+ for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { -+ if (pcpu_b->small_bucket[i].sum_of_weights) { -+ asb = &aggr->small_bucket[i]; -+ asb->sum_of_weights += -+ pcpu_b->small_bucket[i].sum_of_weights; -+ asb->weighted_sum_latency += -+ pcpu_b->small_bucket[i].weighted_sum_latency; -+ } -+ if (pcpu_b->large_bucket[i].sum_of_weights) { -+ alb = &aggr->large_bucket[i]; -+ alb->sum_of_weights += -+ pcpu_b->large_bucket[i].sum_of_weights; -+ alb->weighted_sum_latency += -+ pcpu_b->large_bucket[i].weighted_sum_latency; -+ alb->weighted_sum_block_size += -+ pcpu_b->large_bucket[i].weighted_sum_block_size; -+ } -+ } -+ // Reset per-cpu buckets after aggregating -+ reset_buckets(pcpu_b); -+ } -+ -+ // Count the number of entries in aggregated buckets -+ small_weight = lm_count_small_entries(aggr->small_bucket); -+ large_weight = lm_count_large_entries(aggr->large_bucket); -+ -+ // Whether enough time has elapsed since the last update -+ now = jiffies; -+ time_elapsed = unlikely(!new_params->base) || -+ new_params->last_update_jiffies + -+ msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; -+ -+ // Update small buckets -+ if (small_weight && (time_elapsed || -+ LM_SAMPLES_THRESHOLD <= small_weight || !new_params->base)) { -+ small_processed = lm_update_small_buckets(model, new_params, -+ aggr->small_bucket, small_weight, !new_params->base); -+ memset(&aggr->small_bucket[0], 0, sizeof(aggr->small_bucket)); -+ } -+ // Update large buckets -+ if (large_weight && (time_elapsed || -+ LM_SAMPLES_THRESHOLD <= large_weight || !new_params->slope)) { -+ large_processed = lm_update_large_buckets(model, new_params, -+ aggr->large_bucket, large_weight, !new_params->slope); -+ memset(&aggr->large_bucket[0], 0, sizeof(aggr->large_bucket)); -+ } -+ -+ // Update the base parameter if small bucket was processed -+ if (small_processed && likely(new_params->small_count)) -+ new_params->base = 
div_u64(new_params->small_sum_delay, -+ new_params->small_count); -+ -+ // Update the slope parameter if large bucket was processed -+ if (large_processed && likely(new_params->large_sum_bsize)) -+ new_params->slope = div_u64(new_params->large_sum_delay, -+ DIV_ROUND_UP_ULL(new_params->large_sum_bsize, 1024)); -+ -+ // Update last updated jiffies if update happened or time has elapsed -+ if (small_processed || large_processed || time_elapsed) -+ new_params->last_update_jiffies = now; -+ -+ rcu_assign_pointer(model->params, new_params); -+ spin_unlock_irqrestore(&model->update_lock, flags); -+ -+ kfree_rcu(old_params, rcu); -+} -+ -+// Determine the bucket index for a given measured and predicted latency -+static u8 lm_input_bucket_index(u64 measured, u64 predicted) { -+ u8 bucket_index; -+ -+ if (measured < predicted * 2) -+ bucket_index = div_u64((measured * 20), predicted); -+ else if (measured < predicted * 5) -+ bucket_index = div_u64((measured * 10), predicted) + 20; -+ else -+ bucket_index = div_u64((measured * 3), predicted) + 40; -+ -+ return bucket_index; -+} -+ -+// Input latency data into the latency model -+static void latency_model_input(struct adios_data *ad, -+ struct latency_model *model, -+ u32 block_size, u64 latency, u64 pred_lat, u32 weight) { -+ unsigned long flags; -+ u8 bucket_index; -+ struct lm_buckets *buckets; -+ u64 current_base; -+ struct latency_model_params *params; -+ -+ local_irq_save(flags); -+ buckets = per_cpu_ptr(model->pcpu_buckets, __smp_processor_id()); -+ -+ rcu_read_lock(); -+ params = rcu_dereference(model->params); -+ current_base = params->base; -+ rcu_read_unlock(); -+ -+ if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { -+ // Handle small requests -+ bucket_index = lm_input_bucket_index(latency, current_base ?: 1); -+ -+ if (bucket_index >= LM_LAT_BUCKET_COUNT) -+ bucket_index = LM_LAT_BUCKET_COUNT - 1; -+ -+ buckets->small_bucket[bucket_index].sum_of_weights += weight; -+ 
buckets->small_bucket[bucket_index].weighted_sum_latency += -+ latency * weight; -+ -+ local_irq_restore(flags); -+ -+ if (unlikely(!current_base)) { -+ latency_model_update(ad, model); -+ return; -+ } -+ } else { -+ // Handle large requests -+ if (!current_base || !pred_lat) { -+ local_irq_restore(flags); -+ return; -+ } -+ -+ bucket_index = lm_input_bucket_index(latency, pred_lat); -+ -+ if (bucket_index >= LM_LAT_BUCKET_COUNT) -+ bucket_index = LM_LAT_BUCKET_COUNT - 1; -+ -+ buckets->large_bucket[bucket_index].sum_of_weights += weight; -+ buckets->large_bucket[bucket_index].weighted_sum_latency += -+ latency * weight; -+ buckets->large_bucket[bucket_index].weighted_sum_block_size += -+ block_size * weight; -+ -+ local_irq_restore(flags); -+ } -+} -+ -+// Predict the latency for a given block size using the latency model -+static u64 latency_model_predict(struct latency_model *model, u32 block_size) { -+ u64 result; -+ struct latency_model_params *params; -+ -+ rcu_read_lock(); -+ params = rcu_dereference(model->params); -+ -+ result = params->base; -+ if (block_size > LM_BLOCK_SIZE_THRESHOLD) -+ result += params->slope * -+ DIV_ROUND_UP_ULL(block_size - LM_BLOCK_SIZE_THRESHOLD, 1024); -+ -+ rcu_read_unlock(); -+ -+ return result; -+} -+ -+// Determine the type of operation based on request flags -+static u8 adios_optype(struct request *rq) { -+ switch (rq->cmd_flags & REQ_OP_MASK) { -+ case REQ_OP_READ: -+ return ADIOS_READ; -+ case REQ_OP_WRITE: -+ return ADIOS_WRITE; -+ case REQ_OP_DISCARD: -+ return ADIOS_DISCARD; -+ default: -+ return ADIOS_OTHER; -+ } -+} -+ -+static inline u8 adios_optype_not_read(struct request *rq) { -+ return (rq->cmd_flags & REQ_OP_MASK) != REQ_OP_READ; -+} -+ -+// Helper function to retrieve adios_rq_data from a request -+static inline struct adios_rq_data *get_rq_data(struct request *rq) { -+ return rq->elv.priv[0]; -+} -+ -+static inline -+void set_adios_state(struct adios_data *ad, u32 shift, u32 idx, bool flag) { -+ if (flag) -+ 
atomic_or(1U << (idx + shift), &ad->state); -+ else -+ atomic_andnot(1U << (idx + shift), &ad->state); -+} -+ -+static inline u32 get_adios_state(struct adios_data *ad, u32 shift) -+{ return (atomic_read(&ad->state) >> shift) & 0x3; } -+ -+// Add a request to the deadline-sorted red-black tree -+static void add_to_dl_tree( -+ struct adios_data *ad, bool dl_idx, struct request *rq) { -+ struct rb_root_cached *root = &ad->dl_tree[dl_idx]; -+ struct rb_node **link = &(root->rb_root.rb_node), *parent = NULL; -+ bool leftmost = true; -+ struct adios_rq_data *rd = get_rq_data(rq); -+ struct dl_group *dlg; -+ u64 deadline; -+ bool was_empty = RB_EMPTY_ROOT(&root->rb_root); -+ -+ /* Tier-2: Synchronous Requests -+ * - Needs to be FIFO within a same optype -+ * - Relaxed order between different optypes -+ * - basically needs to be processed in early time */ -+ rd->deadline = rq->start_time_ns; -+ -+ /* Tier-3: Aynchronous Requests -+ * - Can be reordered and delayed freely */ -+ if (!(rq->cmd_flags & REQ_SYNC)) { -+ rd->deadline += ad->latency_target[adios_optype(rq)]; -+ if (!compliant(ad, ADIOS_CF_FIXORDER)) -+ rd->deadline += rd->pred_lat; -+ } -+ -+ // Now quantize the deadline (-> dlg->deadline == RB-Tree key) -+ deadline = rd->deadline & ~((1ULL << ADIOS_QUANTUM_SHIFT) - 1); -+ -+ while (*link) { -+ dlg = rb_entry(*link, struct dl_group, node); -+ s64 diff = deadline - dlg->deadline; -+ -+ parent = *link; -+ if (diff < 0) { -+ link = &((*link)->rb_left); -+ } else if (diff > 0) { -+ link = &((*link)->rb_right); -+ leftmost = false; -+ } else { // diff == 0 -+ goto found; -+ } -+ } -+ -+ dlg = rb_entry_safe(parent, struct dl_group, node); -+ if (!dlg || dlg->deadline != deadline) { -+ dlg = kmem_cache_zalloc(ad->dl_group_pool, GFP_ATOMIC); -+ if (!dlg) -+ return; -+ dlg->deadline = deadline; -+ INIT_LIST_HEAD(&dlg->rqs); -+ rb_link_node(&dlg->node, parent, link); -+ rb_insert_color_cached(&dlg->node, root, leftmost); -+ } -+found: -+ list_add_tail(&rd->dl_node, 
&dlg->rqs); -+ rd->dl_group = &dlg->rqs; -+ -+ if (was_empty) -+ set_adios_state(ad, ADIOS_STATE_DL, dl_idx, true); -+} -+ -+// Remove a request from the deadline-sorted red-black tree -+static void del_from_dl_tree( -+ struct adios_data *ad, bool dl_idx, struct request *rq) { -+ struct rb_root_cached *root = &ad->dl_tree[dl_idx]; -+ struct adios_rq_data *rd = get_rq_data(rq); -+ struct dl_group *dlg = container_of(rd->dl_group, struct dl_group, rqs); -+ -+ list_del_init(&rd->dl_node); -+ if (list_empty(&dlg->rqs)) { -+ rb_erase_cached(&dlg->node, root); -+ kmem_cache_free(ad->dl_group_pool, dlg); -+ } -+ rd->dl_group = NULL; -+ -+ if (RB_EMPTY_ROOT(&ad->dl_tree[dl_idx].rb_root)) -+ set_adios_state(ad, ADIOS_STATE_DL, dl_idx, false); -+} -+ -+// Remove a request from the scheduler -+static void remove_request(struct adios_data *ad, struct request *rq) { -+ bool dl_idx = adios_optype_not_read(rq); -+ struct request_queue *q = rq->q; -+ struct adios_rq_data *rd = get_rq_data(rq); -+ -+ list_del_init(&rq->queuelist); -+ -+ // We might not be on the rbtree, if we are doing an insert merge -+ if (rd->dl_group) -+ del_from_dl_tree(ad, dl_idx, rq); -+ -+ elv_rqhash_del(q, rq); -+ if (q->last_merge == rq) -+ q->last_merge = NULL; -+} -+ -+// Convert a queue depth to the corresponding word depth for shallow allocation -+static int to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) { -+ struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; -+ const unsigned int nrr = hctx->queue->nr_requests; -+ -+ return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; -+} -+ -+// We limit the depth of request allocation for asynchronous and write requests -+static void adios_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { -+ struct adios_data *ad = data->q->elevator->elevator_data; -+ -+ // Do not throttle synchronous reads -+ if (op_is_sync(opf) && !op_is_write(opf)) -+ return; -+ -+ data->shallow_depth = to_word_depth(data->hctx, ad->async_depth); -+} -+ -+// The 
number of requests in the queue was notified from the block layer -+static void adios_depth_updated(struct blk_mq_hw_ctx *hctx) { -+ struct request_queue *q = hctx->queue; -+ struct adios_data *ad = q->elevator->elevator_data; -+ struct blk_mq_tags *tags = hctx->sched_tags; -+ -+ ad->async_depth = q->nr_requests; -+ -+ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1); -+} -+ -+// Handle request merging after a merge operation -+static void adios_request_merged(struct request_queue *q, struct request *req, -+ enum elv_merge type) { -+ bool dl_idx = adios_optype_not_read(req); -+ struct adios_data *ad = q->elevator->elevator_data; -+ -+ // Reposition request in the deadline-sorted tree -+ del_from_dl_tree(ad, dl_idx, req); -+ add_to_dl_tree(ad, dl_idx, req); -+} -+ -+// Handle merging of requests after one has been merged into another -+static void adios_merged_requests(struct request_queue *q, struct request *req, -+ struct request *next) { -+ struct adios_data *ad = q->elevator->elevator_data; -+ -+ lockdep_assert_held(&ad->lock); -+ -+ // kill knowledge of next, this one is a goner -+ remove_request(ad, next); -+} -+ -+// Try to merge a bio into an existing rq before associating it with an rq -+static bool adios_bio_merge(struct request_queue *q, struct bio *bio, -+ unsigned int nr_segs) { -+ unsigned long flags; -+ struct adios_data *ad = q->elevator->elevator_data; -+ struct request *free = NULL; -+ bool ret; -+ -+ if (!spin_trylock_irqsave(&ad->lock, flags)) -+ return false; -+ -+ ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); -+ spin_unlock_irqrestore(&ad->lock, flags); -+ -+ if (free) -+ blk_mq_free_request(free); -+ -+ return ret; -+} -+ -+// Insert a request into the scheduler (after Read & Write models stabilized) -+static void insert_request_post_stability(struct blk_mq_hw_ctx *hctx, -+ struct request *rq, blk_insert_t insert_flags, struct list_head *free) { -+ struct request_queue *q = hctx->queue; -+ struct adios_data *ad = 
q->elevator->elevator_data; -+ struct adios_rq_data *rd = get_rq_data(rq); -+ bool dl_idx; -+ u8 optype = adios_optype(rq); -+ u8 insert_pq_flags = 0; -+ -+ rd->block_size = blk_rq_bytes(rq); -+ rd->pred_lat = -+ latency_model_predict(&ad->latency_model[optype], rd->block_size); -+ -+ /* Tier-0: BLK_MQ_INSERT_AT_HEAD Requests -+ * - Needs to be processed ASAP at all costs in any case */ -+ if (insert_flags & BLK_MQ_INSERT_AT_HEAD) -+ { insert_pq_flags |= 0x2; } -+ /* Tier-1: Integrity-sensitive Requests -+ * - Needs to be FIFO across all optypes */ -+ if ((compliant(ad, ADIOS_CF_PRIO_FUA) && (rq->cmd_flags & REQ_FUA)) || -+ (compliant(ad, ADIOS_CF_PRIO_PF ) && (rq->cmd_flags & REQ_PREFLUSH))) -+ { insert_pq_flags |= 0x1; } -+ -+ if (insert_pq_flags) { -+ u8 pq_idx = !(insert_pq_flags >> 1); -+ if (rd->pred_lat) -+ atomic64_add(rd->pred_lat, &ad->total_pred_lat); -+ scoped_guard(spinlock_irqsave, &ad->pq_lock) { -+ bool was_empty = list_empty(&ad->prio_queue[pq_idx]); -+ list_add_tail(&rq->queuelist, &ad->prio_queue[pq_idx]); -+ if (was_empty) -+ set_adios_state(ad, ADIOS_STATE_PQ, pq_idx, true); -+ } -+ return; -+ } -+ -+ if (blk_mq_sched_try_insert_merge(q, rq, free)) -+ return; -+ -+ dl_idx = adios_optype_not_read(rq); -+ add_to_dl_tree(ad, dl_idx, rq); -+ -+ if (rq_mergeable(rq)) { -+ elv_rqhash_add(q, rq); -+ if (!q->last_merge) -+ q->last_merge = rq; -+ } -+} -+ -+// Insert a request into the scheduler (before Read & Write models stabilizes) -+static void insert_request_pre_stability(struct blk_mq_hw_ctx *hctx, -+ struct request *rq, blk_insert_t insert_flags, struct list_head *free) { -+ struct adios_data *ad = hctx->queue->elevator->elevator_data; -+ struct adios_rq_data *rd = get_rq_data(rq); -+ u8 optype = adios_optype(rq); -+ u8 pq_idx = !(insert_flags & BLK_MQ_INSERT_AT_HEAD); -+ bool models_stable = false; -+ -+ rd->block_size = blk_rq_bytes(rq); -+ rd->pred_lat = -+ latency_model_predict(&ad->latency_model[optype], rd->block_size); -+ -+ if 
(rd->pred_lat) -+ atomic64_add(rd->pred_lat, &ad->total_pred_lat); -+ -+ scoped_guard(spinlock_irqsave, &ad->pq_lock) { -+ bool was_empty = list_empty(&ad->prio_queue[pq_idx]); -+ list_add_tail(&rq->queuelist, &ad->prio_queue[pq_idx]); -+ if (was_empty) -+ set_adios_state(ad, ADIOS_STATE_PQ, pq_idx, true); -+ } -+ -+ rcu_read_lock(); -+ if (rcu_dereference(ad->latency_model[ADIOS_READ].params)->base > 0 && -+ rcu_dereference(ad->latency_model[ADIOS_WRITE].params)->base > 0) -+ models_stable = true; -+ rcu_read_unlock(); -+ -+ if (models_stable) -+ ad->insert_request_fn = insert_request_post_stability; -+} -+ -+// Insert multiple requests into the scheduler -+static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, -+ struct list_head *list, -+ blk_insert_t insert_flags) { -+ struct request_queue *q = hctx->queue; -+ struct adios_data *ad = q->elevator->elevator_data; -+ struct request *rq; -+ bool stop = false; -+ LIST_HEAD(free); -+ -+ do { -+ scoped_guard(spinlock_irqsave, &ad->lock) -+ for (int i = 0; i < ADIOS_MAX_INSERTS_PER_LOCK; i++) { -+ if (list_empty(list)) { -+ stop = true; -+ break; -+ } -+ rq = list_first_entry(list, struct request, queuelist); -+ list_del_init(&rq->queuelist); -+ ad->insert_request_fn(hctx, rq, insert_flags, &free); -+ }} while (!stop); -+ -+ blk_mq_free_requests(&free); -+} -+ -+// Prepare a request before it is inserted into the scheduler -+static void adios_prepare_request(struct request *rq) { -+ struct adios_data *ad = rq->q->elevator->elevator_data; -+ struct adios_rq_data *rd = get_rq_data(rq); -+ -+ rq->elv.priv[0] = NULL; -+ -+ /* Allocate adios_rq_data from the memory pool */ -+ rd = kmem_cache_zalloc(ad->rq_data_pool, GFP_ATOMIC); -+ if (WARN(!rd, "adios_prepare_request: " -+ "Failed to allocate memory from rq_data_pool. 
rd is NULL\n")) -+ return; -+ -+ rd->rq = rq; -+ rq->elv.priv[0] = rd; -+} -+ -+static struct adios_rq_data *get_dl_first_rd(struct adios_data *ad, bool idx) { -+ struct rb_root_cached *root = &ad->dl_tree[idx]; -+ struct rb_node *first = rb_first_cached(root); -+ struct dl_group *dl_group = rb_entry(first, struct dl_group, node); -+ -+ return list_first_entry(&dl_group->rqs, struct adios_rq_data, dl_node); -+} -+ -+// Comparison function for sorting requests by block address -+static int cmp_rq_pos(void *priv, -+ const struct list_head *a, const struct list_head *b) { -+ struct request *rq_a = list_entry(a, struct request, queuelist); -+ struct request *rq_b = list_entry(b, struct request, queuelist); -+ u64 pos_a = blk_rq_pos(rq_a); -+ u64 pos_b = blk_rq_pos(rq_b); -+ -+ return (int)(pos_a > pos_b) - (int)(pos_a < pos_b); -+} -+ -+#ifndef list_last_entry_or_null -+#define list_last_entry_or_null(ptr, type, member) \ -+ (!list_empty(ptr) ? list_last_entry(ptr, type, member) : NULL) -+#endif -+ -+// Update the elevator direction -+static void update_elv_direction(struct adios_data *ad) { -+ if (!ad->is_rotational) -+ return; -+ -+ bool page = ad->bq_page; -+ struct list_head *q = &ad->batch_queue[page][1]; -+ if (ad->bq_batch_order[page] < ADIOS_BO_ELEVATOR || list_empty(q)) { -+ ad->elv_direction = 0; -+ return; -+ } -+ -+ // Get first and last request positions in the queue -+ struct request *rq_a = list_first_entry(q, struct request, queuelist); -+ struct request *rq_b = list_last_entry (q, struct request, queuelist); -+ u64 pos_a = blk_rq_pos(rq_a); -+ u64 pos_b = blk_rq_pos(rq_b); -+ u64 avg_rq_pos = (pos_a + pos_b) >> 1; -+ -+ ad->elv_direction = !!(ad->head_pos > avg_rq_pos); -+} -+ -+// Fill the batch queues with requests from the deadline-sorted red-black tree -+static bool fill_batch_queues(struct adios_data *ad, u64 tpl) { -+ struct adios_rq_data *rd; -+ struct request *rq; -+ struct list_head *dest_q; -+ u8 dest_idx; -+ u64 added_lat = 0; -+ u32 
optype_count[ADIOS_OPTYPES] = {0}; -+ u32 count = 0; -+ u8 optype; -+ bool page = !ad->bq_page, dl_idx, bias_idx, update_bias; -+ u32 dl_queued; -+ u8 bq_batch_order; -+ bool stop = false; -+ -+ // Reset batch queue counts for the back page -+ memset(&ad->batch_count[page], 0, sizeof(ad->batch_count[page])); -+ -+ ad->bq_batch_order[page] = -+ bq_batch_order = ad->batch_order; -+ -+ do { -+ scoped_guard(spinlock_irqsave, &ad->lock) -+ for (int i = 0; i < ADIOS_MAX_DELETES_PER_LOCK; i++) { -+ bool has_base = false; -+ -+ dl_queued = get_adios_state(ad, ADIOS_STATE_DL); -+ // Check if there are any requests queued in the deadline tree -+ if (!dl_queued) { -+ stop = true; -+ break; -+ } -+ -+ // Reads if both queues have requests, otherwise pick the non-empty. -+ dl_idx = dl_queued >> 1; -+ -+ // Get the first request from the deadline-sorted tree -+ rd = get_dl_first_rd(ad, dl_idx); -+ -+ bias_idx = ad->dl_bias < 0; -+ // If read and write requests are queued, choose one based on bias -+ if (dl_queued == 0x3) { -+ struct adios_rq_data *trd[2] = {get_dl_first_rd(ad, 0), rd}; -+ rd = trd[bias_idx]; -+ -+ update_bias = (trd[bias_idx]->deadline > trd[!bias_idx]->deadline); -+ } else -+ update_bias = (bias_idx == dl_idx); -+ -+ rq = rd->rq; -+ optype = adios_optype(rq); -+ -+ rcu_read_lock(); -+ has_base = -+ !!rcu_dereference(ad->latency_model[optype].params)->base; -+ rcu_read_unlock(); -+ -+ // Check batch size and total predicted latency -+ if (count && (!has_base || -+ ad->batch_count[page][optype] >= ad->batch_limit[optype] || -+ (tpl + added_lat + rd->pred_lat) > ad->global_latency_window)) { -+ stop = true; -+ break; -+ } -+ -+ if (update_bias) { -+ s64 sign = ((s64)bias_idx << 1) - 1; -+ if (unlikely(!rd->pred_lat)) -+ ad->dl_bias = sign; -+ else -+ // Adjust the bias based on the predicted latency -+ ad->dl_bias += sign * (s64)((rd->pred_lat * -+ adios_prio_to_wmult[ad->dl_prio[bias_idx] + 20]) >> 10); -+ } -+ -+ remove_request(ad, rq); -+ -+ // Add request to 
the corresponding batch queue -+ dest_idx = (bq_batch_order == ADIOS_BO_OPTYPE || optype == ADIOS_OTHER)? -+ optype : !!(rd->deadline != rq->start_time_ns); -+ dest_q = &ad->batch_queue[page][dest_idx]; -+ list_add_tail(&rq->queuelist, dest_q); -+ ad->bq_state[page] |= 1U << dest_idx; -+ ad->batch_count[page][optype]++; -+ optype_count[optype]++; -+ added_lat += rd->pred_lat; -+ count++; -+ }} while (!stop); -+ -+ if (bq_batch_order == ADIOS_BO_ELEVATOR && ad->batch_count[page][1] > 1) -+ list_sort(NULL, &ad->batch_queue[page][1], cmp_rq_pos); -+ -+ if (count) { -+ if (added_lat) -+ atomic64_add(added_lat, &ad->total_pred_lat); -+ -+ set_adios_state(ad, ADIOS_STATE_BQ, page, true); -+ -+ for (optype = 0; optype < ADIOS_OPTYPES; optype++) -+ if (ad->batch_actual_max_size[optype] < optype_count[optype]) -+ ad->batch_actual_max_size[optype] = optype_count[optype]; -+ if (ad->batch_actual_max_total < count) -+ ad->batch_actual_max_total = count; -+ } -+ return count; -+} -+ -+// Flip to the next batch queue page -+static void flip_bq_page(struct adios_data *ad) { -+ ad->bq_page = !ad->bq_page; -+ update_elv_direction(ad); -+} -+ -+// Pop a request from the specified index (optype or elevator tier) -+static inline struct request *pop_bq_request( -+ struct adios_data *ad, u8 idx, bool direction) { -+ bool page = ad->bq_page; -+ struct list_head *q = &ad->batch_queue[page][idx]; -+ struct request *rq = direction ? 
-+ list_last_entry_or_null (q, struct request, queuelist): -+ list_first_entry_or_null(q, struct request, queuelist); -+ if (rq) { -+ list_del_init(&rq->queuelist); -+ if (list_empty(q)) -+ ad->bq_state[page] &= ~(1U << idx); -+ } -+ return rq; -+} -+ -+static struct request *pop_next_bq_request_optype(struct adios_data *ad) { -+ u32 bq_state = ad->bq_state[ad->bq_page]; -+ if (!bq_state) return NULL; -+ -+ struct request *rq; -+ u32 bq_idx = 31 - __builtin_clz(bq_state); -+ -+ // Dispatch based on optype (FIFO within each) or single-queue elevator -+ rq = pop_bq_request(ad, bq_idx, false); -+ return rq; -+} -+ -+static struct request *pop_next_bq_request_elevator(struct adios_data *ad) { -+ u32 bq_state = ad->bq_state[ad->bq_page]; -+ if (!bq_state) return NULL; -+ -+ struct request *rq; -+ u32 bq_idx = 31 - __builtin_clz(bq_state); -+ bool direction = (bq_idx == 1) & ad->elv_direction; -+ -+ // Tier-2 (sync) is always high priority -+ // Tier-3 (async) uses the pre-calculated elevator direction -+ rq = pop_bq_request(ad, bq_idx, direction); -+ -+ /* If batch queue for the sync requests just became empty */ -+ if (bq_idx == 0 && rq && !(bq_state & 0x1)) -+ update_elv_direction(ad); -+ -+ return rq; -+} -+ -+// Returns the state of the other batch queue page -+static bool more_bq_ready(struct adios_data *ad, bool page) { -+ u32 state = get_adios_state(ad, ADIOS_STATE_BQ); -+ return state & (1U << !page); -+} -+ -+// Dispatch a request from the batch queues -+static struct request *dispatch_from_bq(struct adios_data *ad) { -+ struct request *rq; -+ -+ guard(spinlock_irqsave)(&ad->bq_lock); -+ -+ u64 tpl = atomic64_read(&ad->total_pred_lat); -+ -+ if (!more_bq_ready(ad, ad->bq_page) && (!tpl || tpl < div_u64( -+ ad->global_latency_window * ad->bq_refill_below_ratio, 100))) -+ fill_batch_queues(ad, tpl); -+ -+again: -+ // Use the per-page state to decide the dispatch logic, ensuring correctness -+ rq = (ad->bq_batch_order[ad->bq_page] == ADIOS_BO_ELEVATOR) ? 
-+ pop_next_bq_request_elevator(ad): -+ pop_next_bq_request_optype(ad); -+ -+ if (rq) { -+ bool page = ad->bq_page; -+ bool is_empty = !ad->bq_state[page]; -+ if (is_empty) -+ set_adios_state(ad, ADIOS_STATE_BQ, page, false); -+ return rq; -+ } -+ -+ // If there's more batch queue page available, flip to it and retry -+ if (more_bq_ready(ad, ad->bq_page)) { -+ flip_bq_page(ad); -+ goto again; -+ } -+ -+ return NULL; -+} -+ -+// Dispatch a request from the priority queue -+static struct request *dispatch_from_pq(struct adios_data *ad) { -+ struct request *rq = NULL; -+ -+ guard(spinlock_irqsave)(&ad->pq_lock); -+ u32 pq_state = get_adios_state(ad, ADIOS_STATE_PQ); -+ u8 pq_idx = pq_state >> 1; -+ struct list_head *q = &ad->prio_queue[pq_idx]; -+ -+ if (unlikely(list_empty(q))) return NULL; -+ -+ rq = list_first_entry(q, struct request, queuelist); -+ list_del_init(&rq->queuelist); -+ if (list_empty(q)) { -+ set_adios_state(ad, ADIOS_STATE_PQ, pq_idx, false); -+ update_elv_direction(ad); -+ } -+ return rq; -+} -+ -+// Dispatch a request to the hardware queue -+static struct request *adios_dispatch_request(struct blk_mq_hw_ctx *hctx) { -+ struct adios_data *ad = hctx->queue->elevator->elevator_data; -+ struct request *rq; -+ -+ rq = dispatch_from_pq(ad); -+ if (rq) goto found; -+ rq = dispatch_from_bq(ad); -+ if (!rq) return NULL; -+found: -+ if (ad->is_rotational) -+ ad->head_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ -+ rq->rq_flags |= RQF_STARTED; -+ return rq; -+} -+ -+// Timer callback function to periodically update latency models -+static void update_timer_callback(struct timer_list *t) { -+ struct adios_data *ad = timer_container_of(ad, t, update_timer); -+ -+ for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) -+ latency_model_update(ad, &ad->latency_model[optype]); -+} -+ -+// Handle the completion of a request -+static void adios_completed_request(struct request *rq, u64 now) { -+ struct adios_data *ad = rq->q->elevator->elevator_data; -+ struct 
adios_rq_data *rd = get_rq_data(rq); -+ -+ u64 tpl_after = atomic64_sub_return(rd->pred_lat, &ad->total_pred_lat); -+ u8 optype = adios_optype(rq); -+ -+ if (optype == ADIOS_OTHER) { -+ // Non-positional commands make the head position unpredictable. -+ // Invalidate our knowledge of the last completed position. -+ if (ad->is_rotational) -+ ad->last_completed_pos = 0; -+ return; -+ } -+ -+ u64 lct = ad->last_completed_time ?: rq->io_start_time_ns; -+ ad->last_completed_time = (tpl_after) ? now : 0; -+ -+ if (!rq->io_start_time_ns || !rd->block_size || unlikely(now < lct)) -+ return; -+ -+ u64 latency = now - lct; -+ if (latency > ad->lat_model_latency_limit) -+ return; -+ -+ u32 weight = 1; -+ if (ad->is_rotational) { -+ sector_t current_pos = blk_rq_pos(rq); -+ // Only calculate seek distance if we have a valid last position. -+ if (ad->last_completed_pos > 0) { -+ u64 seek_distance = abs( -+ (s64)current_pos - (s64)ad->last_completed_pos); -+ weight = 65 - __builtin_clzll(seek_distance); -+ } -+ // Update (or re-synchronize) our knowledge of the head position. 
-+ ad->last_completed_pos = current_pos + blk_rq_sectors(rq); -+ } -+ -+ latency_model_input(ad, &ad->latency_model[optype], -+ rd->block_size, latency, rd->pred_lat, weight); -+ timer_reduce(&ad->update_timer, jiffies + msecs_to_jiffies(100)); -+} -+ -+// Clean up after a request is finished -+static void adios_finish_request(struct request *rq) { -+ struct adios_data *ad = rq->q->elevator->elevator_data; -+ -+ if (rq->elv.priv[0]) { -+ // Free adios_rq_data back to the memory pool -+ kmem_cache_free(ad->rq_data_pool, get_rq_data(rq)); -+ rq->elv.priv[0] = NULL; -+ } -+} -+ -+// Check if there are any requests available for dispatch -+static bool adios_has_work(struct blk_mq_hw_ctx *hctx) { -+ struct adios_data *ad = hctx->queue->elevator->elevator_data; -+ -+ return atomic_read(&ad->state) != 0; -+} -+ -+// Initialize the scheduler-specific data for a hardware queue -+static int adios_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { -+ adios_depth_updated(hctx); -+ return 0; -+} -+ -+// Initialize the scheduler-specific data when initializing the request queue -+static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { -+ struct adios_data *ad; -+ struct elevator_queue *eq; -+ int ret = -ENOMEM; -+ u8 optype = 0; -+ -+ eq = elevator_alloc(q, e); -+ if (!eq) { -+ pr_err("adios: Failed to allocate the elevator\n"); -+ return ret; -+ } -+ -+ ad = kzalloc_node(sizeof(*ad), GFP_KERNEL, q->node); -+ if (!ad) { -+ pr_err("adios: Failed to create adios_data\n"); -+ goto put_eq; -+ } -+ -+ // Create a memory pool for adios_rq_data -+ ad->rq_data_pool = kmem_cache_create("rq_data_pool", -+ sizeof(struct adios_rq_data), -+ 0, SLAB_HWCACHE_ALIGN, NULL); -+ if (!ad->rq_data_pool) { -+ pr_err("adios: Failed to create rq_data_pool\n"); -+ goto free_ad; -+ } -+ -+ /* Create a memory pool for dl_group */ -+ ad->dl_group_pool = kmem_cache_create("dl_group_pool", -+ sizeof(struct dl_group), -+ 0, SLAB_HWCACHE_ALIGN, NULL); -+ if 
(!ad->dl_group_pool) { -+ pr_err("adios: Failed to create dl_group_pool\n"); -+ goto destroy_rq_data_pool; -+ } -+ -+ for (int i = 0; i < ADIOS_PQ_LEVELS; i++) -+ INIT_LIST_HEAD(&ad->prio_queue[i]); -+ -+ for (u8 i = 0; i < ADIOS_DL_TYPES; i++) { -+ ad->dl_tree[i] = RB_ROOT_CACHED; -+ ad->dl_prio[i] = default_dl_prio[i]; -+ } -+ ad->dl_bias = 0; -+ -+ for (u8 page = 0; page < ADIOS_BQ_PAGES; page++) -+ for (optype = 0; optype < ADIOS_OPTYPES; optype++) -+ INIT_LIST_HEAD(&ad->batch_queue[page][optype]); -+ -+ ad->aggr_buckets = kzalloc(sizeof(*ad->aggr_buckets), GFP_KERNEL); -+ if (!ad->aggr_buckets) { -+ pr_err("adios: Failed to allocate aggregation buckets\n"); -+ goto destroy_dl_group_pool; -+ } -+ -+ for (optype = 0; optype < ADIOS_OPTYPES; optype++) { -+ struct latency_model *model = &ad->latency_model[optype]; -+ struct latency_model_params *params; -+ -+ spin_lock_init(&model->update_lock); -+ params = kzalloc(sizeof(*params), GFP_KERNEL); -+ if (!params) { -+ pr_err("adios: Failed to allocate latency_model_params\n"); -+ goto free_buckets; -+ } -+ params->last_update_jiffies = jiffies; -+ RCU_INIT_POINTER(model->params, params); -+ -+ model->pcpu_buckets = alloc_percpu(struct lm_buckets); -+ if (!model->pcpu_buckets) { -+ pr_err("adios: Failed to allocate per-CPU buckets\n"); -+ kfree(params); -+ goto free_buckets; -+ } -+ -+ model->lm_shrink_at_kreqs = default_lm_shrink_at_kreqs; -+ model->lm_shrink_at_gbytes = default_lm_shrink_at_gbytes; -+ model->lm_shrink_resist = default_lm_shrink_resist; -+ } -+ -+ for (optype = 0; optype < ADIOS_OPTYPES; optype++) { -+ ad->latency_target[optype] = default_latency_target[optype]; -+ ad->batch_limit[optype] = default_batch_limit[optype]; -+ } -+ -+ eq->elevator_data = ad; -+ -+ ad->is_rotational = !!(q->limits.features & BLK_FEAT_ROTATIONAL); -+ ad->global_latency_window = (ad->is_rotational)? 
-+ default_global_latency_window_rotational: -+ default_global_latency_window; -+ ad->bq_refill_below_ratio = default_bq_refill_below_ratio; -+ ad->lat_model_latency_limit = default_lat_model_latency_limit; -+ ad->batch_order = default_batch_order; -+ ad->compliance_flags = default_compliance_flags; -+ -+ ad->insert_request_fn = insert_request_pre_stability; -+ -+ atomic_set(&ad->state, 0); -+ -+ spin_lock_init(&ad->lock); -+ spin_lock_init(&ad->pq_lock); -+ spin_lock_init(&ad->bq_lock); -+ -+ timer_setup(&ad->update_timer, update_timer_callback, 0); -+ -+ /* We dispatch from request queue wide instead of hw queue */ -+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); -+ -+ ad->queue = q; -+ blk_stat_enable_accounting(q); -+ -+ q->elevator = eq; -+ return 0; -+ -+free_buckets: -+ pr_err("adios: Failed to allocate per-cpu buckets\n"); -+ while (optype-- > 0) { -+ struct latency_model *prev_model = &ad->latency_model[optype]; -+ kfree(rcu_access_pointer(prev_model->params)); -+ free_percpu(prev_model->pcpu_buckets); -+ } -+ kfree(ad->aggr_buckets); -+destroy_dl_group_pool: -+ kmem_cache_destroy(ad->dl_group_pool); -+destroy_rq_data_pool: -+ kmem_cache_destroy(ad->rq_data_pool); -+free_ad: -+ kfree(ad); -+put_eq: -+ kobject_put(&eq->kobj); -+ return ret; -+} -+ -+// Clean up and free resources when exiting the scheduler -+static void adios_exit_sched(struct elevator_queue *e) { -+ struct adios_data *ad = e->elevator_data; -+ -+ timer_shutdown_sync(&ad->update_timer); -+ -+ for (int i = 0; i < 2; i++) -+ WARN_ON_ONCE(!list_empty(&ad->prio_queue[i])); -+ -+ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { -+ struct latency_model *model = &ad->latency_model[i]; -+ struct latency_model_params *params = rcu_access_pointer(model->params); -+ -+ RCU_INIT_POINTER(model->params, NULL); -+ kfree_rcu(params, rcu); -+ -+ free_percpu(model->pcpu_buckets); -+ } -+ -+ synchronize_rcu(); -+ -+ kfree(ad->aggr_buckets); -+ -+ if (ad->rq_data_pool) -+ kmem_cache_destroy(ad->rq_data_pool); -+ -+ if 
(ad->dl_group_pool) -+ kmem_cache_destroy(ad->dl_group_pool); -+ -+ blk_stat_disable_accounting(ad->queue); -+ -+ kfree(ad); -+} -+ -+static void sideload_latency_model( -+ struct latency_model *model, u64 base, u64 slope) { -+ struct latency_model_params *old_params, *new_params; -+ unsigned long flags; -+ -+ new_params = kzalloc(sizeof(*new_params), GFP_KERNEL); -+ if (!new_params) -+ return; -+ -+ spin_lock_irqsave(&model->update_lock, flags); -+ -+ old_params = rcu_dereference_protected(model->params, -+ lockdep_is_held(&model->update_lock)); -+ -+ new_params->last_update_jiffies = jiffies; -+ -+ // Initialize base and its statistics as a single sample. -+ new_params->base = base; -+ new_params->small_sum_delay = base; -+ new_params->small_count = 1; -+ -+ // Initialize slope and its statistics as a single sample. -+ new_params->slope = slope; -+ new_params->large_sum_delay = slope; -+ new_params->large_sum_bsize = 1024; /* Corresponds to 1 KiB */ -+ -+ lm_reset_pcpu_buckets(model); -+ -+ rcu_assign_pointer(model->params, new_params); -+ spin_unlock_irqrestore(&model->update_lock, flags); -+ -+ kfree_rcu(old_params, rcu); -+} -+ -+// Define sysfs attributes for operation types -+#define SYSFS_OPTYPE_DECL(name, optype) \ -+static ssize_t adios_lat_model_##name##_show( \ -+ struct elevator_queue *e, char *page) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ struct latency_model *model = &ad->latency_model[optype]; \ -+ struct latency_model_params *params; \ -+ ssize_t len = 0; \ -+ u64 base, slope; \ -+ rcu_read_lock(); \ -+ params = rcu_dereference(model->params); \ -+ base = params->base; \ -+ slope = params->slope; \ -+ rcu_read_unlock(); \ -+ len += sprintf(page, "base : %llu ns\n", base); \ -+ len += sprintf(page + len, "slope: %llu ns/KiB\n", slope); \ -+ return len; \ -+} \ -+static ssize_t adios_lat_model_##name##_store( \ -+ struct elevator_queue *e, const char *page, size_t count) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ struct 
latency_model *model = &ad->latency_model[optype]; \ -+ u64 base, slope; \ -+ int ret; \ -+ ret = sscanf(page, "%llu %llu", &base, &slope); \ -+ if (ret != 2) \ -+ return -EINVAL; \ -+ sideload_latency_model(model, base, slope); \ -+ reset_buckets(ad->aggr_buckets); \ -+ return count; \ -+} \ -+static ssize_t adios_lat_target_##name##_show( \ -+ struct elevator_queue *e, char *page) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ return sprintf(page, "%llu\n", ad->latency_target[optype]); \ -+} \ -+static ssize_t adios_lat_target_##name##_store( \ -+ struct elevator_queue *e, const char *page, size_t count) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ unsigned long nsec; \ -+ int ret; \ -+ ret = kstrtoul(page, 10, &nsec); \ -+ if (ret) \ -+ return ret; \ -+ sideload_latency_model(&ad->latency_model[optype], 0, 0); \ -+ ad->latency_target[optype] = nsec; \ -+ return count; \ -+} \ -+static ssize_t adios_batch_limit_##name##_show( \ -+ struct elevator_queue *e, char *page) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ return sprintf(page, "%u\n", ad->batch_limit[optype]); \ -+} \ -+static ssize_t adios_batch_limit_##name##_store( \ -+ struct elevator_queue *e, const char *page, size_t count) { \ -+ unsigned long max_batch; \ -+ int ret; \ -+ ret = kstrtoul(page, 10, &max_batch); \ -+ if (ret || max_batch == 0) \ -+ return -EINVAL; \ -+ struct adios_data *ad = e->elevator_data; \ -+ ad->batch_limit[optype] = max_batch; \ -+ return count; \ -+} -+ -+SYSFS_OPTYPE_DECL(read, ADIOS_READ); -+SYSFS_OPTYPE_DECL(write, ADIOS_WRITE); -+SYSFS_OPTYPE_DECL(discard, ADIOS_DISCARD); -+ -+// Show the maximum batch size actually achieved for each operation type -+static ssize_t adios_batch_actual_max_show( -+ struct elevator_queue *e, char *page) { -+ struct adios_data *ad = e->elevator_data; -+ u32 total_count, read_count, write_count, discard_count; -+ -+ total_count = ad->batch_actual_max_total; -+ read_count = ad->batch_actual_max_size[ADIOS_READ]; -+ 
write_count = ad->batch_actual_max_size[ADIOS_WRITE]; -+ discard_count = ad->batch_actual_max_size[ADIOS_DISCARD]; -+ -+ return sprintf(page, -+ "Total : %u\nDiscard: %u\nRead : %u\nWrite : %u\n", -+ total_count, discard_count, read_count, write_count); -+} -+ -+#define SYSFS_ULL_DECL(field, min_val, max_val) \ -+static ssize_t adios_##field##_show( \ -+ struct elevator_queue *e, char *page) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ return sprintf(page, "%llu\n", ad->field); \ -+} \ -+static ssize_t adios_##field##_store( \ -+ struct elevator_queue *e, const char *page, size_t count) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ unsigned long val; \ -+ int ret; \ -+ ret = kstrtoul(page, 10, &val); \ -+ if (ret || val < (min_val) || val > (max_val)) \ -+ return -EINVAL; \ -+ ad->field = val; \ -+ return count; \ -+} -+ -+SYSFS_ULL_DECL(global_latency_window, 0, ULLONG_MAX) -+SYSFS_ULL_DECL(compliance_flags, 0, ULLONG_MAX) -+ -+#define SYSFS_INT_DECL(field, min_val, max_val) \ -+static ssize_t adios_##field##_show( \ -+ struct elevator_queue *e, char *page) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ return sprintf(page, "%d\n", ad->field); \ -+} \ -+static ssize_t adios_##field##_store( \ -+ struct elevator_queue *e, const char *page, size_t count) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ int val; \ -+ int ret; \ -+ ret = kstrtoint(page, 10, &val); \ -+ if (ret || val < (min_val) || val > (max_val)) \ -+ return -EINVAL; \ -+ ad->field = val; \ -+ return count; \ -+} -+ -+SYSFS_INT_DECL(bq_refill_below_ratio, 0, 100) -+SYSFS_INT_DECL(lat_model_latency_limit, 0, 2*NSEC_PER_SEC) -+SYSFS_INT_DECL(batch_order, ADIOS_BO_OPTYPE, !!ad->is_rotational) -+ -+// Show the read priority -+static ssize_t adios_read_priority_show( -+ struct elevator_queue *e, char *page) { -+ struct adios_data *ad = e->elevator_data; -+ return sprintf(page, "%d\n", ad->dl_prio[0]); -+} -+ -+// Set the read priority -+static ssize_t 
adios_read_priority_store( -+ struct elevator_queue *e, const char *page, size_t count) { -+ struct adios_data *ad = e->elevator_data; -+ int prio; -+ int ret; -+ -+ ret = kstrtoint(page, 10, &prio); -+ if (ret || prio < -20 || prio > 19) -+ return -EINVAL; -+ -+ guard(spinlock_irqsave)(&ad->lock); -+ ad->dl_prio[0] = prio; -+ ad->dl_bias = 0; -+ -+ return count; -+} -+ -+// Reset batch queue statistics -+static ssize_t adios_reset_bq_stats_store( -+ struct elevator_queue *e, const char *page, size_t count) { -+ struct adios_data *ad = e->elevator_data; -+ unsigned long val; -+ int ret; -+ -+ ret = kstrtoul(page, 10, &val); -+ if (ret || val != 1) -+ return -EINVAL; -+ -+ for (u8 i = 0; i < ADIOS_OPTYPES; i++) -+ ad->batch_actual_max_size[i] = 0; -+ -+ ad->batch_actual_max_total = 0; -+ -+ return count; -+} -+ -+// Reset the latency model parameters or load them from user input -+static ssize_t adios_reset_lat_model_store( -+ struct elevator_queue *e, const char *page, size_t count) -+{ -+ struct adios_data *ad = e->elevator_data; -+ struct latency_model *model; -+ int ret; -+ -+ /* -+ * Differentiate between two modes based on input format: -+ * 1. "1": Fully reset the model (backward compatibility). -+ * 2. "R_base R_slope W_base W_slope D_base D_slope": Load values. -+ */ -+ if (!strchr(page, ' ')) { -+ // Mode 1: Full reset. -+ unsigned long val; -+ -+ ret = kstrtoul(page, 10, &val); -+ if (ret || val != 1) -+ return -EINVAL; -+ -+ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { -+ model = &ad->latency_model[i]; -+ sideload_latency_model(model, 0, 0); -+ } -+ } else { -+ // Mode 2: Load initial values for all latency models. 
-+ u64 params[3][2]; /* 0:base, 1:slope for R, W, D */ -+ -+ ret = sscanf(page, "%llu %llu %llu %llu %llu %llu", -+ ¶ms[ADIOS_READ ][0], ¶ms[ADIOS_READ ][1], -+ ¶ms[ADIOS_WRITE ][0], ¶ms[ADIOS_WRITE ][1], -+ ¶ms[ADIOS_DISCARD][0], ¶ms[ADIOS_DISCARD][1]); -+ -+ if (ret != 6) -+ return -EINVAL; -+ -+ for (u8 i = ADIOS_READ; i <= ADIOS_DISCARD; i++) { -+ model = &ad->latency_model[i]; -+ sideload_latency_model(model, params[i][0], params[i][1]); -+ } -+ } -+ reset_buckets(ad->aggr_buckets); -+ -+ return count; -+} -+ -+// Show the ADIOS version -+static ssize_t adios_version_show(struct elevator_queue *e, char *page) { -+ return sprintf(page, "%s\n", ADIOS_VERSION); -+} -+ -+// Define sysfs attributes for dynamic thresholds -+#define SHRINK_THRESHOLD_ATTR_RW(name, model_field, min_value, max_value) \ -+static ssize_t adios_shrink_##name##_store( \ -+ struct elevator_queue *e, const char *page, size_t count) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ unsigned long val; \ -+ int ret; \ -+ ret = kstrtoul(page, 10, &val); \ -+ if (ret || val < min_value || val > max_value) \ -+ return -EINVAL; \ -+ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ -+ struct latency_model *model = &ad->latency_model[i]; \ -+ unsigned long flags; \ -+ spin_lock_irqsave(&model->update_lock, flags); \ -+ model->model_field = val; \ -+ spin_unlock_irqrestore(&model->update_lock, flags); \ -+ } \ -+ return count; \ -+} \ -+static ssize_t adios_shrink_##name##_show( \ -+ struct elevator_queue *e, char *page) { \ -+ struct adios_data *ad = e->elevator_data; \ -+ u32 val = 0; \ -+ unsigned long flags; \ -+ struct latency_model *model = &ad->latency_model[0]; \ -+ spin_lock_irqsave(&model->update_lock, flags); \ -+ val = model->model_field; \ -+ spin_unlock_irqrestore(&model->update_lock, flags); \ -+ return sprintf(page, "%u\n", val); \ -+} -+ -+SHRINK_THRESHOLD_ATTR_RW(at_kreqs, lm_shrink_at_kreqs, 1, 100000) -+SHRINK_THRESHOLD_ATTR_RW(at_gbytes, lm_shrink_at_gbytes, 1, 1000) 
-+SHRINK_THRESHOLD_ATTR_RW(resist, lm_shrink_resist, 1, 3) -+ -+// Define sysfs attributes -+#define AD_ATTR(name, show_func, store_func) \ -+ __ATTR(name, 0644, show_func, store_func) -+#define AD_ATTR_RW(name) \ -+ __ATTR(name, 0644, adios_##name##_show, adios_##name##_store) -+#define AD_ATTR_RO(name) \ -+ __ATTR(name, 0444, adios_##name##_show, NULL) -+#define AD_ATTR_WO(name) \ -+ __ATTR(name, 0200, NULL, adios_##name##_store) -+ -+// Define sysfs attributes for ADIOS scheduler -+static struct elv_fs_entry adios_sched_attrs[] = { -+ AD_ATTR_RO(batch_actual_max), -+ AD_ATTR_RW(bq_refill_below_ratio), -+ AD_ATTR_RW(global_latency_window), -+ AD_ATTR_RW(lat_model_latency_limit), -+ AD_ATTR_RW(batch_order), -+ AD_ATTR_RW(compliance_flags), -+ -+ AD_ATTR_RW(batch_limit_read), -+ AD_ATTR_RW(batch_limit_write), -+ AD_ATTR_RW(batch_limit_discard), -+ -+ AD_ATTR_RW(lat_model_read), -+ AD_ATTR_RW(lat_model_write), -+ AD_ATTR_RW(lat_model_discard), -+ -+ AD_ATTR_RW(lat_target_read), -+ AD_ATTR_RW(lat_target_write), -+ AD_ATTR_RW(lat_target_discard), -+ -+ AD_ATTR_RW(shrink_at_kreqs), -+ AD_ATTR_RW(shrink_at_gbytes), -+ AD_ATTR_RW(shrink_resist), -+ -+ AD_ATTR_RW(read_priority), -+ -+ AD_ATTR_WO(reset_bq_stats), -+ AD_ATTR_WO(reset_lat_model), -+ AD_ATTR(adios_version, adios_version_show, NULL), -+ -+ __ATTR_NULL -+}; -+ -+// Define the ADIOS scheduler type -+static struct elevator_type mq_adios = { -+ .ops = { -+ .next_request = elv_rb_latter_request, -+ .former_request = elv_rb_former_request, -+ .limit_depth = adios_limit_depth, -+ .depth_updated = adios_depth_updated, -+ .request_merged = adios_request_merged, -+ .requests_merged = adios_merged_requests, -+ .bio_merge = adios_bio_merge, -+ .insert_requests = adios_insert_requests, -+ .prepare_request = adios_prepare_request, -+ .dispatch_request = adios_dispatch_request, -+ .completed_request = adios_completed_request, -+ .finish_request = adios_finish_request, -+ .has_work = adios_has_work, -+ .init_hctx = 
adios_init_hctx, -+ .init_sched = adios_init_sched, -+ .exit_sched = adios_exit_sched, -+ }, -+ .elevator_attrs = adios_sched_attrs, -+ .elevator_name = "adios", -+ .elevator_owner = THIS_MODULE, -+}; -+MODULE_ALIAS("mq-adios-iosched"); -+ -+#define ADIOS_PROGNAME "Adaptive Deadline I/O Scheduler" -+#define ADIOS_AUTHOR "Masahito Suzuki" -+ -+// Initialize the ADIOS scheduler module -+static int __init adios_init(void) { -+ printk(KERN_INFO "%s %s by %s\n", -+ ADIOS_PROGNAME, ADIOS_VERSION, ADIOS_AUTHOR); -+ return elv_register(&mq_adios); -+} -+ -+// Exit the ADIOS scheduler module -+static void __exit adios_exit(void) { -+ elv_unregister(&mq_adios); -+} -+ -+module_init(adios_init); -+module_exit(adios_exit); -+ -+MODULE_AUTHOR(ADIOS_AUTHOR); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION(ADIOS_PROGNAME); -\ No newline at end of file -diff --git a/block/elevator.c b/block/elevator.c -index fe96c6f4753c..7b4f2913841f 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -752,6 +752,21 @@ void elevator_set_default(struct request_queue *q) - if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) - return; - -+#ifdef CONFIG_MQ_IOSCHED_DEFAULT_ADIOS -+ ctx.name = "adios"; -+#else // !CONFIG_MQ_IOSCHED_DEFAULT_ADIOS -+ bool is_sq = q->nr_hw_queues == 1 || blk_mq_is_shared_tags(q->tag_set->flags); -+#ifdef CONFIG_CACHY -+#ifdef CONFIG_IOSCHED_BFQ -+ if (is_sq) -+ ctx.name = "bfq"; -+#endif /* CONFIG_IOSCHED_BFQ */ -+#else -+ if (!is_sq) -+ return; -+#endif /* CONFIG_CACHY */ -+#endif /* CONFIG_MQ_IOSCHED_DEFAULT_ADIOS */ -+ - /* - * For single queue devices, default to using mq-deadline. 
If we - * have multiple queues or mq-deadline is not available, default -@@ -761,13 +776,10 @@ void elevator_set_default(struct request_queue *q) - if (!e) - return; - -- if ((q->nr_hw_queues == 1 || -- blk_mq_is_shared_tags(q->tag_set->flags))) { -- err = elevator_change(q, &ctx); -- if (err < 0) -- pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n", -- ctx.name, err); -- } -+ err = elevator_change(q, &ctx); -+ if (err < 0) -+ pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n", -+ ctx.name, err); - elevator_put(e); - } - -diff --git a/drivers/Makefile b/drivers/Makefile -index b5749cf67044..5beba9f57254 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -64,14 +64,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb depends on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-y += base/ block/ misc/ mfd/ nfc/ - obj-$(CONFIG_LIBNVDIMM) += nvdimm/ -@@ -83,6 +77,13 @@ obj-y += macintosh/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and after iommu -+obj-y += gpu/ -+ -+# i810fb depends on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ -diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c -index 7a7f88b3fa2b..cb26ab099da2 100644 ---- a/drivers/ata/ahci.c -+++ b/drivers/ata/ahci.c -@@ -1672,7 +1672,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) - } - #endif - --static void ahci_remap_check(struct pci_dev *pdev, int bar, -+static int ahci_remap_check(struct pci_dev *pdev, int bar, - struct ahci_host_priv *hpriv) - { - int i; -@@ -1685,7 +1685,7 @@ static void 
ahci_remap_check(struct pci_dev *pdev, int bar, - pci_resource_len(pdev, bar) < SZ_512K || - bar != AHCI_PCI_BAR_STANDARD || - !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) -- return; -+ return 0; - - cap = readq(hpriv->mmio + AHCI_REMAP_CAP); - for (i = 0; i < AHCI_MAX_REMAP; i++) { -@@ -1700,18 +1700,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, - } - - if (!hpriv->remapped_nvme) -- return; -- -- dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", -- hpriv->remapped_nvme); -- dev_warn(&pdev->dev, -- "Switch your BIOS from RAID to AHCI mode to use them.\n"); -+ return 0; - -- /* -- * Don't rely on the msi-x capability in the remap case, -- * share the legacy interrupt across ahci and remapped devices. -- */ -- hpriv->flags |= AHCI_HFLAG_NO_MSI; -+ /* Abort probe, allowing intel-nvme-remap to step in when available */ -+ dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); -+ return -ENODEV; - } - - static int ahci_get_irq_vector(struct ata_host *host, int port) -@@ -1975,7 +1968,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - return -ENOMEM; - - /* detect remapped nvme devices */ -- ahci_remap_check(pdev, ahci_pci_bar, hpriv); -+ rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); -+ if (rc) -+ return rc; - - sysfs_add_file_to_group(&pdev->dev.kobj, - &dev_attr_remapped_nvme.attr, -diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 -index 2c5c228408bf..918e2bebfe78 100644 ---- a/drivers/cpufreq/Kconfig.x86 -+++ b/drivers/cpufreq/Kconfig.x86 -@@ -9,7 +9,6 @@ config X86_INTEL_PSTATE - select ACPI_PROCESSOR if ACPI - select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO - select CPU_FREQ_GOV_PERFORMANCE -- select CPU_FREQ_GOV_SCHEDUTIL if SMP - help - This driver provides a P state for Intel core processors. 
- The driver implements an internal governor and will become -@@ -39,7 +38,6 @@ config X86_AMD_PSTATE - depends on X86 && ACPI - select ACPI_PROCESSOR - select ACPI_CPPC_LIB if X86_64 -- select CPU_FREQ_GOV_SCHEDUTIL if SMP - help - This driver adds a CPUFreq driver which utilizes a fine grain - processor performance frequency control range instead of legacy -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index f366d35c5840..a04b6bfeb1c2 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -3950,6 +3950,8 @@ static int __init intel_pstate_setup(char *str) - - if (!strcmp(str, "disable")) - no_load = 1; -+ else if (!strcmp(str, "enable")) -+ no_load = 0; - else if (!strcmp(str, "active")) - default_driver = &intel_pstate; - else if (!strcmp(str, "passive")) -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -index ef3af170dda4..cf918b18db53 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -@@ -163,6 +163,7 @@ struct amdgpu_watchdog_timer { - */ - extern int amdgpu_modeset; - extern unsigned int amdgpu_vram_limit; -+extern int amdgpu_ignore_min_pcap; - extern int amdgpu_vis_vram_limit; - extern int amdgpu_gart_size; - extern int amdgpu_gtt_size; -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index 395c6be901ce..fb1607b2805a 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -148,6 +148,7 @@ enum AMDGPU_DEBUG_MASK { - }; - - unsigned int amdgpu_vram_limit = UINT_MAX; -+int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */ - int amdgpu_vis_vram_limit; - int amdgpu_gart_size = -1; /* auto */ - int amdgpu_gtt_size = -1; /* auto */ -@@ -269,6 +270,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { - .period = 0x0, /* default to 0x0 (timeout disable) */ - }; - -+/** -+ * DOC: ignore_min_pcap (int) -+ * Ignore 
the minimum power cap. -+ * Useful on graphics cards where the minimum power cap is very high. -+ * The default is 0 (Do not ignore). -+ */ -+MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap"); -+module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600); -+ - /** - * DOC: vramlimit (int) - * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). -diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig -index abd3b6564373..46937e6fa78d 100644 ---- a/drivers/gpu/drm/amd/display/Kconfig -+++ b/drivers/gpu/drm/amd/display/Kconfig -@@ -56,4 +56,10 @@ config DRM_AMD_SECURE_DISPLAY - This option enables the calculation of crc of specific region via - debugfs. Cooperate with specific DMCU FW. - -+config AMD_PRIVATE_COLOR -+ bool "Enable KMS color management by AMD for AMD" -+ default n -+ help -+ This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. 
-+ - endmenu -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index a0ca3b2c6bd8..c4ea09496f95 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4675,7 +4675,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) - return r; - } - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - if (amdgpu_dm_create_color_properties(adev)) { - dc_state_release(state->context); - kfree(state); -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -index ebabfe3a512f..4d3ebcaacca1 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -@@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) - return val; - } - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - /* Pre-defined Transfer Functions (TF) - * - * AMD driver supports pre-defined mathematical functions for transferring -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 45feb404b097..ee8672919a05 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -@@ -491,7 +491,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) - } - #endif - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - /** - * dm_crtc_additional_color_mgmt - enable additional color properties - * @crtc: DRM CRTC -@@ -573,7 +573,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - #if defined(CONFIG_DEBUG_FS) - .late_register = amdgpu_dm_crtc_late_register, - #endif --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, - .atomic_get_property 
= amdgpu_dm_atomic_crtc_get_property, - #endif -@@ -770,7 +770,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - - drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - dm_crtc_additional_color_mgmt(&acrtc->base); - #endif - return 0; -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -index eef51652ca35..d5c932c191b2 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -@@ -1601,7 +1601,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, - drm_atomic_helper_plane_destroy_state(plane, state); - } - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - static void - dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, - struct drm_plane *plane) -@@ -1792,7 +1792,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { - .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, - .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, - .format_mod_supported = amdgpu_dm_plane_format_mod_supported, --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - .atomic_set_property = dm_atomic_plane_set_property, - .atomic_get_property = dm_atomic_plane_get_property, - #endif -@@ -1888,7 +1888,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - else - drm_plane_helper_add(plane, &dm_plane_helper_funcs); - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - dm_atomic_plane_attach_color_mgmt_properties(dm, plane); - #endif - /* Create (reset) the plane state */ -diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -index 5fbfe7333b54..9e81953043be 100644 ---- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c -+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -@@ -3073,6 +3073,9 @@ static ssize_t 
amdgpu_hwmon_show_power_cap_min(struct device *dev, - struct device_attribute *attr, - char *buf) - { -+ if (amdgpu_ignore_min_pcap) -+ return sysfs_emit(buf, "%i\n", 0); -+ - return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); - } - -diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index b47cb4a5f488..f9f6b0d96f97 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -@@ -2921,7 +2921,10 @@ int smu_get_power_limit(void *handle, - *limit = smu->max_power_limit; - break; - case SMU_PPT_LIMIT_MIN: -- *limit = smu->min_power_limit; -+ if (amdgpu_ignore_min_pcap) -+ *limit = 0; -+ else -+ *limit = smu->min_power_limit; - break; - default: - return -EINVAL; -@@ -2945,7 +2948,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) - if (smu->ppt_funcs->set_power_limit) - return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - -- if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { -+ if (amdgpu_ignore_min_pcap) { -+ if ((limit > smu->max_power_limit)) { -+ dev_err(smu->adev->dev, -+ "New power limit (%d) is over the max allowed %d\n", -+ limit, smu->max_power_limit); -+ return -EINVAL; -+ } -+ } else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { - dev_err(smu->adev->dev, - "New power limit (%d) is out of range [%d,%d]\n", - limit, smu->min_power_limit, smu->max_power_limit); -diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c -index 90ff6be85cf4..15159c1cf6e1 100644 ---- a/drivers/input/evdev.c -+++ b/drivers/input/evdev.c -@@ -46,6 +46,7 @@ struct evdev_client { - struct fasync_struct *fasync; - struct evdev *evdev; - struct list_head node; -+ struct rcu_head rcu; - enum input_clock_type clk_type; - bool revoked; - unsigned long *evmasks[EV_CNT]; -@@ -368,13 +369,22 @@ static void evdev_attach_client(struct evdev *evdev, - spin_unlock(&evdev->client_lock); - } - -+static 
void evdev_reclaim_client(struct rcu_head *rp) -+{ -+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu); -+ unsigned int i; -+ for (i = 0; i < EV_CNT; ++i) -+ bitmap_free(client->evmasks[i]); -+ kvfree(client); -+} -+ - static void evdev_detach_client(struct evdev *evdev, - struct evdev_client *client) - { - spin_lock(&evdev->client_lock); - list_del_rcu(&client->node); - spin_unlock(&evdev->client_lock); -- synchronize_rcu(); -+ call_rcu(&client->rcu, evdev_reclaim_client); - } - - static int evdev_open_device(struct evdev *evdev) -@@ -427,7 +437,6 @@ static int evdev_release(struct inode *inode, struct file *file) - { - struct evdev_client *client = file->private_data; - struct evdev *evdev = client->evdev; -- unsigned int i; - - mutex_lock(&evdev->mutex); - -@@ -439,11 +448,6 @@ static int evdev_release(struct inode *inode, struct file *file) - - evdev_detach_client(evdev, client); - -- for (i = 0; i < EV_CNT; ++i) -- bitmap_free(client->evmasks[i]); -- -- kvfree(client); -- - evdev_close_device(evdev); - - return 0; -@@ -486,7 +490,6 @@ static int evdev_open(struct inode *inode, struct file *file) - - err_free_client: - evdev_detach_client(evdev, client); -- kvfree(client); - return error; - } - -diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c -index 5ef43231fe77..5d754058c023 100644 ---- a/drivers/md/dm-crypt.c -+++ b/drivers/md/dm-crypt.c -@@ -3305,6 +3305,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) - goto bad; - } - -+#ifdef CONFIG_CACHY -+ set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); -+ set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); -+#endif -+ - ret = crypt_ctr_cipher(ti, argv[0], argv[1]); - if (ret < 0) - goto bad; -diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig -index 331b8e535e5b..80dabeebf580 100644 ---- a/drivers/media/v4l2-core/Kconfig -+++ b/drivers/media/v4l2-core/Kconfig -@@ -40,6 +40,11 @@ config VIDEO_TUNER - config V4L2_JPEG_HELPER - 
tristate - -+config V4L2_LOOPBACK -+ tristate "V4L2 loopback device" -+ help -+ V4L2 loopback device -+ - # Used by drivers that need v4l2-h264.ko - config V4L2_H264 - tristate -diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile -index 2177b9d63a8f..c179507cedc4 100644 ---- a/drivers/media/v4l2-core/Makefile -+++ b/drivers/media/v4l2-core/Makefile -@@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o - obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o - obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o - -+obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o -+ - obj-$(CONFIG_VIDEO_TUNER) += tuner.o - obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o -diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c -new file mode 100644 -index 000000000000..3be7c4abc1e7 ---- /dev/null -+++ b/drivers/media/v4l2-core/v4l2loopback.c -@@ -0,0 +1,3316 @@ -+/* -*- c-file-style: "linux" -*- */ -+/* -+ * v4l2loopback.c -- video4linux2 loopback driver -+ * -+ * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) -+ * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at) -+ * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) -+ * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "v4l2loopback.h" -+ -+#define V4L2LOOPBACK_CTL_ADD_legacy 0x4C80 -+#define V4L2LOOPBACK_CTL_REMOVE_legacy 0x4C81 -+#define V4L2LOOPBACK_CTL_QUERY_legacy 0x4C82 -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) -+#error This module is not supported on kernels before 4.0.0. -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) -+#define strscpy strlcpy -+#endif -+ -+#if defined(timer_setup) -+#define HAVE_TIMER_SETUP -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) -+#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) -+#define timer_delete_sync del_timer_sync -+#endif -+ -+#define V4L2LOOPBACK_VERSION_CODE \ -+ KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ -+ V4L2LOOPBACK_VERSION_BUGFIX) -+ -+MODULE_DESCRIPTION("V4L2 loopback video device"); -+MODULE_AUTHOR("Vasily Levin, " -+ "IOhannes m zmoelnig ," -+ "Stefan Diewald," -+ "Anton Novikov" -+ "et al."); -+#ifdef SNAPSHOT_VERSION -+MODULE_VERSION(__stringify(SNAPSHOT_VERSION)); -+#else -+MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify( -+ V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX)); -+#endif -+MODULE_LICENSE("GPL"); -+ -+/* -+ * helpers -+ */ -+#define dprintk(fmt, args...) \ -+ do { \ -+ if (debug > 0) { \ -+ printk(KERN_INFO "v4l2-loopback[" __stringify( \ -+ __LINE__) "], pid(%d): " fmt, \ -+ task_pid_nr(current), ##args); \ -+ } \ -+ } while (0) -+ -+#define MARK() \ -+ do { \ -+ if (debug > 1) { \ -+ printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \ -+ __LINE__, __func__, task_pid_nr(current)); \ -+ } \ -+ } while (0) -+ -+#define dprintkrw(fmt, args...) 
\ -+ do { \ -+ if (debug > 2) { \ -+ printk(KERN_INFO "v4l2-loopback[" __stringify( \ -+ __LINE__) "], pid(%d): " fmt, \ -+ task_pid_nr(current), ##args); \ -+ } \ -+ } while (0) -+ -+static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) -+{ -+ struct timespec64 ts; -+ ktime_get_ts64(&ts); -+ -+ b->timestamp.tv_sec = ts.tv_sec; -+ b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); -+ b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; -+ b->flags &= ~V4L2_BUF_FLAG_TIMESTAMP_COPY; -+} -+ -+#if BITS_PER_LONG == 32 -+#include /* do_div() for 64bit division */ -+static inline int v4l2l_mod64(const s64 A, const u32 B) -+{ -+ u64 a = (u64)A; -+ u32 b = B; -+ -+ if (A > 0) -+ return do_div(a, b); -+ a = -A; -+ return -do_div(a, b); -+} -+#else -+static inline int v4l2l_mod64(const s64 A, const u32 B) -+{ -+ return A % B; -+} -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) -+typedef unsigned __poll_t; -+#endif -+ -+/* module constants -+ * can be overridden during he build process using something like -+ * make KCPPFLAGS="-DMAX_DEVICES=100" -+ */ -+ -+/* maximum number of v4l2loopback devices that can be created */ -+#ifndef MAX_DEVICES -+#define MAX_DEVICES 8 -+#endif -+ -+/* whether the default is to announce capabilities exclusively or not */ -+#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS -+#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 -+#endif -+ -+/* when a producer is considered to have gone stale */ -+#ifndef MAX_TIMEOUT -+#define MAX_TIMEOUT (100 * 1000) /* in msecs */ -+#endif -+ -+/* max buffers that can be mapped, actually they -+ * are all mapped to max_buffers buffers */ -+#ifndef MAX_BUFFERS -+#define MAX_BUFFERS 32 -+#endif -+ -+/* module parameters */ -+static int debug = 0; -+module_param(debug, int, S_IRUGO | S_IWUSR); -+MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); -+ -+#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 -+static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; -+module_param(max_buffers, int, 
S_IRUGO); -+MODULE_PARM_DESC(max_buffers, -+ "how many buffers should be allocated [DEFAULT: " __stringify( -+ V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); -+ -+/* how many times a device can be opened -+ * the per-module default value can be overridden on a per-device basis using -+ * the /sys/devices interface -+ * -+ * note that max_openers should be at least 2 in order to get a working system: -+ * one opener for the producer and one opener for the consumer -+ * however, we leave that to the user -+ */ -+#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 -+static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; -+module_param(max_openers, int, S_IRUGO | S_IWUSR); -+MODULE_PARM_DESC( -+ max_openers, -+ "how many users can open the loopback device [DEFAULT: " __stringify( -+ V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); -+ -+static int devices = -1; -+module_param(devices, int, 0); -+MODULE_PARM_DESC(devices, "how many devices should be created"); -+ -+static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; -+module_param_array(video_nr, int, NULL, 0444); -+MODULE_PARM_DESC(video_nr, -+ "video device numbers (-1=auto, 0=/dev/video0, etc.)"); -+ -+static char *card_label[MAX_DEVICES]; -+module_param_array(card_label, charp, NULL, 0000); -+MODULE_PARM_DESC(card_label, "card labels for each device"); -+ -+static bool exclusive_caps[MAX_DEVICES] = { -+ [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS -+}; -+module_param_array(exclusive_caps, bool, NULL, 0444); -+/* FIXXME: wording */ -+MODULE_PARM_DESC( -+ exclusive_caps, -+ "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " __stringify( -+ V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); -+ -+/* format specifications */ -+#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2 -+#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1 -+#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 -+#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 -+ -+#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 -+#define 
V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 -+ -+static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; -+module_param(max_width, int, S_IRUGO); -+MODULE_PARM_DESC(max_width, -+ "maximum allowed frame width [DEFAULT: " __stringify( -+ V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); -+static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; -+module_param(max_height, int, S_IRUGO); -+MODULE_PARM_DESC(max_height, -+ "maximum allowed frame height [DEFAULT: " __stringify( -+ V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); -+ -+static DEFINE_IDR(v4l2loopback_index_idr); -+static DEFINE_MUTEX(v4l2loopback_ctl_mutex); -+ -+/* frame intervals */ -+#define V4L2LOOPBACK_FRAME_INTERVAL_MAX __UINT32_MAX__ -+#define V4L2LOOPBACK_FPS_DEFAULT 30 -+#define V4L2LOOPBACK_FPS_MAX 1000 -+ -+/* control IDs */ -+#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) -+#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) -+#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) -+#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) -+#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) -+ -+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); -+static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { -+ .s_ctrl = v4l2loopback_s_ctrl, -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_KEEP_FORMAT, -+ .name = "keep_format", -+ .type = V4L2_CTRL_TYPE_BOOLEAN, -+ .min = 0, -+ .max = 1, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_SUSTAIN_FRAMERATE, -+ .name = "sustain_framerate", -+ .type = V4L2_CTRL_TYPE_BOOLEAN, -+ .min = 0, -+ .max = 1, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_TIMEOUT, 
-+ .name = "timeout", -+ .type = V4L2_CTRL_TYPE_INTEGER, -+ .min = 0, -+ .max = MAX_TIMEOUT, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_TIMEOUT_IMAGE_IO, -+ .name = "timeout_image_io", -+ .type = V4L2_CTRL_TYPE_BUTTON, -+ .min = 0, -+ .max = 0, -+ .step = 0, -+ .def = 0, -+ // clang-format on -+}; -+ -+/* module structures */ -+struct v4l2loopback_private { -+ int device_nr; -+}; -+ -+/* TODO(vasaka) use typenames which are common to kernel, but first find out if -+ * it is needed */ -+/* struct keeping state and settings of loopback device */ -+ -+struct v4l2l_buffer { -+ struct v4l2_buffer buffer; -+ struct list_head list_head; -+ atomic_t use_count; -+}; -+ -+struct v4l2_loopback_device { -+ struct v4l2_device v4l2_dev; -+ struct v4l2_ctrl_handler ctrl_handler; -+ struct video_device *vdev; -+ -+ /* loopback device-specific parameters */ -+ char card_label[32]; -+ bool announce_all_caps; /* announce both OUTPUT and CAPTURE capabilities -+ * when true; else announce OUTPUT when no -+ * writer is streaming, otherwise CAPTURE. 
*/ -+ int max_openers; /* how many times can this device be opened */ -+ int min_width, max_width; -+ int min_height, max_height; -+ -+ /* pixel and stream format */ -+ struct v4l2_pix_format pix_format; -+ bool pix_format_has_valid_sizeimage; -+ struct v4l2_captureparm capture_param; -+ unsigned long frame_jiffies; -+ -+ /* ctrls */ -+ int keep_format; /* CID_KEEP_FORMAT; lock the format, do not free -+ * on close(), and when `!announce_all_caps` do NOT -+ * fall back to OUTPUT when no writers attached (clear -+ * `keep_format` to attach a new writer) */ -+ int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain -+ (close to) nominal framerate */ -+ unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ -+ int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will -+ * queue/dequeue the timeout image buffer */ -+ -+ /* buffers for OUTPUT and CAPTURE */ -+ u8 *image; /* pointer to actual buffers data */ -+ unsigned long image_size; /* number of bytes alloc'd for all buffers */ -+ struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ -+ u32 buffer_count; /* should not be big, 4 is a good choice */ -+ u32 buffer_size; /* number of bytes alloc'd per buffer */ -+ u32 used_buffer_count; /* number of buffers allocated to openers */ -+ struct list_head outbufs_list; /* FIFO queue for OUTPUT buffers */ -+ u32 bufpos2index[MAX_BUFFERS]; /* mapping of `(position % used_buffers)` -+ * to `buffers[index]` */ -+ s64 write_position; /* sequence number of last 'displayed' buffer plus -+ * one */ -+ -+ /* synchronization between openers */ -+ atomic_t open_count; -+ struct mutex image_mutex; /* mutex for allocating image(s) and -+ * exchanging format tokens */ -+ spinlock_t lock; /* lock for the timeout and framerate timers */ -+ spinlock_t list_lock; /* lock for the OUTPUT buffer queue */ -+ wait_queue_head_t read_event; -+ u32 format_tokens; /* tokens to 'set format' for OUTPUT, CAPTURE, or -+ * timeout buffers */ -+ u32 
stream_tokens; /* tokens to 'start' OUTPUT, CAPTURE, or timeout -+ * stream */ -+ -+ /* sustain framerate */ -+ struct timer_list sustain_timer; -+ unsigned int reread_count; -+ -+ /* timeout */ -+ u8 *timeout_image; /* copied to outgoing buffers when timeout passes */ -+ struct v4l2l_buffer timeout_buffer; -+ u32 timeout_buffer_size; /* number bytes alloc'd for timeout buffer */ -+ struct timer_list timeout_timer; -+ int timeout_happened; -+}; -+ -+enum v4l2l_io_method { -+ V4L2L_IO_NONE = 0, -+ V4L2L_IO_MMAP = 1, -+ V4L2L_IO_FILE = 2, -+ V4L2L_IO_TIMEOUT = 3, -+}; -+ -+/* struct keeping state and type of opener */ -+struct v4l2_loopback_opener { -+ u32 format_token; /* token (if any) for type used in call to S_FMT or -+ * REQBUFS */ -+ u32 stream_token; /* token (if any) for type used in call to STREAMON */ -+ u32 buffer_count; /* number of buffers (if any) that opener acquired via -+ * REQBUFS */ -+ s64 read_position; /* sequence number of the next 'captured' frame */ -+ unsigned int reread_count; -+ enum v4l2l_io_method io_method; -+ -+ struct v4l2_fh fh; -+}; -+ -+#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) -+ -+/* this is heavily inspired by the bttv driver found in the linux kernel */ -+struct v4l2l_format { -+ char *name; -+ int fourcc; /* video4linux 2 */ -+ int depth; /* bit/pixel */ -+ int flags; -+}; -+/* set the v4l2l_format.flags to PLANAR for non-packed formats */ -+#define FORMAT_FLAGS_PLANAR 0x01 -+#define FORMAT_FLAGS_COMPRESSED 0x02 -+ -+#include "v4l2loopback_formats.h" -+ -+#ifndef V4L2_TYPE_IS_CAPTURE -+#define V4L2_TYPE_IS_CAPTURE(type) \ -+ ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \ -+ (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) -+#endif /* V4L2_TYPE_IS_CAPTURE */ -+#ifndef V4L2_TYPE_IS_OUTPUT -+#define V4L2_TYPE_IS_OUTPUT(type) \ -+ ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \ -+ (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) -+#endif /* V4L2_TYPE_IS_OUTPUT */ -+ -+/* token values for privilege to set 
format or start/stop stream */ -+#define V4L2L_TOKEN_CAPTURE 0x01 -+#define V4L2L_TOKEN_OUTPUT 0x02 -+#define V4L2L_TOKEN_TIMEOUT 0x04 -+#define V4L2L_TOKEN_MASK \ -+ (V4L2L_TOKEN_CAPTURE | V4L2L_TOKEN_OUTPUT | V4L2L_TOKEN_TIMEOUT) -+ -+/* helpers for token exchange and token status */ -+#define token_from_type(type) \ -+ (V4L2_TYPE_IS_CAPTURE(type) ? V4L2L_TOKEN_CAPTURE : V4L2L_TOKEN_OUTPUT) -+#define acquire_token(dev, opener, label, token) \ -+ do { \ -+ (opener)->label##_token = token; \ -+ (dev)->label##_tokens &= ~token; \ -+ } while (0) -+#define release_token(dev, opener, label) \ -+ do { \ -+ (dev)->label##_tokens |= (opener)->label##_token; \ -+ (opener)->label##_token = 0; \ -+ } while (0) -+#define has_output_token(token) (token & V4L2L_TOKEN_OUTPUT) -+#define has_capture_token(token) (token & V4L2L_TOKEN_CAPTURE) -+#define has_no_owners(dev) ((~((dev)->format_tokens) & V4L2L_TOKEN_MASK) == 0) -+#define has_other_owners(opener, dev) \ -+ (~((dev)->format_tokens ^ (opener)->format_token) & V4L2L_TOKEN_MASK) -+#define need_timeout_buffer(dev, token) \ -+ ((dev)->timeout_jiffies > 0 || (token) & V4L2L_TOKEN_TIMEOUT) -+ -+static const unsigned int FORMATS = ARRAY_SIZE(formats); -+ -+static char *fourcc2str(unsigned int fourcc, char buf[5]) -+{ -+ buf[0] = (fourcc >> 0) & 0xFF; -+ buf[1] = (fourcc >> 8) & 0xFF; -+ buf[2] = (fourcc >> 16) & 0xFF; -+ buf[3] = (fourcc >> 24) & 0xFF; -+ buf[4] = 0; -+ -+ return buf; -+} -+ -+static const struct v4l2l_format *format_by_fourcc(int fourcc) -+{ -+ unsigned int i; -+ char buf[5]; -+ -+ for (i = 0; i < FORMATS; i++) { -+ if (formats[i].fourcc == fourcc) -+ return formats + i; -+ } -+ -+ dprintk("unsupported format '%4s'\n", fourcc2str(fourcc, buf)); -+ return NULL; -+} -+ -+static void pix_format_set_size(struct v4l2_pix_format *f, -+ const struct v4l2l_format *fmt, -+ unsigned int width, unsigned int height) -+{ -+ f->width = width; -+ f->height = height; -+ -+ if (fmt->flags & FORMAT_FLAGS_PLANAR) { -+ 
f->bytesperline = width; /* Y plane */ -+ f->sizeimage = (width * height * fmt->depth) >> 3; -+ } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { -+ /* doesn't make sense for compressed formats */ -+ f->bytesperline = 0; -+ f->sizeimage = (width * height * fmt->depth) >> 3; -+ } else { -+ f->bytesperline = (width * fmt->depth) >> 3; -+ f->sizeimage = height * f->bytesperline; -+ } -+} -+ -+static int v4l2l_fill_format(struct v4l2_format *fmt, const u32 minwidth, -+ const u32 maxwidth, const u32 minheight, -+ const u32 maxheight) -+{ -+ u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height; -+ u32 pixelformat = fmt->fmt.pix.pixelformat; -+ struct v4l2_format fmt0 = *fmt; -+ u32 bytesperline = 0, sizeimage = 0; -+ -+ if (!width) -+ width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; -+ if (!height) -+ height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; -+ width = clamp_val(width, minwidth, maxwidth); -+ height = clamp_val(height, minheight, maxheight); -+ -+ /* sets: width,height,pixelformat,bytesperline,sizeimage */ -+ if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) { -+ fmt0.fmt.pix.bytesperline = 0; -+ fmt0.fmt.pix.sizeimage = 0; -+ } -+ -+ if (0) { -+ ; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) -+ } else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width, -+ height)) { -+ ; -+ } else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width, -+ height)) { -+ ; -+#endif -+ } else { -+ const struct v4l2l_format *format = -+ format_by_fourcc(pixelformat); -+ if (!format) -+ return -EINVAL; -+ pix_format_set_size(&fmt0.fmt.pix, format, width, height); -+ fmt0.fmt.pix.pixelformat = format->fourcc; -+ } -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) { -+ *fmt = fmt0; -+ -+ if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) || -+ (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3)) -+ fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB; -+ if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field) -+ fmt->fmt.pix_mp.field = V4L2_FIELD_NONE; -+ } else { -+ bytesperline = 
fmt->fmt.pix.bytesperline; -+ sizeimage = fmt->fmt.pix.sizeimage; -+ -+ *fmt = fmt0; -+ -+ if (!fmt->fmt.pix.bytesperline) -+ fmt->fmt.pix.bytesperline = bytesperline; -+ if (!fmt->fmt.pix.sizeimage) -+ fmt->fmt.pix.sizeimage = sizeimage; -+ -+ if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || -+ (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) -+ fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; -+ if (V4L2_FIELD_ANY == fmt->fmt.pix.field) -+ fmt->fmt.pix.field = V4L2_FIELD_NONE; -+ } -+ -+ return 0; -+} -+ -+/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */ -+static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt) -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) -+ const struct v4l2_format_info *info; -+ -+ info = v4l2_format_info(fmt->fmt.pix.pixelformat); -+ if (info && info->mem_planes == 1) -+ return true; -+#endif -+ -+ return false; -+} -+ -+static int pix_format_eq(const struct v4l2_pix_format *ref, -+ const struct v4l2_pix_format *tgt, int strict) -+{ -+ /* check if the two formats are equivalent. 
-+ * ANY fields are handled gracefully -+ */ -+#define _pix_format_eq0(x) \ -+ if (ref->x != tgt->x) \ -+ result = 0 -+#define _pix_format_eq1(x, def) \ -+ do { \ -+ if ((def != tgt->x) && (ref->x != tgt->x)) { \ -+ printk(KERN_INFO #x " failed"); \ -+ result = 0; \ -+ } \ -+ } while (0) -+ int result = 1; -+ _pix_format_eq0(width); -+ _pix_format_eq0(height); -+ _pix_format_eq0(pixelformat); -+ if (!strict) -+ return result; -+ _pix_format_eq1(field, V4L2_FIELD_ANY); -+ _pix_format_eq0(bytesperline); -+ _pix_format_eq0(sizeimage); -+ _pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT); -+ return result; -+} -+ -+/* Normalizes the requested frame interval in place and stores it on the device. -+ * - 0/0 selects the default rate (1/V4L2LOOPBACK_FPS_DEFAULT) -+ * - a zero denominator is replaced by the longest interval (min FPS) -+ * - a zero numerator or an interval shorter than 1/V4L2LOOPBACK_FPS_MAX is -+ * replaced by 1/V4L2LOOPBACK_FPS_MAX (max FPS) -+ * 'tpf' is updated with the value actually used and dev->frame_jiffies is -+ * recomputed from it (never less than one jiffy). -+ */ -+static void set_timeperframe(struct v4l2_loopback_device *dev, -+ struct v4l2_fract *tpf) -+{ -+ if (!tpf->denominator && !tpf->numerator) { -+ tpf->numerator = 1; -+ tpf->denominator = V4L2LOOPBACK_FPS_DEFAULT; -+ } else if (tpf->numerator > -+ (u64)V4L2LOOPBACK_FRAME_INTERVAL_MAX * tpf->denominator) { -+ /* divide-by-zero or greater than maximum interval => min FPS; -+ * the product is widened to 64 bit so it cannot wrap */ -+ tpf->numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; -+ tpf->denominator = 1; -+ } else if ((u64)tpf->numerator * V4L2LOOPBACK_FPS_MAX < tpf->denominator) { -+ /* zero or lower than minimum interval => max FPS; -+ * the product is widened to 64 bit so it cannot wrap */ -+ tpf->numerator = 1; -+ tpf->denominator = V4L2LOOPBACK_FPS_MAX; -+ } -+ -+ dev->capture_param.timeperframe = *tpf; -+ /* NOTE(review): this product is computed in unsigned long and may still -+ * wrap on 32-bit builds for very long intervals - confirm */ -+ dev->frame_jiffies = -+ max(1UL, (msecs_to_jiffies(1000) * tpf->numerator) / -+ tpf->denominator); -+} -+ -+static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); -+ -+/* device attributes */ -+/* available via sysfs: /sys/devices/virtual/video4linux/video* */ -+ -+static ssize_t attr_show_format(struct device *cd, -+ struct device_attribute *attr, char *buf) -+{ -+ /* gets the current format as "FOURCC:WxH@f/s", e.g.
"YUYV:320x240@1000/30" */ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ const struct v4l2_fract *tpf; -+ char buf4cc[5], buf_fps[32]; -+ -+ if (!dev || (has_no_owners(dev) && !dev->keep_format)) -+ return 0; -+ tpf = &dev->capture_param.timeperframe; -+ -+ fourcc2str(dev->pix_format.pixelformat, buf4cc); -+ if (tpf->numerator == 1) -+ snprintf(buf_fps, sizeof(buf_fps), "%u", tpf->denominator); -+ else -+ snprintf(buf_fps, sizeof(buf_fps), "%u/%u", tpf->denominator, -+ tpf->numerator); -+ return sprintf(buf, "%4s:%ux%u@%s\n", buf4cc, dev->pix_format.width, -+ dev->pix_format.height, buf_fps); -+} -+ -+/* sysfs 'format' store handler; only the frame rate can be changed. -+ * Accepts "@<fps_num>" or "@<fps_num>/<fps_den>" and applies it through -+ * set_timeperframe(), which clamps the value. -+ * Returns 'len' on success, -ENODEV without a device, -EINVAL if the input -+ * does not start with "@<number>". -+ */ -+static ssize_t attr_store_format(struct device *cd, -+ struct device_attribute *attr, const char *buf, -+ size_t len) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ /* unsigned to match the "%u" conversions below */ -+ unsigned int fps_num = 0, fps_den = 1; -+ -+ if (!dev) -+ return -ENODEV; -+ -+ /* only fps changing is supported */ -+ if (sscanf(buf, "@%u/%u", &fps_num, &fps_den) > 0) { -+ /* time-per-frame is the inverse of the frame rate */ -+ struct v4l2_fract f = { .numerator = fps_den, -+ .denominator = fps_num }; -+ set_timeperframe(dev, &f); -+ return len; -+ } -+ return -EINVAL; -+} -+ -+static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, -+ attr_store_format); -+ -+static ssize_t attr_show_buffers(struct device *cd, -+ struct device_attribute *attr, char *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ return sprintf(buf, "%u\n", dev->used_buffer_count); -+} -+ -+static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); -+ -+static ssize_t attr_show_maxopeners(struct device *cd, -+ struct device_attribute *attr, char *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ return sprintf(buf, "%d\n", dev->max_openers); -+} -+ -+static ssize_t attr_store_maxopeners(struct device *cd, -+ struct device_attribute *attr, -+ const char *buf, size_t len) -+{ -+ struct v4l2_loopback_device *dev = NULL; -+
unsigned long curr = 0; -+ -+ if (kstrtoul(buf, 0, &curr)) -+ return -EINVAL; -+ -+ dev = v4l2loopback_cd2dev(cd); -+ if (!dev) -+ return -ENODEV; -+ -+ if (dev->max_openers == curr) -+ return len; -+ -+ if (curr > __INT_MAX__ || dev->open_count.counter > curr) { -+ /* request to limit to less openers as are currently attached to us */ -+ return -EINVAL; -+ } -+ -+ dev->max_openers = (int)curr; -+ -+ return len; -+} -+ -+static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, -+ attr_store_maxopeners); -+ -+/* sysfs 'state' attribute: prints "capture" when the OUTPUT stream token is -+ * no longer available on the device or keep_format is set, else "output". -+ * Returns the number of bytes written to 'buf', or -ENODEV without a device. -+ */ -+static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr, -+ char *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ if (!has_output_token(dev->stream_tokens) || dev->keep_format) -+ return sprintf(buf, "capture\n"); -+ -+ return sprintf(buf, "output\n"); -+} -+ -+static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL); -+ -+static void v4l2loopback_remove_sysfs(struct video_device *vdev) -+{ -+#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) -+ -+ if (vdev) { -+ V4L2_SYSFS_DESTROY(format); -+ V4L2_SYSFS_DESTROY(buffers); -+ V4L2_SYSFS_DESTROY(max_openers); -+ V4L2_SYSFS_DESTROY(state); -+ /* ... */ -+ } -+} -+ -+static void v4l2loopback_create_sysfs(struct video_device *vdev) -+{ -+ int res = 0; -+ -+#define V4L2_SYSFS_CREATE(x) \ -+ res = device_create_file(&vdev->dev, &dev_attr_##x); \ -+ if (res < 0) \ -+ break -+ if (!vdev) -+ return; -+ do { -+ V4L2_SYSFS_CREATE(format); -+ V4L2_SYSFS_CREATE(buffers); -+ V4L2_SYSFS_CREATE(max_openers); -+ V4L2_SYSFS_CREATE(state); -+ /* ...
*/ -+ } while (0); -+ -+ if (res >= 0) -+ return; -+ dev_err(&vdev->dev, "%s error: %d\n", __func__, res); -+} -+ -+/* Event APIs */ -+ -+#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START) -+#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000 -+#define V4L2_EVENT_PRI_CLIENT_USAGE \ -+ (V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1) -+ -+struct v4l2_event_client_usage { -+ __u32 count; -+}; -+ -+/* global module data */ -+/* find a device based on it's device-number (e.g. '3' for /dev/video3) */ -+struct v4l2loopback_lookup_cb_data { -+ int device_nr; -+ struct v4l2_loopback_device *device; -+}; -+static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) -+{ -+ struct v4l2_loopback_device *device = ptr; -+ struct v4l2loopback_lookup_cb_data *cbdata = data; -+ if (cbdata && device && device->vdev) { -+ if (device->vdev->num == cbdata->device_nr) { -+ cbdata->device = device; -+ cbdata->device_nr = id; -+ return 1; -+ } -+ } -+ return 0; -+} -+static int v4l2loopback_lookup(int device_nr, -+ struct v4l2_loopback_device **device) -+{ -+ struct v4l2loopback_lookup_cb_data data = { -+ .device_nr = device_nr, -+ .device = NULL, -+ }; -+ int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, -+ &data); -+ if (1 == err) { -+ if (device) -+ *device = data.device; -+ return data.device_nr; -+ } -+ return -ENODEV; -+} -+#define v4l2loopback_get_vdev_nr(vdev) \ -+ ((struct v4l2loopback_private *)video_get_drvdata(vdev))->device_nr -+static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) -+{ -+ struct video_device *loopdev = to_video_device(cd); -+ int device_nr = v4l2loopback_get_vdev_nr(loopdev); -+ -+ return idr_find(&v4l2loopback_index_idr, device_nr); -+} -+ -+static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) -+{ -+ struct v4l2loopback_private *ptr = video_drvdata(f); -+ int nr = ptr->device_nr; -+ -+ return idr_find(&v4l2loopback_index_idr, nr); -+} -+ -+/* forward declarations */ -+static 
void client_usage_queue_event(struct video_device *vdev); -+static bool any_buffers_mapped(struct v4l2_loopback_device *dev); -+static int allocate_buffers(struct v4l2_loopback_device *dev, -+ struct v4l2_pix_format *pix_format); -+static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, -+ u32 buffer_size); -+static void free_buffers(struct v4l2_loopback_device *dev); -+static int allocate_timeout_buffer(struct v4l2_loopback_device *dev); -+static void free_timeout_buffer(struct v4l2_loopback_device *dev); -+static void check_timers(struct v4l2_loopback_device *dev); -+static const struct v4l2_file_operations v4l2_loopback_fops; -+static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; -+ -+/* V4L2 ioctl caps and params calls */ -+/* returns device capabilities -+ * called on VIDIOC_QUERYCAP -+ */ -+static int vidioc_querycap(struct file *file, void *fh, -+ struct v4l2_capability *cap) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); -+ __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; -+ -+ strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); -+ snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label); -+ snprintf(cap->bus_info, sizeof(cap->bus_info), -+ "platform:v4l2loopback-%03d", device_nr); -+ -+ if (dev->announce_all_caps) { -+ capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; -+ } else { -+ if (opener->io_method == V4L2L_IO_TIMEOUT || -+ (has_output_token(dev->stream_tokens) && -+ !dev->keep_format)) { -+ capabilities |= V4L2_CAP_VIDEO_OUTPUT; -+ } else -+ capabilities |= V4L2_CAP_VIDEO_CAPTURE; -+ } -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) -+ dev->vdev->device_caps = -+#endif /* >=linux-4.7.0 */ -+ cap->device_caps = cap->capabilities = capabilities; -+ -+ cap->capabilities |= V4L2_CAP_DEVICE_CAPS; -+ -+ memset(cap->reserved, 0, 
sizeof(cap->reserved)); -+ return 0; -+} -+ -+static int vidioc_enum_framesizes(struct file *file, void *fh, -+ struct v4l2_frmsizeenum *argp) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ -+ /* there can be only one... */ -+ if (argp->index) -+ return -EINVAL; -+ -+ if (dev->keep_format || has_other_owners(opener, dev)) { -+ /* only current frame size supported */ -+ if (argp->pixel_format != dev->pix_format.pixelformat) -+ return -EINVAL; -+ -+ argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; -+ -+ argp->discrete.width = dev->pix_format.width; -+ argp->discrete.height = dev->pix_format.height; -+ } else { -+ /* return continuous sizes if pixel format is supported */ -+ if (NULL == format_by_fourcc(argp->pixel_format)) -+ return -EINVAL; -+ -+ if (dev->min_width == dev->max_width && -+ dev->min_height == dev->max_height) { -+ argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; -+ -+ argp->discrete.width = dev->min_width; -+ argp->discrete.height = dev->min_height; -+ } else { -+ argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; -+ -+ argp->stepwise.min_width = dev->min_width; -+ argp->stepwise.min_height = dev->min_height; -+ -+ argp->stepwise.max_width = dev->max_width; -+ argp->stepwise.max_height = dev->max_height; -+ -+ argp->stepwise.step_width = 1; -+ argp->stepwise.step_height = 1; -+ } -+ } -+ return 0; -+} -+ -+/* Test if the device is currently 'capable' of the buffer (stream) type when -+ * the `exclusive_caps` parameter is set. `keep_format` should lock the format -+ * and prevent free of buffers */ -+static int check_buffer_capability(struct v4l2_loopback_device *dev, -+ struct v4l2_loopback_opener *opener, -+ enum v4l2_buf_type type) -+{ -+ /* short-circuit for (non-compliant) timeout image mode */ -+ if (opener->io_method == V4L2L_IO_TIMEOUT) -+ return 0; -+ if (dev->announce_all_caps) -+ return (type == V4L2_BUF_TYPE_VIDEO_CAPTURE || -+ type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ? 
-+ 0 : -+ -EINVAL; -+ /* CAPTURE if opener has a capture format or a writer is streaming; -+ * else OUTPUT. */ -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ if (!(has_capture_token(opener->format_token) || -+ !has_output_token(dev->stream_tokens))) -+ return -EINVAL; -+ break; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if (!(has_output_token(opener->format_token) || -+ has_output_token(dev->stream_tokens))) -+ return -EINVAL; -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+/* returns frameinterval (fps) for the set resolution -+ * called on VIDIOC_ENUM_FRAMEINTERVALS -+ */ -+static int vidioc_enum_frameintervals(struct file *file, void *fh, -+ struct v4l2_frmivalenum *argp) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ -+ /* there can be only one... */ -+ if (argp->index) -+ return -EINVAL; -+ -+ if (dev->keep_format || has_other_owners(opener, dev)) { -+ /* keep_format also locks the frame rate */ -+ if (argp->width != dev->pix_format.width || -+ argp->height != dev->pix_format.height || -+ argp->pixel_format != dev->pix_format.pixelformat) -+ return -EINVAL; -+ -+ argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; -+ argp->discrete = dev->capture_param.timeperframe; -+ } else { -+ if (argp->width < dev->min_width || -+ argp->width > dev->max_width || -+ argp->height < dev->min_height || -+ argp->height > dev->max_height || -+ !format_by_fourcc(argp->pixel_format)) -+ return -EINVAL; -+ -+ argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; -+ argp->stepwise.min.numerator = 1; -+ argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX; -+ argp->stepwise.max.numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; -+ argp->stepwise.max.denominator = 1; -+ argp->stepwise.step.numerator = 1; -+ argp->stepwise.step.denominator = 1; -+ } -+ -+ return 0; -+} -+ -+/* Enumerate device formats -+ * Returns: -+ * - EINVAL the index is out of bounds; or if non-zero when format is fixed -+ * - 
EFAULT unexpected null pointer */ -+static int vidioc_enum_fmt_vid(struct file *file, void *fh, -+ struct v4l2_fmtdesc *f) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ /* the format is fixed when keep_format is set or another opener owns it */ -+ int fixed = dev->keep_format || has_other_owners(opener, dev); -+ const struct v4l2l_format *fmt; -+ -+ if (check_buffer_capability(dev, opener, f->type) < 0) -+ return -EINVAL; -+ -+ if (!(f->index < FORMATS)) -+ return -EINVAL; -+ /* TODO: Support 6.14 V4L2_FMTDESC_FLAG_ENUM_ALL */ -+ /* a fixed format is the only entry, so only index 0 is valid */ -+ if (fixed && f->index) -+ return -EINVAL; -+ -+ fmt = fixed ? format_by_fourcc(dev->pix_format.pixelformat) : -+ &formats[f->index]; -+ if (!fmt) -+ return -EFAULT; -+ -+ f->flags = 0; -+ if (fmt->flags & FORMAT_FLAGS_COMPRESSED) -+ f->flags |= V4L2_FMT_FLAG_COMPRESSED; -+ /* copy the name through a literal "%s" so that it is never -+ * interpreted as a format string */ -+ snprintf(f->description, sizeof(f->description), "%s", fmt->name); -+ f->pixelformat = fmt->fourcc; -+ return 0; -+} -+ -+/* Tests (or tries) the format. -+ * Returns: -+ * - EINVAL if the buffer type or format is not supported -+ */ -+static int vidioc_try_fmt_vid(struct file *file, void *fh, -+ struct v4l2_format *f) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ -+ if (check_buffer_capability(dev, opener, f->type) < 0) -+ return -EINVAL; -+ if (v4l2l_fill_format(f, dev->min_width, dev->max_width, -+ dev->min_height, dev->max_height) != 0) -+ return -EINVAL; -+ if (dev->keep_format || has_other_owners(opener, dev)) -+ /* use existing format - including colorspace info */ -+ f->fmt.pix = dev->pix_format; -+ -+ return 0; -+} -+ -+/* Sets new format. Fills 'f' argument with the requested or existing format. -+ * Side-effect: buffers are allocated for the (returned) format.
-+ * Returns: -+ * - EINVAL if the type is not supported -+ * - EBUSY if buffers are already allocated -+ * TODO: (vasaka) set subregions of input -+ */ -+static int vidioc_s_fmt_vid(struct file *file, void *fh, struct v4l2_format *f) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ u32 token = opener->io_method == V4L2L_IO_TIMEOUT ? -+ V4L2L_TOKEN_TIMEOUT : -+ token_from_type(f->type); -+ int changed, result; -+ char buf[5]; -+ -+ result = vidioc_try_fmt_vid(file, fh, f); -+ if (result < 0) -+ return result; -+ -+ if (opener->buffer_count > 0) -+ /* must free buffers before format can be set */ -+ return -EBUSY; -+ -+ result = mutex_lock_killable(&dev->image_mutex); -+ if (result < 0) -+ return result; -+ -+ if (opener->format_token) -+ release_token(dev, opener, format); -+ if (!(dev->format_tokens & token)) { -+ result = -EBUSY; -+ goto exit_s_fmt_unlock; -+ } -+ -+ dprintk("S_FMT[%s] %4s:%ux%u size=%u\n", -+ V4L2_TYPE_IS_CAPTURE(f->type) ? 
"CAPTURE" : "OUTPUT", -+ fourcc2str(f->fmt.pix.pixelformat, buf), f->fmt.pix.width, -+ f->fmt.pix.height, f->fmt.pix.sizeimage); -+ changed = !pix_format_eq(&dev->pix_format, &f->fmt.pix, 0); -+ if (changed || has_no_owners(dev)) { -+ result = allocate_buffers(dev, &f->fmt.pix); -+ if (result < 0) -+ goto exit_s_fmt_unlock; -+ } -+ if ((dev->timeout_image && changed) || -+ (!dev->timeout_image && need_timeout_buffer(dev, token))) { -+ result = allocate_timeout_buffer(dev); -+ if (result < 0) -+ goto exit_s_fmt_free; -+ } -+ if (changed) { -+ dev->pix_format = f->fmt.pix; -+ dev->pix_format_has_valid_sizeimage = -+ v4l2l_pix_format_has_valid_sizeimage(f); -+ } -+ acquire_token(dev, opener, format, token); -+ if (opener->io_method == V4L2L_IO_TIMEOUT) -+ dev->timeout_image_io = 0; -+ goto exit_s_fmt_unlock; -+exit_s_fmt_free: -+ free_buffers(dev); -+exit_s_fmt_unlock: -+ mutex_unlock(&dev->image_mutex); -+ return result; -+} -+ -+/* ------------------ CAPTURE ----------------------- */ -+/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type -+ * is V4L2_BUF_TYPE_VIDEO_CAPTURE */ -+ -+static int vidioc_enum_fmt_cap(struct file *file, void *fh, -+ struct v4l2_fmtdesc *f) -+{ -+ return vidioc_enum_fmt_vid(file, fh, f); -+} -+ -+static int vidioc_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (check_buffer_capability(dev, opener, f->type) < 0) -+ return -EINVAL; -+ f->fmt.pix = dev->pix_format; -+ return 0; -+} -+ -+static int vidioc_try_fmt_cap(struct file *file, void *fh, -+ struct v4l2_format *f) -+{ -+ return vidioc_try_fmt_vid(file, fh, f); -+} -+ -+static int vidioc_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) -+{ -+ return vidioc_s_fmt_vid(file, fh, f); -+} -+ -+/* ------------------ OUTPUT ----------------------- */ -+/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT 
when buffer type -+ * is V4L2_BUF_TYPE_VIDEO_OUTPUT */ -+ -+static int vidioc_enum_fmt_out(struct file *file, void *fh, -+ struct v4l2_fmtdesc *f) -+{ -+ return vidioc_enum_fmt_vid(file, fh, f); -+} -+ -+static int vidioc_g_fmt_out(struct file *file, void *fh, struct v4l2_format *f) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (check_buffer_capability(dev, opener, f->type) < 0) -+ return -EINVAL; -+ /* -+ * LATER: this should return the currently valid format -+ * gstreamer doesn't like it, if this returns -EINVAL, as it -+ * then concludes that there is _no_ valid format -+ * CHECK whether this assumption is wrong, -+ * or whether we have to always provide a valid format -+ */ -+ f->fmt.pix = dev->pix_format; -+ return 0; -+} -+ -+static int vidioc_try_fmt_out(struct file *file, void *fh, -+ struct v4l2_format *f) -+{ -+ return vidioc_try_fmt_vid(file, fh, f); -+} -+ -+static int vidioc_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f) -+{ -+ return vidioc_s_fmt_vid(file, fh, f); -+} -+ -+// #define V4L2L_OVERLAY -+#ifdef V4L2L_OVERLAY -+/* ------------------ OVERLAY ----------------------- */ -+/* currently unsupported */ -+/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work -+ * while it should only require it, if overlay is requested -+ * once the gstreamer element is fixed, remove the overlay dummies -+ */ -+#warning OVERLAY dummies -+static int vidioc_g_fmt_overlay(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ return 0; -+} -+ -+static int vidioc_s_fmt_overlay(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ return 0; -+} -+#endif /* V4L2L_OVERLAY */ -+ -+/* ------------------ PARAMs ----------------------- */ -+ -+/* get some data flow parameters, only capability, fps and readbuffers has -+ * effect on this driver -+ * called on VIDIOC_G_PARM -+ */ -+static int vidioc_g_parm(struct file *file, void *fh, 
-+ struct v4l2_streamparm *parm) -+{ -+ /* do not care about type of opener, hope these enums would always be -+ * compatible */ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (check_buffer_capability(dev, opener, parm->type) < 0) -+ return -EINVAL; -+ parm->parm.capture = dev->capture_param; -+ return 0; -+} -+ -+/* get some data flow parameters, only capability, fps and readbuffers has -+ * effect on this driver -+ * called on VIDIOC_S_PARM -+ */ -+static int vidioc_s_parm(struct file *file, void *fh, -+ struct v4l2_streamparm *parm) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ -+ dprintk("S_PARM(frame-time=%u/%u)\n", -+ parm->parm.capture.timeperframe.numerator, -+ parm->parm.capture.timeperframe.denominator); -+ if (check_buffer_capability(dev, opener, parm->type) < 0) -+ return -EINVAL; -+ -+ switch (parm->type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ set_timeperframe(dev, &parm->parm.capture.timeperframe); -+ break; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ set_timeperframe(dev, &parm->parm.output.timeperframe); -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ parm->parm.capture = dev->capture_param; -+ return 0; -+} -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+/* sets a tv standard, actually we do not need to handle this any special way -+ * added to support effecttv -+ * called on VIDIOC_S_STD -+ */ -+static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std) -+{ -+ v4l2_std_id req_std = 0, supported_std = 0; -+ const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; -+ -+ if (_std) { -+ req_std = *_std; -+ *_std = all_std; -+ } -+ -+ /* we support everything in V4L2_STD_ALL, but not more... 
*/ -+ supported_std = (all_std & req_std); -+ if (no_std == supported_std) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+/* gets a fake video standard -+ * called on VIDIOC_G_STD -+ */ -+static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) -+{ -+ if (norm) -+ *norm = V4L2_STD_ALL; -+ return 0; -+} -+/* gets a fake video standard -+ * called on VIDIOC_QUERYSTD -+ */ -+static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) -+{ -+ if (norm) -+ *norm = V4L2_STD_ALL; -+ return 0; -+} -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, -+ s64 val) -+{ -+ int result = 0; -+ switch (id) { -+ case CID_KEEP_FORMAT: -+ if (val < 0 || val > 1) -+ return -EINVAL; -+ dev->keep_format = val; -+ result = mutex_lock_killable(&dev->image_mutex); -+ if (result < 0) -+ return result; -+ if (!dev->keep_format) { -+ if (has_no_owners(dev) && !any_buffers_mapped(dev)) -+ free_buffers(dev); -+ } -+ mutex_unlock(&dev->image_mutex); -+ break; -+ case CID_SUSTAIN_FRAMERATE: -+ if (val < 0 || val > 1) -+ return -EINVAL; -+ spin_lock_bh(&dev->lock); -+ dev->sustain_framerate = val; -+ check_timers(dev); -+ spin_unlock_bh(&dev->lock); -+ break; -+ case CID_TIMEOUT: -+ if (val < 0 || val > MAX_TIMEOUT) -+ return -EINVAL; -+ if (val > 0) { -+ result = mutex_lock_killable(&dev->image_mutex); -+ if (result < 0) -+ return result; -+ /* on-the-fly allocate if device is owned; else -+ * allocate occurs on next S_FMT or REQBUFS */ -+ if (!has_no_owners(dev)) -+ result = allocate_timeout_buffer(dev); -+ mutex_unlock(&dev->image_mutex); -+ if (result < 0) { -+ /* disable timeout as buffer not alloc'd */ -+ spin_lock_bh(&dev->lock); -+ dev->timeout_jiffies = 0; -+ spin_unlock_bh(&dev->lock); -+ return result; -+ } -+ } -+ spin_lock_bh(&dev->lock); -+ dev->timeout_jiffies = msecs_to_jiffies(val); -+ check_timers(dev); -+ spin_unlock_bh(&dev->lock); -+ break; -+ case CID_TIMEOUT_IMAGE_IO: -+ 
dev->timeout_image_io = 1; -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) -+{ -+ struct v4l2_loopback_device *dev = container_of( -+ ctrl->handler, struct v4l2_loopback_device, ctrl_handler); -+ return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); -+} -+ -+/* returns set of device outputs, in our case there is only one -+ * called on VIDIOC_ENUMOUTPUT -+ */ -+static int vidioc_enum_output(struct file *file, void *fh, -+ struct v4l2_output *outp) -+{ -+ __u32 index = outp->index; -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ -+ if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) -+ return -ENOTTY; -+ if (index) -+ return -EINVAL; -+ -+ /* clear all data (including the reserved fields) */ -+ memset(outp, 0, sizeof(*outp)); -+ -+ outp->index = index; -+ strscpy(outp->name, "loopback in", sizeof(outp->name)); -+ outp->type = V4L2_OUTPUT_TYPE_ANALOG; -+ outp->audioset = 0; -+ outp->modulator = 0; -+#ifdef V4L2LOOPBACK_WITH_STD -+ outp->std = V4L2_STD_ALL; -+#ifdef V4L2_OUT_CAP_STD -+ outp->capabilities |= V4L2_OUT_CAP_STD; -+#endif /* V4L2_OUT_CAP_STD */ -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ return 0; -+} -+ -+/* which output is currently active, -+ * called on VIDIOC_G_OUTPUT -+ */ -+static int vidioc_g_output(struct file *file, void *fh, unsigned int *index) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) -+ return -ENOTTY; -+ if (index) -+ *index = 0; -+ return 0; -+} -+ -+/* set output, can make sense if we have more than one video src, -+ * called on VIDIOC_S_OUTPUT -+ */ -+static int vidioc_s_output(struct file *file, void *fh, unsigned int index) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct 
v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) -+ return -ENOTTY; -+ return index == 0 ? index : -EINVAL; -+} -+ -+/* returns set of device inputs, in our case there is only one, -+ * but later I may add more -+ * called on VIDIOC_ENUMINPUT -+ */ -+static int vidioc_enum_input(struct file *file, void *fh, -+ struct v4l2_input *inp) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ __u32 index = inp->index; -+ -+ if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) -+ return -ENOTTY; -+ if (index) -+ return -EINVAL; -+ -+ /* clear all data (including the reserved fields) */ -+ memset(inp, 0, sizeof(*inp)); -+ -+ inp->index = index; -+ strscpy(inp->name, "loopback", sizeof(inp->name)); -+ inp->type = V4L2_INPUT_TYPE_CAMERA; -+ inp->audioset = 0; -+ inp->tuner = 0; -+ inp->status = 0; -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+ inp->std = V4L2_STD_ALL; -+#ifdef V4L2_IN_CAP_STD -+ inp->capabilities |= V4L2_IN_CAP_STD; -+#endif -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ if (has_output_token(dev->stream_tokens) && !dev->keep_format) -+ /* if no outputs attached; pretend device is powered off */ -+ inp->status |= V4L2_IN_ST_NO_SIGNAL; -+ -+ return 0; -+} -+ -+/* which input is currently active, -+ * called on VIDIOC_G_INPUT -+ */ -+static int vidioc_g_input(struct file *file, void *fh, unsigned int *index) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) -+ return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ -+ if (index) -+ *index = 0; -+ return 0; -+} -+ -+/* set input, can make sense if we have more than one video src, -+ * called on VIDIOC_S_INPUT -+ */ -+static int vidioc_s_input(struct file *file, void *fh, unsigned int index) -+{ -+ 
struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ if (index != 0) -+ return -EINVAL; -+ if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) -+ return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ -+ return 0; -+} -+ -+/* --------------- V4L2 ioctl buffer related calls ----------------- */ -+ -+#define is_allocated(opener, type, index) \ -+ (opener->format_token & (opener->io_method == V4L2L_IO_TIMEOUT ? \ -+ V4L2L_TOKEN_TIMEOUT : \ -+ token_from_type(type)) && \ -+ (index) < (opener)->buffer_count) -+#define BUFFER_DEBUG_FMT_STR \ -+ "buffer#%u @ %p type=%u bytesused=%u length=%u flags=%x " \ -+ "field=%u timestamp= %lld.%06lldsequence=%u\n" -+#define BUFFER_DEBUG_FMT_ARGS(buf) \ -+ (buf)->index, (buf), (buf)->type, (buf)->bytesused, (buf)->length, \ -+ (buf)->flags, (buf)->field, \ -+ (long long)(buf)->timestamp.tv_sec, \ -+ (long long)(buf)->timestamp.tv_usec, (buf)->sequence -+/* Buffer flag helpers */ -+#define unset_flags(flags) \ -+ do { \ -+ flags &= ~V4L2_BUF_FLAG_QUEUED; \ -+ flags &= ~V4L2_BUF_FLAG_DONE; \ -+ } while (0) -+#define set_queued(flags) \ -+ do { \ -+ flags |= V4L2_BUF_FLAG_QUEUED; \ -+ flags &= ~V4L2_BUF_FLAG_DONE; \ -+ } while (0) -+#define set_done(flags) \ -+ do { \ -+ flags &= ~V4L2_BUF_FLAG_QUEUED; \ -+ flags |= V4L2_BUF_FLAG_DONE; \ -+ } while (0) -+ -+static bool any_buffers_mapped(struct v4l2_loopback_device *dev) -+{ -+ u32 index; -+ for (index = 0; index < dev->buffer_count; ++index) -+ if (dev->buffers[index].buffer.flags & V4L2_BUF_FLAG_MAPPED) -+ return true; -+ return false; -+} -+ -+static void prepare_buffer_queue(struct v4l2_loopback_device *dev, int count) -+{ -+ struct v4l2l_buffer *bufd, *n; -+ u32 pos; -+ -+ spin_lock_bh(&dev->list_lock); -+ -+ /* ensure sufficient number of buffers in queue */ -+ for (pos = 0; pos < count; ++pos) { -+ bufd = &dev->buffers[pos]; -+ if (list_empty(&bufd->list_head)) -+ 
list_add_tail(&bufd->list_head, &dev->outbufs_list); -+ } -+ if (list_empty(&dev->outbufs_list)) -+ goto exit_prepare_queue_unlock; -+ -+ /* remove any excess buffers */ -+ list_for_each_entry_safe(bufd, n, &dev->outbufs_list, list_head) { -+ if (bufd->buffer.index >= count) -+ list_del_init(&bufd->list_head); -+ } -+ -+ /* buffers are no longer queued; and `write_position` will correspond -+ * to the first item of `outbufs_list`. */ -+ pos = v4l2l_mod64(dev->write_position, count); -+ list_for_each_entry(bufd, &dev->outbufs_list, list_head) { -+ unset_flags(bufd->buffer.flags); -+ dev->bufpos2index[pos % count] = bufd->buffer.index; -+ ++pos; -+ } -+exit_prepare_queue_unlock: -+ spin_unlock_bh(&dev->list_lock); -+} -+ -+/* forward declaration */ -+static int vidioc_streamoff(struct file *file, void *fh, -+ enum v4l2_buf_type type); -+/* negotiate buffer type -+ * only mmap streaming supported -+ * called on VIDIOC_REQBUFS -+ */ -+static int vidioc_reqbufs(struct file *file, void *fh, -+ struct v4l2_requestbuffers *reqbuf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ u32 token = opener->io_method == V4L2L_IO_TIMEOUT ? 
-+ V4L2L_TOKEN_TIMEOUT : -+ token_from_type(reqbuf->type); -+ u32 req_count = reqbuf->count; -+ int result = 0; -+ -+ dprintk("REQBUFS(memory=%u, req_count=%u) and device-bufs=%u/%u " -+ "[used/max]\n", -+ reqbuf->memory, req_count, dev->used_buffer_count, -+ dev->buffer_count); -+ -+ switch (reqbuf->memory) { -+ case V4L2_MEMORY_MMAP: -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) -+ reqbuf->capabilities = 0; /* only guarantee MMAP support */ -+#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) -+ reqbuf->flags = 0; /* no memory consistency support */ -+#endif -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ if (opener->format_token & ~token) -+ /* different (buffer) type already assigned to descriptor by -+ * S_FMT or REQBUFS */ -+ return -EINVAL; -+ -+ MARK(); -+ result = mutex_lock_killable(&dev->image_mutex); -+ if (result < 0) -+ return result; /* -EINTR */ -+ -+ /* CASE queue/dequeue timeout-buffer only: */ -+ if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { -+ opener->buffer_count = req_count; -+ if (req_count == 0) -+ release_token(dev, opener, format); -+ goto exit_reqbufs_unlock; -+ } -+ -+ MARK(); -+ /* CASE count is zero: streamoff, free buffers, release their token */ -+ if (req_count == 0) { -+ if (dev->format_tokens & token) { -+ acquire_token(dev, opener, format, token); -+ opener->io_method = V4L2L_IO_MMAP; -+ } -+ result = vidioc_streamoff(file, fh, reqbuf->type); -+ opener->buffer_count = 0; -+ /* undocumented requirement - REQBUFS with count zero should -+ * ALSO release lock on logical stream */ -+ if (opener->format_token) -+ release_token(dev, opener, format); -+ if (has_no_owners(dev)) -+ dev->used_buffer_count = 0; -+ goto exit_reqbufs_unlock; -+ } -+ -+ /* CASE count non-zero: allocate buffers and acquire token for them */ -+ MARK(); -+ switch (reqbuf->type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if (!(dev->format_tokens & token || -+ opener->format_token & token)) -+ /* only 
exclusive ownership for each stream */ -+ result = -EBUSY; -+ break; -+ default: -+ result = -EINVAL; -+ } -+ if (result < 0) -+ goto exit_reqbufs_unlock; -+ -+ if (has_other_owners(opener, dev) && dev->used_buffer_count > 0) { -+ /* allow 'allocation' of existing number of buffers */ -+ req_count = dev->used_buffer_count; -+ } else if (any_buffers_mapped(dev)) { -+ /* do not allow re-allocation if buffers are mapped */ -+ result = -EBUSY; -+ goto exit_reqbufs_unlock; -+ } -+ -+ MARK(); -+ opener->buffer_count = 0; -+ -+ if (req_count > dev->buffer_count) -+ req_count = dev->buffer_count; -+ -+ if (has_no_owners(dev)) { -+ result = allocate_buffers(dev, &dev->pix_format); -+ if (result < 0) -+ goto exit_reqbufs_unlock; -+ } -+ if (!dev->timeout_image && need_timeout_buffer(dev, token)) { -+ result = allocate_timeout_buffer(dev); -+ if (result < 0) -+ goto exit_reqbufs_unlock; -+ } -+ acquire_token(dev, opener, format, token); -+ -+ MARK(); -+ switch (opener->io_method) { -+ case V4L2L_IO_TIMEOUT: -+ dev->timeout_image_io = 0; -+ opener->buffer_count = req_count; -+ break; -+ default: -+ opener->io_method = V4L2L_IO_MMAP; -+ prepare_buffer_queue(dev, req_count); -+ dev->used_buffer_count = opener->buffer_count = req_count; -+ } -+exit_reqbufs_unlock: -+ mutex_unlock(&dev->image_mutex); -+ reqbuf->count = opener->buffer_count; -+ return result; -+} -+ -+/* returns buffer asked for; -+ * give app as many buffers as it wants, if it less than MAX, -+ * but map them in our inner buffers -+ * called on VIDIOC_QUERYBUF -+ */ -+static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ u32 type = buf->type; -+ u32 index = buf->index; -+ -+ if ((type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && -+ (type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) -+ return -EINVAL; -+ if (!is_allocated(opener, type, index)) -+ return -EINVAL; -+ -+ if 
(opener->format_token & V4L2L_TOKEN_TIMEOUT) { -+ *buf = dev->timeout_buffer.buffer; -+ buf->index = index; -+ } else -+ *buf = dev->buffers[index].buffer; -+ -+ buf->type = type; -+ -+ if (!(buf->flags & (V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_QUEUED))) { -+ /* v4l2-compliance requires these to be zero */ -+ buf->sequence = 0; -+ buf->timestamp.tv_sec = buf->timestamp.tv_usec = 0; -+ } else if (V4L2_TYPE_IS_CAPTURE(type)) { -+ /* guess flags based on sequence values */ -+ if (buf->sequence >= opener->read_position) { -+ set_done(buf->flags); -+ } else if (buf->flags & V4L2_BUF_FLAG_DONE) { -+ set_queued(buf->flags); -+ } -+ } -+ dprintkrw("QUERYBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, -+ V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, -+ BUFFER_DEBUG_FMT_ARGS(buf)); -+ return 0; -+} -+ -+static void buffer_written(struct v4l2_loopback_device *dev, -+ struct v4l2l_buffer *buf) -+{ -+ timer_delete_sync(&dev->sustain_timer); -+ timer_delete_sync(&dev->timeout_timer); -+ -+ spin_lock_bh(&dev->list_lock); -+ list_move_tail(&buf->list_head, &dev->outbufs_list); -+ spin_unlock_bh(&dev->list_lock); -+ -+ spin_lock_bh(&dev->lock); -+ dev->bufpos2index[v4l2l_mod64(dev->write_position, -+ dev->used_buffer_count)] = -+ buf->buffer.index; -+ ++dev->write_position; -+ dev->reread_count = 0; -+ -+ check_timers(dev); -+ spin_unlock_bh(&dev->lock); -+} -+ -+/* put buffer to queue -+ * called on VIDIOC_QBUF -+ */ -+static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ struct v4l2l_buffer *bufd; -+ u32 index = buf->index; -+ u32 type = buf->type; -+ -+ if (!is_allocated(opener, type, index)) -+ return -EINVAL; -+ bufd = &dev->buffers[index]; -+ -+ switch (buf->memory) { -+ case V4L2_MEMORY_MMAP: -+ if (!(bufd->buffer.flags & V4L2_BUF_FLAG_MAPPED)) -+ dprintkrw("QBUF() unmapped buffer [index=%u]\n", index); -+ break; -+ 
default: -+ return -EINVAL; -+ } -+ -+ if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { -+ set_queued(buf->flags); -+ return 0; -+ } -+ -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ dprintkrw("QBUF(CAPTURE, index=%u) -> " BUFFER_DEBUG_FMT_STR, -+ index, BUFFER_DEBUG_FMT_ARGS(buf)); -+ set_queued(buf->flags); -+ break; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ dprintkrw("QBUF(OUTPUT, index=%u) -> " BUFFER_DEBUG_FMT_STR, -+ index, BUFFER_DEBUG_FMT_ARGS(buf)); -+ if (!(bufd->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY) && -+ (buf->timestamp.tv_sec == 0 && -+ buf->timestamp.tv_usec == 0)) { -+ v4l2l_get_timestamp(&bufd->buffer); -+ } else { -+ bufd->buffer.timestamp = buf->timestamp; -+ bufd->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY; -+ bufd->buffer.flags &= -+ ~V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; -+ } -+ if (dev->pix_format_has_valid_sizeimage) { -+ if (buf->bytesused >= dev->pix_format.sizeimage) { -+ bufd->buffer.bytesused = -+ dev->pix_format.sizeimage; -+ } else { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) -+ dev_warn_ratelimited( -+ &dev->vdev->dev, -+#else -+ dprintkrw( -+#endif -+ "warning queued output buffer bytesused too small %u < %u\n", -+ buf->bytesused, -+ dev->pix_format.sizeimage); -+ bufd->buffer.bytesused = buf->bytesused; -+ } -+ } else { -+ bufd->buffer.bytesused = buf->bytesused; -+ } -+ bufd->buffer.sequence = dev->write_position; -+ set_queued(bufd->buffer.flags); -+ *buf = bufd->buffer; -+ buffer_written(dev, bufd); -+ set_done(bufd->buffer.flags); -+ wake_up_all(&dev->read_event); -+ break; -+ default: -+ return -EINVAL; -+ } -+ buf->type = type; -+ return 0; -+} -+ -+static int can_read(struct v4l2_loopback_device *dev, -+ struct v4l2_loopback_opener *opener) -+{ -+ int ret; -+ -+ spin_lock_bh(&dev->lock); -+ check_timers(dev); -+ ret = dev->write_position > opener->read_position || -+ dev->reread_count > opener->reread_count || dev->timeout_happened; -+ spin_unlock_bh(&dev->lock); -+ return ret; -+} -+ -+static int 
get_capture_buffer(struct file *file) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); -+ int pos, timeout_happened; -+ u32 index; -+ -+ if ((file->f_flags & O_NONBLOCK) && -+ (dev->write_position <= opener->read_position && -+ dev->reread_count <= opener->reread_count && -+ !dev->timeout_happened)) -+ return -EAGAIN; -+ wait_event_interruptible(dev->read_event, can_read(dev, opener)); -+ -+ spin_lock_bh(&dev->lock); -+ if (dev->write_position == opener->read_position) { -+ if (dev->reread_count > opener->reread_count + 2) -+ opener->reread_count = dev->reread_count - 1; -+ ++opener->reread_count; -+ pos = v4l2l_mod64(opener->read_position + -+ dev->used_buffer_count - 1, -+ dev->used_buffer_count); -+ } else { -+ opener->reread_count = 0; -+ if (dev->write_position > -+ opener->read_position + dev->used_buffer_count) -+ opener->read_position = dev->write_position - 1; -+ pos = v4l2l_mod64(opener->read_position, -+ dev->used_buffer_count); -+ ++opener->read_position; -+ } -+ timeout_happened = dev->timeout_happened && (dev->timeout_jiffies > 0); -+ dev->timeout_happened = 0; -+ spin_unlock_bh(&dev->lock); -+ -+ index = dev->bufpos2index[pos]; -+ if (timeout_happened) { -+ if (index >= dev->used_buffer_count) { -+ dprintkrw("get_capture_buffer() read position is at " -+ "an unallocated buffer [index=%u]\n", -+ index); -+ return -EFAULT; -+ } -+ /* although allocated on-demand, timeout_image is freed only -+ * in free_buffers(), so we don't need to worry about it being -+ * deallocated suddenly */ -+ memcpy(dev->image + dev->buffers[index].buffer.m.offset, -+ dev->timeout_image, dev->buffer_size); -+ } -+ return (int)index; -+} -+ -+/* put buffer to dequeue -+ * called on VIDIOC_DQBUF -+ */ -+static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener 
*opener = fh_to_opener(fh); -+ u32 type = buf->type; -+ int index; -+ struct v4l2l_buffer *bufd; -+ -+ if (buf->memory != V4L2_MEMORY_MMAP) -+ return -EINVAL; -+ if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { -+ *buf = dev->timeout_buffer.buffer; -+ buf->type = type; -+ unset_flags(buf->flags); -+ return 0; -+ } -+ if ((opener->buffer_count == 0) || -+ !(opener->format_token & token_from_type(type))) -+ return -EINVAL; -+ -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ index = get_capture_buffer(file); -+ if (index < 0) -+ return index; -+ *buf = dev->buffers[index].buffer; -+ unset_flags(buf->flags); -+ break; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ spin_lock_bh(&dev->list_lock); -+ -+ bufd = list_first_entry_or_null(&dev->outbufs_list, -+ struct v4l2l_buffer, list_head); -+ if (bufd) -+ list_move_tail(&bufd->list_head, &dev->outbufs_list); -+ -+ spin_unlock_bh(&dev->list_lock); -+ if (!bufd) -+ return -EFAULT; -+ unset_flags(bufd->buffer.flags); -+ *buf = bufd->buffer; -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ buf->type = type; -+ dprintkrw("DQBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, -+ V4L2_TYPE_IS_CAPTURE(type) ? 
"CAPTURE" : "OUTPUT", index, -+ BUFFER_DEBUG_FMT_ARGS(buf)); -+ return 0; -+} -+ -+/* ------------- STREAMING ------------------- */ -+ -+/* start streaming -+ * called on VIDIOC_STREAMON -+ */ -+static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ u32 token = token_from_type(type); -+ -+ /* short-circuit when using timeout buffer set */ -+ if (opener->format_token & V4L2L_TOKEN_TIMEOUT) -+ return 0; -+ /* opener must have claimed (same) buffer set via REQBUFS */ -+ if (!opener->buffer_count || !(opener->format_token & token)) -+ return -EINVAL; -+ -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ if (has_output_token(dev->stream_tokens) && !dev->keep_format) -+ return -EIO; -+ if (dev->stream_tokens & token) { -+ acquire_token(dev, opener, stream, token); -+ client_usage_queue_event(dev->vdev); -+ } -+ return 0; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if (dev->stream_tokens & token) -+ acquire_token(dev, opener, stream, token); -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+/* stop streaming -+ * called on VIDIOC_STREAMOFF -+ */ -+static int vidioc_streamoff(struct file *file, void *fh, -+ enum v4l2_buf_type type) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ u32 token = token_from_type(type); -+ -+ /* short-circuit when using timeout buffer set */ -+ if (opener->format_token & V4L2L_TOKEN_TIMEOUT) -+ return 0; -+ /* short-circuit when buffer set has no owner */ -+ if (dev->format_tokens & token) -+ return 0; -+ /* opener needs a claim to buffer set */ -+ if (!opener->format_token) -+ return -EBUSY; -+ if (opener->format_token & ~token) -+ return -EINVAL; -+ -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if (opener->stream_token & token) -+ release_token(dev, opener, stream); -+ /* reset 
output queue */ -+ if (dev->used_buffer_count > 0) -+ prepare_buffer_queue(dev, dev->used_buffer_count); -+ return 0; -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ if (opener->stream_token & token) { -+ release_token(dev, opener, stream); -+ client_usage_queue_event(dev->vdev); -+ } -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+#ifdef CONFIG_VIDEO_V4L1_COMPAT -+static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) -+{ -+ struct v4l2_loopback_device *dev; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ p->frames = dev->buffer_count; -+ p->offsets[0] = 0; -+ p->offsets[1] = 0; -+ p->size = dev->buffer_size; -+ return 0; -+} -+#endif -+ -+static void client_usage_queue_event(struct video_device *vdev) -+{ -+ struct v4l2_event ev; -+ struct v4l2_loopback_device *dev; -+ -+ dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device, -+ v4l2_dev); -+ -+ memset(&ev, 0, sizeof(ev)); -+ ev.type = V4L2_EVENT_PRI_CLIENT_USAGE; -+ ((struct v4l2_event_client_usage *)&ev.u)->count = -+ !has_capture_token(dev->stream_tokens); -+ -+ v4l2_event_queue(vdev, &ev); -+} -+ -+static int client_usage_ops_add(struct v4l2_subscribed_event *sev, -+ unsigned elems) -+{ -+ if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) -+ return 0; -+ -+ client_usage_queue_event(sev->fh->vdev); -+ return 0; -+} -+ -+static void client_usage_ops_replace(struct v4l2_event *old, -+ const struct v4l2_event *new) -+{ -+ *((struct v4l2_event_client_usage *)&old->u) = -+ *((struct v4l2_event_client_usage *)&new->u); -+} -+ -+static void client_usage_ops_merge(const struct v4l2_event *old, -+ struct v4l2_event *new) -+{ -+ *((struct v4l2_event_client_usage *)&new->u) = -+ *((struct v4l2_event_client_usage *)&old->u); -+} -+ -+const struct v4l2_subscribed_event_ops client_usage_ops = { -+ .add = client_usage_ops_add, -+ .replace = client_usage_ops_replace, -+ .merge = client_usage_ops_merge, -+}; -+ -+static int vidioc_subscribe_event(struct v4l2_fh *fh, -+ const struct 
v4l2_event_subscription *sub) -+{ -+ switch (sub->type) { -+ case V4L2_EVENT_CTRL: -+ return v4l2_ctrl_subscribe_event(fh, sub); -+ case V4L2_EVENT_PRI_CLIENT_USAGE: -+ return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops); -+ } -+ -+ return -EINVAL; -+} -+ -+/* file operations */ -+static void vm_open(struct vm_area_struct *vma) -+{ -+ struct v4l2l_buffer *buf; -+ MARK(); -+ -+ buf = vma->vm_private_data; -+ atomic_inc(&buf->use_count); -+ buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED; -+} -+ -+static void vm_close(struct vm_area_struct *vma) -+{ -+ struct v4l2l_buffer *buf; -+ MARK(); -+ -+ buf = vma->vm_private_data; -+ if (atomic_dec_and_test(&buf->use_count)) -+ buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED; -+} -+ -+static struct vm_operations_struct vm_ops = { -+ .open = vm_open, -+ .close = vm_close, -+}; -+ -+static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ u8 *addr; -+ unsigned long start, size, offset; -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); -+ struct v4l2l_buffer *buffer = NULL; -+ int result = 0; -+ MARK(); -+ -+ offset = (unsigned long)vma->vm_pgoff << PAGE_SHIFT; -+ start = (unsigned long)vma->vm_start; -+ size = (unsigned long)(vma->vm_end - vma->vm_start); /* always != 0 */ -+ -+ /* ensure buffer size, count, and allocated image(s) are not altered by -+ * other file descriptors */ -+ result = mutex_lock_killable(&dev->image_mutex); -+ if (result < 0) -+ return result; -+ -+ if (size > dev->buffer_size) { -+ dprintk("mmap() attempt to map %lubytes when %ubytes are " -+ "allocated to buffers\n", -+ size, dev->buffer_size); -+ result = -EINVAL; -+ goto exit_mmap_unlock; -+ } -+ if (offset % dev->buffer_size != 0) { -+ dprintk("mmap() offset does not match start of any buffer\n"); -+ result = -EINVAL; -+ goto exit_mmap_unlock; -+ } -+ switch (opener->format_token) { -+ case V4L2L_TOKEN_TIMEOUT: -+ if (offset != 
(unsigned long)dev->buffer_size * MAX_BUFFERS) { -+ dprintk("mmap() incorrect offset for timeout image\n"); -+ result = -EINVAL; -+ goto exit_mmap_unlock; -+ } -+ buffer = &dev->timeout_buffer; -+ addr = dev->timeout_image; -+ break; -+ default: -+ if (offset >= dev->image_size) { -+ dprintk("mmap() attempt to map beyond all buffers\n"); -+ result = -EINVAL; -+ goto exit_mmap_unlock; -+ } -+ u32 index = offset / dev->buffer_size; -+ buffer = &dev->buffers[index]; -+ addr = dev->image + offset; -+ break; -+ } -+ -+ while (size > 0) { -+ struct page *page = vmalloc_to_page(addr); -+ -+ result = vm_insert_page(vma, start, page); -+ if (result < 0) -+ goto exit_mmap_unlock; -+ -+ start += PAGE_SIZE; -+ addr += PAGE_SIZE; -+ size -= PAGE_SIZE; -+ } -+ -+ vma->vm_ops = &vm_ops; -+ vma->vm_private_data = buffer; -+ -+ vm_open(vma); -+exit_mmap_unlock: -+ mutex_unlock(&dev->image_mutex); -+ return result; -+} -+ -+static unsigned int v4l2_loopback_poll(struct file *file, -+ struct poll_table_struct *pts) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); -+ __poll_t req_events = poll_requested_events(pts); -+ int ret_mask = 0; -+ -+ /* call poll_wait in first call, regardless, to ensure that the -+ * wait-queue is not null */ -+ poll_wait(file, &dev->read_event, pts); -+ poll_wait(file, &opener->fh.wait, pts); -+ -+ if (req_events & POLLPRI) { -+ if (v4l2_event_pending(&opener->fh)) { -+ ret_mask |= POLLPRI; -+ if (!(req_events & DEFAULT_POLLMASK)) -+ return ret_mask; -+ } -+ } -+ -+ switch (opener->format_token) { -+ case V4L2L_TOKEN_OUTPUT: -+ if (opener->stream_token != 0 || -+ opener->io_method == V4L2L_IO_NONE) -+ ret_mask |= POLLOUT | POLLWRNORM; -+ break; -+ case V4L2L_TOKEN_CAPTURE: -+ if ((opener->io_method == V4L2L_IO_NONE || -+ opener->stream_token != 0) && -+ can_read(dev, opener)) -+ ret_mask |= POLLIN | POLLWRNORM; -+ break; -+ case V4L2L_TOKEN_TIMEOUT: -+ ret_mask 
|= POLLOUT | POLLWRNORM; -+ break; -+ default: -+ break; -+ } -+ -+ return ret_mask; -+} -+ -+/* do not want to limit device opens, it can be as many readers as user want, -+ * writers are limited by means of setting writer field */ -+static int v4l2_loopback_open(struct file *file) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ -+ dev = v4l2loopback_getdevice(file); -+ if (dev->open_count.counter >= dev->max_openers) -+ return -EBUSY; -+ /* kfree on close */ -+ opener = kzalloc(sizeof(*opener), GFP_KERNEL); -+ if (opener == NULL) -+ return -ENOMEM; -+ -+ atomic_inc(&dev->open_count); -+ if (dev->timeout_image_io && dev->format_tokens & V4L2L_TOKEN_TIMEOUT) -+ /* will clear timeout_image_io once buffer set acquired */ -+ opener->io_method = V4L2L_IO_TIMEOUT; -+ -+ v4l2_fh_init(&opener->fh, video_devdata(file)); -+ file->private_data = &opener->fh; -+ -+ v4l2_fh_add(&opener->fh); -+ dprintk("open() -> dev@%p with image@%p\n", dev, -+ dev ? dev->image : NULL); -+ return 0; -+} -+ -+static int v4l2_loopback_close(struct file *file) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); -+ int result = 0; -+ dprintk("close() -> dev@%p with image@%p\n", dev, -+ dev ? 
dev->image : NULL); -+ -+ if (opener->format_token) { -+ struct v4l2_requestbuffers reqbuf = { -+ .count = 0, .memory = V4L2_MEMORY_MMAP, .type = 0 -+ }; -+ switch (opener->format_token) { -+ case V4L2L_TOKEN_CAPTURE: -+ reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ break; -+ case V4L2L_TOKEN_OUTPUT: -+ case V4L2L_TOKEN_TIMEOUT: -+ reqbuf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ break; -+ } -+ if (reqbuf.type) -+ result = vidioc_reqbufs(file, file->private_data, -+ &reqbuf); -+ if (result < 0) -+ dprintk("failed to free buffers REQBUFS(count=0) " -+ " returned %d\n", -+ result); -+ mutex_lock(&dev->image_mutex); -+ release_token(dev, opener, format); -+ mutex_unlock(&dev->image_mutex); -+ } -+ -+ if (atomic_dec_and_test(&dev->open_count)) { -+ timer_delete_sync(&dev->sustain_timer); -+ timer_delete_sync(&dev->timeout_timer); -+ if (!dev->keep_format) { -+ mutex_lock(&dev->image_mutex); -+ free_buffers(dev); -+ mutex_unlock(&dev->image_mutex); -+ } -+ } -+ -+ v4l2_fh_del(&opener->fh); -+ v4l2_fh_exit(&opener->fh); -+ -+ kfree(opener); -+ return 0; -+} -+ -+static int start_fileio(struct file *file, void *fh, enum v4l2_buf_type type) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(fh); -+ struct v4l2_requestbuffers reqbuf = { .count = dev->buffer_count, -+ .memory = V4L2_MEMORY_MMAP, -+ .type = type }; -+ int token = token_from_type(type); -+ int result; -+ -+ if (opener->format_token & V4L2L_TOKEN_TIMEOUT || -+ opener->format_token & ~token) -+ return -EBUSY; /* NOTE: -EBADF might be more informative */ -+ -+ /* short-circuit if already have stream token */ -+ if (opener->stream_token && opener->io_method == V4L2L_IO_FILE) -+ return 0; -+ -+ /* otherwise attempt to acquire stream token and assign IO method */ -+ if (!(dev->stream_tokens & token) || opener->io_method != V4L2L_IO_NONE) -+ return -EBUSY; -+ -+ result = vidioc_reqbufs(file, fh, &reqbuf); -+ if (result < 0) -+ return result; -+ 
result = vidioc_streamon(file, fh, type); -+ if (result < 0) -+ return result; -+ -+ opener->io_method = V4L2L_IO_FILE; -+ return 0; -+} -+ -+static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_buffer *b; -+ int index, result; -+ -+ dprintkrw("read() %zu bytes\n", count); -+ result = start_fileio(file, file->private_data, -+ V4L2_BUF_TYPE_VIDEO_CAPTURE); -+ if (result < 0) -+ return result; -+ -+ index = get_capture_buffer(file); -+ if (index < 0) -+ return index; -+ b = &dev->buffers[index].buffer; -+ if (count > b->bytesused) -+ count = b->bytesused; -+ if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), -+ count)) { -+ printk(KERN_ERR "v4l2-loopback read() failed copy_to_user()\n"); -+ return -EFAULT; -+ } -+ return count; -+} -+ -+static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_buffer *b; -+ int index, result; -+ -+ dprintkrw("write() %zu bytes\n", count); -+ result = start_fileio(file, file->private_data, -+ V4L2_BUF_TYPE_VIDEO_OUTPUT); -+ if (result < 0) -+ return result; -+ -+ if (count > dev->buffer_size) -+ count = dev->buffer_size; -+ index = v4l2l_mod64(dev->write_position, dev->used_buffer_count); -+ b = &dev->buffers[index].buffer; -+ -+ if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, -+ count)) { -+ printk(KERN_ERR -+ "v4l2-loopback write() failed copy_from_user()\n"); -+ return -EFAULT; -+ } -+ b->bytesused = count; -+ -+ v4l2l_get_timestamp(b); -+ b->sequence = dev->write_position; -+ set_queued(b->flags); -+ buffer_written(dev, &dev->buffers[index]); -+ set_done(b->flags); -+ wake_up_all(&dev->read_event); -+ -+ return count; -+} -+ -+/* init functions */ -+/* frees buffers, if allocated */ -+static void free_buffers(struct 
v4l2_loopback_device *dev) -+{ -+ dprintk("free_buffers() with image@%p\n", dev->image); -+ if (!dev->image) -+ return; -+ if (!has_no_owners(dev) || any_buffers_mapped(dev)) -+ /* maybe an opener snuck in before image_mutex was acquired */ -+ printk(KERN_WARNING -+ "v4l2-loopback free_buffers() buffers of video device " -+ "#%u freed while still mapped to userspace\n", -+ dev->vdev->num); -+ vfree(dev->image); -+ dev->image = NULL; -+ dev->image_size = 0; -+ dev->buffer_size = 0; -+} -+ -+static void free_timeout_buffer(struct v4l2_loopback_device *dev) -+{ -+ dprintk("free_timeout_buffer() with timeout_image@%p\n", -+ dev->timeout_image); -+ if (!dev->timeout_image) -+ return; -+ -+ if ((dev->timeout_jiffies > 0 && !has_no_owners(dev)) || -+ dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) -+ printk(KERN_WARNING -+ "v4l2-loopback free_timeout_buffer() timeout image " -+ "of device #%u freed while still mapped to userspace\n", -+ dev->vdev->num); -+ -+ vfree(dev->timeout_image); -+ dev->timeout_image = NULL; -+ dev->timeout_buffer_size = 0; -+} -+/* allocates buffers if no (other) openers are already using them */ -+static int allocate_buffers(struct v4l2_loopback_device *dev, -+ struct v4l2_pix_format *pix_format) -+{ -+ u32 buffer_size = PAGE_ALIGN(pix_format->sizeimage); -+ unsigned long image_size = -+ (unsigned long)buffer_size * (unsigned long)dev->buffer_count; -+ /* vfree on close file operation in case no open handles left */ -+ -+ if (buffer_size == 0 || dev->buffer_count == 0 || -+ buffer_size < pix_format->sizeimage) -+ return -EINVAL; -+ -+ if ((__LONG_MAX__ / buffer_size) < dev->buffer_count) -+ return -ENOSPC; -+ -+ dprintk("allocate_buffers() size %lubytes = %ubytes x %ubuffers\n", -+ image_size, buffer_size, dev->buffer_count); -+ if (dev->image) { -+ /* check that no buffers are expected in user-space */ -+ if (!has_no_owners(dev) || any_buffers_mapped(dev)) -+ return -EBUSY; -+ dprintk("allocate_buffers() existing size=%lubytes\n", -+ 
dev->image_size); -+ /* FIXME: prevent double allocation more intelligently! */ -+ if (image_size == dev->image_size) { -+ dprintk("allocate_buffers() keep existing\n"); -+ return 0; -+ } -+ free_buffers(dev); -+ } -+ -+ /* FIXME: set buffers to 0 */ -+ dev->image = vmalloc(image_size); -+ if (dev->image == NULL) { -+ dev->buffer_size = dev->image_size = 0; -+ return -ENOMEM; -+ } -+ init_buffers(dev, pix_format->sizeimage, buffer_size); -+ dev->buffer_size = buffer_size; -+ dev->image_size = image_size; -+ dprintk("allocate_buffers() -> vmalloc'd %lubytes\n", dev->image_size); -+ return 0; -+} -+static int allocate_timeout_buffer(struct v4l2_loopback_device *dev) -+{ -+ /* device's `buffer_size` and `buffers` must be initialised in -+ * allocate_buffers() */ -+ -+ dprintk("allocate_timeout_buffer() size %ubytes\n", dev->buffer_size); -+ if (dev->buffer_size == 0) -+ return -EINVAL; -+ -+ if (dev->timeout_image) { -+ if (dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) -+ return -EBUSY; -+ if (dev->buffer_size == dev->timeout_buffer_size) -+ return 0; -+ free_timeout_buffer(dev); -+ } -+ -+ dev->timeout_image = vzalloc(dev->buffer_size); -+ if (!dev->timeout_image) { -+ dev->timeout_buffer_size = 0; -+ return -ENOMEM; -+ } -+ dev->timeout_buffer_size = dev->buffer_size; -+ return 0; -+} -+/* init inner buffers, they are capture mode and flags are set as for capture -+ * mode buffers */ -+static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, -+ u32 buffer_size) -+{ -+ u32 i; -+ -+ for (i = 0; i < dev->buffer_count; ++i) { -+ struct v4l2_buffer *b = &dev->buffers[i].buffer; -+ b->index = i; -+ b->bytesused = bytes_used; -+ b->length = buffer_size; -+ b->field = V4L2_FIELD_NONE; -+ b->flags = 0; -+ b->m.offset = i * buffer_size; -+ b->memory = V4L2_MEMORY_MMAP; -+ b->sequence = 0; -+ b->timestamp.tv_sec = 0; -+ b->timestamp.tv_usec = 0; -+ b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ -+ v4l2l_get_timestamp(b); -+ } -+ dev->timeout_buffer = 
dev->buffers[0]; -+ dev->timeout_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; -+} -+ -+/* fills and register video device */ -+static void init_vdev(struct video_device *vdev, int nr) -+{ -+#ifdef V4L2LOOPBACK_WITH_STD -+ vdev->tvnorms = V4L2_STD_ALL; -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ vdev->vfl_type = VFL_TYPE_VIDEO; -+ vdev->fops = &v4l2_loopback_fops; -+ vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; -+ vdev->release = &video_device_release; -+ vdev->minor = -1; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) -+ vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE | -+ V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE | -+ V4L2_CAP_STREAMING; -+#endif -+ -+ if (debug > 1) -+ vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL | -+ V4L2_DEV_DEBUG_IOCTL_ARG; -+ -+ vdev->vfl_dir = VFL_DIR_M2M; -+} -+ -+/* init default capture parameters, only fps may be changed in future */ -+static void init_capture_param(struct v4l2_captureparm *capture_param) -+{ -+ capture_param->capability = V4L2_CAP_TIMEPERFRAME; /* since 2.16 */ -+ capture_param->capturemode = 0; -+ capture_param->extendedmode = 0; -+ capture_param->readbuffers = max_buffers; -+ capture_param->timeperframe.numerator = 1; -+ capture_param->timeperframe.denominator = V4L2LOOPBACK_FPS_DEFAULT; -+} -+ -+static void check_timers(struct v4l2_loopback_device *dev) -+{ -+ if (has_output_token(dev->stream_tokens)) -+ return; -+ -+ if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) -+ mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); -+ if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) -+ mod_timer(&dev->sustain_timer, -+ jiffies + dev->frame_jiffies * 3 / 2); -+} -+#ifdef HAVE_TIMER_SETUP -+static void sustain_timer_clb(struct timer_list *t) -+{ -+ struct v4l2_loopback_device *dev = -+ container_of(t, struct v4l2_loopback_device, sustain_timer); -+#else -+static void sustain_timer_clb(unsigned long nr) -+{ -+ struct v4l2_loopback_device *dev = -+ 
idr_find(&v4l2loopback_index_idr, nr); -+#endif -+ spin_lock(&dev->lock); -+ if (dev->sustain_framerate) { -+ dev->reread_count++; -+ dprintkrw("sustain_timer_clb() write_pos=%lld reread=%u\n", -+ (long long)dev->write_position, dev->reread_count); -+ if (dev->reread_count == 1) -+ mod_timer(&dev->sustain_timer, -+ jiffies + max(1UL, dev->frame_jiffies / 2)); -+ else -+ mod_timer(&dev->sustain_timer, -+ jiffies + dev->frame_jiffies); -+ wake_up_all(&dev->read_event); -+ } -+ spin_unlock(&dev->lock); -+} -+#ifdef HAVE_TIMER_SETUP -+static void timeout_timer_clb(struct timer_list *t) -+{ -+ struct v4l2_loopback_device *dev = -+ container_of(t, struct v4l2_loopback_device, timeout_timer); -+#else -+static void timeout_timer_clb(unsigned long nr) -+{ -+ struct v4l2_loopback_device *dev = -+ idr_find(&v4l2loopback_index_idr, nr); -+#endif -+ spin_lock(&dev->lock); -+ if (dev->timeout_jiffies > 0) { -+ dev->timeout_happened = 1; -+ mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); -+ wake_up_all(&dev->read_event); -+ } -+ spin_unlock(&dev->lock); -+} -+ -+/* init loopback main structure */ -+#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ -+ ((conf) ? \ -+ ((conf->confmember default_condition) ? 
(default_value) : \ -+ (conf->confmember)) : \ -+ default_value) -+ -+static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_ctrl_handler *hdl; -+ struct v4l2loopback_private *vdev_priv = NULL; -+ int err; -+ -+ u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; -+ u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; -+ -+ u32 _min_width = DEFAULT_FROM_CONF(min_width, -+ < V4L2LOOPBACK_SIZE_MIN_WIDTH, -+ V4L2LOOPBACK_SIZE_MIN_WIDTH); -+ u32 _min_height = DEFAULT_FROM_CONF(min_height, -+ < V4L2LOOPBACK_SIZE_MIN_HEIGHT, -+ V4L2LOOPBACK_SIZE_MIN_HEIGHT); -+ u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width); -+ u32 _max_height = -+ DEFAULT_FROM_CONF(max_height, < _min_height, max_height); -+ bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? -+ (bool)(conf->announce_all_caps) : -+ !(V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS); -+ int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); -+ int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); -+ struct v4l2_format _fmt; -+ -+ int nr = -1; -+ -+ if (conf) { -+ const int output_nr = conf->output_nr; -+#ifdef SPLIT_DEVICES -+ const int capture_nr = conf->capture_nr; -+#else -+ const int capture_nr = output_nr; -+#endif -+ if (capture_nr >= 0 && output_nr == capture_nr) { -+ nr = output_nr; -+ } else if (capture_nr < 0 && output_nr < 0) { -+ nr = -1; -+ } else if (capture_nr < 0) { -+ nr = output_nr; -+ } else if (output_nr < 0) { -+ nr = capture_nr; -+ } else { -+ printk(KERN_ERR -+ "v4l2-loopback add() split OUTPUT and CAPTURE " -+ "devices not yet supported.\n"); -+ printk(KERN_INFO -+ "v4l2-loopback add() both devices must have the " -+ "same number (%d != %d).\n", -+ output_nr, capture_nr); -+ return -EINVAL; -+ } -+ } -+ -+ if (idr_find(&v4l2loopback_index_idr, nr)) -+ return -EEXIST; -+ -+ /* initialisation of a new device */ -+ dprintk("add() creating device #%d\n", nr); -+ dev = 
kzalloc(sizeof(*dev), GFP_KERNEL); -+ if (!dev) -+ return -ENOMEM; -+ -+ /* allocate id, if @id >= 0, we're requesting that specific id */ -+ if (nr >= 0) { -+ err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, -+ GFP_KERNEL); -+ if (err == -ENOSPC) -+ err = -EEXIST; -+ } else { -+ err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); -+ } -+ if (err < 0) -+ goto out_free_dev; -+ -+ /* register new device */ -+ MARK(); -+ nr = err; -+ -+ if (conf && conf->card_label[0]) { -+ snprintf(dev->card_label, sizeof(dev->card_label), "%s", -+ conf->card_label); -+ } else { -+ snprintf(dev->card_label, sizeof(dev->card_label), -+ "Dummy video device (0x%04X)", nr); -+ } -+ snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), -+ "v4l2loopback-%03d", nr); -+ -+ err = v4l2_device_register(NULL, &dev->v4l2_dev); -+ if (err) -+ goto out_free_idr; -+ -+ /* initialise the _video_ device */ -+ MARK(); -+ err = -ENOMEM; -+ dev->vdev = video_device_alloc(); -+ if (dev->vdev == NULL) -+ goto out_unregister; -+ -+ vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL); -+ if (vdev_priv == NULL) -+ goto out_unregister; -+ -+ video_set_drvdata(dev->vdev, vdev_priv); -+ if (video_get_drvdata(dev->vdev) == NULL) -+ goto out_unregister; -+ -+ snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s", -+ dev->card_label); -+ vdev_priv->device_nr = nr; -+ init_vdev(dev->vdev, nr); -+ dev->vdev->v4l2_dev = &dev->v4l2_dev; -+ -+ /* initialise v4l2-loopback specific parameters */ -+ MARK(); -+ dev->announce_all_caps = _announce_all_caps; -+ dev->min_width = _min_width; -+ dev->min_height = _min_height; -+ dev->max_width = _max_width; -+ dev->max_height = _max_height; -+ dev->max_openers = _max_openers; -+ -+ /* set (initial) pixel and stream format */ -+ _width = clamp_val(_width, _min_width, _max_width); -+ _height = clamp_val(_height, _min_height, _max_height); -+ _fmt = (struct v4l2_format){ -+ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, -+ .fmt.pix = { .width = 
_width, -+ .height = _height, -+ .pixelformat = formats[0].fourcc, -+ .colorspace = V4L2_COLORSPACE_DEFAULT, -+ .field = V4L2_FIELD_NONE } -+ }; -+ -+ err = v4l2l_fill_format(&_fmt, _min_width, _max_width, _min_height, -+ _max_height); -+ if (err) -+ /* highly unexpected failure to assign default format */ -+ goto out_unregister; -+ dev->pix_format = _fmt.fmt.pix; -+ init_capture_param(&dev->capture_param); -+ set_timeperframe(dev, &dev->capture_param.timeperframe); -+ -+ /* ctrls parameters */ -+ dev->keep_format = 0; -+ dev->sustain_framerate = 0; -+ dev->timeout_jiffies = 0; -+ dev->timeout_image_io = 0; -+ -+ /* initialise OUTPUT and CAPTURE buffer values */ -+ dev->image = NULL; -+ dev->image_size = 0; -+ dev->buffer_count = _max_buffers; -+ dev->buffer_size = 0; -+ dev->used_buffer_count = 0; -+ INIT_LIST_HEAD(&dev->outbufs_list); -+ do { -+ u32 index; -+ for (index = 0; index < dev->buffer_count; ++index) -+ INIT_LIST_HEAD(&dev->buffers[index].list_head); -+ -+ } while (0); -+ memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); -+ dev->write_position = 0; -+ -+ /* initialise synchronisation data */ -+ atomic_set(&dev->open_count, 0); -+ mutex_init(&dev->image_mutex); -+ spin_lock_init(&dev->lock); -+ spin_lock_init(&dev->list_lock); -+ init_waitqueue_head(&dev->read_event); -+ dev->format_tokens = V4L2L_TOKEN_MASK; -+ dev->stream_tokens = V4L2L_TOKEN_MASK; -+ -+ /* initialise sustain frame rate and timeout parameters, and timers */ -+ dev->reread_count = 0; -+ dev->timeout_image = NULL; -+ dev->timeout_happened = 0; -+#ifdef HAVE_TIMER_SETUP -+ timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); -+ timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); -+#else -+ setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); -+ setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); -+#endif -+ -+ /* initialise the control handler and add controls */ -+ MARK(); -+ hdl = &dev->ctrl_handler; -+ err = v4l2_ctrl_handler_init(hdl, 4); -+ if (err) -+ goto 
out_unregister; -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); -+ if (hdl->error) { -+ err = hdl->error; -+ goto out_free_handler; -+ } -+ dev->v4l2_dev.ctrl_handler = hdl; -+ -+ err = v4l2_ctrl_handler_setup(hdl); -+ if (err) -+ goto out_free_handler; -+ -+ /* register the device (creates /dev/video*) */ -+ MARK(); -+ if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { -+ printk(KERN_ERR -+ "v4l2-loopback add() failed video_register_device()\n"); -+ err = -EFAULT; -+ goto out_free_device; -+ } -+ v4l2loopback_create_sysfs(dev->vdev); -+ /* NOTE: ambivalent if sysfs entries fail */ -+ -+ if (ret_nr) -+ *ret_nr = dev->vdev->num; -+ return 0; -+ -+out_free_device: -+ video_device_release(dev->vdev); -+out_free_handler: -+ v4l2_ctrl_handler_free(&dev->ctrl_handler); -+out_unregister: -+ video_set_drvdata(dev->vdev, NULL); -+ if (vdev_priv != NULL) -+ kfree(vdev_priv); -+ v4l2_device_unregister(&dev->v4l2_dev); -+out_free_idr: -+ idr_remove(&v4l2loopback_index_idr, nr); -+out_free_dev: -+ kfree(dev); -+ return err; -+} -+ -+static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) -+{ -+ int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); -+ mutex_lock(&dev->image_mutex); -+ free_buffers(dev); -+ free_timeout_buffer(dev); -+ mutex_unlock(&dev->image_mutex); -+ v4l2loopback_remove_sysfs(dev->vdev); -+ v4l2_ctrl_handler_free(&dev->ctrl_handler); -+ kfree(video_get_drvdata(dev->vdev)); -+ video_unregister_device(dev->vdev); -+ v4l2_device_unregister(&dev->v4l2_dev); -+ idr_remove(&v4l2loopback_index_idr, device_nr); -+ kfree(dev); -+} -+ -+static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, -+ unsigned long parm) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_config conf; -+ struct 
v4l2_loopback_config *confptr = &conf; -+ int device_nr, capture_nr, output_nr; -+ int ret; -+ const __u32 version = V4L2LOOPBACK_VERSION_CODE; -+ -+ ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); -+ if (ret) -+ return ret; -+ -+ ret = -EINVAL; -+ switch (cmd) { -+ default: -+ ret = -ENOSYS; -+ break; -+ /* add a v4l2loopback device (pair), based on the user-provided specs */ -+ case V4L2LOOPBACK_CTL_ADD: -+ case V4L2LOOPBACK_CTL_ADD_legacy: -+ if (parm) { -+ if ((ret = copy_from_user(&conf, (void *)parm, -+ sizeof(conf))) < 0) -+ break; -+ } else -+ confptr = NULL; -+ ret = v4l2_loopback_add(confptr, &device_nr); -+ if (ret >= 0) -+ ret = device_nr; -+ break; -+ /* remove a v4l2loopback device (both capture and output) */ -+ case V4L2LOOPBACK_CTL_REMOVE: -+ case V4L2LOOPBACK_CTL_REMOVE_legacy: -+ ret = v4l2loopback_lookup((__u32)parm, &dev); -+ if (ret >= 0 && dev) { -+ ret = -EBUSY; -+ if (dev->open_count.counter > 0) -+ break; -+ v4l2_loopback_remove(dev); -+ ret = 0; -+ }; -+ break; -+ /* get information for a loopback device. -+ * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends -+ */ -+ case V4L2LOOPBACK_CTL_QUERY: -+ case V4L2LOOPBACK_CTL_QUERY_legacy: -+ if (!parm) -+ break; -+ if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < -+ 0) -+ break; -+ capture_nr = output_nr = conf.output_nr; -+#ifdef SPLIT_DEVICES -+ capture_nr = conf.capture_nr; -+#endif -+ device_nr = (output_nr < 0) ? 
capture_nr : output_nr; -+ MARK(); -+ /* get the device from either capture_nr or output_nr (whatever is valid) */ -+ if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) -+ break; -+ MARK(); -+ /* if we got the device from output_nr and there is a valid capture_nr, -+ * make sure that both refer to the same device (or bail out) -+ */ -+ if ((device_nr != capture_nr) && (capture_nr >= 0) && -+ ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0)) -+ break; -+ MARK(); -+ /* if otoh, we got the device from capture_nr and there is a valid output_nr, -+ * make sure that both refer to the same device (or bail out) -+ */ -+ if ((device_nr != output_nr) && (output_nr >= 0) && -+ ((ret = v4l2loopback_lookup(output_nr, 0)) < 0)) -+ break; -+ -+ /* v4l2_loopback_config identified a single device, so fetch the data */ -+ snprintf(conf.card_label, sizeof(conf.card_label), "%s", -+ dev->card_label); -+ -+ conf.output_nr = dev->vdev->num; -+#ifdef SPLIT_DEVICES -+ conf.capture_nr = dev->vdev->num; -+#endif -+ conf.min_width = dev->min_width; -+ conf.min_height = dev->min_height; -+ conf.max_width = dev->max_width; -+ conf.max_height = dev->max_height; -+ conf.announce_all_caps = dev->announce_all_caps; -+ conf.max_buffers = dev->buffer_count; -+ conf.max_openers = dev->max_openers; -+ conf.debug = debug; -+ MARK(); -+ if (copy_to_user((void *)parm, &conf, sizeof(conf))) { -+ ret = -EFAULT; -+ break; -+ } -+ ret = 0; -+ break; -+ case V4L2LOOPBACK_CTL_VERSION: -+ if (!parm) -+ break; -+ if (copy_to_user((void *)parm, &version, sizeof(version))) { -+ ret = -EFAULT; -+ break; -+ } -+ ret = 0; -+ break; -+ } -+ -+ mutex_unlock(&v4l2loopback_ctl_mutex); -+ MARK(); -+ return ret; -+} -+ -+/* LINUX KERNEL */ -+ -+static const struct file_operations v4l2loopback_ctl_fops = { -+ // clang-format off -+ .owner = THIS_MODULE, -+ .open = nonseekable_open, -+ .unlocked_ioctl = v4l2loopback_control_ioctl, -+ .compat_ioctl = v4l2loopback_control_ioctl, -+ .llseek = noop_llseek, -+ // clang-format 
on -+}; -+ -+static struct miscdevice v4l2loopback_misc = { -+ // clang-format off -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "v4l2loopback", -+ .fops = &v4l2loopback_ctl_fops, -+ // clang-format on -+}; -+ -+static const struct v4l2_file_operations v4l2_loopback_fops = { -+ // clang-format off -+ .owner = THIS_MODULE, -+ .open = v4l2_loopback_open, -+ .release = v4l2_loopback_close, -+ .read = v4l2_loopback_read, -+ .write = v4l2_loopback_write, -+ .poll = v4l2_loopback_poll, -+ .mmap = v4l2_loopback_mmap, -+ .unlocked_ioctl = video_ioctl2, -+ // clang-format on -+}; -+ -+static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { -+ // clang-format off -+ .vidioc_querycap = &vidioc_querycap, -+ .vidioc_enum_framesizes = &vidioc_enum_framesizes, -+ .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, -+ -+ .vidioc_enum_output = &vidioc_enum_output, -+ .vidioc_g_output = &vidioc_g_output, -+ .vidioc_s_output = &vidioc_s_output, -+ -+ .vidioc_enum_input = &vidioc_enum_input, -+ .vidioc_g_input = &vidioc_g_input, -+ .vidioc_s_input = &vidioc_s_input, -+ -+ .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, -+ .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, -+ .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, -+ .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, -+ -+ .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, -+ .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, -+ .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, -+ .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, -+ -+#ifdef V4L2L_OVERLAY -+ .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, -+ .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, -+#endif -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+ .vidioc_s_std = &vidioc_s_std, -+ .vidioc_g_std = &vidioc_g_std, -+ .vidioc_querystd = &vidioc_querystd, -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ .vidioc_g_parm = &vidioc_g_parm, -+ .vidioc_s_parm = &vidioc_s_parm, -+ -+ .vidioc_reqbufs = &vidioc_reqbufs, -+ .vidioc_querybuf = &vidioc_querybuf, -+ .vidioc_qbuf = &vidioc_qbuf, -+ .vidioc_dqbuf = 
&vidioc_dqbuf, -+ -+ .vidioc_streamon = &vidioc_streamon, -+ .vidioc_streamoff = &vidioc_streamoff, -+ -+#ifdef CONFIG_VIDEO_V4L1_COMPAT -+ .vidiocgmbuf = &vidiocgmbuf, -+#endif -+ -+ .vidioc_subscribe_event = &vidioc_subscribe_event, -+ .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, -+ // clang-format on -+}; -+ -+static int free_device_cb(int id, void *ptr, void *data) -+{ -+ struct v4l2_loopback_device *dev = ptr; -+ v4l2_loopback_remove(dev); -+ return 0; -+} -+static void free_devices(void) -+{ -+ idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); -+ idr_destroy(&v4l2loopback_index_idr); -+} -+ -+static int __init v4l2loopback_init_module(void) -+{ -+ const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; -+ const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; -+ int err; -+ int i; -+ MARK(); -+ -+ err = misc_register(&v4l2loopback_misc); -+ if (err < 0) -+ return err; -+ -+ if (devices < 0) { -+ devices = 1; -+ -+ /* try guessing the devices from the "video_nr" parameter */ -+ for (i = MAX_DEVICES - 1; i >= 0; i--) { -+ if (video_nr[i] >= 0) { -+ devices = i + 1; -+ break; -+ } -+ } -+ } -+ -+ if (devices > MAX_DEVICES) { -+ devices = MAX_DEVICES; -+ printk(KERN_INFO -+ "v4l2-loopback init() number of initial devices is " -+ "limited to: %d\n", -+ MAX_DEVICES); -+ } -+ -+ if (max_buffers > MAX_BUFFERS) { -+ max_buffers = MAX_BUFFERS; -+ printk(KERN_INFO -+ "v4l2-loopback init() number of buffers is limited " -+ "to: %d\n", -+ MAX_BUFFERS); -+ } -+ -+ if (max_openers < 0) { -+ printk(KERN_INFO -+ "v4l2-loopback init() allowing %d openers rather " -+ "than %d\n", -+ 2, max_openers); -+ max_openers = 2; -+ } -+ -+ if (max_width < min_width) { -+ max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; -+ printk(KERN_INFO "v4l2-loopback init() using max_width %d\n", -+ max_width); -+ } -+ if (max_height < min_height) { -+ max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; -+ printk(KERN_INFO "v4l2-loopback init() using max_height %d\n", -+ max_height); -+ } 
-+ -+ for (i = 0; i < devices; i++) { -+ struct v4l2_loopback_config cfg = { -+ // clang-format off -+ .output_nr = video_nr[i], -+#ifdef SPLIT_DEVICES -+ .capture_nr = video_nr[i], -+#endif -+ .min_width = min_width, -+ .min_height = min_height, -+ .max_width = max_width, -+ .max_height = max_height, -+ .announce_all_caps = (!exclusive_caps[i]), -+ .max_buffers = max_buffers, -+ .max_openers = max_openers, -+ .debug = debug, -+ // clang-format on -+ }; -+ cfg.card_label[0] = 0; -+ if (card_label[i]) -+ snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", -+ card_label[i]); -+ err = v4l2_loopback_add(&cfg, 0); -+ if (err) { -+ free_devices(); -+ goto error; -+ } -+ } -+ -+ dprintk("module installed\n"); -+ -+ printk(KERN_INFO "v4l2-loopback driver version %d.%d.%d%s loaded\n", -+ // clang-format off -+ (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, -+ (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, -+ (V4L2LOOPBACK_VERSION_CODE ) & 0xff, -+#ifdef SNAPSHOT_VERSION -+ " (" __stringify(SNAPSHOT_VERSION) ")" -+#else -+ "" -+#endif -+ ); -+ // clang-format on -+ -+ return 0; -+error: -+ misc_deregister(&v4l2loopback_misc); -+ return err; -+} -+ -+static void v4l2loopback_cleanup_module(void) -+{ -+ MARK(); -+ /* unregister the device -> it deletes /dev/video* */ -+ free_devices(); -+ /* and get rid of /dev/v4l2loopback */ -+ misc_deregister(&v4l2loopback_misc); -+ dprintk("module removed\n"); -+} -+ -+MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); -+ -+module_init(v4l2loopback_init_module); -+module_exit(v4l2loopback_cleanup_module); -diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h -new file mode 100644 -index 000000000000..e48e0ce5949d ---- /dev/null -+++ b/drivers/media/v4l2-core/v4l2loopback.h -@@ -0,0 +1,108 @@ -+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -+/* -+ * v4l2loopback.h -+ * -+ * Written by IOhannes m zmölnig, 7/1/20. -+ * -+ * Copyright 2020 by IOhannes m zmölnig. 
Redistribution of this file is -+ * permitted under the GNU General Public License. -+ */ -+#ifndef _V4L2LOOPBACK_H -+#define _V4L2LOOPBACK_H -+ -+#define V4L2LOOPBACK_VERSION_MAJOR 0 -+#define V4L2LOOPBACK_VERSION_MINOR 15 -+#define V4L2LOOPBACK_VERSION_BUGFIX 0 -+ -+/* /dev/v4l2loopback interface */ -+ -+struct v4l2_loopback_config { -+ /** -+ * the device-number (/dev/video) -+ * V4L2LOOPBACK_CTL_ADD: -+ * setting this to a value<0, will allocate an available one -+ * if nr>=0 and the device already exists, the ioctl will EEXIST -+ * if output_nr and capture_nr are the same, only a single device will be created -+ * NOTE: currently split-devices (where output_nr and capture_nr differ) -+ * are not implemented yet. -+ * until then, requesting different device-IDs will result in EINVAL. -+ * -+ * V4L2LOOPBACK_CTL_QUERY: -+ * either both output_nr and capture_nr must refer to the same loopback, -+ * or one (and only one) of them must be -1 -+ * -+ */ -+ __s32 output_nr; -+ __s32 unused; /*capture_nr;*/ -+ -+ /** -+ * a nice name for your device -+ * if (*card_label)==0, an automatic name is assigned -+ */ -+ char card_label[32]; -+ -+ /** -+ * allowed frame size -+ * if too low, default values are used -+ */ -+ __u32 min_width; -+ __u32 max_width; -+ __u32 min_height; -+ __u32 max_height; -+ -+ /** -+ * number of buffers to allocate for the queue -+ * if set to <=0, default values are used -+ */ -+ __s32 max_buffers; -+ -+ /** -+ * how many consumers are allowed to open this device concurrently -+ * if set to <=0, default values are used -+ */ -+ __s32 max_openers; -+ -+ /** -+ * set the debugging level for this device -+ */ -+ __s32 debug; -+ -+ /** -+ * whether to announce OUTPUT/CAPTURE capabilities exclusively -+ * for this device or not -+ * (!exclusive_caps) -+ * NOTE: this is going to be removed once separate output/capture -+ * devices are implemented -+ */ -+ __s32 announce_all_caps; -+}; -+ -+#define V4L2LOOPBACK_CTL_IOCTLMAGIC '~' -+ -+/* a pointer to an 
(unsigned int) that - on success - will hold -+ * the version code of the v4l2loopback module -+ * as returned by KERNEL_VERSION(MAJOR, MINOR, BUGFIX) -+ */ -+#define V4L2LOOPBACK_CTL_VERSION _IOR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 0, __u32) -+ -+/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the -+ * to-be-created device set. -+ * if the ptr is NULL, a new device is created with default values at the driver's discretion. -+ * -+ * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, -+ * to get more information on the device) -+ */ -+#define V4L2LOOPBACK_CTL_ADD \ -+ _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 1, struct v4l2_loopback_config) -+ -+/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ -+#define V4L2LOOPBACK_CTL_REMOVE _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 2, __u32) -+ -+/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set -+ * (the two values must either refer to video-devices associated with the same loopback device -+ * or exactly one of them must be <0 -+ */ -+#define V4L2LOOPBACK_CTL_QUERY \ -+ _IOWR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 3, struct v4l2_loopback_config) -+ -+#endif /* _V4L2LOOPBACK_H */ -diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h -new file mode 100644 -index 000000000000..d855a3796554 ---- /dev/null -+++ b/drivers/media/v4l2-core/v4l2loopback_formats.h -@@ -0,0 +1,445 @@ -+static const struct v4l2l_format formats[] = { -+#ifndef V4L2_PIX_FMT_VP9 -+#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') -+#endif -+#ifndef V4L2_PIX_FMT_HEVC -+#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') -+#endif -+ -+ /* here come the packed formats */ -+ { -+ .name = "32 bpp RGB, le", -+ .fourcc = V4L2_PIX_FMT_BGR32, -+ .depth = 32, -+ .flags = 0, -+ }, -+ { -+ .name = "32 bpp RGB, be", -+ .fourcc = V4L2_PIX_FMT_RGB32, -+ .depth = 32, -+ .flags = 0, 
-+ }, -+ { -+ .name = "24 bpp RGB, le", -+ .fourcc = V4L2_PIX_FMT_BGR24, -+ .depth = 24, -+ .flags = 0, -+ }, -+ { -+ .name = "24 bpp RGB, be", -+ .fourcc = V4L2_PIX_FMT_RGB24, -+ .depth = 24, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_ABGR32 -+ { -+ .name = "32 bpp RGBA, le", -+ .fourcc = V4L2_PIX_FMT_ABGR32, -+ .depth = 32, -+ .flags = 0, -+ }, -+#endif -+#ifdef V4L2_PIX_FMT_RGBA32 -+ { -+ .name = "32 bpp RGBA", -+ .fourcc = V4L2_PIX_FMT_RGBA32, -+ .depth = 32, -+ .flags = 0, -+ }, -+#endif -+#ifdef V4L2_PIX_FMT_RGB332 -+ { -+ .name = "8 bpp RGB-3-3-2", -+ .fourcc = V4L2_PIX_FMT_RGB332, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB332 */ -+#ifdef V4L2_PIX_FMT_RGB444 -+ { -+ .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", -+ .fourcc = V4L2_PIX_FMT_RGB444, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB444 */ -+#ifdef V4L2_PIX_FMT_RGB555 -+ { -+ .name = "16 bpp RGB-5-5-5", -+ .fourcc = V4L2_PIX_FMT_RGB555, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB555 */ -+#ifdef V4L2_PIX_FMT_RGB565 -+ { -+ .name = "16 bpp RGB-5-6-5", -+ .fourcc = V4L2_PIX_FMT_RGB565, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB565 */ -+#ifdef V4L2_PIX_FMT_RGB555X -+ { -+ .name = "16 bpp RGB-5-5-5 BE", -+ .fourcc = V4L2_PIX_FMT_RGB555X, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB555X */ -+#ifdef V4L2_PIX_FMT_RGB565X -+ { -+ .name = "16 bpp RGB-5-6-5 BE", -+ .fourcc = V4L2_PIX_FMT_RGB565X, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB565X */ -+#ifdef V4L2_PIX_FMT_BGR666 -+ { -+ .name = "18 bpp BGR-6-6-6", -+ .fourcc = V4L2_PIX_FMT_BGR666, -+ .depth = 18, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_BGR666 */ -+ { -+ .name = "4:2:2, packed, YUYV", -+ .fourcc = V4L2_PIX_FMT_YUYV, -+ .depth = 16, -+ .flags = 0, -+ }, -+ { -+ .name = "4:2:2, packed, UYVY", -+ .fourcc = V4L2_PIX_FMT_UYVY, -+ .depth = 16, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_YVYU -+ { -+ .name = "4:2:2, packed YVYU", 
-+ .fourcc = V4L2_PIX_FMT_YVYU, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif -+#ifdef V4L2_PIX_FMT_VYUY -+ { -+ .name = "4:2:2, packed VYUY", -+ .fourcc = V4L2_PIX_FMT_VYUY, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif -+ { -+ .name = "4:2:2, packed YYUV", -+ .fourcc = V4L2_PIX_FMT_YYUV, -+ .depth = 16, -+ .flags = 0, -+ }, -+ { -+ .name = "YUV-8-8-8-8", -+ .fourcc = V4L2_PIX_FMT_YUV32, -+ .depth = 32, -+ .flags = 0, -+ }, -+ { -+ .name = "8 bpp, Greyscale", -+ .fourcc = V4L2_PIX_FMT_GREY, -+ .depth = 8, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_Y4 -+ { -+ .name = "4 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y4, -+ .depth = 4, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y4 */ -+#ifdef V4L2_PIX_FMT_Y6 -+ { -+ .name = "6 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y6, -+ .depth = 6, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y6 */ -+#ifdef V4L2_PIX_FMT_Y10 -+ { -+ .name = "10 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y10, -+ .depth = 10, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y10 */ -+#ifdef V4L2_PIX_FMT_Y12 -+ { -+ .name = "12 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y12, -+ .depth = 12, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y12 */ -+ { -+ .name = "16 bpp, Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y16, -+ .depth = 16, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_YUV444 -+ { -+ .name = "16 bpp xxxxyyyy uuuuvvvv", -+ .fourcc = V4L2_PIX_FMT_YUV444, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_YUV444 */ -+#ifdef V4L2_PIX_FMT_YUV555 -+ { -+ .name = "16 bpp YUV-5-5-5", -+ .fourcc = V4L2_PIX_FMT_YUV555, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_YUV555 */ -+#ifdef V4L2_PIX_FMT_YUV565 -+ { -+ .name = "16 bpp YUV-5-6-5", -+ .fourcc = V4L2_PIX_FMT_YUV565, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_YUV565 */ -+ -+/* bayer formats */ -+#ifdef V4L2_PIX_FMT_SRGGB8 -+ { -+ .name = "Bayer RGGB 8bit", -+ .fourcc = V4L2_PIX_FMT_SRGGB8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SRGGB8 */ -+#ifdef 
V4L2_PIX_FMT_SGRBG8 -+ { -+ .name = "Bayer GRBG 8bit", -+ .fourcc = V4L2_PIX_FMT_SGRBG8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SGRBG8 */ -+#ifdef V4L2_PIX_FMT_SGBRG8 -+ { -+ .name = "Bayer GBRG 8bit", -+ .fourcc = V4L2_PIX_FMT_SGBRG8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SGBRG8 */ -+#ifdef V4L2_PIX_FMT_SBGGR8 -+ { -+ .name = "Bayer BA81 8bit", -+ .fourcc = V4L2_PIX_FMT_SBGGR8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SBGGR8 */ -+ -+ /* here come the planar formats */ -+ { -+ .name = "4:1:0, planar, Y-Cr-Cb", -+ .fourcc = V4L2_PIX_FMT_YVU410, -+ .depth = 9, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+ { -+ .name = "4:2:0, planar, Y-Cr-Cb", -+ .fourcc = V4L2_PIX_FMT_YVU420, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+ { -+ .name = "4:1:0, planar, Y-Cb-Cr", -+ .fourcc = V4L2_PIX_FMT_YUV410, -+ .depth = 9, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+ { -+ .name = "4:2:0, planar, Y-Cb-Cr", -+ .fourcc = V4L2_PIX_FMT_YUV420, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#ifdef V4L2_PIX_FMT_YUV422P -+ { -+ .name = "16 bpp YVU422 planar", -+ .fourcc = V4L2_PIX_FMT_YUV422P, -+ .depth = 16, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_YUV422P */ -+#ifdef V4L2_PIX_FMT_YUV411P -+ { -+ .name = "16 bpp YVU411 planar", -+ .fourcc = V4L2_PIX_FMT_YUV411P, -+ .depth = 16, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_YUV411P */ -+#ifdef V4L2_PIX_FMT_Y41P -+ { -+ .name = "12 bpp YUV 4:1:1", -+ .fourcc = V4L2_PIX_FMT_Y41P, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_Y41P */ -+#ifdef V4L2_PIX_FMT_NV12 -+ { -+ .name = "12 bpp Y/CbCr 4:2:0 ", -+ .fourcc = V4L2_PIX_FMT_NV12, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_NV12 */ -+ -+/* here come the compressed formats */ -+ -+#ifdef V4L2_PIX_FMT_MJPEG -+ { -+ .name = "Motion-JPEG", -+ .fourcc = V4L2_PIX_FMT_MJPEG, -+ .depth = 32, -+ .flags = 
FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MJPEG */ -+#ifdef V4L2_PIX_FMT_JPEG -+ { -+ .name = "JFIF JPEG", -+ .fourcc = V4L2_PIX_FMT_JPEG, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_JPEG */ -+#ifdef V4L2_PIX_FMT_DV -+ { -+ .name = "DV1394", -+ .fourcc = V4L2_PIX_FMT_DV, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_DV */ -+#ifdef V4L2_PIX_FMT_MPEG -+ { -+ .name = "MPEG-1/2/4 Multiplexed", -+ .fourcc = V4L2_PIX_FMT_MPEG, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG */ -+#ifdef V4L2_PIX_FMT_H264 -+ { -+ .name = "H264 with start codes", -+ .fourcc = V4L2_PIX_FMT_H264, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H264 */ -+#ifdef V4L2_PIX_FMT_H264_NO_SC -+ { -+ .name = "H264 without start codes", -+ .fourcc = V4L2_PIX_FMT_H264_NO_SC, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H264_NO_SC */ -+#ifdef V4L2_PIX_FMT_H264_MVC -+ { -+ .name = "H264 MVC", -+ .fourcc = V4L2_PIX_FMT_H264_MVC, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H264_MVC */ -+#ifdef V4L2_PIX_FMT_H263 -+ { -+ .name = "H263", -+ .fourcc = V4L2_PIX_FMT_H263, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H263 */ -+#ifdef V4L2_PIX_FMT_MPEG1 -+ { -+ .name = "MPEG-1 ES", -+ .fourcc = V4L2_PIX_FMT_MPEG1, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG1 */ -+#ifdef V4L2_PIX_FMT_MPEG2 -+ { -+ .name = "MPEG-2 ES", -+ .fourcc = V4L2_PIX_FMT_MPEG2, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG2 */ -+#ifdef V4L2_PIX_FMT_MPEG4 -+ { -+ .name = "MPEG-4 part 2 ES", -+ .fourcc = V4L2_PIX_FMT_MPEG4, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG4 */ -+#ifdef V4L2_PIX_FMT_XVID -+ { -+ .name = "Xvid", -+ .fourcc = 
V4L2_PIX_FMT_XVID, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_XVID */ -+#ifdef V4L2_PIX_FMT_VC1_ANNEX_G -+ { -+ .name = "SMPTE 421M Annex G compliant stream", -+ .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ -+#ifdef V4L2_PIX_FMT_VC1_ANNEX_L -+ { -+ .name = "SMPTE 421M Annex L compliant stream", -+ .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ -+#ifdef V4L2_PIX_FMT_VP8 -+ { -+ .name = "VP8", -+ .fourcc = V4L2_PIX_FMT_VP8, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VP8 */ -+#ifdef V4L2_PIX_FMT_VP9 -+ { -+ .name = "VP9", -+ .fourcc = V4L2_PIX_FMT_VP9, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VP9 */ -+#ifdef V4L2_PIX_FMT_HEVC -+ { -+ .name = "HEVC", -+ .fourcc = V4L2_PIX_FMT_HEVC, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_HEVC */ -+}; -diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile -index 038ccbd9e3ba..de5e4f5145af 100644 ---- a/drivers/pci/controller/Makefile -+++ b/drivers/pci/controller/Makefile -@@ -1,4 +1,10 @@ - # SPDX-License-Identifier: GPL-2.0 -+ifdef CONFIG_X86_64 -+ifdef CONFIG_SATA_AHCI -+obj-y += intel-nvme-remap.o -+endif -+endif -+ - obj-$(CONFIG_PCIE_CADENCE) += cadence/ - obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o - obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o -diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c -new file mode 100644 -index 000000000000..e105e6f5cc91 ---- /dev/null -+++ b/drivers/pci/controller/intel-nvme-remap.c -@@ -0,0 +1,462 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Intel remapped NVMe device support. -+ * -+ * Copyright (c) 2019 Endless Mobile, Inc. 
-+ * Author: Daniel Drake -+ * -+ * Some products ship by default with the SATA controller in "RAID" or -+ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this -+ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe -+ * devices disappear from the PCI bus, and instead their I/O memory becomes -+ * available within the AHCI device BARs. -+ * -+ * This scheme is understood to be a way of avoiding usage of the standard -+ * Windows NVMe driver under that OS, instead mandating usage of Intel's -+ * driver instead, which has better power management, and presumably offers -+ * some RAID/disk-caching solutions too. -+ * -+ * Here in this driver, we support the remapped NVMe mode by claiming the -+ * AHCI device and creating a fake PCIe root port. On the new bus, the -+ * original AHCI device is exposed with only minor tweaks. Then, fake PCI -+ * devices corresponding to the remapped NVMe devices are created. The usual -+ * ahci and nvme drivers are then expected to bind to these devices and -+ * operate as normal. -+ * -+ * The PCI configuration space for the NVMe devices is completely -+ * unavailable, so we fake a minimal one and hope for the best. -+ * -+ * Interrupts are shared between the AHCI and NVMe devices. For simplicity, -+ * we only support the legacy interrupt here, although MSI support -+ * could potentially be added later. -+ */ -+ -+#define MODULE_NAME "intel-nvme-remap" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#define AHCI_PCI_BAR_STANDARD 5 -+ -+struct nvme_remap_dev { -+ struct pci_dev *dev; /* AHCI device */ -+ struct pci_bus *bus; /* our fake PCI bus */ -+ struct pci_sysdata sysdata; -+ int irq_base; /* our fake interrupts */ -+ -+ /* -+ * When we detect an all-ones write to a BAR register, this flag -+ * is set, so that we return the BAR size on the next read (a -+ * standard PCI behaviour). 
-+ * This includes the assumption that an all-ones BAR write is -+ * immediately followed by a read of the same register. -+ */ -+ bool bar_sizing; -+ -+ /* -+ * Resources copied from the AHCI device, to be regarded as -+ * resources on our fake bus. -+ */ -+ struct resource ahci_resources[PCI_NUM_RESOURCES]; -+ -+ /* Resources corresponding to the NVMe devices. */ -+ struct resource remapped_dev_mem[AHCI_MAX_REMAP]; -+ -+ /* Number of remapped NVMe devices found. */ -+ int num_remapped_devices; -+}; -+ -+static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus) -+{ -+ return container_of(bus->sysdata, struct nvme_remap_dev, sysdata); -+} -+ -+ -+/******** PCI configuration space **********/ -+ -+/* -+ * Helper macros for tweaking returned contents of PCI configuration space. -+ * -+ * value contains len bytes of data read from reg. -+ * If fixup_reg is included in that range, fix up the contents of that -+ * register to fixed_value. -+ */ -+#define NR_FIX8(fixup_reg, fixed_value) do { \ -+ if (reg <= fixup_reg && fixup_reg < reg + len) \ -+ ((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \ -+ } while (0) -+ -+#define NR_FIX16(fixup_reg, fixed_value) do { \ -+ NR_FIX8(fixup_reg, fixed_value); \ -+ NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ -+ } while (0) -+ -+#define NR_FIX24(fixup_reg, fixed_value) do { \ -+ NR_FIX8(fixup_reg, fixed_value); \ -+ NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ -+ NR_FIX8(fixup_reg + 2, fixed_value >> 16); \ -+ } while (0) -+ -+#define NR_FIX32(fixup_reg, fixed_value) do { \ -+ NR_FIX16(fixup_reg, (u16) fixed_value); \ -+ NR_FIX16(fixup_reg + 2, fixed_value >> 16); \ -+ } while (0) -+ -+/* -+ * Read PCI config space of the slot 0 (AHCI) device. -+ * We pass through the read request to the underlying device, but -+ * tweak the results in some cases. 
-+ */
-+static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
-+				     int len, u32 *value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
-+	int ret;
-+
-+	ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
-+				      reg, len, value);
-+	if (ret)
-+		return ret;
-+
-+	/*
-+	 * Adjust the device class, to prevent this driver from attempting to
-+	 * additionally probe the device we're simulating here.
-+	 *
-+	 * Like every NR_FIX* use below, this only rewrites the bytes of
-+	 * *value that fall inside the requested [reg, reg + len) window.
-+	 */
-+	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
-+
-+	/*
-+	 * Unset interrupt pin, otherwise ACPI tries to find routing
-+	 * info for our virtual IRQ, fails, and complains.
-+	 */
-+	NR_FIX8(PCI_INTERRUPT_PIN, 0);
-+
-+	/*
-+	 * Truncate the AHCI BAR to not include the region that covers the
-+	 * hidden devices. This will cause the ahci driver to successfully
-+	 * probe the new device (instead of handing it over to this driver).
-+	 *
-+	 * The 16K size mask is only reported while bar_sizing is set, i.e.
-+	 * on the read that follows an all-ones BAR write. The flag is
-+	 * cleared by that first read, whichever register it targets.
-+	 */
-+	if (nrdev->bar_sizing) {
-+		NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
-+		nrdev->bar_sizing = false;
-+	}
-+
-+	return PCIBIOS_SUCCESSFUL;
-+}
-+
-+/*
-+ * Read PCI config space of a remapped device.
-+ * Since the original PCI config space is inaccessible, we provide a minimal,
-+ * fake config space instead.
-+ */ -+static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port, -+ int reg, int len, u32 *value) -+{ -+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); -+ struct resource *remapped_mem; -+ -+ if (port > nrdev->num_remapped_devices) -+ return PCIBIOS_DEVICE_NOT_FOUND; -+ -+ *value = 0; -+ remapped_mem = &nrdev->remapped_dev_mem[port - 1]; -+ -+ /* Set a Vendor ID, otherwise Linux assumes no device is present */ -+ NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL); -+ -+ /* Always appear on & bus mastering */ -+ NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); -+ -+ /* Set class so that nvme driver probes us */ -+ NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS); -+ -+ if (nrdev->bar_sizing) { -+ NR_FIX32(PCI_BASE_ADDRESS_0, -+ ~(resource_size(remapped_mem) - 1)); -+ nrdev->bar_sizing = false; -+ } else { -+ resource_size_t mem_start = remapped_mem->start; -+ -+ mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64; -+ NR_FIX32(PCI_BASE_ADDRESS_0, mem_start); -+ mem_start >>= 32; -+ NR_FIX32(PCI_BASE_ADDRESS_1, mem_start); -+ } -+ -+ return PCIBIOS_SUCCESSFUL; -+} -+ -+/* Read PCI configuration space. */ -+static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn, -+ int reg, int len, u32 *value) -+{ -+ if (PCI_SLOT(devfn) == 0) -+ return nvme_remap_pci_read_slot0(bus, reg, len, value); -+ else -+ return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn), -+ reg, len, value); -+} -+ -+/* -+ * Write PCI config space of the slot 0 (AHCI) device. -+ * Apart from the special case of BAR sizing, we disable all writes. -+ * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master) -+ * that would affect the operation of the NVMe devices. 
-+ */
-+static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg,
-+				      int len, u32 value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+	struct pci_bus *real_bus = nrdev->dev->bus;
-+
-+	/* Only the BAR registers accept writes; refuse everything else. */
-+	if (reg < PCI_BASE_ADDRESS_0 || reg > PCI_BASE_ADDRESS_5)
-+		return PCIBIOS_SET_FAILED;
-+
-+	/*
-+	 * An all-ones BAR write is a size probe. Remember it so that the
-+	 * read which follows can reply with an appropriate size.
-+	 */
-+	if (value == ~0)
-+		nrdev->bar_sizing = true;
-+
-+	/* BAR writes are handed to the real AHCI device unchanged. */
-+	return real_bus->ops->write(real_bus, nrdev->dev->devfn,
-+				    reg, len, value);
-+}
-+
-+/*
-+ * Config space write for a remapped (fake) NVMe device.
-+ * There is no accessible config space behind it, so nothing is stored:
-+ * the only write that is acknowledged is the all-ones BAR probe.
-+ */
-+static int nvme_remap_pci_write_remapped(struct pci_bus *bus,
-+					 unsigned int port,
-+					 int reg, int len, u32 value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+	bool bar_reg;
-+
-+	if (port > nrdev->num_remapped_devices)
-+		return PCIBIOS_DEVICE_NOT_FOUND;
-+
-+	bar_reg = reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5;
-+	if (!bar_reg || value != ~0)
-+		return PCIBIOS_SET_FAILED;
-+
-+	/*
-+	 * Size probe: flag it so that the next read reports the size of
-+	 * the memory region instead of its address.
-+	 */
-+	nrdev->bar_sizing = true;
-+	return PCIBIOS_SUCCESSFUL;
-+}
-+
-+/* Write PCI configuration space.
*/ -+static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, -+ int reg, int len, u32 value) -+{ -+ if (PCI_SLOT(devfn) == 0) -+ return nvme_remap_pci_write_slot0(bus, reg, len, value); -+ else -+ return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), -+ reg, len, value); -+} -+ -+static struct pci_ops nvme_remap_pci_ops = { -+ .read = nvme_remap_pci_read, -+ .write = nvme_remap_pci_write, -+}; -+ -+ -+/******** Initialization & exit **********/ -+ -+/* -+ * Find a PCI domain ID to use for our fake bus. -+ * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). -+ */ -+static int find_free_domain(void) -+{ -+ int domain = 0xffff; -+ struct pci_bus *bus = NULL; -+ -+ while ((bus = pci_find_next_bus(bus)) != NULL) -+ domain = max_t(int, domain, pci_domain_nr(bus)); -+ -+ return domain + 1; -+} -+ -+static int find_remapped_devices(struct nvme_remap_dev *nrdev, -+ struct list_head *resources) -+{ -+ void __iomem *mmio; -+ int i, count = 0; -+ u32 cap; -+ -+ mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, -+ pci_resource_len(nrdev->dev, -+ AHCI_PCI_BAR_STANDARD)); -+ if (!mmio) -+ return -ENODEV; -+ -+ /* Check if this device might have remapped nvme devices. 
*/ -+ if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || -+ !(readl(mmio + AHCI_VSCAP) & 1)) -+ return -ENODEV; -+ -+ cap = readq(mmio + AHCI_REMAP_CAP); -+ for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { -+ struct resource *remapped_mem; -+ -+ if ((cap & (1 << i)) == 0) -+ continue; -+ if (readl(mmio + ahci_remap_dcc(i)) -+ != PCI_CLASS_STORAGE_EXPRESS) -+ continue; -+ -+ /* We've found a remapped device */ -+ remapped_mem = &nrdev->remapped_dev_mem[count++]; -+ remapped_mem->start = -+ pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) -+ + ahci_remap_base(i); -+ remapped_mem->end = remapped_mem->start -+ + AHCI_REMAP_N_SIZE - 1; -+ remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; -+ pci_add_resource(resources, remapped_mem); -+ } -+ -+ pcim_iounmap(nrdev->dev, mmio); -+ -+ if (count == 0) -+ return -ENODEV; -+ -+ nrdev->num_remapped_devices = count; -+ dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", -+ nrdev->num_remapped_devices); -+ return 0; -+} -+ -+static void nvme_remap_remove_root_bus(void *data) -+{ -+ struct pci_bus *bus = data; -+ -+ pci_stop_root_bus(bus); -+ pci_remove_root_bus(bus); -+} -+ -+static int nvme_remap_probe(struct pci_dev *dev, -+ const struct pci_device_id *id) -+{ -+ struct nvme_remap_dev *nrdev; -+ LIST_HEAD(resources); -+ int i; -+ int ret; -+ struct pci_dev *child; -+ -+ nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); -+ nrdev->sysdata.domain = find_free_domain(); -+ nrdev->sysdata.nvme_remap_dev = dev; -+ nrdev->dev = dev; -+ pci_set_drvdata(dev, nrdev); -+ -+ ret = pcim_enable_device(dev); -+ if (ret < 0) -+ return ret; -+ -+ pci_set_master(dev); -+ -+ ret = find_remapped_devices(nrdev, &resources); -+ if (ret) -+ return ret; -+ -+ /* Add resources from the original AHCI device */ -+ for (i = 0; i < PCI_NUM_RESOURCES; i++) { -+ struct resource *res = &dev->resource[i]; -+ -+ if (res->start) { -+ struct resource *nr_res = &nrdev->ahci_resources[i]; -+ -+ nr_res->start = 
res->start; -+ nr_res->end = res->end; -+ nr_res->flags = res->flags; -+ pci_add_resource(&resources, nr_res); -+ } -+ } -+ -+ /* Create virtual interrupts */ -+ nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, -+ nrdev->num_remapped_devices + 1, -+ 0); -+ if (nrdev->irq_base < 0) -+ return nrdev->irq_base; -+ -+ /* Create and populate PCI bus */ -+ nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, -+ &nrdev->sysdata, &resources); -+ if (!nrdev->bus) -+ return -ENODEV; -+ -+ if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, -+ nrdev->bus)) -+ return -ENOMEM; -+ -+ /* We don't support sharing MSI interrupts between these devices */ -+ nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; -+ -+ pci_scan_child_bus(nrdev->bus); -+ -+ list_for_each_entry(child, &nrdev->bus->devices, bus_list) { -+ /* -+ * Prevent PCI core from trying to move memory BARs around. -+ * The hidden NVMe devices are at fixed locations. -+ */ -+ for (i = 0; i < PCI_NUM_RESOURCES; i++) { -+ struct resource *res = &child->resource[i]; -+ -+ if (res->flags & IORESOURCE_MEM) -+ res->flags |= IORESOURCE_PCI_FIXED; -+ } -+ -+ /* Share the legacy IRQ between all devices */ -+ child->irq = dev->irq; -+ } -+ -+ pci_assign_unassigned_bus_resources(nrdev->bus); -+ pci_bus_add_devices(nrdev->bus); -+ -+ return 0; -+} -+ -+static const struct pci_device_id nvme_remap_ids[] = { -+ /* -+ * Match all Intel RAID controllers. -+ * -+ * There's overlap here with the set of devices detected by the ahci -+ * driver, but ahci will only successfully probe when there -+ * *aren't* any remapped NVMe devices, and this driver will only -+ * successfully probe when there *are* remapped NVMe devices that -+ * need handling. 
-+ */ -+ { -+ PCI_VDEVICE(INTEL, PCI_ANY_ID), -+ .class = PCI_CLASS_STORAGE_RAID << 8, -+ .class_mask = 0xffffff00, -+ }, -+ {0,} -+}; -+MODULE_DEVICE_TABLE(pci, nvme_remap_ids); -+ -+static struct pci_driver nvme_remap_drv = { -+ .name = MODULE_NAME, -+ .id_table = nvme_remap_ids, -+ .probe = nvme_remap_probe, -+}; -+module_pci_driver(nvme_remap_drv); -+ -+MODULE_AUTHOR("Daniel Drake "); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index d97335a40193..acab5556a354 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3745,6 +3745,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ 
if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be - * prevented for those affected devices. 
-@@ -5192,6 +5292,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, - /* Wangxun nics */ - { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - -diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig -index 5522310bab8d..9e1c4634eb7b 100644 ---- a/drivers/scsi/Kconfig -+++ b/drivers/scsi/Kconfig -@@ -1524,4 +1524,6 @@ endif # SCSI_LOWLEVEL - - source "drivers/scsi/device_handler/Kconfig" - -+source "drivers/scsi/vhba/Kconfig" -+ - endmenu -diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile -index 16de3e41f94c..4e88f6e3e67b 100644 ---- a/drivers/scsi/Makefile -+++ b/drivers/scsi/Makefile -@@ -152,6 +152,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o - obj-$(CONFIG_SCSI_ENCLOSURE) += ses.o - - obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/ -+obj-$(CONFIG_VHBA) += vhba/ - - # This goes last, so that "real" scsi devices probe earlier - obj-$(CONFIG_SCSI_DEBUG) += scsi_debug.o -diff --git a/drivers/scsi/vhba/Kconfig b/drivers/scsi/vhba/Kconfig -new file mode 100644 -index 000000000000..e70a381fe3df ---- /dev/null -+++ b/drivers/scsi/vhba/Kconfig -@@ -0,0 +1,9 @@ -+config VHBA -+ tristate "Virtual (SCSI) Host Bus Adapter" -+ depends on SCSI -+ help -+ This is the in-kernel part of CDEmu, a CD/DVD-ROM device -+ emulator. -+ -+ This driver can also be built as a module. If so, the module -+ will be called vhba. 
-diff --git a/drivers/scsi/vhba/Makefile b/drivers/scsi/vhba/Makefile -new file mode 100644 -index 000000000000..2d7524b66199 ---- /dev/null -+++ b/drivers/scsi/vhba/Makefile -@@ -0,0 +1,4 @@ -+VHBA_VERSION := 20240917 -+ -+obj-$(CONFIG_VHBA) += vhba.o -+ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror -diff --git a/drivers/scsi/vhba/vhba.c b/drivers/scsi/vhba/vhba.c -new file mode 100644 -index 000000000000..878a3be0ba2b ---- /dev/null -+++ b/drivers/scsi/vhba/vhba.c -@@ -0,0 +1,1132 @@ -+/* -+ * vhba.c -+ * -+ * Copyright (C) 2007-2012 Chia-I Wu -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ */ -+ -+#define pr_fmt(fmt) "vhba: " fmt -+ -+#include -+ -+#include -+#include -+#include -+#include -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) -+#include -+#else -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_COMPAT -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+MODULE_AUTHOR("Chia-I Wu"); -+MODULE_VERSION(VHBA_VERSION); -+MODULE_DESCRIPTION("Virtual SCSI HBA"); -+MODULE_LICENSE("GPL"); -+ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) -+#define sdev_dbg(sdev, fmt, a...) \ -+ dev_dbg(&(sdev)->sdev_gendev, fmt, ##a) -+#define scmd_dbg(scmd, fmt, a...) 
\ -+ dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a) -+#endif -+ -+#define VHBA_MAX_SECTORS_PER_IO 256 -+#define VHBA_MAX_BUS 16 -+#define VHBA_MAX_ID 16 -+#define VHBA_MAX_DEVICES (VHBA_MAX_BUS * (VHBA_MAX_ID-1)) -+#define VHBA_KBUF_SIZE PAGE_SIZE -+ -+#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL) -+#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL) -+ -+ -+static int vhba_can_queue = 32; -+module_param_named(can_queue, vhba_can_queue, int, 0); -+ -+ -+enum vhba_req_state { -+ VHBA_REQ_FREE, -+ VHBA_REQ_PENDING, -+ VHBA_REQ_READING, -+ VHBA_REQ_SENT, -+ VHBA_REQ_WRITING, -+}; -+ -+struct vhba_command { -+ struct scsi_cmnd *cmd; -+ /* metatags are per-host. not to be confused with -+ queue tags that are usually per-lun */ -+ unsigned long metatag; -+ int status; -+ struct list_head entry; -+}; -+ -+struct vhba_device { -+ unsigned int num; -+ spinlock_t cmd_lock; -+ struct list_head cmd_list; -+ wait_queue_head_t cmd_wq; -+ atomic_t refcnt; -+ -+ unsigned char *kbuf; -+ size_t kbuf_size; -+}; -+ -+struct vhba_host { -+ struct Scsi_Host *shost; -+ spinlock_t cmd_lock; -+ int cmd_next; -+ struct vhba_command *commands; -+ spinlock_t dev_lock; -+ struct vhba_device *devices[VHBA_MAX_DEVICES]; -+ int num_devices; -+ DECLARE_BITMAP(chgmap, VHBA_MAX_DEVICES); -+ int chgtype[VHBA_MAX_DEVICES]; -+ struct work_struct scan_devices; -+}; -+ -+#define MAX_COMMAND_SIZE 16 -+ -+struct vhba_request { -+ __u32 metatag; -+ __u32 lun; -+ __u8 cdb[MAX_COMMAND_SIZE]; -+ __u8 cdb_len; -+ __u32 data_len; -+}; -+ -+struct vhba_response { -+ __u32 metatag; -+ __u32 status; -+ __u32 data_len; -+}; -+ -+ -+ -+static struct vhba_command *vhba_alloc_command (void); -+static void vhba_free_command (struct vhba_command *vcmd); -+ -+static struct platform_device vhba_platform_device; -+ -+ -+ -+/* These functions define a symmetric 1:1 mapping between device numbers and -+ the bus and id. 
We have reserved the last id per bus for the host itself. */ -+static void devnum_to_bus_and_id(unsigned int devnum, unsigned int *bus, unsigned int *id) -+{ -+ *bus = devnum / (VHBA_MAX_ID-1); -+ *id = devnum % (VHBA_MAX_ID-1); -+} -+ -+static unsigned int bus_and_id_to_devnum(unsigned int bus, unsigned int id) -+{ -+ return (bus * (VHBA_MAX_ID-1)) + id; -+} -+ -+static struct vhba_device *vhba_device_alloc (void) -+{ -+ struct vhba_device *vdev; -+ -+ vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL); -+ if (!vdev) { -+ return NULL; -+ } -+ -+ spin_lock_init(&vdev->cmd_lock); -+ INIT_LIST_HEAD(&vdev->cmd_list); -+ init_waitqueue_head(&vdev->cmd_wq); -+ atomic_set(&vdev->refcnt, 1); -+ -+ vdev->kbuf = NULL; -+ vdev->kbuf_size = 0; -+ -+ return vdev; -+} -+ -+static void vhba_device_put (struct vhba_device *vdev) -+{ -+ if (atomic_dec_and_test(&vdev->refcnt)) { -+ kfree(vdev); -+ } -+} -+ -+static struct vhba_device *vhba_device_get (struct vhba_device *vdev) -+{ -+ atomic_inc(&vdev->refcnt); -+ -+ return vdev; -+} -+ -+static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd) -+{ -+ struct vhba_host *vhost; -+ struct vhba_command *vcmd; -+ unsigned long flags; -+ -+ vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ vcmd = vhba_alloc_command(); -+ if (!vcmd) { -+ return SCSI_MLQUEUE_HOST_BUSY; -+ } -+ -+ vcmd->cmd = cmd; -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) -+ vcmd->metatag = scsi_cmd_to_rq(vcmd->cmd)->tag; -+#else -+ vcmd->metatag = vcmd->cmd->request->tag; -+#endif -+ list_add_tail(&vcmd->entry, &vdev->cmd_list); -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ wake_up_interruptible(&vdev->cmd_wq); -+ -+ return 0; -+} -+ -+static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd) -+{ -+ struct vhba_command *vcmd; -+ int retval; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ list_for_each_entry(vcmd, 
&vdev->cmd_list, entry) { -+ if (vcmd->cmd == cmd) { -+ list_del_init(&vcmd->entry); -+ break; -+ } -+ } -+ -+ /* command not found */ -+ if (&vcmd->entry == &vdev->cmd_list) { -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ return SUCCESS; -+ } -+ -+ while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) { -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ scmd_dbg(cmd, "wait for I/O before aborting\n"); -+ schedule_timeout(1); -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ } -+ -+ retval = (vcmd->status == VHBA_REQ_SENT) ? FAILED : SUCCESS; -+ -+ vhba_free_command(vcmd); -+ -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ return retval; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) -+static int vhba_slave_alloc(struct scsi_device *sdev) -+{ -+ struct Scsi_Host *shost = sdev->host; -+ -+ sdev_dbg(sdev, "enabling tagging (queue depth: %i).\n", sdev->queue_depth); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) -+ if (!shost_use_blk_mq(shost) && shost->bqt) { -+#else -+ if (shost->bqt) { -+#endif -+ blk_queue_init_tags(sdev->request_queue, sdev->queue_depth, shost->bqt); -+ } -+ scsi_adjust_queue_depth(sdev, 0, sdev->queue_depth); -+ -+ return 0; -+} -+#endif -+ -+static void vhba_scan_devices_add (struct vhba_host *vhost, int bus, int id) -+{ -+ struct scsi_device *sdev; -+ -+ sdev = scsi_device_lookup(vhost->shost, bus, id, 0); -+ if (!sdev) { -+ scsi_add_device(vhost->shost, bus, id, 0); -+ } else { -+ dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device %d:%d:0!\n", bus, id); -+ scsi_device_put(sdev); -+ } -+} -+ -+static void vhba_scan_devices_remove (struct vhba_host *vhost, int bus, int id) -+{ -+ struct scsi_device *sdev; -+ -+ sdev = scsi_device_lookup(vhost->shost, bus, id, 0); -+ if (sdev) { -+ scsi_remove_device(sdev); -+ scsi_device_put(sdev); -+ } else { -+ dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device %d:%d:0!\n", bus, id); -+ } -+} -+ 
-+static void vhba_scan_devices (struct work_struct *work) -+{ -+ struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices); -+ unsigned long flags; -+ int change, exists; -+ unsigned int devnum; -+ unsigned int bus, id; -+ -+ for (;;) { -+ spin_lock_irqsave(&vhost->dev_lock, flags); -+ -+ devnum = find_first_bit(vhost->chgmap, VHBA_MAX_DEVICES); -+ if (devnum >= VHBA_MAX_DEVICES) { -+ spin_unlock_irqrestore(&vhost->dev_lock, flags); -+ break; -+ } -+ change = vhost->chgtype[devnum]; -+ exists = vhost->devices[devnum] != NULL; -+ -+ vhost->chgtype[devnum] = 0; -+ clear_bit(devnum, vhost->chgmap); -+ -+ spin_unlock_irqrestore(&vhost->dev_lock, flags); -+ -+ devnum_to_bus_and_id(devnum, &bus, &id); -+ -+ if (change < 0) { -+ dev_dbg(&vhost->shost->shost_gendev, "trying to remove target %d:%d:0\n", bus, id); -+ vhba_scan_devices_remove(vhost, bus, id); -+ } else if (change > 0) { -+ dev_dbg(&vhost->shost->shost_gendev, "trying to add target %d:%d:0\n", bus, id); -+ vhba_scan_devices_add(vhost, bus, id); -+ } else { -+ /* quick sequence of add/remove or remove/add; we determine -+ which one it was by checking if device structure exists */ -+ if (exists) { -+ /* remove followed by add: remove and (re)add */ -+ dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target %d:%d:0\n", bus, id); -+ vhba_scan_devices_remove(vhost, bus, id); -+ vhba_scan_devices_add(vhost, bus, id); -+ } else { -+ /* add followed by remove: no-op */ -+ dev_dbg(&vhost->shost->shost_gendev, "no-op for target %d:%d:0\n", bus, id); -+ } -+ } -+ } -+} -+ -+static int vhba_add_device (struct vhba_device *vdev) -+{ -+ struct vhba_host *vhost; -+ unsigned int devnum; -+ unsigned long flags; -+ -+ vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ vhba_device_get(vdev); -+ -+ spin_lock_irqsave(&vhost->dev_lock, flags); -+ if (vhost->num_devices >= VHBA_MAX_DEVICES) { -+ spin_unlock_irqrestore(&vhost->dev_lock, flags); -+ vhba_device_put(vdev); -+ return -EBUSY; -+ } -+ -+ 
for (devnum = 0; devnum < VHBA_MAX_DEVICES; devnum++) { -+ if (vhost->devices[devnum] == NULL) { -+ vdev->num = devnum; -+ vhost->devices[devnum] = vdev; -+ vhost->num_devices++; -+ set_bit(devnum, vhost->chgmap); -+ vhost->chgtype[devnum]++; -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&vhost->dev_lock, flags); -+ -+ schedule_work(&vhost->scan_devices); -+ -+ return 0; -+} -+ -+static int vhba_remove_device (struct vhba_device *vdev) -+{ -+ struct vhba_host *vhost; -+ unsigned long flags; -+ -+ vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ spin_lock_irqsave(&vhost->dev_lock, flags); -+ set_bit(vdev->num, vhost->chgmap); -+ vhost->chgtype[vdev->num]--; -+ vhost->devices[vdev->num] = NULL; -+ vhost->num_devices--; -+ spin_unlock_irqrestore(&vhost->dev_lock, flags); -+ -+ vhba_device_put(vdev); -+ -+ schedule_work(&vhost->scan_devices); -+ -+ return 0; -+} -+ -+static struct vhba_device *vhba_lookup_device (int devnum) -+{ -+ struct vhba_host *vhost; -+ struct vhba_device *vdev = NULL; -+ unsigned long flags; -+ -+ vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ if (likely(devnum < VHBA_MAX_DEVICES)) { -+ spin_lock_irqsave(&vhost->dev_lock, flags); -+ vdev = vhost->devices[devnum]; -+ if (vdev) { -+ vdev = vhba_device_get(vdev); -+ } -+ -+ spin_unlock_irqrestore(&vhost->dev_lock, flags); -+ } -+ -+ return vdev; -+} -+ -+static struct vhba_command *vhba_alloc_command (void) -+{ -+ struct vhba_host *vhost; -+ struct vhba_command *vcmd; -+ unsigned long flags; -+ int i; -+ -+ vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ spin_lock_irqsave(&vhost->cmd_lock, flags); -+ -+ vcmd = vhost->commands + vhost->cmd_next++; -+ if (vcmd->status != VHBA_REQ_FREE) { -+ for (i = 0; i < vhba_can_queue; i++) { -+ vcmd = vhost->commands + i; -+ -+ if (vcmd->status == VHBA_REQ_FREE) { -+ vhost->cmd_next = i + 1; -+ break; -+ } -+ } -+ -+ if (i == vhba_can_queue) { -+ vcmd = NULL; -+ } -+ } -+ -+ if (vcmd) { -+ vcmd->status = VHBA_REQ_PENDING; -+ } -+ 
-+ vhost->cmd_next %= vhba_can_queue; -+ -+ spin_unlock_irqrestore(&vhost->cmd_lock, flags); -+ -+ return vcmd; -+} -+ -+static void vhba_free_command (struct vhba_command *vcmd) -+{ -+ struct vhba_host *vhost; -+ unsigned long flags; -+ -+ vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ spin_lock_irqsave(&vhost->cmd_lock, flags); -+ vcmd->status = VHBA_REQ_FREE; -+ spin_unlock_irqrestore(&vhost->cmd_lock, flags); -+} -+ -+static int vhba_queuecommand (struct Scsi_Host *shost, struct scsi_cmnd *cmd) -+{ -+ struct vhba_device *vdev; -+ int retval; -+ unsigned int devnum; -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) -+ scmd_dbg(cmd, "queue %p tag %i\n", cmd, scsi_cmd_to_rq(cmd)->tag); -+#else -+ scmd_dbg(cmd, "queue %p tag %i\n", cmd, cmd->request->tag); -+#endif -+ -+ devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); -+ vdev = vhba_lookup_device(devnum); -+ if (!vdev) { -+ scmd_dbg(cmd, "no such device\n"); -+ -+ cmd->result = DID_NO_CONNECT << 16; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) -+ scsi_done(cmd); -+#else -+ cmd->scsi_done(cmd); -+#endif -+ -+ return 0; -+ } -+ -+ retval = vhba_device_queue(vdev, cmd); -+ -+ vhba_device_put(vdev); -+ -+ return retval; -+} -+ -+static int vhba_abort (struct scsi_cmnd *cmd) -+{ -+ struct vhba_device *vdev; -+ int retval = SUCCESS; -+ unsigned int devnum; -+ -+ scmd_dbg(cmd, "abort %p\n", cmd); -+ -+ devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); -+ vdev = vhba_lookup_device(devnum); -+ if (vdev) { -+ retval = vhba_device_dequeue(vdev, cmd); -+ vhba_device_put(vdev); -+ } else { -+ cmd->result = DID_NO_CONNECT << 16; -+ } -+ -+ return retval; -+} -+ -+static struct scsi_host_template vhba_template = { -+ .module = THIS_MODULE, -+ .name = "vhba", -+ .proc_name = "vhba", -+ .queuecommand = vhba_queuecommand, -+ .eh_abort_handler = vhba_abort, -+ .this_id = -1, -+ .max_sectors = VHBA_MAX_SECTORS_PER_IO, -+ .sg_tablesize = 256, -+#if LINUX_VERSION_CODE < 
KERNEL_VERSION(3, 19, 0) -+ .slave_alloc = vhba_slave_alloc, -+#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(6, 14, 0) -+ .tag_alloc_policy = BLK_TAG_ALLOC_RR, -+#else -+ .tag_alloc_policy_rr = true, -+#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -+ .use_blk_tags = 1, -+#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) -+ .max_segment_size = VHBA_KBUF_SIZE, -+#endif -+}; -+ -+static ssize_t do_request (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, char __user *buf, size_t buf_len) -+{ -+ struct vhba_request vreq; -+ ssize_t ret; -+ -+ scmd_dbg(cmd, "request %lu (%p), cdb 0x%x, bufflen %d, sg count %d\n", -+ metatag, cmd, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd)); -+ -+ ret = sizeof(vreq); -+ if (DATA_TO_DEVICE(cmd->sc_data_direction)) { -+ ret += scsi_bufflen(cmd); -+ } -+ -+ if (ret > buf_len) { -+ scmd_dbg(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret); -+ return -EIO; -+ } -+ -+ vreq.metatag = metatag; -+ vreq.lun = cmd->device->lun; -+ memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE); -+ vreq.cdb_len = cmd->cmd_len; -+ vreq.data_len = scsi_bufflen(cmd); -+ -+ if (copy_to_user(buf, &vreq, sizeof(vreq))) { -+ return -EFAULT; -+ } -+ -+ if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) { -+ buf += sizeof(vreq); -+ -+ if (scsi_sg_count(cmd)) { -+ unsigned char *kaddr, *uaddr; -+ struct scatterlist *sglist = scsi_sglist(cmd); -+ struct scatterlist *sg; -+ int i; -+ -+ uaddr = (unsigned char *) buf; -+ -+ for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { -+ size_t len = sg->length; -+ -+ if (len > vdev->kbuf_size) { -+ scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); -+ len = vdev->kbuf_size; -+ } -+ -+ kaddr = kmap_atomic(sg_page(sg)); -+ memcpy(vdev->kbuf, kaddr + sg->offset, len); -+ kunmap_atomic(kaddr); -+ -+ if (copy_to_user(uaddr, 
vdev->kbuf, len)) { -+ return -EFAULT; -+ } -+ uaddr += len; -+ } -+ } else { -+ if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) { -+ return -EFAULT; -+ } -+ } -+ } -+ -+ return ret; -+} -+ -+static ssize_t do_response (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res) -+{ -+ ssize_t ret = 0; -+ -+ scmd_dbg(cmd, "response %lu (%p), status %x, data len %d, sg count %d\n", -+ metatag, cmd, res->status, res->data_len, scsi_sg_count(cmd)); -+ -+ if (res->status) { -+ if (res->data_len > SCSI_SENSE_BUFFERSIZE) { -+ scmd_dbg(cmd, "truncate sense (%d < %d)", SCSI_SENSE_BUFFERSIZE, res->data_len); -+ res->data_len = SCSI_SENSE_BUFFERSIZE; -+ } -+ -+ if (copy_from_user(cmd->sense_buffer, buf, res->data_len)) { -+ return -EFAULT; -+ } -+ -+ cmd->result = res->status; -+ -+ ret += res->data_len; -+ } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) { -+ size_t to_read; -+ -+ if (res->data_len > scsi_bufflen(cmd)) { -+ scmd_dbg(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len); -+ res->data_len = scsi_bufflen(cmd); -+ } -+ -+ to_read = res->data_len; -+ -+ if (scsi_sg_count(cmd)) { -+ unsigned char *kaddr, *uaddr; -+ struct scatterlist *sglist = scsi_sglist(cmd); -+ struct scatterlist *sg; -+ int i; -+ -+ uaddr = (unsigned char *)buf; -+ -+ for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { -+ size_t len = (sg->length < to_read) ? 
sg->length : to_read; -+ -+ if (len > vdev->kbuf_size) { -+ scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); -+ len = vdev->kbuf_size; -+ } -+ -+ if (copy_from_user(vdev->kbuf, uaddr, len)) { -+ return -EFAULT; -+ } -+ uaddr += len; -+ -+ kaddr = kmap_atomic(sg_page(sg)); -+ memcpy(kaddr + sg->offset, vdev->kbuf, len); -+ kunmap_atomic(kaddr); -+ -+ to_read -= len; -+ if (to_read == 0) { -+ break; -+ } -+ } -+ } else { -+ if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) { -+ return -EFAULT; -+ } -+ -+ to_read -= res->data_len; -+ } -+ -+ scsi_set_resid(cmd, to_read); -+ -+ ret += res->data_len - to_read; -+ } -+ -+ return ret; -+} -+ -+static struct vhba_command *next_command (struct vhba_device *vdev) -+{ -+ struct vhba_command *vcmd; -+ -+ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { -+ if (vcmd->status == VHBA_REQ_PENDING) { -+ break; -+ } -+ } -+ -+ if (&vcmd->entry == &vdev->cmd_list) { -+ vcmd = NULL; -+ } -+ -+ return vcmd; -+} -+ -+static struct vhba_command *match_command (struct vhba_device *vdev, __u32 metatag) -+{ -+ struct vhba_command *vcmd; -+ -+ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { -+ if (vcmd->metatag == metatag) { -+ break; -+ } -+ } -+ -+ if (&vcmd->entry == &vdev->cmd_list) { -+ vcmd = NULL; -+ } -+ -+ return vcmd; -+} -+ -+static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags) -+{ -+ struct vhba_command *vcmd; -+ DEFINE_WAIT(wait); -+ -+ while (!(vcmd = next_command(vdev))) { -+ if (signal_pending(current)) { -+ break; -+ } -+ -+ prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE); -+ -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ schedule(); -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ } -+ -+ finish_wait(&vdev->cmd_wq, &wait); -+ if (vcmd) { -+ vcmd->status = VHBA_REQ_READING; -+ } -+ -+ return vcmd; -+} -+ -+static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset) -+{ -+ struct 
vhba_device *vdev; -+ struct vhba_command *vcmd; -+ ssize_t ret; -+ unsigned long flags; -+ -+ vdev = file->private_data; -+ -+ /* Get next command */ -+ if (file->f_flags & O_NONBLOCK) { -+ /* Non-blocking variant */ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ vcmd = next_command(vdev); -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ if (!vcmd) { -+ return -EWOULDBLOCK; -+ } -+ } else { -+ /* Blocking variant */ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ vcmd = wait_command(vdev, flags); -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ if (!vcmd) { -+ return -ERESTARTSYS; -+ } -+ } -+ -+ ret = do_request(vdev, vcmd->metatag, vcmd->cmd, buf, buf_len); -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ if (ret >= 0) { -+ vcmd->status = VHBA_REQ_SENT; -+ *offset += ret; -+ } else { -+ vcmd->status = VHBA_REQ_PENDING; -+ } -+ -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ return ret; -+} -+ -+static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset) -+{ -+ struct vhba_device *vdev; -+ struct vhba_command *vcmd; -+ struct vhba_response res; -+ ssize_t ret; -+ unsigned long flags; -+ -+ if (buf_len < sizeof(res)) { -+ return -EIO; -+ } -+ -+ if (copy_from_user(&res, buf, sizeof(res))) { -+ return -EFAULT; -+ } -+ -+ vdev = file->private_data; -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ vcmd = match_command(vdev, res.metatag); -+ if (!vcmd || vcmd->status != VHBA_REQ_SENT) { -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ pr_debug("ctl dev #%u not expecting response\n", vdev->num); -+ return -EIO; -+ } -+ vcmd->status = VHBA_REQ_WRITING; -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ ret = do_response(vdev, vcmd->metatag, vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res); -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ if (ret >= 0) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) -+ scsi_done(vcmd->cmd); -+#else -+ 
vcmd->cmd->scsi_done(vcmd->cmd); -+#endif -+ ret += sizeof(res); -+ -+ /* don't compete with vhba_device_dequeue */ -+ if (!list_empty(&vcmd->entry)) { -+ list_del_init(&vcmd->entry); -+ vhba_free_command(vcmd); -+ } -+ } else { -+ vcmd->status = VHBA_REQ_SENT; -+ } -+ -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ return ret; -+} -+ -+static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ struct vhba_device *vdev = file->private_data; -+ struct vhba_host *vhost = platform_get_drvdata(&vhba_platform_device); -+ -+ switch (cmd) { -+ case 0xBEEF001: { -+ unsigned int ident[4]; /* host, channel, id, lun */ -+ -+ ident[0] = vhost->shost->host_no; -+ devnum_to_bus_and_id(vdev->num, &ident[1], &ident[2]); -+ ident[3] = 0; /* lun */ -+ -+ if (copy_to_user((void *) arg, ident, sizeof(ident))) { -+ return -EFAULT; -+ } -+ -+ return 0; -+ } -+ case 0xBEEF002: { -+ unsigned int devnum = vdev->num; -+ -+ if (copy_to_user((void *) arg, &devnum, sizeof(devnum))) { -+ return -EFAULT; -+ } -+ -+ return 0; -+ } -+ } -+ -+ return -ENOTTY; -+} -+ -+#ifdef CONFIG_COMPAT -+static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ unsigned long compat_arg = (unsigned long)compat_ptr(arg); -+ return vhba_ctl_ioctl(file, cmd, compat_arg); -+} -+#endif -+ -+static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait) -+{ -+ struct vhba_device *vdev = file->private_data; -+ unsigned int mask = 0; -+ unsigned long flags; -+ -+ poll_wait(file, &vdev->cmd_wq, wait); -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ if (next_command(vdev)) { -+ mask |= POLLIN | POLLRDNORM; -+ } -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ return mask; -+} -+ -+static int vhba_ctl_open (struct inode *inode, struct file *file) -+{ -+ struct vhba_device *vdev; -+ int retval; -+ -+ pr_debug("ctl dev open\n"); -+ -+ /* check if vhba is probed */ -+ if (!platform_get_drvdata(&vhba_platform_device)) { -+ 
return -ENODEV; -+ } -+ -+ vdev = vhba_device_alloc(); -+ if (!vdev) { -+ return -ENOMEM; -+ } -+ -+ vdev->kbuf_size = VHBA_KBUF_SIZE; -+ vdev->kbuf = kzalloc(vdev->kbuf_size, GFP_KERNEL); -+ if (!vdev->kbuf) { -+ return -ENOMEM; -+ } -+ -+ if (!(retval = vhba_add_device(vdev))) { -+ file->private_data = vdev; -+ } -+ -+ vhba_device_put(vdev); -+ -+ return retval; -+} -+ -+static int vhba_ctl_release (struct inode *inode, struct file *file) -+{ -+ struct vhba_device *vdev; -+ struct vhba_command *vcmd; -+ unsigned long flags; -+ -+ vdev = file->private_data; -+ -+ pr_debug("ctl dev release\n"); -+ -+ vhba_device_get(vdev); -+ vhba_remove_device(vdev); -+ -+ spin_lock_irqsave(&vdev->cmd_lock, flags); -+ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { -+ WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING); -+ -+ scmd_dbg(vcmd->cmd, "device released with command %lu (%p)\n", vcmd->metatag, vcmd->cmd); -+ vcmd->cmd->result = DID_NO_CONNECT << 16; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) -+ scsi_done(vcmd->cmd); -+#else -+ vcmd->cmd->scsi_done(vcmd->cmd); -+#endif -+ vhba_free_command(vcmd); -+ } -+ INIT_LIST_HEAD(&vdev->cmd_list); -+ spin_unlock_irqrestore(&vdev->cmd_lock, flags); -+ -+ kfree(vdev->kbuf); -+ vdev->kbuf = NULL; -+ -+ vhba_device_put(vdev); -+ -+ return 0; -+} -+ -+static struct file_operations vhba_ctl_fops = { -+ .owner = THIS_MODULE, -+ .open = vhba_ctl_open, -+ .release = vhba_ctl_release, -+ .read = vhba_ctl_read, -+ .write = vhba_ctl_write, -+ .poll = vhba_ctl_poll, -+ .unlocked_ioctl = vhba_ctl_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = vhba_ctl_compat_ioctl, -+#endif -+}; -+ -+static struct miscdevice vhba_miscdev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "vhba_ctl", -+ .fops = &vhba_ctl_fops, -+}; -+ -+static int vhba_probe (struct platform_device *pdev) -+{ -+ struct Scsi_Host *shost; -+ struct vhba_host *vhost; -+ int i; -+ -+ vhba_can_queue = clamp(vhba_can_queue, 1, 256); -+ -+ shost = 
scsi_host_alloc(&vhba_template, sizeof(struct vhba_host)); -+ if (!shost) { -+ return -ENOMEM; -+ } -+ -+ shost->max_channel = VHBA_MAX_BUS-1; -+ shost->max_id = VHBA_MAX_ID; -+ /* we don't support lun > 0 */ -+ shost->max_lun = 1; -+ shost->max_cmd_len = MAX_COMMAND_SIZE; -+ shost->can_queue = vhba_can_queue; -+ shost->cmd_per_lun = vhba_can_queue; -+ -+ vhost = (struct vhba_host *)shost->hostdata; -+ memset(vhost, 0, sizeof(struct vhba_host)); -+ -+ vhost->shost = shost; -+ vhost->num_devices = 0; -+ spin_lock_init(&vhost->dev_lock); -+ spin_lock_init(&vhost->cmd_lock); -+ INIT_WORK(&vhost->scan_devices, vhba_scan_devices); -+ vhost->cmd_next = 0; -+ vhost->commands = kzalloc(vhba_can_queue * sizeof(struct vhba_command), GFP_KERNEL); -+ if (!vhost->commands) { -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < vhba_can_queue; i++) { -+ vhost->commands[i].status = VHBA_REQ_FREE; -+ } -+ -+ platform_set_drvdata(pdev, vhost); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -+ i = scsi_init_shared_tag_map(shost, vhba_can_queue); -+ if (i) return i; -+#endif -+ -+ if (scsi_add_host(shost, &pdev->dev)) { -+ scsi_host_put(shost); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) -+static int vhba_remove (struct platform_device *pdev) -+#else -+static void vhba_remove (struct platform_device *pdev) -+#endif -+{ -+ struct vhba_host *vhost; -+ struct Scsi_Host *shost; -+ -+ vhost = platform_get_drvdata(pdev); -+ shost = vhost->shost; -+ -+ scsi_remove_host(shost); -+ scsi_host_put(shost); -+ -+ kfree(vhost->commands); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) -+ return 0; -+#endif -+} -+ -+static void vhba_release (struct device * dev) -+{ -+ return; -+} -+ -+static struct platform_device vhba_platform_device = { -+ .name = "vhba", -+ .id = -1, -+ .dev = { -+ .release = vhba_release, -+ }, -+}; -+ -+static struct platform_driver vhba_platform_driver = { -+ .driver = { -+ .owner = THIS_MODULE, -+ .name = "vhba", 
-+ }, -+ .probe = vhba_probe, -+ .remove = vhba_remove, -+}; -+ -+static int __init vhba_init (void) -+{ -+ int ret; -+ -+ ret = platform_device_register(&vhba_platform_device); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ ret = platform_driver_register(&vhba_platform_driver); -+ if (ret < 0) { -+ platform_device_unregister(&vhba_platform_device); -+ return ret; -+ } -+ -+ ret = misc_register(&vhba_miscdev); -+ if (ret < 0) { -+ platform_driver_unregister(&vhba_platform_driver); -+ platform_device_unregister(&vhba_platform_device); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void __exit vhba_exit(void) -+{ -+ misc_deregister(&vhba_miscdev); -+ platform_driver_unregister(&vhba_platform_driver); -+ platform_device_unregister(&vhba_platform_device); -+} -+ -+module_init(vhba_init); -+module_exit(vhba_exit); -+ -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 1ae97a0b8ec7..db640e1b17ec 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -194,6 +194,14 @@ static inline void __mm_zero_struct_page(struct page *page) - - extern int sysctl_max_map_count; - -+extern bool sysctl_workingset_protection; -+extern u8 sysctl_anon_min_ratio; -+extern u8 sysctl_clean_low_ratio; -+extern u8 sysctl_clean_min_ratio; -+int vm_workingset_protection_update_handler( -+ const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos); -+ - extern unsigned long sysctl_user_reserve_kbytes; - extern unsigned long sysctl_admin_reserve_kbytes; - -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index 12a12dae727d..b460a691b357 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -1337,7 +1337,7 @@ struct readahead_control { - ._index = i, \ - } - --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_8M / PAGE_SIZE) - - void page_cache_ra_unbounded(struct readahead_control *, - unsigned long nr_to_read, unsigned long lookahead_count); -diff --git 
a/include/linux/user_namespace.h b/include/linux/user_namespace.h -index a0bb6d012137..93129fea552e 100644 ---- a/include/linux/user_namespace.h -+++ b/include/linux/user_namespace.h -@@ -168,6 +168,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, - - #ifdef CONFIG_USER_NS - -+extern int unprivileged_userns_clone; -+ - static inline struct user_namespace *get_user_ns(struct user_namespace *ns) - { - if (ns) -@@ -201,6 +203,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); - struct ns_common *ns_get_owner(struct ns_common *ns); - #else - -+#define unprivileged_userns_clone 0 -+ - static inline struct user_namespace *get_user_ns(struct user_namespace *ns) - { - return &init_user_ns; -diff --git a/init/Kconfig b/init/Kconfig -index d811cad02a75..e4b7a7062838 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -171,6 +171,10 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config CACHY -+ bool "Some kernel tweaks by CachyOS" -+ default y -+ - config BROKEN - bool - help -@@ -1375,6 +1379,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. -+ - config PID_NS - bool "PID Namespaces" - default y -@@ -1524,6 +1544,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - with the "-O2" compiler flag for best performance and most - helpful compile-time warnings. 
- -+config CC_OPTIMIZE_FOR_PERFORMANCE_O3 -+ bool "Optimize more for performance (-O3)" -+ help -+ Choosing this option will pass "-O3" to your compiler to optimize -+ the kernel yet more for performance. -+ - config CC_OPTIMIZE_FOR_SIZE - bool "Optimize for size (-Os)" - help -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index ce1435cb08b1..e1359db5561e 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -40,6 +40,27 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with good smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ -+ config HZ_600 -+ bool "600 HZ" -+ help -+ 600 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with good smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with good smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -53,6 +74,9 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 -+ default 600 if HZ_600 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index 54ea59ff8fbe..18f87e0dd137 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -88,7 +88,7 @@ endchoice - - config PREEMPT_RT - bool "Fully Preemptible Kernel (Real-Time)" -- depends on EXPERT && ARCH_SUPPORTS_RT && !COMPILE_TEST -+ depends on ARCH_SUPPORTS_RT && !COMPILE_TEST - select PREEMPTION - help - This option turns the kernel into a real-time kernel by replacing -diff --git a/kernel/fork.c b/kernel/fork.c -index af673856499d..d91fa2d9bce1 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -107,6 +107,10 @@ - #include - #include - -+#ifdef CONFIG_USER_NS -+#include -+#endif -+ - #include - #include - #include -@@ -1938,6 +1942,10 @@ __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. 
-@@ -3105,6 +3113,12 @@ int ksys_unshare(unsigned long unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index 24df4d98f7d2..1d5923996fa5 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -746,6 +746,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) - struct task_struct *new, *owner; - unsigned long flags, new_flags; - enum owner_state state; -+ int i = 0; - - lockdep_assert_preemption_disabled(); - -@@ -782,7 +783,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) - break; - } - -- cpu_relax(); -+ if (i++ > 1000) -+ cpu_relax(); - } - - return state; -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index b173a059315c..226a96cd2536 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -76,10 +76,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; - * - * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_CACHY -+unsigned int sysctl_sched_base_slice = 350000ULL; -+static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; -+#else - unsigned int sysctl_sched_base_slice = 700000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; -+#endif /* CONFIG_CACHY */ - -+#ifdef CONFIG_CACHY -+__read_mostly unsigned int sysctl_sched_migration_cost = 300000UL; -+#else - __read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - static int __init setup_sched_thermal_decay_shift(char *str) - { -@@ -122,8 +131,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_CACHY -+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - static 
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - #ifdef CONFIG_NUMA_BALANCING - /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index be9745d104f7..4ee277cb92b9 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2769,7 +2769,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); - - extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - --#ifdef CONFIG_PREEMPT_RT -+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY) - # define SCHED_NR_MIGRATE_BREAK 8 - #else - # define SCHED_NR_MIGRATE_BREAK 32 -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index cb6196e3fa99..cc5bf841e3fe 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -23,6 +23,10 @@ - #include - #include - -+#ifdef CONFIG_USER_NS -+#include -+#endif -+ - /* shared constants to be used in various sysctls */ - const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; - EXPORT_SYMBOL(sysctl_vals); -@@ -1455,6 +1459,15 @@ int proc_do_static_key(const struct ctl_table *table, int write, - } - - static const struct ctl_table sysctl_subsys_table[] = { -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "sysctl_writes_strict", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 682f40d5632d..434a25f7b2ed 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -22,6 +22,13 @@ - #include - #include - -+/* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else -+int unprivileged_userns_clone; -+#endif -+ - static struct kmem_cache *user_ns_cachep __ro_after_init; - static DEFINE_MUTEX(userns_state_mutex); - -diff --git a/mm/Kconfig 
b/mm/Kconfig -index e443fe8cd6cf..d3148d9d335d 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -462,6 +462,69 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP - config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT - bool - -+config ANON_MIN_RATIO -+ int "Default value for vm.anon_min_ratio" -+ depends on SYSCTL -+ range 0 100 -+ default 1 -+ help -+ This option sets the default value for vm.anon_min_ratio sysctl knob. -+ -+ The vm.anon_min_ratio sysctl knob provides *hard* protection of -+ anonymous pages. The anonymous pages on the current node won't be -+ reclaimed under any conditions when their amount is below -+ vm.anon_min_ratio. This knob may be used to prevent excessive swap -+ thrashing when anonymous memory is low (for example, when memory is -+ going to be overfilled by compressed data of zram module). -+ -+ Setting this value too high (close to MemTotal) can result in -+ inability to swap and can lead to early OOM under memory pressure. -+ -+config CLEAN_LOW_RATIO -+ int "Default value for vm.clean_low_ratio" -+ depends on SYSCTL -+ range 0 100 -+ default 15 -+ help -+ This option sets the default value for vm.clean_low_ratio sysctl knob. -+ -+ The vm.clean_low_ratio sysctl knob provides *best-effort* -+ protection of clean file pages. The file pages on the current node -+ won't be reclaimed under memory pressure when the amount of clean file -+ pages is below vm.clean_low_ratio *unless* we threaten to OOM. -+ Protection of clean file pages using this knob may be used when -+ swapping is still possible to -+ - prevent disk I/O thrashing under memory pressure; -+ - improve performance in disk cache-bound tasks under memory -+ pressure. -+ -+ Setting it to a high value may result in a early eviction of anonymous -+ pages into the swap space by attempting to hold the protected amount -+ of clean file pages in memory. 
-+ -+config CLEAN_MIN_RATIO -+ int "Default value for vm.clean_min_ratio" -+ depends on SYSCTL -+ range 0 100 -+ default 4 -+ help -+ This option sets the default value for vm.clean_min_ratio sysctl knob. -+ -+ The vm.clean_min_ratio sysctl knob provides *hard* protection of -+ clean file pages. The file pages on the current node won't be -+ reclaimed under memory pressure when the amount of clean file pages is -+ below vm.clean_min_ratio. Hard protection of clean file pages using -+ this knob may be used to -+ - prevent disk I/O thrashing under memory pressure even with no free -+ swap space; -+ - improve performance in disk cache-bound tasks under memory -+ pressure; -+ - avoid high latency and prevent livelock in near-OOM conditions. -+ -+ Setting it to a high value may result in a early out-of-memory condition -+ due to the inability to reclaim the protected amount of clean file pages -+ when other types of pages cannot be reclaimed. -+ - config HAVE_MEMBLOCK_PHYS_MAP - bool - -@@ -658,7 +721,7 @@ config COMPACTION - config COMPACT_UNEVICTABLE_DEFAULT - int - depends on COMPACTION -- default 0 if PREEMPT_RT -+ default 0 if PREEMPT_RT || CACHY - default 1 - - # -diff --git a/mm/compaction.c b/mm/compaction.c -index bf021b31c7ec..cd1c1ece9888 100644 ---- a/mm/compaction.c -+++ b/mm/compaction.c -@@ -1887,7 +1887,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE - * aggressively the kernel should compact memory in the - * background. It takes values in the range [0, 100]. 
- */ -+#ifdef CONFIG_CACHY -+static unsigned int __read_mostly sysctl_compaction_proactiveness; -+#else - static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; -+#endif - static int sysctl_extfrag_threshold = 500; - static int __read_mostly sysctl_compact_memory; - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 9c38a95e9f09..4bc77b92d649 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -63,7 +63,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1<> (20 - PAGE_SHIFT); - - /* Use a smaller cluster for small-memory machines */ -@@ -1103,6 +1107,7 @@ void __init swap_setup(void) - page_cluster = 2; - else - page_cluster = 3; -+#endif /* CONFIG_CACHY */ - /* - * Right now other parts of the system means that we - * _really_ don't want to cluster much more -diff --git a/mm/util.c b/mm/util.c -index f814e6a59ab1..a84d4f4a6195 100644 ---- a/mm/util.c -+++ b/mm/util.c -@@ -858,6 +858,40 @@ static const struct ctl_table util_sysctl_table[] = { - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - }, -+ { -+ .procname = "workingset_protection", -+ .data = &sysctl_workingset_protection, -+ .maxlen = sizeof(bool), -+ .mode = 0644, -+ .proc_handler = &proc_dobool, -+ }, -+ { -+ .procname = "anon_min_ratio", -+ .data = &sysctl_anon_min_ratio, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = &vm_workingset_protection_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE_HUNDRED, -+ }, -+ { -+ .procname = "clean_low_ratio", -+ .data = &sysctl_clean_low_ratio, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = &vm_workingset_protection_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE_HUNDRED, -+ }, -+ { -+ .procname = "clean_min_ratio", -+ .data = &sysctl_clean_min_ratio, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = &vm_workingset_protection_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = 
SYSCTL_ONE_HUNDRED, -+ }, - }; - - static int __init init_vm_util_sysctls(void) -diff --git a/mm/vmpressure.c b/mm/vmpressure.c -index c197ed47bcc4..1b359dcc88c4 100644 ---- a/mm/vmpressure.c -+++ b/mm/vmpressure.c -@@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; - * essence, they are percents: the higher the value, the more number - * unsuccessful reclaims there were. - */ -+#ifdef CONFIG_CACHY -+static const unsigned int vmpressure_level_med = 65; -+#else - static const unsigned int vmpressure_level_med = 60; -+#endif - static const unsigned int vmpressure_level_critical = 95; - - /* -diff --git a/mm/vmscan.c b/mm/vmscan.c -index a48aec8bfd92..e2c3f8712bbb 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -147,6 +147,15 @@ struct scan_control { - /* The file folios on the current node are dangerously low */ - unsigned int file_is_tiny:1; - -+ /* The anonymous pages on the current node are below vm.anon_min_ratio */ -+ unsigned int anon_below_min:1; -+ -+ /* The clean file pages on the current node are below vm.clean_low_ratio */ -+ unsigned int clean_below_low:1; -+ -+ /* The clean file pages on the current node are below vm.clean_min_ratio */ -+ unsigned int clean_below_min:1; -+ - /* Always discard instead of demoting to lower tier memory */ - unsigned int no_demotion:1; - -@@ -196,10 +205,23 @@ struct scan_control { - #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) - #endif - -+bool sysctl_workingset_protection __read_mostly = true; -+u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; -+u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; -+u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; -+static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; -+static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; -+static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; -+static u64 workingset_protection_prev_totalram __read_mostly = 0; -+ - /* - * From 0 .. 
MAX_SWAPPINESS. Higher means more swappy. - */ -+#ifdef CONFIG_CACHY -+int vm_swappiness = 100; -+#else - int vm_swappiness = 60; -+#endif - - #ifdef CONFIG_MEMCG - -@@ -1157,6 +1179,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, - if (!sc->may_unmap && folio_mapped(folio)) - goto keep_locked; - -+ if (folio_is_file_lru(folio) ? sc->clean_below_min : -+ (sc->anon_below_min && !sc->clean_below_min)) -+ goto keep_locked; -+ - /* - * The number of dirty pages determines if a node is marked - * reclaim_congested. kswapd will stall and start writing -@@ -2606,6 +2632,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, - goto out; - } - -+ /* -+ * Force-scan anon if clean file pages is under vm.clean_low_ratio -+ * or vm.clean_min_ratio. -+ */ -+ if (sc->clean_below_low || sc->clean_below_min) { -+ scan_balance = SCAN_ANON; -+ goto out; -+ } -+ - /* - * If there is enough inactive page cache, we do not reclaim - * anything from the anonymous working right now to make sure -@@ -2664,6 +2699,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, - BUG(); - } - -+ /* -+ * Hard protection of the working set. -+ * Don't reclaim anon/file pages when the amount is -+ * below the watermark of the same type. -+ */ -+ if (file ? 
sc->clean_below_min : sc->anon_below_min) -+ scan = 0; -+ - nr[lru] = scan; - } - } -@@ -2684,6 +2727,96 @@ static bool can_age_anon_pages(struct lruvec *lruvec, - lruvec_memcg(lruvec)); - } - -+int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ workingset_protection_prev_totalram = 0; -+ -+ return 0; -+} -+ -+static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) -+{ -+ unsigned long node_mem_total; -+ struct sysinfo i; -+ -+ if (!(sysctl_workingset_protection)) { -+ sc->anon_below_min = 0; -+ sc->clean_below_low = 0; -+ sc->clean_below_min = 0; -+ return; -+ } -+ -+ if (likely(sysctl_anon_min_ratio || -+ sysctl_clean_low_ratio || -+ sysctl_clean_min_ratio)) { -+#ifdef CONFIG_NUMA -+ si_meminfo_node(&i, pgdat->node_id); -+#else //CONFIG_NUMA -+ si_meminfo(&i); -+#endif //CONFIG_NUMA -+ node_mem_total = i.totalram; -+ -+ if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { -+ sysctl_anon_min_ratio_kb = -+ node_mem_total * sysctl_anon_min_ratio / 100; -+ sysctl_clean_low_ratio_kb = -+ node_mem_total * sysctl_clean_low_ratio / 100; -+ sysctl_clean_min_ratio_kb = -+ node_mem_total * sysctl_clean_min_ratio / 100; -+ workingset_protection_prev_totalram = node_mem_total; -+ } -+ } -+ -+ /* -+ * Check the number of anonymous pages to protect them from -+ * reclaiming if their amount is below the specified. 
-+ */ -+ if (sysctl_anon_min_ratio) { -+ unsigned long reclaimable_anon; -+ -+ reclaimable_anon = -+ node_page_state(pgdat, NR_ACTIVE_ANON) + -+ node_page_state(pgdat, NR_INACTIVE_ANON) + -+ node_page_state(pgdat, NR_ISOLATED_ANON); -+ -+ sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; -+ } else -+ sc->anon_below_min = 0; -+ -+ /* -+ * Check the number of clean file pages to protect them from -+ * reclaiming if their amount is below the specified. -+ */ -+ if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { -+ unsigned long reclaimable_file, dirty, clean; -+ -+ reclaimable_file = -+ node_page_state(pgdat, NR_ACTIVE_FILE) + -+ node_page_state(pgdat, NR_INACTIVE_FILE) + -+ node_page_state(pgdat, NR_ISOLATED_FILE); -+ dirty = node_page_state(pgdat, NR_FILE_DIRTY); -+ /* -+ * node_page_state() sum can go out of sync since -+ * all the values are not read at once. -+ */ -+ if (likely(reclaimable_file > dirty)) -+ clean = reclaimable_file - dirty; -+ else -+ clean = 0; -+ -+ sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; -+ sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; -+ } else { -+ sc->clean_below_low = 0; -+ sc->clean_below_min = 0; -+ } -+} -+ - #ifdef CONFIG_LRU_GEN - - #ifdef CONFIG_LRU_GEN_ENABLED -@@ -4667,11 +4800,21 @@ static int get_tier_idx(struct lruvec *lruvec, int type) - return tier - 1; - } - --static int get_type_to_scan(struct lruvec *lruvec, int swappiness) -+static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) - { - struct ctrl_pos sp, pv; - -- if (swappiness <= MIN_SWAPPINESS + 1) -+ if (swappiness == MIN_SWAPPINESS) -+ return LRU_GEN_FILE; -+ -+ if (sc->clean_below_min) -+ return LRU_GEN_ANON; -+ if (sc->anon_below_min) -+ return LRU_GEN_FILE; -+ if (sc->clean_below_low) -+ return LRU_GEN_ANON; -+ -+ if (swappiness == MIN_SWAPPINESS + 1) - return LRU_GEN_FILE; - - if (swappiness >= MAX_SWAPPINESS) -@@ -4691,7 +4834,7 @@ static int isolate_folios(unsigned long 
nr_to_scan, struct lruvec *lruvec, - int *type_scanned, struct list_head *list) - { - int i; -- int type = get_type_to_scan(lruvec, swappiness); -+ int type = get_type_to_scan(lruvec, sc, swappiness); - - for_each_evictable_type(i, swappiness) { - int scanned; -@@ -4937,6 +5080,12 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - -+ prepare_workingset_protection(pgdat, sc); -+ -+ if (sysctl_workingset_protection && sc->clean_below_min && -+ !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) -+ return 0; -+ - /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ - if (mem_cgroup_below_min(NULL, memcg)) - return MEMCG_LRU_YOUNG; -@@ -6089,6 +6238,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) - - prepare_scan_control(pgdat, sc); - -+ prepare_workingset_protection(pgdat, sc); -+ - shrink_node_memcgs(pgdat, sc); - - flush_reclaim_state(sc); -diff --git a/scripts/Makefile.thinlto b/scripts/Makefile.thinlto -new file mode 100644 -index 000000000000..ec98fa2ead3b ---- /dev/null -+++ b/scripts/Makefile.thinlto -@@ -0,0 +1,38 @@ -+PHONY := __default -+__default: -+ -+include include/config/auto.conf -+include $(srctree)/scripts/Kbuild.include -+include $(srctree)/scripts/Makefile.lib -+ -+native-objs := $(patsubst %.o,%.thinlto-native.o,$(call read-file, vmlinux.thinlto-index)) -+ -+__default: $(native-objs) -+ -+# Generate .thinlto-native.o (obj) from .o (bitcode) and .thinlto.bc (summary) files -+# --------------------------------------------------------------------------- -+quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ -+ cmd_cc_o_bc = \ -+ $(CC) $(_c_flags) -fno-lto -Wno-unused-command-line-argument \ -+ -fthinlto-index=$(word 2, $^) -c -o $@ $< -+ -+targets += $(native-objs) -+$(native-objs): %.thinlto-native.o: %.o %.o.thinlto.bc FORCE -+ $(call if_changed,cc_o_bc) -+ -+# Add FORCE to the prerequisites of a 
target to force it to be always rebuilt. -+# --------------------------------------------------------------------------- -+ -+PHONY += FORCE -+FORCE: -+ -+# Read all saved command lines and dependencies for the $(targets) we -+# may be building above, using $(if_changed{,_dep}). As an -+# optimization, we don't need to read them if the target does not -+# exist, we will rebuild anyway in that case. -+ -+existing-targets := $(wildcard $(sort $(targets))) -+ -+-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) -+ -+.PHONY: $(PHONY) -diff --git a/scripts/Makefile.vmlinux_a b/scripts/Makefile.vmlinux_a -new file mode 100644 -index 000000000000..73c9545de7cf ---- /dev/null -+++ b/scripts/Makefile.vmlinux_a -@@ -0,0 +1,83 @@ -+# SPDX-License-Identifier: GPL-2.0-only -+ -+PHONY := __default -+__default: vmlinux.a -+ -+include include/config/auto.conf -+include $(srctree)/scripts/Kbuild.include -+include $(srctree)/scripts/Makefile.lib -+ -+# Link of built-in-fixup.a -+# --------------------------------------------------------------------------- -+ -+# '$(AR) mPi' needs 'T' to workaround the bug of llvm-ar <= 14 -+quiet_cmd_ar_builtin_fixup = AR $@ -+ cmd_ar_builtin_fixup = \ -+ rm -f $@; \ -+ $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ -+ $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) -+ -+targets += built-in-fixup.a -+built-in-fixup.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE -+ $(call if_changed,ar_builtin_fixup) -+ -+ifdef CONFIG_LTO_CLANG_THIN_DIST -+ -+quiet_cmd_builtin.order = GEN $@ -+ cmd_builtin.order = $(AR) t $< > $@ -+ -+targets += builtin.order -+builtin.order: built-in-fixup.a FORCE -+ $(call if_changed,builtin.order) -+ -+quiet_cmd_ld_thinlto_index = LD $@ -+ cmd_ld_thinlto_index = \ -+ $(LD) $(KBUILD_LDFLAGS) -r --thinlto-index-only=$@ @$< -+ -+targets += vmlinux.thinlto-index -+vmlinux.thinlto-index: builtin.order FORCE -+ $(call if_changed,ld_thinlto_index) 
-+ -+quiet_cmd_ar_vmlinux.a = GEN $@ -+ cmd_ar_vmlinux.a = \ -+ rm -f $@; \ -+ while read -r obj; do \ -+ if grep -q $${obj} $(word 2, $^); then \ -+ echo $${obj%.o}.thinlto-native.o; \ -+ else \ -+ echo $${obj}; \ -+ fi; \ -+ done < $< | xargs $(AR) cDPrS $@ -+ -+targets += vmlinux.a -+vmlinux.a: builtin.order vmlinux.thinlto-index FORCE -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.thinlto -+ $(call if_changed,ar_vmlinux.a) -+ -+else -+ -+# vmlinux.a -+# --------------------------------------------------------------------------- -+ -+targets += vmlinux.a -+vmlinux.a: built-in-fixup.a FORCE -+ $(call if_changed,copy) -+ -+endif -+ -+# Add FORCE to the prerequisites of a target to force it to be always rebuilt. -+# --------------------------------------------------------------------------- -+ -+PHONY += FORCE -+FORCE: -+ -+# Read all saved command lines and dependencies for the $(targets) we -+# may be building above, using $(if_changed{,_dep}). As an -+# optimization, we don't need to read them if the target does not -+# exist, we will rebuild anyway in that case. -+ -+existing-targets := $(wildcard $(sort $(targets))) -+ -+-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) -+ -+.PHONY: $(PHONY) -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 5ca7c268294e..8b01746c9ce6 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -1473,13 +1473,22 @@ static void extract_crcs_for_object(const char *object, struct module *mod) - char cmd_file[PATH_MAX]; - char *buf, *p; - const char *base; -- int dirlen, ret; -+ int dirlen, baselen_without_suffix, ret; - - base = get_basename(object); - dirlen = base - object; - -- ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%s.cmd", -- dirlen, object, base); -+ baselen_without_suffix = strlen(object) - dirlen - strlen(".o"); -+ -+ /* -+ * When CONFIG_LTO_CLANG_THIN_DIST=y, the ELF is *.thinlto-native.o -+ * but the symbol CRCs are recorded in *.o.cmd file. 
-+ */ -+ if (strends(object, ".thinlto-native.o")) -+ baselen_without_suffix -= strlen(".thinlto-native"); -+ -+ ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%.*s.o.cmd", -+ dirlen, object, baselen_without_suffix, base); - if (ret >= sizeof(cmd_file)) { - error("%s: too long path was truncated\n", cmd_file); - return; --- -2.51.0 - diff --git a/sys-kernel/git-sources/0004-fixes.patch b/sys-kernel/git-sources/0004-fixes.patch deleted file mode 100644 index 1f68361..0000000 --- a/sys-kernel/git-sources/0004-fixes.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 3a2358a5db595bd3797db3e5d65cd01863f42b94 Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Mon, 1 Sep 2025 09:38:55 +0800 -Subject: [PATCH 4/4] fixes - -Signed-off-by: Eric Naim ---- - drivers/gpu/drm/drm_atomic_uapi.c | 23 ++++++++++++----------- - include/linux/btf.h | 2 +- - net/ipv4/route.c | 7 ++++++- - scripts/package/PKGBUILD | 5 +++++ - 4 files changed, 24 insertions(+), 13 deletions(-) - -diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c -index ecc73d52bfae..85dbdaa4a2e2 100644 ---- a/drivers/gpu/drm/drm_atomic_uapi.c -+++ b/drivers/gpu/drm/drm_atomic_uapi.c -@@ -1078,19 +1078,20 @@ int drm_atomic_set_property(struct drm_atomic_state *state, - } - - if (async_flip) { -- /* check if the prop does a nop change */ -- if ((prop != config->prop_fb_id && -- prop != config->prop_in_fence_fd && -- prop != config->prop_fb_damage_clips)) { -- ret = drm_atomic_plane_get_property(plane, plane_state, -- prop, &old_val); -- ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); -- } -+ /* no-op changes are always allowed */ -+ ret = drm_atomic_plane_get_property(plane, plane_state, -+ prop, &old_val); -+ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); - -- /* ask the driver if this non-primary plane is supported */ -- if (plane->type != DRM_PLANE_TYPE_PRIMARY) { -- ret = -EINVAL; -+ /* fail everything that isn't no-op or a pure flip */ -+ if (ret && 
prop != config->prop_fb_id && -+ prop != config->prop_in_fence_fd && -+ prop != config->prop_fb_damage_clips) { -+ break; -+ } - -+ if (ret && plane->type != DRM_PLANE_TYPE_PRIMARY) { -+ /* ask the driver if this non-primary plane is supported */ - if (plane_funcs && plane_funcs->atomic_async_check) - ret = plane_funcs->atomic_async_check(plane, state, true); - -diff --git a/include/linux/btf.h b/include/linux/btf.h -index 9eda6b113f9b..f06976ffb63f 100644 ---- a/include/linux/btf.h -+++ b/include/linux/btf.h -@@ -86,7 +86,7 @@ - * as to avoid issues such as the compiler inlining or eliding either a static - * kfunc, or a global kfunc in an LTO build. - */ --#define __bpf_kfunc __used __retain noinline -+#define __bpf_kfunc __used __retain __noclone noinline - - #define __bpf_kfunc_start_defs() \ - __diag_push(); \ -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index baa43e5966b1..05a5d185807a 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -2592,6 +2592,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, - do_cache = true; - if (type == RTN_BROADCAST) { - flags |= RTCF_BROADCAST | RTCF_LOCAL; -+ fi = NULL; - } else if (type == RTN_MULTICAST) { - flags |= RTCF_MULTICAST | RTCF_LOCAL; - if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, -@@ -2661,8 +2662,12 @@ static struct rtable *__mkroute_output(const struct fib_result *res, - rth->dst.output = ip_mc_output; - RT_CACHE_STAT_INC(out_slow_mc); - } -+ if (type == RTN_BROADCAST && res->fi) { -+ /* ensure MTU value for broadcast routes is retained */ -+ ip_dst_init_metrics(&rth->dst, res->fi->fib_metrics); -+ } - #ifdef CONFIG_IP_MROUTE -- if (type == RTN_MULTICAST) { -+ else if (type == RTN_MULTICAST) { - if (IN_DEV_MFORWARD(in_dev) && - !ipv4_is_local_multicast(fl4->daddr)) { - rth->dst.input = ip_mr_input; -diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD -index 452374d63c24..08f80d7c5df0 100644 ---- a/scripts/package/PKGBUILD -+++ b/scripts/package/PKGBUILD 
-@@ -90,6 +90,11 @@ _package-headers() { - "${srctree}/scripts/package/install-extmod-build" "${builddir}" - fi - -+ # required when DEBUG_INFO_BTF_MODULES is enabled -+ if [ -f tools/bpf/resolve_btfids/resolve_btfids ]; then -+ install -Dt "$builddir/tools/bpf/resolve_btfids" tools/bpf/resolve_btfids/resolve_btfids -+ fi -+ - echo "Installing System.map and config..." - mkdir -p "${builddir}" - cp System.map "${builddir}/System.map" --- -2.51.0 - diff --git a/sys-kernel/git-sources/0101-glitched-additional-timer-tick-frequencies.patch b/sys-kernel/git-sources/0101-glitched-additional-timer-tick-frequencies.patch new file mode 100644 index 0000000..fd6b943 --- /dev/null +++ b/sys-kernel/git-sources/0101-glitched-additional-timer-tick-frequencies.patch @@ -0,0 +1,55 @@ +From b27b06990e40226b04623ee1a863e807cebee48f Mon Sep 17 00:00:00 2001 +From: Andre Ramnitz +Date: Tue, 21 Mar 2023 00:12:08 +0100 +Subject: glitched: additional timer tick frequencies. + +--- + kernel/Kconfig.hz | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 38ef6d06888e..f648df15ef4c 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -5,7 +5,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_600 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -40,6 +40,20 @@ choice + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_600 ++ bool "600 HZ" ++ help ++ 600 Hz is a balanced timer frequency. Provides fast interactivity ++ on desktops with great smoothness without increasing CPU power ++ consumption and sacrificing the battery life on laptops. ++ ++ config HZ_750 ++ bool "750 HZ" ++ help ++ 750 Hz is a good timer frequency for desktops. 
Provides fast ++ interactivity with great smoothness without sacrificing too ++ much throughput. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -53,6 +67,8 @@ config HZ + default 100 if HZ_100 + default 250 if HZ_250 + default 300 if HZ_300 ++ default 600 if HZ_600 ++ default 750 if HZ_750 + default 1000 if HZ_1000 + + config SCHED_HRTICK +-- +2.39.2 +