Spaces:
Running
Running
Wagner Bruna
committed on
Commit
·
f9fd130
1
Parent(s):
758970f
vulkan: add environment variable GGML_VK_PREFER_HOST_MEMORY to avoid VRAM allocation (llama/11592)
Browse files
ggml/src/ggml-vulkan/ggml-vulkan.cpp
CHANGED
|
@@ -167,6 +167,7 @@ struct vk_device_struct {
|
|
| 167 |
uint32_t subgroup_size;
|
| 168 |
uint32_t shader_core_count;
|
| 169 |
bool uma;
|
|
|
|
| 170 |
bool float_controls_rte_fp16;
|
| 171 |
|
| 172 |
bool subgroup_size_control;
|
|
@@ -1294,7 +1295,9 @@ static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk:
|
|
| 1294 |
static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
|
| 1295 |
vk_buffer buf;
|
| 1296 |
try {
|
| 1297 |
-
if (device->uma) {
|
|
|
|
|
|
|
| 1298 |
// Fall back to host memory type
|
| 1299 |
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 1300 |
} else {
|
|
@@ -2199,6 +2202,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|
| 2199 |
device->physical_device = physical_devices[dev_num];
|
| 2200 |
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
|
| 2201 |
|
|
|
|
|
|
|
|
|
|
| 2202 |
bool fp16_storage = false;
|
| 2203 |
bool fp16_compute = false;
|
| 2204 |
bool maintenance4_support = false;
|
|
|
|
| 167 |
uint32_t subgroup_size;
|
| 168 |
uint32_t shader_core_count;
|
| 169 |
bool uma;
|
| 170 |
+
bool prefer_host_memory;
|
| 171 |
bool float_controls_rte_fp16;
|
| 172 |
|
| 173 |
bool subgroup_size_control;
|
|
|
|
| 1295 |
static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
|
| 1296 |
vk_buffer buf;
|
| 1297 |
try {
|
| 1298 |
+
if (device->prefer_host_memory) {
|
| 1299 |
+
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
| 1300 |
+
} else if (device->uma) {
|
| 1301 |
// Fall back to host memory type
|
| 1302 |
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 1303 |
} else {
|
|
|
|
| 2202 |
device->physical_device = physical_devices[dev_num];
|
| 2203 |
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
|
| 2204 |
|
| 2205 |
+
const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
|
| 2206 |
+
device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
|
| 2207 |
+
|
| 2208 |
bool fp16_storage = false;
|
| 2209 |
bool fp16_compute = false;
|
| 2210 |
bool maintenance4_support = false;
|