Wagner Bruna committed on
Commit
f9fd130
·
1 Parent(s): 758970f

vulkan: add environment variable GGML_VK_PREFER_HOST_MEMORY to avoid VRAM allocation (llama/11592)

Browse files
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -167,6 +167,7 @@ struct vk_device_struct {
167
  uint32_t subgroup_size;
168
  uint32_t shader_core_count;
169
  bool uma;
 
170
  bool float_controls_rte_fp16;
171
 
172
  bool subgroup_size_control;
@@ -1294,7 +1295,9 @@ static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk:
1294
  static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
1295
  vk_buffer buf;
1296
  try {
1297
- if (device->uma) {
 
 
1298
  // Fall back to host memory type
1299
  buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
1300
  } else {
@@ -2199,6 +2202,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
2199
  device->physical_device = physical_devices[dev_num];
2200
  const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
2201
 
 
 
 
2202
  bool fp16_storage = false;
2203
  bool fp16_compute = false;
2204
  bool maintenance4_support = false;
 
167
  uint32_t subgroup_size;
168
  uint32_t shader_core_count;
169
  bool uma;
170
+ bool prefer_host_memory;
171
  bool float_controls_rte_fp16;
172
 
173
  bool subgroup_size_control;
 
1295
  static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
1296
  vk_buffer buf;
1297
  try {
1298
+ if (device->prefer_host_memory) {
1299
+ buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
1300
+ } else if (device->uma) {
1301
  // Fall back to host memory type
1302
  buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
1303
  } else {
 
2202
  device->physical_device = physical_devices[dev_num];
2203
  const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
2204
 
2205
+ const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
2206
+ device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
2207
+
2208
  bool fp16_storage = false;
2209
  bool fp16_compute = false;
2210
  bool maintenance4_support = false;