menloresearch · vansangpfiev · Feb 25, 2025 · Feb 25, 2025
diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
@@ -5356,7 +5356,7 @@
             "type": "string",
             "description": "The identifier or URL of the model to use. It can be a model ID on Cortexso (https://huggingface.co/cortexso) or a HuggingFace URL pointing to the model file. For example: 'gpt2' or 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf'",
             "examples": [
-              "tinyllama:gguf",
+              "tinyllama:1b",
               "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf"
             ]
           },

diff --git a/engine/e2e-test/api/model/test_api_model.py b/engine/e2e-test/api/model/test_api_model.py
@@ -95,14 +95,14 @@ async def test_models_start_stop_should_be_successful(self):
         time.sleep(30)
 
         print("Pull model")
-        json_body = {"model": "tinyllama:gguf"}
+        json_body = {"model": "tinyllama:1b"}
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
-        assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf"
+        assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
         await wait_for_websocket_download_success_event(timeout=None)
 
         # get API
         print("Get model")
-        response = requests.get("http://localhost:3928/v1/models/tinyllama:gguf")
+        response = requests.get("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
 
         # list API
@@ -111,7 +111,7 @@ async def test_models_start_stop_should_be_successful(self):
         assert response.status_code == 200
 
         print("Start model")
-        json_body = {"model": "tinyllama:gguf"}
+        json_body = {"model": "tinyllama:1b"}
         response = requests.post(
             "http://localhost:3928/v1/models/start", json=json_body
         )
@@ -123,13 +123,13 @@ async def test_models_start_stop_should_be_successful(self):
 
         # update API
         print("Update model")
-        body_json = {'model': 'tinyllama:gguf'}
-        response = requests.patch("http://localhost:3928/v1/models/tinyllama:gguf", json = body_json)        
+        body_json = {'model': 'tinyllama:1b'}
+        response = requests.patch("http://localhost:3928/v1/models/tinyllama:1b", json = body_json)        
         assert response.status_code == 200
 
         # delete API
         print("Delete model")
-        response = requests.delete("http://localhost:3928/v1/models/tinyllama:gguf")
+        response = requests.delete("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
 
     def test_models_sources_api(self):

diff --git a/engine/e2e-test/api/model/test_api_model_import.py b/engine/e2e-test/api/model/test_api_model_import.py
@@ -16,14 +16,14 @@ def setup_and_teardown(self):
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
     def test_model_import_should_be_success(self):
-        body_json = {'model': 'tinyllama:gguf',
+        body_json = {'model': 'tinyllama:1b',
                      'modelPath': '/path/to/local/gguf'}
         response = requests.post("http://localhost:3928/v1/models/import", json=body_json)              
         assert response.status_code == 200
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
     def test_model_import_with_name_should_be_success(self):
-        body_json = {'model': 'tinyllama:gguf',
+        body_json = {'model': 'tinyllama:1b',
                      'modelPath': '/path/to/local/gguf',
                      'name': 'test_model'}
         response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
@@ -45,7 +45,7 @@ def test_model_import_with_name_should_be_success(self):
         assert response.json()['files'][0] != '/path/to/local/gguf'
 
     def test_model_import_with_invalid_path_should_fail(self):
-        body_json = {'model': 'tinyllama:gguf',
+        body_json = {'model': 'tinyllama:1b',
                      'modelPath': '/invalid/path/to/gguf'}
         response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
         assert response.status_code == 400

diff --git a/engine/e2e-test/cli/model/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py
@@ -22,7 +22,7 @@ def setup_and_teardown(self):
 
         # Teardown
         # Clean up
-        run("Delete model", ["models", "delete", "tinyllama:gguf"])
+        run("Delete model", ["models", "delete", "tinyllama:1b"])
         stop_server()
 
     def test_model_pull_with_direct_url_should_be_success(self):
@@ -40,13 +40,13 @@ def test_model_pull_with_direct_url_should_be_success(self):
 
     @pytest.mark.asyncio
     async def test_models_delete_should_be_successful(self):
-        json_body = {"model": "tinyllama:gguf"}
+        json_body = {"model": "tinyllama:1b"}
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
-        assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf"
+        assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
         await wait_for_websocket_download_success_event(timeout=None)
 
         exit_code, output, error = run(
-            "Delete model", ["models", "delete", "tinyllama:gguf"]
+            "Delete model", ["models", "delete", "tinyllama:1b"]
         )
-        assert "Model tinyllama:gguf deleted successfully" in output
+        assert "Model tinyllama:1b deleted successfully" in output
         assert exit_code == 0, f"Model does not exist: {error}"
diff --git a/engine/e2e-test/local_test.py b/engine/e2e-test/local_test.py
@@ -9,7 +9,7 @@
 import websockets
 
 # Define a list of request configurations
-model_id = "tinyllama:gguf"
+model_id = "tinyllama:1b"
 
 
 def make_request(config):
@@ -78,7 +78,7 @@ def get_setup_configs(host_port):
             "method": "POST",
             "url": "http://" + host_port + "/v1/models/pull",
             "headers": {"Content-Type": "application/json"},
-            "data": {"model": "tinyllama:gguf"},
+            "data": {"model": "tinyllama:1b"},
         },
         {
             "method": "POST",
@@ -89,14 +89,14 @@ def get_setup_configs(host_port):
             "method": "POST",
             "url": "http://" + host_port + "/v1/models/start",
             "headers": {"Content-Type": "application/json"},
-            "data": {"model": "tinyllama:gguf"},
+            "data": {"model": "tinyllama:1b"},
         },
         {
             "method": "POST",
             "url": "http://" + host_port + "/v1/chat/completions",
             "headers": {"Content-Type": "application/json"},
             "data": {
-                "model": "tinyllama:gguf",
+                "model": "tinyllama:1b",
                 "stream": True,
                 "messages": [{"content": "How are you today?", "role": "user"}],
                 "max_tokens": 256,
@@ -111,7 +111,7 @@ def get_teardown_configs(host_port):
             "method": "POST",
             "url": "http://" + host_port + "/v1/models/stop",
             "headers": {"Content-Type": "application/json"},
-            "data": {"model": "tinyllama:gguf"},
+            "data": {"model": "tinyllama:1b"},
         },
         {
             "method": "DELETE",

diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/test_api_docker.py
@@ -2,7 +2,7 @@
 import requests
 from utils.test_runner import wait_for_websocket_download_success_event
 
-repo_branches = ["tinyllama:gguf"]
+repo_branches = ["tinyllama:1b"]
 
 
 class TestCortexsoModels:

diff --git a/engine/e2e-test/utils/test_runner.py b/engine/e2e-test/utils/test_runner.py
@@ -90,7 +90,7 @@ def start_server_if_needed():
         start_server()
 
 
-def pull_model_if_needed(model_id: str = "tinyllama:gguf"):
+def pull_model_if_needed(model_id: str = "tinyllama:1b"):
     """
     Pull the model if it is not already pulled.
     """

diff --git a/engine/test/components/test_event.cc b/engine/test/components/test_event.cc
@@ -9,7 +9,7 @@ TEST_F(EventTest, EventFromString) {
   // clang-format off
   std::string ev_str = R"({
     "task": {
-      "id": "tinyllama:gguf",
+      "id": "tinyllama:1b",
       "items": [
         {
           "bytes": 668788096,

diff --git a/engine/test/components/test_huggingface_utils.cc b/engine/test/components/test_huggingface_utils.cc
@@ -7,17 +7,16 @@ TEST_F(HuggingFaceUtilTestSuite, TestGetModelRepositoryBranches) {
   auto branches =
       huggingface_utils::GetModelRepositoryBranches("cortexso", "tinyllama");
 
-  EXPECT_GE(branches.value().size(), 3);
+  EXPECT_GE(branches.value().size(), 1);
   EXPECT_EQ(branches.value()["main"].name, "main");
   EXPECT_EQ(branches.value()["main"].ref, "refs/heads/main");
   EXPECT_EQ(branches.value()["1b"].name, "1b");
   EXPECT_EQ(branches.value()["1b"].ref, "refs/heads/1b");
-  EXPECT_EQ(branches.value()["gguf"].name, "gguf");
-  EXPECT_EQ(branches.value()["gguf"].ref, "refs/heads/gguf");
 }
 
 // TODO(sang) re-enable when main branch is fixed
-TEST_F(HuggingFaceUtilTestSuite, DISABLED_TestGetHuggingFaceModelRepoInfoSuccessfully) {
+TEST_F(HuggingFaceUtilTestSuite,
+       DISABLED_TestGetHuggingFaceModelRepoInfoSuccessfully) {
   auto model_info =
       huggingface_utils::GetHuggingFaceModelRepoInfo("cortexso", "tinyllama");