Commit 92c9423

Support true sequential options in gptq (#1656)
Signed-off-by: YIYANGCAI <yiyang.cai@intel.com>
1 parent d640297 commit 92c9423

6 files changed (+260 −102 lines)

docs/source/quantization_weight_only.md

+2
@@ -87,6 +87,8 @@ Notes:
 | use_max_length | False | Whether to align all calibration data to fixed length, which equals to pad_max_length. |
 | block_size | 128 | Execute GPTQ quantization per block, block shape = [$C_{out}$, block_size] |
 | static_groups | False | Whether to calculate group-wise quantization parameters in advance. This option mitigates act_order's extra computational requirements. |
+| true_sequential | False | Whether to quantize layers within a transformer block in their original order. This can lead to higher accuracy but a slower overall quantization process. |
+| lm_head | False | Whether to quantize the lm_head (the linear layer that produces predictions at the end of the language model). |

 **Note:** Neural Compressor provides `Unsigned integer for asymmetric quantization` and `Signed integer for symmetric quantization`. Please follow the below section to compress the low bit data type for saving.
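For context, both new knobs flow through the `gptq_args` recipe documented above. Below is a minimal sketch of setting them via Neural Compressor's 2.x weight-only API; `model` and `dataloader` are caller-supplied placeholders, and the `op_type_dict` values are illustrative defaults rather than part of this commit.

```python
# A minimal sketch, assuming the 2.x PostTrainingQuantConfig weight-only API.
# `model` and `dataloader` are placeholders supplied by the caller.
from neural_compressor import PostTrainingQuantConfig, quantization

conf = PostTrainingQuantConfig(
    approach="weight_only",
    op_type_dict={
        ".*": {  # match all supported ops; these values are illustrative
            "weight": {"bits": 4, "group_size": 128, "scheme": "asym", "algorithm": "GPTQ"},
        },
    },
    recipes={
        "gptq_args": {
            "act_order": False,
            "static_groups": False,
            "true_sequential": True,  # quantize sub-layers in their original order
            "lm_head": True,          # also quantize the final prediction layer
        },
    },
)
q_model = quantization.fit(model, conf, calib_dataloader=dataloader)
```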

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py

+4 −1
@@ -77,6 +77,8 @@
                         this should align with your model config, \
                         and your dataset builder args: args.pad_max_length')
 parser.add_argument('--gptq_static_groups', action='store_true', help='Use determined group to do quantization')
+parser.add_argument('--gptq_true_sequential', action='store_true', help="Whether to run in true_sequential mode.")
+parser.add_argument('--gptq_lm_head', action='store_true', help="Whether to use GPTQ to quantize the output layer of the LLMs.")
 # ==============code generation args===========
 parser.add_argument("--code_generation", action="store_true")
 parser.add_argument("--n_samples", default=200, type=int)
@@ -278,7 +280,8 @@ def calib_func(prepared_model):
             'use_max_length': args.gptq_use_max_length,
             'pad_max_length': args.gptq_pad_max_length,
             'static_groups': args.gptq_static_groups,
-            "enable_mse_search": args.woq_enable_mse_search,
+            "true_sequential": args.gptq_true_sequential,
+            "lm_head": args.gptq_lm_head,
         }
         # GPTQ: use assistive functions to modify calib_dataloader and calib_func
         # TEQ: set calib_func=None, use default training func as calib_func
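A hypothetical invocation of the example script with the new switches; apart from `--gptq_true_sequential` and `--gptq_lm_head`, the flags and model name are assumed from the script's existing interface.

```bash
# Hypothetical command line; only the last two flags are new in this commit.
python run_clm_no_trainer.py \
    --model facebook/opt-125m \
    --quantize \
    --woq_algo GPTQ \
    --gptq_use_max_length \
    --gptq_true_sequential \
    --gptq_lm_head
```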

neural_compressor/adaptor/pytorch.py

+2
@@ -4722,6 +4722,8 @@ def gptq_quantize(self, model, tune_cfg, dataloader):
                 "act_order": self.recipes["gptq_args"].get("act_order", False),
                 "block_size": self.recipes["gptq_args"].get("block_size", True),
                 "static_groups": self.recipes["gptq_args"].get("static_groups", False),
+                "true_sequential": self.recipes["gptq_args"].get("true_sequential", False),
+                "lm_head": self.recipes["gptq_args"].get("lm_head", False),
             }
             nsamples = self.recipes["gptq_args"].get("nsamples", 128)
             use_max_length = self.recipes["gptq_args"].get("use_max_length", False)
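Conceptually, `true_sequential` changes which activations each layer is calibrated against. The self-contained toy below illustrates only that ordering: plain round-to-nearest stands in for the activation-aware GPTQ solve, and the helper is illustrative, not the adapter's real internals.

```python
# Toy illustration of the true_sequential ordering (not the adapter's code).
import torch

def gptq_like_update(layer: torch.nn.Linear, calib_inputs: torch.Tensor, bits: int = 4) -> None:
    # Real GPTQ minimizes the layer-output error over calib_inputs via a
    # Hessian-based solve; plain rounding stands in so the data flow stays visible.
    w = layer.weight.data
    qmax = 2 ** (bits - 1) - 1
    scale = w.abs().max() / qmax
    layer.weight.data = (w / scale).round().clamp_(-qmax - 1, qmax) * scale

block = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.Linear(16, 16))
x = torch.randn(8, 16)

with torch.no_grad():
    # true_sequential=True: follow the forward order, so the second layer is
    # calibrated on activations produced by the already-quantized first layer.
    gptq_like_update(block[0], calib_inputs=x)
    x1 = block[0](x)
    gptq_like_update(block[1], calib_inputs=x1)
    # With true_sequential=False, both layers would instead be calibrated on
    # activations captured in a single full-precision forward pass.
```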
