vllm.model_executor.models.qwen ¶
 Inference-only QWen model compatible with HuggingFace weights.
  QWenAttention ¶
  Bases: Module
Source code in vllm/model_executor/models/qwen.py
   attn  instance-attribute  ¶
 attn = Attention(
    num_heads,
    head_dim,
    scaling,
    cache_config=cache_config,
    quant_config=quant_config,
    prefix=f"{prefix}.attn",
)
  c_attn  instance-attribute  ¶
 c_attn = QKVParallelLinear(
    hidden_size,
    head_dim,
    total_num_heads,
    bias=True,
    quant_config=quant_config,
)
  c_proj  instance-attribute  ¶
 c_proj = RowParallelLinear(
    total_num_heads * head_dim,
    hidden_size,
    bias=False,
    quant_config=quant_config,
)
  rotary_emb  instance-attribute  ¶
 rotary_emb = get_rope(
    head_dim,
    rotary_dim=head_dim,
    max_position=max_position_embeddings,
    base=rope_theta,
    rope_scaling=rope_scaling,
)
  __init__ ¶
 __init__(
    hidden_size: int,
    num_heads: int,
    max_position_embeddings: int,
    rope_theta: float = 10000,
    rope_scaling: dict[str, Any] | None = None,
    cache_config: CacheConfig | None = None,
    quant_config: QuantizationConfig | None = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/qwen.py
   forward ¶
  Source code in vllm/model_executor/models/qwen.py
   QWenBaseModel ¶
  Bases: Module
Source code in vllm/model_executor/models/qwen.py
   lm_head  instance-attribute  ¶
 lm_head = ParallelLMHead(
    vocab_size,
    hidden_size,
    quant_config=quant_config,
    prefix=maybe_prefix(prefix, "lm_head"),
)
  make_empty_intermediate_tensors  instance-attribute  ¶
    transformer  instance-attribute  ¶
 transformer = transformer_type(
    vllm_config=vllm_config,
    prefix=maybe_prefix(prefix, "transformer"),
)
  __init__ ¶
 __init__(
    *,
    vllm_config: VllmConfig,
    prefix: str = "",
    transformer_type: type[QWenModel] = QWenModel,
) -> None
Source code in vllm/model_executor/models/qwen.py
   compute_logits ¶
     load_weights ¶
  Source code in vllm/model_executor/models/qwen.py
   QWenBlock ¶
  Bases: Module
Source code in vllm/model_executor/models/qwen.py
   attn  instance-attribute  ¶
 attn = QWenAttention(
    hidden_size,
    num_attention_heads,
    max_position_embeddings,
    rope_theta=rope_theta,
    rope_scaling=rope_scaling,
    cache_config=cache_config,
    quant_config=quant_config,
    prefix=f"{prefix}.attn",
)
  mlp  instance-attribute  ¶
 mlp = QWenMLP(
    hidden_size,
    intermediate_size // 2,
    quant_config=quant_config,
)
  __init__ ¶
 __init__(
    config: PretrainedConfig,
    cache_config: CacheConfig | None = None,
    quant_config: QuantizationConfig | None = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/qwen.py
   forward ¶
 forward(
    positions: Tensor,
    hidden_states: Tensor,
    residual: Tensor | None,
) -> tuple[Tensor, Tensor]
Source code in vllm/model_executor/models/qwen.py
   QWenLMHeadModel ¶
  Bases: QWenBaseModel, SupportsPP, SupportsLoRA
Source code in vllm/model_executor/models/qwen.py
   packed_modules_mapping  class-attribute instance-attribute  ¶
    __init__ ¶
 __init__(*, vllm_config: VllmConfig, prefix: str = "")
Source code in vllm/model_executor/models/qwen.py
   forward ¶
 forward(
    input_ids: Tensor,
    positions: Tensor,
    intermediate_tensors: IntermediateTensors | None = None,
    inputs_embeds: Tensor | None = None,
) -> Tensor | IntermediateTensors
Source code in vllm/model_executor/models/qwen.py
   QWenMLP ¶
  Bases: Module
MLP for the language component of the Qwen model. It contains a MergedColumnParallelLinear that merges two projections, combined via a SiLU activation.
Source code in vllm/model_executor/models/qwen.py
   c_proj  instance-attribute  ¶
 c_proj = RowParallelLinear(
    intermediate_size,
    hidden_size,
    bias=False,
    quant_config=quant_config,
)
  gate_up_proj  instance-attribute  ¶
 gate_up_proj = MergedColumnParallelLinear(
    hidden_size,
    [intermediate_size] * 2,
    bias=False,
    quant_config=quant_config,
)
  __init__ ¶
 __init__(
    hidden_size: int,
    intermediate_size: int,
    hidden_act: str = "silu",
    quant_config: QuantizationConfig | None = None,
)
Source code in vllm/model_executor/models/qwen.py
   QWenModel ¶
  Bases: Module
Source code in vllm/model_executor/models/qwen.py
   make_empty_intermediate_tensors  instance-attribute  ¶
 make_empty_intermediate_tensors = (
    make_empty_intermediate_tensors_factory(
        ["hidden_states", "residual"], hidden_size
    )
)
  __init__ ¶
 __init__(*, vllm_config: VllmConfig, prefix: str = "")
Source code in vllm/model_executor/models/qwen.py
   forward ¶
 forward(
    input_ids: Tensor,
    positions: Tensor,
    intermediate_tensors: IntermediateTensors | None,
    inputs_embeds: Tensor | None = None,
) -> Tensor | IntermediateTensors